Structure analyser for the new lexer.

This commit is contained in:
Adam Sampson 2007-08-20 00:48:55 +00:00
parent 543a7b6872
commit a9f1e52103
3 changed files with 101 additions and 25 deletions

View File

@ -20,10 +20,10 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
module LexOccam where
import Data.Generics
import System
import Errors
import Metadata
import PrettyShow
import Pass
}
%wrapper "posn"
@ -96,7 +96,13 @@ $white+ ;
{
-- | An occam source token.
data Token = Token TokenType Meta String
-- Only `Token` is generated by the lexer itself; the others are added later
-- once the indentation has been analysed.
data Token =
Token TokenType Meta String -- ^ A real token read from the source
| Indent -- ^ Indentation increase
| Outdent -- ^ Indentation decrease
| EndOfLine -- ^ End of line
deriving (Show, Eq, Typeable, Data)
-- | The type of a source token.
@ -111,22 +117,19 @@ mkToken :: TokenType -> AlexPosn -> String -> Token
mkToken tt (AlexPn _ line col) s = Token tt emptyMeta s
-- | Run the lexer, returning either an error position or a list of tokens.
-- (This is based on the `alexScanTokens` function that Alex provides, but it
-- adds error reporting.)
runLexer :: String -> String -> Either Meta [Token]
-- (This is based on the `alexScanTokens` function that Alex provides.)
runLexer :: String -> String -> PassM [Token]
runLexer filename str = go (alexStartPos, '\n', str)
where
go inp@(pos@(AlexPn _ line col), _, str) =
case alexScan inp 0 of
AlexEOF -> Right []
AlexError _ -> Left meta
AlexEOF -> return []
AlexError _ -> dieP meta "Unrecognised token"
AlexSkip inp' len -> go inp'
AlexToken inp' len act ->
case go inp' of
e@(Left _) -> e
Right ts -> Right $ tok : ts
where (Token tt _ s) = act pos (take len str)
tok = Token tt meta s
do ts <- go inp'
let (Token tt _ s) = act pos (take len str)
return $ (Token tt meta s) : ts
where
meta = emptyMeta {
@ -134,16 +137,5 @@ runLexer filename str = go (alexStartPos, '\n', str)
metaLine = line,
metaColumn = col
}
-- | Main function for testing the lexer.
main :: IO ()
main
= do (arg:_) <- getArgs
s <- readFile arg
let tokens =
case runLexer arg s of
Left m -> error $ "Lex error: " ++ show m
Right ts -> ts
putStrLn $ pshow tokens
}

View File

@ -21,7 +21,7 @@ tocktest: $(sources)
ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain
lextest: $(sources)
ghc $(ghc_opts) -o lextest -main-is LexOccam --make LexOccam
ghc $(ghc_opts) -o lextest -main-is StructureOccam --make StructureOccam
CFLAGS = \
-O2 \

84
StructureOccam.hs Normal file
View File

@ -0,0 +1,84 @@
{-
Tock: a compiler for parallel languages
Copyright (C) 2007 University of Kent
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 2 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
-}
-- | Analyse syntactic structure of occam code.
module StructureOccam where
import Control.Monad.Error
import Control.Monad.State
import Data.Generics
import System
import CompState
import Errors
import LexOccam
import Metadata
import Pass
import PrettyShow
-- | Given the output of the lexer for a single file, add `Indent`, `Outdent`
-- and `EndOfLine` markers.
structureOccam :: [Token] -> PassM [Token]
structureOccam [] = return []
structureOccam ts = analyse 1 firstLine ts
where
-- Find the first line that's actually got something on it.
firstLine
= case ts of (Token _ m _:_) -> metaLine m
analyse :: Int -> Int -> [Token] -> PassM [Token]
-- Add extra EndOfLine at the end of the file.
analyse _ _ [] = return [EndOfLine]
analyse prevCol prevLine (t@(Token _ m _):ts)
= if line /= prevLine
then do rest <- analyse col line ts
newLine $ t : rest
else do rest <- analyse prevCol line ts
return $ t : rest
where
col = metaColumn m
line = metaLine m
-- A new line -- look to see what's going on with the indentation.
newLine rest
| col == prevCol + 2 = return $ EndOfLine : Indent : rest
-- FIXME: If col > prevCol, then look to see if there's a VALOF
-- coming up before the next column change...
| col < prevCol
= if (prevCol - col) `mod` 2 == 0
then return $ EndOfLine : (replicate steps Outdent ++ rest)
else dieP m "Invalid indentation"
| col == prevCol = return $ EndOfLine : rest
| otherwise = dieP m "Invalid indentation"
where
steps = (prevCol - col) `div` 2
-- | Main function for testing.
main :: IO ()
main
= do (arg:_) <- getArgs
s <- readFile arg
e <- evalStateT (runErrorT (test arg s)) emptyState
return ()
where
test :: String -> String -> PassM ()
test arg s
= do tokens <- runLexer arg s
tokens' <- structureOccam tokens
liftIO $ putStrLn $ pshow tokens'