diff --git a/LexOccam.x b/LexOccam.x
index b5e2974..c3555b9 100644
--- a/LexOccam.x
+++ b/LexOccam.x
@@ -20,10 +20,10 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
 
 module LexOccam where
 
 import Data.Generics
-import System
+import Errors
 import Metadata
-import PrettyShow
+import Pass
 }
 
 %wrapper "posn"
@@ -96,7 +96,13 @@ $white+ ;
 
 {
 -- | An occam source token.
-data Token = Token TokenType Meta String
+-- Only `Token` is generated by the lexer itself; the others are added later
+-- once the indentation has been analysed.
+data Token =
+  Token TokenType Meta String  -- ^ A real token read from the source
+  | Indent                     -- ^ Indentation increase
+  | Outdent                    -- ^ Indentation decrease
+  | EndOfLine                  -- ^ End of line
   deriving (Show, Eq, Typeable, Data)
 
 -- | The type of a source token.
@@ -111,22 +117,19 @@ mkToken :: TokenType -> AlexPosn -> String -> Token
 mkToken tt (AlexPn _ line col) s = Token tt emptyMeta s
 
 -- | Run the lexer, returning either an error position or a list of tokens.
--- (This is based on the `alexScanTokens` function that Alex provides, but it
--- adds error reporting.)
-runLexer :: String -> String -> Either Meta [Token]
+-- (This is based on the `alexScanTokens` function that Alex provides.)
+runLexer :: String -> String -> PassM [Token]
 runLexer filename str = go (alexStartPos, '\n', str)
   where
     go inp@(pos@(AlexPn _ line col), _, str)
        = case alexScan inp 0 of
-           AlexEOF -> Right []
-           AlexError _ -> Left meta
+           AlexEOF -> return []
+           AlexError _ -> dieP meta "Unrecognised token"
            AlexSkip inp' len -> go inp'
            AlexToken inp' len act ->
-             case go inp' of
-               e@(Left _) -> e
-               Right ts -> Right $ tok : ts
-             where (Token tt _ s) = act pos (take len str)
-                   tok = Token tt meta s
+             do ts <- go inp'
+                let (Token tt _ s) = act pos (take len str)
+                return $ (Token tt meta s) : ts
      where
       meta = emptyMeta {
@@ -134,16 +137,5 @@ runLexer filename str = go (alexStartPos, '\n', str)
                metaLine = line,
                metaColumn = col
              }
-
--- | Main function for testing the lexer.
-main :: IO ()
-main
-    = do (arg:_) <- getArgs
-         s <- readFile arg
-         let tokens =
-               case runLexer arg s of
-                 Left m -> error $ "Lex error: " ++ show m
-                 Right ts -> ts
-         putStrLn $ pshow tokens
 
 }
diff --git a/Makefile b/Makefile
index 5394140..d02db7e 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@ tocktest: $(sources)
 	ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain
 
 lextest: $(sources)
-	ghc $(ghc_opts) -o lextest -main-is LexOccam --make LexOccam
+	ghc $(ghc_opts) -o lextest -main-is StructureOccam --make StructureOccam
 
 CFLAGS = \
 	-O2 \
diff --git a/StructureOccam.hs b/StructureOccam.hs
new file mode 100644
index 0000000..10341ee
--- /dev/null
+++ b/StructureOccam.hs
@@ -0,0 +1,84 @@
+{-
+Tock: a compiler for parallel languages
+Copyright (C) 2007 University of Kent
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation, either version 2 of the License, or (at your
+option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program. If not, see <http://www.gnu.org/licenses/>.
+-}
+
+-- | Analyse syntactic structure of occam code.
+module StructureOccam where
+
+import Control.Monad.Error
+import Control.Monad.State
+import Data.Generics
+import System
+
+import CompState
+import Errors
+import LexOccam
+import Metadata
+import Pass
+import PrettyShow
+
+-- | Given the output of the lexer for a single file, add `Indent`, `Outdent`
+-- and `EndOfLine` markers.
+structureOccam :: [Token] -> PassM [Token]
+structureOccam [] = return []
+structureOccam ts = analyse 1 firstLine ts
+  where
+    -- Find the first line that's actually got something on it.
+    firstLine
+        = case ts of (Token _ m _:_) -> metaLine m
+
+    analyse :: Int -> Int -> [Token] -> PassM [Token]
+    -- Add extra EndOfLine at the end of the file.
+    analyse _ _ [] = return [EndOfLine]
+    analyse prevCol prevLine (t@(Token _ m _):ts)
+        = if line /= prevLine
+            then do rest <- analyse col line ts
+                    newLine $ t : rest
+            else do rest <- analyse prevCol line ts
+                    return $ t : rest
+      where
+        col = metaColumn m
+        line = metaLine m
+
+        -- A new line -- look to see what's going on with the indentation.
+        newLine rest
+            | col == prevCol + 2 = return $ EndOfLine : Indent : rest
+            -- FIXME: If col > prevCol, then look to see if there's a VALOF
+            -- coming up before the next column change...
+            | col < prevCol
+                = if (prevCol - col) `mod` 2 == 0
+                    then return $ EndOfLine : (replicate steps Outdent ++ rest)
+                    else dieP m "Invalid indentation"
+            | col == prevCol = return $ EndOfLine : rest
+            | otherwise = dieP m "Invalid indentation"
+          where
+            steps = (prevCol - col) `div` 2
+
+-- | Main function for testing.
+main :: IO ()
+main
+    = do (arg:_) <- getArgs
+         s <- readFile arg
+         e <- evalStateT (runErrorT (test arg s)) emptyState
+         return ()
+  where
+    test :: String -> String -> PassM ()
+    test arg s
+        = do tokens <- runLexer arg s
+             tokens' <- structureOccam tokens
+             liftIO $ putStrLn $ pshow tokens'
+
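
As a rough illustration of the indentation rule that the `newLine` helper in this patch implements: a step of exactly two columns emits one `Indent`, a drop by a multiple of two columns emits that many `Outdent`s, staying at the same column emits only `EndOfLine`, and any other change is rejected. The sketch below models just that column arithmetic in isolation; the `Marker` type, `lineBreak` function and `main` driver are hypothetical names for illustration only, not part of the patch, which works on the real `Token` stream inside `PassM`.

-- Sketch only: models the column arithmetic used by structureOccam's
-- newLine helper, assuming occam's two-space indentation steps.
module Main where

-- Hypothetical stand-in for the patch's Indent/Outdent/EndOfLine tokens.
data Marker = MEndOfLine | MIndent | MOutdent
  deriving (Show, Eq)

-- | Given the previous line's column and the new line's column, decide
-- which markers to emit at the line break.
lineBreak :: Int -> Int -> Either String [Marker]
lineBreak prevCol col
  | col == prevCol + 2 = Right [MEndOfLine, MIndent]
  | col == prevCol = Right [MEndOfLine]
  | col < prevCol, (prevCol - col) `mod` 2 == 0
      = Right (MEndOfLine : replicate ((prevCol - col) `div` 2) MOutdent)
  | otherwise = Left "Invalid indentation"

main :: IO ()
main = do
  print (lineBreak 1 3)  -- Right [MEndOfLine,MIndent]            (one level deeper)
  print (lineBreak 5 1)  -- Right [MEndOfLine,MOutdent,MOutdent]  (two levels out)
  print (lineBreak 1 2)  -- Left "Invalid indentation"            (odd step)

Note that, as in the patch, an increase of more than two columns is rejected here as well; the FIXME in structureOccam about VALOF refers to relaxing exactly that case.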