diff --git a/LexOccam.x b/LexOccam.x
index b5e2974..c3555b9 100644
--- a/LexOccam.x
+++ b/LexOccam.x
@@ -20,10 +20,10 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
module LexOccam where
import Data.Generics
-import System
+import Errors
import Metadata
-import PrettyShow
+import Pass
}
%wrapper "posn"
@@ -96,7 +96,13 @@ $white+ ;
{
-- | An occam source token.
-data Token = Token TokenType Meta String
+-- Only `Token` is generated by the lexer itself; the others are added later
+-- once the indentation has been analysed.
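+--
+-- For illustration only: a line indented one level deeper than its
+-- predecessor appears in the structured stream as
+-- `... EndOfLine, Indent, <tokens of the deeper line> ...`.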
+data Token =
+  Token TokenType Meta String -- ^ A real token read from the source
+  | Indent -- ^ Indentation increase
+  | Outdent -- ^ Indentation decrease
+  | EndOfLine -- ^ End of line
  deriving (Show, Eq, Typeable, Data)
-- | The type of a source token.
@@ -111,22 +117,19 @@ mkToken :: TokenType -> AlexPosn -> String -> Token
mkToken tt (AlexPn _ line col) s = Token tt emptyMeta s
--- | Run the lexer, returning either an error position or a list of tokens.
+-- | Run the lexer, returning a list of tokens (or dying with a lexical error).
--- (This is based on the `alexScanTokens` function that Alex provides, but it
--- adds error reporting.)
-runLexer :: String -> String -> Either Meta [Token]
+-- (This is based on the `alexScanTokens` function that Alex provides.)
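+--
+-- For example (the filename and `source` string are illustrative), a caller
+-- inside a `PassM` computation would do something like:
+--
+-- > tokens <- runLexer "demo.occ" source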
+runLexer :: String -> String -> PassM [Token]
runLexer filename str = go (alexStartPos, '\n', str)
  where
    go inp@(pos@(AlexPn _ line col), _, str) =
        case alexScan inp 0 of
-          AlexEOF -> Right []
-          AlexError _ -> Left meta
+          AlexEOF -> return []
+          AlexError _ -> dieP meta "Unrecognised token"
          AlexSkip inp' len -> go inp'
          AlexToken inp' len act ->
-            case go inp' of
-              e@(Left _) -> e
-              Right ts -> Right $ tok : ts
-            where (Token tt _ s) = act pos (take len str)
-                  tok = Token tt meta s
+            do ts <- go inp'
+               let (Token tt _ s) = act pos (take len str)
+               return $ (Token tt meta s) : ts
      where
        meta = emptyMeta {
@@ -134,16 +137,5 @@ runLexer filename str = go (alexStartPos, '\n', str)
            metaLine = line,
            metaColumn = col
          }
-
--- | Main function for testing the lexer.
-main :: IO ()
-main
-    = do (arg:_) <- getArgs
-         s <- readFile arg
-         let tokens =
-               case runLexer arg s of
-                 Left m -> error $ "Lex error: " ++ show m
-                 Right ts -> ts
-         putStrLn $ pshow tokens
}
diff --git a/Makefile b/Makefile
index 5394140..d02db7e 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@ tocktest: $(sources)
	ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain
lextest: $(sources)
-	ghc $(ghc_opts) -o lextest -main-is LexOccam --make LexOccam
+	ghc $(ghc_opts) -o lextest -main-is StructureOccam --make StructureOccam
CFLAGS = \
	-O2 \
diff --git a/StructureOccam.hs b/StructureOccam.hs
new file mode 100644
index 0000000..10341ee
--- /dev/null
+++ b/StructureOccam.hs
@@ -0,0 +1,84 @@
+{-
+Tock: a compiler for parallel languages
+Copyright (C) 2007 University of Kent
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation, either version 2 of the License, or (at your
+option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program. If not, see <http://www.gnu.org/licenses/>.
+-}
+
+-- | Analyse syntactic structure of occam code.
+module StructureOccam where
+
+import Control.Monad.Error
+import Control.Monad.State
+import Data.Generics
+import System
+
+import CompState
+import Errors
+import LexOccam
+import Metadata
+import Pass
+import PrettyShow
+
+-- | Given the output of the lexer for a single file, add `Indent`, `Outdent`
+-- and `EndOfLine` markers.
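+--
+-- For illustration only, a fragment like
+--
+-- >   SEQ
+-- >     x := 1
+--
+-- comes out roughly as
+--
+-- > [Token ... "SEQ", EndOfLine, Indent,
+-- >  Token ... "x", Token ... ":=", Token ... "1", EndOfLine]
+--
+-- with the markers inserted before the first token of the line whose
+-- indentation changed.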
+structureOccam :: [Token] -> PassM [Token]
+structureOccam [] = return []
+structureOccam ts = analyse 1 firstLine ts
+  where
+    -- Find the first line that's actually got something on it.
+    firstLine
+        = case ts of (Token _ m _:_) -> metaLine m
+
+    analyse :: Int -> Int -> [Token] -> PassM [Token]
+    -- Add extra EndOfLine at the end of the file.
+    analyse _ _ [] = return [EndOfLine]
+    analyse prevCol prevLine (t@(Token _ m _):ts)
+        = if line /= prevLine
+            then do rest <- analyse col line ts
+                    newLine $ t : rest
+            else do rest <- analyse prevCol line ts
+                    return $ t : rest
+      where
+        col = metaColumn m
+        line = metaLine m
+
+        -- A new line -- look to see what's going on with the indentation.
+        newLine rest
+          | col == prevCol + 2 = return $ EndOfLine : Indent : rest
+          -- FIXME: If col > prevCol, then look to see if there's a VALOF
+          -- coming up before the next column change...
+          | col < prevCol
+              = if (prevCol - col) `mod` 2 == 0
+                  then return $ EndOfLine : (replicate steps Outdent ++ rest)
+                  else dieP m "Invalid indentation"
+          | col == prevCol = return $ EndOfLine : rest
+          | otherwise = dieP m "Invalid indentation"
+          where
+            steps = (prevCol - col) `div` 2
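+            -- e.g. (illustrative) dropping from column 7 back to column 3
+            -- gives steps == 2, so two Outdent markers follow the EndOfLine.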
+
+-- | Main function for testing.
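+-- (Built by the Makefile's `lextest` target; run it as e.g.
+-- `./lextest somefile.occ`, where the filename is just an example.)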
+main :: IO ()
+main
+    = do (arg:_) <- getArgs
+         s <- readFile arg
+         e <- evalStateT (runErrorT (test arg s)) emptyState
+         return ()
+  where
+    test :: String -> String -> PassM ()
+    test arg s
+        = do tokens <- runLexer arg s
+             tokens' <- structureOccam tokens
+             liftIO $ putStrLn $ pshow tokens'
+