Rework the parser to use the new lexer.

The occam parser is now a GenParser Token OccState, rather than a GenParser Char OccState, and a lot of now-redundant code has been removed. The parser is also somewhat faster, which wasn't intended but is nice anyway. I've also modified the Rain parser to not rely on the old preprocessing code; it wasn't appropriate for Rain's syntax anyway, so I assume Neil will be replacing it eventually.
2007-08-21 20:44:15 +00:00 · 2007-08-21 20:44:15 +00:00 · 1bac142a53
commit 1bac142a53
parent 1f490e9f7f
9 changed files with 204 additions and 636 deletions
--- a/CompState.hs
+++ b/CompState.hs
@ -50,15 +50,10 @@ data CompState = CompState {
    csVerboseLevel :: Int,
    csOutputFile :: String,
-    -- Set by (new) preprocessor
+    -- Set by preprocessor
    csCurrentFile :: String,
    csUsedFiles :: Set.Set String,
    -- Set by (old) preprocessor
    csSourceFiles :: Map String String,
    csIndentLinesIn :: [String],
    csIndentLinesOut :: [String],
    -- Set by Parse
    csLocalNames :: [(String, A.Name)],
    csMainLocals :: [(String, A.Name)],
@ -93,10 +88,6 @@ emptyState = CompState {
    csCurrentFile = "none",
    csUsedFiles = Set.empty,
    csSourceFiles = Map.empty,
    csIndentLinesIn = [],
    csIndentLinesOut = [],
    csLocalNames = [],
    csMainLocals = [],
    csNames = Map.empty,
--- a/Indentation.hs
+++ b/Indentation.hs
@ -1,191 +0,0 @@
 {-
 Tock: a compiler for parallel languages
 Copyright (C) 2007  University of Kent
 This program is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
 Free Software Foundation, either version 2 of the License, or (at your
 option) any later version.
 This program is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 General Public License for more details.
 You should have received a copy of the GNU General Public License along
 with this program.  If not, see <http://www.gnu.org/licenses/>.
 -}
 -- | Parse indentation in occam source.
 module Indentation (removeIndentation, indentMarker, outdentMarker, eolMarker) where
 import Control.Monad
 import Control.Monad.Error
 import Control.Monad.State
 import Data.List
 import Text.Regex
 import CompState
 import Errors
 import Pass
 -- FIXME When this joins continuation lines, it should stash the details of
 -- what it joined into CompState so that error reporting later on can
 -- reconstruct the original position.
 indentMarker = "__indent"
 outdentMarker = "__outdent"
 eolMarker = "__eol"
 -- FIXME: There's probably a nicer way of doing this.
 -- (Well, trivially, use a WriterT...)
 -- | Preprocess occam source code to remove comments and turn indentation into
 -- explicit markers.
 removeIndentation :: String -> String -> PassM String
 removeIndentation filename orig
    =  do modify $ (\ps -> ps { csIndentLinesIn = origLines,
                                csIndentLinesOut = [] })
          catchError (nextLine 0) reportError
          ps <- get
          let out = concat $ intersperse "\n" $ reverse $ csIndentLinesOut ps
          modify $ (\ps -> ps { csIndentLinesIn = [],
                                csIndentLinesOut = [] })
          return out
  where
    origLines = lines orig
    -- | When something goes wrong, figure out how far through the file we'd got.
    reportError :: String -> PassM ()
    reportError error
        =  do ps <- get
              let lineNumber = length origLines - length (csIndentLinesIn ps)
              die $ filename ++ ":" ++ show lineNumber ++ ": " ++ error
    -- | Get the next raw line from the input.
    getLine :: PassM (Maybe String)
    getLine
        =  do ps <- get
              case csIndentLinesIn ps of
                [] -> return Nothing
                (line:rest) ->
                  do put $ ps { csIndentLinesIn = rest }
                     return $ Just line
    -- | Add a line to the output.
    putLine :: String -> PassM ()
    putLine line
        = modify $ (\ps -> ps { csIndentLinesOut = line : csIndentLinesOut ps })
    -- | Append to the *previous* line added.
    addToLine :: String -> PassM ()
    addToLine s
        = modify $ (\ps -> ps { csIndentLinesOut =
                                  case csIndentLinesOut ps of (l:ls) -> ((l ++ s):ls) })
    -- | Given a line, read the rest of it, then return the complete thing.
    finishLine :: String -> String -> Bool -> Bool -> String -> PassM String
    finishLine left soFar inStr isChar afterStr
        = case (left, inStr, isChar) of
            ([], False, _) -> plainEOL
            ('-':'-':cs, False, _) -> plainEOL
            ([], True, _) -> die "end of line in string without continuation"
            (['*'], True, _) -> stringEOL
            ('\'':cs, False, _) -> finishLine cs (afterStr ++ ('\'':soFar)) True True ""
            ('\'':cs, True, True) -> finishLine cs (afterStr ++ ('\'':soFar)) False False ""
            ('"':cs, False, _) -> finishLine cs (afterStr ++ ('"':soFar)) True False ""
            ('"':cs, True, False) -> finishLine cs (afterStr ++ ('"':soFar)) False False ""
            ('*':'*':cs, True, _) -> finishLine cs ('*':'*':soFar) True isChar afterStr
            ('*':'"':cs, True, _) -> finishLine cs ('"':'*':soFar) True isChar afterStr
            ('*':'\'':cs, True, _) -> finishLine cs ('\'':'*':soFar) True isChar afterStr
            (c:cs, _, _) -> finishLine cs (c:soFar) inStr isChar afterStr
      where
        -- | Finish a regular line.
        plainEOL :: PassM String
        plainEOL
            =  do let s = reverse soFar
                  if hasContinuation s
                    then do l <- getLine >>= checkJust "no continuation line"
                            finishLine l ('\n':soFar) False False ""
                    else return s
        -- | Finish a line where we're in the middle of a string.
        stringEOL :: PassM String
        stringEOL
            =  do l <- getLine >>= checkJust "no string continuation line"
                  l' <- contStringStart l
                  -- When we hit the end of the string, add a \n after it to
                  -- make the line numbers match up again.
                  finishLine l' soFar True isChar ('\n':afterStr)
    -- | Does a line have a continuation line following it?
    hasContinuation :: String -> Bool
    hasContinuation s
        = case matchRegex contRE s of
            Just _ -> True
            Nothing -> False
      where
        -- FIXME This should probably be based on the list of operators and
        -- reserved words that the parser already has; for now this is the
        -- regexp that occamdoc uses.
        contRE = mkRegexWithOpts "(-|~|\\+|-|\\*|/|\\\\|/\\\\|\\\\/|><|=|<>|<|>|>=|<=|,|;|:=|<<|>>|([[:space:]](MINUS|BITNOT|NOT|SIZE|REM|PLUS|MINUS|TIMES|BITAND|BITOR|AND|OR|AFTER|FROM|FOR|IS|RETYPES|RESHAPES)))[[:space:]]*$" False True
    -- | Strip the spaces-then-star beginning off a string continuation line.
    contStringStart :: String -> PassM String
    contStringStart (' ':cs) = contStringStart cs
    contStringStart ('*':cs) = return cs
    contStringStart _ = die "string continuation line doesn't start with *"
    -- | Get the next *complete* line from the input, resolving continuations.
    readLine :: PassM (Maybe String)
    readLine
        =  do line <- getLine
              case line of
                Just s ->
                  do r <- finishLine s "" False False ""
                     return $ Just r
                Nothing -> return Nothing
    -- | Compute the indentation level of a line, and return it without the indentation.
    countIndent :: String -> Int -> PassM (Int, String)
    -- Tabs are 8 spaces.
    countIndent ('\t':cs) soFar = countIndent cs (soFar + 4)
    countIndent (' ':' ':cs) soFar = countIndent cs (soFar + 1)
    countIndent [' '] soFar = return (soFar, [])
    countIndent (' ':_) soFar
        = die "bad indentation (odd number of spaces)"
    countIndent cs soFar = return (soFar, cs)
    -- | Repeat a string N times.
    rep :: Int -> String -> String
    rep n s = concat $ take n (repeat s)
    -- | Process the next line from the input.
    nextLine :: Int -> PassM ()
    nextLine level
        =  do l <- readLine
              case l of
                Nothing -> return ()
                Just line ->
                  do (newLevel, stripped) <- countIndent line 0
                     addLine level newLevel line stripped
    -- | Once a line's been retrieved, add it to the output along with the
    -- appropriate markers, then go and process the next one.
    addLine :: Int -> Int -> String -> String -> PassM ()
    addLine level newLevel line stripped
      | stripped == "" =
        do putLine ""
           nextLine level
      | newLevel > level =
        do addToLine $ rep (newLevel - level) (" " ++ indentMarker)
           putLine $ line ++ " " ++ eolMarker
           nextLine newLevel
      | newLevel < level =
        do addToLine $ rep (level - newLevel) (" " ++ outdentMarker)
           putLine $ line ++ " " ++ eolMarker
           nextLine newLevel
      | otherwise =
        do putLine $ line ++ " " ++ eolMarker
           nextLine level
--- a/Main.hs
+++ b/Main.hs
@ -17,7 +17,7 @@ with this program.  If not, see <http://www.gnu.org/licenses/>.
 -}
 -- | Driver for the compiler.
-module Main where
+module Main (main) where
 import Control.Monad
 import Control.Monad.Error
@ -34,6 +34,7 @@ import GenerateC
 import GenerateCPPCSP
 import Parse
 import Pass
 import PreprocessOccam
 import PrettyShow
 import RainParse
 import RainPasses
@ -141,14 +142,10 @@ compile :: String -> PassM ()
 compile fn
  =  do optsPS <- get
        debug "{{{ Preprocess"
        loadSource fn
        debug "}}}"
        debug "{{{ Parse"
        progress "Parse"
        ast1 <- case csFrontend optsPS of
-          FrontendOccam -> parseProgram fn
+          FrontendOccam -> preprocessOccamProgram fn >>= parseOccamProgram
          FrontendRain -> parseRainProgram fn
        debugAST ast1
        debug "}}}"
--- a/5
+++ b/5
@ -1,4 +1,4 @@
-targets = tock tocktest lextest
+targets = tock tocktest
 all: $(targets)
@ -20,9 +20,6 @@ tock: $(sources)
 tocktest: $(sources)
 	ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain
 lextest: $(sources)
 	ghc $(ghc_opts) -o lextest -main-is PreprocessOccam --make PreprocessOccam
 CFLAGS = \
 	-O2 \
 	-g -Wall \
--- a/Parse.hs
+++ b/Parse.hs
@ -17,7 +17,7 @@ with this program.  If not, see <http://www.gnu.org/licenses/>.
 -}
 -- | Parse occam code into an AST.
-module Parse where
+module Parse (parseOccamProgram) where
 import Control.Monad (liftM, when)
 import Control.Monad.Error (runErrorT)
@ -26,26 +26,23 @@ import Data.List
 import qualified Data.Map as Map
 import Data.Maybe
 import Debug.Trace
 import qualified IO
 import Text.ParserCombinators.Parsec
-import Text.ParserCombinators.Parsec.Language (emptyDef)
+import Text.ParserCombinators.Parsec.Pos (newPos)
 import qualified Text.ParserCombinators.Parsec.Token as P
 import Text.Regex
 import qualified AST as A
 import CompState
 import Errors
 import EvalConstants
 import EvalLiterals
 import Indentation
 import Intrinsics
 import LexOccam
 import Metadata
 import Pass
 import Types
 import Utils
--{{{ setup stuff for Parsec
+--{{{ the parser monad
-type OccParser = GenParser Char CompState
+type OccParser = GenParser Token CompState
 -- | Make MonadState functions work in the parser monad.
 -- This came from <http://hackage.haskell.org/trac/ghc/ticket/1274> -- which means
@ -56,145 +53,48 @@ instance MonadState st (GenParser tok st) where
 instance Die (GenParser tok st) where
  die = fail
 occamStyle
    = emptyDef
    { P.commentLine    = "--"
    , P.nestedComments = False
    , P.identStart     = letter
    , P.identLetter    = alphaNum <|> char '.'
    , P.opStart        = oneOf "+-*/\\>=<~"
    , P.opLetter       = oneOf "/\\>=<"
    , P.reservedOpNames= [
                          "+",
                          "-",
                          "*",
                          "/",
                          "\\",
                          "/\\",
                          "\\/",
                          "><",
                          "<<",
                          ">>",
                          "=",
                          "<>",
                          "<",
                          ">",
                          ">=",
                          "<=",
                          "-",
                          "~"
                         ]
    , P.reservedNames  = [
                          "AFTER",
                          "ALT",
                          "AND",
                          "ANY",
                          "AT",
                          "BITAND",
                          "BITNOT",
                          "BITOR",
                          "BOOL",
                          "BYTE",
                          "BYTESIN",
                          "CASE",
                          "CHAN",
                          "DATA",
                          "ELSE",
                          "FALSE",
                          "FOR",
                          "FROM",
                          "FUNCTION",
                          "IF",
                          "IN",
                          "INLINE",
                          "INT",
                          "INT16",
                          "INT32",
                          "INT64",
                          "IS",
                          "MINUS",
                          "MOSTNEG",
                          "MOSTPOS",
                          "NOT",
                          "OF",
                          "OFFSETOF",
                          "OR",
                          "PACKED",
                          "PAR",
                          "PLACE",
                          "PLACED",
                          "PLUS",
                          "PORT",
                          "PRI",
                          "PROC",
                          "PROCESSOR",
                          "PROTOCOL",
                          "REAL32",
                          "REAL64",
                          "RECORD",
                          "REM",
                          "RESHAPES",
                          "RESULT",
                          "RETYPES",
                          "ROUND",
                          "SEQ",
                          "SIZE",
                          "SKIP",
                          "STOP",
                          "TIMER",
                          "TIMES",
                          "TRUE",
                          "TRUNC",
                          "TYPE",
                          "VAL",
                          "VALOF",
                          "WHILE",
                          "WORKSPACE",
                          "VECSPACE",
                          "#INCLUDE",
                          "#USE",
                          indentMarker,
                          outdentMarker,
                          eolMarker,
                          mainMarker
                         ]
    , P.caseSensitive  = True
    }
 lexer :: P.TokenParser CompState
 lexer  = P.makeTokenParser occamStyle
 -- XXX replace whitespace with something that doesn't eat \ns
 whiteSpace = P.whiteSpace lexer
 lexeme = P.lexeme lexer
 symbol = P.symbol lexer
 natural = P.natural lexer
 parens = P.parens lexer
 semi = P.semi lexer
 identifier = P.identifier lexer
 reserved = P.reserved lexer
 reservedOp = P.reservedOp lexer
 --}}}
 --{{{ matching rules for raw tokens
 -- | Extract source position from a `Token`.
 tokenPos :: Token -> SourcePos
 tokenPos (m, _) = metaToSourcePos m
 genToken :: (Token -> Maybe a) -> OccParser a
 genToken test = token show tokenPos test
 reserved :: String -> OccParser ()
 reserved name = genToken test
  where
    test (_, TokReserved name')
        = if name' == name then Just () else Nothing
    test _ = Nothing
 identifier :: OccParser String
 identifier = genToken test
  where
    test (_, TokIdentifier s) = Just s
    test _ = Nothing
 plainToken :: TokenType -> OccParser ()
 plainToken t = genToken test
  where
    test (_, t') = if t == t' then Just () else Nothing
 --}}}
 --{{{ symbols
-sLeft = try $ symbol "["
+sAmp = reserved "&"
-sRight = try $ symbol "]"
+sAssign = reserved ":="
-sLeftR = try $ symbol "("
+sBang = reserved "!"
-sRightR = try $ symbol ")"
+sColon = reserved ":"
-sAssign = try $ symbol ":="
+sColons = reserved "::"
-sColon = try $ symbol ":"
+sComma = reserved ","
-sColons = try $ symbol "::"
+sEq = reserved "="
-sComma = try $ symbol ","
+sLeft = reserved "["
-sSemi = try $ symbol ";"
+sLeftR = reserved "("
-sAmp = try $ symbol "&"
+sQuest = reserved "?"
-sQuest = try $ symbol "?"
+sRight = reserved "]"
-sBang = try $ symbol "!"
+sRightR = reserved ")"
-sEq = try $ symbol "="
+sSemi = reserved ";"
 sApos = try $ symbol "'"
 sQuote = try $ symbol "\""
 sHash = try $ symbol "#"
 --}}}
 --{{{ keywords
 sAFTER = reserved "AFTER"
@ -263,29 +163,40 @@ sVALOF = reserved "VALOF"
 sWHILE = reserved "WHILE"
 sWORKSPACE = reserved "WORKSPACE"
 sVECSPACE = reserved "VECSPACE"
 sppINCLUDE = reserved "#INCLUDE"
 sppUSE = reserved "#USE"
 --}}}
 --{{{ markers inserted by the preprocessor
-- XXX could handle VALOF by translating each step to one { and matching multiple ones?
+indent = do { plainToken Indent } <?> "indentation increase"
-mainMarker = "__main"
+outdent = do { plainToken Outdent } <?> "indentation decrease"
-
+eol = do { plainToken EndOfLine } <?> "end of line"
 sMainMarker = do { whiteSpace; reserved mainMarker } <?> "end of input (top-level process)"
 indent = do { whiteSpace; reserved indentMarker } <?> "indentation increase"
 outdent = do { whiteSpace; reserved outdentMarker } <?> "indentation decrease"
 eol = do { whiteSpace; reserved eolMarker } <?> "end of line"
 --}}}
 --{{{ helper functions
 md :: OccParser Meta
 md
  =  do pos <- getPosition
-        return Meta {
+        return $ sourcePosToMeta pos
-                 metaFile = Just $ sourceName pos,
+
 --{{{ Meta to/from SourcePos
 -- | Convert source position into Parsec's format.
 metaToSourcePos :: Meta -> SourcePos
 metaToSourcePos meta
    = newPos filename (metaLine meta) (metaColumn meta)
  where
    filename = case metaFile meta of
                 Just s -> s
                 Nothing -> ""
 -- | Convert source position out of Parsec's format.
 sourcePosToMeta :: SourcePos -> Meta
 sourcePosToMeta pos
    = emptyMeta {
        metaFile = case sourceName pos of
                     "" -> Nothing
                     s -> Just s,
        metaLine = sourceLine pos,
        metaColumn = sourceColumn pos
      }
 --}}}
 --{{{ try*
 -- These functions let you try a sequence of productions and only retrieve the
@ -335,12 +246,6 @@ tryXVVX a b c d = try (do { a; bv <- b; cv <- c; d; return (bv, cv) })
 tryVXXV :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, d)
 tryVXXV a b c d = try (do { av <- a; b; c; dv <- d; return (av, dv) })
 tryVXVX :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, c)
 tryVXVX a b c d = try (do { av <- a; b; cv <- c; d; return (av, cv) })
 tryVVXX :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, b)
 tryVVXX a b c d = try (do { av <- a; bv <- b; c; d; return (av, bv) })
 tryVVXV :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, b, d)
 tryVVXV a b c d = try (do { av <- a; bv <- b; c; dv <- d; return (av, bv, dv) })
@ -431,15 +336,6 @@ handleSpecs specs inner specMarker
           mapM scopeOutSpec ss'
           return $ foldl (\e s -> specMarker m s e) v ss'
 -- | Like sepBy1, but not eager: it won't consume the separator unless it finds
 -- another item after it.
 sepBy1NE :: OccParser a -> OccParser b -> OccParser [a]
 sepBy1NE item sep
    =  do i <- item
          rest <- option [] $ try (do sep
                                      sepBy1NE item sep)
          return $ i : rest
 -- | Run several different parsers with a separator between them.
 -- If you give it [a, b, c] and s, it'll parse [a, s, b, s, c] then
 -- give you back the results from [a, b, c].
@ -803,42 +699,31 @@ untypedLiteral
 real :: OccParser A.LiteralRepr
 real
    =   do m <- md
-           (l, r) <- tryVXVX digits (char '.') digits (char 'E')
+           genToken (test m)
           e <- lexeme occamExponent
           return $ A.RealLiteral m (l ++ "." ++ r ++ "E" ++ e)
    <|> do m <- md
           l <- tryVX digits (char '.')
           r <- lexeme digits
           return $ A.RealLiteral m (l ++ "." ++ r)
    <?> "real literal"
-
+  where
-occamExponent :: OccParser String
+    test m (_, TokRealLiteral s) = Just $ A.RealLiteral m s
-occamExponent
+    test _ _ = Nothing
    =  do c <- oneOf "+-"
          d <- digits
          return $ c : d
    <?> "exponent"
 integer :: OccParser A.LiteralRepr
 integer
    =  do m <- md
-          do { d <- lexeme digits; return $ A.IntLiteral m d }
+          genToken (test m)
            <|> do { d <- lexeme (sHash >> many1 hexDigit); return $ A.HexLiteral m d }
    <?> "integer literal"
-
+  where
-digits :: OccParser String
+    test m (_, TokIntLiteral s) = Just $ A.IntLiteral m s
-digits
+    test m (_, TokHexLiteral s) = Just $ A.HexLiteral m (drop 1 s)
-    =   many1 digit
+    test _ _ = Nothing
    <?> "decimal digits"
 byte :: OccParser A.LiteralRepr
 byte
    =  do m <- md
-          char '\''
+          genToken (test m)
          c <- literalCharacter
          sApos
          return c
    <?> "byte literal"
  where
    test m (_, TokCharLiteral s)
        = case splitStringLiteral m (chop 1 1 s) of [lr] -> Just lr
    test _ _ = Nothing
 -- | Parse a table -- an array literal which might be subscripted or sliced.
 -- (The implication of this is that the type of the expression this parses
@ -884,29 +769,45 @@ tableElems
 stringLiteral :: OccParser (A.LiteralRepr, A.Dimension)
 stringLiteral
    =  do m <- md
-          char '"'
+          cs <- stringCont <|> stringLit
-          cs <- manyTill literalCharacter sQuote
+          let aes = [A.ArrayElemExpr $ A.Literal m' A.Byte c
-          let aes = [A.ArrayElemExpr $ A.Literal m A.Byte c | c <- cs]
+                     | c@(A.ByteLiteral m' _) <- cs]
          return (A.ArrayLiteral m aes, A.Dimension $ length cs)
    <?> "string literal"
-
+  where
-character :: OccParser String
+    stringCont :: OccParser [A.LiteralRepr]
-character
+    stringCont
    =   do char '*'
           do char '#'
              a <- hexDigit
              b <- hexDigit
              return $ ['*', '#', a, b]
            <|> do { c <- anyChar; return ['*', c] }
    <|> do c <- anyChar
           return [c]
    <?> "character"
 literalCharacter :: OccParser A.LiteralRepr
 literalCharacter
        =  do m <- md
-           c <- character
+              s <- genToken test
-           return $ A.ByteLiteral m c
+              rest <- stringCont <|> stringLit
              return $ (splitStringLiteral m s) ++ rest
      where
        test (_, TokStringCont s) = Just (chop 1 2 s)
        test _ = Nothing
    stringLit :: OccParser [A.LiteralRepr]
    stringLit
        =  do m <- md
              s <- genToken test
              return $ splitStringLiteral m s
      where
        test (_, TokStringLiteral s) = Just (chop 1 1 s)
        test _ = Nothing
 -- | Parse a string literal.
 -- FIXME: This should decode the occam escapes.
 splitStringLiteral :: Meta -> String -> [A.LiteralRepr]
 splitStringLiteral m cs = ssl cs
  where
    ssl [] = []
    ssl ('*':'#':a:b:cs)
        = (A.ByteLiteral m ['*', '#', a, b]) : ssl cs
    ssl ('*':'\n':cs)
        = (A.ByteLiteral m $ tail $ dropWhile (/= '*') cs) : ssl cs
    ssl ('*':c:cs)
        = (A.ByteLiteral m ['*', c]) : ssl cs
    ssl (c:cs)
        = (A.ByteLiteral m [c]) : ssl cs
 --}}}
 --{{{ expressions
 expressionList :: [A.Type] -> OccParser A.ExpressionList
@ -1071,42 +972,42 @@ intrinsicFunctionSingle
 monadicOperator :: OccParser A.MonadicOp
 monadicOperator
-    =   do { reservedOp "-" <|> sMINUS; return A.MonadicSubtr }
+    =   do { reserved "-" <|> sMINUS; return A.MonadicSubtr }
-    <|> do { reservedOp "~" <|> sBITNOT; return A.MonadicBitNot }
+    <|> do { reserved "~" <|> sBITNOT; return A.MonadicBitNot }
    <|> do { sNOT; return A.MonadicNot }
    <?> "monadic operator"
 dyadicOperator :: OccParser A.DyadicOp
 dyadicOperator
-    =   do { reservedOp "+"; return A.Add }
+    =   do { reserved "+"; return A.Add }
-    <|> do { reservedOp "-"; return A.Subtr }
+    <|> do { reserved "-"; return A.Subtr }
-    <|> do { reservedOp "*"; return A.Mul }
+    <|> do { reserved "*"; return A.Mul }
-    <|> do { reservedOp "/"; return A.Div }
+    <|> do { reserved "/"; return A.Div }
-    <|> do { reservedOp "\\"; return A.Rem }
+    <|> do { reserved "\\"; return A.Rem }
    <|> do { sREM; return A.Rem }
    <|> do { sMINUS; return A.Minus }
-    <|> do { reservedOp "/\\" <|> sBITAND; return A.BitAnd }
+    <|> do { reserved "/\\" <|> sBITAND; return A.BitAnd }
-    <|> do { reservedOp "\\/" <|> sBITOR; return A.BitOr }
+    <|> do { reserved "\\/" <|> sBITOR; return A.BitOr }
-    <|> do { reservedOp "><"; return A.BitXor }
+    <|> do { reserved "><"; return A.BitXor }
    <?> "dyadic operator"
 -- These always need an INT on their right-hand side.
 shiftOperator :: OccParser A.DyadicOp
 shiftOperator
-    =   do { reservedOp "<<"; return A.LeftShift }
+    =   do { reserved "<<"; return A.LeftShift }
-    <|> do { reservedOp ">>"; return A.RightShift }
+    <|> do { reserved ">>"; return A.RightShift }
    <?> "shift operator"
 -- These always return a BOOL, so we have to deal with them specially for type
 -- context.
 comparisonOperator :: OccParser A.DyadicOp
 comparisonOperator
-    =   do { reservedOp "="; return A.Eq }
+    =   do { reserved "="; return A.Eq }
-    <|> do { reservedOp "<>"; return A.NotEq }
+    <|> do { reserved "<>"; return A.NotEq }
-    <|> do { reservedOp "<"; return A.Less }
+    <|> do { reserved "<"; return A.Less }
-    <|> do { reservedOp ">"; return A.More }
+    <|> do { reserved ">"; return A.More }
-    <|> do { reservedOp "<="; return A.LessEq }
+    <|> do { reserved "<="; return A.LessEq }
-    <|> do { reservedOp ">="; return A.MoreEq }
+    <|> do { reserved ">="; return A.MoreEq }
    <|> do { sAFTER; return A.After }
    <?> "comparison operator"
@ -1617,7 +1518,6 @@ process
    <|> mainProcess
    <|> handleSpecs (allocation <|> specification) process
                    (\m s p -> A.Seq m (A.Spec m s (A.OnlyP m p)))
    <|> preprocessorDirective
    <?> "process"
 --{{{ assignment (:=)
@ -2021,67 +1921,11 @@ intrinsicProc
          return $ A.IntrinsicProcCall m s as
    <?> "intrinsic PROC instance"
 --}}}
 --{{{ preprocessor directives
 preprocessorDirective :: OccParser A.Process
 preprocessorDirective
    =   ppInclude
    <|> ppUse
    <|> unknownPP
    <?> "preprocessor directive"
 ppInclude :: OccParser A.Process
 ppInclude
    =  do sppINCLUDE
          char '"'
          file <- manyTill character sQuote
          eol
          includeFile $ concat file
    <?> "#INCLUDE directive"
 ppUse :: OccParser A.Process
 ppUse
    =  do sppUSE
          char '"'
          mod <- manyTill character sQuote
          eol
          let file = mangleModName $ concat mod
          -- Check whether it's been included already.
          ps <- getState
          if file `elem` csLoadedFiles ps
            then process
            else includeFile file
    <?> "#USE directive"
 -- | Invoke the parser recursively to handle an included file.
 includeFile :: String -> OccParser A.Process
 includeFile file
    =  do ps <- getState
          (r, ps') <- parseFile file includedFile ps
          case r of
            Left p ->
              do setState ps'
                 return p
            Right f ->
              do setState ps' { csLocalNames = csMainLocals ps' }
                 p <- process
                 return $ f p
 unknownPP :: OccParser A.Process
 unknownPP
    =  do m <- md
          char '#'
          rest <- manyTill anyChar (try eol)
          addWarning m $ "unknown preprocessor directive ignored: " ++ rest
          process
    <?> "unknown preprocessor directive"
 --}}}
 --{{{ main process
 mainProcess :: OccParser A.Process
 mainProcess
    =  do m <- md
-          sMainMarker
+          eof
          eol
          -- Stash the current locals so that we can either restore them
          -- when we get back to the file we included this one from, or
          -- pull the TLP name from them at the end.
@ -2096,100 +1940,26 @@ mainProcess
 -- have the earlier ones in scope, so we can't parse them separately.
 sourceFile :: OccParser (A.Process, CompState)
 sourceFile
-    =   do whiteSpace
+    =   do p <- process
           p <- process
           s <- getState
           return (p, s)
 -- | An included file is either a process, or a bunch of specs that can be
 -- applied to a process (which we return as a function). This is likewise a bit
 -- of a cheat, in that included files should really be *textually* included,
 -- but it's good enough for most reasonable uses.
 includedFile :: OccParser (Either A.Process (A.Process -> A.Process), CompState)
 includedFile
    =  do whiteSpace
          p <- process
          s <- getState
          do eof
             return $ (Right $ replaceMain p, s)
           <|> do sMainMarker
                  eol
                  return $ (Left p, s)
  where
    replaceMain :: A.Process -> A.Process -> A.Process
    replaceMain (A.Seq m (A.Spec m' s (A.OnlyP m'' p))) np
        = A.Seq m (A.Spec m' s (A.OnlyP m'' (replaceMain p np)))
    replaceMain (A.Main _) np = np
 --}}}
 --}}}
 --{{{  preprocessor
 -- XXX Doesn't handle conditionals.
 -- | Find (via a nasty regex search) all the files that this source file includes.
 preFindIncludes :: String -> [String]
 preFindIncludes source
    = concat [case matchRegex incRE l of
                Just [_, fn] -> [fn]
                Nothing -> []
              | l <- lines source]
  where
    incRE = mkRegex "^ *#(INCLUDE|USE) +\"([^\"]*)\""
 -- | If a module name doesn't already have a suffix, add one.
 mangleModName :: String -> String
 mangleModName mod
    = if ".occ" `isSuffixOf` mod || ".inc" `isSuffixOf` mod
        then mod
        else mod ++ ".occ"
 -- | Load all the source files necessary for a program.
 -- We have to do this now, before entering the parser, because the parser
 -- doesn't run in the IO monad. If there were a monad transformer version of
 -- Parsec then we could just open files as we need them.
 loadSource :: String -> PassM ()
 loadSource file = load file file
  where
    load :: String -> String -> PassM ()
    load file realName
        =  do ps <- get
              case Map.lookup file (csSourceFiles ps) of
                Just _ -> return ()
                Nothing ->
                  do progress $ "Loading source file " ++ realName
                     rawSource <- liftIO $ readFile realName
                     source <- removeIndentation realName (rawSource ++ "\n" ++ mainMarker ++ "\n")
                     debug $ "Preprocessed source:"
                     debug $ numberLines source
                     modify $ (\ps -> ps { csSourceFiles = Map.insert file source (csSourceFiles ps) })
                     let deps = map mangleModName $ preFindIncludes source
                     sequence_ [load dep (joinPath realName dep) | dep <- deps]
 --}}}
 --{{{  entry points for the parser itself
-- | Test a parser production (for use from ghci while debugging the parser).
+-- | Parse a token stream with the given production.
-testParse :: Show a => OccParser a -> String -> IO ()
+runTockParser :: [Token] -> OccParser t -> CompState -> PassM t
-testParse prod text
+runTockParser toks prod cs
-    = do let r = runParser prod emptyState "" text
+    =  do case runParser prod cs "irrelevant filename" toks of
-         putStrLn $ "Result: " ++ show r
+            Left err -> die $ "Parse error: " ++ show err
 -- | Parse a file with the given production.
 parseFile :: Monad m => String -> OccParser t -> CompState -> m t
 parseFile file prod ps
    =  do let source = case Map.lookup file (csSourceFiles ps) of
                         Just s -> s
                         Nothing -> dieIO $ "Failed to preload file: " ++ show file
          let ps' = ps { csLoadedFiles = file : csLoadedFiles ps }
          case runParser prod ps' file source of
            Left err -> dieIO $ "Parse error: " ++ show err
            Right r -> return r
-- | Parse the top level source file in a program.
+-- | Parse an occam program.
-parseProgram :: String -> PassM A.Process
+parseOccamProgram :: [Token] -> PassM A.Process
-parseProgram file
+parseOccamProgram toks
-    =  do ps <- get
+    =  do cs <- get
-          (p, ps') <- parseFile file sourceFile ps
+          (p, cs') <- runTockParser toks sourceFile cs
-          put ps'
+          put cs'
          return p
 --}}}
--- a/PreprocessOccam.hs
+++ b/PreprocessOccam.hs
@ -17,26 +17,23 @@ with this program.  If not, see <http://www.gnu.org/licenses/>.
 -}
 -- | Preprocess occam code.
-module PreprocessOccam where
+module PreprocessOccam (preprocessOccamProgram) where
 import Control.Monad.State
 import Data.List
 import qualified Data.Set as Set
 import System.IO
 import Text.Regex
 import CompState
 import Errors
 import LexOccam
 import Metadata
 import Pass
 import PrettyShow
 import StructureOccam
 import Utils
 import CompState
 import Control.Monad.Error
 import Control.Monad.State
 import System
 import System.IO
 import PrettyShow
 -- | Open an included file, looking for it in the search path.
 -- Return the open filehandle and the location of the file.
 -- FIXME: This doesn't actually look at the search path yet.
@ -59,21 +56,38 @@ searchFile m filename
 preprocessFile :: Meta -> String -> PassM [Token]
 preprocessFile m filename
    =  do (handle, realFilename) <- searchFile m filename
-          liftIO $ putStrLn $ "Loading " ++ realFilename
+          progress $ "Loading source file " ++ realFilename
          origCS <- get
          modify (\cs -> cs { csCurrentFile = realFilename })
          s <- liftIO $ hGetContents handle
-          toks <- runLexer realFilename s >>= structureOccam >>= preprocessOccam
+          toks <- runLexer realFilename s
          veryDebug $ "{{{ lexer tokens"
          veryDebug $ pshow toks
          veryDebug $ "}}}"
          toks' <- structureOccam toks
          veryDebug $ "{{{ structured tokens"
          veryDebug $ pshow toks'
          veryDebug $ "}}}"
          toks'' <- preprocessOccam toks'
          veryDebug $ "{{{ preprocessed tokens"
          veryDebug $ pshow toks''
          veryDebug $ "}}}"
          modify (\cs -> cs { csCurrentFile = csCurrentFile origCS })
-          return toks
+          return toks''
 -- | Preprocess a token stream.
 preprocessOccam :: [Token] -> PassM [Token]
 preprocessOccam [] = return []
-preprocessOccam ((m, TokPreprocessor ('#':s)):(_, EndOfLine):ts)
+preprocessOccam ((m, TokPreprocessor s):(_, EndOfLine):ts)
-    =  do func <- handleDirective m s
+    =  do func <- handleDirective m (stripPrefix s)
          rest <- preprocessOccam ts
          return $ func rest
  where
    stripPrefix :: String -> String
    stripPrefix (' ':cs) = stripPrefix cs
    stripPrefix ('\t':cs) = stripPrefix cs
    stripPrefix ('#':cs) = cs
    stripPrefix _ = error "bad TokPreprocessor prefix"
 -- Check the above case didn't miss something.
 preprocessOccam ((_, TokPreprocessor _):_)
    = error "bad TokPreprocessor token"
@ -134,17 +148,12 @@ handleUse m [modName]
            else mod ++ ".occ"
 --}}}
-- | Main function for testing.
+-- | Load and preprocess an occam program.
-main :: IO ()
+preprocessOccamProgram :: String -> PassM [Token]
-main
+preprocessOccamProgram filename
-    =  do (arg:_) <- getArgs
+    =  do toks <- preprocessFile emptyMeta filename
-          v <- evalStateT (runErrorT (test arg)) emptyState
+          veryDebug $ "{{{ tokenised source"
-          case v of
+          veryDebug $ pshow toks
-            Left e -> dieIO e
+          veryDebug $ "}}}"
-            Right r -> return ()
+          return toks
  where
    test :: String -> PassM ()
    test fn
        = do tokens <- preprocessFile emptyMeta fn
             liftIO $ putStrLn $ pshow tokens
--- a/RainParse.hs
+++ b/RainParse.hs
@ -43,7 +43,6 @@ import CompState
 import Errors
 import EvalConstants
 import EvalLiterals
 import Indentation
 import Intrinsics
 import Metadata
 import Pass
@ -296,21 +295,13 @@ rainSourceFile
           s <- getState
           return (A.Seq emptyMeta p, s)
-- | Parse a file with the given production.
+-- | Load and parse a Rain source file.
 -- This is copied from Parse.hs (because OccParser is about to be changed to not be the same as RainParser):
 parseFile :: Monad m => String -> RainParser t -> CompState -> m t
 parseFile file prod ps
    =  do let source = case Map.lookup file (csSourceFiles ps) of
                         Just s -> s
                         Nothing -> dieIO $ "Failed to preload file: " ++ show file
          let ps' = ps { csLoadedFiles = file : csLoadedFiles ps }
          case runParser prod ps' file source of
            Left err -> dieIO $ "Parse error: " ++ show err
            Right r -> return r
 parseRainProgram :: String -> PassM A.Process
-parseRainProgram file
+parseRainProgram filename
-    =  do ps <- get
+    =  do source <- liftIO $ readFile filename
-          (p, ps') <- parseFile file rainSourceFile ps
+          cs <- get
-          put ps'
+          case runParser rainSourceFile cs filename source of
            Left err -> dieIO $ "Parse error: " ++ show err
            Right (p, cs') ->
              do put cs'
                 return p
--- a/Utils.hs
+++ b/Utils.hs
@ -66,3 +66,7 @@ transformEither funcLeft funcRight x = case x of
 maybeIO :: IO a -> IO (Maybe a)
 maybeIO op = catch (op >>= (return . Just)) (\e -> return Nothing)
 -- | Remove a number of items from the start and end of a list.
 chop :: Int -> Int -> [a] -> [a]
 chop start end s = drop start (take (length s - end) s)
--- a/testcases/stringlit.occ
+++ b/testcases/stringlit.occ
@ -10,6 +10,6 @@ PROC P ()
  VAL []BYTE mls IS "first*
                    *second*
                    *third":
-  VAL [5][5]BYTE square IS ["sator", "arepo", "tenas", "opera", "rotas"]:
+  VAL [5][5]BYTE square IS ["sator", "arepo", "tenat", "opera", "rotas"]:
  SKIP
 :