diff --git a/CompState.hs b/CompState.hs index d2d49b6..7e6d2ce 100644 --- a/CompState.hs +++ b/CompState.hs @@ -50,15 +50,10 @@ data CompState = CompState { csVerboseLevel :: Int, csOutputFile :: String, - -- Set by (new) preprocessor + -- Set by preprocessor csCurrentFile :: String, csUsedFiles :: Set.Set String, - -- Set by (old) preprocessor - csSourceFiles :: Map String String, - csIndentLinesIn :: [String], - csIndentLinesOut :: [String], - -- Set by Parse csLocalNames :: [(String, A.Name)], csMainLocals :: [(String, A.Name)], @@ -93,10 +88,6 @@ emptyState = CompState { csCurrentFile = "none", csUsedFiles = Set.empty, - csSourceFiles = Map.empty, - csIndentLinesIn = [], - csIndentLinesOut = [], - csLocalNames = [], csMainLocals = [], csNames = Map.empty, diff --git a/Indentation.hs b/Indentation.hs deleted file mode 100644 index 770f612..0000000 --- a/Indentation.hs +++ /dev/null @@ -1,191 +0,0 @@ -{- -Tock: a compiler for parallel languages -Copyright (C) 2007 University of Kent - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation, either version 2 of the License, or (at your -option) any later version. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program. If not, see . --} - --- | Parse indentation in occam source. -module Indentation (removeIndentation, indentMarker, outdentMarker, eolMarker) where - -import Control.Monad -import Control.Monad.Error -import Control.Monad.State -import Data.List -import Text.Regex - -import CompState -import Errors -import Pass - --- FIXME When this joins continuation lines, it should stash the details of --- what it joined into CompState so that error reporting later on can --- reconstruct the original position. - -indentMarker = "__indent" -outdentMarker = "__outdent" -eolMarker = "__eol" - --- FIXME: There's probably a nicer way of doing this. --- (Well, trivially, use a WriterT...) - --- | Preprocess occam source code to remove comments and turn indentation into --- explicit markers. -removeIndentation :: String -> String -> PassM String -removeIndentation filename orig - = do modify $ (\ps -> ps { csIndentLinesIn = origLines, - csIndentLinesOut = [] }) - catchError (nextLine 0) reportError - ps <- get - let out = concat $ intersperse "\n" $ reverse $ csIndentLinesOut ps - modify $ (\ps -> ps { csIndentLinesIn = [], - csIndentLinesOut = [] }) - return out - where - origLines = lines orig - - -- | When something goes wrong, figure out how far through the file we'd got. - reportError :: String -> PassM () - reportError error - = do ps <- get - let lineNumber = length origLines - length (csIndentLinesIn ps) - die $ filename ++ ":" ++ show lineNumber ++ ": " ++ error - - -- | Get the next raw line from the input. - getLine :: PassM (Maybe String) - getLine - = do ps <- get - case csIndentLinesIn ps of - [] -> return Nothing - (line:rest) -> - do put $ ps { csIndentLinesIn = rest } - return $ Just line - - -- | Add a line to the output. - putLine :: String -> PassM () - putLine line - = modify $ (\ps -> ps { csIndentLinesOut = line : csIndentLinesOut ps }) - - -- | Append to the *previous* line added. - addToLine :: String -> PassM () - addToLine s - = modify $ (\ps -> ps { csIndentLinesOut = - case csIndentLinesOut ps of (l:ls) -> ((l ++ s):ls) }) - - -- | Given a line, read the rest of it, then return the complete thing. - finishLine :: String -> String -> Bool -> Bool -> String -> PassM String - finishLine left soFar inStr isChar afterStr - = case (left, inStr, isChar) of - ([], False, _) -> plainEOL - ('-':'-':cs, False, _) -> plainEOL - ([], True, _) -> die "end of line in string without continuation" - (['*'], True, _) -> stringEOL - ('\'':cs, False, _) -> finishLine cs (afterStr ++ ('\'':soFar)) True True "" - ('\'':cs, True, True) -> finishLine cs (afterStr ++ ('\'':soFar)) False False "" - ('"':cs, False, _) -> finishLine cs (afterStr ++ ('"':soFar)) True False "" - ('"':cs, True, False) -> finishLine cs (afterStr ++ ('"':soFar)) False False "" - ('*':'*':cs, True, _) -> finishLine cs ('*':'*':soFar) True isChar afterStr - ('*':'"':cs, True, _) -> finishLine cs ('"':'*':soFar) True isChar afterStr - ('*':'\'':cs, True, _) -> finishLine cs ('\'':'*':soFar) True isChar afterStr - (c:cs, _, _) -> finishLine cs (c:soFar) inStr isChar afterStr - where - -- | Finish a regular line. - plainEOL :: PassM String - plainEOL - = do let s = reverse soFar - if hasContinuation s - then do l <- getLine >>= checkJust "no continuation line" - finishLine l ('\n':soFar) False False "" - else return s - - -- | Finish a line where we're in the middle of a string. - stringEOL :: PassM String - stringEOL - = do l <- getLine >>= checkJust "no string continuation line" - l' <- contStringStart l - -- When we hit the end of the string, add a \n after it to - -- make the line numbers match up again. - finishLine l' soFar True isChar ('\n':afterStr) - - -- | Does a line have a continuation line following it? - hasContinuation :: String -> Bool - hasContinuation s - = case matchRegex contRE s of - Just _ -> True - Nothing -> False - where - -- FIXME This should probably be based on the list of operators and - -- reserved words that the parser already has; for now this is the - -- regexp that occamdoc uses. - contRE = mkRegexWithOpts "(-|~|\\+|-|\\*|/|\\\\|/\\\\|\\\\/|><|=|<>|<|>|>=|<=|,|;|:=|<<|>>|([[:space:]](MINUS|BITNOT|NOT|SIZE|REM|PLUS|MINUS|TIMES|BITAND|BITOR|AND|OR|AFTER|FROM|FOR|IS|RETYPES|RESHAPES)))[[:space:]]*$" False True - - -- | Strip the spaces-then-star beginning off a string continuation line. - contStringStart :: String -> PassM String - contStringStart (' ':cs) = contStringStart cs - contStringStart ('*':cs) = return cs - contStringStart _ = die "string continuation line doesn't start with *" - - -- | Get the next *complete* line from the input, resolving continuations. - readLine :: PassM (Maybe String) - readLine - = do line <- getLine - case line of - Just s -> - do r <- finishLine s "" False False "" - return $ Just r - Nothing -> return Nothing - - -- | Compute the indentation level of a line, and return it without the indentation. - countIndent :: String -> Int -> PassM (Int, String) - -- Tabs are 8 spaces. - countIndent ('\t':cs) soFar = countIndent cs (soFar + 4) - countIndent (' ':' ':cs) soFar = countIndent cs (soFar + 1) - countIndent [' '] soFar = return (soFar, []) - countIndent (' ':_) soFar - = die "bad indentation (odd number of spaces)" - countIndent cs soFar = return (soFar, cs) - - -- | Repeat a string N times. - rep :: Int -> String -> String - rep n s = concat $ take n (repeat s) - - -- | Process the next line from the input. - nextLine :: Int -> PassM () - nextLine level - = do l <- readLine - case l of - Nothing -> return () - Just line -> - do (newLevel, stripped) <- countIndent line 0 - addLine level newLevel line stripped - - -- | Once a line's been retrieved, add it to the output along with the - -- appropriate markers, then go and process the next one. - addLine :: Int -> Int -> String -> String -> PassM () - addLine level newLevel line stripped - | stripped == "" = - do putLine "" - nextLine level - | newLevel > level = - do addToLine $ rep (newLevel - level) (" " ++ indentMarker) - putLine $ line ++ " " ++ eolMarker - nextLine newLevel - | newLevel < level = - do addToLine $ rep (level - newLevel) (" " ++ outdentMarker) - putLine $ line ++ " " ++ eolMarker - nextLine newLevel - | otherwise = - do putLine $ line ++ " " ++ eolMarker - nextLine level - diff --git a/Main.hs b/Main.hs index 7a718f4..2263402 100644 --- a/Main.hs +++ b/Main.hs @@ -17,7 +17,7 @@ with this program. If not, see . -} -- | Driver for the compiler. -module Main where +module Main (main) where import Control.Monad import Control.Monad.Error @@ -34,6 +34,7 @@ import GenerateC import GenerateCPPCSP import Parse import Pass +import PreprocessOccam import PrettyShow import RainParse import RainPasses @@ -141,14 +142,10 @@ compile :: String -> PassM () compile fn = do optsPS <- get - debug "{{{ Preprocess" - loadSource fn - debug "}}}" - debug "{{{ Parse" progress "Parse" ast1 <- case csFrontend optsPS of - FrontendOccam -> parseProgram fn + FrontendOccam -> preprocessOccamProgram fn >>= parseOccamProgram FrontendRain -> parseRainProgram fn debugAST ast1 debug "}}}" diff --git a/Makefile b/Makefile index 7657a4f..c16e7b2 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -targets = tock tocktest lextest +targets = tock tocktest all: $(targets) @@ -20,9 +20,6 @@ tock: $(sources) tocktest: $(sources) ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain -lextest: $(sources) - ghc $(ghc_opts) -o lextest -main-is PreprocessOccam --make PreprocessOccam - CFLAGS = \ -O2 \ -g -Wall \ diff --git a/Parse.hs b/Parse.hs index c05e945..f14daf3 100644 --- a/Parse.hs +++ b/Parse.hs @@ -17,7 +17,7 @@ with this program. If not, see . -} -- | Parse occam code into an AST. -module Parse where +module Parse (parseOccamProgram) where import Control.Monad (liftM, when) import Control.Monad.Error (runErrorT) @@ -26,26 +26,23 @@ import Data.List import qualified Data.Map as Map import Data.Maybe import Debug.Trace -import qualified IO import Text.ParserCombinators.Parsec -import Text.ParserCombinators.Parsec.Language (emptyDef) -import qualified Text.ParserCombinators.Parsec.Token as P -import Text.Regex +import Text.ParserCombinators.Parsec.Pos (newPos) import qualified AST as A import CompState import Errors import EvalConstants import EvalLiterals -import Indentation import Intrinsics +import LexOccam import Metadata import Pass import Types import Utils ---{{{ setup stuff for Parsec -type OccParser = GenParser Char CompState +--{{{ the parser monad +type OccParser = GenParser Token CompState -- | Make MonadState functions work in the parser monad. -- This came from -- which means @@ -56,145 +53,48 @@ instance MonadState st (GenParser tok st) where instance Die (GenParser tok st) where die = fail - -occamStyle - = emptyDef - { P.commentLine = "--" - , P.nestedComments = False - , P.identStart = letter - , P.identLetter = alphaNum <|> char '.' - , P.opStart = oneOf "+-*/\\>=<~" - , P.opLetter = oneOf "/\\>=<" - , P.reservedOpNames= [ - "+", - "-", - "*", - "/", - "\\", - "/\\", - "\\/", - "><", - "<<", - ">>", - "=", - "<>", - "<", - ">", - ">=", - "<=", - "-", - "~" - ] - , P.reservedNames = [ - "AFTER", - "ALT", - "AND", - "ANY", - "AT", - "BITAND", - "BITNOT", - "BITOR", - "BOOL", - "BYTE", - "BYTESIN", - "CASE", - "CHAN", - "DATA", - "ELSE", - "FALSE", - "FOR", - "FROM", - "FUNCTION", - "IF", - "IN", - "INLINE", - "INT", - "INT16", - "INT32", - "INT64", - "IS", - "MINUS", - "MOSTNEG", - "MOSTPOS", - "NOT", - "OF", - "OFFSETOF", - "OR", - "PACKED", - "PAR", - "PLACE", - "PLACED", - "PLUS", - "PORT", - "PRI", - "PROC", - "PROCESSOR", - "PROTOCOL", - "REAL32", - "REAL64", - "RECORD", - "REM", - "RESHAPES", - "RESULT", - "RETYPES", - "ROUND", - "SEQ", - "SIZE", - "SKIP", - "STOP", - "TIMER", - "TIMES", - "TRUE", - "TRUNC", - "TYPE", - "VAL", - "VALOF", - "WHILE", - "WORKSPACE", - "VECSPACE", - "#INCLUDE", - "#USE", - indentMarker, - outdentMarker, - eolMarker, - mainMarker - ] - , P.caseSensitive = True - } - -lexer :: P.TokenParser CompState -lexer = P.makeTokenParser occamStyle - --- XXX replace whitespace with something that doesn't eat \ns - -whiteSpace = P.whiteSpace lexer -lexeme = P.lexeme lexer -symbol = P.symbol lexer -natural = P.natural lexer -parens = P.parens lexer -semi = P.semi lexer -identifier = P.identifier lexer -reserved = P.reserved lexer -reservedOp = P.reservedOp lexer --}}} +--{{{ matching rules for raw tokens +-- | Extract source position from a `Token`. +tokenPos :: Token -> SourcePos +tokenPos (m, _) = metaToSourcePos m + +genToken :: (Token -> Maybe a) -> OccParser a +genToken test = token show tokenPos test + +reserved :: String -> OccParser () +reserved name = genToken test + where + test (_, TokReserved name') + = if name' == name then Just () else Nothing + test _ = Nothing + +identifier :: OccParser String +identifier = genToken test + where + test (_, TokIdentifier s) = Just s + test _ = Nothing + +plainToken :: TokenType -> OccParser () +plainToken t = genToken test + where + test (_, t') = if t == t' then Just () else Nothing +--}}} --{{{ symbols -sLeft = try $ symbol "[" -sRight = try $ symbol "]" -sLeftR = try $ symbol "(" -sRightR = try $ symbol ")" -sAssign = try $ symbol ":=" -sColon = try $ symbol ":" -sColons = try $ symbol "::" -sComma = try $ symbol "," -sSemi = try $ symbol ";" -sAmp = try $ symbol "&" -sQuest = try $ symbol "?" -sBang = try $ symbol "!" -sEq = try $ symbol "=" -sApos = try $ symbol "'" -sQuote = try $ symbol "\"" -sHash = try $ symbol "#" +sAmp = reserved "&" +sAssign = reserved ":=" +sBang = reserved "!" +sColon = reserved ":" +sColons = reserved "::" +sComma = reserved "," +sEq = reserved "=" +sLeft = reserved "[" +sLeftR = reserved "(" +sQuest = reserved "?" +sRight = reserved "]" +sRightR = reserved ")" +sSemi = reserved ";" --}}} --{{{ keywords sAFTER = reserved "AFTER" @@ -263,29 +163,40 @@ sVALOF = reserved "VALOF" sWHILE = reserved "WHILE" sWORKSPACE = reserved "WORKSPACE" sVECSPACE = reserved "VECSPACE" -sppINCLUDE = reserved "#INCLUDE" -sppUSE = reserved "#USE" --}}} --{{{ markers inserted by the preprocessor --- XXX could handle VALOF by translating each step to one { and matching multiple ones? -mainMarker = "__main" - -sMainMarker = do { whiteSpace; reserved mainMarker } "end of input (top-level process)" - -indent = do { whiteSpace; reserved indentMarker } "indentation increase" -outdent = do { whiteSpace; reserved outdentMarker } "indentation decrease" -eol = do { whiteSpace; reserved eolMarker } "end of line" +indent = do { plainToken Indent } "indentation increase" +outdent = do { plainToken Outdent } "indentation decrease" +eol = do { plainToken EndOfLine } "end of line" --}}} --{{{ helper functions md :: OccParser Meta md = do pos <- getPosition - return Meta { - metaFile = Just $ sourceName pos, - metaLine = sourceLine pos, - metaColumn = sourceColumn pos - } + return $ sourcePosToMeta pos + +--{{{ Meta to/from SourcePos +-- | Convert source position into Parsec's format. +metaToSourcePos :: Meta -> SourcePos +metaToSourcePos meta + = newPos filename (metaLine meta) (metaColumn meta) + where + filename = case metaFile meta of + Just s -> s + Nothing -> "" + +-- | Convert source position out of Parsec's format. +sourcePosToMeta :: SourcePos -> Meta +sourcePosToMeta pos + = emptyMeta { + metaFile = case sourceName pos of + "" -> Nothing + s -> Just s, + metaLine = sourceLine pos, + metaColumn = sourceColumn pos + } +--}}} --{{{ try* -- These functions let you try a sequence of productions and only retrieve the @@ -335,12 +246,6 @@ tryXVVX a b c d = try (do { a; bv <- b; cv <- c; d; return (bv, cv) }) tryVXXV :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, d) tryVXXV a b c d = try (do { av <- a; b; c; dv <- d; return (av, dv) }) -tryVXVX :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, c) -tryVXVX a b c d = try (do { av <- a; b; cv <- c; d; return (av, cv) }) - -tryVVXX :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, b) -tryVVXX a b c d = try (do { av <- a; bv <- b; c; d; return (av, bv) }) - tryVVXV :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, b, d) tryVVXV a b c d = try (do { av <- a; bv <- b; c; dv <- d; return (av, bv, dv) }) @@ -431,15 +336,6 @@ handleSpecs specs inner specMarker mapM scopeOutSpec ss' return $ foldl (\e s -> specMarker m s e) v ss' --- | Like sepBy1, but not eager: it won't consume the separator unless it finds --- another item after it. -sepBy1NE :: OccParser a -> OccParser b -> OccParser [a] -sepBy1NE item sep - = do i <- item - rest <- option [] $ try (do sep - sepBy1NE item sep) - return $ i : rest - -- | Run several different parsers with a separator between them. -- If you give it [a, b, c] and s, it'll parse [a, s, b, s, c] then -- give you back the results from [a, b, c]. @@ -803,42 +699,31 @@ untypedLiteral real :: OccParser A.LiteralRepr real = do m <- md - (l, r) <- tryVXVX digits (char '.') digits (char 'E') - e <- lexeme occamExponent - return $ A.RealLiteral m (l ++ "." ++ r ++ "E" ++ e) - <|> do m <- md - l <- tryVX digits (char '.') - r <- lexeme digits - return $ A.RealLiteral m (l ++ "." ++ r) + genToken (test m) "real literal" - -occamExponent :: OccParser String -occamExponent - = do c <- oneOf "+-" - d <- digits - return $ c : d - "exponent" + where + test m (_, TokRealLiteral s) = Just $ A.RealLiteral m s + test _ _ = Nothing integer :: OccParser A.LiteralRepr integer = do m <- md - do { d <- lexeme digits; return $ A.IntLiteral m d } - <|> do { d <- lexeme (sHash >> many1 hexDigit); return $ A.HexLiteral m d } + genToken (test m) "integer literal" - -digits :: OccParser String -digits - = many1 digit - "decimal digits" + where + test m (_, TokIntLiteral s) = Just $ A.IntLiteral m s + test m (_, TokHexLiteral s) = Just $ A.HexLiteral m (drop 1 s) + test _ _ = Nothing byte :: OccParser A.LiteralRepr byte = do m <- md - char '\'' - c <- literalCharacter - sApos - return c + genToken (test m) "byte literal" + where + test m (_, TokCharLiteral s) + = case splitStringLiteral m (chop 1 1 s) of [lr] -> Just lr + test _ _ = Nothing -- | Parse a table -- an array literal which might be subscripted or sliced. -- (The implication of this is that the type of the expression this parses @@ -884,29 +769,45 @@ tableElems stringLiteral :: OccParser (A.LiteralRepr, A.Dimension) stringLiteral = do m <- md - char '"' - cs <- manyTill literalCharacter sQuote - let aes = [A.ArrayElemExpr $ A.Literal m A.Byte c | c <- cs] + cs <- stringCont <|> stringLit + let aes = [A.ArrayElemExpr $ A.Literal m' A.Byte c + | c@(A.ByteLiteral m' _) <- cs] return (A.ArrayLiteral m aes, A.Dimension $ length cs) "string literal" + where + stringCont :: OccParser [A.LiteralRepr] + stringCont + = do m <- md + s <- genToken test + rest <- stringCont <|> stringLit + return $ (splitStringLiteral m s) ++ rest + where + test (_, TokStringCont s) = Just (chop 1 2 s) + test _ = Nothing -character :: OccParser String -character - = do char '*' - do char '#' - a <- hexDigit - b <- hexDigit - return $ ['*', '#', a, b] - <|> do { c <- anyChar; return ['*', c] } - <|> do c <- anyChar - return [c] - "character" + stringLit :: OccParser [A.LiteralRepr] + stringLit + = do m <- md + s <- genToken test + return $ splitStringLiteral m s + where + test (_, TokStringLiteral s) = Just (chop 1 1 s) + test _ = Nothing -literalCharacter :: OccParser A.LiteralRepr -literalCharacter - = do m <- md - c <- character - return $ A.ByteLiteral m c +-- | Parse a string literal. +-- FIXME: This should decode the occam escapes. +splitStringLiteral :: Meta -> String -> [A.LiteralRepr] +splitStringLiteral m cs = ssl cs + where + ssl [] = [] + ssl ('*':'#':a:b:cs) + = (A.ByteLiteral m ['*', '#', a, b]) : ssl cs + ssl ('*':'\n':cs) + = (A.ByteLiteral m $ tail $ dropWhile (/= '*') cs) : ssl cs + ssl ('*':c:cs) + = (A.ByteLiteral m ['*', c]) : ssl cs + ssl (c:cs) + = (A.ByteLiteral m [c]) : ssl cs --}}} --{{{ expressions expressionList :: [A.Type] -> OccParser A.ExpressionList @@ -1071,42 +972,42 @@ intrinsicFunctionSingle monadicOperator :: OccParser A.MonadicOp monadicOperator - = do { reservedOp "-" <|> sMINUS; return A.MonadicSubtr } - <|> do { reservedOp "~" <|> sBITNOT; return A.MonadicBitNot } + = do { reserved "-" <|> sMINUS; return A.MonadicSubtr } + <|> do { reserved "~" <|> sBITNOT; return A.MonadicBitNot } <|> do { sNOT; return A.MonadicNot } "monadic operator" dyadicOperator :: OccParser A.DyadicOp dyadicOperator - = do { reservedOp "+"; return A.Add } - <|> do { reservedOp "-"; return A.Subtr } - <|> do { reservedOp "*"; return A.Mul } - <|> do { reservedOp "/"; return A.Div } - <|> do { reservedOp "\\"; return A.Rem } + = do { reserved "+"; return A.Add } + <|> do { reserved "-"; return A.Subtr } + <|> do { reserved "*"; return A.Mul } + <|> do { reserved "/"; return A.Div } + <|> do { reserved "\\"; return A.Rem } <|> do { sREM; return A.Rem } <|> do { sMINUS; return A.Minus } - <|> do { reservedOp "/\\" <|> sBITAND; return A.BitAnd } - <|> do { reservedOp "\\/" <|> sBITOR; return A.BitOr } - <|> do { reservedOp "><"; return A.BitXor } + <|> do { reserved "/\\" <|> sBITAND; return A.BitAnd } + <|> do { reserved "\\/" <|> sBITOR; return A.BitOr } + <|> do { reserved "><"; return A.BitXor } "dyadic operator" -- These always need an INT on their right-hand side. shiftOperator :: OccParser A.DyadicOp shiftOperator - = do { reservedOp "<<"; return A.LeftShift } - <|> do { reservedOp ">>"; return A.RightShift } + = do { reserved "<<"; return A.LeftShift } + <|> do { reserved ">>"; return A.RightShift } "shift operator" -- These always return a BOOL, so we have to deal with them specially for type -- context. comparisonOperator :: OccParser A.DyadicOp comparisonOperator - = do { reservedOp "="; return A.Eq } - <|> do { reservedOp "<>"; return A.NotEq } - <|> do { reservedOp "<"; return A.Less } - <|> do { reservedOp ">"; return A.More } - <|> do { reservedOp "<="; return A.LessEq } - <|> do { reservedOp ">="; return A.MoreEq } + = do { reserved "="; return A.Eq } + <|> do { reserved "<>"; return A.NotEq } + <|> do { reserved "<"; return A.Less } + <|> do { reserved ">"; return A.More } + <|> do { reserved "<="; return A.LessEq } + <|> do { reserved ">="; return A.MoreEq } <|> do { sAFTER; return A.After } "comparison operator" @@ -1617,7 +1518,6 @@ process <|> mainProcess <|> handleSpecs (allocation <|> specification) process (\m s p -> A.Seq m (A.Spec m s (A.OnlyP m p))) - <|> preprocessorDirective "process" --{{{ assignment (:=) @@ -2021,67 +1921,11 @@ intrinsicProc return $ A.IntrinsicProcCall m s as "intrinsic PROC instance" --}}} ---{{{ preprocessor directives -preprocessorDirective :: OccParser A.Process -preprocessorDirective - = ppInclude - <|> ppUse - <|> unknownPP - "preprocessor directive" - -ppInclude :: OccParser A.Process -ppInclude - = do sppINCLUDE - char '"' - file <- manyTill character sQuote - eol - includeFile $ concat file - "#INCLUDE directive" - -ppUse :: OccParser A.Process -ppUse - = do sppUSE - char '"' - mod <- manyTill character sQuote - eol - let file = mangleModName $ concat mod - - -- Check whether it's been included already. - ps <- getState - if file `elem` csLoadedFiles ps - then process - else includeFile file - "#USE directive" - --- | Invoke the parser recursively to handle an included file. -includeFile :: String -> OccParser A.Process -includeFile file - = do ps <- getState - (r, ps') <- parseFile file includedFile ps - case r of - Left p -> - do setState ps' - return p - Right f -> - do setState ps' { csLocalNames = csMainLocals ps' } - p <- process - return $ f p - -unknownPP :: OccParser A.Process -unknownPP - = do m <- md - char '#' - rest <- manyTill anyChar (try eol) - addWarning m $ "unknown preprocessor directive ignored: " ++ rest - process - "unknown preprocessor directive" ---}}} --{{{ main process mainProcess :: OccParser A.Process mainProcess = do m <- md - sMainMarker - eol + eof -- Stash the current locals so that we can either restore them -- when we get back to the file we included this one from, or -- pull the TLP name from them at the end. @@ -2096,100 +1940,26 @@ mainProcess -- have the earlier ones in scope, so we can't parse them separately. sourceFile :: OccParser (A.Process, CompState) sourceFile - = do whiteSpace - p <- process + = do p <- process s <- getState return (p, s) - --- | An included file is either a process, or a bunch of specs that can be --- applied to a process (which we return as a function). This is likewise a bit --- of a cheat, in that included files should really be *textually* included, --- but it's good enough for most reasonable uses. -includedFile :: OccParser (Either A.Process (A.Process -> A.Process), CompState) -includedFile - = do whiteSpace - p <- process - s <- getState - do eof - return $ (Right $ replaceMain p, s) - <|> do sMainMarker - eol - return $ (Left p, s) - where - replaceMain :: A.Process -> A.Process -> A.Process - replaceMain (A.Seq m (A.Spec m' s (A.OnlyP m'' p))) np - = A.Seq m (A.Spec m' s (A.OnlyP m'' (replaceMain p np))) - replaceMain (A.Main _) np = np --}}} --}}} ---{{{ preprocessor --- XXX Doesn't handle conditionals. - --- | Find (via a nasty regex search) all the files that this source file includes. -preFindIncludes :: String -> [String] -preFindIncludes source - = concat [case matchRegex incRE l of - Just [_, fn] -> [fn] - Nothing -> [] - | l <- lines source] - where - incRE = mkRegex "^ *#(INCLUDE|USE) +\"([^\"]*)\"" - --- | If a module name doesn't already have a suffix, add one. -mangleModName :: String -> String -mangleModName mod - = if ".occ" `isSuffixOf` mod || ".inc" `isSuffixOf` mod - then mod - else mod ++ ".occ" - --- | Load all the source files necessary for a program. --- We have to do this now, before entering the parser, because the parser --- doesn't run in the IO monad. If there were a monad transformer version of --- Parsec then we could just open files as we need them. -loadSource :: String -> PassM () -loadSource file = load file file - where - load :: String -> String -> PassM () - load file realName - = do ps <- get - case Map.lookup file (csSourceFiles ps) of - Just _ -> return () - Nothing -> - do progress $ "Loading source file " ++ realName - rawSource <- liftIO $ readFile realName - source <- removeIndentation realName (rawSource ++ "\n" ++ mainMarker ++ "\n") - debug $ "Preprocessed source:" - debug $ numberLines source - modify $ (\ps -> ps { csSourceFiles = Map.insert file source (csSourceFiles ps) }) - let deps = map mangleModName $ preFindIncludes source - sequence_ [load dep (joinPath realName dep) | dep <- deps] ---}}} - --{{{ entry points for the parser itself --- | Test a parser production (for use from ghci while debugging the parser). -testParse :: Show a => OccParser a -> String -> IO () -testParse prod text - = do let r = runParser prod emptyState "" text - putStrLn $ "Result: " ++ show r - --- | Parse a file with the given production. -parseFile :: Monad m => String -> OccParser t -> CompState -> m t -parseFile file prod ps - = do let source = case Map.lookup file (csSourceFiles ps) of - Just s -> s - Nothing -> dieIO $ "Failed to preload file: " ++ show file - let ps' = ps { csLoadedFiles = file : csLoadedFiles ps } - case runParser prod ps' file source of - Left err -> dieIO $ "Parse error: " ++ show err +-- | Parse a token stream with the given production. +runTockParser :: [Token] -> OccParser t -> CompState -> PassM t +runTockParser toks prod cs + = do case runParser prod cs "irrelevant filename" toks of + Left err -> die $ "Parse error: " ++ show err Right r -> return r --- | Parse the top level source file in a program. -parseProgram :: String -> PassM A.Process -parseProgram file - = do ps <- get - (p, ps') <- parseFile file sourceFile ps - put ps' +-- | Parse an occam program. +parseOccamProgram :: [Token] -> PassM A.Process +parseOccamProgram toks + = do cs <- get + (p, cs') <- runTockParser toks sourceFile cs + put cs' return p --}}} diff --git a/PreprocessOccam.hs b/PreprocessOccam.hs index 7342f33..02e5e21 100644 --- a/PreprocessOccam.hs +++ b/PreprocessOccam.hs @@ -17,26 +17,23 @@ with this program. If not, see . -} -- | Preprocess occam code. -module PreprocessOccam where +module PreprocessOccam (preprocessOccamProgram) where +import Control.Monad.State import Data.List import qualified Data.Set as Set +import System.IO import Text.Regex +import CompState import Errors import LexOccam import Metadata import Pass +import PrettyShow import StructureOccam import Utils -import CompState -import Control.Monad.Error -import Control.Monad.State -import System -import System.IO -import PrettyShow - -- | Open an included file, looking for it in the search path. -- Return the open filehandle and the location of the file. -- FIXME: This doesn't actually look at the search path yet. @@ -59,21 +56,38 @@ searchFile m filename preprocessFile :: Meta -> String -> PassM [Token] preprocessFile m filename = do (handle, realFilename) <- searchFile m filename - liftIO $ putStrLn $ "Loading " ++ realFilename + progress $ "Loading source file " ++ realFilename origCS <- get modify (\cs -> cs { csCurrentFile = realFilename }) s <- liftIO $ hGetContents handle - toks <- runLexer realFilename s >>= structureOccam >>= preprocessOccam + toks <- runLexer realFilename s + veryDebug $ "{{{ lexer tokens" + veryDebug $ pshow toks + veryDebug $ "}}}" + toks' <- structureOccam toks + veryDebug $ "{{{ structured tokens" + veryDebug $ pshow toks' + veryDebug $ "}}}" + toks'' <- preprocessOccam toks' + veryDebug $ "{{{ preprocessed tokens" + veryDebug $ pshow toks'' + veryDebug $ "}}}" modify (\cs -> cs { csCurrentFile = csCurrentFile origCS }) - return toks + return toks'' -- | Preprocess a token stream. preprocessOccam :: [Token] -> PassM [Token] preprocessOccam [] = return [] -preprocessOccam ((m, TokPreprocessor ('#':s)):(_, EndOfLine):ts) - = do func <- handleDirective m s +preprocessOccam ((m, TokPreprocessor s):(_, EndOfLine):ts) + = do func <- handleDirective m (stripPrefix s) rest <- preprocessOccam ts return $ func rest + where + stripPrefix :: String -> String + stripPrefix (' ':cs) = stripPrefix cs + stripPrefix ('\t':cs) = stripPrefix cs + stripPrefix ('#':cs) = cs + stripPrefix _ = error "bad TokPreprocessor prefix" -- Check the above case didn't miss something. preprocessOccam ((_, TokPreprocessor _):_) = error "bad TokPreprocessor token" @@ -134,17 +148,12 @@ handleUse m [modName] else mod ++ ".occ" --}}} --- | Main function for testing. -main :: IO () -main - = do (arg:_) <- getArgs - v <- evalStateT (runErrorT (test arg)) emptyState - case v of - Left e -> dieIO e - Right r -> return () - where - test :: String -> PassM () - test fn - = do tokens <- preprocessFile emptyMeta fn - liftIO $ putStrLn $ pshow tokens +-- | Load and preprocess an occam program. +preprocessOccamProgram :: String -> PassM [Token] +preprocessOccamProgram filename + = do toks <- preprocessFile emptyMeta filename + veryDebug $ "{{{ tokenised source" + veryDebug $ pshow toks + veryDebug $ "}}}" + return toks diff --git a/RainParse.hs b/RainParse.hs index bd841ce..7fa2002 100644 --- a/RainParse.hs +++ b/RainParse.hs @@ -43,7 +43,6 @@ import CompState import Errors import EvalConstants import EvalLiterals -import Indentation import Intrinsics import Metadata import Pass @@ -296,21 +295,13 @@ rainSourceFile s <- getState return (A.Seq emptyMeta p, s) --- | Parse a file with the given production. --- This is copied from Parse.hs (because OccParser is about to be changed to not be the same as RainParser): -parseFile :: Monad m => String -> RainParser t -> CompState -> m t -parseFile file prod ps - = do let source = case Map.lookup file (csSourceFiles ps) of - Just s -> s - Nothing -> dieIO $ "Failed to preload file: " ++ show file - let ps' = ps { csLoadedFiles = file : csLoadedFiles ps } - case runParser prod ps' file source of - Left err -> dieIO $ "Parse error: " ++ show err - Right r -> return r - +-- | Load and parse a Rain source file. parseRainProgram :: String -> PassM A.Process -parseRainProgram file - = do ps <- get - (p, ps') <- parseFile file rainSourceFile ps - put ps' - return p +parseRainProgram filename + = do source <- liftIO $ readFile filename + cs <- get + case runParser rainSourceFile cs filename source of + Left err -> dieIO $ "Parse error: " ++ show err + Right (p, cs') -> + do put cs' + return p diff --git a/Utils.hs b/Utils.hs index dc81930..2d45835 100644 --- a/Utils.hs +++ b/Utils.hs @@ -66,3 +66,7 @@ transformEither funcLeft funcRight x = case x of maybeIO :: IO a -> IO (Maybe a) maybeIO op = catch (op >>= (return . Just)) (\e -> return Nothing) +-- | Remove a number of items from the start and end of a list. +chop :: Int -> Int -> [a] -> [a] +chop start end s = drop start (take (length s - end) s) + diff --git a/testcases/stringlit.occ b/testcases/stringlit.occ index a222bd6..fdd994a 100644 --- a/testcases/stringlit.occ +++ b/testcases/stringlit.occ @@ -10,6 +10,6 @@ PROC P () VAL []BYTE mls IS "first* *second* *third": - VAL [5][5]BYTE square IS ["sator", "arepo", "tenas", "opera", "rotas"]: + VAL [5][5]BYTE square IS ["sator", "arepo", "tenat", "opera", "rotas"]: SKIP :