Rework the parser to use the new lexer.

The occam parser is now a GenParser Token OccState, rather than a GenParser
Char OccState, and a lot of now-redundant code has been removed. The parser is
also somewhat faster, which wasn't intended but is nice anyway.

I've also modified the Rain parser to not rely on the old preprocessing code;
it wasn't appropriate for Rain's syntax anyway, so I assume Neil will be
replacing it eventually.
This commit is contained in:
Adam Sampson 2007-08-21 20:44:15 +00:00
parent 1f490e9f7f
commit 1bac142a53
9 changed files with 204 additions and 636 deletions

View File

@ -50,15 +50,10 @@ data CompState = CompState {
csVerboseLevel :: Int, csVerboseLevel :: Int,
csOutputFile :: String, csOutputFile :: String,
-- Set by (new) preprocessor -- Set by preprocessor
csCurrentFile :: String, csCurrentFile :: String,
csUsedFiles :: Set.Set String, csUsedFiles :: Set.Set String,
-- Set by (old) preprocessor
csSourceFiles :: Map String String,
csIndentLinesIn :: [String],
csIndentLinesOut :: [String],
-- Set by Parse -- Set by Parse
csLocalNames :: [(String, A.Name)], csLocalNames :: [(String, A.Name)],
csMainLocals :: [(String, A.Name)], csMainLocals :: [(String, A.Name)],
@ -93,10 +88,6 @@ emptyState = CompState {
csCurrentFile = "none", csCurrentFile = "none",
csUsedFiles = Set.empty, csUsedFiles = Set.empty,
csSourceFiles = Map.empty,
csIndentLinesIn = [],
csIndentLinesOut = [],
csLocalNames = [], csLocalNames = [],
csMainLocals = [], csMainLocals = [],
csNames = Map.empty, csNames = Map.empty,

View File

@ -1,191 +0,0 @@
{-
Tock: a compiler for parallel languages
Copyright (C) 2007 University of Kent
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 2 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
-}
-- | Parse indentation in occam source.
module Indentation (removeIndentation, indentMarker, outdentMarker, eolMarker) where
import Control.Monad
import Control.Monad.Error
import Control.Monad.State
import Data.List
import Text.Regex
import CompState
import Errors
import Pass
-- FIXME When this joins continuation lines, it should stash the details of
-- what it joined into CompState so that error reporting later on can
-- reconstruct the original position.
indentMarker = "__indent"
outdentMarker = "__outdent"
eolMarker = "__eol"
-- FIXME: There's probably a nicer way of doing this.
-- (Well, trivially, use a WriterT...)
-- | Preprocess occam source code to remove comments and turn indentation into
-- explicit markers.
removeIndentation :: String -> String -> PassM String
removeIndentation filename orig
= do modify $ (\ps -> ps { csIndentLinesIn = origLines,
csIndentLinesOut = [] })
catchError (nextLine 0) reportError
ps <- get
let out = concat $ intersperse "\n" $ reverse $ csIndentLinesOut ps
modify $ (\ps -> ps { csIndentLinesIn = [],
csIndentLinesOut = [] })
return out
where
origLines = lines orig
-- | When something goes wrong, figure out how far through the file we'd got.
reportError :: String -> PassM ()
reportError error
= do ps <- get
let lineNumber = length origLines - length (csIndentLinesIn ps)
die $ filename ++ ":" ++ show lineNumber ++ ": " ++ error
-- | Get the next raw line from the input.
getLine :: PassM (Maybe String)
getLine
= do ps <- get
case csIndentLinesIn ps of
[] -> return Nothing
(line:rest) ->
do put $ ps { csIndentLinesIn = rest }
return $ Just line
-- | Add a line to the output.
putLine :: String -> PassM ()
putLine line
= modify $ (\ps -> ps { csIndentLinesOut = line : csIndentLinesOut ps })
-- | Append to the *previous* line added.
addToLine :: String -> PassM ()
addToLine s
= modify $ (\ps -> ps { csIndentLinesOut =
case csIndentLinesOut ps of (l:ls) -> ((l ++ s):ls) })
-- | Given a line, read the rest of it, then return the complete thing.
finishLine :: String -> String -> Bool -> Bool -> String -> PassM String
finishLine left soFar inStr isChar afterStr
= case (left, inStr, isChar) of
([], False, _) -> plainEOL
('-':'-':cs, False, _) -> plainEOL
([], True, _) -> die "end of line in string without continuation"
(['*'], True, _) -> stringEOL
('\'':cs, False, _) -> finishLine cs (afterStr ++ ('\'':soFar)) True True ""
('\'':cs, True, True) -> finishLine cs (afterStr ++ ('\'':soFar)) False False ""
('"':cs, False, _) -> finishLine cs (afterStr ++ ('"':soFar)) True False ""
('"':cs, True, False) -> finishLine cs (afterStr ++ ('"':soFar)) False False ""
('*':'*':cs, True, _) -> finishLine cs ('*':'*':soFar) True isChar afterStr
('*':'"':cs, True, _) -> finishLine cs ('"':'*':soFar) True isChar afterStr
('*':'\'':cs, True, _) -> finishLine cs ('\'':'*':soFar) True isChar afterStr
(c:cs, _, _) -> finishLine cs (c:soFar) inStr isChar afterStr
where
-- | Finish a regular line.
plainEOL :: PassM String
plainEOL
= do let s = reverse soFar
if hasContinuation s
then do l <- getLine >>= checkJust "no continuation line"
finishLine l ('\n':soFar) False False ""
else return s
-- | Finish a line where we're in the middle of a string.
stringEOL :: PassM String
stringEOL
= do l <- getLine >>= checkJust "no string continuation line"
l' <- contStringStart l
-- When we hit the end of the string, add a \n after it to
-- make the line numbers match up again.
finishLine l' soFar True isChar ('\n':afterStr)
-- | Does a line have a continuation line following it?
hasContinuation :: String -> Bool
hasContinuation s
= case matchRegex contRE s of
Just _ -> True
Nothing -> False
where
-- FIXME This should probably be based on the list of operators and
-- reserved words that the parser already has; for now this is the
-- regexp that occamdoc uses.
contRE = mkRegexWithOpts "(-|~|\\+|-|\\*|/|\\\\|/\\\\|\\\\/|><|=|<>|<|>|>=|<=|,|;|:=|<<|>>|([[:space:]](MINUS|BITNOT|NOT|SIZE|REM|PLUS|MINUS|TIMES|BITAND|BITOR|AND|OR|AFTER|FROM|FOR|IS|RETYPES|RESHAPES)))[[:space:]]*$" False True
-- | Strip the spaces-then-star beginning off a string continuation line.
contStringStart :: String -> PassM String
contStringStart (' ':cs) = contStringStart cs
contStringStart ('*':cs) = return cs
contStringStart _ = die "string continuation line doesn't start with *"
-- | Get the next *complete* line from the input, resolving continuations.
readLine :: PassM (Maybe String)
readLine
= do line <- getLine
case line of
Just s ->
do r <- finishLine s "" False False ""
return $ Just r
Nothing -> return Nothing
-- | Compute the indentation level of a line, and return it without the indentation.
countIndent :: String -> Int -> PassM (Int, String)
-- Tabs are 8 spaces.
countIndent ('\t':cs) soFar = countIndent cs (soFar + 4)
countIndent (' ':' ':cs) soFar = countIndent cs (soFar + 1)
countIndent [' '] soFar = return (soFar, [])
countIndent (' ':_) soFar
= die "bad indentation (odd number of spaces)"
countIndent cs soFar = return (soFar, cs)
-- | Repeat a string N times.
rep :: Int -> String -> String
rep n s = concat $ take n (repeat s)
-- | Process the next line from the input.
nextLine :: Int -> PassM ()
nextLine level
= do l <- readLine
case l of
Nothing -> return ()
Just line ->
do (newLevel, stripped) <- countIndent line 0
addLine level newLevel line stripped
-- | Once a line's been retrieved, add it to the output along with the
-- appropriate markers, then go and process the next one.
addLine :: Int -> Int -> String -> String -> PassM ()
addLine level newLevel line stripped
| stripped == "" =
do putLine ""
nextLine level
| newLevel > level =
do addToLine $ rep (newLevel - level) (" " ++ indentMarker)
putLine $ line ++ " " ++ eolMarker
nextLine newLevel
| newLevel < level =
do addToLine $ rep (level - newLevel) (" " ++ outdentMarker)
putLine $ line ++ " " ++ eolMarker
nextLine newLevel
| otherwise =
do putLine $ line ++ " " ++ eolMarker
nextLine level

View File

@ -17,7 +17,7 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
-} -}
-- | Driver for the compiler. -- | Driver for the compiler.
module Main where module Main (main) where
import Control.Monad import Control.Monad
import Control.Monad.Error import Control.Monad.Error
@ -34,6 +34,7 @@ import GenerateC
import GenerateCPPCSP import GenerateCPPCSP
import Parse import Parse
import Pass import Pass
import PreprocessOccam
import PrettyShow import PrettyShow
import RainParse import RainParse
import RainPasses import RainPasses
@ -141,14 +142,10 @@ compile :: String -> PassM ()
compile fn compile fn
= do optsPS <- get = do optsPS <- get
debug "{{{ Preprocess"
loadSource fn
debug "}}}"
debug "{{{ Parse" debug "{{{ Parse"
progress "Parse" progress "Parse"
ast1 <- case csFrontend optsPS of ast1 <- case csFrontend optsPS of
FrontendOccam -> parseProgram fn FrontendOccam -> preprocessOccamProgram fn >>= parseOccamProgram
FrontendRain -> parseRainProgram fn FrontendRain -> parseRainProgram fn
debugAST ast1 debugAST ast1
debug "}}}" debug "}}}"

View File

@ -1,4 +1,4 @@
targets = tock tocktest lextest targets = tock tocktest
all: $(targets) all: $(targets)
@ -20,9 +20,6 @@ tock: $(sources)
tocktest: $(sources) tocktest: $(sources)
ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain
lextest: $(sources)
ghc $(ghc_opts) -o lextest -main-is PreprocessOccam --make PreprocessOccam
CFLAGS = \ CFLAGS = \
-O2 \ -O2 \
-g -Wall \ -g -Wall \

526
Parse.hs
View File

@ -17,7 +17,7 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
-} -}
-- | Parse occam code into an AST. -- | Parse occam code into an AST.
module Parse where module Parse (parseOccamProgram) where
import Control.Monad (liftM, when) import Control.Monad (liftM, when)
import Control.Monad.Error (runErrorT) import Control.Monad.Error (runErrorT)
@ -26,26 +26,23 @@ import Data.List
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Maybe import Data.Maybe
import Debug.Trace import Debug.Trace
import qualified IO
import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Language (emptyDef) import Text.ParserCombinators.Parsec.Pos (newPos)
import qualified Text.ParserCombinators.Parsec.Token as P
import Text.Regex
import qualified AST as A import qualified AST as A
import CompState import CompState
import Errors import Errors
import EvalConstants import EvalConstants
import EvalLiterals import EvalLiterals
import Indentation
import Intrinsics import Intrinsics
import LexOccam
import Metadata import Metadata
import Pass import Pass
import Types import Types
import Utils import Utils
--{{{ setup stuff for Parsec --{{{ the parser monad
type OccParser = GenParser Char CompState type OccParser = GenParser Token CompState
-- | Make MonadState functions work in the parser monad. -- | Make MonadState functions work in the parser monad.
-- This came from <http://hackage.haskell.org/trac/ghc/ticket/1274> -- which means -- This came from <http://hackage.haskell.org/trac/ghc/ticket/1274> -- which means
@ -56,145 +53,48 @@ instance MonadState st (GenParser tok st) where
instance Die (GenParser tok st) where instance Die (GenParser tok st) where
die = fail die = fail
occamStyle
= emptyDef
{ P.commentLine = "--"
, P.nestedComments = False
, P.identStart = letter
, P.identLetter = alphaNum <|> char '.'
, P.opStart = oneOf "+-*/\\>=<~"
, P.opLetter = oneOf "/\\>=<"
, P.reservedOpNames= [
"+",
"-",
"*",
"/",
"\\",
"/\\",
"\\/",
"><",
"<<",
">>",
"=",
"<>",
"<",
">",
">=",
"<=",
"-",
"~"
]
, P.reservedNames = [
"AFTER",
"ALT",
"AND",
"ANY",
"AT",
"BITAND",
"BITNOT",
"BITOR",
"BOOL",
"BYTE",
"BYTESIN",
"CASE",
"CHAN",
"DATA",
"ELSE",
"FALSE",
"FOR",
"FROM",
"FUNCTION",
"IF",
"IN",
"INLINE",
"INT",
"INT16",
"INT32",
"INT64",
"IS",
"MINUS",
"MOSTNEG",
"MOSTPOS",
"NOT",
"OF",
"OFFSETOF",
"OR",
"PACKED",
"PAR",
"PLACE",
"PLACED",
"PLUS",
"PORT",
"PRI",
"PROC",
"PROCESSOR",
"PROTOCOL",
"REAL32",
"REAL64",
"RECORD",
"REM",
"RESHAPES",
"RESULT",
"RETYPES",
"ROUND",
"SEQ",
"SIZE",
"SKIP",
"STOP",
"TIMER",
"TIMES",
"TRUE",
"TRUNC",
"TYPE",
"VAL",
"VALOF",
"WHILE",
"WORKSPACE",
"VECSPACE",
"#INCLUDE",
"#USE",
indentMarker,
outdentMarker,
eolMarker,
mainMarker
]
, P.caseSensitive = True
}
lexer :: P.TokenParser CompState
lexer = P.makeTokenParser occamStyle
-- XXX replace whitespace with something that doesn't eat \ns
whiteSpace = P.whiteSpace lexer
lexeme = P.lexeme lexer
symbol = P.symbol lexer
natural = P.natural lexer
parens = P.parens lexer
semi = P.semi lexer
identifier = P.identifier lexer
reserved = P.reserved lexer
reservedOp = P.reservedOp lexer
--}}} --}}}
--{{{ matching rules for raw tokens
-- | Extract source position from a `Token`.
tokenPos :: Token -> SourcePos
tokenPos (m, _) = metaToSourcePos m
genToken :: (Token -> Maybe a) -> OccParser a
genToken test = token show tokenPos test
reserved :: String -> OccParser ()
reserved name = genToken test
where
test (_, TokReserved name')
= if name' == name then Just () else Nothing
test _ = Nothing
identifier :: OccParser String
identifier = genToken test
where
test (_, TokIdentifier s) = Just s
test _ = Nothing
plainToken :: TokenType -> OccParser ()
plainToken t = genToken test
where
test (_, t') = if t == t' then Just () else Nothing
--}}}
--{{{ symbols --{{{ symbols
sLeft = try $ symbol "[" sAmp = reserved "&"
sRight = try $ symbol "]" sAssign = reserved ":="
sLeftR = try $ symbol "(" sBang = reserved "!"
sRightR = try $ symbol ")" sColon = reserved ":"
sAssign = try $ symbol ":=" sColons = reserved "::"
sColon = try $ symbol ":" sComma = reserved ","
sColons = try $ symbol "::" sEq = reserved "="
sComma = try $ symbol "," sLeft = reserved "["
sSemi = try $ symbol ";" sLeftR = reserved "("
sAmp = try $ symbol "&" sQuest = reserved "?"
sQuest = try $ symbol "?" sRight = reserved "]"
sBang = try $ symbol "!" sRightR = reserved ")"
sEq = try $ symbol "=" sSemi = reserved ";"
sApos = try $ symbol "'"
sQuote = try $ symbol "\""
sHash = try $ symbol "#"
--}}} --}}}
--{{{ keywords --{{{ keywords
sAFTER = reserved "AFTER" sAFTER = reserved "AFTER"
@ -263,29 +163,40 @@ sVALOF = reserved "VALOF"
sWHILE = reserved "WHILE" sWHILE = reserved "WHILE"
sWORKSPACE = reserved "WORKSPACE" sWORKSPACE = reserved "WORKSPACE"
sVECSPACE = reserved "VECSPACE" sVECSPACE = reserved "VECSPACE"
sppINCLUDE = reserved "#INCLUDE"
sppUSE = reserved "#USE"
--}}} --}}}
--{{{ markers inserted by the preprocessor --{{{ markers inserted by the preprocessor
-- XXX could handle VALOF by translating each step to one { and matching multiple ones? indent = do { plainToken Indent } <?> "indentation increase"
mainMarker = "__main" outdent = do { plainToken Outdent } <?> "indentation decrease"
eol = do { plainToken EndOfLine } <?> "end of line"
sMainMarker = do { whiteSpace; reserved mainMarker } <?> "end of input (top-level process)"
indent = do { whiteSpace; reserved indentMarker } <?> "indentation increase"
outdent = do { whiteSpace; reserved outdentMarker } <?> "indentation decrease"
eol = do { whiteSpace; reserved eolMarker } <?> "end of line"
--}}} --}}}
--{{{ helper functions --{{{ helper functions
md :: OccParser Meta md :: OccParser Meta
md md
= do pos <- getPosition = do pos <- getPosition
return Meta { return $ sourcePosToMeta pos
metaFile = Just $ sourceName pos,
--{{{ Meta to/from SourcePos
-- | Convert source position into Parsec's format.
metaToSourcePos :: Meta -> SourcePos
metaToSourcePos meta
= newPos filename (metaLine meta) (metaColumn meta)
where
filename = case metaFile meta of
Just s -> s
Nothing -> ""
-- | Convert source position out of Parsec's format.
sourcePosToMeta :: SourcePos -> Meta
sourcePosToMeta pos
= emptyMeta {
metaFile = case sourceName pos of
"" -> Nothing
s -> Just s,
metaLine = sourceLine pos, metaLine = sourceLine pos,
metaColumn = sourceColumn pos metaColumn = sourceColumn pos
} }
--}}}
--{{{ try* --{{{ try*
-- These functions let you try a sequence of productions and only retrieve the -- These functions let you try a sequence of productions and only retrieve the
@ -335,12 +246,6 @@ tryXVVX a b c d = try (do { a; bv <- b; cv <- c; d; return (bv, cv) })
tryVXXV :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, d) tryVXXV :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, d)
tryVXXV a b c d = try (do { av <- a; b; c; dv <- d; return (av, dv) }) tryVXXV a b c d = try (do { av <- a; b; c; dv <- d; return (av, dv) })
tryVXVX :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, c)
tryVXVX a b c d = try (do { av <- a; b; cv <- c; d; return (av, cv) })
tryVVXX :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, b)
tryVVXX a b c d = try (do { av <- a; bv <- b; c; d; return (av, bv) })
tryVVXV :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, b, d) tryVVXV :: OccParser a -> OccParser b -> OccParser c -> OccParser d -> OccParser (a, b, d)
tryVVXV a b c d = try (do { av <- a; bv <- b; c; dv <- d; return (av, bv, dv) }) tryVVXV a b c d = try (do { av <- a; bv <- b; c; dv <- d; return (av, bv, dv) })
@ -431,15 +336,6 @@ handleSpecs specs inner specMarker
mapM scopeOutSpec ss' mapM scopeOutSpec ss'
return $ foldl (\e s -> specMarker m s e) v ss' return $ foldl (\e s -> specMarker m s e) v ss'
-- | Like sepBy1, but not eager: it won't consume the separator unless it finds
-- another item after it.
sepBy1NE :: OccParser a -> OccParser b -> OccParser [a]
sepBy1NE item sep
= do i <- item
rest <- option [] $ try (do sep
sepBy1NE item sep)
return $ i : rest
-- | Run several different parsers with a separator between them. -- | Run several different parsers with a separator between them.
-- If you give it [a, b, c] and s, it'll parse [a, s, b, s, c] then -- If you give it [a, b, c] and s, it'll parse [a, s, b, s, c] then
-- give you back the results from [a, b, c]. -- give you back the results from [a, b, c].
@ -803,42 +699,31 @@ untypedLiteral
real :: OccParser A.LiteralRepr real :: OccParser A.LiteralRepr
real real
= do m <- md = do m <- md
(l, r) <- tryVXVX digits (char '.') digits (char 'E') genToken (test m)
e <- lexeme occamExponent
return $ A.RealLiteral m (l ++ "." ++ r ++ "E" ++ e)
<|> do m <- md
l <- tryVX digits (char '.')
r <- lexeme digits
return $ A.RealLiteral m (l ++ "." ++ r)
<?> "real literal" <?> "real literal"
where
occamExponent :: OccParser String test m (_, TokRealLiteral s) = Just $ A.RealLiteral m s
occamExponent test _ _ = Nothing
= do c <- oneOf "+-"
d <- digits
return $ c : d
<?> "exponent"
integer :: OccParser A.LiteralRepr integer :: OccParser A.LiteralRepr
integer integer
= do m <- md = do m <- md
do { d <- lexeme digits; return $ A.IntLiteral m d } genToken (test m)
<|> do { d <- lexeme (sHash >> many1 hexDigit); return $ A.HexLiteral m d }
<?> "integer literal" <?> "integer literal"
where
digits :: OccParser String test m (_, TokIntLiteral s) = Just $ A.IntLiteral m s
digits test m (_, TokHexLiteral s) = Just $ A.HexLiteral m (drop 1 s)
= many1 digit test _ _ = Nothing
<?> "decimal digits"
byte :: OccParser A.LiteralRepr byte :: OccParser A.LiteralRepr
byte byte
= do m <- md = do m <- md
char '\'' genToken (test m)
c <- literalCharacter
sApos
return c
<?> "byte literal" <?> "byte literal"
where
test m (_, TokCharLiteral s)
= case splitStringLiteral m (chop 1 1 s) of [lr] -> Just lr
test _ _ = Nothing
-- | Parse a table -- an array literal which might be subscripted or sliced. -- | Parse a table -- an array literal which might be subscripted or sliced.
-- (The implication of this is that the type of the expression this parses -- (The implication of this is that the type of the expression this parses
@ -884,29 +769,45 @@ tableElems
stringLiteral :: OccParser (A.LiteralRepr, A.Dimension) stringLiteral :: OccParser (A.LiteralRepr, A.Dimension)
stringLiteral stringLiteral
= do m <- md = do m <- md
char '"' cs <- stringCont <|> stringLit
cs <- manyTill literalCharacter sQuote let aes = [A.ArrayElemExpr $ A.Literal m' A.Byte c
let aes = [A.ArrayElemExpr $ A.Literal m A.Byte c | c <- cs] | c@(A.ByteLiteral m' _) <- cs]
return (A.ArrayLiteral m aes, A.Dimension $ length cs) return (A.ArrayLiteral m aes, A.Dimension $ length cs)
<?> "string literal" <?> "string literal"
where
character :: OccParser String stringCont :: OccParser [A.LiteralRepr]
character stringCont
= do char '*'
do char '#'
a <- hexDigit
b <- hexDigit
return $ ['*', '#', a, b]
<|> do { c <- anyChar; return ['*', c] }
<|> do c <- anyChar
return [c]
<?> "character"
literalCharacter :: OccParser A.LiteralRepr
literalCharacter
= do m <- md = do m <- md
c <- character s <- genToken test
return $ A.ByteLiteral m c rest <- stringCont <|> stringLit
return $ (splitStringLiteral m s) ++ rest
where
test (_, TokStringCont s) = Just (chop 1 2 s)
test _ = Nothing
stringLit :: OccParser [A.LiteralRepr]
stringLit
= do m <- md
s <- genToken test
return $ splitStringLiteral m s
where
test (_, TokStringLiteral s) = Just (chop 1 1 s)
test _ = Nothing
-- | Parse a string literal.
-- FIXME: This should decode the occam escapes.
splitStringLiteral :: Meta -> String -> [A.LiteralRepr]
splitStringLiteral m cs = ssl cs
where
ssl [] = []
ssl ('*':'#':a:b:cs)
= (A.ByteLiteral m ['*', '#', a, b]) : ssl cs
ssl ('*':'\n':cs)
= (A.ByteLiteral m $ tail $ dropWhile (/= '*') cs) : ssl cs
ssl ('*':c:cs)
= (A.ByteLiteral m ['*', c]) : ssl cs
ssl (c:cs)
= (A.ByteLiteral m [c]) : ssl cs
--}}} --}}}
--{{{ expressions --{{{ expressions
expressionList :: [A.Type] -> OccParser A.ExpressionList expressionList :: [A.Type] -> OccParser A.ExpressionList
@ -1071,42 +972,42 @@ intrinsicFunctionSingle
monadicOperator :: OccParser A.MonadicOp monadicOperator :: OccParser A.MonadicOp
monadicOperator monadicOperator
= do { reservedOp "-" <|> sMINUS; return A.MonadicSubtr } = do { reserved "-" <|> sMINUS; return A.MonadicSubtr }
<|> do { reservedOp "~" <|> sBITNOT; return A.MonadicBitNot } <|> do { reserved "~" <|> sBITNOT; return A.MonadicBitNot }
<|> do { sNOT; return A.MonadicNot } <|> do { sNOT; return A.MonadicNot }
<?> "monadic operator" <?> "monadic operator"
dyadicOperator :: OccParser A.DyadicOp dyadicOperator :: OccParser A.DyadicOp
dyadicOperator dyadicOperator
= do { reservedOp "+"; return A.Add } = do { reserved "+"; return A.Add }
<|> do { reservedOp "-"; return A.Subtr } <|> do { reserved "-"; return A.Subtr }
<|> do { reservedOp "*"; return A.Mul } <|> do { reserved "*"; return A.Mul }
<|> do { reservedOp "/"; return A.Div } <|> do { reserved "/"; return A.Div }
<|> do { reservedOp "\\"; return A.Rem } <|> do { reserved "\\"; return A.Rem }
<|> do { sREM; return A.Rem } <|> do { sREM; return A.Rem }
<|> do { sMINUS; return A.Minus } <|> do { sMINUS; return A.Minus }
<|> do { reservedOp "/\\" <|> sBITAND; return A.BitAnd } <|> do { reserved "/\\" <|> sBITAND; return A.BitAnd }
<|> do { reservedOp "\\/" <|> sBITOR; return A.BitOr } <|> do { reserved "\\/" <|> sBITOR; return A.BitOr }
<|> do { reservedOp "><"; return A.BitXor } <|> do { reserved "><"; return A.BitXor }
<?> "dyadic operator" <?> "dyadic operator"
-- These always need an INT on their right-hand side. -- These always need an INT on their right-hand side.
shiftOperator :: OccParser A.DyadicOp shiftOperator :: OccParser A.DyadicOp
shiftOperator shiftOperator
= do { reservedOp "<<"; return A.LeftShift } = do { reserved "<<"; return A.LeftShift }
<|> do { reservedOp ">>"; return A.RightShift } <|> do { reserved ">>"; return A.RightShift }
<?> "shift operator" <?> "shift operator"
-- These always return a BOOL, so we have to deal with them specially for type -- These always return a BOOL, so we have to deal with them specially for type
-- context. -- context.
comparisonOperator :: OccParser A.DyadicOp comparisonOperator :: OccParser A.DyadicOp
comparisonOperator comparisonOperator
= do { reservedOp "="; return A.Eq } = do { reserved "="; return A.Eq }
<|> do { reservedOp "<>"; return A.NotEq } <|> do { reserved "<>"; return A.NotEq }
<|> do { reservedOp "<"; return A.Less } <|> do { reserved "<"; return A.Less }
<|> do { reservedOp ">"; return A.More } <|> do { reserved ">"; return A.More }
<|> do { reservedOp "<="; return A.LessEq } <|> do { reserved "<="; return A.LessEq }
<|> do { reservedOp ">="; return A.MoreEq } <|> do { reserved ">="; return A.MoreEq }
<|> do { sAFTER; return A.After } <|> do { sAFTER; return A.After }
<?> "comparison operator" <?> "comparison operator"
@ -1617,7 +1518,6 @@ process
<|> mainProcess <|> mainProcess
<|> handleSpecs (allocation <|> specification) process <|> handleSpecs (allocation <|> specification) process
(\m s p -> A.Seq m (A.Spec m s (A.OnlyP m p))) (\m s p -> A.Seq m (A.Spec m s (A.OnlyP m p)))
<|> preprocessorDirective
<?> "process" <?> "process"
--{{{ assignment (:=) --{{{ assignment (:=)
@ -2021,67 +1921,11 @@ intrinsicProc
return $ A.IntrinsicProcCall m s as return $ A.IntrinsicProcCall m s as
<?> "intrinsic PROC instance" <?> "intrinsic PROC instance"
--}}} --}}}
--{{{ preprocessor directives
preprocessorDirective :: OccParser A.Process
preprocessorDirective
= ppInclude
<|> ppUse
<|> unknownPP
<?> "preprocessor directive"
ppInclude :: OccParser A.Process
ppInclude
= do sppINCLUDE
char '"'
file <- manyTill character sQuote
eol
includeFile $ concat file
<?> "#INCLUDE directive"
ppUse :: OccParser A.Process
ppUse
= do sppUSE
char '"'
mod <- manyTill character sQuote
eol
let file = mangleModName $ concat mod
-- Check whether it's been included already.
ps <- getState
if file `elem` csLoadedFiles ps
then process
else includeFile file
<?> "#USE directive"
-- | Invoke the parser recursively to handle an included file.
includeFile :: String -> OccParser A.Process
includeFile file
= do ps <- getState
(r, ps') <- parseFile file includedFile ps
case r of
Left p ->
do setState ps'
return p
Right f ->
do setState ps' { csLocalNames = csMainLocals ps' }
p <- process
return $ f p
unknownPP :: OccParser A.Process
unknownPP
= do m <- md
char '#'
rest <- manyTill anyChar (try eol)
addWarning m $ "unknown preprocessor directive ignored: " ++ rest
process
<?> "unknown preprocessor directive"
--}}}
--{{{ main process --{{{ main process
mainProcess :: OccParser A.Process mainProcess :: OccParser A.Process
mainProcess mainProcess
= do m <- md = do m <- md
sMainMarker eof
eol
-- Stash the current locals so that we can either restore them -- Stash the current locals so that we can either restore them
-- when we get back to the file we included this one from, or -- when we get back to the file we included this one from, or
-- pull the TLP name from them at the end. -- pull the TLP name from them at the end.
@ -2096,100 +1940,26 @@ mainProcess
-- have the earlier ones in scope, so we can't parse them separately. -- have the earlier ones in scope, so we can't parse them separately.
sourceFile :: OccParser (A.Process, CompState) sourceFile :: OccParser (A.Process, CompState)
sourceFile sourceFile
= do whiteSpace = do p <- process
p <- process
s <- getState s <- getState
return (p, s) return (p, s)
-- | An included file is either a process, or a bunch of specs that can be
-- applied to a process (which we return as a function). This is likewise a bit
-- of a cheat, in that included files should really be *textually* included,
-- but it's good enough for most reasonable uses.
includedFile :: OccParser (Either A.Process (A.Process -> A.Process), CompState)
includedFile
= do whiteSpace
p <- process
s <- getState
do eof
return $ (Right $ replaceMain p, s)
<|> do sMainMarker
eol
return $ (Left p, s)
where
replaceMain :: A.Process -> A.Process -> A.Process
replaceMain (A.Seq m (A.Spec m' s (A.OnlyP m'' p))) np
= A.Seq m (A.Spec m' s (A.OnlyP m'' (replaceMain p np)))
replaceMain (A.Main _) np = np
--}}} --}}}
--}}} --}}}
--{{{ preprocessor
-- XXX Doesn't handle conditionals.
-- | Find (via a nasty regex search) all the files that this source file includes.
preFindIncludes :: String -> [String]
preFindIncludes source
= concat [case matchRegex incRE l of
Just [_, fn] -> [fn]
Nothing -> []
| l <- lines source]
where
incRE = mkRegex "^ *#(INCLUDE|USE) +\"([^\"]*)\""
-- | If a module name doesn't already have a suffix, add one.
mangleModName :: String -> String
mangleModName mod
= if ".occ" `isSuffixOf` mod || ".inc" `isSuffixOf` mod
then mod
else mod ++ ".occ"
-- | Load all the source files necessary for a program.
-- We have to do this now, before entering the parser, because the parser
-- doesn't run in the IO monad. If there were a monad transformer version of
-- Parsec then we could just open files as we need them.
loadSource :: String -> PassM ()
loadSource file = load file file
where
load :: String -> String -> PassM ()
load file realName
= do ps <- get
case Map.lookup file (csSourceFiles ps) of
Just _ -> return ()
Nothing ->
do progress $ "Loading source file " ++ realName
rawSource <- liftIO $ readFile realName
source <- removeIndentation realName (rawSource ++ "\n" ++ mainMarker ++ "\n")
debug $ "Preprocessed source:"
debug $ numberLines source
modify $ (\ps -> ps { csSourceFiles = Map.insert file source (csSourceFiles ps) })
let deps = map mangleModName $ preFindIncludes source
sequence_ [load dep (joinPath realName dep) | dep <- deps]
--}}}
--{{{ entry points for the parser itself --{{{ entry points for the parser itself
-- | Test a parser production (for use from ghci while debugging the parser). -- | Parse a token stream with the given production.
testParse :: Show a => OccParser a -> String -> IO () runTockParser :: [Token] -> OccParser t -> CompState -> PassM t
testParse prod text runTockParser toks prod cs
= do let r = runParser prod emptyState "" text = do case runParser prod cs "irrelevant filename" toks of
putStrLn $ "Result: " ++ show r Left err -> die $ "Parse error: " ++ show err
-- | Parse a file with the given production.
parseFile :: Monad m => String -> OccParser t -> CompState -> m t
parseFile file prod ps
= do let source = case Map.lookup file (csSourceFiles ps) of
Just s -> s
Nothing -> dieIO $ "Failed to preload file: " ++ show file
let ps' = ps { csLoadedFiles = file : csLoadedFiles ps }
case runParser prod ps' file source of
Left err -> dieIO $ "Parse error: " ++ show err
Right r -> return r Right r -> return r
-- | Parse the top level source file in a program. -- | Parse an occam program.
parseProgram :: String -> PassM A.Process parseOccamProgram :: [Token] -> PassM A.Process
parseProgram file parseOccamProgram toks
= do ps <- get = do cs <- get
(p, ps') <- parseFile file sourceFile ps (p, cs') <- runTockParser toks sourceFile cs
put ps' put cs'
return p return p
--}}} --}}}

View File

@ -17,26 +17,23 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
-} -}
-- | Preprocess occam code. -- | Preprocess occam code.
module PreprocessOccam where module PreprocessOccam (preprocessOccamProgram) where
import Control.Monad.State
import Data.List import Data.List
import qualified Data.Set as Set import qualified Data.Set as Set
import System.IO
import Text.Regex import Text.Regex
import CompState
import Errors import Errors
import LexOccam import LexOccam
import Metadata import Metadata
import Pass import Pass
import PrettyShow
import StructureOccam import StructureOccam
import Utils import Utils
import CompState
import Control.Monad.Error
import Control.Monad.State
import System
import System.IO
import PrettyShow
-- | Open an included file, looking for it in the search path. -- | Open an included file, looking for it in the search path.
-- Return the open filehandle and the location of the file. -- Return the open filehandle and the location of the file.
-- FIXME: This doesn't actually look at the search path yet. -- FIXME: This doesn't actually look at the search path yet.
@ -59,21 +56,38 @@ searchFile m filename
preprocessFile :: Meta -> String -> PassM [Token] preprocessFile :: Meta -> String -> PassM [Token]
preprocessFile m filename preprocessFile m filename
= do (handle, realFilename) <- searchFile m filename = do (handle, realFilename) <- searchFile m filename
liftIO $ putStrLn $ "Loading " ++ realFilename progress $ "Loading source file " ++ realFilename
origCS <- get origCS <- get
modify (\cs -> cs { csCurrentFile = realFilename }) modify (\cs -> cs { csCurrentFile = realFilename })
s <- liftIO $ hGetContents handle s <- liftIO $ hGetContents handle
toks <- runLexer realFilename s >>= structureOccam >>= preprocessOccam toks <- runLexer realFilename s
veryDebug $ "{{{ lexer tokens"
veryDebug $ pshow toks
veryDebug $ "}}}"
toks' <- structureOccam toks
veryDebug $ "{{{ structured tokens"
veryDebug $ pshow toks'
veryDebug $ "}}}"
toks'' <- preprocessOccam toks'
veryDebug $ "{{{ preprocessed tokens"
veryDebug $ pshow toks''
veryDebug $ "}}}"
modify (\cs -> cs { csCurrentFile = csCurrentFile origCS }) modify (\cs -> cs { csCurrentFile = csCurrentFile origCS })
return toks return toks''
-- | Preprocess a token stream. -- | Preprocess a token stream.
preprocessOccam :: [Token] -> PassM [Token] preprocessOccam :: [Token] -> PassM [Token]
preprocessOccam [] = return [] preprocessOccam [] = return []
preprocessOccam ((m, TokPreprocessor ('#':s)):(_, EndOfLine):ts) preprocessOccam ((m, TokPreprocessor s):(_, EndOfLine):ts)
= do func <- handleDirective m s = do func <- handleDirective m (stripPrefix s)
rest <- preprocessOccam ts rest <- preprocessOccam ts
return $ func rest return $ func rest
where
stripPrefix :: String -> String
stripPrefix (' ':cs) = stripPrefix cs
stripPrefix ('\t':cs) = stripPrefix cs
stripPrefix ('#':cs) = cs
stripPrefix _ = error "bad TokPreprocessor prefix"
-- Check the above case didn't miss something. -- Check the above case didn't miss something.
preprocessOccam ((_, TokPreprocessor _):_) preprocessOccam ((_, TokPreprocessor _):_)
= error "bad TokPreprocessor token" = error "bad TokPreprocessor token"
@ -134,17 +148,12 @@ handleUse m [modName]
else mod ++ ".occ" else mod ++ ".occ"
--}}} --}}}
-- | Main function for testing. -- | Load and preprocess an occam program.
main :: IO () preprocessOccamProgram :: String -> PassM [Token]
main preprocessOccamProgram filename
= do (arg:_) <- getArgs = do toks <- preprocessFile emptyMeta filename
v <- evalStateT (runErrorT (test arg)) emptyState veryDebug $ "{{{ tokenised source"
case v of veryDebug $ pshow toks
Left e -> dieIO e veryDebug $ "}}}"
Right r -> return () return toks
where
test :: String -> PassM ()
test fn
= do tokens <- preprocessFile emptyMeta fn
liftIO $ putStrLn $ pshow tokens

View File

@ -43,7 +43,6 @@ import CompState
import Errors import Errors
import EvalConstants import EvalConstants
import EvalLiterals import EvalLiterals
import Indentation
import Intrinsics import Intrinsics
import Metadata import Metadata
import Pass import Pass
@ -296,21 +295,13 @@ rainSourceFile
s <- getState s <- getState
return (A.Seq emptyMeta p, s) return (A.Seq emptyMeta p, s)
-- | Parse a file with the given production. -- | Load and parse a Rain source file.
-- This is copied from Parse.hs (because OccParser is about to be changed to not be the same as RainParser):
parseFile :: Monad m => String -> RainParser t -> CompState -> m t
parseFile file prod ps
= do let source = case Map.lookup file (csSourceFiles ps) of
Just s -> s
Nothing -> dieIO $ "Failed to preload file: " ++ show file
let ps' = ps { csLoadedFiles = file : csLoadedFiles ps }
case runParser prod ps' file source of
Left err -> dieIO $ "Parse error: " ++ show err
Right r -> return r
parseRainProgram :: String -> PassM A.Process parseRainProgram :: String -> PassM A.Process
parseRainProgram file parseRainProgram filename
= do ps <- get = do source <- liftIO $ readFile filename
(p, ps') <- parseFile file rainSourceFile ps cs <- get
put ps' case runParser rainSourceFile cs filename source of
Left err -> dieIO $ "Parse error: " ++ show err
Right (p, cs') ->
do put cs'
return p return p

View File

@ -66,3 +66,7 @@ transformEither funcLeft funcRight x = case x of
maybeIO :: IO a -> IO (Maybe a) maybeIO :: IO a -> IO (Maybe a)
maybeIO op = catch (op >>= (return . Just)) (\e -> return Nothing) maybeIO op = catch (op >>= (return . Just)) (\e -> return Nothing)
-- | Remove a number of items from the start and end of a list.
chop :: Int -> Int -> [a] -> [a]
chop start end s = drop start (take (length s - end) s)

View File

@ -10,6 +10,6 @@ PROC P ()
VAL []BYTE mls IS "first* VAL []BYTE mls IS "first*
*second* *second*
*third": *third":
VAL [5][5]BYTE square IS ["sator", "arepo", "tenas", "opera", "rotas"]: VAL [5][5]BYTE square IS ["sator", "arepo", "tenat", "opera", "rotas"]:
SKIP SKIP
: :