Add a preprocessor that works with the new lexer.
The parser now gets a stream of tokens, rather than needing to worry about loading files itself. This also reworks the lexer's idea of what constitutes a Token -- it's now a pair (Meta, TokenType), so it's always easy to pull out/rewrite the metadata -- and adds proper support for lexing preprocessor directives, rather than just treating them as reserved words.
This commit is contained in:
parent
a640dabc04
commit
223a14d767
10
CompState.hs
10
CompState.hs
|
@ -22,6 +22,7 @@ module CompState where
|
|||
import Data.Generics
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import Control.Monad.State
|
||||
|
||||
import qualified AST as A
|
||||
|
@ -49,7 +50,11 @@ data CompState = CompState {
|
|||
csVerboseLevel :: Int,
|
||||
csOutputFile :: String,
|
||||
|
||||
-- Set by preprocessor
|
||||
-- Set by (new) preprocessor
|
||||
csCurrentFile :: String,
|
||||
csUsedFiles :: Set.Set String,
|
||||
|
||||
-- Set by (old) preprocessor
|
||||
csSourceFiles :: Map String String,
|
||||
csIndentLinesIn :: [String],
|
||||
csIndentLinesOut :: [String],
|
||||
|
@ -85,6 +90,9 @@ emptyState = CompState {
|
|||
csVerboseLevel = 0,
|
||||
csOutputFile = "-",
|
||||
|
||||
csCurrentFile = "none",
|
||||
csUsedFiles = Set.empty,
|
||||
|
||||
csSourceFiles = Map.empty,
|
||||
csIndentLinesIn = [],
|
||||
csIndentLinesOut = [],
|
||||
|
|
36
LexOccam.x
36
LexOccam.x
|
@ -31,6 +31,8 @@ import Pass
|
|||
$decimalDigit = [0-9]
|
||||
$hexDigit = [0-9 a-f A-F]
|
||||
|
||||
@preprocessor = "#" [^\n]*
|
||||
|
||||
@reserved = "[" | "]" | "(" | ")"
|
||||
| "::" | ":=" | ":" | "," | ";" | "&"
|
||||
| "?" | "!" | "="
|
||||
|
@ -60,7 +62,6 @@ $hexDigit = [0-9 a-f A-F]
|
|||
| "VAL" | "VALOF"
|
||||
| "WHILE" | "WORKSPACE"
|
||||
| "VECSPACE"
|
||||
| "#INCLUDE" | "#USE"
|
||||
|
||||
@identifier = [a-z A-Z] [a-z A-Z 0-9 \.]*
|
||||
|
||||
|
@ -80,6 +81,8 @@ $escapeChar = [cnrts \" \' \* \n]
|
|||
|
||||
occam :-
|
||||
|
||||
@preprocessor { mkToken TokPreprocessor }
|
||||
|
||||
-- Ignore whitespace and comments.
|
||||
$white+ ;
|
||||
"--" [^\n]* ;
|
||||
|
@ -95,28 +98,31 @@ $white+ ;
|
|||
@realLiteral { mkToken TokRealLiteral }
|
||||
|
||||
{
|
||||
-- | An occam source token and its position.
|
||||
type Token = (Meta, TokenType)
|
||||
|
||||
-- | An occam source token.
|
||||
-- Only `Token` is generated by the lexer itself; the others are added later
|
||||
-- once the indentation has been analysed.
|
||||
data Token =
|
||||
Token TokenType Meta String -- ^ A real token read from the source
|
||||
data TokenType =
|
||||
TokReserved String -- ^ A reserved word or symbol
|
||||
| TokIdentifier String
|
||||
| TokStringLiteral String
|
||||
| TokCharLiteral String
|
||||
| TokDecimalLiteral String
|
||||
| TokHexLiteral String
|
||||
| TokRealLiteral String
|
||||
| TokPreprocessor String
|
||||
| Indent -- ^ Indentation increase
|
||||
| Outdent -- ^ Indentation decrease
|
||||
| EndOfLine -- ^ End of line
|
||||
deriving (Show, Eq, Typeable, Data)
|
||||
|
||||
-- | The type of a source token.
|
||||
data TokenType =
|
||||
TokReserved | TokIdentifier
|
||||
| TokStringLiteral | TokCharLiteral
|
||||
| TokDecimalLiteral | TokHexLiteral | TokRealLiteral
|
||||
deriving (Show, Eq, Typeable, Data)
|
||||
|
||||
-- | Build a lexer rule for a token.
|
||||
mkToken :: TokenType -> AlexPosn -> String -> Token
|
||||
mkToken tt (AlexPn _ line col) s = Token tt emptyMeta s
|
||||
mkToken :: (String -> TokenType) -> AlexPosn -> String -> Token
|
||||
mkToken cons _ s = (emptyMeta, cons s)
|
||||
|
||||
-- | Run the lexer, returning either an error position or a list of tokens.
|
||||
-- | Run the lexer, returning a list of tokens.
|
||||
-- (This is based on the `alexScanTokens` function that Alex provides.)
|
||||
runLexer :: String -> String -> PassM [Token]
|
||||
runLexer filename str = go (alexStartPos, '\n', str)
|
||||
|
@ -128,8 +134,8 @@ runLexer filename str = go (alexStartPos, '\n', str)
|
|||
AlexSkip inp' len -> go inp'
|
||||
AlexToken inp' len act ->
|
||||
do ts <- go inp'
|
||||
let (Token tt _ s) = act pos (take len str)
|
||||
return $ (Token tt meta s) : ts
|
||||
let t = act pos (take len str)
|
||||
return $ (meta, snd t) : ts
|
||||
|
||||
where
|
||||
meta = emptyMeta {
|
||||
|
|
2
Makefile
2
Makefile
|
@ -21,7 +21,7 @@ tocktest: $(sources)
|
|||
ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain
|
||||
|
||||
lextest: $(sources)
|
||||
ghc $(ghc_opts) -o lextest -main-is StructureOccam --make StructureOccam
|
||||
ghc $(ghc_opts) -o lextest -main-is PreprocessOccam --make PreprocessOccam
|
||||
|
||||
CFLAGS = \
|
||||
-O2 \
|
||||
|
|
145
PreprocessOccam.hs
Normal file
145
PreprocessOccam.hs
Normal file
|
@ -0,0 +1,145 @@
|
|||
{-
|
||||
Tock: a compiler for parallel languages
|
||||
Copyright (C) 2007 University of Kent
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation, either version 2 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
-}
|
||||
|
||||
-- | Preprocess occam code.
|
||||
module PreprocessOccam where
|
||||
|
||||
import Data.List
|
||||
import qualified Data.Set as Set
|
||||
import Text.Regex
|
||||
|
||||
import Errors
|
||||
import LexOccam
|
||||
import Metadata
|
||||
import Pass
|
||||
import StructureOccam
|
||||
import Utils
|
||||
|
||||
import CompState
|
||||
import Control.Monad.Error
|
||||
import Control.Monad.State
|
||||
import System
|
||||
import System.IO
|
||||
import PrettyShow
|
||||
|
||||
-- | Open an included file, looking for it in the search path.
|
||||
-- Return the open filehandle and the location of the file.
|
||||
-- FIXME: This doesn't actually look at the search path yet.
|
||||
searchFile :: Meta -> String -> PassM (Handle, String)
|
||||
searchFile m filename
|
||||
= do cs <- get
|
||||
let currentFile = csCurrentFile cs
|
||||
let possibilities = [joinPath currentFile filename]
|
||||
openOneOf possibilities
|
||||
where
|
||||
openOneOf :: [String] -> PassM (Handle, String)
|
||||
openOneOf [] = dieP m $ "Unable to find " ++ filename
|
||||
openOneOf (fn:fns)
|
||||
= do r <- liftIO $ maybeIO $ openFile fn ReadMode
|
||||
case r of
|
||||
Just h -> return (h, fn)
|
||||
Nothing -> openOneOf fns
|
||||
|
||||
-- | Preprocess a file and return its tokenised form ready for parsing.
|
||||
preprocessFile :: Meta -> String -> PassM [Token]
|
||||
preprocessFile m filename
|
||||
= do (handle, realFilename) <- searchFile m filename
|
||||
liftIO $ putStrLn $ "Loading " ++ realFilename
|
||||
origCS <- get
|
||||
modify (\cs -> cs { csCurrentFile = realFilename })
|
||||
s <- liftIO $ hGetContents handle
|
||||
toks <- runLexer realFilename s >>= structureOccam >>= preprocessOccam
|
||||
modify (\cs -> cs { csCurrentFile = csCurrentFile origCS })
|
||||
return toks
|
||||
|
||||
-- | Preprocess a token stream.
|
||||
preprocessOccam :: [Token] -> PassM [Token]
|
||||
preprocessOccam [] = return []
|
||||
preprocessOccam ((m, TokPreprocessor ('#':s)):(_, EndOfLine):ts)
|
||||
= do func <- handleDirective m s
|
||||
rest <- preprocessOccam ts
|
||||
return $ func rest
|
||||
-- Check the above case didn't miss something.
|
||||
preprocessOccam ((_, TokPreprocessor _):_)
|
||||
= error "bad TokPreprocessor token"
|
||||
preprocessOccam (t:ts)
|
||||
= do rest <- preprocessOccam ts
|
||||
return $ t : rest
|
||||
|
||||
--{{{ preprocessor directive handlers
|
||||
type DirectiveFunc = Meta -> [String] -> PassM ([Token] -> [Token])
|
||||
|
||||
-- | Call the handler for a preprocessor directive.
|
||||
handleDirective :: Meta -> String -> PassM ([Token] -> [Token])
|
||||
handleDirective m s = lookup s directives
|
||||
where
|
||||
lookup s [] = dieP m "Unknown preprocessor directive"
|
||||
lookup s ((re, func):ds)
|
||||
= case matchRegex re s of
|
||||
Just fields -> func m fields
|
||||
Nothing -> lookup s ds
|
||||
|
||||
-- | List of handlers for preprocessor directives.
|
||||
-- `handleDirective` walks down the regexps in this list until it finds one
|
||||
-- that matches, then uses the corresponding function.
|
||||
directives :: [(Regex, DirectiveFunc)]
|
||||
directives =
|
||||
[ (mkRegex "^INCLUDE \"(.*)\"$", handleInclude)
|
||||
, (mkRegex "^USE \"(.*)\"$", handleUse)
|
||||
]
|
||||
|
||||
-- | Handle the @#INCLUDE@ directive.
|
||||
handleInclude :: DirectiveFunc
|
||||
handleInclude m [incName]
|
||||
= do toks <- preprocessFile m incName
|
||||
return (\ts -> toks ++ ts)
|
||||
|
||||
-- | Handle the @#USE@ directive.
|
||||
-- This is a bit of a hack at the moment, since it just includes the file
|
||||
-- textually.
|
||||
handleUse :: DirectiveFunc
|
||||
handleUse m [modName]
|
||||
= do let incName = mangleModName modName
|
||||
cs <- get
|
||||
put $ cs { csUsedFiles = Set.insert incName (csUsedFiles cs) }
|
||||
if Set.member incName (csUsedFiles cs)
|
||||
then return id
|
||||
else handleInclude m [incName]
|
||||
where
|
||||
-- | If a module name doesn't already have a suffix, add one.
|
||||
mangleModName :: String -> String
|
||||
mangleModName mod
|
||||
= if ".occ" `isSuffixOf` mod || ".inc" `isSuffixOf` mod
|
||||
then mod
|
||||
else mod ++ ".occ"
|
||||
--}}}
|
||||
|
||||
-- | Main function for testing.
|
||||
main :: IO ()
|
||||
main
|
||||
= do (arg:_) <- getArgs
|
||||
v <- evalStateT (runErrorT (test arg)) emptyState
|
||||
case v of
|
||||
Left e -> dieIO e
|
||||
Right r -> return ()
|
||||
where
|
||||
test :: String -> PassM ()
|
||||
test fn
|
||||
= do tokens <- preprocessFile emptyMeta fn
|
||||
liftIO $ putStrLn $ pshow tokens
|
||||
|
|
@ -19,17 +19,12 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
-- | Analyse syntactic structure of occam code.
|
||||
module StructureOccam where
|
||||
|
||||
import Control.Monad.Error
|
||||
import Control.Monad.State
|
||||
import Data.Generics
|
||||
import System
|
||||
|
||||
import CompState
|
||||
import Errors
|
||||
import LexOccam
|
||||
import Metadata
|
||||
import Pass
|
||||
import PrettyShow
|
||||
|
||||
-- | Given the output of the lexer for a single file, add `Indent`, `Outdent`
|
||||
-- and `EndOfLine` markers.
|
||||
|
@ -39,12 +34,12 @@ structureOccam ts = analyse 1 firstLine ts
|
|||
where
|
||||
-- Find the first line that's actually got something on it.
|
||||
firstLine
|
||||
= case ts of (Token _ m _:_) -> metaLine m
|
||||
= case ts of ((m, _):_) -> metaLine m
|
||||
|
||||
analyse :: Int -> Int -> [Token] -> PassM [Token]
|
||||
-- Add extra EndOfLine at the end of the file.
|
||||
analyse _ _ [] = return [EndOfLine]
|
||||
analyse prevCol prevLine (t@(Token _ m _):ts)
|
||||
analyse _ _ [] = return [(emptyMeta, EndOfLine)]
|
||||
analyse prevCol prevLine (t@(m, tokType):ts)
|
||||
= if line /= prevLine
|
||||
then do rest <- analyse col line ts
|
||||
newLine $ t : rest
|
||||
|
@ -56,29 +51,19 @@ structureOccam ts = analyse 1 firstLine ts
|
|||
|
||||
-- A new line -- look to see what's going on with the indentation.
|
||||
newLine rest
|
||||
| col == prevCol + 2 = return $ EndOfLine : Indent : rest
|
||||
| col == prevCol + 2 = withEOL $ (m, Indent) : rest
|
||||
-- FIXME: If col > prevCol, then look to see if there's a VALOF
|
||||
-- coming up before the next column change...
|
||||
| col < prevCol
|
||||
= if (prevCol - col) `mod` 2 == 0
|
||||
then return $ EndOfLine : (replicate steps Outdent ++ rest)
|
||||
else dieP m "Invalid indentation"
|
||||
| col == prevCol = return $ EndOfLine : rest
|
||||
| otherwise = dieP m "Invalid indentation"
|
||||
then withEOL $ replicate steps (m, Outdent) ++ rest
|
||||
else bad
|
||||
| col == prevCol = withEOL rest
|
||||
| otherwise = bad
|
||||
where
|
||||
steps = (prevCol - col) `div` 2
|
||||
|
||||
-- | Main function for testing.
|
||||
main :: IO ()
|
||||
main
|
||||
= do (arg:_) <- getArgs
|
||||
s <- readFile arg
|
||||
e <- evalStateT (runErrorT (test arg s)) emptyState
|
||||
return ()
|
||||
where
|
||||
test :: String -> String -> PassM ()
|
||||
test arg s
|
||||
= do tokens <- runLexer arg s
|
||||
tokens' <- structureOccam tokens
|
||||
liftIO $ putStrLn $ pshow tokens'
|
||||
bad = dieP m "Invalid indentation"
|
||||
-- This is actually the position at which the new line starts
|
||||
-- rather than the end of the previous line.
|
||||
withEOL ts = return $ (m, EndOfLine) : ts
|
||||
|
||||
|
|
8
Utils.hs
8
Utils.hs
|
@ -20,6 +20,9 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
-- that could be put into the standard library.
|
||||
module Utils where
|
||||
|
||||
import Control.Monad
|
||||
import System.IO
|
||||
import System.IO.Error
|
||||
import Text.Regex
|
||||
|
||||
-- | Split the directory and file components of a path.
|
||||
|
@ -58,3 +61,8 @@ transformEither :: (a -> c) -> (b -> d) -> Either a b -> Either c d
|
|||
transformEither funcLeft funcRight x = case x of
|
||||
Left l -> Left (funcLeft l)
|
||||
Right r -> Right (funcRight r)
|
||||
|
||||
-- | Try an IO operation, returning `Nothing` if it fails.
|
||||
maybeIO :: IO a -> IO (Maybe a)
|
||||
maybeIO op = catch (op >>= (return . Just)) (\e -> return Nothing)
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user