Add a preprocessor that works with the new lexer.
The parser now gets a stream of tokens, rather than needing to worry about loading files itself. This also reworks the lexer's idea of what constitutes a Token -- it's now a pair (Meta, TokenType), so it's always easy to pull out/rewrite the metadata -- and adds proper support for lexing preprocessor directives, rather than just treating them as reserved words.
This commit is contained in:
parent
a640dabc04
commit
223a14d767
10
CompState.hs
10
CompState.hs
|
@ -22,6 +22,7 @@ module CompState where
|
||||||
import Data.Generics
|
import Data.Generics
|
||||||
import Data.Map (Map)
|
import Data.Map (Map)
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
|
import qualified Data.Set as Set
|
||||||
import Control.Monad.State
|
import Control.Monad.State
|
||||||
|
|
||||||
import qualified AST as A
|
import qualified AST as A
|
||||||
|
@ -49,7 +50,11 @@ data CompState = CompState {
|
||||||
csVerboseLevel :: Int,
|
csVerboseLevel :: Int,
|
||||||
csOutputFile :: String,
|
csOutputFile :: String,
|
||||||
|
|
||||||
-- Set by preprocessor
|
-- Set by (new) preprocessor
|
||||||
|
csCurrentFile :: String,
|
||||||
|
csUsedFiles :: Set.Set String,
|
||||||
|
|
||||||
|
-- Set by (old) preprocessor
|
||||||
csSourceFiles :: Map String String,
|
csSourceFiles :: Map String String,
|
||||||
csIndentLinesIn :: [String],
|
csIndentLinesIn :: [String],
|
||||||
csIndentLinesOut :: [String],
|
csIndentLinesOut :: [String],
|
||||||
|
@ -85,6 +90,9 @@ emptyState = CompState {
|
||||||
csVerboseLevel = 0,
|
csVerboseLevel = 0,
|
||||||
csOutputFile = "-",
|
csOutputFile = "-",
|
||||||
|
|
||||||
|
csCurrentFile = "none",
|
||||||
|
csUsedFiles = Set.empty,
|
||||||
|
|
||||||
csSourceFiles = Map.empty,
|
csSourceFiles = Map.empty,
|
||||||
csIndentLinesIn = [],
|
csIndentLinesIn = [],
|
||||||
csIndentLinesOut = [],
|
csIndentLinesOut = [],
|
||||||
|
|
36
LexOccam.x
36
LexOccam.x
|
@ -31,6 +31,8 @@ import Pass
|
||||||
$decimalDigit = [0-9]
|
$decimalDigit = [0-9]
|
||||||
$hexDigit = [0-9 a-f A-F]
|
$hexDigit = [0-9 a-f A-F]
|
||||||
|
|
||||||
|
@preprocessor = "#" [^\n]*
|
||||||
|
|
||||||
@reserved = "[" | "]" | "(" | ")"
|
@reserved = "[" | "]" | "(" | ")"
|
||||||
| "::" | ":=" | ":" | "," | ";" | "&"
|
| "::" | ":=" | ":" | "," | ";" | "&"
|
||||||
| "?" | "!" | "="
|
| "?" | "!" | "="
|
||||||
|
@ -60,7 +62,6 @@ $hexDigit = [0-9 a-f A-F]
|
||||||
| "VAL" | "VALOF"
|
| "VAL" | "VALOF"
|
||||||
| "WHILE" | "WORKSPACE"
|
| "WHILE" | "WORKSPACE"
|
||||||
| "VECSPACE"
|
| "VECSPACE"
|
||||||
| "#INCLUDE" | "#USE"
|
|
||||||
|
|
||||||
@identifier = [a-z A-Z] [a-z A-Z 0-9 \.]*
|
@identifier = [a-z A-Z] [a-z A-Z 0-9 \.]*
|
||||||
|
|
||||||
|
@ -80,6 +81,8 @@ $escapeChar = [cnrts \" \' \* \n]
|
||||||
|
|
||||||
occam :-
|
occam :-
|
||||||
|
|
||||||
|
@preprocessor { mkToken TokPreprocessor }
|
||||||
|
|
||||||
-- Ignore whitespace and comments.
|
-- Ignore whitespace and comments.
|
||||||
$white+ ;
|
$white+ ;
|
||||||
"--" [^\n]* ;
|
"--" [^\n]* ;
|
||||||
|
@ -95,28 +98,31 @@ $white+ ;
|
||||||
@realLiteral { mkToken TokRealLiteral }
|
@realLiteral { mkToken TokRealLiteral }
|
||||||
|
|
||||||
{
|
{
|
||||||
|
-- | An occam source token and its position.
|
||||||
|
type Token = (Meta, TokenType)
|
||||||
|
|
||||||
-- | An occam source token.
|
-- | An occam source token.
|
||||||
-- Only `Token` is generated by the lexer itself; the others are added later
|
-- Only `Token` is generated by the lexer itself; the others are added later
|
||||||
-- once the indentation has been analysed.
|
-- once the indentation has been analysed.
|
||||||
data Token =
|
data TokenType =
|
||||||
Token TokenType Meta String -- ^ A real token read from the source
|
TokReserved String -- ^ A reserved word or symbol
|
||||||
|
| TokIdentifier String
|
||||||
|
| TokStringLiteral String
|
||||||
|
| TokCharLiteral String
|
||||||
|
| TokDecimalLiteral String
|
||||||
|
| TokHexLiteral String
|
||||||
|
| TokRealLiteral String
|
||||||
|
| TokPreprocessor String
|
||||||
| Indent -- ^ Indentation increase
|
| Indent -- ^ Indentation increase
|
||||||
| Outdent -- ^ Indentation decrease
|
| Outdent -- ^ Indentation decrease
|
||||||
| EndOfLine -- ^ End of line
|
| EndOfLine -- ^ End of line
|
||||||
deriving (Show, Eq, Typeable, Data)
|
deriving (Show, Eq, Typeable, Data)
|
||||||
|
|
||||||
-- | The type of a source token.
|
|
||||||
data TokenType =
|
|
||||||
TokReserved | TokIdentifier
|
|
||||||
| TokStringLiteral | TokCharLiteral
|
|
||||||
| TokDecimalLiteral | TokHexLiteral | TokRealLiteral
|
|
||||||
deriving (Show, Eq, Typeable, Data)
|
|
||||||
|
|
||||||
-- | Build a lexer rule for a token.
|
-- | Build a lexer rule for a token.
|
||||||
mkToken :: TokenType -> AlexPosn -> String -> Token
|
mkToken :: (String -> TokenType) -> AlexPosn -> String -> Token
|
||||||
mkToken tt (AlexPn _ line col) s = Token tt emptyMeta s
|
mkToken cons _ s = (emptyMeta, cons s)
|
||||||
|
|
||||||
-- | Run the lexer, returning either an error position or a list of tokens.
|
-- | Run the lexer, returning a list of tokens.
|
||||||
-- (This is based on the `alexScanTokens` function that Alex provides.)
|
-- (This is based on the `alexScanTokens` function that Alex provides.)
|
||||||
runLexer :: String -> String -> PassM [Token]
|
runLexer :: String -> String -> PassM [Token]
|
||||||
runLexer filename str = go (alexStartPos, '\n', str)
|
runLexer filename str = go (alexStartPos, '\n', str)
|
||||||
|
@ -128,8 +134,8 @@ runLexer filename str = go (alexStartPos, '\n', str)
|
||||||
AlexSkip inp' len -> go inp'
|
AlexSkip inp' len -> go inp'
|
||||||
AlexToken inp' len act ->
|
AlexToken inp' len act ->
|
||||||
do ts <- go inp'
|
do ts <- go inp'
|
||||||
let (Token tt _ s) = act pos (take len str)
|
let t = act pos (take len str)
|
||||||
return $ (Token tt meta s) : ts
|
return $ (meta, snd t) : ts
|
||||||
|
|
||||||
where
|
where
|
||||||
meta = emptyMeta {
|
meta = emptyMeta {
|
||||||
|
|
2
Makefile
2
Makefile
|
@ -21,7 +21,7 @@ tocktest: $(sources)
|
||||||
ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain
|
ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain
|
||||||
|
|
||||||
lextest: $(sources)
|
lextest: $(sources)
|
||||||
ghc $(ghc_opts) -o lextest -main-is StructureOccam --make StructureOccam
|
ghc $(ghc_opts) -o lextest -main-is PreprocessOccam --make PreprocessOccam
|
||||||
|
|
||||||
CFLAGS = \
|
CFLAGS = \
|
||||||
-O2 \
|
-O2 \
|
||||||
|
|
145
PreprocessOccam.hs
Normal file
145
PreprocessOccam.hs
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
{-
|
||||||
|
Tock: a compiler for parallel languages
|
||||||
|
Copyright (C) 2007 University of Kent
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by the
|
||||||
|
Free Software Foundation, either version 2 of the License, or (at your
|
||||||
|
option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
-}
|
||||||
|
|
||||||
|
-- | Preprocess occam code.
|
||||||
|
module PreprocessOccam where
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import Text.Regex
|
||||||
|
|
||||||
|
import Errors
|
||||||
|
import LexOccam
|
||||||
|
import Metadata
|
||||||
|
import Pass
|
||||||
|
import StructureOccam
|
||||||
|
import Utils
|
||||||
|
|
||||||
|
import CompState
|
||||||
|
import Control.Monad.Error
|
||||||
|
import Control.Monad.State
|
||||||
|
import System
|
||||||
|
import System.IO
|
||||||
|
import PrettyShow
|
||||||
|
|
||||||
|
-- | Open an included file, looking for it in the search path.
|
||||||
|
-- Return the open filehandle and the location of the file.
|
||||||
|
-- FIXME: This doesn't actually look at the search path yet.
|
||||||
|
searchFile :: Meta -> String -> PassM (Handle, String)
|
||||||
|
searchFile m filename
|
||||||
|
= do cs <- get
|
||||||
|
let currentFile = csCurrentFile cs
|
||||||
|
let possibilities = [joinPath currentFile filename]
|
||||||
|
openOneOf possibilities
|
||||||
|
where
|
||||||
|
openOneOf :: [String] -> PassM (Handle, String)
|
||||||
|
openOneOf [] = dieP m $ "Unable to find " ++ filename
|
||||||
|
openOneOf (fn:fns)
|
||||||
|
= do r <- liftIO $ maybeIO $ openFile fn ReadMode
|
||||||
|
case r of
|
||||||
|
Just h -> return (h, fn)
|
||||||
|
Nothing -> openOneOf fns
|
||||||
|
|
||||||
|
-- | Preprocess a file and return its tokenised form ready for parsing.
|
||||||
|
preprocessFile :: Meta -> String -> PassM [Token]
|
||||||
|
preprocessFile m filename
|
||||||
|
= do (handle, realFilename) <- searchFile m filename
|
||||||
|
liftIO $ putStrLn $ "Loading " ++ realFilename
|
||||||
|
origCS <- get
|
||||||
|
modify (\cs -> cs { csCurrentFile = realFilename })
|
||||||
|
s <- liftIO $ hGetContents handle
|
||||||
|
toks <- runLexer realFilename s >>= structureOccam >>= preprocessOccam
|
||||||
|
modify (\cs -> cs { csCurrentFile = csCurrentFile origCS })
|
||||||
|
return toks
|
||||||
|
|
||||||
|
-- | Preprocess a token stream.
|
||||||
|
preprocessOccam :: [Token] -> PassM [Token]
|
||||||
|
preprocessOccam [] = return []
|
||||||
|
preprocessOccam ((m, TokPreprocessor ('#':s)):(_, EndOfLine):ts)
|
||||||
|
= do func <- handleDirective m s
|
||||||
|
rest <- preprocessOccam ts
|
||||||
|
return $ func rest
|
||||||
|
-- Check the above case didn't miss something.
|
||||||
|
preprocessOccam ((_, TokPreprocessor _):_)
|
||||||
|
= error "bad TokPreprocessor token"
|
||||||
|
preprocessOccam (t:ts)
|
||||||
|
= do rest <- preprocessOccam ts
|
||||||
|
return $ t : rest
|
||||||
|
|
||||||
|
--{{{ preprocessor directive handlers
|
||||||
|
type DirectiveFunc = Meta -> [String] -> PassM ([Token] -> [Token])
|
||||||
|
|
||||||
|
-- | Call the handler for a preprocessor directive.
|
||||||
|
handleDirective :: Meta -> String -> PassM ([Token] -> [Token])
|
||||||
|
handleDirective m s = lookup s directives
|
||||||
|
where
|
||||||
|
lookup s [] = dieP m "Unknown preprocessor directive"
|
||||||
|
lookup s ((re, func):ds)
|
||||||
|
= case matchRegex re s of
|
||||||
|
Just fields -> func m fields
|
||||||
|
Nothing -> lookup s ds
|
||||||
|
|
||||||
|
-- | List of handlers for preprocessor directives.
|
||||||
|
-- `handleDirective` walks down the regexps in this list until it finds one
|
||||||
|
-- that matches, then uses the corresponding function.
|
||||||
|
directives :: [(Regex, DirectiveFunc)]
|
||||||
|
directives =
|
||||||
|
[ (mkRegex "^INCLUDE \"(.*)\"$", handleInclude)
|
||||||
|
, (mkRegex "^USE \"(.*)\"$", handleUse)
|
||||||
|
]
|
||||||
|
|
||||||
|
-- | Handle the @#INCLUDE@ directive.
|
||||||
|
handleInclude :: DirectiveFunc
|
||||||
|
handleInclude m [incName]
|
||||||
|
= do toks <- preprocessFile m incName
|
||||||
|
return (\ts -> toks ++ ts)
|
||||||
|
|
||||||
|
-- | Handle the @#USE@ directive.
|
||||||
|
-- This is a bit of a hack at the moment, since it just includes the file
|
||||||
|
-- textually.
|
||||||
|
handleUse :: DirectiveFunc
|
||||||
|
handleUse m [modName]
|
||||||
|
= do let incName = mangleModName modName
|
||||||
|
cs <- get
|
||||||
|
put $ cs { csUsedFiles = Set.insert incName (csUsedFiles cs) }
|
||||||
|
if Set.member incName (csUsedFiles cs)
|
||||||
|
then return id
|
||||||
|
else handleInclude m [incName]
|
||||||
|
where
|
||||||
|
-- | If a module name doesn't already have a suffix, add one.
|
||||||
|
mangleModName :: String -> String
|
||||||
|
mangleModName mod
|
||||||
|
= if ".occ" `isSuffixOf` mod || ".inc" `isSuffixOf` mod
|
||||||
|
then mod
|
||||||
|
else mod ++ ".occ"
|
||||||
|
--}}}
|
||||||
|
|
||||||
|
-- | Main function for testing.
|
||||||
|
main :: IO ()
|
||||||
|
main
|
||||||
|
= do (arg:_) <- getArgs
|
||||||
|
v <- evalStateT (runErrorT (test arg)) emptyState
|
||||||
|
case v of
|
||||||
|
Left e -> dieIO e
|
||||||
|
Right r -> return ()
|
||||||
|
where
|
||||||
|
test :: String -> PassM ()
|
||||||
|
test fn
|
||||||
|
= do tokens <- preprocessFile emptyMeta fn
|
||||||
|
liftIO $ putStrLn $ pshow tokens
|
||||||
|
|
|
@ -19,17 +19,12 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
-- | Analyse syntactic structure of occam code.
|
-- | Analyse syntactic structure of occam code.
|
||||||
module StructureOccam where
|
module StructureOccam where
|
||||||
|
|
||||||
import Control.Monad.Error
|
|
||||||
import Control.Monad.State
|
|
||||||
import Data.Generics
|
import Data.Generics
|
||||||
import System
|
|
||||||
|
|
||||||
import CompState
|
|
||||||
import Errors
|
import Errors
|
||||||
import LexOccam
|
import LexOccam
|
||||||
import Metadata
|
import Metadata
|
||||||
import Pass
|
import Pass
|
||||||
import PrettyShow
|
|
||||||
|
|
||||||
-- | Given the output of the lexer for a single file, add `Indent`, `Outdent`
|
-- | Given the output of the lexer for a single file, add `Indent`, `Outdent`
|
||||||
-- and `EndOfLine` markers.
|
-- and `EndOfLine` markers.
|
||||||
|
@ -39,12 +34,12 @@ structureOccam ts = analyse 1 firstLine ts
|
||||||
where
|
where
|
||||||
-- Find the first line that's actually got something on it.
|
-- Find the first line that's actually got something on it.
|
||||||
firstLine
|
firstLine
|
||||||
= case ts of (Token _ m _:_) -> metaLine m
|
= case ts of ((m, _):_) -> metaLine m
|
||||||
|
|
||||||
analyse :: Int -> Int -> [Token] -> PassM [Token]
|
analyse :: Int -> Int -> [Token] -> PassM [Token]
|
||||||
-- Add extra EndOfLine at the end of the file.
|
-- Add extra EndOfLine at the end of the file.
|
||||||
analyse _ _ [] = return [EndOfLine]
|
analyse _ _ [] = return [(emptyMeta, EndOfLine)]
|
||||||
analyse prevCol prevLine (t@(Token _ m _):ts)
|
analyse prevCol prevLine (t@(m, tokType):ts)
|
||||||
= if line /= prevLine
|
= if line /= prevLine
|
||||||
then do rest <- analyse col line ts
|
then do rest <- analyse col line ts
|
||||||
newLine $ t : rest
|
newLine $ t : rest
|
||||||
|
@ -56,29 +51,19 @@ structureOccam ts = analyse 1 firstLine ts
|
||||||
|
|
||||||
-- A new line -- look to see what's going on with the indentation.
|
-- A new line -- look to see what's going on with the indentation.
|
||||||
newLine rest
|
newLine rest
|
||||||
| col == prevCol + 2 = return $ EndOfLine : Indent : rest
|
| col == prevCol + 2 = withEOL $ (m, Indent) : rest
|
||||||
-- FIXME: If col > prevCol, then look to see if there's a VALOF
|
-- FIXME: If col > prevCol, then look to see if there's a VALOF
|
||||||
-- coming up before the next column change...
|
-- coming up before the next column change...
|
||||||
| col < prevCol
|
| col < prevCol
|
||||||
= if (prevCol - col) `mod` 2 == 0
|
= if (prevCol - col) `mod` 2 == 0
|
||||||
then return $ EndOfLine : (replicate steps Outdent ++ rest)
|
then withEOL $ replicate steps (m, Outdent) ++ rest
|
||||||
else dieP m "Invalid indentation"
|
else bad
|
||||||
| col == prevCol = return $ EndOfLine : rest
|
| col == prevCol = withEOL rest
|
||||||
| otherwise = dieP m "Invalid indentation"
|
| otherwise = bad
|
||||||
where
|
where
|
||||||
steps = (prevCol - col) `div` 2
|
steps = (prevCol - col) `div` 2
|
||||||
|
bad = dieP m "Invalid indentation"
|
||||||
-- | Main function for testing.
|
-- This is actually the position at which the new line starts
|
||||||
main :: IO ()
|
-- rather than the end of the previous line.
|
||||||
main
|
withEOL ts = return $ (m, EndOfLine) : ts
|
||||||
= do (arg:_) <- getArgs
|
|
||||||
s <- readFile arg
|
|
||||||
e <- evalStateT (runErrorT (test arg s)) emptyState
|
|
||||||
return ()
|
|
||||||
where
|
|
||||||
test :: String -> String -> PassM ()
|
|
||||||
test arg s
|
|
||||||
= do tokens <- runLexer arg s
|
|
||||||
tokens' <- structureOccam tokens
|
|
||||||
liftIO $ putStrLn $ pshow tokens'
|
|
||||||
|
|
||||||
|
|
8
Utils.hs
8
Utils.hs
|
@ -20,6 +20,9 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
-- that could be put into the standard library.
|
-- that could be put into the standard library.
|
||||||
module Utils where
|
module Utils where
|
||||||
|
|
||||||
|
import Control.Monad
|
||||||
|
import System.IO
|
||||||
|
import System.IO.Error
|
||||||
import Text.Regex
|
import Text.Regex
|
||||||
|
|
||||||
-- | Split the directory and file components of a path.
|
-- | Split the directory and file components of a path.
|
||||||
|
@ -58,3 +61,8 @@ transformEither :: (a -> c) -> (b -> d) -> Either a b -> Either c d
|
||||||
transformEither funcLeft funcRight x = case x of
|
transformEither funcLeft funcRight x = case x of
|
||||||
Left l -> Left (funcLeft l)
|
Left l -> Left (funcLeft l)
|
||||||
Right r -> Right (funcRight r)
|
Right r -> Right (funcRight r)
|
||||||
|
|
||||||
|
-- | Try an IO operation, returning `Nothing` if it fails.
|
||||||
|
maybeIO :: IO a -> IO (Maybe a)
|
||||||
|
maybeIO op = catch (op >>= (return . Just)) (\e -> return Nothing)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user