From 223a14d767508f6ac211da0a65c02eaade64db0c Mon Sep 17 00:00:00 2001 From: Adam Sampson Date: Mon, 20 Aug 2007 23:46:57 +0000 Subject: [PATCH] Add a preprocessor that works with the new lexer. The parser now gets a stream of tokens, rather than needing to worry about loading files itself. This also reworks the lexer's idea of what constitutes a Token -- it's now a pair (Meta, TokenType), so it's always easy to pull out/rewrite the metadata -- and adds proper support for lexing preprocessor directives, rather than just treating them as reserved words. --- CompState.hs | 10 +++- LexOccam.x | 36 ++++++----- Makefile | 2 +- PreprocessOccam.hs | 145 +++++++++++++++++++++++++++++++++++++++++++++ StructureOccam.hs | 39 ++++-------- Utils.hs | 8 +++ 6 files changed, 196 insertions(+), 44 deletions(-) create mode 100644 PreprocessOccam.hs diff --git a/CompState.hs b/CompState.hs index d0f863a..d2d49b6 100644 --- a/CompState.hs +++ b/CompState.hs @@ -22,6 +22,7 @@ module CompState where import Data.Generics import Data.Map (Map) import qualified Data.Map as Map +import qualified Data.Set as Set import Control.Monad.State import qualified AST as A @@ -49,7 +50,11 @@ data CompState = CompState { csVerboseLevel :: Int, csOutputFile :: String, - -- Set by preprocessor + -- Set by (new) preprocessor + csCurrentFile :: String, + csUsedFiles :: Set.Set String, + + -- Set by (old) preprocessor csSourceFiles :: Map String String, csIndentLinesIn :: [String], csIndentLinesOut :: [String], @@ -85,6 +90,9 @@ emptyState = CompState { csVerboseLevel = 0, csOutputFile = "-", + csCurrentFile = "none", + csUsedFiles = Set.empty, + csSourceFiles = Map.empty, csIndentLinesIn = [], csIndentLinesOut = [], diff --git a/LexOccam.x b/LexOccam.x index c3555b9..f8da74e 100644 --- a/LexOccam.x +++ b/LexOccam.x @@ -31,6 +31,8 @@ import Pass $decimalDigit = [0-9] $hexDigit = [0-9 a-f A-F] +@preprocessor = "#" [^\n]* + @reserved = "[" | "]" | "(" | ")" | "::" | ":=" | ":" | "," | ";" | "&" | "?" | "!" | "=" @@ -60,7 +62,6 @@ $hexDigit = [0-9 a-f A-F] | "VAL" | "VALOF" | "WHILE" | "WORKSPACE" | "VECSPACE" - | "#INCLUDE" | "#USE" @identifier = [a-z A-Z] [a-z A-Z 0-9 \.]* @@ -80,6 +81,8 @@ $escapeChar = [cnrts \" \' \* \n] occam :- +@preprocessor { mkToken TokPreprocessor } + -- Ignore whitespace and comments. $white+ ; "--" [^\n]* ; @@ -95,28 +98,31 @@ $white+ ; @realLiteral { mkToken TokRealLiteral } { +-- | An occam source token and its position. +type Token = (Meta, TokenType) + -- | An occam source token. -- Only `Token` is generated by the lexer itself; the others are added later -- once the indentation has been analysed. -data Token = - Token TokenType Meta String -- ^ A real token read from the source +data TokenType = + TokReserved String -- ^ A reserved word or symbol + | TokIdentifier String + | TokStringLiteral String + | TokCharLiteral String + | TokDecimalLiteral String + | TokHexLiteral String + | TokRealLiteral String + | TokPreprocessor String | Indent -- ^ Indentation increase | Outdent -- ^ Indentation decrease | EndOfLine -- ^ End of line deriving (Show, Eq, Typeable, Data) --- | The type of a source token. -data TokenType = - TokReserved | TokIdentifier - | TokStringLiteral | TokCharLiteral - | TokDecimalLiteral | TokHexLiteral | TokRealLiteral - deriving (Show, Eq, Typeable, Data) - -- | Build a lexer rule for a token. -mkToken :: TokenType -> AlexPosn -> String -> Token -mkToken tt (AlexPn _ line col) s = Token tt emptyMeta s +mkToken :: (String -> TokenType) -> AlexPosn -> String -> Token +mkToken cons _ s = (emptyMeta, cons s) --- | Run the lexer, returning either an error position or a list of tokens. +-- | Run the lexer, returning a list of tokens. -- (This is based on the `alexScanTokens` function that Alex provides.) runLexer :: String -> String -> PassM [Token] runLexer filename str = go (alexStartPos, '\n', str) @@ -128,8 +134,8 @@ runLexer filename str = go (alexStartPos, '\n', str) AlexSkip inp' len -> go inp' AlexToken inp' len act -> do ts <- go inp' - let (Token tt _ s) = act pos (take len str) - return $ (Token tt meta s) : ts + let t = act pos (take len str) + return $ (meta, snd t) : ts where meta = emptyMeta { diff --git a/Makefile b/Makefile index 1cf380c..7657a4f 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ tocktest: $(sources) ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain lextest: $(sources) - ghc $(ghc_opts) -o lextest -main-is StructureOccam --make StructureOccam + ghc $(ghc_opts) -o lextest -main-is PreprocessOccam --make PreprocessOccam CFLAGS = \ -O2 \ diff --git a/PreprocessOccam.hs b/PreprocessOccam.hs new file mode 100644 index 0000000..836a446 --- /dev/null +++ b/PreprocessOccam.hs @@ -0,0 +1,145 @@ +{- +Tock: a compiler for parallel languages +Copyright (C) 2007 University of Kent + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation, either version 2 of the License, or (at your +option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program. If not, see . +-} + +-- | Preprocess occam code. +module PreprocessOccam where + +import Data.List +import qualified Data.Set as Set +import Text.Regex + +import Errors +import LexOccam +import Metadata +import Pass +import StructureOccam +import Utils + +import CompState +import Control.Monad.Error +import Control.Monad.State +import System +import System.IO +import PrettyShow + +-- | Open an included file, looking for it in the search path. +-- Return the open filehandle and the location of the file. +-- FIXME: This doesn't actually look at the search path yet. +searchFile :: Meta -> String -> PassM (Handle, String) +searchFile m filename + = do cs <- get + let currentFile = csCurrentFile cs + let possibilities = [joinPath currentFile filename] + openOneOf possibilities + where + openOneOf :: [String] -> PassM (Handle, String) + openOneOf [] = dieP m $ "Unable to find " ++ filename + openOneOf (fn:fns) + = do r <- liftIO $ maybeIO $ openFile fn ReadMode + case r of + Just h -> return (h, fn) + Nothing -> openOneOf fns + +-- | Preprocess a file and return its tokenised form ready for parsing. +preprocessFile :: Meta -> String -> PassM [Token] +preprocessFile m filename + = do (handle, realFilename) <- searchFile m filename + liftIO $ putStrLn $ "Loading " ++ realFilename + origCS <- get + modify (\cs -> cs { csCurrentFile = realFilename }) + s <- liftIO $ hGetContents handle + toks <- runLexer realFilename s >>= structureOccam >>= preprocessOccam + modify (\cs -> cs { csCurrentFile = csCurrentFile origCS }) + return toks + +-- | Preprocess a token stream. +preprocessOccam :: [Token] -> PassM [Token] +preprocessOccam [] = return [] +preprocessOccam ((m, TokPreprocessor ('#':s)):(_, EndOfLine):ts) + = do func <- handleDirective m s + rest <- preprocessOccam ts + return $ func rest +-- Check the above case didn't miss something. +preprocessOccam ((_, TokPreprocessor _):_) + = error "bad TokPreprocessor token" +preprocessOccam (t:ts) + = do rest <- preprocessOccam ts + return $ t : rest + +--{{{ preprocessor directive handlers +type DirectiveFunc = Meta -> [String] -> PassM ([Token] -> [Token]) + +-- | Call the handler for a preprocessor directive. +handleDirective :: Meta -> String -> PassM ([Token] -> [Token]) +handleDirective m s = lookup s directives + where + lookup s [] = dieP m "Unknown preprocessor directive" + lookup s ((re, func):ds) + = case matchRegex re s of + Just fields -> func m fields + Nothing -> lookup s ds + +-- | List of handlers for preprocessor directives. +-- `handleDirective` walks down the regexps in this list until it finds one +-- that matches, then uses the corresponding function. +directives :: [(Regex, DirectiveFunc)] +directives = + [ (mkRegex "^INCLUDE \"(.*)\"$", handleInclude) + , (mkRegex "^USE \"(.*)\"$", handleUse) + ] + +-- | Handle the @#INCLUDE@ directive. +handleInclude :: DirectiveFunc +handleInclude m [incName] + = do toks <- preprocessFile m incName + return (\ts -> toks ++ ts) + +-- | Handle the @#USE@ directive. +-- This is a bit of a hack at the moment, since it just includes the file +-- textually. +handleUse :: DirectiveFunc +handleUse m [modName] + = do let incName = mangleModName modName + cs <- get + put $ cs { csUsedFiles = Set.insert incName (csUsedFiles cs) } + if Set.member incName (csUsedFiles cs) + then return id + else handleInclude m [incName] + where + -- | If a module name doesn't already have a suffix, add one. + mangleModName :: String -> String + mangleModName mod + = if ".occ" `isSuffixOf` mod || ".inc" `isSuffixOf` mod + then mod + else mod ++ ".occ" +--}}} + +-- | Main function for testing. +main :: IO () +main + = do (arg:_) <- getArgs + v <- evalStateT (runErrorT (test arg)) emptyState + case v of + Left e -> dieIO e + Right r -> return () + where + test :: String -> PassM () + test fn + = do tokens <- preprocessFile emptyMeta fn + liftIO $ putStrLn $ pshow tokens + diff --git a/StructureOccam.hs b/StructureOccam.hs index 10341ee..c887665 100644 --- a/StructureOccam.hs +++ b/StructureOccam.hs @@ -19,17 +19,12 @@ with this program. If not, see . -- | Analyse syntactic structure of occam code. module StructureOccam where -import Control.Monad.Error -import Control.Monad.State import Data.Generics -import System -import CompState import Errors import LexOccam import Metadata import Pass -import PrettyShow -- | Given the output of the lexer for a single file, add `Indent`, `Outdent` -- and `EndOfLine` markers. @@ -39,12 +34,12 @@ structureOccam ts = analyse 1 firstLine ts where -- Find the first line that's actually got something on it. firstLine - = case ts of (Token _ m _:_) -> metaLine m + = case ts of ((m, _):_) -> metaLine m analyse :: Int -> Int -> [Token] -> PassM [Token] -- Add extra EndOfLine at the end of the file. - analyse _ _ [] = return [EndOfLine] - analyse prevCol prevLine (t@(Token _ m _):ts) + analyse _ _ [] = return [(emptyMeta, EndOfLine)] + analyse prevCol prevLine (t@(m, tokType):ts) = if line /= prevLine then do rest <- analyse col line ts newLine $ t : rest @@ -56,29 +51,19 @@ structureOccam ts = analyse 1 firstLine ts -- A new line -- look to see what's going on with the indentation. newLine rest - | col == prevCol + 2 = return $ EndOfLine : Indent : rest + | col == prevCol + 2 = withEOL $ (m, Indent) : rest -- FIXME: If col > prevCol, then look to see if there's a VALOF -- coming up before the next column change... | col < prevCol = if (prevCol - col) `mod` 2 == 0 - then return $ EndOfLine : (replicate steps Outdent ++ rest) - else dieP m "Invalid indentation" - | col == prevCol = return $ EndOfLine : rest - | otherwise = dieP m "Invalid indentation" + then withEOL $ replicate steps (m, Outdent) ++ rest + else bad + | col == prevCol = withEOL rest + | otherwise = bad where steps = (prevCol - col) `div` 2 - --- | Main function for testing. -main :: IO () -main - = do (arg:_) <- getArgs - s <- readFile arg - e <- evalStateT (runErrorT (test arg s)) emptyState - return () - where - test :: String -> String -> PassM () - test arg s - = do tokens <- runLexer arg s - tokens' <- structureOccam tokens - liftIO $ putStrLn $ pshow tokens' + bad = dieP m "Invalid indentation" + -- This is actually the position at which the new line starts + -- rather than the end of the previous line. + withEOL ts = return $ (m, EndOfLine) : ts diff --git a/Utils.hs b/Utils.hs index 31a823b..dc81930 100644 --- a/Utils.hs +++ b/Utils.hs @@ -20,6 +20,9 @@ with this program. If not, see . -- that could be put into the standard library. module Utils where +import Control.Monad +import System.IO +import System.IO.Error import Text.Regex -- | Split the directory and file components of a path. @@ -58,3 +61,8 @@ transformEither :: (a -> c) -> (b -> d) -> Either a b -> Either c d transformEither funcLeft funcRight x = case x of Left l -> Left (funcLeft l) Right r -> Right (funcRight r) + +-- | Try an IO operation, returning `Nothing` if it fails. +maybeIO :: IO a -> IO (Maybe a) +maybeIO op = catch (op >>= (return . Just)) (\e -> return Nothing) +