Add a preprocessor that works with the new lexer.

The parser now gets a stream of tokens, rather than needing to worry about loading files itself. This also reworks the lexer's idea of what constitutes a Token -- it's now a pair (Meta, TokenType), so it's always easy to pull out/rewrite the metadata -- and adds proper support for lexing preprocessor directives, rather than just treating them as reserved words.
2007-08-20 23:46:57 +00:00 · 2007-08-20 23:46:57 +00:00 · 223a14d767
commit 223a14d767
parent a640dabc04
6 changed files with 196 additions and 44 deletions
--- a/CompState.hs
+++ b/CompState.hs
@ -22,6 +22,7 @@ module CompState where
 import Data.Generics
 import Data.Map (Map)
 import qualified Data.Map as Map
 import qualified Data.Set as Set
 import Control.Monad.State
 import qualified AST as A
@ -49,7 +50,11 @@ data CompState = CompState {
    csVerboseLevel :: Int,
    csOutputFile :: String,
-    -- Set by preprocessor
+    -- Set by (new) preprocessor
    csCurrentFile :: String,
    csUsedFiles :: Set.Set String,
    -- Set by (old) preprocessor
    csSourceFiles :: Map String String,
    csIndentLinesIn :: [String],
    csIndentLinesOut :: [String],
@ -85,6 +90,9 @@ emptyState = CompState {
    csVerboseLevel = 0,
    csOutputFile = "-",
    csCurrentFile = "none",
    csUsedFiles = Set.empty,
    csSourceFiles = Map.empty,
    csIndentLinesIn = [],
    csIndentLinesOut = [],
--- a/LexOccam.x
+++ b/LexOccam.x
@ -31,6 +31,8 @@ import Pass
 $decimalDigit = [0-9]
 $hexDigit = [0-9 a-f A-F]
@preprocessor = "#" [^\n]*
@reserved = "[" | "]" | "(" | ")"
          | "::" | ":=" | ":" | "," | ";" | "&"
          | "?" | "!" | "="
@ -60,7 +62,6 @@ $hexDigit = [0-9 a-f A-F]
          | "VAL" | "VALOF"
          | "WHILE" | "WORKSPACE"
          | "VECSPACE"
          | "#INCLUDE" | "#USE"
@identifier = [a-z A-Z] [a-z A-Z 0-9 \.]*
@ -80,6 +81,8 @@ $escapeChar = [cnrts \" \' \* \n]
 occam :-
@preprocessor      { mkToken TokPreprocessor }
 -- Ignore whitespace and comments.
 $white+            ;
 "--" [^\n]*        ;
@ -95,28 +98,31 @@ $white+            ;
@realLiteral       { mkToken TokRealLiteral }
 {
 -- | An occam source token and its position.
 type Token = (Meta, TokenType)
 -- | An occam source token.
 -- Only `Token` is generated by the lexer itself; the others are added later
 -- once the indentation has been analysed.
-data Token =
+data TokenType =
-  Token TokenType Meta String          -- ^ A real token read from the source
+  TokReserved String                   -- ^ A reserved word or symbol
  | TokIdentifier String
  | TokStringLiteral String
  | TokCharLiteral String
  | TokDecimalLiteral String
  | TokHexLiteral String
  | TokRealLiteral String
  | TokPreprocessor String
  | Indent                             -- ^ Indentation increase
  | Outdent                            -- ^ Indentation decrease
  | EndOfLine                          -- ^ End of line
  deriving (Show, Eq, Typeable, Data)
 -- | The type of a source token.
 data TokenType =
  TokReserved | TokIdentifier
  | TokStringLiteral | TokCharLiteral
  | TokDecimalLiteral | TokHexLiteral | TokRealLiteral
  deriving (Show, Eq, Typeable, Data)
 -- | Build a lexer rule for a token.
-mkToken :: TokenType -> AlexPosn -> String -> Token
+mkToken :: (String -> TokenType) -> AlexPosn -> String -> Token
-mkToken tt (AlexPn _ line col) s = Token tt emptyMeta s
+mkToken cons _ s = (emptyMeta, cons s)
-- | Run the lexer, returning either an error position or a list of tokens.
+-- | Run the lexer, returning a list of tokens.
 -- (This is based on the `alexScanTokens` function that Alex provides.)
 runLexer :: String -> String -> PassM [Token]
 runLexer filename str = go (alexStartPos, '\n', str)
@ -128,8 +134,8 @@ runLexer filename str = go (alexStartPos, '\n', str)
           AlexSkip inp' len -> go inp'
           AlexToken inp' len act ->
             do ts <- go inp'
-                let (Token tt _ s) = act pos (take len str)
+                let t = act pos (take len str)
-                return $ (Token tt meta s) : ts
+                return $ (meta, snd t) : ts
      where
        meta = emptyMeta {
--- a/2
+++ b/2
@ -21,7 +21,7 @@ tocktest: $(sources)
 	ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain
 lextest: $(sources)
-	ghc $(ghc_opts) -o lextest -main-is StructureOccam --make StructureOccam
+	ghc $(ghc_opts) -o lextest -main-is PreprocessOccam --make PreprocessOccam
 CFLAGS = \
 	-O2 \
--- a/PreprocessOccam.hs
+++ b/PreprocessOccam.hs
@ -0,0 +1,145 @@
 {-
 Tock: a compiler for parallel languages
 Copyright (C) 2007  University of Kent
 This program is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
 Free Software Foundation, either version 2 of the License, or (at your
 option) any later version.
 This program is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 General Public License for more details.
 You should have received a copy of the GNU General Public License along
 with this program.  If not, see <http://www.gnu.org/licenses/>.
 -}
 -- | Preprocess occam code.
 module PreprocessOccam where
 import Data.List
 import qualified Data.Set as Set
 import Text.Regex
 import Errors
 import LexOccam
 import Metadata
 import Pass
 import StructureOccam
 import Utils
 import CompState
 import Control.Monad.Error
 import Control.Monad.State
 import System
 import System.IO
 import PrettyShow
 -- | Open an included file, looking for it in the search path.
 -- Return the open filehandle and the location of the file.
 -- FIXME: This doesn't actually look at the search path yet.
 searchFile :: Meta -> String -> PassM (Handle, String)
 searchFile m filename
    =  do cs <- get
          let currentFile = csCurrentFile cs
          let possibilities = [joinPath currentFile filename]
          openOneOf possibilities
  where
    openOneOf :: [String] -> PassM (Handle, String)
    openOneOf [] = dieP m $ "Unable to find " ++ filename
    openOneOf (fn:fns)
        =  do r <- liftIO $ maybeIO $ openFile fn ReadMode
              case r of
                Just h -> return (h, fn)
                Nothing -> openOneOf fns
 -- | Preprocess a file and return its tokenised form ready for parsing.
 preprocessFile :: Meta -> String -> PassM [Token]
 preprocessFile m filename
    =  do (handle, realFilename) <- searchFile m filename
          liftIO $ putStrLn $ "Loading " ++ realFilename
          origCS <- get
          modify (\cs -> cs { csCurrentFile = realFilename })
          s <- liftIO $ hGetContents handle
          toks <- runLexer realFilename s >>= structureOccam >>= preprocessOccam
          modify (\cs -> cs { csCurrentFile = csCurrentFile origCS })
          return toks
 -- | Preprocess a token stream.
 preprocessOccam :: [Token] -> PassM [Token]
 preprocessOccam [] = return []
 preprocessOccam ((m, TokPreprocessor ('#':s)):(_, EndOfLine):ts)
    =  do func <- handleDirective m s
          rest <- preprocessOccam ts
          return $ func rest
 -- Check the above case didn't miss something.
 preprocessOccam ((_, TokPreprocessor _):_)
    = error "bad TokPreprocessor token"
 preprocessOccam (t:ts)
    =  do rest <- preprocessOccam ts
          return $ t : rest
 --{{{  preprocessor directive handlers
 type DirectiveFunc = Meta -> [String] -> PassM ([Token] -> [Token])
 -- | Call the handler for a preprocessor directive.
 handleDirective :: Meta -> String -> PassM ([Token] -> [Token])
 handleDirective m s = lookup s directives
  where
    lookup s [] = dieP m "Unknown preprocessor directive"
    lookup s ((re, func):ds)
        = case matchRegex re s of
            Just fields -> func m fields
            Nothing -> lookup s ds
 -- | List of handlers for preprocessor directives.
 -- `handleDirective` walks down the regexps in this list until it finds one
 -- that matches, then uses the corresponding function.
 directives :: [(Regex, DirectiveFunc)]
 directives =
  [ (mkRegex "^INCLUDE \"(.*)\"$", handleInclude)
  , (mkRegex "^USE \"(.*)\"$", handleUse)
  ]
 -- | Handle the @#INCLUDE@ directive.
 handleInclude :: DirectiveFunc
 handleInclude m [incName]
    =  do toks <- preprocessFile m incName
          return (\ts -> toks ++ ts)
 -- | Handle the @#USE@ directive.
 -- This is a bit of a hack at the moment, since it just includes the file
 -- textually.
 handleUse :: DirectiveFunc
 handleUse m [modName]
    =  do let incName  = mangleModName modName
          cs <- get
          put $ cs { csUsedFiles = Set.insert incName (csUsedFiles cs) }
          if Set.member incName (csUsedFiles cs)
            then return id
            else handleInclude m [incName]
  where
    -- | If a module name doesn't already have a suffix, add one.
    mangleModName :: String -> String
    mangleModName mod
        = if ".occ" `isSuffixOf` mod || ".inc" `isSuffixOf` mod
            then mod
            else mod ++ ".occ"
 --}}}
 -- | Main function for testing.
 main :: IO ()
 main
    =  do (arg:_) <- getArgs
          v <- evalStateT (runErrorT (test arg)) emptyState
          case v of
            Left e -> dieIO e
            Right r -> return ()
  where
    test :: String -> PassM ()
    test fn
        = do tokens <- preprocessFile emptyMeta fn
             liftIO $ putStrLn $ pshow tokens
--- a/StructureOccam.hs
+++ b/StructureOccam.hs
@ -19,17 +19,12 @@ with this program.  If not, see <http://www.gnu.org/licenses/>.
 -- | Analyse syntactic structure of occam code.
 module StructureOccam where
 import Control.Monad.Error
 import Control.Monad.State
 import Data.Generics
 import System
 import CompState
 import Errors
 import LexOccam
 import Metadata
 import Pass
 import PrettyShow
 -- | Given the output of the lexer for a single file, add `Indent`, `Outdent`
 -- and `EndOfLine` markers.
@ -39,12 +34,12 @@ structureOccam ts = analyse 1 firstLine ts
  where
    -- Find the first line that's actually got something on it.
    firstLine
-        = case ts of (Token _ m _:_) -> metaLine m
+        = case ts of ((m, _):_) -> metaLine m
    analyse :: Int -> Int -> [Token] -> PassM [Token]
    -- Add extra EndOfLine at the end of the file.
-    analyse _ _ [] = return [EndOfLine]
+    analyse _ _ [] = return [(emptyMeta, EndOfLine)]
-    analyse prevCol prevLine (t@(Token _ m _):ts)
+    analyse prevCol prevLine (t@(m, tokType):ts)
        = if line /= prevLine
             then do rest <- analyse col line ts
                     newLine $ t : rest
@ -56,29 +51,19 @@ structureOccam ts = analyse 1 firstLine ts
        -- A new line -- look to see what's going on with the indentation.
        newLine rest
-          | col == prevCol + 2   = return $ EndOfLine : Indent : rest
+          | col == prevCol + 2   = withEOL $ (m, Indent) : rest
          -- FIXME: If col > prevCol, then look to see if there's a VALOF
          -- coming up before the next column change...
          | col < prevCol
              = if (prevCol - col) `mod` 2 == 0
-                  then return $ EndOfLine : (replicate steps Outdent ++ rest)
+                  then withEOL $ replicate steps (m, Outdent) ++ rest
-                  else dieP m "Invalid indentation"
+                  else bad
-          | col == prevCol       = return $ EndOfLine : rest
+          | col == prevCol       = withEOL rest
-          | otherwise            = dieP m "Invalid indentation"
+          | otherwise            = bad
            where
              steps = (prevCol - col) `div` 2
-
+              bad = dieP m "Invalid indentation"
-- | Main function for testing.
+              -- This is actually the position at which the new line starts
-main :: IO ()
+              -- rather than the end of the previous line.
-main
+              withEOL ts = return $ (m, EndOfLine) : ts
    =  do (arg:_) <- getArgs
          s <- readFile arg
          e <- evalStateT (runErrorT (test arg s)) emptyState
          return ()
  where
    test :: String -> String -> PassM ()
    test arg s
        = do tokens <- runLexer arg s
             tokens' <- structureOccam tokens
             liftIO $ putStrLn $ pshow tokens'
--- a/Utils.hs
+++ b/Utils.hs
@ -20,6 +20,9 @@ with this program.  If not, see <http://www.gnu.org/licenses/>.
 -- that could be put into the standard library.
 module Utils where
 import Control.Monad
 import System.IO
 import System.IO.Error
 import Text.Regex
 -- | Split the directory and file components of a path.
@ -58,3 +61,8 @@ transformEither :: (a -> c) -> (b -> d) -> Either a b -> Either c d
 transformEither funcLeft funcRight x = case x of
  Left l -> Left (funcLeft l)
  Right r -> Right (funcRight r)
 -- | Try an IO operation, returning `Nothing` if it fails.
 maybeIO :: IO a -> IO (Maybe a)
 maybeIO op = catch (op >>= (return . Just)) (\e -> return Nothing)