Add a preprocessor that works with the new lexer.

The parser now gets a stream of tokens, rather than needing to worry about loading files itself. This also reworks the lexer's idea of what constitutes a Token -- it's now a pair (Meta, TokenType), so it's always easy to pull out/rewrite the metadata -- and adds proper support for lexing preprocessor directives, rather than just treating them as reserved words.
2007-08-20 23:46:57 +00:00 · 2007-08-20 23:46:57 +00:00 · 223a14d767
commit 223a14d767
parent a640dabc04
6 changed files with 196 additions and 44 deletions
--- a/CompState.hs
+++ b/CompState.hs
@ -22,6 +22,7 @@ module CompState where
 import Data.Generics
 import Data.Map (Map)
 import qualified Data.Map as Map
+import qualified Data.Set as Set
 import Control.Monad.State

 import qualified AST as A
@ -49,7 +50,11 @@ data CompState = CompState {
    csVerboseLevel :: Int,
    csOutputFile :: String,

-    -- Set by preprocessor
+    -- Set by (new) preprocessor
+    csCurrentFile :: String,
+    csUsedFiles :: Set.Set String,
+
+    -- Set by (old) preprocessor
    csSourceFiles :: Map String String,
    csIndentLinesIn :: [String],
    csIndentLinesOut :: [String],
@ -85,6 +90,9 @@ emptyState = CompState {
    csVerboseLevel = 0,
    csOutputFile = "-",

+    csCurrentFile = "none",
+    csUsedFiles = Set.empty,
+
    csSourceFiles = Map.empty,
    csIndentLinesIn = [],
    csIndentLinesOut = [],
--- a/LexOccam.x
+++ b/LexOccam.x
@ -31,6 +31,8 @@ import Pass
 $decimalDigit = [0-9]
 $hexDigit = [0-9 a-f A-F]

+@preprocessor = "#" [^\n]*
+
@reserved = "[" | "]" | "(" | ")"
          | "::" | ":=" | ":" | "," | ";" | "&"
          | "?" | "!" | "="
@ -60,7 +62,6 @@ $hexDigit = [0-9 a-f A-F]
          | "VAL" | "VALOF"
          | "WHILE" | "WORKSPACE"
          | "VECSPACE"
-          | "#INCLUDE" | "#USE"

@identifier = [a-z A-Z] [a-z A-Z 0-9 \.]*

@ -80,6 +81,8 @@ $escapeChar = [cnrts \" \' \* \n]

 occam :-

+@preprocessor      { mkToken TokPreprocessor }
+
 -- Ignore whitespace and comments.
 $white+            ;
 "--" [^\n]*        ;
@ -95,28 +98,31 @@ $white+            ;
@realLiteral       { mkToken TokRealLiteral }

 {
+-- | An occam source token and its position.
+type Token = (Meta, TokenType)
+
 -- | An occam source token.
 -- Only `Token` is generated by the lexer itself; the others are added later
 -- once the indentation has been analysed.
-data Token =
-  Token TokenType Meta String          -- ^ A real token read from the source
+data TokenType =
+  TokReserved String                   -- ^ A reserved word or symbol
+  | TokIdentifier String
+  | TokStringLiteral String
+  | TokCharLiteral String
+  | TokDecimalLiteral String
+  | TokHexLiteral String
+  | TokRealLiteral String
+  | TokPreprocessor String
  | Indent                             -- ^ Indentation increase
  | Outdent                            -- ^ Indentation decrease
  | EndOfLine                          -- ^ End of line
  deriving (Show, Eq, Typeable, Data)

-- | The type of a source token.
-data TokenType =
-  TokReserved | TokIdentifier
-  | TokStringLiteral | TokCharLiteral
-  | TokDecimalLiteral | TokHexLiteral | TokRealLiteral
-  deriving (Show, Eq, Typeable, Data)
-
 -- | Build a lexer rule for a token.
-mkToken :: TokenType -> AlexPosn -> String -> Token
-mkToken tt (AlexPn _ line col) s = Token tt emptyMeta s
+mkToken :: (String -> TokenType) -> AlexPosn -> String -> Token
+mkToken cons _ s = (emptyMeta, cons s)

-- | Run the lexer, returning either an error position or a list of tokens.
+-- | Run the lexer, returning a list of tokens.
 -- (This is based on the `alexScanTokens` function that Alex provides.)
 runLexer :: String -> String -> PassM [Token]
 runLexer filename str = go (alexStartPos, '\n', str)
@ -128,8 +134,8 @@ runLexer filename str = go (alexStartPos, '\n', str)
           AlexSkip inp' len -> go inp'
           AlexToken inp' len act ->
             do ts <- go inp'
-                let (Token tt _ s) = act pos (take len str)
-                return $ (Token tt meta s) : ts
+                let t = act pos (take len str)
+                return $ (meta, snd t) : ts

      where
        meta = emptyMeta {
--- a/2
+++ b/2
@ -21,7 +21,7 @@ tocktest: $(sources)
 	ghc $(ghc_opts) -o tocktest -main-is TestMain --make TestMain

 lextest: $(sources)
-	ghc $(ghc_opts) -o lextest -main-is StructureOccam --make StructureOccam
+	ghc $(ghc_opts) -o lextest -main-is PreprocessOccam --make PreprocessOccam

 CFLAGS = \
 	-O2 \
--- a/PreprocessOccam.hs
+++ b/PreprocessOccam.hs
@ -0,0 +1,145 @@
+{-
+Tock: a compiler for parallel languages
+Copyright (C) 2007  University of Kent
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation, either version 2 of the License, or (at your
+option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program.  If not, see <http://www.gnu.org/licenses/>.
+-}
+
+-- | Preprocess occam code.
+module PreprocessOccam where
+
+import Data.List
+import qualified Data.Set as Set
+import Text.Regex
+
+import Errors
+import LexOccam
+import Metadata
+import Pass
+import StructureOccam
+import Utils
+
+import CompState
+import Control.Monad.Error
+import Control.Monad.State
+import System
+import System.IO
+import PrettyShow
+
+-- | Open an included file, looking for it in the search path.
+-- Return the open filehandle and the location of the file.
+-- FIXME: This doesn't actually look at the search path yet.
+searchFile :: Meta -> String -> PassM (Handle, String)
+searchFile m filename
+    =  do cs <- get
+          let currentFile = csCurrentFile cs
+          let possibilities = [joinPath currentFile filename]
+          openOneOf possibilities
+  where
+    openOneOf :: [String] -> PassM (Handle, String)
+    openOneOf [] = dieP m $ "Unable to find " ++ filename
+    openOneOf (fn:fns)
+        =  do r <- liftIO $ maybeIO $ openFile fn ReadMode
+              case r of
+                Just h -> return (h, fn)
+                Nothing -> openOneOf fns
+
+-- | Preprocess a file and return its tokenised form ready for parsing.
+preprocessFile :: Meta -> String -> PassM [Token]
+preprocessFile m filename
+    =  do (handle, realFilename) <- searchFile m filename
+          liftIO $ putStrLn $ "Loading " ++ realFilename
+          origCS <- get
+          modify (\cs -> cs { csCurrentFile = realFilename })
+          s <- liftIO $ hGetContents handle
+          toks <- runLexer realFilename s >>= structureOccam >>= preprocessOccam
+          modify (\cs -> cs { csCurrentFile = csCurrentFile origCS })
+          return toks
+
+-- | Preprocess a token stream.
+preprocessOccam :: [Token] -> PassM [Token]
+preprocessOccam [] = return []
+preprocessOccam ((m, TokPreprocessor ('#':s)):(_, EndOfLine):ts)
+    =  do func <- handleDirective m s
+          rest <- preprocessOccam ts
+          return $ func rest
+-- Check the above case didn't miss something.
+preprocessOccam ((_, TokPreprocessor _):_)
+    = error "bad TokPreprocessor token"
+preprocessOccam (t:ts)
+    =  do rest <- preprocessOccam ts
+          return $ t : rest
+
+--{{{  preprocessor directive handlers
+type DirectiveFunc = Meta -> [String] -> PassM ([Token] -> [Token])
+
+-- | Call the handler for a preprocessor directive.
+handleDirective :: Meta -> String -> PassM ([Token] -> [Token])
+handleDirective m s = lookup s directives
+  where
+    lookup s [] = dieP m "Unknown preprocessor directive"
+    lookup s ((re, func):ds)
+        = case matchRegex re s of
+            Just fields -> func m fields
+            Nothing -> lookup s ds
+
+-- | List of handlers for preprocessor directives.
+-- `handleDirective` walks down the regexps in this list until it finds one
+-- that matches, then uses the corresponding function.
+directives :: [(Regex, DirectiveFunc)]
+directives =
+  [ (mkRegex "^INCLUDE \"(.*)\"$", handleInclude)
+  , (mkRegex "^USE \"(.*)\"$", handleUse)
+  ]
+
+-- | Handle the @#INCLUDE@ directive.
+handleInclude :: DirectiveFunc
+handleInclude m [incName]
+    =  do toks <- preprocessFile m incName
+          return (\ts -> toks ++ ts)
+
+-- | Handle the @#USE@ directive.
+-- This is a bit of a hack at the moment, since it just includes the file
+-- textually.
+handleUse :: DirectiveFunc
+handleUse m [modName]
+    =  do let incName  = mangleModName modName
+          cs <- get
+          put $ cs { csUsedFiles = Set.insert incName (csUsedFiles cs) }
+          if Set.member incName (csUsedFiles cs)
+            then return id
+            else handleInclude m [incName]
+  where
+    -- | If a module name doesn't already have a suffix, add one.
+    mangleModName :: String -> String
+    mangleModName mod
+        = if ".occ" `isSuffixOf` mod || ".inc" `isSuffixOf` mod
+            then mod
+            else mod ++ ".occ"
+--}}}
+
+-- | Main function for testing.
+main :: IO ()
+main
+    =  do (arg:_) <- getArgs
+          v <- evalStateT (runErrorT (test arg)) emptyState
+          case v of
+            Left e -> dieIO e
+            Right r -> return ()
+  where
+    test :: String -> PassM ()
+    test fn
+        = do tokens <- preprocessFile emptyMeta fn
+             liftIO $ putStrLn $ pshow tokens
+
--- a/StructureOccam.hs
+++ b/StructureOccam.hs
@ -19,17 +19,12 @@ with this program.  If not, see <http://www.gnu.org/licenses/>.
 -- | Analyse syntactic structure of occam code.
 module StructureOccam where

-import Control.Monad.Error
-import Control.Monad.State
 import Data.Generics
-import System

-import CompState
 import Errors
 import LexOccam
 import Metadata
 import Pass
-import PrettyShow

 -- | Given the output of the lexer for a single file, add `Indent`, `Outdent`
 -- and `EndOfLine` markers.
@ -39,12 +34,12 @@ structureOccam ts = analyse 1 firstLine ts
  where
    -- Find the first line that's actually got something on it.
    firstLine
-        = case ts of (Token _ m _:_) -> metaLine m
+        = case ts of ((m, _):_) -> metaLine m

    analyse :: Int -> Int -> [Token] -> PassM [Token]
    -- Add extra EndOfLine at the end of the file.
-    analyse _ _ [] = return [EndOfLine]
-    analyse prevCol prevLine (t@(Token _ m _):ts)
+    analyse _ _ [] = return [(emptyMeta, EndOfLine)]
+    analyse prevCol prevLine (t@(m, tokType):ts)
        = if line /= prevLine
             then do rest <- analyse col line ts
                     newLine $ t : rest
@ -56,29 +51,19 @@ structureOccam ts = analyse 1 firstLine ts

        -- A new line -- look to see what's going on with the indentation.
        newLine rest
-          | col == prevCol + 2   = return $ EndOfLine : Indent : rest
+          | col == prevCol + 2   = withEOL $ (m, Indent) : rest
          -- FIXME: If col > prevCol, then look to see if there's a VALOF
          -- coming up before the next column change...
          | col < prevCol
              = if (prevCol - col) `mod` 2 == 0
-                  then return $ EndOfLine : (replicate steps Outdent ++ rest)
-                  else dieP m "Invalid indentation"
-          | col == prevCol       = return $ EndOfLine : rest
-          | otherwise            = dieP m "Invalid indentation"
+                  then withEOL $ replicate steps (m, Outdent) ++ rest
+                  else bad
+          | col == prevCol       = withEOL rest
+          | otherwise            = bad
            where
              steps = (prevCol - col) `div` 2
-
-- | Main function for testing.
-main :: IO ()
-main
-    =  do (arg:_) <- getArgs
-          s <- readFile arg
-          e <- evalStateT (runErrorT (test arg s)) emptyState
-          return ()
-  where
-    test :: String -> String -> PassM ()
-    test arg s
-        = do tokens <- runLexer arg s
-             tokens' <- structureOccam tokens
-             liftIO $ putStrLn $ pshow tokens'
+              bad = dieP m "Invalid indentation"
+              -- This is actually the position at which the new line starts
+              -- rather than the end of the previous line.
+              withEOL ts = return $ (m, EndOfLine) : ts

--- a/Utils.hs
+++ b/Utils.hs
@ -20,6 +20,9 @@ with this program.  If not, see <http://www.gnu.org/licenses/>.
 -- that could be put into the standard library.
 module Utils where

+import Control.Monad
+import System.IO
+import System.IO.Error
 import Text.Regex

 -- | Split the directory and file components of a path.
@ -58,3 +61,8 @@ transformEither :: (a -> c) -> (b -> d) -> Either a b -> Either c d
 transformEither funcLeft funcRight x = case x of
  Left l -> Left (funcLeft l)
  Right r -> Right (funcRight r)
+
+-- | Try an IO operation, returning `Nothing` if it fails.
+maybeIO :: IO a -> IO (Maybe a)
+maybeIO op = catch (op >>= (return . Just)) (\e -> return Nothing)
+