Added LexRain, based heavily on (a slightly older version of) LexOccam

parent 1890b281fe
commit b1469fa65b

LexRain.x (new file, 128 lines)
@@ -0,0 +1,128 @@
{
{-
Tock: a compiler for parallel languages
Copyright (C) 2007 University of Kent

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
-}

-- | Lexically analyse Rain code.
module LexRain where

import Data.Generics

import Errors
import Metadata
import Pass
}

%wrapper "posn"
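
-- Character classes and macros used by the token rules below.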
$decimalDigit = [0-9]
$hexDigit = [0-9 a-f A-F]

@reserved = "[" | "]" | "(" | ")" | "{" | "}"
          | ":" | "==" | "," | ";" | "&"
          | "?" | "!" | "=" | "+=" | "-=" | "*=" | "/="
          | "+" | "-" | "*" | "/"
          | ">=" | "<="
          | "<" | ">"
          | "process" | "pareach" | "seqeach" | "par" | "seq"
          | "if" | "while" | "else"
          | "sint8" | "sint16" | "sint32" | "sint64"
          | "uint8" | "uint16" | "uint32" | "uint64"
          | "int" | "bool"


@identifier = [a-z A-Z _] [a-z A-Z 0-9 _]*
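
-- String and character literals. An escape is a backslash followed either
-- by one of the characters in $escapeChar or by # and two hexadecimal digits.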
$escapeChar = [cnrts \" \' \\ \n]
@escape = \\ ( $escapeChar | \# $hexDigit $hexDigit )

@stringLiteral = \" ( @escape | [^\"] )* \"
@charLiteral = \' ( @escape | [^\'] ) \'

-- Note that Rain number literals don't include their signs -- if you say
-- "-3", then that's the operator "-" applied to the literal "3".
@decimalLiteral = $decimalDigit+
@hexLiteral = "#" $hexDigit+
@exponent = ("+" | "-") $decimalDigit+
@realLiteral = ( $decimalDigit+ "." $decimalDigit+ "E" @exponent )
             | ( $decimalDigit+ "." $decimalDigit+ )

occam :-
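
-- Token rules: each action builds a Token using mkToken or mkTokenTrim,
-- defined in the Haskell block at the end of the file.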

-- Ignore whitespace and comments.
$white+            ;
"#" [^\n]*         ;

@reserved          { mkToken TokReserved }
@identifier        { mkToken TokIdentifier }

@stringLiteral     { mkTokenTrim TokStringLiteral }
@charLiteral       { mkTokenTrim TokCharLiteral }

@decimalLiteral    { mkToken TokDecimalLiteral }
@hexLiteral        { mkToken TokHexLiteral }
@realLiteral       { mkToken TokRealLiteral }
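
-- The support code below is passed through verbatim into the Haskell module
-- that Alex generates.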
{
-- | A Rain source token and its position.
type Token = (Meta, TokenType)

-- | A Rain source token.
-- All of these are produced directly by the lexer.
data TokenType =
  TokReserved String          -- ^ A reserved word or symbol
  | TokIdentifier String      -- ^ An identifier
  | TokStringLiteral String   -- ^ A string literal, with the quotes removed
  | TokCharLiteral String     -- ^ A character literal, with the quotes removed
  | TokDecimalLiteral String  -- ^ A decimal integer literal
  | TokHexLiteral String      -- ^ A hexadecimal literal (written with a leading #)
  | TokRealLiteral String     -- ^ A real literal
  deriving (Show, Eq, Typeable, Data)

-- | Build a lexer rule for a token.
mkToken :: (String -> TokenType) -> AlexPosn -> String -> Token
mkToken cons _ s = (emptyMeta, cons s)

-- | Like 'mkToken', but drops the first and last characters of the match;
-- used for string and character literals, to strip the surrounding quotes.
mkTokenTrim :: (String -> TokenType) -> AlexPosn -> String -> Token
mkTokenTrim cons _ s = (emptyMeta, cons (init (tail s)))

-- | Run the lexer, returning either the position of a lexical error or a
-- list of tokens.
-- (This is based on the `alexScanTokens` function that Alex provides.)
runLexer :: String -> String -> IO (Either Meta [Token])
runLexer filename str = go (alexStartPos, '\n', str)
  where
    go inp@(pos@(AlexPn _ line col), _, str) =
         case alexScan inp 0 of
           AlexEOF -> return $ Right []
           AlexError _ -> return $ Left meta
           AlexSkip inp' len -> go inp'
           AlexToken inp' len act ->
             do ts <- go inp'
                let t = act pos (take len str)
                case ts of
                  Left m -> return $ Left m
                  -- The Meta built by the action is discarded (snd t); the
                  -- real position comes from 'meta' below.
                  Right toks -> return $ Right $ (meta, snd t) : toks

      where
        meta = Meta {
                 metaFile = Just filename,
                 metaLine = line,
                 metaColumn = col
               }
}
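
As a usage note: the sketch below shows how runLexer might be driven once Alex
has been run on LexRain.x to produce a LexRain module. It assumes Tock's
Errors, Metadata and Pass modules are on the search path; the Main module,
file name and input string are illustrative only and are not part of this
commit.

module Main where

import LexRain

main :: IO ()
main = do
  -- Lex a small Rain fragment; the file name is only used for error positions.
  result <- runLexer "example.rain" "while (x < 3) { x += 1; }"
  case result of
    Left _     -> putStrLn "lexical error"
    Right toks -> mapM_ (print . snd) toks  -- print each TokenType

This should print one TokenType per token, e.g. TokReserved "while",
TokReserved "(", TokIdentifier "x", TokReserved "<", TokDecimalLiteral "3" and
so on, since whitespace is skipped and maximal munch makes "+=" match the
longer reserved symbol rather than "+".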