tock-mirror/LexOccam.x
Commit 223a14d767 by Adam Sampson, 2007-08-20 23:46:57 +00:00

Add a preprocessor that works with the new lexer.

The parser now gets a stream of tokens, rather than needing to worry about
loading files itself.

This also reworks the lexer's idea of what constitutes a Token -- it's now a
pair (Meta, TokenType), so it's always easy to pull out/rewrite the metadata --
and adds proper support for lexing preprocessor directives, rather than just
treating them as reserved words.
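As the message notes, a token is now a (Meta, TokenType) pair, so later passes can read or
rewrite the metadata without pattern-matching on every token kind. A minimal sketch of the
idea, assuming only the Token and Meta definitions that appear in the file below (retagFile
is a hypothetical helper, not part of Tock):

    -- Re-stamp every token in a stream with a different source file name.
    retagFile :: String -> [Token] -> [Token]
    retagFile file = map (\(m, tt) -> (m { metaFile = Just file }, tt))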


{ {-
Tock: a compiler for parallel languages
Copyright (C) 2007 University of Kent

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
-}

-- | Lexically analyse occam code.
module LexOccam where

import Data.Generics

import Errors
import Metadata
import Pass
}
%wrapper "posn"

$decimalDigit = [0-9]
$hexDigit = [0-9 a-f A-F]

@preprocessor = "#" [^\n]*

@reserved = "[" | "]" | "(" | ")"
          | "::" | ":=" | ":" | "," | ";" | "&"
          | "?" | "!" | "="
          | "\" | "/\" | "\/"
          | "+" | "-" | "*" | "/"
          | "><" | "<<" | ">>" | "<>"
          | ">=" | "<="
          | "<" | ">"
          | "~"
          | "AFTER" | "ALT" | "AND" | "ANY" | "AT"
          | "BITAND" | "BITNOT" | "BITOR" | "BOOL" | "BYTE" | "BYTESIN"
          | "CASE" | "CHAN"
          | "DATA"
          | "ELSE"
          | "FALSE" | "FOR" | "FROM" | "FUNCTION"
          | "IF" | "IN" | "INLINE" | "INT" | "INT16" | "INT32" | "INT64"
          | "IS"
          | "MINUS" | "MOSTNEG" | "MOSTPOS"
          | "NOT"
          | "OF" | "OFFSETOF" | "OR"
          | "PACKED" | "PAR" | "PLACE" | "PLACED" | "PLUS" | "PORT"
          | "PRI" | "PROC" | "PROCESSOR" | "PROTOCOL"
          | "REAL32" | "REAL64" | "RECORD" | "REM" | "RESHAPES"
          | "RESULT" | "RETYPES" | "ROUND"
          | "SEQ" | "SIZE" | "SKIP" | "STOP"
          | "TIMER" | "TIMES" | "TRUE" | "TRUNC" | "TYPE"
          | "VAL" | "VALOF"
          | "WHILE" | "WORKSPACE"
          | "VECSPACE"

@identifier = [a-z A-Z] [a-z A-Z 0-9 \.]*

$escapeChar = [cnrts \" \' \* \n]
@escape = \* ( $escapeChar | \# $hexDigit $hexDigit )

@stringLiteral = \" ( @escape | [^\"] )* \"
@charLiteral = \' ( @escape | [^\'] ) \'

-- Note that occam number literals don't include their signs -- if you say
-- "-3", then that's the operator "-" applied to the literal "3".
@decimalLiteral = $decimalDigit+
@hexLiteral = "#" $hexDigit+
@exponent = ("+" | "-") $decimalDigit+
@realLiteral = ( $decimalDigit+ "." $decimalDigit+ "E" @exponent )
             | ( $decimalDigit+ "." $decimalDigit+ )
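
-- As an illustration of the note above: the input "-3" is therefore lexed
-- as the reserved symbol "-" (TokReserved "-") followed by
-- TokDecimalLiteral "3".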
occam :-

@preprocessor        { mkToken TokPreprocessor }

-- Ignore whitespace and comments.
$white+              ;
"--" [^\n]*          ;

@reserved            { mkToken TokReserved }
@identifier          { mkToken TokIdentifier }

@stringLiteral       { mkToken TokStringLiteral }
@charLiteral         { mkToken TokCharLiteral }
@decimalLiteral      { mkToken TokDecimalLiteral }
@hexLiteral          { mkToken TokHexLiteral }
@realLiteral         { mkToken TokRealLiteral }
{
-- | An occam source token and its position.
type Token = (Meta, TokenType)

-- | An occam source token.
-- Only the token types that carry a String are generated by the lexer itself;
-- Indent, Outdent and EndOfLine are added later, once the indentation has
-- been analysed.
data TokenType =
  TokReserved String            -- ^ A reserved word or symbol
  | TokIdentifier String
  | TokStringLiteral String
  | TokCharLiteral String
  | TokDecimalLiteral String
  | TokHexLiteral String
  | TokRealLiteral String
  | TokPreprocessor String
  | Indent                      -- ^ Indentation increase
  | Outdent                     -- ^ Indentation decrease
  | EndOfLine                   -- ^ End of line
  deriving (Show, Eq, Typeable, Data)
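
-- As an illustration: the occam fragment "x := 42" produces
-- TokIdentifier "x", TokReserved ":=" and TokDecimalLiteral "42";
-- EndOfLine, Indent and Outdent only appear after indentation analysis.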

-- | Build a lexer rule for a token.
mkToken :: (String -> TokenType) -> AlexPosn -> String -> Token
mkToken cons _ s = (emptyMeta, cons s)

-- | Run the lexer, returning a list of tokens.
-- (This is based on the `alexScanTokens` function that Alex provides.)
runLexer :: String -> String -> PassM [Token]
runLexer filename str = go (alexStartPos, '\n', str)
  where
    go inp@(pos@(AlexPn _ line col), _, str) =
        case alexScan inp 0 of
          AlexEOF -> return []
          AlexError _ -> dieP meta "Unrecognised token"
          AlexSkip inp' len -> go inp'
          AlexToken inp' len act ->
            do ts <- go inp'
               let t = act pos (take len str)
               return $ (meta, snd t) : ts
      where
        meta = emptyMeta {
                 metaFile = Just filename,
                 metaLine = line,
                 metaColumn = col
               }
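
-- Illustrative use only (the file name and bindings here are hypothetical):
-- a pass in the PassM monad would call this as, for instance,
--   tokens <- runLexer "foo.occ" source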
}