
The parser now gets a stream of tokens, rather than needing to worry about loading files itself. This also reworks the lexer's idea of what constitutes a Token -- it's now a pair (Meta, TokenType), so it's always easy to pull out/rewrite the metadata -- and adds proper support for lexing preprocessor directives, rather than just treating them as reserved words.
148 lines
4.6 KiB
Plaintext
148 lines
4.6 KiB
Plaintext
{ {-
|
|
Tock: a compiler for parallel languages
|
|
Copyright (C) 2007 University of Kent
|
|
|
|
This program is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by the
|
|
Free Software Foundation, either version 2 of the License, or (at your
|
|
option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
-}
|
|
|
|
-- | Lexically analyse occam code.
|
|
module LexOccam where
|
|
|
|
import Data.Generics
|
|
|
|
import Errors
|
|
import Metadata
|
|
import Pass
|
|
}
|
|
|
|
%wrapper "posn"
|
|
|
|
$decimalDigit = [0-9]
|
|
$hexDigit = [0-9 a-f A-F]
|
|
|
|
@preprocessor = "#" [^\n]*
|
|
|
|
@reserved = "[" | "]" | "(" | ")"
|
|
| "::" | ":=" | ":" | "," | ";" | "&"
|
|
| "?" | "!" | "="
|
|
| "\" | "/\" | "\/"
|
|
| "+" | "-" | "*" | "/"
|
|
| "><" | "<<" | ">>" | "<>"
|
|
| ">=" | "<="
|
|
| "<" | ">"
|
|
| "~"
|
|
| "AFTER" | "ALT" | "AND" | "ANY" | "AT"
|
|
| "BITAND" | "BITNOT" | "BITOR" | "BOOL" | "BYTE" | "BYTESIN"
|
|
| "CASE" | "CHAN"
|
|
| "DATA"
|
|
| "ELSE"
|
|
| "FALSE" | "FOR" | "FROM" | "FUNCTION"
|
|
| "IF" | "IN" | "INLINE" | "INT" | "INT16" | "INT32" | "INT64"
|
|
| "IS"
|
|
| "MINUS" | "MOSTNEG" | "MOSTPOS"
|
|
| "NOT"
|
|
| "OF" | "OFFSETOF" | "OR"
|
|
| "PACKED" | "PAR" | "PLACE" | "PLACED" | "PLUS" | "PORT"
|
|
| "PRI" | "PROC" | "PROCESSOR" | "PROTOCOL"
|
|
| "REAL32" | "REAL64" | "RECORD" | "REM" | "RESHAPES"
|
|
| "RESULT" | "RETYPES" | "ROUND"
|
|
| "SEQ" | "SIZE" | "SKIP" | "STOP"
|
|
| "TIMER" | "TIMES" | "TRUE" | "TRUNC" | "TYPE"
|
|
| "VAL" | "VALOF"
|
|
| "WHILE" | "WORKSPACE"
|
|
| "VECSPACE"
|
|
|
|
@identifier = [a-z A-Z] [a-z A-Z 0-9 \.]*
|
|
|
|
$escapeChar = [cnrts \" \' \* \n]
|
|
@escape = \* ( $escapeChar | \# $hexDigit $hexDigit )
|
|
|
|
@stringLiteral = \" ( @escape | [^\"] )* \"
|
|
@charLiteral = \' ( @escape | [^\'] ) \'
|
|
|
|
-- Note that occam number literals don't include their signs -- if you say
|
|
-- "-3", then that's the operator "-" applied to the literal "3".
|
|
@decimalLiteral = $decimalDigit+
|
|
@hexLiteral = "#" $hexDigit+
|
|
@exponent = ("+" | "-") $decimalDigit+
|
|
@realLiteral = ( $decimalDigit+ "." $decimalDigit+ "E" @exponent )
|
|
| ( $decimalDigit+ "." $decimalDigit+ )
|
|
|
|
occam :-
|
|
|
|
@preprocessor { mkToken TokPreprocessor }
|
|
|
|
-- Ignore whitespace and comments.
|
|
$white+ ;
|
|
"--" [^\n]* ;
|
|
|
|
@reserved { mkToken TokReserved }
|
|
@identifier { mkToken TokIdentifier }
|
|
|
|
@stringLiteral { mkToken TokStringLiteral }
|
|
@charLiteral { mkToken TokCharLiteral }
|
|
|
|
@decimalLiteral { mkToken TokDecimalLiteral }
|
|
@hexLiteral { mkToken TokHexLiteral }
|
|
@realLiteral { mkToken TokRealLiteral }
|
|
|
|
{
|
|
-- | An occam source token and its position.
|
|
type Token = (Meta, TokenType)
|
|
|
|
-- | An occam source token.
|
|
-- Only `Token` is generated by the lexer itself; the others are added later
|
|
-- once the indentation has been analysed.
|
|
data TokenType =
|
|
TokReserved String -- ^ A reserved word or symbol
|
|
| TokIdentifier String
|
|
| TokStringLiteral String
|
|
| TokCharLiteral String
|
|
| TokDecimalLiteral String
|
|
| TokHexLiteral String
|
|
| TokRealLiteral String
|
|
| TokPreprocessor String
|
|
| Indent -- ^ Indentation increase
|
|
| Outdent -- ^ Indentation decrease
|
|
| EndOfLine -- ^ End of line
|
|
deriving (Show, Eq, Typeable, Data)
|
|
|
|
-- | Build a lexer rule for a token.
|
|
mkToken :: (String -> TokenType) -> AlexPosn -> String -> Token
|
|
mkToken cons _ s = (emptyMeta, cons s)
|
|
|
|
-- | Run the lexer, returning a list of tokens.
|
|
-- (This is based on the `alexScanTokens` function that Alex provides.)
|
|
runLexer :: String -> String -> PassM [Token]
|
|
runLexer filename str = go (alexStartPos, '\n', str)
|
|
where
|
|
go inp@(pos@(AlexPn _ line col), _, str) =
|
|
case alexScan inp 0 of
|
|
AlexEOF -> return []
|
|
AlexError _ -> dieP meta "Unrecognised token"
|
|
AlexSkip inp' len -> go inp'
|
|
AlexToken inp' len act ->
|
|
do ts <- go inp'
|
|
let t = act pos (take len str)
|
|
return $ (meta, snd t) : ts
|
|
|
|
where
|
|
meta = emptyMeta {
|
|
metaFile = Just filename,
|
|
metaLine = line,
|
|
metaColumn = col
|
|
}
|
|
}
|
|
|