
This makes sure that we catch all leftover instances of using SYB to do generic operations that we should be using Polyplate for instead. Most modules should only import Data, and possibly Typeable.
215 lines
7.6 KiB
Plaintext
215 lines
7.6 KiB
Plaintext
{ {-# OPTIONS_GHC -fno-warn-unused-imports -fno-warn-missing-signatures #-}
|
|
{-
|
|
Tock: a compiler for parallel languages
|
|
Copyright (C) 2007 University of Kent
|
|
|
|
This program is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by the
|
|
Free Software Foundation, either version 2 of the License, or (at your
|
|
option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
-}
|
|
|
|
-- | Lexically analyse occam code.
|
|
module LexOccam where
|
|
|
|
import Control.Monad.Error
|
|
import Data.Generics (Data, Typeable)
|
|
|
|
import Errors
|
|
import Metadata
|
|
import Pass
|
|
}
|
|
|
|
%wrapper "posn"
|
|
|
|
$decimalDigit = [0-9]
|
|
$hexDigit = [0-9 a-f A-F]
|
|
|
|
$horizSpace = [\ \t]
|
|
$vertSpace = [\r\n]
|
|
|
|
@directive = "COMMENT" | "DEFINE" | "ELSE" | "ENDIF" | "IF" | "INCLUDE"
|
|
| "OPTION" | "PRAGMA" | "RELAX" | "USE"
|
|
|
|
@preprocessor = "#" @directive [^\n]*
|
|
|
|
@reserved = "[" | "]" | "(" | ")"
|
|
| "::" | ":=" | ":" | "," | ";" | "&"
|
|
| "?" | "??" | "!" | "="
|
|
| "\" | "/\" | "\/"
|
|
| "+" | "-" | "*" | "/"
|
|
| "><" | "<<" | ">>" | "<>"
|
|
| ">=" | "<="
|
|
| "<" | ">"
|
|
| "~"
|
|
| "##"
|
|
| "|"
|
|
-- occam UDOs:
|
|
| "@@" | "$$" | "%" | "%%" | "&&" | "<%" | "%>" | "<&" | "&>"
|
|
| "<]" | "[>" | "<@" | "@>" | "@" | "++" | "!!" | "==" | "^"
|
|
| "AFTER" | "ALT" | "AND" | "ANY" | "AT"
|
|
| "BITAND" | "BITNOT" | "BITOR" | "BOOL" | "BYTE" | "BYTESIN"
|
|
| "CASE" | "CHAN" | "CLAIM" | "CLONE"
|
|
| "DATA" | "DEFINED"
|
|
| "ELSE"
|
|
| "FALSE" | "FOR" | "FROM" | "FUNCTION"
|
|
| "IF" | "IN" | "INITIAL" | "INLINE" | "INT" | "INT16" | "INT32" | "INT64"
|
|
| "IS"
|
|
| "MINUS" | "MOBILE" | "MOSTNEG" | "MOSTPOS"
|
|
| "NOT"
|
|
| "OF" | "OFFSETOF" | "OR"
|
|
| "PACKED" | "PAR" | "PLACE" | "PLACED" | "PLUS" | "PORT"
|
|
| "PRI" | "PROC" | "PROCESSOR" | "PROTOCOL"
|
|
| "REAL32" | "REAL64" | "REC" | "RECORD" | "RECURSIVE" | "REM" | "RESHAPES"
|
|
| "RESULT" | "RETYPES" | "ROUND"
|
|
| "SEQ" | "SHARED" | "SIZE" | "SKIP" | "STEP" | "STOP"
|
|
| "TIMER" | "TIMES" | "TRUE" | "TRUNC" | "TYPE"
|
|
| "VAL" | "VALOF"
|
|
| "WHILE" | "WORKSPACE"
|
|
| "VECSPACE"
|
|
| ".STATIC" | ".VSPTR" | ".WSSIZE"
|
|
|
|
@identifier = [a-z A-Z] [a-z A-Z 0-9 \._]*
|
|
|
|
@hexEscape = \# $hexDigit $hexDigit
|
|
@escape = \* ( @hexEscape | [^\#\n] )
|
|
|
|
@charLiteral = \' ( @escape | [^\'\*] ) \'
|
|
@stringBody = ( @escape | [^\"\*] )*
|
|
@fullString = \" @stringBody \"
|
|
@startString = \" @stringBody \* \n
|
|
@contString = \* @stringBody \* \n
|
|
@endString = \* @stringBody \"
|
|
|
|
-- Note that occam number literals don't include their signs -- if you say
|
|
-- "-3", then that's the operator "-" applied to the literal "3".
|
|
@intLiteral = $decimalDigit+
|
|
@hexLiteral = "#" $hexDigit+
|
|
@exponent = ("+" | "-") $decimalDigit+
|
|
@realLiteral = ( $decimalDigit+ "." $decimalDigit+ "E" @exponent )
|
|
| ( $decimalDigit+ "." $decimalDigit+ )
|
|
|
|
occam :-
|
|
|
|
-- In state 0, we're consuming the horizontal space at the start of a line.
|
|
-- In state one, we're reading the first thing on a line.
|
|
-- In state two, we're reading the rest of the line.
|
|
-- In state three, we're in the middle of a multi-line string.
|
|
|
|
<0> $horizSpace* { mkState one }
|
|
|
|
<one> @preprocessor { mkToken TokPreprocessor 0 }
|
|
<one, two> "--" [^\n]* { mkState 0 }
|
|
<one, two> $vertSpace+ { mkState 0 }
|
|
|
|
<one, two> @reserved { mkToken TokReserved two }
|
|
<one, two> @identifier { mkToken TokIdentifier two }
|
|
|
|
<one, two> @charLiteral { mkToken TokCharLiteral two }
|
|
<one, two> @fullString { mkToken TokStringLiteral two }
|
|
<one, two> @startString { mkToken TokStringCont three }
|
|
|
|
<three> $horizSpace+ { mkState three }
|
|
<three> @contString { mkToken TokStringCont three }
|
|
<three> @endString { mkToken TokStringLiteral two }
|
|
|
|
<one, two> @intLiteral { mkToken TokIntLiteral two }
|
|
<one, two> @hexLiteral { mkToken TokHexLiteral two }
|
|
<one, two> @realLiteral { mkToken TokRealLiteral two }
|
|
|
|
<two> $horizSpace+ ;
|
|
|
|
{
|
|
-- | An occam source token and its position.
|
|
data Token = Token Meta TokenType
|
|
deriving (Eq, Typeable, Data)
|
|
|
|
instance Show Token where
|
|
show (Token _ tt) = show tt
|
|
|
|
-- | An occam source token.
|
|
-- Only `Token` is generated by the lexer itself; the others are added later
|
|
-- once the indentation has been analysed.
|
|
data TokenType =
|
|
TokReserved String -- ^ A reserved word or symbol
|
|
| TokIdentifier String
|
|
| TokStringCont String -- ^ A continued string literal.
|
|
| TokStringLiteral String -- ^ (The end of) a string literal.
|
|
| TokCharLiteral String
|
|
| TokIntLiteral String
|
|
| TokHexLiteral String
|
|
| TokRealLiteral String
|
|
| TokPreprocessor String
|
|
| IncludeFile String -- ^ Include a file
|
|
| Pragma String -- ^ A pragma
|
|
| Indent -- ^ Indentation increase
|
|
| Outdent -- ^ Indentation decrease
|
|
| EndOfLine -- ^ End of line
|
|
deriving (Eq, Typeable, Data)
|
|
|
|
instance Show TokenType where
|
|
show tt
|
|
= case tt of
|
|
TokReserved s -> quote "reserved word" s
|
|
TokIdentifier s -> quote "identifier" s
|
|
TokStringCont s -> quote "partial string literal" s
|
|
TokStringLiteral s -> quote "string literal" s
|
|
TokCharLiteral s -> quote "character literal" s
|
|
TokIntLiteral s -> quote "decimal literal" s
|
|
TokHexLiteral s -> quote "hex literal" s
|
|
TokRealLiteral s -> quote "real literal" s
|
|
TokPreprocessor s -> quote "preprocessor directive" s
|
|
IncludeFile s -> quote "file inclusion" s
|
|
Pragma s -> quote "pragma" s
|
|
Indent -> "indentation increase"
|
|
Outdent -> "indentation decrease"
|
|
EndOfLine -> "end of line"
|
|
where
|
|
quote label s = label ++ " \"" ++ s ++ "\""
|
|
|
|
-- | Build a lexer rule for a token.
|
|
mkToken :: (String -> TokenType) -> Int -> AlexPosn -> String -> (Maybe Token, Int)
|
|
mkToken cons code _ s = (Just (Token emptyMeta (cons s)), code)
|
|
|
|
-- | Just switch state.
|
|
mkState :: Int -> AlexPosn -> String -> (Maybe Token, Int)
|
|
mkState code _ s = (Nothing, code)
|
|
|
|
-- | Run the lexer, returning a list of tokens.
|
|
-- (This is based on the `alexScanTokens` function that Alex provides.)
|
|
runLexer' :: Die m => (String, Int, Int) -> String -> m [Token]
|
|
runLexer' (filename, startLine, startCol) str = go (AlexPn 0 startLine startCol, '\n', str) 0
|
|
where
|
|
go inp@(pos@(AlexPn _ line col), _, str) code =
|
|
case alexScan inp code of
|
|
AlexEOF -> return []
|
|
AlexError _ -> dieP meta "Unrecognised token"
|
|
AlexSkip inp' len -> go inp' code
|
|
AlexToken inp' len act ->
|
|
do let (t, code) = act pos (take len str)
|
|
ts <- go inp' code
|
|
return $ case t of
|
|
Just (Token _ tt) -> Token meta tt : ts
|
|
Nothing -> ts
|
|
|
|
where
|
|
meta = emptyMeta {
|
|
metaFile = Just filename,
|
|
metaLine = line,
|
|
metaColumn = col
|
|
}
|
|
|
|
runLexer :: Die m => String -> String -> m [Token]
|
|
runLexer fn = runLexer' (fn, 1, 1)
|
|
|
|
}
|
|
|