# Patch summary: multi-line (continued) string literals for the occam lexer.
#
# LexOccam.x
#   * Drops $escapeChar; @escape becomes `*` followed by either @hexEscape
#     (`#` plus two hex digits) or any single character other than `#` or a
#     newline.
#   * Replaces @stringLiteral with @stringBody plus four macros built on it:
#       @fullString   "body"            (opens and closes on one line)
#       @startString  "body* <newline>  (opens, continued on the next line)
#       @contString   *body* <newline>  (middle line of a continued string)
#       @endString    *body"            (final line of a continued string)
#   * Documents a new lexer state "three" (in the middle of a multi-line
#     string). @startString and @contString emit TokStringCont and move to
#     state three; @fullString and @endString emit TokStringLiteral and move
#     to state two; $horizSpace+ is consumed via `mkState three`.
#   * Adds the TokStringCont constructor to TokenType.
#
# StructureOccam.hs
#   * isContinuation now also yields True when the previous token is a
#     TokStringCont.
#
# testcases/stringlit.occ
#   * Adds `mls`, a string literal spread over three lines.
#
# NOTE(review): in this copy the patch's line breaks and indentation appear
# to have been collapsed, and no start-code prefix other than `<0>` is
# visible on the lexer rules (presumably lost in extraction -- confirm).
# Check against the original commit before applying.
#
diff --git a/LexOccam.x b/LexOccam.x index 012704b..0a2b2c9 100644 --- a/LexOccam.x +++ b/LexOccam.x @@ -68,11 +68,15 @@ $vertSpace = [\r\n] @identifier = [a-z A-Z] [a-z A-Z 0-9 \.]* -$escapeChar = [cnrts \" \' \* \n] -@escape = \* ( $escapeChar | \# $hexDigit $hexDigit ) +@hexEscape = \# $hexDigit $hexDigit +@escape = \* ( @hexEscape | [^\#\n] ) -@stringLiteral = \" ( @escape | [^\"\*] )* \" @charLiteral = \' ( @escape | [^\'\*] ) \' +@stringBody = ( @escape | [^\"\*] )* +@fullString = \" @stringBody \" +@startString = \" @stringBody \* \n +@contString = \* @stringBody \* \n +@endString = \* @stringBody \" -- Note that occam number literals don't include their signs -- if you say -- "-3", then that's the operator "-" applied to the literal "3". @@ -84,10 +88,10 @@ $escapeChar = [cnrts \" \' \* \n] occam :- --- This would all be very simple if it weren't for preprocessor instructions! -- In state 0, we're consuming the horizontal space at the start of a line. -- In state one, we're reading the first thing on a line. -- In state two, we're reading the rest of the line. +-- In state three, we're in the middle of a multi-line string. <0> $horizSpace* { mkState one } @@ -98,8 +102,13 @@ occam :- @reserved { mkToken TokReserved two } @identifier { mkToken TokIdentifier two } - @stringLiteral { mkToken TokStringLiteral two } @charLiteral { mkToken TokCharLiteral two } + @fullString { mkToken TokStringLiteral two } + @startString { mkToken TokStringCont three } + + $horizSpace+ { mkState three } + @contString { mkToken TokStringCont three } + @endString { mkToken TokStringLiteral two } @intLiteral { mkToken TokIntLiteral two } @hexLiteral { mkToken TokHexLiteral two } @@ -117,7 +126,8 @@ type Token = (Meta, TokenType) data TokenType = TokReserved String -- ^ A reserved word or symbol | TokIdentifier String - | TokStringLiteral String + | TokStringCont String -- ^ A continued string literal. + | TokStringLiteral String -- ^ (The end of) a string literal. 
| TokCharLiteral String | TokIntLiteral String | TokHexLiteral String diff --git a/StructureOccam.hs b/StructureOccam.hs index 7e36167..881ebf4 100644 --- a/StructureOccam.hs +++ b/StructureOccam.hs @@ -62,6 +62,7 @@ structureOccam ts = analyse 1 firstLine ts (emptyMeta, EndOfLine) isContinuation = case prevTok of (_, TokReserved s) -> s `elem` continuationWords + (_, TokStringCont _) -> True _ -> False -- A new line -- look to see what's going on with the indentation. diff --git a/testcases/stringlit.occ b/testcases/stringlit.occ index 431c0b8..a222bd6 100644 --- a/testcases/stringlit.occ +++ b/testcases/stringlit.occ @@ -7,6 +7,9 @@ PROC P () VAL BYTE cc IS '"': VAL BYTE ccx IS '*"': VAL BYTE ccc IS '*'': + VAL []BYTE mls IS "first* + *second* + *third": VAL [5][5]BYTE square IS ["sator", "arepo", "tenas", "opera", "rotas"]: SKIP :