Better string literal handling: do away with StringLiteral in favour of arrays of ByteLiteral
This commit is contained in:
parent
d2c522bec0
commit
057a3a0a67
|
@ -73,7 +73,6 @@ data LiteralRepr =
|
|||
| IntLiteral Meta String
|
||||
| HexLiteral Meta String
|
||||
| ByteLiteral Meta String
|
||||
| StringLiteral Meta String
|
||||
| ArrayLiteral Meta [ArrayElem]
|
||||
deriving (Show, Eq, Typeable, Data)
|
||||
|
||||
|
|
|
@ -5,10 +5,12 @@ import Control.Monad.Error
|
|||
import Control.Monad.Identity
|
||||
import Control.Monad.State
|
||||
import Data.Bits
|
||||
import Data.Char
|
||||
import Data.Generics
|
||||
import Data.Int
|
||||
import Data.Maybe
|
||||
import Numeric
|
||||
import Text.Printf
|
||||
|
||||
import qualified AST as A
|
||||
import Errors
|
||||
|
@ -145,7 +147,9 @@ evalDyadic A.LessEq a b = evalDyadic A.More b a
|
|||
evalDyadic A.MoreEq a b = evalDyadic A.Less b a
|
||||
evalDyadic A.After (OccInt a) (OccInt b) = return $ OccBool ((a - b) > 0)
|
||||
evalDyadic _ _ _ = throwError "bad dyadic op"
|
||||
--}}}
|
||||
|
||||
--{{{ rendering values
|
||||
-- | Convert a value back into a literal.
|
||||
renderValue :: Meta -> OccValue -> (A.Type, A.Expression)
|
||||
renderValue m (OccBool True) = (A.Bool, A.True m)
|
||||
|
@ -154,6 +158,7 @@ renderValue m v = (t, A.ExprLiteral m (A.Literal m t lr))
|
|||
where (t, lr) = renderLiteral m v
|
||||
|
||||
renderLiteral :: Meta -> OccValue -> (A.Type, A.LiteralRepr)
|
||||
renderLiteral m (OccByte c) = (A.Byte, A.ByteLiteral m $ renderChar c)
|
||||
renderLiteral m (OccInt i) = (A.Int, A.IntLiteral m $ show i)
|
||||
renderLiteral m (OccArray vs)
|
||||
= (t, A.ArrayLiteral m aes)
|
||||
|
@ -161,6 +166,18 @@ renderLiteral m (OccArray vs)
|
|||
t = makeArrayType (A.Dimension $ length vs) (head ts)
|
||||
(ts, aes) = unzip $ map (renderLiteralArray m) vs
|
||||
|
||||
renderChar :: Char -> String
|
||||
renderChar '\'' = "*'"
|
||||
renderChar '\"' = "*\""
|
||||
renderChar '*' = "**"
|
||||
renderChar '\r' = "*c"
|
||||
renderChar '\n' = "*n"
|
||||
renderChar '\t' = "*t"
|
||||
renderChar c
|
||||
| (o < 32 || o > 127) = printf "*#%02x" o
|
||||
| otherwise = [c]
|
||||
where o = ord c
|
||||
|
||||
renderLiteralArray :: Meta -> OccValue -> (A.Type, A.ArrayElem)
|
||||
renderLiteralArray m (OccArray vs)
|
||||
= (t, A.ArrayElemArray aes)
|
||||
|
|
|
@ -5,6 +5,7 @@ import Control.Monad.Error
|
|||
import Control.Monad.Identity
|
||||
import Control.Monad.State
|
||||
import Data.Bits
|
||||
import Data.Char
|
||||
import Data.Generics
|
||||
import Data.Int
|
||||
import Data.Maybe
|
||||
|
@ -22,6 +23,7 @@ instance Die EvalM where
|
|||
-- | Occam values of various types.
|
||||
data OccValue =
|
||||
OccBool Bool
|
||||
| OccByte Char
|
||||
| OccInt Int32
|
||||
| OccArray [OccValue]
|
||||
deriving (Show, Eq, Typeable, Data)
|
||||
|
@ -49,6 +51,14 @@ evalIntExpression e
|
|||
Right (OccInt val) -> return $ fromIntegral val
|
||||
Right _ -> die "expression is not of INT type"
|
||||
|
||||
-- | Evaluate a byte literal.
|
||||
evalByte :: (PSM m, Die m) => String -> m Char
|
||||
evalByte s
|
||||
= do ps <- get
|
||||
case runEvaluator ps (evalByteLiteral s) of
|
||||
Left err -> die $ "cannot evaluate byte literal: " ++ err
|
||||
Right (OccByte ch) -> return ch
|
||||
|
||||
-- | Run an evaluator operation.
|
||||
runEvaluator :: ParseState -> EvalM OccValue -> Either String OccValue
|
||||
runEvaluator ps func
|
||||
|
@ -67,7 +77,25 @@ fromRead _ _ = throwError "cannot parse literal"
|
|||
|
||||
-- | Evaluate a simple (non-array) literal.
|
||||
evalSimpleLiteral :: A.Literal -> EvalM OccValue
|
||||
evalSimpleLiteral (A.Literal _ A.Byte (A.ByteLiteral _ s)) = evalByteLiteral s
|
||||
evalSimpleLiteral (A.Literal _ A.Int (A.IntLiteral _ s)) = fromRead OccInt $ readDec s
|
||||
evalSimpleLiteral (A.Literal _ A.Int (A.HexLiteral _ s)) = fromRead OccInt $ readHex s
|
||||
evalSimpleLiteral _ = throwError "bad literal"
|
||||
|
||||
-- | Evaluate a byte literal.
|
||||
evalByteLiteral :: String -> EvalM OccValue
|
||||
evalByteLiteral ('*':'#':hex)
|
||||
= do OccInt n <- fromRead OccInt $ readHex hex
|
||||
return $ OccByte (chr $ fromIntegral n)
|
||||
evalByteLiteral ['*', ch]
|
||||
= return $ OccByte (star ch)
|
||||
where
|
||||
star :: Char -> Char
|
||||
star 'c' = '\r'
|
||||
star 'n' = '\n'
|
||||
star 't' = '\t'
|
||||
star 's' = ' '
|
||||
star c = c
|
||||
evalByteLiteral [ch]
|
||||
= return $ OccByte ch
|
||||
evalByteLiteral _ = throwError "bad BYTE literal"
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
-- | Generate C code from the mangled AST.
|
||||
module GenerateC where
|
||||
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Control.Monad.Writer
|
||||
|
@ -10,6 +11,7 @@ import Numeric
|
|||
import Text.Printf
|
||||
|
||||
import qualified AST as A
|
||||
import EvalLiterals
|
||||
import Metadata
|
||||
import ParseState
|
||||
import Pass
|
||||
|
@ -202,15 +204,30 @@ genConversion m cm t e = missing $ "genConversion " ++ show cm
|
|||
|
||||
--{{{ literals
|
||||
genLiteral :: A.Literal -> CGen ()
|
||||
genLiteral (A.Literal m t lr) = genLiteralRepr lr
|
||||
genLiteral (A.Literal _ _ lr)
|
||||
= if isStringLiteral lr
|
||||
then do tell ["\""]
|
||||
let A.ArrayLiteral _ aes = lr
|
||||
sequence_ [genByteLiteral s
|
||||
| A.ArrayElemExpr (A.ExprLiteral _ (A.Literal _ _ (A.ByteLiteral _ s))) <- aes]
|
||||
tell ["\""]
|
||||
else genLiteralRepr lr
|
||||
genLiteral l = missing $ "genLiteral " ++ show l
|
||||
|
||||
-- | Does a LiteralRepr represent something that can be a plain string literal?
|
||||
isStringLiteral :: A.LiteralRepr -> Bool
|
||||
isStringLiteral (A.ArrayLiteral _ aes)
|
||||
= and [case ae of
|
||||
A.ArrayElemExpr (A.ExprLiteral _ (A.Literal _ _ (A.ByteLiteral _ _))) -> True
|
||||
_ -> False
|
||||
| ae <- aes]
|
||||
isStringLiteral _ = False
|
||||
|
||||
genLiteralRepr :: A.LiteralRepr -> CGen ()
|
||||
genLiteralRepr (A.RealLiteral m s) = tell [s]
|
||||
genLiteralRepr (A.IntLiteral m s) = genDecimal s
|
||||
genLiteralRepr (A.HexLiteral m s) = tell ["0x", s]
|
||||
genLiteralRepr (A.ByteLiteral m s) = tell ["'", convStringLiteral s, "'"]
|
||||
genLiteralRepr (A.StringLiteral m s) = tell ["\"", convStringLiteral s, "\""]
|
||||
genLiteralRepr (A.ByteLiteral m s) = tell ["'"] >> genByteLiteral s >> tell ["'"]
|
||||
genLiteralRepr (A.ArrayLiteral m aes)
|
||||
= do tell ["{"]
|
||||
genArrayLiteralElems aes
|
||||
|
@ -236,23 +253,23 @@ genArrayLiteralElems aes
|
|||
A.Array _ _ -> missing $ "array literal containing non-literal array: " ++ show e
|
||||
_ -> genExpression e
|
||||
|
||||
hexToOct :: String -> String
|
||||
hexToOct h = printf "%03o" ((fst $ head $ readHex h) :: Int)
|
||||
genByteLiteral :: String -> CGen ()
|
||||
genByteLiteral s
|
||||
= do c <- evalByte s
|
||||
tell [convByte c]
|
||||
|
||||
convStringLiteral :: String -> String
|
||||
convStringLiteral [] = []
|
||||
convStringLiteral ('\\':s) = "\\\\" ++ convStringLiteral s
|
||||
convStringLiteral ('*':'#':'0':'0':s) = "\\0" ++ convStringLiteral s
|
||||
convStringLiteral ('*':'#':a:b:s) = "\\" ++ hexToOct [a, b] ++ convStringLiteral s
|
||||
convStringLiteral ('*':c:s) = convStringStar c ++ convStringLiteral s
|
||||
convStringLiteral (c:s) = c : convStringLiteral s
|
||||
|
||||
convStringStar :: Char -> String
|
||||
convStringStar 'c' = "\\r"
|
||||
convStringStar 'n' = "\\n"
|
||||
convStringStar 't' = "\\t"
|
||||
convStringStar 's' = " "
|
||||
convStringStar c = [c]
|
||||
convByte :: Char -> String
|
||||
convByte '\'' = "\\'"
|
||||
convByte '"' = "\\\""
|
||||
convByte '\\' = "\\\\"
|
||||
convByte '\r' = "\\r"
|
||||
convByte '\n' = "\\n"
|
||||
convByte '\t' = "\\t"
|
||||
convByte c
|
||||
| o == 0 = "\\0"
|
||||
| (o < 32 || o > 127) = printf "\\%03o" o
|
||||
| otherwise = [c]
|
||||
where o = ord c
|
||||
--}}}
|
||||
|
||||
--{{{ variables
|
||||
|
|
|
@ -693,9 +693,9 @@ byte :: OccParser A.LiteralRepr
|
|||
byte
|
||||
= do m <- md
|
||||
char '\''
|
||||
s <- character
|
||||
c <- literalCharacter
|
||||
sApos
|
||||
return $ A.ByteLiteral m s
|
||||
return c
|
||||
<?> "byte literal"
|
||||
|
||||
-- i.e. array literal
|
||||
|
@ -716,35 +716,45 @@ table'
|
|||
popTypeContext
|
||||
ets <- mapM typeOfExpression es
|
||||
t <- listType m ets
|
||||
-- If any of the subelements are nested array literals, collapse them.
|
||||
let aes = [case e of
|
||||
A.ExprLiteral _ (A.Literal _ _ al@(A.ArrayLiteral _ subAEs)) ->
|
||||
A.ArrayElemArray subAEs
|
||||
_ -> A.ArrayElemExpr e
|
||||
| e <- es]
|
||||
aes <- mapM collapseArrayElem es
|
||||
return $ A.Literal m t (A.ArrayLiteral m aes)
|
||||
<|> maybeSliced table A.SubscriptedLiteral typeOfLiteral
|
||||
<?> "table'"
|
||||
|
||||
-- | Collapse nested array literals.
|
||||
collapseArrayElem :: A.Expression -> OccParser A.ArrayElem
|
||||
collapseArrayElem e
|
||||
= case e of
|
||||
A.ExprLiteral _ (A.Literal _ _ (A.ArrayLiteral _ subAEs)) ->
|
||||
return $ A.ArrayElemArray subAEs
|
||||
_ -> return $ A.ArrayElemExpr e
|
||||
|
||||
stringLiteral :: OccParser (A.LiteralRepr, A.Dimension)
|
||||
stringLiteral
|
||||
= do m <- md
|
||||
char '"'
|
||||
cs <- manyTill character sQuote
|
||||
return (A.StringLiteral m $ concat cs, A.Dimension $ length cs)
|
||||
cs <- manyTill literalCharacter sQuote
|
||||
let aes = [A.ArrayElemExpr $ A.ExprLiteral m (A.Literal m A.Byte c) | c <- cs]
|
||||
return (A.ArrayLiteral m aes, A.Dimension $ length cs)
|
||||
<?> "string literal"
|
||||
|
||||
character :: OccParser String
|
||||
character
|
||||
= do char '*'
|
||||
(do char '#'
|
||||
a <- hexDigit
|
||||
b <- hexDigit
|
||||
return $ ['*', '#', a, b])
|
||||
<|> do { c <- anyChar; return ['*', c] }
|
||||
do char '#'
|
||||
a <- hexDigit
|
||||
b <- hexDigit
|
||||
return $ ['*', '#', a, b]
|
||||
<|> do { c <- anyChar; return ['*', c] }
|
||||
<|> do c <- anyChar
|
||||
return [c]
|
||||
<?> "character"
|
||||
|
||||
literalCharacter :: OccParser A.LiteralRepr
|
||||
literalCharacter
|
||||
= do m <- md
|
||||
c <- character
|
||||
return $ A.ByteLiteral m c
|
||||
--}}}
|
||||
--{{{ expressions
|
||||
expressionList :: [A.Type] -> OccParser A.ExpressionList
|
||||
|
|
|
@ -7,5 +7,6 @@ PROC P ()
|
|||
VAL BYTE cc IS '"':
|
||||
VAL BYTE ccx IS '*"':
|
||||
VAL BYTE ccc IS '*'':
|
||||
VAL [5][5]BYTE square IS ["sator", "arepo", "tenas", "opera", "rotas"]:
|
||||
SKIP
|
||||
:
|
||||
|
|
Loading…
Reference in New Issue
Block a user