Better string literal handling: do away with StringLiteral in favour of arrays of ByteLiteral

This commit is contained in:
Adam Sampson 2007-04-29 21:31:56 +00:00
parent d2c522bec0
commit 057a3a0a67
6 changed files with 107 additions and 35 deletions

View File

@ -73,7 +73,6 @@ data LiteralRepr =
| IntLiteral Meta String
| HexLiteral Meta String
| ByteLiteral Meta String
| StringLiteral Meta String
| ArrayLiteral Meta [ArrayElem]
deriving (Show, Eq, Typeable, Data)

View File

@ -5,10 +5,12 @@ import Control.Monad.Error
import Control.Monad.Identity
import Control.Monad.State
import Data.Bits
import Data.Char
import Data.Generics
import Data.Int
import Data.Maybe
import Numeric
import Text.Printf
import qualified AST as A
import Errors
@ -145,7 +147,9 @@ evalDyadic A.LessEq a b = evalDyadic A.More b a
evalDyadic A.MoreEq a b = evalDyadic A.Less b a
evalDyadic A.After (OccInt a) (OccInt b) = return $ OccBool ((a - b) > 0)
evalDyadic _ _ _ = throwError "bad dyadic op"
--}}}
--{{{ rendering values
-- | Convert a value back into a literal.
renderValue :: Meta -> OccValue -> (A.Type, A.Expression)
renderValue m (OccBool True) = (A.Bool, A.True m)
@ -154,6 +158,7 @@ renderValue m v = (t, A.ExprLiteral m (A.Literal m t lr))
where (t, lr) = renderLiteral m v
renderLiteral :: Meta -> OccValue -> (A.Type, A.LiteralRepr)
renderLiteral m (OccByte c) = (A.Byte, A.ByteLiteral m $ renderChar c)
renderLiteral m (OccInt i) = (A.Int, A.IntLiteral m $ show i)
renderLiteral m (OccArray vs)
= (t, A.ArrayLiteral m aes)
@ -161,6 +166,18 @@ renderLiteral m (OccArray vs)
t = makeArrayType (A.Dimension $ length vs) (head ts)
(ts, aes) = unzip $ map (renderLiteralArray m) vs
renderChar :: Char -> String
renderChar '\'' = "*'"
renderChar '\"' = "*\""
renderChar '*' = "**"
renderChar '\r' = "*c"
renderChar '\n' = "*n"
renderChar '\t' = "*t"
renderChar c
| (o < 32 || o > 127) = printf "*#%02x" o
| otherwise = [c]
where o = ord c
renderLiteralArray :: Meta -> OccValue -> (A.Type, A.ArrayElem)
renderLiteralArray m (OccArray vs)
= (t, A.ArrayElemArray aes)

View File

@ -5,6 +5,7 @@ import Control.Monad.Error
import Control.Monad.Identity
import Control.Monad.State
import Data.Bits
import Data.Char
import Data.Generics
import Data.Int
import Data.Maybe
@ -22,6 +23,7 @@ instance Die EvalM where
-- | Occam values of various types.
data OccValue =
OccBool Bool
| OccByte Char
| OccInt Int32
| OccArray [OccValue]
deriving (Show, Eq, Typeable, Data)
@ -49,6 +51,14 @@ evalIntExpression e
Right (OccInt val) -> return $ fromIntegral val
Right _ -> die "expression is not of INT type"
-- | Evaluate a byte literal.
evalByte :: (PSM m, Die m) => String -> m Char
evalByte s
= do ps <- get
case runEvaluator ps (evalByteLiteral s) of
Left err -> die $ "cannot evaluate byte literal: " ++ err
Right (OccByte ch) -> return ch
-- | Run an evaluator operation.
runEvaluator :: ParseState -> EvalM OccValue -> Either String OccValue
runEvaluator ps func
@ -67,7 +77,25 @@ fromRead _ _ = throwError "cannot parse literal"
-- | Evaluate a simple (non-array) literal.
evalSimpleLiteral :: A.Literal -> EvalM OccValue
evalSimpleLiteral (A.Literal _ A.Byte (A.ByteLiteral _ s)) = evalByteLiteral s
evalSimpleLiteral (A.Literal _ A.Int (A.IntLiteral _ s)) = fromRead OccInt $ readDec s
evalSimpleLiteral (A.Literal _ A.Int (A.HexLiteral _ s)) = fromRead OccInt $ readHex s
evalSimpleLiteral _ = throwError "bad literal"
-- | Evaluate a byte literal.
evalByteLiteral :: String -> EvalM OccValue
evalByteLiteral ('*':'#':hex)
= do OccInt n <- fromRead OccInt $ readHex hex
return $ OccByte (chr $ fromIntegral n)
evalByteLiteral ['*', ch]
= return $ OccByte (star ch)
where
star :: Char -> Char
star 'c' = '\r'
star 'n' = '\n'
star 't' = '\t'
star 's' = ' '
star c = c
evalByteLiteral [ch]
= return $ OccByte ch
evalByteLiteral _ = throwError "bad BYTE literal"

View File

@ -1,6 +1,7 @@
-- | Generate C code from the mangled AST.
module GenerateC where
import Data.Char
import Data.List
import Data.Maybe
import Control.Monad.Writer
@ -10,6 +11,7 @@ import Numeric
import Text.Printf
import qualified AST as A
import EvalLiterals
import Metadata
import ParseState
import Pass
@ -202,15 +204,30 @@ genConversion m cm t e = missing $ "genConversion " ++ show cm
--{{{ literals
genLiteral :: A.Literal -> CGen ()
genLiteral (A.Literal m t lr) = genLiteralRepr lr
genLiteral (A.Literal _ _ lr)
= if isStringLiteral lr
then do tell ["\""]
let A.ArrayLiteral _ aes = lr
sequence_ [genByteLiteral s
| A.ArrayElemExpr (A.ExprLiteral _ (A.Literal _ _ (A.ByteLiteral _ s))) <- aes]
tell ["\""]
else genLiteralRepr lr
genLiteral l = missing $ "genLiteral " ++ show l
-- | Does a LiteralRepr represent something that can be a plain string literal?
isStringLiteral :: A.LiteralRepr -> Bool
isStringLiteral (A.ArrayLiteral _ aes)
= and [case ae of
A.ArrayElemExpr (A.ExprLiteral _ (A.Literal _ _ (A.ByteLiteral _ _))) -> True
_ -> False
| ae <- aes]
isStringLiteral _ = False
genLiteralRepr :: A.LiteralRepr -> CGen ()
genLiteralRepr (A.RealLiteral m s) = tell [s]
genLiteralRepr (A.IntLiteral m s) = genDecimal s
genLiteralRepr (A.HexLiteral m s) = tell ["0x", s]
genLiteralRepr (A.ByteLiteral m s) = tell ["'", convStringLiteral s, "'"]
genLiteralRepr (A.StringLiteral m s) = tell ["\"", convStringLiteral s, "\""]
genLiteralRepr (A.ByteLiteral m s) = tell ["'"] >> genByteLiteral s >> tell ["'"]
genLiteralRepr (A.ArrayLiteral m aes)
= do tell ["{"]
genArrayLiteralElems aes
@ -236,23 +253,23 @@ genArrayLiteralElems aes
A.Array _ _ -> missing $ "array literal containing non-literal array: " ++ show e
_ -> genExpression e
hexToOct :: String -> String
hexToOct h = printf "%03o" ((fst $ head $ readHex h) :: Int)
genByteLiteral :: String -> CGen ()
genByteLiteral s
= do c <- evalByte s
tell [convByte c]
convStringLiteral :: String -> String
convStringLiteral [] = []
convStringLiteral ('\\':s) = "\\\\" ++ convStringLiteral s
convStringLiteral ('*':'#':'0':'0':s) = "\\0" ++ convStringLiteral s
convStringLiteral ('*':'#':a:b:s) = "\\" ++ hexToOct [a, b] ++ convStringLiteral s
convStringLiteral ('*':c:s) = convStringStar c ++ convStringLiteral s
convStringLiteral (c:s) = c : convStringLiteral s
convStringStar :: Char -> String
convStringStar 'c' = "\\r"
convStringStar 'n' = "\\n"
convStringStar 't' = "\\t"
convStringStar 's' = " "
convStringStar c = [c]
convByte :: Char -> String
convByte '\'' = "\\'"
convByte '"' = "\\\""
convByte '\\' = "\\\\"
convByte '\r' = "\\r"
convByte '\n' = "\\n"
convByte '\t' = "\\t"
convByte c
| o == 0 = "\\0"
| (o < 32 || o > 127) = printf "\\%03o" o
| otherwise = [c]
where o = ord c
--}}}
--{{{ variables

View File

@ -693,9 +693,9 @@ byte :: OccParser A.LiteralRepr
byte
= do m <- md
char '\''
s <- character
c <- literalCharacter
sApos
return $ A.ByteLiteral m s
return c
<?> "byte literal"
-- i.e. array literal
@ -716,35 +716,45 @@ table'
popTypeContext
ets <- mapM typeOfExpression es
t <- listType m ets
-- If any of the subelements are nested array literals, collapse them.
let aes = [case e of
A.ExprLiteral _ (A.Literal _ _ al@(A.ArrayLiteral _ subAEs)) ->
A.ArrayElemArray subAEs
_ -> A.ArrayElemExpr e
| e <- es]
aes <- mapM collapseArrayElem es
return $ A.Literal m t (A.ArrayLiteral m aes)
<|> maybeSliced table A.SubscriptedLiteral typeOfLiteral
<?> "table'"
-- | Collapse nested array literals.
collapseArrayElem :: A.Expression -> OccParser A.ArrayElem
collapseArrayElem e
= case e of
A.ExprLiteral _ (A.Literal _ _ (A.ArrayLiteral _ subAEs)) ->
return $ A.ArrayElemArray subAEs
_ -> return $ A.ArrayElemExpr e
stringLiteral :: OccParser (A.LiteralRepr, A.Dimension)
stringLiteral
= do m <- md
char '"'
cs <- manyTill character sQuote
return (A.StringLiteral m $ concat cs, A.Dimension $ length cs)
cs <- manyTill literalCharacter sQuote
let aes = [A.ArrayElemExpr $ A.ExprLiteral m (A.Literal m A.Byte c) | c <- cs]
return (A.ArrayLiteral m aes, A.Dimension $ length cs)
<?> "string literal"
character :: OccParser String
character
= do char '*'
(do char '#'
a <- hexDigit
b <- hexDigit
return $ ['*', '#', a, b])
<|> do { c <- anyChar; return ['*', c] }
do char '#'
a <- hexDigit
b <- hexDigit
return $ ['*', '#', a, b]
<|> do { c <- anyChar; return ['*', c] }
<|> do c <- anyChar
return [c]
<?> "character"
literalCharacter :: OccParser A.LiteralRepr
literalCharacter
= do m <- md
c <- character
return $ A.ByteLiteral m c
--}}}
--{{{ expressions
expressionList :: [A.Type] -> OccParser A.ExpressionList

View File

@ -7,5 +7,6 @@ PROC P ()
VAL BYTE cc IS '"':
VAL BYTE ccx IS '*"':
VAL BYTE ccc IS '*'':
VAL [5][5]BYTE square IS ["sator", "arepo", "tenas", "opera", "rotas"]:
SKIP
: