From 057a3a0a671407cc825532e7227363b65d8aeceb Mon Sep 17 00:00:00 2001 From: Adam Sampson Date: Sun, 29 Apr 2007 21:31:56 +0000 Subject: [PATCH] Better string literal handling: do away with StringLiteral in favour of arrays of ByteLiteral --- fco2/AST.hs | 1 - fco2/EvalConstants.hs | 17 +++++++++++ fco2/EvalLiterals.hs | 28 ++++++++++++++++++ fco2/GenerateC.hs | 55 +++++++++++++++++++++++------------- fco2/Parse.hs | 40 ++++++++++++++++---------- fco2/testcases/stringlit.occ | 1 + 6 files changed, 107 insertions(+), 35 deletions(-) diff --git a/fco2/AST.hs b/fco2/AST.hs index d6bc63d..7989f65 100644 --- a/fco2/AST.hs +++ b/fco2/AST.hs @@ -73,7 +73,6 @@ data LiteralRepr = | IntLiteral Meta String | HexLiteral Meta String | ByteLiteral Meta String - | StringLiteral Meta String | ArrayLiteral Meta [ArrayElem] deriving (Show, Eq, Typeable, Data) diff --git a/fco2/EvalConstants.hs b/fco2/EvalConstants.hs index 44f9f52..bffde03 100644 --- a/fco2/EvalConstants.hs +++ b/fco2/EvalConstants.hs @@ -5,10 +5,12 @@ import Control.Monad.Error import Control.Monad.Identity import Control.Monad.State import Data.Bits +import Data.Char import Data.Generics import Data.Int import Data.Maybe import Numeric +import Text.Printf import qualified AST as A import Errors @@ -145,7 +147,9 @@ evalDyadic A.LessEq a b = evalDyadic A.More b a evalDyadic A.MoreEq a b = evalDyadic A.Less b a evalDyadic A.After (OccInt a) (OccInt b) = return $ OccBool ((a - b) > 0) evalDyadic _ _ _ = throwError "bad dyadic op" +--}}} +--{{{ rendering values -- | Convert a value back into a literal. renderValue :: Meta -> OccValue -> (A.Type, A.Expression) renderValue m (OccBool True) = (A.Bool, A.True m) @@ -154,6 +158,7 @@ renderValue m v = (t, A.ExprLiteral m (A.Literal m t lr)) where (t, lr) = renderLiteral m v renderLiteral :: Meta -> OccValue -> (A.Type, A.LiteralRepr) +renderLiteral m (OccByte c) = (A.Byte, A.ByteLiteral m $ renderChar c) renderLiteral m (OccInt i) = (A.Int, A.IntLiteral m $ show i) renderLiteral m (OccArray vs) = (t, A.ArrayLiteral m aes) @@ -161,6 +166,18 @@ renderLiteral m (OccArray vs) t = makeArrayType (A.Dimension $ length vs) (head ts) (ts, aes) = unzip $ map (renderLiteralArray m) vs +renderChar :: Char -> String +renderChar '\'' = "*'" +renderChar '\"' = "*\"" +renderChar '*' = "**" +renderChar '\r' = "*c" +renderChar '\n' = "*n" +renderChar '\t' = "*t" +renderChar c + | (o < 32 || o > 127) = printf "*#%02x" o + | otherwise = [c] + where o = ord c + renderLiteralArray :: Meta -> OccValue -> (A.Type, A.ArrayElem) renderLiteralArray m (OccArray vs) = (t, A.ArrayElemArray aes) diff --git a/fco2/EvalLiterals.hs b/fco2/EvalLiterals.hs index 693beb7..c574876 100644 --- a/fco2/EvalLiterals.hs +++ b/fco2/EvalLiterals.hs @@ -5,6 +5,7 @@ import Control.Monad.Error import Control.Monad.Identity import Control.Monad.State import Data.Bits +import Data.Char import Data.Generics import Data.Int import Data.Maybe @@ -22,6 +23,7 @@ instance Die EvalM where -- | Occam values of various types. data OccValue = OccBool Bool + | OccByte Char | OccInt Int32 | OccArray [OccValue] deriving (Show, Eq, Typeable, Data) @@ -49,6 +51,14 @@ evalIntExpression e Right (OccInt val) -> return $ fromIntegral val Right _ -> die "expression is not of INT type" +-- | Evaluate a byte literal. +evalByte :: (PSM m, Die m) => String -> m Char +evalByte s + = do ps <- get + case runEvaluator ps (evalByteLiteral s) of + Left err -> die $ "cannot evaluate byte literal: " ++ err + Right (OccByte ch) -> return ch + -- | Run an evaluator operation. runEvaluator :: ParseState -> EvalM OccValue -> Either String OccValue runEvaluator ps func @@ -67,7 +77,25 @@ fromRead _ _ = throwError "cannot parse literal" -- | Evaluate a simple (non-array) literal. evalSimpleLiteral :: A.Literal -> EvalM OccValue +evalSimpleLiteral (A.Literal _ A.Byte (A.ByteLiteral _ s)) = evalByteLiteral s evalSimpleLiteral (A.Literal _ A.Int (A.IntLiteral _ s)) = fromRead OccInt $ readDec s evalSimpleLiteral (A.Literal _ A.Int (A.HexLiteral _ s)) = fromRead OccInt $ readHex s evalSimpleLiteral _ = throwError "bad literal" +-- | Evaluate a byte literal. +evalByteLiteral :: String -> EvalM OccValue +evalByteLiteral ('*':'#':hex) + = do OccInt n <- fromRead OccInt $ readHex hex + return $ OccByte (chr $ fromIntegral n) +evalByteLiteral ['*', ch] + = return $ OccByte (star ch) + where + star :: Char -> Char + star 'c' = '\r' + star 'n' = '\n' + star 't' = '\t' + star 's' = ' ' + star c = c +evalByteLiteral [ch] + = return $ OccByte ch +evalByteLiteral _ = throwError "bad BYTE literal" diff --git a/fco2/GenerateC.hs b/fco2/GenerateC.hs index f8d770f..d6871ef 100644 --- a/fco2/GenerateC.hs +++ b/fco2/GenerateC.hs @@ -1,6 +1,7 @@ -- | Generate C code from the mangled AST. module GenerateC where +import Data.Char import Data.List import Data.Maybe import Control.Monad.Writer @@ -10,6 +11,7 @@ import Numeric import Text.Printf import qualified AST as A +import EvalLiterals import Metadata import ParseState import Pass @@ -202,15 +204,30 @@ genConversion m cm t e = missing $ "genConversion " ++ show cm --{{{ literals genLiteral :: A.Literal -> CGen () -genLiteral (A.Literal m t lr) = genLiteralRepr lr +genLiteral (A.Literal _ _ lr) + = if isStringLiteral lr + then do tell ["\""] + let A.ArrayLiteral _ aes = lr + sequence_ [genByteLiteral s + | A.ArrayElemExpr (A.ExprLiteral _ (A.Literal _ _ (A.ByteLiteral _ s))) <- aes] + tell ["\""] + else genLiteralRepr lr genLiteral l = missing $ "genLiteral " ++ show l +-- | Does a LiteralRepr represent something that can be a plain string literal? +isStringLiteral :: A.LiteralRepr -> Bool +isStringLiteral (A.ArrayLiteral _ aes) + = and [case ae of + A.ArrayElemExpr (A.ExprLiteral _ (A.Literal _ _ (A.ByteLiteral _ _))) -> True + _ -> False + | ae <- aes] +isStringLiteral _ = False + genLiteralRepr :: A.LiteralRepr -> CGen () genLiteralRepr (A.RealLiteral m s) = tell [s] genLiteralRepr (A.IntLiteral m s) = genDecimal s genLiteralRepr (A.HexLiteral m s) = tell ["0x", s] -genLiteralRepr (A.ByteLiteral m s) = tell ["'", convStringLiteral s, "'"] -genLiteralRepr (A.StringLiteral m s) = tell ["\"", convStringLiteral s, "\""] +genLiteralRepr (A.ByteLiteral m s) = tell ["'"] >> genByteLiteral s >> tell ["'"] genLiteralRepr (A.ArrayLiteral m aes) = do tell ["{"] genArrayLiteralElems aes @@ -236,23 +253,23 @@ genArrayLiteralElems aes A.Array _ _ -> missing $ "array literal containing non-literal array: " ++ show e _ -> genExpression e -hexToOct :: String -> String -hexToOct h = printf "%03o" ((fst $ head $ readHex h) :: Int) +genByteLiteral :: String -> CGen () +genByteLiteral s + = do c <- evalByte s + tell [convByte c] -convStringLiteral :: String -> String -convStringLiteral [] = [] -convStringLiteral ('\\':s) = "\\\\" ++ convStringLiteral s -convStringLiteral ('*':'#':'0':'0':s) = "\\0" ++ convStringLiteral s -convStringLiteral ('*':'#':a:b:s) = "\\" ++ hexToOct [a, b] ++ convStringLiteral s -convStringLiteral ('*':c:s) = convStringStar c ++ convStringLiteral s -convStringLiteral (c:s) = c : convStringLiteral s - -convStringStar :: Char -> String -convStringStar 'c' = "\\r" -convStringStar 'n' = "\\n" -convStringStar 't' = "\\t" -convStringStar 's' = " " -convStringStar c = [c] +convByte :: Char -> String +convByte '\'' = "\\'" +convByte '"' = "\\\"" +convByte '\\' = "\\\\" +convByte '\r' = "\\r" +convByte '\n' = "\\n" +convByte '\t' = "\\t" +convByte c + | o == 0 = "\\0" + | (o < 32 || o > 127) = printf "\\%03o" o + | otherwise = [c] + where o = ord c --}}} --{{{ variables diff --git a/fco2/Parse.hs b/fco2/Parse.hs index 1b3f06f..cda8d29 100644 --- a/fco2/Parse.hs +++ b/fco2/Parse.hs @@ -693,9 +693,9 @@ byte :: OccParser A.LiteralRepr byte = do m <- md char '\'' - s <- character + c <- literalCharacter sApos - return $ A.ByteLiteral m s + return c "byte literal" -- i.e. array literal @@ -716,35 +716,45 @@ table' popTypeContext ets <- mapM typeOfExpression es t <- listType m ets - -- If any of the subelements are nested array literals, collapse them. - let aes = [case e of - A.ExprLiteral _ (A.Literal _ _ al@(A.ArrayLiteral _ subAEs)) -> - A.ArrayElemArray subAEs - _ -> A.ArrayElemExpr e - | e <- es] + aes <- mapM collapseArrayElem es return $ A.Literal m t (A.ArrayLiteral m aes) <|> maybeSliced table A.SubscriptedLiteral typeOfLiteral "table'" +-- | Collapse nested array literals. +collapseArrayElem :: A.Expression -> OccParser A.ArrayElem +collapseArrayElem e + = case e of + A.ExprLiteral _ (A.Literal _ _ (A.ArrayLiteral _ subAEs)) -> + return $ A.ArrayElemArray subAEs + _ -> return $ A.ArrayElemExpr e + stringLiteral :: OccParser (A.LiteralRepr, A.Dimension) stringLiteral = do m <- md char '"' - cs <- manyTill character sQuote - return (A.StringLiteral m $ concat cs, A.Dimension $ length cs) + cs <- manyTill literalCharacter sQuote + let aes = [A.ArrayElemExpr $ A.ExprLiteral m (A.Literal m A.Byte c) | c <- cs] + return (A.ArrayLiteral m aes, A.Dimension $ length cs) "string literal" character :: OccParser String character = do char '*' - (do char '#' - a <- hexDigit - b <- hexDigit - return $ ['*', '#', a, b]) - <|> do { c <- anyChar; return ['*', c] } + do char '#' + a <- hexDigit + b <- hexDigit + return $ ['*', '#', a, b] + <|> do { c <- anyChar; return ['*', c] } <|> do c <- anyChar return [c] "character" + +literalCharacter :: OccParser A.LiteralRepr +literalCharacter + = do m <- md + c <- character + return $ A.ByteLiteral m c --}}} --{{{ expressions expressionList :: [A.Type] -> OccParser A.ExpressionList diff --git a/fco2/testcases/stringlit.occ b/fco2/testcases/stringlit.occ index 5f60584..431c0b8 100644 --- a/fco2/testcases/stringlit.occ +++ b/fco2/testcases/stringlit.occ @@ -7,5 +7,6 @@ PROC P () VAL BYTE cc IS '"': VAL BYTE ccx IS '*"': VAL BYTE ccc IS '*'': + VAL [5][5]BYTE square IS ["sator", "arepo", "tenas", "opera", "rotas"]: SKIP :