Rewrite AnalyseAsm's parser.

It now just uses "words" and regular pattern matches rather than regular
expressions. The resulting code is quite a bit simpler, and goes much faster.

I've added some unit tests for it too.
This commit is contained in:
Adam Sampson 2008-04-05 22:05:17 +00:00
parent 4ef1ff7196
commit 17702401fa
4 changed files with 122 additions and 53 deletions

View File

@ -127,6 +127,7 @@ tock_SOURCES = $(tock_SOURCES_hs) frontends/LexOccam.x frontends/LexRain.x
tocktest_SOURCES = $(tock_SOURCES)
tocktest_SOURCES += TestMain.hs
tocktest_SOURCES += backends/AnalyseAsmTest.hs
tocktest_SOURCES += backends/BackendPassesTest.hs
tocktest_SOURCES += backends/GenerateCTest.hs
tocktest_SOURCES += checks/ArrayUsageCheckTest.hs

View File

@ -18,6 +18,8 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
-- | A module containing the 'main' function for the Tock test suite. It currently runs tests from the following modules:
--
-- * "AnalyseAsmTest"
--
-- * "ArrayUsageCheckTest"
--
-- * "BackendPassesTest"
@ -55,6 +57,7 @@ import System.Exit
import System.IO
import Test.HUnit
import qualified AnalyseAsmTest (tests)
import qualified ArrayUsageCheckTest (ioqcTests)
import qualified BackendPassesTest (qcTests)
import qualified CommonTest (tests)
@ -171,7 +174,8 @@ main = do (opts, nonOpts, errs) <- getArgs >>* getOpt RequireOrder options
tests :: [IO TestSet]
tests = [
ArrayUsageCheckTest.ioqcTests
noqc AnalyseAsmTest.tests
,ArrayUsageCheckTest.ioqcTests
,return BackendPassesTest.qcTests
,noqc CommonTest.tests
,return FlowGraphTest.qcTests

View File

@ -1,6 +1,6 @@
{-
Tock: a compiler for parallel languages
Copyright (C) 2007 University of Kent
Copyright (C) 2007, 2008 University of Kent
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@ -22,9 +22,13 @@ with this program. If not, see <http://www.gnu.org/licenses/>.
-- FIXME: This only works for x86 at the moment.
-- FIXME: This should have a "just use a huge fixed number" mode for debugging.
module AnalyseAsm (analyseAsm) where
module AnalyseAsm (
AsmItem(..),
parseAsmLine, analyseAsm
) where
import Control.Monad.State
import Data.Char
import Data.Generics
import Data.List
import qualified Data.Map as Map
@ -32,7 +36,6 @@ import Data.Maybe
import qualified Data.Set as Set
import Numeric (readDec)
import Text.Printf
import Text.Regex
import Errors
import Pass
@ -40,7 +43,7 @@ import PrettyShow
-- | Interesting things that we might find in the assembly source.
data AsmItem =
AsmFunction String
AsmLabel String
| AsmStackInc Int
| AsmCall String
deriving (Show, Eq, Data, Typeable)
@ -49,58 +52,50 @@ data AsmItem =
-- interested in.
parseAsmLine :: String -> Maybe AsmItem
parseAsmLine s
= matchLabel s' `mplus` matchInc s' `mplus` matchPush s' `mplus` matchCall s'
where
s' = trim s
= case words s of
[] -> Nothing
trim :: String -> String
trim s = subRegex wsRE (subRegex startwsRE (subRegex endwsRE s "") "") " "
where
wsRE = mkRegex "[[:space:]]+"
startwsRE = mkRegex "^[[:space:]]+"
endwsRE = mkRegex "[[:space:]]+$"
matchLabel :: String -> Maybe AsmItem
matchLabel s
= case matchRegex labelRE s of
Just [l] -> Just $ AsmFunction l
_ -> Nothing
where
labelRE = mkRegex "^([^\\.0-9][a-zA-Z0-9_]*):$"
matchInc :: String -> Maybe AsmItem
matchInc s
= case matchRegex incdecRE s of
Just [_, v] -> Just $ AsmStackInc (parseVal v)
_ -> Nothing
where
-- The x86 stack goes downwards, so subl makes the stack deeper.
-- GCC will sometimes generate "addl $-n" rather than "subl $n".
incdecRE = mkRegex "^(subl \\$|addl \\$-)([0-9]+), %esp$"
["subl", '$':arg, "%esp"] -> parseInc arg
-- ... but GCC will sometimes generate "addl $-n" rather than "subl
-- $n".
["addl", '$':'-':arg, "%esp"] -> parseInc arg
-- A plain push also makes the stack deeper.
("pushl":_) -> Just $ AsmStackInc 4
parseVal :: String -> Int
parseVal s
= case readDec s of
[(v, "")] -> v
_ -> error $ "Don't know how to parse assembly literal: " ++ s
matchPush :: String -> Maybe AsmItem
matchPush s
= case matchRegex pushRE s of
Just [] -> Just $ AsmStackInc 4
_ -> Nothing
where
pushRE = mkRegex "^pushl"
matchCall :: String -> Maybe AsmItem
matchCall s
= case matchRegex callRE s of
Just [_, l] -> Just $ AsmCall l
_ -> Nothing
where
["call", arg] -> parseCall arg
-- GCC does tail-call optimisation, so we need to look for jmp as well
-- as call.
callRE = mkRegex "^(jmp|call) ([^\\.\\*].*)$"
["jmp", arg] -> parseCall arg
[label] -> parseLabel label
_ -> Nothing
where
-- | Parse a label: a line ending in @:@, and not starting with @.@ or a
-- digit.
parseLabel :: String -> Maybe AsmItem
parseLabel s@(c:cs)
| c == '.' || isDigit c = Nothing
| last cs == ':' = Just $ AsmLabel (liat s)
| otherwise = Nothing
where
liat :: String -> String
liat = reverse . tail . reverse
-- | Parse a stack increase: just a number.
parseInc :: String -> Maybe AsmItem
parseInc s
= case readDec s of
[(v, ",")] -> Just $ AsmStackInc v
_ -> Nothing
-- | Parse a called label, which mustn't start with @.@ or @*@.
parseCall :: String -> Maybe AsmItem
parseCall ('.':_) = Nothing
parseCall ('*':_) = Nothing
parseCall s = Just $ AsmCall s
-- | Turn assembly source into a list of interesting things.
parseAsm :: String -> [AsmItem]
@ -136,7 +131,7 @@ collectInfo ais = collectInfo' ais ""
let fi = Map.findWithDefault emptyFI func fmap
let (func', fi')
= case ai of
AsmFunction newFunc -> (newFunc, fi)
AsmLabel newFunc -> (newFunc, fi)
AsmStackInc v ->
-- This overestimates: it adds together all the stack
-- allocations it finds, rather than trying to figure

View File

@ -0,0 +1,69 @@
{-
Tock: a compiler for parallel languages
Copyright (C) 2008 University of Kent
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 2 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
-}
-- #ignore-exports
-- | Tests for 'AnalyseAsm'.
module AnalyseAsmTest (tests) where
import Test.HUnit hiding (State)
import AnalyseAsm
testParse :: Test
testParse = TestList
[ testLine 50 "" Nothing
, testLine 51 " " Nothing
, testLine 52 "addl %eax, %ebx" $ Nothing
, testLine 100 "subl $123, %esp" $ Just $ AsmStackInc 123
, testLine 101 "addl $-123, %esp" $ Just $ AsmStackInc 123
, testLine 102 "pushl %eax" $ Just $ AsmStackInc 4
, testLine 103 " pushl %eax" $ Just $ AsmStackInc 4
, testLine 104 "pushl %eax " $ Just $ AsmStackInc 4
, testLine 105 "\tpushl %eax " $ Just $ AsmStackInc 4
, testLine 150 "subl $123, %eax" $ Nothing
, testLine 151 "subl $-123, %esp" $ Nothing
, testLine 152 "addl $-123, %eax" $ Nothing
, testLine 153 "addl $123, %esp" $ Nothing
, testLine 154 "popl %eax" $ Nothing
, testLine 200 "call foo" $ Just $ AsmCall "foo"
, testLine 201 "jmp foo" $ Just $ AsmCall "foo"
, testLine 250 "call *%eax" $ Nothing
, testLine 251 "jmp *%eax" $ Nothing
, testLine 252 "call .0" $ Nothing
, testLine 253 "jmp .0" $ Nothing
, testLine 300 "foo:" $ Just $ AsmLabel "foo"
, testLine 350 ".blah:" $ Nothing
, testLine 351 "0:" $ Nothing
]
where
testLine :: Int -> String -> Maybe AsmItem -> Test
testLine n s exp = TestCase $ assertEqual ("testParse" ++ show n)
exp (parseAsmLine s)
tests :: Test
tests = TestLabel "AnalyseAsmTest" $ TestList
[ testParse
]