Add the Tock hacker's guide to the repository.
This used to be in a CSProjects Subversion repo, but I figure if we keep it with the code it's more likely to stay up-to-date.
This commit is contained in:
parent
bcad0c225f
commit
16fee9c9eb
47
docextra/hacking-guide/cheat-sheet.tex
Normal file
47
docextra/hacking-guide/cheat-sheet.tex
Normal file
|
@ -0,0 +1,47 @@
|
|||
\documentclass[a4wide]{article}
|
||||
|
||||
\usepackage{color}
|
||||
\usepackage{crg-group}
|
||||
\usepackage{listings}
|
||||
\usepackage[a4paper=true,colorlinks=true,urlcolor=blue]{hyperref}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\haskellsettings
|
||||
|
||||
\begin{lstlisting}
|
||||
-- Lists
|
||||
|
||||
foldl :: (accum -> item -> accum) -> accum -> [item] -> accum
|
||||
foldM :: Monad m => (accum -> item -> m accum) -> accum -> [item] -> m accum
|
||||
|
||||
mapAccumL :: (accum -> a -> (accum, b)) -> accum -> [a] -> (accum, [b])
|
||||
|
||||
zip :: [a] -> [b] -> [(a, b)]
|
||||
unzip :: [(a, b)] -> ([a], [b])
|
||||
|
||||
-- Misc
|
||||
|
||||
fromMaybe :: a -> Maybe a -> a
|
||||
maybe :: b -> (a -> b) -> Maybe a -> b
|
||||
|
||||
transformPair :: (x -> a) -> (y -> b) -> (x,y) -> (a,b)
|
||||
|
||||
-- Monads
|
||||
|
||||
liftM :: Monad m => (a -> b) -> m a -> m b
|
||||
liftM2 :: Monad m => (a1 -> a2 -> b) -> m a1 -> m a2 -> m b
|
||||
|
||||
lift :: (MonadTrans t, Monad m) => m a -> t m a -- lifts a value
|
||||
liftF :: (MonadTrans t, Monad m) => (a -> m b) -> (a -> t m b) -- lifts a function
|
||||
|
||||
-- State monad
|
||||
|
||||
runStateT :: Monad monad => StateT state monad value -> state -> monad (value,state)
|
||||
|
||||
evalStateT :: Monad monad => StateT state monad value -> state -> monad value
|
||||
execStateT :: Monad monad => StateT state monad value -> state -> monad state
|
||||
\end{lstlisting}
|
||||
|
||||
|
||||
\end{document}
|
93
docextra/hacking-guide/crg-group.sty
Normal file
93
docextra/hacking-guide/crg-group.sty
Normal file
|
@ -0,0 +1,93 @@
|
|||
\definecolor{KentBlue}{rgb}{0.0,0.2196,0.5098} % 0, 56, 130
|
||||
\definecolor{KentRed}{rgb}{0.7058,0.0117,0.3607} % 180, 3, 92
|
||||
\definecolor{KentGreen}{rgb}{0.0,0.4785,0.3686} % 0, 122, 94
|
||||
|
||||
\usepackage{pifont}
|
||||
\usepackage{xspace}
|
||||
|
||||
\usepackage{listings}
|
||||
|
||||
|
||||
%Note: return isn't a Haskell keyword, but it's used enough and important
|
||||
% enough that I think it's worth highlighting as if it were one.
|
||||
|
||||
%Note: the shorter keyword symbols (like =) must go before any longer
|
||||
% versions (like =>) in otherkeywords
|
||||
|
||||
%Also, "otherkeywords" seem to be highlighted even in strings. This
|
||||
% is partly why I haven't defined _ as a keyword.
|
||||
|
||||
%String highlighting is difficult, especially since foo' is an identifier
|
||||
% in Haskell, not the start of a char literal! Therefore I suggest
|
||||
% never applying any special formatting to strings. I've also removed
|
||||
% the single quote as a string delimiter for this reason.
|
||||
\lstdefinelanguage[improved]{Haskell}
|
||||
% To separate out word keywords from symbol keywords for different formatting,
|
||||
% we define the word keywords as emph items (use emphstyle):
|
||||
{classoffset=0,
|
||||
%If we don't specify at least one "non-other" keyword, listings doesn't work, hence:
|
||||
morekeywords={hduisahfiuabfyasbyoasvbfuyvosf},
|
||||
otherkeywords={::,=,==,->,=>,>>,>>=,>>*,$,++,<-,<|>},
|
||||
classoffset=1,
|
||||
morekeywords={data,type,newtype,let,in,do,where,if,then,else,return},
|
||||
% For some (unknown) reason, setting classoffset = 0 again after this line
|
||||
% breaks the highlighting.
|
||||
morecomment=[l]{--},
|
||||
% morestring=[b]',
|
||||
morestring=[b]",
|
||||
}
|
||||
%TODO The -> operator looks particularly bad (the dash is very thin).
|
||||
% I have seen Haskell papers that use the maths -> symbol instead -- listings
|
||||
% package does allow us to escape to maths mode, so perhaps we should try that...
|
||||
|
||||
\lstdefinelanguage[21]{occam}
|
||||
{morekeywords={BYTE,CHAN,FOR,FROM,IF,INT,INT32,IS,PAR,PROC,RESHAPES,RETYPES,SEQ,SIZE,TRUE,VAL,WHILE},
|
||||
otherkeywords={:,:=},
|
||||
morecomment=[l]{--}
|
||||
}
|
||||
|
||||
\lstdefinelanguage{Rain}
|
||||
{morekeywords={if,while,process,function},
|
||||
otherkeywords={!,?,??,=,==,+,-,*,+=,-=,*=},
|
||||
morecomment=[l]{//}
|
||||
}
|
||||
|
||||
\def\haskellsettings{
|
||||
\lstset{
|
||||
language={[improved]Haskell},
|
||||
columns=flexible,
|
||||
basicstyle=\small,
|
||||
emphstyle=\color{KentRed}\bfseries,
|
||||
keywordstyle=[1]{\color{KentBlue}\bfseries},
|
||||
keywordstyle=[0]{\color{KentBlue}\bfseries\ttfamily},
|
||||
identifierstyle=,
|
||||
commentstyle=\color{KentGreen}\itshape,
|
||||
stringstyle=,
|
||||
showstringspaces=false}
|
||||
}
|
||||
|
||||
\def\rainsettings{
|
||||
\lstset{
|
||||
language={Rain},
|
||||
columns=fixed,
|
||||
basicstyle=\small\ttfamily,
|
||||
emphstyle=\color{KentBlue}\bfseries,
|
||||
keywordstyle=\color{KentBlue}\bfseries,
|
||||
identifierstyle=,
|
||||
commentstyle=\color{KentGreen}\itshape,
|
||||
stringstyle=,
|
||||
showstringspaces=false}
|
||||
}
|
||||
|
||||
\def\occamsettings{
|
||||
\lstset{
|
||||
language={[21]occam},
|
||||
columns=fixed,
|
||||
basicstyle=\small\ttfamily,
|
||||
emphstyle=\color{KentBlue}\bfseries,
|
||||
keywordstyle=\color{KentBlue}\bfseries,
|
||||
identifierstyle=,
|
||||
commentstyle=\color{KentGreen}\itshape,
|
||||
stringstyle=,
|
||||
showstringspaces=false}
|
||||
}
|
816
docextra/hacking-guide/tock-intro.tex
Normal file
816
docextra/hacking-guide/tock-intro.tex
Normal file
|
@ -0,0 +1,816 @@
|
|||
\documentclass[a4wide]{article}
|
||||
|
||||
\usepackage{a4wide}
|
||||
\usepackage{color}
|
||||
\usepackage{crg-group}
|
||||
\usepackage{listings}
|
||||
\usepackage[a4paper=true,colorlinks=true,urlcolor=blue]{hyperref}
|
||||
|
||||
\renewcommand{\haskellsettings}
|
||||
{
|
||||
\lstset{
|
||||
language={[improved]Haskell},
|
||||
columns=flexible,
|
||||
basicstyle=\small,
|
||||
emphstyle=\color{KentRed}\bfseries,
|
||||
keywordstyle=[1]{\color{KentBlue}\bfseries},
|
||||
keywordstyle=[0]{\color{KentBlue}\bfseries\ttfamily},
|
||||
identifierstyle=\ttfamily,
|
||||
commentstyle=\color{KentGreen}\itshape,
|
||||
stringstyle=,
|
||||
showstringspaces=false}
|
||||
|
||||
}
|
||||
|
||||
\title{Introduction to Working on Tock}
|
||||
\author{Neil Brown}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\haskellsettings
|
||||
|
||||
\maketitle
|
||||
\tableofcontents
|
||||
|
||||
\newpage
|
||||
|
||||
\section{Get the Code}
|
||||
|
||||
All details about checking out the code, committing your changes, the mailing list,
|
||||
and how to keep track of the repository, currently reside on this page:
|
||||
\url{http://www.cs.kent.ac.uk/research/groups/sys/wiki/Tock}.
|
||||
|
||||
Tock is held in a darcs repository. Darcs is broadly similar to CVS/SVN.
|
||||
You can find more details on the Darcs website (\url{http://darcs.net/}) or in the manual
|
||||
(\url{http://darcs.net/manual/}) but the following few commands will usually suffice:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{darcs whatsnew} -- shows what changes have been made but not committed (like: svn diff)
|
||||
\item \textbf{darcs record} -- records a patch (like: svn commit)
|
||||
\item \textbf{darcs pull} -- pulls changes from the parent repository (like: svn update)
|
||||
\item \textbf{darcs send} -- sends changes (against the parent repository) via email
|
||||
\end{itemize}
|
||||
|
||||
On Tock we favour utilising the strengths of Darcs, and making each patch independent and small.
|
||||
Obviously this is up to the judgement of the programmer, but one-line patches to fix a bug are
|
||||
perfectly acceptable. Patches that change over a hundred lines are to be avoided unless it really
|
||||
is a big change.
|
||||
|
||||
One other note: if at all possible, record separate patches for the tests and implementation that passes
|
||||
those tests. This makes it easier for someone else in future to check that the tests passed and
|
||||
failed appropriately before the implementation was changed/added.
|
||||
|
||||
\section{Find the Right Place}
|
||||
|
||||
Tock's modules are currently arranged into five directories. They are:
|
||||
|
||||
\begin{enumerate}
|
||||
\item ``common'' -- All modules that are used by many/most parts of the program.
|
||||
\item ``frontends'' -- All modules relating to lexing, parsing, preprocessing occam and Rain, as well
|
||||
as early steps in compilation like resolving names, checking types, etc.
|
||||
\item ``transformations'' -- All modules relating to transforming the tree either for simplicity,
|
||||
efficiency or simply to remove elements (e.g. parallel assignment) not supported by the backends.
|
||||
\item ``checks'' -- All modules relating to usage checks and other compiler checks.
|
||||
\item ``backends'' -- All modules related to the final step of turning AST into actual code.
|
||||
\end{enumerate}
|
||||
|
||||
The separation is by no means hard-and-fast, or perfect, but it's better than nothing.
|
||||
Tests are in the same directory as the thing they test.
|
||||
|
||||
The directories should provide a quick idea of where to find what you are interested in. Data types
|
||||
and functions common to the whole compiler, such as the AST definition and type helper functions
|
||||
are in ``common''. The other parts of the compiler are in the obvious order (frontends, transformations, checks,
|
||||
backends).
|
||||
|
||||
The \lstinline|Main| module in the main tock directory is the actual module for the tock executable.
|
||||
It merely deals with the command-line options and joins together the various passes according to
|
||||
the options given.
|
||||
|
||||
If you want to add a new frontend or backend, then add a new command-line option (look in the modules
|
||||
\lstinline|Main| and \lstinline|CompState|) for it and handle the option accordingly in the \lstinline|Main|
|
||||
module. To add a new pass, add it to the appropriate place in the list in the \lstinline|PassList|
|
||||
module, or add it to the appropriate pass-item already listed there (e.g. \lstinline|simplifyTypes|).
|
||||
|
||||
\section{Understand the Existing Code}
|
||||
|
||||
There are (unfortunately, but realistically) several barriers to understanding the existing Tock code:
|
||||
|
||||
\begin{enumerate}
|
||||
\item It's written in Haskell.
|
||||
\item It makes heavy use of monads.
|
||||
\item It uses generics.
|
||||
\item You need to understand the AST well.
|
||||
\item The C and C++ backends are quite dense and tricky.
|
||||
\end{enumerate}
|
||||
|
||||
The last point is somewhat unavoidable, without an inspired re-write. Knowledge of occam will help
|
||||
a lot with understanding the AST, except perhaps for the \lstinline|Structured| item (see below).
|
||||
Haskell knowledge can be solved with a book or two (or other web resources); monads and generics
|
||||
are each covered in a section below.
|
||||
|
||||
\subsection{Meta Tags}
|
||||
|
||||
Scattered throughout the definition of the AST you will find many \lstinline|Meta| items.
|
||||
\lstinline|Meta| is technically for any annotations about that part of the program. Currently, \lstinline|Meta| is only
|
||||
used for source position. It is included in every appropriate AST structure as the first item
|
||||
after the data constructor name. This allows us to easily use a (generic-based)
|
||||
\lstinline|findMeta| function for finding the first meta-tag in an item.
|
||||
|
||||
\subsection{A.Structured}
|
||||
|
||||
The main item in the AST that I (Neil) found confusing at first was the \lstinline|Structured| item.
|
||||
Thankfully, I've recently changed Structured to be parameterised (which helped), but I've left this explanation
|
||||
in anyway, in case it helps. Note that the \lstinline|AST| module is always imported as \lstinline|A|,
|
||||
hence all the `\lstinline|A.|' prefixes on the AST items discussed here.
|
||||
|
||||
Structured is the body of most occam constructs,
|
||||
such as SEQ, PAR, ALT, CASE. Because occam allows the inter-mingling of processes and declarations,
|
||||
and also replication on most of its constructs (SEQ, PAR, ALT, IF), Structured eliminates redundancy
|
||||
by grouping this together. SEQ and PAR have a \lstinline|A.Structured A.Process| as their `body',
|
||||
whereas, for example, ALT has a \lstinline|A.Structured A.Alternative|.
|
||||
|
||||
Here is the definition of the Structured item:
|
||||
|
||||
\haskellsettings\begin{lstlisting}
|
||||
data Structured a =
|
||||
Rep Meta Replicator (Structured a)
|
||||
| Spec Meta Specification (Structured a)
|
||||
| ProcThen Meta Process (Structured a)
|
||||
| Only Meta a
|
||||
| Several Meta [Structured a]
|
||||
\end{lstlisting}
|
||||
|
||||
So for example, given this occam pseudo-code:
|
||||
|
||||
\occamsettings\begin{lstlisting}
|
||||
SEQ
|
||||
proc1
|
||||
proc2
|
||||
\end{lstlisting}
|
||||
|
||||
\haskellsettings
|
||||
Here is how it would be represented in the AST (Taking \lstinline|proc1| and \lstinline|proc2| to be of type Process, and using
|
||||
\lstinline|m| for all meta-tags):
|
||||
|
||||
\haskellsettings\begin{lstlisting}
|
||||
A.Seq m
|
||||
(A.Several m
|
||||
[A.Only m proc1
|
||||
,A.Only m proc2
|
||||
]
|
||||
)
|
||||
\end{lstlisting}
|
||||
|
||||
You can see the combination of \lstinline|A.Seq| with \lstinline|A.Several| and \lstinline|A.Only| to nest the processes. Here's another example
|
||||
of some occam and corresponding Haskell:
|
||||
|
||||
\occamsettings\begin{lstlisting}
|
||||
SEQ
|
||||
proc1
|
||||
PAR
|
||||
proc2
|
||||
proc3
|
||||
\end{lstlisting}
|
||||
|
||||
\haskellsettings\begin{lstlisting}
|
||||
A.Seq m
|
||||
(A.Several m
|
||||
[A.Only m proc1
|
||||
,A.Only m
|
||||
(A.Par m A.PlainPar
|
||||
(A.Several m
|
||||
[A.Only m proc2
|
||||
,A.Only m proc3
|
||||
]
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
\end{lstlisting}
|
||||
|
||||
Which no doubt looks quite nasty! But things work differently if you nest two blocks of the same type,
|
||||
mainly because of the associativity of the various blocks in occam. Consider these two SEQ blocks:
|
||||
|
||||
\occamsettings\begin{lstlisting}
|
||||
SEQ
|
||||
proc1
|
||||
SEQ
|
||||
proc2
|
||||
proc3
|
||||
\end{lstlisting}
|
||||
|
||||
\haskellsettings\begin{lstlisting}
|
||||
A.Seq m
|
||||
(A.Several m
|
||||
[A.Only m proc1
|
||||
, (A.Several m
|
||||
[A.Only m proc2
|
||||
,A.Only m proc3
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
\end{lstlisting}
|
||||
|
||||
You can see that instead of creating a second \lstinline|A.Seq| inside the \lstinline|A.Several|, it
|
||||
has instead simply nested another \lstinline|A.Several|. In fact, we could later flatten the two
|
||||
nested \lstinline|A.Several|s into one if we wanted; in all \lstinline|A.Structured| items, this should always
|
||||
be possible to do (without altering the behaviour of the program).
|
||||
|
||||
Here is another example:
|
||||
|
||||
\occamsettings\begin{lstlisting}
|
||||
PAR
|
||||
proc1
|
||||
PAR i = 0 FOR 10
|
||||
proc2
|
||||
\end{lstlisting}
|
||||
|
||||
\haskellsettings\begin{lstlisting}
|
||||
A.Par m A.PlainPar
|
||||
(A.Several m
|
||||
[A.Only m proc1
|
||||
,A.Rep m rep (A.Only m proc2)
|
||||
]
|
||||
)
|
||||
\end{lstlisting}
|
||||
|
||||
I have used `rep' as a short-hand for the replicator (which is not the focus here). Hopefully it is
|
||||
now clear how \lstinline|A.Structured| is used as a body for things. There is only one more aspect to explain;
|
||||
specifications.
|
||||
|
||||
\occamsettings\begin{lstlisting}
|
||||
SEQ
|
||||
proc1
|
||||
INT x:
|
||||
proc2
|
||||
\end{lstlisting}
|
||||
|
||||
According to occam scoping rules, \lstinline|x| is in scope for \lstinline|proc2|. This is represented in the
|
||||
AST as follows:
|
||||
|
||||
\haskellsettings\begin{lstlisting}
|
||||
A.Seq m
|
||||
(A.Several m
|
||||
[A.Only m proc1
|
||||
,A.Spec m spec (A.Only m proc2)
|
||||
]
|
||||
)
|
||||
\end{lstlisting}
|
||||
|
||||
Where \lstinline|spec| is shorthand for the full specification of \lstinline|x|. The specification (second argument
|
||||
of \lstinline|A.Spec|) is in scope for the whole of the body (the third argument).
|
||||
Multiple specifications lead to nested \lstinline|A.Spec|s:
|
||||
|
||||
\occamsettings\begin{lstlisting}
|
||||
SEQ
|
||||
proc1
|
||||
INT x:
|
||||
INT16 y:
|
||||
proc2
|
||||
\end{lstlisting}
|
||||
|
||||
\haskellsettings\begin{lstlisting}
|
||||
A.Seq m
|
||||
(A.Several m
|
||||
[A.Only m proc1
|
||||
,A.Spec m specX
|
||||
(A.Spec m specY (A.Only m proc2))
|
||||
]
|
||||
)
|
||||
\end{lstlisting}
|
||||
|
||||
The \lstinline|A.ProcThen| item is used in situations where we need to perform a process in the
|
||||
middle of a \lstinline|A.Structured a| block (where \lstinline|a| is not \lstinline|A.Process|).
|
||||
For example, \lstinline|VALOF| uses this to execute a process then return a result. Some
|
||||
specifications with initialisation may need to be transformed into a specification, with
|
||||
initialisation code (a process) followed by some more of the \lstinline|A.Structured| item.
|
||||
|
||||
\subsection{Monadic Code}
|
||||
|
||||
\textit{Monads are generally considered a tricky topic. I do not plan to cover their full scope
|
||||
or try to comprehensively explain them from scratch here. I attempt a brief summary, but mainly
|
||||
I just thought it might be helpful to provide some comments
|
||||
on how real monadic code in Tock works. For a full explanation of monads, try looking for web
|
||||
resources or asking a Tock developer directly.}
|
||||
|
||||
We will look at a real example from the current Tock codebase. This is the \lstinline|doProcess| function
|
||||
inside the \lstinline|removeParAssign| function in the \lstinline|SimplifyProcs| module. Its purpose
|
||||
is to turn parallel assignments into multiple (sequential) single assignments.
|
||||
|
||||
\begin{lstlisting}
|
||||
doProcess :: A.Process -> PassM A.Process
|
||||
doProcess (A.Assign m vs@(_:_:_) (A.ExpressionList _ es))
|
||||
= do ts <- mapM typeOfVariable vs
|
||||
specs <- sequence
|
||||
[makeNonceVariable "assign_temp" m t A.VariableName A.Original | t <- ts]
|
||||
let temps = [A.Variable m n | A.Specification _ n _ <- specs]
|
||||
let first = [A.Assign m [v] (A.ExpressionList m [e]) | (v, e) <- zip temps es]
|
||||
let second = [A.Assign m [v] (A.ExpressionList m [A.ExprVariable m v'])
|
||||
| (v, v') <- zip vs temps]
|
||||
return $ A.Seq m $ foldl (\s spec -> A.Spec m spec s)
|
||||
(A.Several m (map (A.Only m) (first ++ second))) specs
|
||||
doProcess p = doGeneric p
|
||||
\end{lstlisting}
|
||||
|
||||
The type of this function is to take a \lstinline|A.Process| (the \lstinline |A.| is simply because it's an AST
|
||||
fragment; the AST module is always imported as A) and give back a monadic action in the PassM monad that will yield
|
||||
a Process.
|
||||
|
||||
Here is a description of what the code does.
|
||||
The pattern match matches any assignment that has two or more items on the LHS (\lstinline|vs@(_:_:_)|
|
||||
matches any list that has at least two elements -- the final match can be the empty list).
|
||||
The first line gets the type of the variables on the LHS and stores this list of types in \lstinline|ts|.
|
||||
\lstinline|specs| is a list of specifications of nonce variables to be assignment temporaries, one for each
|
||||
type in the type list (\lstinline|ts|). \lstinline|temps| is the list of Variable items. \lstinline|first| is the list
|
||||
of assignments from the RHS of the original assignment to the temporaries. \lstinline|second| is the list
|
||||
of assignments from the temporaries to the original LHS. Finally, we use the foldl function
|
||||
to nest the specifications, and make a sequential list of the assignments (\lstinline|first| then \lstinline|second|).
|
||||
|
||||
The function has a standard (i.e. unrelated to monads) Haskell pattern-match as its header. The direct value of
|
||||
the function is a \lstinline|do| block; a \lstinline|do| block is technically of type `monadic action'.
|
||||
|
||||
The indentation rule of the do block is fairly simple; each item in the do block should have the same level of
|
||||
indentation, and finishes when something is found with less indentation (standard Haskell indentation rules -- the ``offside rule'').
|
||||
|
||||
The first line (\lstinline|ts <- mapM typeOfVariable vs|) is already somewhat complex. Firstly, the type of
|
||||
\lstinline|typeOfVariable| is:
|
||||
|
||||
\begin{lstlisting}
|
||||
typeOfVariable :: (CSM m, Die m) => A.Variable -> m A.Type
|
||||
\end{lstlisting}
|
||||
|
||||
\lstinline|CSM| and \lstinline|Die| are two typeclasses to which \lstinline|PassM| belongs. So in our case, \lstinline|m| can be \lstinline|PassM|.
|
||||
Therefore the effective type for us is \lstinline|A.Variable -> PassM A.Type|. \lstinline|mapM| is a monadic
|
||||
version of \lstinline|map|:
|
||||
|
||||
\begin{lstlisting}
|
||||
mapM :: Monad m => (a -> m b) -> [a] -> m [b]
|
||||
\end{lstlisting}
|
||||
|
||||
It basically takes a monadic function, and applies it to each element of the given list, returning a monadic
|
||||
action that will yield the mapped elements.
|
||||
|
||||
So in our case, \lstinline|mapM typeOfVariable| will have type \lstinline|[A.Variable] -> PassM [A.Type]|.
|
||||
The argument is then \lstinline|vs|. The notation \lstinline|ts <-| means that the value yielded
|
||||
by the monadic action is labelled as \lstinline|ts|. You may think of it as the monadic version of the
|
||||
\lstinline|let| notation in normal Haskell. Note that \lstinline|ts| is of type \lstinline|[A.Type]|; it
|
||||
is not monadic. The action is actually performed by this statement, and the result is put into \lstinline|ts|.
|
||||
|
||||
The second line is again interesting. The list is a standard Haskell list comprehension. The type of
|
||||
\lstinline|makeNonceVariable| is:
|
||||
|
||||
\begin{lstlisting}
|
||||
makeNonceVariable :: CSM m => String -> Meta -> A.Type ->
|
||||
A.NameType -> A.AbbrevMode -> m A.Specification
|
||||
\end{lstlisting}
|
||||
|
||||
The list comprehension is therefore of type \lstinline|[PassM A.Specification]|; that is, it is a list
|
||||
of monadic actions, each of which will yield a \lstinline|A.Specification|. This is then given to the \lstinline|sequence| function
|
||||
which has this type:
|
||||
|
||||
\begin{lstlisting}
|
||||
sequence :: Monad m => [m a] -> m [a]
|
||||
\end{lstlisting}
|
||||
|
||||
In other words, \lstinline|sequence| takes a list of monadic actions, performs each of them (in sequence!) and returns
|
||||
the resulting list of elements (inside the monad, of course). So \lstinline|sequence| performs all our actions and gives
|
||||
us back a list of \lstinline|A.Specification|s. This list is then labelled as \lstinline|specs|.
|
||||
|
||||
The next three lines in our code fragment begin with \lstinline|let|. These are all non-monadic lines, and
|
||||
are just plain Haskell. Note that there is a slight difference between the \lstinline|let| notation inside
|
||||
a \lstinline|do| block and the normal \lstinline|let|..\lstinline|in| notation; there is no \lstinline|in|
|
||||
keyword in the version of \lstinline|let| in the \lstinline|do| block. This is a technicality, but one that can trip you up;
|
||||
if you add the \lstinline|in| keyword you'll get a not-very-helpful parser error.
|
||||
|
||||
Finally, the last line of our function features \lstinline|return|. \lstinline|return| is a standard monadic
|
||||
function that is used very frequently. Its type is:
|
||||
|
||||
\begin{lstlisting}
|
||||
return :: Monad m => a -> m a
|
||||
\end{lstlisting}
|
||||
|
||||
All it does is turn a plain (non-monadic) value into a simple monadic action that yields the value. More
|
||||
simply, it lifts the value into the monad. In our function we have a plain value, but we need to lift it
|
||||
inside the monad to satisfy the types. This is quite complex in relation to the types, so is perhaps best
|
||||
viewed as an analogue to C or Java's return statement.
|
||||
|
||||
\subsection{CompState, CSM and CSMR}
|
||||
|
||||
Our compiler state (which is relatively small, considering it is for the whole compiler) is a data-type
|
||||
named \lstinline|CompState|. If you look at the definition you will see that it is currently divided into
|
||||
four parts:
|
||||
|
||||
\begin{enumerate}
|
||||
\item The options set on the command-line. This includes things such as warning level, and choice of backend.
|
||||
These should not change during compilation.
|
||||
\item Items recorded by the pre-processor.
|
||||
\item The symbol table and similar structures primarily generated by early passes (but definitely added to later on).
|
||||
\item Some state used by various passes.
|
||||
\end{enumerate}
|
||||
|
||||
This may seem like a slight mish-mash but separating it out into separate state (and separate monads) is likely more
|
||||
trouble than it's worth.
|
||||
|
||||
There are two monads associated with \lstinline|CompState|. \lstinline|CSM| is shorthand for a state monad with \lstinline|CompState|
|
||||
as the state. If however you only need read-access to the state for your particular function (e.g. you only
|
||||
need it to check what command-line options were used), then use \lstinline|CSMR| instead, which only provides
|
||||
read-only access to the state. This makes the type signature a little clearer as to whether you may or may not
|
||||
modify the state in that function.
|
||||
|
||||
\subsection{Warn and Die}
|
||||
|
||||
We have one type-class (of monads) for each of warnings and errors. The \lstinline|Die| type-class should be used for fatal
|
||||
errors, whereas the \lstinline|Warn| type-class is used for recording warnings. Both use an optional \lstinline|Meta| item for source position,
|
||||
and a \lstinline|String| for an error message. Wherever possible (and it usually should be) provide a \lstinline|Meta| item with a source
|
||||
position. From a user's perspective, an error with a source position is much more useful than one without!
|
||||
|
||||
\subsection{The PassM monad}
|
||||
|
||||
It has been mentioned above that the PassM monad is the most common monad. Here is its actual type:
|
||||
|
||||
\begin{lstlisting}
|
||||
type PassM = ErrorT ErrorReport (StateT CompState (WriterT [WarningReport] IO))
|
||||
\end{lstlisting}
|
||||
|
||||
The \lstinline|PassM| monad is (currently!) a stack of four monads; an error monad, a state monad, a writer monad and the \lstinline|IO| monad. The error
|
||||
monad allows for exception-like mechanisms. In Tock, we throw an error whenever we can proceed no further
|
||||
with the compilation. Examples include parser errors, type errors and parallel safety problems. The
|
||||
state in question is the CompState type (see the module of the same name), which holds things like the
|
||||
name-type dictionary (aka symbol table). The writer monad keeps track of all the warnings encountered,
|
||||
ready to print them all out at the end of compilation (but allows us to be flexible and ignore them if, for example,
|
||||
we only want to display fatal errors when the compilation fails, not the warnings as well).
|
||||
The \lstinline|IO| monad is included for various reasons, such as being able to read in files.
|
||||
|
||||
\subsection{Generics}
|
||||
|
||||
Generics are a technique used to easily query or modify specifically-typed parts of a big data structure
|
||||
without writing tree traversal code manually. So for example, we may want to apply a certain function
|
||||
to all the \lstinline|A.Expression|s in our AST without having to write code to traverse every other
|
||||
type in the tree looking for expressions.
|
||||
|
||||
We use the \lstinline|Data.Generics| module of GHC to do our generics (also known as the Scrap Your
|
||||
Boilerplate or SYB approach). The deep-down mechanics are very confusing. How to use it is simpler,
|
||||
and is explained in the three excellent SYB papers (found on the web here:
|
||||
\url{http://www.cs.vu.nl/boilerplate/#papers}). Things are made slightly tricky again because
|
||||
we usually perform custom traversals.
|
||||
|
||||
The \lstinline|everywhere| (or \lstinline|everywhereM|) function(s) described in the SYB papers traverse an entire tree
|
||||
structure looking to apply your transformation. Unfortunately this includes examining each character
|
||||
of each string in every \lstinline|Meta| tag, which makes things (unacceptably) slow. Therefore Adam implemented
|
||||
a custom traversal pattern. Since you will almost always want this traversal, you do not have to
|
||||
worry too much about the internals (which are described in section \ref{traverse-detail}), just about how to use it
|
||||
(see section \ref{traverse-common}).
|
||||
|
||||
\subsubsection{Traversal Strategies}
|
||||
\label{traverse-detail}
|
||||
|
||||
The \lstinline|Data.Generics| library provides a \lstinline|gmapM| function which maps a monadic transformation
|
||||
over all sub-elements of a term. So for example, \lstinline|gmapM return (A.Specification m spec innerStr)|
|
||||
will apply the return function (in monads, this is the identity transformation) over the items \lstinline|m|,
|
||||
\lstinline|spec| and \lstinline|innerStr| in turn. Note that there is no recursion into \lstinline|innerStr|.
|
||||
|
||||
To provide recursion easily, you can use the \lstinline|everywhereM| function. This is defined as follows:
|
||||
|
||||
\begin{lstlisting}
|
||||
everywhereM f x = do x' <- gmapM (everywhereM f) x
|
||||
f x'
|
||||
\end{lstlisting}
|
||||
|
||||
It applies itself over all sub-elements (which will recurse all the way through the tree) then applies
|
||||
the modifier function to the result. As described above, however, it can be quite inefficient.
|
||||
|
||||
\subsubsection{A Typical Traversal}
|
||||
\label{traverse-common}
|
||||
|
||||
Here's an example; the wrapper from our previous code example:
|
||||
|
||||
\begin{lstlisting}
|
||||
removeParAssign :: Data t => t -> PassM t
|
||||
removeParAssign = doGeneric `extM` doProcess
|
||||
where
|
||||
doGeneric :: Data t => t -> PassM t
|
||||
doGeneric = makeGeneric removeParAssign
|
||||
|
||||
doProcess :: A.Process -> PassM A.Process
|
||||
doProcess (A.Assign m vs@(_:_:_) (A.ExpressionList _ es)) = ...
|
||||
doProcess p = doGeneric p
|
||||
\end{lstlisting}
|
||||
|
||||
You can follow this template for any new passes you write. All you need to customise is the
|
||||
name of the pass (of course), and change the type and name of the \lstinline|doProcess| function if you
|
||||
want to transform something other than a \lstinline|A.Process|. For example:
|
||||
|
||||
\begin{lstlisting}
|
||||
twiddleExpressions :: Data t => t -> PassM t
|
||||
twiddleExpressions = doGeneric `extM` doExpression
|
||||
where
|
||||
doGeneric :: Data t => t -> PassM t
|
||||
doGeneric = makeGeneric twiddleExpressions
|
||||
|
||||
doExpression :: A.Expression -> PassM A.Expression
|
||||
doExpression (...) = ... -- First pattern match
|
||||
doExpression (...) = ... -- Second pattern match
|
||||
doExpression p = doGeneric p
|
||||
\end{lstlisting}
|
||||
|
||||
Note that you must include the last case for your \lstinline|doProcess|/\lstinline|doExpression| function; otherwise you
|
||||
will get an error if your pattern-matches are not exhaustive (which they rarely will be).
|
||||
The net effect is to apply doExpression to all expressions in the given AST, in an efficient manner.
|
||||
In other words, it's a compiler pass that operates on the expressions in the tree.
|
||||
|
||||
\subsubsection{How the Typical Traversal Works}
|
||||
|
||||
The makeGeneric function is defined as follows:
|
||||
|
||||
\begin{lstlisting}
|
||||
makeGeneric top
|
||||
= (gmapM top)
|
||||
`extM` (return :: String -> PassM String)
|
||||
`extM` (return :: Meta -> PassM Meta)
|
||||
\end{lstlisting}
|
||||
|
||||
So it applies the given function using \lstinline|gmapM|, except for \lstinline|String|s and \lstinline|Meta| tags which
|
||||
it skips. We apply this to our top-level function to get our \lstinline|doGeneric| function
|
||||
that handles all the data items we are \textit{not}~interested in. In our top-level
|
||||
we extend this with the specific cases that we \textit{are}~interested in; in this case,
|
||||
expressions.
|
||||
|
||||
The last thing required is to apply \lstinline|doGeneric| to all the expressions that we are not
|
||||
interested in. Note that we do not apply the top-level function (\lstinline|twiddleExpressions|).
|
||||
If we did, we would get infinite recursion between \lstinline|doExpression| and \lstinline|twiddleExpressions|.
|
||||
Instead we apply \lstinline|doGeneric|, which has no specific case for expressions, and will therefore recurse
|
||||
through the expression item down to the next sub-items.
|
||||
|
||||
\section{Add Your Own Code}
|
||||
|
||||
\subsection{Conventions}
|
||||
|
||||
Tock, as a project, does not have any particular coding conventions.
|
||||
The code is littered with slightly curious code indentation,
|
||||
one or two letter variable names, incredibly long expressions stretching long and wide, various bits of
|
||||
uncommented code, and the use of many similar but different language features (e.g. \lstinline|if|/\lstinline|then|/\lstinline|else| and
|
||||
pattern guards, \lstinline|map| and list comprehensions), which may be blamed in varying measures on the current
|
||||
developers!
|
||||
|
||||
Which is not to say the code is bad, just that there is not tight control on coding style. In general:
|
||||
|
||||
\begin{enumerate}
|
||||
\item Use your common sense
|
||||
\item Vaguely follow the style of the existing code
|
||||
\item Favour readability and clarity over conciseness and cleverness
|
||||
\item If you optimise, optimise only in terms of algorithms (e.g. O($N \log N$) over O($N^2$)) but never look for small savings.
|
||||
Besides the effort being wasted, it would be very hard in Haskell to judge which of several pieces of
|
||||
code would be faster. The compiler does not have to be blindingly fast, but it does need to be
|
||||
maintainable.
|
||||
%
|
||||
\item If a function foo is specifically needed by only the function bar, place foo inside the \lstinline|where|
|
||||
clause of bar (unless this is particularly untenable). This keeps the code neater, and foo can always
|
||||
be moved to the top-level later if necessary.
|
||||
\item Always give type signatures for functions at the top-level of the file (i.e. those not inside a
|
||||
\lstinline|where| clause). Additionally, try to provide type signatures for every function
|
||||
(i.e. anything whose type has the form \lstinline|a -> b|) in a \lstinline|where| clause. Providing types for values in
|
||||
\lstinline|where| clauses is also never a bad thing.
|
||||
%
|
||||
\item Try not to leave warnings in the code. We have compiler options turned on to generate various warnings.
|
||||
Defaulting-to-type warnings can be solved by inserting a type signature, and unused binding warnings can
|
||||
be solved by removing the unused function, unless you know the lack of use is temporary.
|
||||
\item Never allow any possibility of a non-graceful run-time error. For example, do not use head, which
|
||||
can fail directly with a non-helpful error message. Instead, use a \lstinline|case| statement (with a
|
||||
pattern-match), and in the case where the list is empty, use the \lstinline|die|/\lstinline|dieP|/\lstinline|dieInternal| functions to provide a
|
||||
more helpful error message (such as ``list of types was not expected to be empty in function foo''). This is for
|
||||
practical reasons; if we used head everywhere in the code, then when the program failed with ``head: empty list''
|
||||
it would be very hard to work out exactly which instance of head had given the error. Similarly, try to
|
||||
ensure that you either always match all possible cases in pattern-matching, or you provide a default case that
|
||||
then gives an error message. This is not quite as crucial, though, because at least the error message
|
||||
for a failed pattern match gives the relevant line numbers. That helps developers, but not users!
|
||||
%
|
||||
\item In lists where order is unimportant (such as test lists, or module import lists), maintain
|
||||
alphabetical order (to make it easier to find items in a long list). Your editor may be able to help
|
||||
with this.
|
||||
\item Never use tabs.
|
||||
\item Put spaces around operators (except colons in pattern matches).
|
||||
\item When writing out lists or tuples on one line, try to make sure there is a space after the comma (clearer,
|
||||
Adam's preference but Neil's bad habit).
|
||||
\item When writing out lists on several lines, put the commas between items at the beginning of each line,
|
||||
not at the end (can make patches clearer by not disrupting surrounding lines).
|
||||
\end{enumerate}
|
||||
|
||||
As for other patterns of working: use the Tock mailing list (tock-discuss) for any questions you may have.
|
||||
All questions welcome, simple or complex, and asking there will save time, trouble, and should allow us
|
||||
to improve documentation such as this guide. It is especially worth asking if you want to revamp
|
||||
an existing section of code; other people may know of a reason why this is not wise, or may suggest a
|
||||
good way to go about it.
|
||||
|
||||
\subsection{Be Lazy (it's the Haskell way)}
|
||||
|
||||
Try and make your life easy by coding as little as possible.
|
||||
|
||||
\subsubsection{Write Only What You Need}
|
||||
|
||||
Tock has effectively been built using similar ideas to extreme programming. Test everything,
|
||||
don't be afraid to refactor, and don't over-engineer things. Write the minimum you need, and if you
|
||||
find you need more later, add it and possibly refactor. I find refactoring in Haskell -- even big
|
||||
changes that affect most of Tock -- to be quite easy. However, if you are planning to make a change
|
||||
that will impact a lot of code, it's best to discuss/notify via the mailing list first!
|
||||
|
||||
\subsubsection{Re-Use Everyone Else's Code}
|
||||
|
||||
Check the Utils module (and the TestUtils
|
||||
module when testing) as well as other modules (such as Types) to see if there is an existing function
|
||||
that does what you need. Anything that seems likely to have been needed before (such as getting the
|
||||
type of a variable) is probably in those modules. Similarly, if you ever find yourself writing
|
||||
a general utility function more than once, you should probably look to put it in the Utils module. The guideline
|
||||
with the Utils module is that it should never import any other Tock modules.
|
||||
|
||||
The Utils (and to a lesser extent TestUtils) modules are intended to contain functions that could have
|
||||
come straight out of the standard library. The standard library is also another place to look for
|
||||
useful functions. The URL for the latest version is: \url{http://haskell.org/ghc/docs/6.6/html/libraries/}.
|
||||
Make sure you always refer to the documentation for the lowest compiler version we support (currently
|
||||
6.6) to avoid accidentally using a function that is only available in a later version.
|
||||
|
||||
Particularly useful modules are:
|
||||
|
||||
\begin{itemize}
|
||||
\item \href{http://haskell.org/ghc/docs/6.6/html/libraries/base/Data-List.html}{\lstinline|Data.List|}
|
||||
-- Contains various helper functions for dealing with lists, including map,
|
||||
foldl, zip, sum, and many others\footnote{A lot of these are technically in the standard Prelude, but
|
||||
it's easy to find all the documentation in one place in the \lstinline|Data.List| module, as well
|
||||
as several useful functions not in the Prelude.}. Probably the most useful module.
|
||||
\item \href{http://haskell.org/ghc/docs/6.6/html/libraries/base/Control-Monad.html}{\lstinline|Control.Monad|}
|
||||
-- Contains all the general monadic helper functions, such as
|
||||
mapM, foldM, sequence. If you ever find yourself struggling to manipulate monadic types, there
|
||||
may be something to help you in here.
|
||||
\item \href{http://haskell.org/ghc/docs/6.6/html/libraries/base/System-IO.html}{\lstinline|System.IO|}
|
||||
-- Contains all the functions for printing to the screen, reading from
|
||||
files, etc.
|
||||
\end{itemize}
|
||||
|
||||
Other useful modules are
|
||||
\href{http://haskell.org/ghc/docs/6.6/html/libraries/base/Data-Maybe.html}{\lstinline|Data.Maybe|},
|
||||
\href{http://haskell.org/ghc/docs/6.6/html/libraries/base/Data-Generics.html}{\lstinline|Data.Generics|},
|
||||
\href{http://haskell.org/ghc/docs/6.6/html/libraries/base/Data-Map.html}{\lstinline|Data.Map|},
|
||||
\href{http://haskell.org/ghc/docs/6.6/html/libraries/base/Data-Set.html}{\lstinline|Data.Set|} and
|
||||
\href{http://haskell.org/ghc/docs/6.6/html/libraries/HUnit/Test-HUnit.html}{\lstinline|Test.HUnit|}.
|
||||
|
||||
\subsubsection{Clean Up Code}
|
||||
|
||||
Don't be afraid to re-write someone else's code, if you think it could be made clearer or simpler.
|
||||
If it works out (i.e. passes the tests that should exist for the code), great. If it doesn't pan out
|
||||
for some reason, add a comment as to why re-writing it doesn't work so that the next developer doesn't
|
||||
attempt the same thing.
|
||||
|
||||
\section{Test Your Code}
|
||||
|
||||
Whether you write your tests before, at the same time as, or after the real code is not a major issue, so long
|
||||
as the test gets written. Personally I (Neil) favour writing them somewhat simultaneously; the test usually
|
||||
gives an idea of how to implement the function, but writing the function can often make you realise that your
|
||||
expected test output does not actually match how the function should work! So interleaving writing the tests
|
||||
and the function seems to be helpful.
|
||||
|
||||
Most of Tock is now unit-tested, with the ultimate aim of having everything in it tested. So when you add
|
||||
new functionality, it would be good if you also add the corresponding tests, to help towards this aim.
|
||||
The idea is that running `\verb|make && ./tocktest|' should give no errors (except in whatever
|
||||
you are currently working on). This is broadly true; at the time of writing there are around 1800 tests, with
|
||||
7 errors, 5 of which are related to what I'm currently working on.
|
||||
|
||||
\subsection{Running the Tests}
|
||||
|
||||
When you run `\verb|make|', the build system will build both the `tock' and `tocktest' executables.
|
||||
The latter is of course the test-suite. Currently it runs tests from all the frameworks (see next section)
|
||||
except the cgtests. There are several options to `tocktest':
|
||||
|
||||
\begin{itemize}
|
||||
\item \verb|--qc={off,low,medium,high}| -- Sets the level of QuickCheck testing. I would suggest usually using
|
||||
`low' (it will be faster) but occasionally running `medium' or `high' as a sanity check.
|
||||
\item \verb|--plain| -- Outputs plain text rather than playing with terminal deletion. Useful for
|
||||
when you want plain-text output (e.g. to redirect to a file).
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Test Frameworks}
|
||||
|
||||
We effectively use four test frameworks in Tock:
|
||||
|
||||
\begin{enumerate}
|
||||
\item QuickCheck; a framework used to generate random input data for tests, then test properties of its output.
|
||||
\item HUnit; a simple framework for providing standard lists of assertions.
|
||||
\item cgtests; the standard occam test-suite. Useful for providing a full-system test for anything that gets
|
||||
used in the occam side of things. Currently, not all the cgtests pass, but there is a list on the Trac wiki
|
||||
for Tock of all the tests currently expected to pass/fail (current URL:
|
||||
\url{http://projects.cs.kent.ac.uk/projects/tock/trac/wiki/CgtestOutput}).
|
||||
\item Automatic test harness. Currently very simplistic, but essentially a couple of helper functions in the
|
||||
\lstinline|TestHarness| module allow you to easily provide an external file of occam code tests and to specify
|
||||
whether this code should provoke an error from the compiler (at least, everything before the final
|
||||
code-generation step).
|
||||
\end{enumerate}
|
||||
|
||||
\subsection{HUnit}
|
||||
|
||||
For most of your tests, HUnit will be the most appropriate. The main HUnit data type is as follows:
|
||||
|
||||
\begin{lstlisting}
|
||||
data Test
|
||||
= TestCase Assertion
|
||||
| TestList [Test]
|
||||
| TestLabel String Test
|
||||
\end{lstlisting}
|
||||
|
||||
This allows arbitrary (nested) lists of \lstinline|Test|s to be built up. Since each \lstinline|Assertion|
|
||||
already has a label, I do not favour labelling each \lstinline|TestCase|, but labelling each \lstinline|TestList|
|
||||
is not a bad idea. It makes your test easier to locate if/when it fails.
|
||||
|
||||
Each \lstinline|Assertion| (actually type: \lstinline|IO ()|) comes from functions like
|
||||
\lstinline|assertEqual|. This function is of type:
|
||||
|
||||
\begin{lstlisting}
|
||||
assertEqual :: (Eq a, Show a) => String -> a -> a -> Assertion
|
||||
\end{lstlisting}
|
||||
|
||||
\subsubsection{Labelling Tests}
|
||||
|
||||
The first argument is the label. I usually use this as a substitute for a test name; you'll commonly see
|
||||
it being simply the name of the test function (e.g. testGenOutput) concatenated with a number. Ideally,
|
||||
the label would be a wonderfully descriptive label of the test, but realistically you end up writing so
|
||||
many trivial test-cases that you will probably also lapse into the same habit as me. I usually combine
|
||||
this with writing a helper function (often called \lstinline|test|) in the \lstinline|where| clause
|
||||
to help out with that particular family of test (often automatically feeding input into the function
|
||||
being tested).
|
||||
|
||||
You will also see that I always arbitrarily number the test-cases in a manner similar to ye olde BASIC
|
||||
line numbers (often taking random leaps forward to leave gaps for later). This probably appears crazy
|
||||
but I do it for two reasons:
|
||||
|
||||
\begin{enumerate}
|
||||
\item As mentioned above, finding descriptive names gets silly when you have a decent number of tests,
|
||||
so numbering is preferred to text.
|
||||
\item I deliberately avoid `auto-numbering' tests (zipping them with \lstinline|[0..]| would be quite
|
||||
easy, of course). This would leave the numbers fragile; if a change to the test lists added a test
|
||||
mid-list then all the numbers would be altered. With my method, because the test numbers are never
|
||||
changed, if someone has ``testGenOutput 203'' fail on them, it is easy to find, and will be uniquely
|
||||
identifiable, even if the test-list has changed since they checked out their version. Also, you
|
||||
can perform a text-search for the number of the test, rather than having to count through items
|
||||
in an automatically numbered list.
|
||||
\end{enumerate}
|
||||
|
||||
\subsubsection{Other items}
|
||||
|
||||
Furthermore, the next two arguments of the \lstinline|assertEqual| function are the expected test output
|
||||
and the actual test output in that order. So you might write something like:
|
||||
|
||||
\begin{lstlisting}
|
||||
assertEqual "concatString Test 0" "foobar" (concatString "foo" "bar")
|
||||
\end{lstlisting}
|
||||
|
||||
This is of type \lstinline|Assertion|; prefix it with `\lstinline|TestCase $|' to make it a \lstinline|Test|.
|
||||
|
||||
There are many helper functions related to building up input for tests (particularly AST fragments)
|
||||
and for performing customised assertions (especially for testing passes in the \lstinline|PassM| monad)
|
||||
in the \lstinline|TestUtils| module; you should always look there to see if it has a helper function
|
||||
that would be useful to you. Similarly, if you think any of your own test helper functions could be
|
||||
useful in other places, add them to the \lstinline|TestUtils| module.
|
||||
|
||||
\subsection{Wiring In The Tests}
|
||||
|
||||
If you are adding to an existing portion of Tock, then you should add your tests alongside the existing
|
||||
tests. Most of Tock is tested; if you add to a bit that is not tested then please consider writing
|
||||
tests for the old code too.
|
||||
|
||||
When writing a new chunk of functionality you will want to create a new module for the tests. The general
|
||||
pattern is to put the tests for module \lstinline|Foo| into module \lstinline|FooTest|. You should
|
||||
then import the module you are testing, any other appropriate modules, and \lstinline|Test.HUnit|.
|
||||
|
||||
By convention, you should provide one (and only one) of the following functions in your test module:
|
||||
|
||||
\begin{enumerate}
|
||||
\item \lstinline|tests :: Test| (remember that a \lstinline|Test| can be a \lstinline|TestList|)
|
||||
\item \lstinline|qcTests :: (Test, [QuickCheckTest])|
|
||||
\item \lstinline|ioqcTests :: IO (Test, [QuickCheckTest])|
|
||||
\end{enumerate}
|
||||
|
||||
You should provide an export list for your module that contains only this function. This is very useful,
|
||||
as it means that any tests you write in your test module but forget to call in its exported
|
||||
\lstinline|tests| function will be flagged up by the compiler as unused.
|
||||
|
||||
Then add your new test module to the list in the \lstinline|TestMain| module. Add the appropriate import
|
||||
declaration, and look at the very foot of the file for the \lstinline|tests| list. Wrap your function
|
||||
according to its type, following the pattern of the other functions there. That is, you may need to
|
||||
add the \lstinline|noqc| or \lstinline|return| wrapper functions.
|
||||
|
||||
\section{Comment Your Code}
|
||||
|
||||
Like any real-world chunk of code, the documentation/comments on Tock vary from `polished' to `absent'.
|
||||
Needless to say, the more documentation the better. If you do happen across someone's code that puzzles
|
||||
you at first, then prod the developer who wrote it into adding some comments. If they wrote it and it's
|
||||
not clear, it's their fault! But much better is if we all document the code we write in the first place.
|
||||
|
||||
Naturally, standard rules apply; don't just repeat in the comment what the code clearly says already.
|
||||
Document the purpose of the code, any interesting/odd methodology, tricks or problems.
|
||||
|
||||
Haddock is a documentation system for Haskell akin to Javadoc, Doxygen, etc. Starting to use it is very
|
||||
simple; instead of writing \lstinline|-- Some comment| before a function, write \lstinline$-- | Some comment$
|
||||
instead. It is not obvious in this style of mark-up but there is a space between the dashes and the pipe (it is
|
||||
required). See the Haddock documentation for other markup (latest version can be found here:
|
||||
\url{http://www.haskell.org/haddock/doc/html/index.html}). You can use \verb$make haddock$ to create the
|
||||
HTML documentation in the `doc' directory.
|
||||
|
||||
\end{document}
|
Loading…
Reference in New Issue
Block a user