
This commit adds: + a few tags to `scribble/html` (by extending the list in `scribble/html/html`) + even more tags to `scribble/html/extra`, these tags are "likely" to cause namespace issues (time, map) or are uncommon / esoteric (rb, ruby, svg) + a test in `scribble/html.rkt` that the tags from - `scribble/html/html` - and `scribble/html/extra` match a master list from the whatwg specification* * https://html.spec.whatwg.org/multipage/#toc-semantics
482 lines
18 KiB
Racket
482 lines
18 KiB
Racket
#lang racket/base
|
|
|
|
;; (X)HTML elements etc.
|
|
;; Keep this file up to date with:
|
|
;; https://html.spec.whatwg.org/multipage/#toc-semantics
|
|
;; Put esoteric elements in scribble/html/extra
|
|
|
|
(require "xml.rkt" scribble/text)
|
|
|
|
;; ----------------------------------------------------------------------------
|
|
;; Doctype line
|
|
|
|
(provide doctype)
|
|
(define (doctype type)
|
|
(cond [(string? type) (literal "<!DOCTYPE " type ">\n")]
|
|
[(eq? 'html type) (doctype "html")]
|
|
[(eq? 'xhtml type)
|
|
(list (literal "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n")
|
|
(doctype (string-append
|
|
"html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\""
|
|
" \"http://www.w3.org/TR/xhtml1/DTD/"
|
|
"xhtml1-strict.dtd\"")))]
|
|
[else (raise-type-error 'doctype
|
|
"string or known doctype symbol" type)]))
|
|
|
|
;; ----------------------------------------------------------------------------
|
|
;; Xhtml toplevel
|
|
|
|
;; creation of xhtml files requires some extra stuff
|
|
(define xhtml-prefix (doctype 'xhtml))
|
|
(provide xhtml)
|
|
(define (xhtml . body)
|
|
(list xhtml-prefix
|
|
(apply html 'xmlns: "http://www.w3.org/1999/xhtml" body)
|
|
"\n"))
|
|
|
|
;; ----------------------------------------------------------------------------
|
|
;; Elements
|
|
|
|
;; For complete reference: http://www.w3.org/TR/html/dtds.html
|
|
;; (See also http://www.w3schools.com/tags/)
|
|
|
|
;; The dtds, in increasing size:
|
|
;; http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd
|
|
;; http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd
|
|
;; http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd
|
|
|
|
;; These are all entities, taked from the DTDs. The ones marked with "[*]" are
|
|
;; defined later, since they need a different definition.
|
|
(define/provide-elements/not-empty
|
|
;; ========== Document Structure
|
|
html
|
|
;; ========== Document Head
|
|
head
|
|
;; The title element is not considered part of the flow of text.
|
|
;; It should be displayed, for example as the page header or
|
|
;; window title. Exactly one title is required per document.
|
|
title
|
|
;; base ; document base URI, can be empty [*]
|
|
;; meta ; generic metainformation, can be empty [*]
|
|
;; link ; relationship values, can be empty [*]
|
|
style ; style info, which may include CDATA sections
|
|
script ; script statements, which may include CDATA sections
|
|
noscript ; alternate content container for non script-based rendering
|
|
slot
|
|
;; ========== Frames
|
|
frameset ; only one noframes element permitted per document
|
|
frame ; tiled window within frameset
|
|
iframe ; inline subwindow
|
|
noframes ; alternate content container for non frame-based rendering
|
|
;; ========== Document Body
|
|
body
|
|
div ; generic language/style container
|
|
;; ========== Paragraphs
|
|
p
|
|
;; ========== Headings
|
|
h1
|
|
h2
|
|
h3
|
|
h4
|
|
h5
|
|
h6
|
|
hgroup
|
|
;; ========== Lists
|
|
ul ; Unordered list
|
|
ol ; Ordered (numbered) list
|
|
menu ; single column list (DEPRECATED)
|
|
dir ; multiple column list (DEPRECATED)
|
|
li ; list item
|
|
dl ; definition lists - dt for term, dd for its definition
|
|
dt
|
|
dd
|
|
;; ========== Address
|
|
address ; information on author
|
|
;; ========== Horizontal Rule
|
|
;; hr ; horizontal rule can be empty [*]
|
|
;; ========== Preformatted Text
|
|
pre
|
|
;; ========== Block-like Quotes
|
|
blockquote
|
|
;; ========== Text alignment
|
|
center ; center content
|
|
;; ========== Inserted/Deleted Text
|
|
ins
|
|
del
|
|
;; ========== The Anchor Element
|
|
a ; content is inline; except that anchors shouldn't be nested
|
|
;; ========== Inline Elements
|
|
span ; generic language/style container
|
|
bdo ; I18N BiDi over-ride
|
|
;; br ; forced line break, can be empty [*]
|
|
em ; emphasis
|
|
strong ; strong emphasis
|
|
dfn ; definitional
|
|
code ; program code
|
|
samp ; sample
|
|
kbd ; something user would type
|
|
var ; variable
|
|
cite ; citation
|
|
abbr ; abbreviation
|
|
acronym ; acronym
|
|
q ; inlined quote
|
|
sub ; subscript
|
|
sup ; superscript
|
|
tt ; fixed pitch font
|
|
i ; italic font
|
|
b ; bold font
|
|
big ; bigger font
|
|
small ; smaller font
|
|
u ; underline
|
|
s ; strike-through
|
|
strike ; strike-through
|
|
;; basefont ; base font size, can be empty [*]
|
|
font ; local change to font
|
|
;; ========== Object
|
|
object ; embeded objects
|
|
;; param ; parameters for objects, can also specify as attrs, can be empty [*]
|
|
applet ; Java applet
|
|
;; ========== Images
|
|
;; To avoid accessibility problems for people who aren't
|
|
;; able to see the image, you should provide a text
|
|
;; description using the alt and longdesc attributes.
|
|
;; In addition, avoid the use of server-side image maps.
|
|
;; img ; can be empty [*]
|
|
;; ========== Client-side image maps
|
|
;; map ; collides with scheme, but not really useful
|
|
;; area ; can be empty [*]
|
|
;; ========== Forms
|
|
form ; forms shouldn't be nested
|
|
label ; text that belongs to a form control
|
|
;; input ; form control, can be empty [*]
|
|
select ; option selector
|
|
optgroup ; option group
|
|
option ; selectable choice
|
|
textarea ; multi-line text field
|
|
fieldset ; group form fields
|
|
legend ; fieldset label (one per fieldset)
|
|
button ; push button
|
|
;; isindex ; single-line text input control (DEPRECATED), can be empty [*]
|
|
;; ========== Tables
|
|
table ; holds caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+)
|
|
caption ; caption text
|
|
thead ; header part, holds tr
|
|
tfoot ; footer part, holds tr
|
|
tbody ; body part, holds tr
|
|
colgroup ; column group, olds col
|
|
;; col ; column info, has only attributes, can be empty [*]
|
|
tr ; holds th or td
|
|
th ; header cell
|
|
td ; table cell
|
|
;; ========== Interactive Elements
|
|
details
|
|
dialog
|
|
menuitem
|
|
)
|
|
|
|
;; [*] empty elements, these are listed with an `EMPTY' content in
|
|
;; http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd
|
|
(define/provide-elements/empty
|
|
embed keygen wbr
|
|
base meta link hr br basefont param img area input isindex col)
|
|
|
|
;; [*] elements with a cdata/comment body
|
|
(provide script/inline)
|
|
(define (script/inline . args)
|
|
(define-values [attrs body] (attributes+body args))
|
|
(make-element
|
|
'script attrs
|
|
`("\n" ,(set-prefix 0 (apply cdata #:line-prefix "//" body)) "\n")))
|
|
(provide style/inline)
|
|
(define (style/inline . args)
|
|
(define-values [attrs body] (attributes+body args))
|
|
(make-element 'style attrs `("\n" ,body "\n")))
|
|
|
|
;; ----------------------------------------------------------------------------
|
|
;; Entities
|
|
|
|
;; The three dtds that define the set of entities are at:
|
|
;; http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent
|
|
;; http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
|
|
;; http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent
|
|
|
|
(define/provide-entities
|
|
nbsp ndash mdash bull middot sdot lsquo rsquo sbquo ldquo rdquo bdquo
|
|
lang rang dagger Dagger plusmn deg)
|
|
|
|
#; ; the complete list
|
|
(define/provide-entities
|
|
;; 24.2 Character entity references for ISO 8859-1 characters
|
|
nbsp ;00A0 no-break space = non-breaking space
|
|
iexcl ;00A1 inverted exclamation mark
|
|
cent ;00A2 cent sign
|
|
pound ;00A3 pound sign
|
|
curren ;00A4 currency sign
|
|
yen ;00A5 yen sign = yuan sign
|
|
brvbar ;00A6 broken bar = broken vertical bar
|
|
sect ;00A7 section sign
|
|
uml ;00A8 diaeresis = spacing diaeresis
|
|
copy ;00A9 copyright sign
|
|
ordf ;00AA feminine ordinal indicator
|
|
laquo ;00AB left-pointing double angle quotation mark = left pointing guillemet
|
|
not ;00AC not sign
|
|
shy ;00AD soft hyphen = discretionary hyphen
|
|
reg ;00AE registered sign = registered trade mark sign
|
|
macr ;00AF macron = spacing macron = overline = APL overbar
|
|
deg ;00B0 degree sign
|
|
plusmn ;00B1 plus-minus sign = plus-or-minus sign
|
|
sup2 ;00B2 superscript two = superscript digit two = squared
|
|
sup3 ;00B3 superscript three = superscript digit three = cubed
|
|
acute ;00B4 acute accent = spacing acute
|
|
micro ;00B5 micro sign
|
|
para ;00B6 pilcrow sign = paragraph sign
|
|
middot ;00B7 middle dot = Georgian comma = Greek middle dot
|
|
cedil ;00B8 cedilla = spacing cedilla
|
|
sup1 ;00B9 superscript one = superscript digit one
|
|
ordm ;00BA masculine ordinal indicator
|
|
raquo ;00BB right-pointing double angle quotation mark = right pointing guillemet
|
|
frac14 ;00BC vulgar fraction one quarter = fraction one quarter
|
|
frac12 ;00BD vulgar fraction one half = fraction one half
|
|
frac34 ;00BE vulgar fraction three quarters = fraction three quarters
|
|
iquest ;00BF inverted question mark = turned question mark
|
|
Agrave ;00C0 latin capital letter A with grave = latin capital letter A grave
|
|
Aacute ;00C1 latin capital letter A with acute
|
|
Acirc ;00C2 latin capital letter A with circumflex
|
|
Atilde ;00C3 latin capital letter A with tilde
|
|
Auml ;00C4 latin capital letter A with diaeresis
|
|
Aring ;00C5 latin capital letter A with ring above = latin capital letter A ring
|
|
AElig ;00C6 latin capital letter AE = latin capital ligature AE
|
|
Ccedil ;00C7 latin capital letter C with cedilla
|
|
Egrave ;00C8 latin capital letter E with grave
|
|
Eacute ;00C9 latin capital letter E with acute
|
|
Ecirc ;00CA latin capital letter E with circumflex
|
|
Euml ;00CB latin capital letter E with diaeresis
|
|
Igrave ;00CC latin capital letter I with grave
|
|
Iacute ;00CD latin capital letter I with acute
|
|
Icirc ;00CE latin capital letter I with circumflex
|
|
Iuml ;00CF latin capital letter I with diaeresis
|
|
ETH ;00D0 latin capital letter ETH
|
|
Ntilde ;00D1 latin capital letter N with tilde
|
|
Ograve ;00D2 latin capital letter O with grave
|
|
Oacute ;00D3 latin capital letter O with acute
|
|
Ocirc ;00D4 latin capital letter O with circumflex
|
|
Otilde ;00D5 latin capital letter O with tilde
|
|
Ouml ;00D6 latin capital letter O with diaeresis
|
|
times ;00D7 multiplication sign
|
|
Oslash ;00D8 latin capital letter O with stroke = latin capital letter O slash
|
|
Ugrave ;00D9 latin capital letter U with grave
|
|
Uacute ;00DA latin capital letter U with acute
|
|
Ucirc ;00DB latin capital letter U with circumflex
|
|
Uuml ;00DC latin capital letter U with diaeresis
|
|
Yacute ;00DD latin capital letter Y with acute
|
|
THORN ;00DE latin capital letter THORN
|
|
szlig ;00DF latin small letter sharp s = ess-zed
|
|
agrave ;00E0 latin small letter a with grave = latin small letter a grave
|
|
aacute ;00E1 latin small letter a with acute
|
|
acirc ;00E2 latin small letter a with circumflex
|
|
atilde ;00E3 latin small letter a with tilde
|
|
auml ;00E4 latin small letter a with diaeresis
|
|
aring ;00E5 latin small letter a with ring above = latin small letter a ring
|
|
aelig ;00E6 latin small letter ae = latin small ligature ae
|
|
ccedil ;00E7 latin small letter c with cedilla
|
|
egrave ;00E8 latin small letter e with grave
|
|
eacute ;00E9 latin small letter e with acute
|
|
ecirc ;00EA latin small letter e with circumflex
|
|
euml ;00EB latin small letter e with diaeresis
|
|
igrave ;00EC latin small letter i with grave
|
|
iacute ;00ED latin small letter i with acute
|
|
icirc ;00EE latin small letter i with circumflex
|
|
iuml ;00EF latin small letter i with diaeresis
|
|
eth ;00F0 latin small letter eth
|
|
ntilde ;00F1 latin small letter n with tilde
|
|
ograve ;00F2 latin small letter o with grave
|
|
oacute ;00F3 latin small letter o with acute
|
|
ocirc ;00F4 latin small letter o with circumflex
|
|
otilde ;00F5 latin small letter o with tilde
|
|
ouml ;00F6 latin small letter o with diaeresis
|
|
divide ;00F7 division sign
|
|
oslash ;00F8 latin small letter o with stroke, = latin small letter o slash
|
|
ugrave ;00F9 latin small letter u with grave
|
|
uacute ;00FA latin small letter u with acute
|
|
ucirc ;00FB latin small letter u with circumflex
|
|
uuml ;00FC latin small letter u with diaeresis
|
|
yacute ;00FD latin small letter y with acute
|
|
thorn ;00FE latin small letter thorn
|
|
yuml ;00FF latin small letter y with diaeresis
|
|
|
|
;; 24.3 Character entity references for symbols, mathematical symbols, and
|
|
;; Greek letters
|
|
;; Latin Extended-B
|
|
fnof ;0192 latin small f with hook = function = florin
|
|
;; Greek
|
|
Alpha ;0391 greek capital letter alpha
|
|
Beta ;0392 greek capital letter beta
|
|
Gamma ;0393 greek capital letter gamma
|
|
Delta ;0394 greek capital letter delta
|
|
Epsilon ;0395 greek capital letter epsilon
|
|
Zeta ;0396 greek capital letter zeta
|
|
Eta ;0397 greek capital letter eta
|
|
Theta ;0398 greek capital letter theta
|
|
Iota ;0399 greek capital letter iota
|
|
Kappa ;039A greek capital letter kappa
|
|
Lambda ;039B greek capital letter lambda
|
|
Mu ;039C greek capital letter mu
|
|
Nu ;039D greek capital letter nu
|
|
Xi ;039E greek capital letter xi
|
|
Omicron ;039F greek capital letter omicron
|
|
Pi ;03A0 greek capital letter pi
|
|
Rho ;03A1 greek capital letter rho
|
|
Sigma ;03A3 greek capital letter sigma
|
|
Tau ;03A4 greek capital letter tau
|
|
Upsilon ;03A5 greek capital letter upsilon
|
|
Phi ;03A6 greek capital letter phi
|
|
Chi ;03A7 greek capital letter chi
|
|
Psi ;03A8 greek capital letter psi
|
|
Omega ;03A9 greek capital letter omega
|
|
alpha ;03B1 greek small letter alpha
|
|
beta ;03B2 greek small letter beta
|
|
gamma ;03B3 greek small letter gamma
|
|
delta ;03B4 greek small letter delta
|
|
epsilon ;03B5 greek small letter epsilon
|
|
zeta ;03B6 greek small letter zeta
|
|
eta ;03B7 greek small letter eta
|
|
theta ;03B8 greek small letter theta
|
|
iota ;03B9 greek small letter iota
|
|
kappa ;03BA greek small letter kappa
|
|
lambda ;03BB greek small letter lambda
|
|
mu ;03BC greek small letter mu
|
|
nu ;03BD greek small letter nu
|
|
xi ;03BE greek small letter xi
|
|
omicron ;03BF greek small letter omicron
|
|
pi ;03C0 greek small letter pi
|
|
rho ;03C1 greek small letter rho
|
|
sigmaf ;03C2 greek small letter final sigma
|
|
sigma ;03C3 greek small letter sigma
|
|
tau ;03C4 greek small letter tau
|
|
upsilon ;03C5 greek small letter upsilon
|
|
phi ;03C6 greek small letter phi
|
|
chi ;03C7 greek small letter chi
|
|
psi ;03C8 greek small letter psi
|
|
omega ;03C9 greek small letter omega
|
|
thetasym ;03D1 greek small letter theta symbol
|
|
upsih ;03D2 greek upsilon with hook symbol
|
|
piv ;03D6 greek pi symbol
|
|
;; *** General Punctuation
|
|
bull ;2022 bullet = black small circle
|
|
hellip ;2026 horizontal ellipsis = three dot leader
|
|
prime ;2032 prime = minutes = feet
|
|
Prime ;2033 double prime = seconds = inches
|
|
oline ;203E overline = spacing overscore
|
|
frasl ;2044 fraction slash
|
|
;; *** Letterlike Symbols
|
|
weierp ;2118 script capital P = power set = Weierstrass p
|
|
image ;2111 blackletter capital I = imaginary part
|
|
real ;211C blackletter capital R = real part symbol
|
|
trade ;2122 trade mark sign
|
|
alefsym ;2135 alef symbol = first transfinite cardinal
|
|
;; *** Arrows
|
|
larr ;2190 leftwards arrow
|
|
uarr ;2191 upwards arrow
|
|
rarr ;2192 rightwards arrow
|
|
darr ;2193 downwards arrow
|
|
harr ;2194 left right arrow
|
|
crarr ;21B5 downwards arrow with corner leftwards = carriage return
|
|
lArr ;21D0 leftwards double arrow
|
|
uArr ;21D1 upwards double arrow
|
|
rArr ;21D2 rightwards double arrow
|
|
dArr ;21D3 downwards double arrow
|
|
hArr ;21D4 left right double arrow
|
|
;; Mathematical Operators
|
|
forall ;2200 for all
|
|
part ;2202 partial differential
|
|
exist ;2203 there exists
|
|
empty ;2205 empty set = null set = diameter
|
|
nabla ;2207 nabla = backward difference
|
|
isin ;2208 element of
|
|
notin ;2209 not an element of
|
|
ni ;220B contains as member
|
|
prod ;220F n-ary product = product sign
|
|
sum ;2211 n-ary sumation
|
|
minus ;2212 minus sign
|
|
lowast ;2217 asterisk operator
|
|
radic ;221A square root = radical sign
|
|
prop ;221D proportional to
|
|
infin ;221E infinity
|
|
ang ;2220 angle
|
|
and ;2227 logical and = wedge
|
|
or ;2228 logical or = vee
|
|
cap ;2229 intersection = cap
|
|
cup ;222A union = cup
|
|
int ;222B integral
|
|
there4 ;2234 therefore
|
|
sim ;223C tilde operator = varies with = similar to
|
|
cong ;2245 approximately equal to
|
|
asymp ;2248 almost equal to = asymptotic to
|
|
ne ;2260 not equal to
|
|
equiv ;2261 identical to
|
|
le ;2264 less-than or equal to
|
|
ge ;2265 greater-than or equal to
|
|
sub ;2282 subset of
|
|
sup ;2283 superset of
|
|
nsub ;2284 not a subset of
|
|
sube ;2286 subset of or equal to
|
|
supe ;2287 superset of or equal to
|
|
oplus ;2295 circled plus = direct sum
|
|
otimes ;2297 circled times = vector product
|
|
perp ;22A5 up tack = orthogonal to = perpendicular
|
|
sdot ;22C5 dot operator
|
|
;; Miscellaneous Technical
|
|
lceil ;2308 left ceiling = apl upstile
|
|
rceil ;2309 right ceiling
|
|
lfloor ;230A left floor = apl downstile
|
|
rfloor ;230B right floor
|
|
lang ;2329 left-pointing angle bracket = bra
|
|
rang ;232A right-pointing angle bracket = ket
|
|
;; Geometric Shapes
|
|
loz ;25CA lozenge
|
|
;; Miscellaneous Symbols
|
|
spades ;2660 black spade suit
|
|
clubs ;2663 black club suit = shamrock
|
|
hearts ;2665 black heart suit = valentine
|
|
diams ;2666 black diamond suit
|
|
|
|
;; 24.4 Character entity references for markup-significant and
|
|
;; internationalization characters
|
|
;; C0 Controls and Basic Latin
|
|
quot ;0022 quotation mark = APL quote
|
|
amp ;0026 ampersand
|
|
lt ;003C less-than sign
|
|
gt ;003E greater-than sign
|
|
;; Latin Extended-A
|
|
OElig ;0152 latin capital ligature OE
|
|
oelig ;0153 latin small ligature oe
|
|
Scaron ;0160 latin capital letter S with caron
|
|
scaron ;0161 latin small letter s with caron
|
|
Yuml ;0178 latin capital letter Y with diaeresis
|
|
;; Spacing Modifier Letters
|
|
circ ;02C6 modifier letter circumflex accent
|
|
tilde ;02DC small tilde
|
|
;; General Punctuation
|
|
ensp ;2002 en space
|
|
emsp ;2003 em space
|
|
thinsp ;2009 thin space
|
|
zwnj ;200C zero width non-joiner
|
|
zwj ;200D zero width joiner
|
|
lrm ;200E left-to-right mark
|
|
rlm ;200F right-to-left mark
|
|
ndash ;2013 en dash
|
|
mdash ;2014 em dash
|
|
lsquo ;2018 left single quotation mark
|
|
rsquo ;2019 right single quotation mark
|
|
sbquo ;201A single low-9 quotation mark
|
|
ldquo ;201C left double quotation mark
|
|
rdquo ;201D right double quotation mark
|
|
bdquo ;201E double low-9 quotation mark
|
|
dagger ;2020 dagger
|
|
Dagger ;2021 double dagger
|
|
permil ;2030 per mille sign
|
|
lsaquo ;2039 single left-pointing angle quotation mark
|
|
rsaquo ;203A single right-pointing angle quotation mark
|
|
euro ;20AC euro sign
|
|
)
|