From ac120cf5460224d69fa57ef6533bdae38859c4c0 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 28 May 2008 17:40:23 +0000 Subject: [PATCH] more consistent cleaning of index strings; fix for 'get-module-code' when no file (source, bytecode, or extension) is available svn: r10002 original commit: fdd99b83dc4001b6ad6e96a4a0d8054eabc346c2 --- collects/scribble/basic.ss | 19 +++---------------- collects/scribble/decode.ss | 22 ++++++++++++++++++---- collects/scribble/manual.ss | 2 +- collects/scribble/struct.ss | 6 +++++- collects/scribblings/scribble/basic.scrbl | 14 ++++++++------ collects/scribblings/scribble/decode.scrbl | 5 +++++ collects/scribblings/scribble/struct.scrbl | 5 +++-- 7 files changed, 43 insertions(+), 30 deletions(-) diff --git a/collects/scribble/basic.ss b/collects/scribble/basic.ss index 105efef0..9be4ed4d 100644 --- a/collects/scribble/basic.ss +++ b/collects/scribble/basic.ss @@ -161,24 +161,11 @@ ;; ---------------------------------------- -(provide section-index index index* as-index index-section index-blocks - clean-up-index-string) +(provide section-index index index* as-index index-section index-blocks) (define (section-index . elems) (make-part-index-decl (map element->string elems) elems)) -(define (clean-up-index-string s) - ;; Remove leading spaces or trailing, which might appear there due - ;; to images or something else that gets dropped in string form. - ;; Then collapse whitespace. - (regexp-replace* #px"\\s+" - (regexp-replace #rx"^ +" - (regexp-replace #rx" +$" - s - "") - "") - " ")) - (define (record-index word-seq element-seq tag content) (make-index-element #f (list (make-target-element #f content `(idx ,tag))) @@ -189,11 +176,11 @@ (define (index* word-seq content-seq . s) (let ([key (make-generated-tag)]) - (record-index word-seq content-seq key (decode-content s)))) + (record-index (map clean-up-index-string word-seq) content-seq key (decode-content s)))) (define (index word-seq . s) (let ([word-seq (if (string? 
word-seq) (list word-seq) word-seq)]) - (apply index* word-seq word-seq s))) + (apply index* (map clean-up-index-string word-seq) word-seq s))) (define (as-index . s) (let ([key (make-generated-tag)] diff --git a/collects/scribble/decode.ss b/collects/scribble/decode.ss index f055debe..0df55749 100644 --- a/collects/scribble/decode.ss +++ b/collects/scribble/decode.ss @@ -12,7 +12,8 @@ decode-content (rename-out [decode-content decode-elements]) decode-string - whitespace?) + whitespace? + clean-up-index-string) (provide-structs [title-decl ([tag-prefix (or/c false/c string?)] @@ -31,6 +32,18 @@ [part-collect-decl ([element (or/c element? part-relative-element?)])] [part-tag-decl ([tag tag?])]) +(define (clean-up-index-string s) + ;; Remove leading spaces or trailing, which might appear there due + ;; to images or something else that gets dropped in string form. + ;; Then collapse whitespace. + (regexp-replace* #px"\\s+" + (regexp-replace #rx"^ +" + (regexp-replace #rx" +$" + s + "") + "") + " ")) + (define (decode-string s) (let loop ([l '((#rx"---" mdash) (#rx"--" ndash) @@ -88,9 +101,10 @@ (and (list? style) (memq 'hidden style))))) (cons (make-index-element #f null (car tags) - (list (regexp-replace - #px"^\\s+(?:(?:A|An|The)\\s)?" - (content->string title) "")) + (list (clean-up-index-string + (regexp-replace + #px"^\\s+(?:(?:A|An|The)\\s)?" 
+ (content->string title) ""))) (list (make-element #f title)) (make-part-index-desc)) l) diff --git a/collects/scribble/manual.ss b/collects/scribble/manual.ss index 691f791d..ef6a364a 100644 --- a/collects/scribble/manual.ss +++ b/collects/scribble/manual.ss @@ -585,7 +585,7 @@ (make-index-element #f (list t) (target-element-tag t) - (list (element->string e)) + (list (clean-up-index-string (element->string e))) (list e) 'tech))) diff --git a/collects/scribble/struct.ss b/collects/scribble/struct.ss index d629b1ce..2ef0942f 100644 --- a/collects/scribble/struct.ss +++ b/collects/scribble/struct.ss @@ -124,6 +124,10 @@ (blockquote? p) (delayed-block? p))) +(define (string-without-newline? s) + (and (string? s) + (not (regexp-match? #rx"\n" s)))) + (provide-structs [part ([tag-prefix (or/c false/c string?)] [tags (listof tag?)] @@ -156,7 +160,7 @@ [alt-anchor string?])] [(link-element element) ([tag tag?])] [(index-element element) ([tag tag?] - [plain-seq (listof string?)] + [plain-seq (and/c pair? (listof string-without-newline?))] [entry-seq list?] [desc any/c])] [(aux-element element) ()] diff --git a/collects/scribblings/scribble/basic.scrbl b/collects/scribblings/scribble/basic.scrbl index 762d685c..cc26e22f 100644 --- a/collects/scribblings/scribble/basic.scrbl +++ b/collects/scribblings/scribble/basic.scrbl @@ -81,8 +81,9 @@ The @scheme[vers] argument is propagated to the @scheme[title-decl] structure. The section title is automatically indexed by -@scheme[decode-part]. For the index key, a leading ``A'', ``An'', or -``The'' (followed by whitespace) is removed.} +@scheme[decode-part]. 
For the index key, leading whitespace and a +leading ``A'', ``An'', or ``The'' (followed by more whitespace) is +removed.} @def-section-like[section part-start?]{ Like @scheme[title], but @@ -171,10 +172,11 @@ Wraps the @tech{decode}d @scheme[pre-content] as an element with style Creates an index element given a plain-text string---or list of strings for a hierarchy, such as @scheme['("strings" "plain")] for a -``plain'' entry below a more general ``strings'' entry. The strings -also serve as the text to render in the index. The @tech{decode}d -@scheme[pre-content] is the text to appear inline as the index -target.} +``plain'' entry below a more general ``strings'' entry. As index keys, +the strings are ``cleaned'' using @scheme[clean-up-index-string]. The +strings (without clean-up) also serve as the text to render in the +index. The @tech{decode}d @scheme[pre-content] is the text to appear +inline as the index target.} @defproc[(index* [words (listof string?)] diff --git a/collects/scribblings/scribble/decode.scrbl b/collects/scribblings/scribble/decode.scrbl index da1ccc9d..58f8b8b2 100644 --- a/collects/scribblings/scribble/decode.scrbl +++ b/collects/scribblings/scribble/decode.scrbl @@ -159,3 +159,8 @@ See @scheme[decode], @scheme[decode-part], and @scheme[decode-flow]. } +@defproc[(clean-up-index-string [str string?]) string?]{ + +Trims leading and trailing whitespace, and converts non-empty +sequences of whitespace to a single space character.} + diff --git a/collects/scribblings/scribble/struct.scrbl b/collects/scribblings/scribble/struct.scrbl index 793cfc5e..edcbdd23 100644 --- a/collects/scribblings/scribble/struct.scrbl +++ b/collects/scribblings/scribble/struct.scrbl @@ -441,7 +441,7 @@ Hyperlinks the content to @scheme[tag]. @defstruct[(index-element element) ([tag tag?] - [plain-seq (and/c (listof string?) cons?)] + [plain-seq (and/c pair? (listof string?))] [entry-seq list?] 
[desc any/c])]{ @@ -453,7 +453,8 @@ former would be represented by @scheme[plain-seq] @scheme['("night" "things that go bump in")], and the latter by @scheme['("night" "defender of the")]. Naturally, single-@tech{element} @scheme[plain-seq] lists are the common case, and at least one word is -required, but there is no limit to the word-list length. +required, but there is no limit to the word-list length. The strings in +@scheme[plain-seq] must not contain a newline character. The @scheme[entry-seq] list must have the same length as @scheme[plain-seq]. It provides the form of each key to render in the