293 lines
11 KiB
Scheme
293 lines
11 KiB
Scheme
;****************************************************************************
|
|
; My Scheme misc utility functions
|
|
; (mainly dealing with string and list manipulations)
|
|
;
|
|
; myenv.scm, myenv-bigloo.scm or similar prelude is assumed.
|
|
; From SRFI-13, import many functions
|
|
; If a particular implementation lacks SRFI-13 support, please
|
|
; include the file srfi-13-local.scm
|
|
;
|
|
; $Id: util.scm,v 1.2 2004/11/09 14:11:40 sperber Exp $
|
|
|
|
;------------------------------------------------------------------------
|
|
; Iterator ANY?
|
|
;
|
|
; -- procedure+: any? PRED COLLECTION
|
|
; Searches for the first element in the collection satisfying a
|
|
; given predicate
|
|
; That is, the procedure applies PRED to every element of the
|
|
; COLLECTION in turn.
|
|
; The first element for which PRED returns non-#f stops the iteration;
|
|
; the value of the predicate is returned.
|
|
; If none of the elements of the COLLECTION satisfy the predicate,
|
|
; the return value from the procedure is #f
|
|
; COLLECTION can be a list, a vector, a string, or an input port.
|
|
; See vmyenv.scm for validation tests.
|
|
|
|
(define (any? <pred?> coll)
|
|
(cond
|
|
((list? coll)
|
|
(let loop ((curr-l coll))
|
|
(if (null? curr-l) #f
|
|
(or (<pred?> (car curr-l)) (loop (cdr curr-l))))))
|
|
|
|
((vector? coll)
|
|
(let ((len (vector-length coll)))
|
|
(let loop ((i 0))
|
|
(if (>= i len) #f
|
|
(or (<pred?> (vector-ref coll i)) (loop (inc i)))))))
|
|
|
|
((string? coll)
|
|
(let ((len (string-length coll)))
|
|
(let loop ((i 0))
|
|
(if (>= i len) #f
|
|
(or (<pred?> (string-ref coll i)) (loop (inc i)))))))
|
|
|
|
((input-port? coll)
|
|
(let loop ((c (read-char coll)))
|
|
(if (eof-object? c) #f
|
|
(or (<pred?> c) (loop (read-char coll))))))
|
|
|
|
(else (error "any? on an invalid collection"))))
|
|
|
|
|
|
;------------------------------------------------------------------------
|
|
; Some list manipulation functions
|
|
|
|
; -- procedure+: list-intersperse SRC-L ELEM
|
|
; inserts ELEM between elements of the SRC-L, returning a freshly allocated
|
|
; list (cells, that is)
|
|
|
|
(define (list-intersperse src-l elem)
|
|
(if (null? src-l) src-l
|
|
(let loop ((l (cdr src-l)) (dest (cons (car src-l) '())))
|
|
(if (null? l) (reverse dest)
|
|
(loop (cdr l) (cons (car l) (cons elem dest)))))))
|
|
|
|
|
|
; -- procedure+: list-intersperse! SRC-L ELEM
|
|
; inserts ELEM between elements of the SRC-L inplace
|
|
|
|
(define (list-intersperse! src-l elem)
|
|
(if (null? src-l) src-l
|
|
(let loop ((l src-l))
|
|
(let ((next-l (cdr l)))
|
|
(if (null? next-l) src-l
|
|
(begin
|
|
(set-cdr! l (cons elem next-l))
|
|
(loop next-l)))))))
|
|
|
|
|
|
; List-tail-difference: given two lists, list1 and list2 where
|
|
; list2 is presumably a tail of list1, return
|
|
; a (freshly allocated) list which is a difference between list1
|
|
; and list2. If list2 is *not* a tail of list1, the entire list1
|
|
; is returned.
|
|
(define (list-tail-diff list1 list2)
|
|
(let loop ((l1-curr list1) (difference '()))
|
|
(cond
|
|
((eq? l1-curr list2) (reverse difference))
|
|
((null? l1-curr) (reverse difference))
|
|
(else (loop (cdr l1-curr) (cons (car l1-curr) difference))))))
|
|
|
|
|
|
;------------------------------------------------------------------------
|
|
; String utilities
|
|
; See SRFI-13 or srfi-13-local.scm
|
|
|
|
|
|
; Return the index of the last occurence of a-char in str, or #f
|
|
; See SRFI-13
|
|
(define string-rindex string-index-right)
|
|
|
|
; -- procedure+: substring? PATTERN STRING
|
|
; Searches STRING to see if it contains the substring PATTERN.
|
|
; Returns the index of the first substring of STRING that is equal
|
|
; to PATTERN; or `#f' if STRING does not contain PATTERN.
|
|
;
|
|
; (substring? "rat" "pirate") => 2
|
|
; (substring? "rat" "outrage") => #f
|
|
; (substring? "" any-string) => 0
|
|
(define (substring? pattern str) (string-contains str pattern))
|
|
|
|
|
|
; -- procedure+: string->integer STR START END
|
|
;
|
|
; Makes sure a substring of the STR from START (inclusive) till END
|
|
; (exclusive) is a representation of a non-negative integer in decimal
|
|
; notation. If so, this integer is returned. Otherwise -- when the
|
|
; substring contains non-decimal characters, or when the range from
|
|
; START till END is not within STR, the result is #f.
|
|
;
|
|
; This procedure is a simplification of the standard string->number.
|
|
; The latter is far more generic: for example, it will try to read
|
|
; strings like "1/2" "1S2" "1.34" and even "1/0" (the latter causing
|
|
; a zero-divide error). Note that to string->number, "1S2" is a valid
|
|
; representation of an _inexact_ integer (100 to be precise).
|
|
; Oftentimes we want to be more restrictive about what we consider a
|
|
; number; we want merely to read an integral label.
|
|
|
|
(define (string->integer str start end)
|
|
(and (< -1 start end (inc (string-length str)))
|
|
(let loop ((pos start) (accum 0))
|
|
(cond
|
|
((>= pos end) accum)
|
|
((char-numeric? (string-ref str pos))
|
|
(loop (inc pos) (+ (char->integer (string-ref str pos))
|
|
(- (char->integer #\0)) (* 10 accum))))
|
|
(else #f)))))
|
|
|
|
|
|
;
|
|
; -- procedure+: string-split STRING
|
|
; -- procedure+: string-split STRING '()
|
|
; -- procedure+: string-split STRING '() MAXSPLIT
|
|
;
|
|
; Returns a list of whitespace delimited words in STRING.
|
|
; If STRING is empty or contains only whitespace, then the empty list
|
|
; is returned. Leading and trailing whitespaces are trimmed.
|
|
; If MAXSPLIT is specified and positive, the resulting list will
|
|
; contain at most MAXSPLIT elements, the last of which is the string
|
|
; remaining after (MAXSPLIT - 1) splits. If MAXSPLIT is specified and
|
|
; non-positive, the empty list is returned. "In time critical
|
|
; applications it behooves you not to split into more fields than you
|
|
; really need."
|
|
;
|
|
; -- procedure+: string-split STRING CHARSET
|
|
; -- procedure+: string-split STRING CHARSET MAXSPLIT
|
|
;
|
|
; Returns a list of words delimited by the characters in CHARSET in
|
|
; STRING. CHARSET is a list of characters that are treated as delimiters.
|
|
; Leading or trailing delimeters are NOT trimmed. That is, the resulting
|
|
; list will have as many initial empty string elements as there are
|
|
; leading delimiters in STRING.
|
|
;
|
|
; If MAXSPLIT is specified and positive, the resulting list will
|
|
; contain at most MAXSPLIT elements, the last of which is the string
|
|
; remaining after (MAXSPLIT - 1) splits. If MAXSPLIT is specified and
|
|
; non-positive, the empty list is returned. "In time critical
|
|
; applications it behooves you not to split into more fields than you
|
|
; really need."
|
|
;
|
|
; This is based on the split function in Python/Perl
|
|
;
|
|
; (string-split " abc d e f ") ==> ("abc" "d" "e" "f")
|
|
; (string-split " abc d e f " '() 1) ==> ("abc d e f ")
|
|
; (string-split " abc d e f " '() 0) ==> ()
|
|
; (string-split ":abc:d:e::f:" '(#\:)) ==> ("" "abc" "d" "e" "" "f" "")
|
|
; (string-split ":" '(#\:)) ==> ("" "")
|
|
; (string-split "root:x:0:0:Lord" '(#\:) 2) ==> ("root" "x:0:0:Lord")
|
|
; (string-split "/usr/local/bin:/usr/bin:/usr/ucb/bin" '(#\:))
|
|
; ==> ("/usr/local/bin" "/usr/bin" "/usr/ucb/bin")
|
|
; (string-split "/usr/local/bin" '(#\/)) ==> ("" "usr" "local" "bin")
|
|
|
|
(define (string-split str . rest)
|
|
; maxsplit is a positive number
|
|
(define (split-by-whitespace str maxsplit)
|
|
(define (skip-ws i yet-to-split-count)
|
|
(cond
|
|
((>= i (string-length str)) '())
|
|
((char-whitespace? (string-ref str i))
|
|
(skip-ws (inc i) yet-to-split-count))
|
|
(else (scan-beg-word (inc i) i yet-to-split-count))))
|
|
(define (scan-beg-word i from yet-to-split-count)
|
|
(cond
|
|
((zero? yet-to-split-count)
|
|
(cons (substring str from (string-length str)) '()))
|
|
(else (scan-word i from yet-to-split-count))))
|
|
(define (scan-word i from yet-to-split-count)
|
|
(cond
|
|
((>= i (string-length str))
|
|
(cons (substring str from i) '()))
|
|
((char-whitespace? (string-ref str i))
|
|
(cons (substring str from i)
|
|
(skip-ws (inc i) (- yet-to-split-count 1))))
|
|
(else (scan-word (inc i) from yet-to-split-count))))
|
|
(skip-ws 0 (- maxsplit 1)))
|
|
|
|
; maxsplit is a positive number
|
|
; str is not empty
|
|
(define (split-by-charset str delimeters maxsplit)
|
|
(define (scan-beg-word from yet-to-split-count)
|
|
(cond
|
|
((>= from (string-length str)) '(""))
|
|
((zero? yet-to-split-count)
|
|
(cons (substring str from (string-length str)) '()))
|
|
(else (scan-word from from yet-to-split-count))))
|
|
(define (scan-word i from yet-to-split-count)
|
|
(cond
|
|
((>= i (string-length str))
|
|
(cons (substring str from i) '()))
|
|
((memq (string-ref str i) delimeters)
|
|
(cons (substring str from i)
|
|
(scan-beg-word (inc i) (- yet-to-split-count 1))))
|
|
(else (scan-word (inc i) from yet-to-split-count))))
|
|
(scan-beg-word 0 (- maxsplit 1)))
|
|
|
|
; resolver of overloading...
|
|
; if omitted, maxsplit defaults to
|
|
; (inc (string-length str))
|
|
(if (string-null? str) '()
|
|
(if (null? rest)
|
|
(split-by-whitespace str (inc (string-length str)))
|
|
(let ((charset (car rest))
|
|
(maxsplit
|
|
(if (pair? (cdr rest)) (cadr rest) (inc (string-length str)))))
|
|
(cond
|
|
((not (positive? maxsplit)) '())
|
|
((null? charset) (split-by-whitespace str maxsplit))
|
|
(else (split-by-charset str charset maxsplit))))))
|
|
)
|
|
|
|
|
|
; make-char-quotator QUOT-RULES
|
|
;
|
|
; Given QUOT-RULES, an assoc list of (char . string) pairs, return
|
|
; a quotation procedure. The returned quotation procedure takes a string
|
|
; and returns either a string or a list of strings. The quotation procedure
|
|
; check to see if its argument string contains any instance of a character
|
|
; that needs to be encoded (quoted). If the argument string is "clean",
|
|
; it is returned unchanged. Otherwise, the quotation procedure will
|
|
; return a list of string fragments. The input straing will be broken
|
|
; at the places where the special characters occur. The special character
|
|
; will be replaced by the corresponding encoding strings.
|
|
;
|
|
; For example, to make a procedure that quotes special HTML characters,
|
|
; do
|
|
; (make-char-quotator
|
|
; '((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """)))
|
|
|
|
(define (make-char-quotator char-encoding)
|
|
(let ((bad-chars (map car char-encoding)))
|
|
|
|
; Check to see if str contains one of the characters in charset,
|
|
; from the position i onward. If so, return that character's index.
|
|
; otherwise, return #f
|
|
(define (index-cset str i charset)
|
|
(let loop ((i i))
|
|
(and (< i (string-length str))
|
|
(if (memv (string-ref str i) charset) i
|
|
(loop (inc i))))))
|
|
|
|
; The body of the function
|
|
(lambda (str)
|
|
(let ((bad-pos (index-cset str 0 bad-chars)))
|
|
(if (not bad-pos) str ; str had all good chars
|
|
(let loop ((from 0) (to bad-pos))
|
|
(cond
|
|
((>= from (string-length str)) '())
|
|
((not to)
|
|
(cons (substring str from (string-length str)) '()))
|
|
(else
|
|
(let ((quoted-char
|
|
(cdr (assv (string-ref str to) char-encoding)))
|
|
(new-to
|
|
(index-cset str (inc to) bad-chars)))
|
|
(if (< from to)
|
|
(cons
|
|
(substring str from to)
|
|
(cons quoted-char (loop (inc to) new-to)))
|
|
(cons quoted-char (loop (inc to) new-to))))))))))
|
|
))
|
|
|