PR 13471: Add cycle-detection during lex-abbrev expansion to raise appropriate syntax error.
Also add test cases for lex-abbrev cycle detection.
This commit is contained in:
parent
042ec40a7b
commit
7146289c34
|
@ -40,6 +40,35 @@
|
|||
(let ()
|
||||
(lexer ((a) 1))))))
|
||||
|
||||
;; Detecting mutual recursion cycle:
|
||||
(check-regexp-match #rx"regular-expression"
|
||||
(catch-syn-error
|
||||
(let ()
|
||||
(define-lex-abbrev a b)
|
||||
(define-lex-abbrev b a)
|
||||
(let ()
|
||||
(lexer ((a) 1))))))
|
||||
|
||||
(check-regexp-match #rx"regular-expression"
|
||||
(catch-syn-error
|
||||
(let ()
|
||||
(define-lex-abbrev a (repetition 0 1 b))
|
||||
(define-lex-abbrev b (repetition 0 1 a))
|
||||
(let ()
|
||||
(lexer ((a) 1))))))
|
||||
|
||||
;; Detecting cycle within same abbreviation:
|
||||
(check-regexp-match #rx"regular-expression"
|
||||
(catch-syn-error
|
||||
(let ()
|
||||
(define-lex-abbrev balanced
|
||||
(union (concatenation "(" balanced ")" balanced)
|
||||
any-char))
|
||||
(lexer
|
||||
[balanced (string-append lexeme (balanced input-port))]
|
||||
[(eof) ""]))))
|
||||
|
||||
|
||||
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1))))
|
||||
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1))))
|
||||
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1))))
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
(module stx mzscheme
|
||||
(require syntax/boundmap
|
||||
"util.rkt")
|
||||
(require "util.rkt"
|
||||
syntax/id-table)
|
||||
|
||||
(provide parse)
|
||||
|
||||
|
@ -37,103 +37,118 @@
|
|||
;; checks for errors and generates the plain s-exp form for s
|
||||
;; Expands lex-abbrevs and applies lex-trans.
|
||||
(define (parse stx disappeared-uses)
|
||||
(let ((parse
|
||||
(lambda (s)
|
||||
(parse (syntax-rearm s stx)
|
||||
disappeared-uses))))
|
||||
(syntax-case (disarm stx) (repetition union intersection complement concatenation
|
||||
char-range char-complement)
|
||||
(_
|
||||
(identifier? stx)
|
||||
(let ((expansion (syntax-local-value stx (lambda () #f))))
|
||||
(unless (lex-abbrev? expansion)
|
||||
(raise-syntax-error 'regular-expression
|
||||
"undefined abbreviation"
|
||||
stx))
|
||||
(set-box! disappeared-uses (cons stx (unbox disappeared-uses)))
|
||||
(parse ((lex-abbrev-get-abbrev expansion)))))
|
||||
(_
|
||||
(or (char? (syntax-e stx)) (string? (syntax-e stx)))
|
||||
(syntax-e stx))
|
||||
((repetition arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 3 (length arg-list))
|
||||
(bad-args stx 2))
|
||||
(let ((low (syntax-e (car arg-list)))
|
||||
(high (syntax-e (cadr arg-list)))
|
||||
(re (caddr arg-list)))
|
||||
(unless (and (number? low) (exact? low) (integer? low) (>= low 0))
|
||||
(raise-syntax-error #f
|
||||
"not a non-negative exact integer"
|
||||
stx
|
||||
(car arg-list)))
|
||||
(unless (or (and (number? high) (exact? high) (integer? high) (>= high 0))
|
||||
(eq? high +inf.0))
|
||||
(raise-syntax-error #f
|
||||
"not a non-negative exact integer or +inf.0"
|
||||
stx
|
||||
(cadr arg-list)))
|
||||
(unless (<= low high)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"the first argument is not less than or equal to the second argument"
|
||||
stx))
|
||||
`(repetition ,low ,high ,(parse re)))))
|
||||
((union re ...)
|
||||
`(union ,@(map parse (syntax->list (syntax (re ...))))))
|
||||
((intersection re ...)
|
||||
`(intersection ,@(map parse (syntax->list (syntax (re ...))))))
|
||||
((complement re ...)
|
||||
(let ((re-list (syntax->list (syntax (re ...)))))
|
||||
(unless (= 1 (length re-list))
|
||||
(bad-args stx 1))
|
||||
`(complement ,(parse (car re-list)))))
|
||||
((concatenation re ...)
|
||||
`(concatenation ,@(map parse (syntax->list (syntax (re ...))))))
|
||||
((char-range arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 2 (length arg-list))
|
||||
(bad-args stx 2))
|
||||
(let ((i1 (char-range-arg (car arg-list) stx))
|
||||
(i2 (char-range-arg (cadr arg-list) stx)))
|
||||
(if (<= i1 i2)
|
||||
`(char-range ,(integer->char i1) ,(integer->char i2))
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"the first argument does not precede or equal second argument"
|
||||
stx)))))
|
||||
((char-complement arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 1 (length arg-list))
|
||||
(bad-args stx 1))
|
||||
(let ((parsed (parse (car arg-list))))
|
||||
(unless (char-set? parsed)
|
||||
(raise-syntax-error #f
|
||||
"not a character set"
|
||||
stx
|
||||
(car arg-list)))
|
||||
`(char-complement ,parsed))))
|
||||
((op form ...)
|
||||
(identifier? (syntax op))
|
||||
(let* ((o (syntax op))
|
||||
(expansion (syntax-local-value o (lambda () #f))))
|
||||
(set-box! disappeared-uses (cons o (unbox disappeared-uses)))
|
||||
(cond
|
||||
((lex-trans? expansion)
|
||||
(parse ((lex-trans-f expansion) (disarm stx))))
|
||||
(expansion
|
||||
(raise-syntax-error 'regular-expression
|
||||
"not a lex-trans"
|
||||
stx))
|
||||
(else
|
||||
(raise-syntax-error 'regular-expression
|
||||
"undefined operator"
|
||||
stx)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'regular-expression
|
||||
"not a char, string, identifier, or (op args ...)"
|
||||
stx)))))
|
||||
(let loop ([stx stx]
|
||||
[disappeared-uses disappeared-uses]
|
||||
;; seen-lex-abbrevs: id-table
|
||||
[seen-lex-abbrevs (make-immutable-free-id-table)])
|
||||
(let ([recur (lambda (s)
|
||||
(loop (syntax-rearm s stx)
|
||||
disappeared-uses
|
||||
seen-lex-abbrevs))]
|
||||
[recur/abbrev (lambda (s id)
|
||||
(loop (syntax-rearm s stx)
|
||||
disappeared-uses
|
||||
(free-id-table-set seen-lex-abbrevs id id)))])
|
||||
(syntax-case (disarm stx) (repetition union intersection complement concatenation
|
||||
char-range char-complement)
|
||||
(_
|
||||
(identifier? stx)
|
||||
(let ((expansion (syntax-local-value stx (lambda () #f))))
|
||||
(unless (lex-abbrev? expansion)
|
||||
(raise-syntax-error 'regular-expression
|
||||
"undefined abbreviation"
|
||||
stx))
|
||||
;; Check for cycles.
|
||||
(when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f))
|
||||
(raise-syntax-error 'regular-expression
|
||||
"illegal lex-abbrev cycle detected"
|
||||
stx
|
||||
#f
|
||||
(list (free-id-table-ref seen-lex-abbrevs stx))))
|
||||
(set-box! disappeared-uses (cons stx (unbox disappeared-uses)))
|
||||
(recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx)))
|
||||
(_
|
||||
(or (char? (syntax-e stx)) (string? (syntax-e stx)))
|
||||
(syntax-e stx))
|
||||
((repetition arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 3 (length arg-list))
|
||||
(bad-args stx 2))
|
||||
(let ((low (syntax-e (car arg-list)))
|
||||
(high (syntax-e (cadr arg-list)))
|
||||
(re (caddr arg-list)))
|
||||
(unless (and (number? low) (exact? low) (integer? low) (>= low 0))
|
||||
(raise-syntax-error #f
|
||||
"not a non-negative exact integer"
|
||||
stx
|
||||
(car arg-list)))
|
||||
(unless (or (and (number? high) (exact? high) (integer? high) (>= high 0))
|
||||
(eq? high +inf.0))
|
||||
(raise-syntax-error #f
|
||||
"not a non-negative exact integer or +inf.0"
|
||||
stx
|
||||
(cadr arg-list)))
|
||||
(unless (<= low high)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"the first argument is not less than or equal to the second argument"
|
||||
stx))
|
||||
`(repetition ,low ,high ,(recur re)))))
|
||||
((union re ...)
|
||||
`(union ,@(map recur (syntax->list (syntax (re ...))))))
|
||||
((intersection re ...)
|
||||
`(intersection ,@(map recur (syntax->list (syntax (re ...))))))
|
||||
((complement re ...)
|
||||
(let ((re-list (syntax->list (syntax (re ...)))))
|
||||
(unless (= 1 (length re-list))
|
||||
(bad-args stx 1))
|
||||
`(complement ,(recur (car re-list)))))
|
||||
((concatenation re ...)
|
||||
`(concatenation ,@(map recur (syntax->list (syntax (re ...))))))
|
||||
((char-range arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 2 (length arg-list))
|
||||
(bad-args stx 2))
|
||||
(let ((i1 (char-range-arg (car arg-list) stx))
|
||||
(i2 (char-range-arg (cadr arg-list) stx)))
|
||||
(if (<= i1 i2)
|
||||
`(char-range ,(integer->char i1) ,(integer->char i2))
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"the first argument does not precede or equal second argument"
|
||||
stx)))))
|
||||
((char-complement arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 1 (length arg-list))
|
||||
(bad-args stx 1))
|
||||
(let ((parsed (recur (car arg-list))))
|
||||
(unless (char-set? parsed)
|
||||
(raise-syntax-error #f
|
||||
"not a character set"
|
||||
stx
|
||||
(car arg-list)))
|
||||
`(char-complement ,parsed))))
|
||||
((op form ...)
|
||||
(identifier? (syntax op))
|
||||
(let* ((o (syntax op))
|
||||
(expansion (syntax-local-value o (lambda () #f))))
|
||||
(set-box! disappeared-uses (cons o (unbox disappeared-uses)))
|
||||
(cond
|
||||
((lex-trans? expansion)
|
||||
(recur ((lex-trans-f expansion) (disarm stx))))
|
||||
(expansion
|
||||
(raise-syntax-error 'regular-expression
|
||||
"not a lex-trans"
|
||||
stx))
|
||||
(else
|
||||
(raise-syntax-error 'regular-expression
|
||||
"undefined operator"
|
||||
stx)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'regular-expression
|
||||
"not a char, string, identifier, or (op args ...)"
|
||||
stx))))))
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user