From 13ebd0e1c83fc6ce8311bfff5c0717f09c95892c Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Fri, 8 Apr 2016 18:29:23 -0600 Subject: [PATCH] adjustments for `regexp` failure handler Pass a string to the handler to describe the problem. Also, fix minor issues (GC registration, contracts and `history` in docs) and make `pregexp`, etc., report compilation errors as `pregexp`, etc. --- .../scribblings/reference/regexps.scrbl | 59 ++++++++++++------- pkgs/racket-test-core/tests/racket/rx.rktl | 20 ++++--- racket/src/racket/include/schthread.h | 2 + racket/src/racket/src/regexp.c | 38 ++++++++---- 4 files changed, 76 insertions(+), 43 deletions(-) diff --git a/pkgs/racket-doc/scribblings/reference/regexps.scrbl b/pkgs/racket-doc/scribblings/reference/regexps.scrbl index 122cba11e1..aaebe98c74 100644 --- a/pkgs/racket-doc/scribblings/reference/regexps.scrbl +++ b/pkgs/racket-doc/scribblings/reference/regexps.scrbl @@ -198,9 +198,10 @@ Returns @racket[#t] if @racket[v] is a @tech{regexp value} created by otherwise.} -@defproc[(regexp [str string?] - [handler (-> any) (λ () (raise (exn:fail:contract ....)))]) - regexp?]{ +@defproc*[([(regexp [str string?]) regexp?] + [(regexp [str string?] + [handler (or/c #f (string? -> any))]) + any])]{ Takes a string representation of a regular expression (using the syntax in @secref["regexp-syntax"]) and compiles it into a @tech{regexp @@ -210,8 +211,11 @@ is used multiple times, it is faster to compile the string once to a @tech{regexp value} and use it for repeated matches instead of using the string each time. -If @racket[handler] is provided, it is called and its result is returned -if @racket[str] is not a valid representation of a regular expression. +If @racket[handler] is provided and not @racket[#f], it is called and +its result is returned when @racket[str] is not a valid representation +of a regular expression; the argument to @racket[handler] is a string +that describes the problem with @racket[str]. If @racket[handler] is +@racket[#f] or not provided, then @exnraise[exn:fail:contract]. The @racket[object-name] procedure returns the source string for a @tech{regexp value}. @@ -219,12 +223,15 @@ the source string for a @tech{regexp value}. @examples[ (regexp "ap*le") (object-name #rx"ap*le") -(regexp "+" (λ () #f)) -]} +(regexp "+" (λ (s) (list s))) +] -@defproc[(pregexp [string string?] - [handler (-> any) (λ () (raise (exn:fail:contract ....)))]) - pregexp?]{ +@history[#:changed "6.5.0.1" @elem{Added the @racket[handler] argument.}]} + +@defproc*[([(pregexp [str string?]) regexp?] + [(pregexp [str string?] + [handler (or/c #f (string? -> any))]) + any])]{ Like @racket[regexp], except that it uses a slightly different syntax (see @secref["regexp-syntax"]). The result can be used with @@ -234,12 +241,15 @@ Like @racket[regexp], except that it uses a slightly different syntax @examples[ (pregexp "ap*le") (regexp? #px"ap*le") -(pregexp "+" (λ () #f)) -]} +(pregexp "+" (λ (s) (vector s))) +] -@defproc[(byte-regexp [bstr bytes?] - [handler (-> any) (λ () (raise (exn:fail:contract ....)))]) - byte-regexp?]{ +@history[#:changed "6.5.0.1" @elem{Added the @racket[handler] argument.}]} + +@defproc*[([(byte-regexp [str string?]) regexp?] + [(byte-regexp [str string?] + [handler (or/c #f (string? -> any))]) + any])]{ Takes a byte-string representation of a regular expression (using the syntax in @secref["regexp-syntax"]) and compiles it into a @@ -255,12 +265,15 @@ returns the source byte string for a @tech{regexp value}. (byte-regexp #"ap*le") (object-name #rx#"ap*le") (eval:error (byte-regexp "ap*le")) -(byte-regexp #"+" (λ () #f)) -]} +(byte-regexp #"+" (λ (s) (list s))) +] -@defproc[(byte-pregexp [bstr bytes?] - [handler (-> any) (λ () (raise (exn:fail:contract ....)))]) - byte-pregexp?]{ +@history[#:changed "6.5.0.1" @elem{Added the @racket[handler] argument.}]} + +@defproc*[([(byte-pregexp [str string?]) regexp?] + [(byte-pregexp [str string?] + [handler (or/c #f (string? -> any))]) + any])]{ Like @racket[byte-regexp], except that it uses a slightly different syntax (see @secref["regexp-syntax"]). The result can be used with @@ -269,8 +282,10 @@ syntax (see @secref["regexp-syntax"]). The result can be used with @examples[ (byte-pregexp #"ap*le") -(byte-pregexp #"+" (λ () #f)) -]} +(byte-pregexp #"+" (λ (s) (vector s))) +] + +@history[#:changed "6.5.0.1" @elem{Added the @racket[handler] argument.}]} @defproc*[([(regexp-quote [str string?] [case-sensitive? any/c #t]) string?] [(regexp-quote [bstr bytes?] [case-sensitive? any/c #t]) bytes?])]{ diff --git a/pkgs/racket-test-core/tests/racket/rx.rktl b/pkgs/racket-test-core/tests/racket/rx.rktl index 3574538021..996cf00413 100644 --- a/pkgs/racket-test-core/tests/racket/rx.rktl +++ b/pkgs/racket-test-core/tests/racket/rx.rktl @@ -1787,14 +1787,18 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Test failure handlers -(test #f regexp "+" (λ () #f)) -(test #f pregexp "+" (λ () #f)) -(test #f byte-regexp #"+" (λ () #f)) -(test #f byte-pregexp #"+" (λ () #f)) -(test 3 regexp "+" (λ () (+ 1 2))) -(test 3 pregexp "+" (λ () (+ 1 2))) -(test 3 byte-regexp #"+" (λ () (+ 1 2))) -(test 3 byte-pregexp #"+" (λ () (+ 1 2))) +(test "`+' follows nothing in pattern" regexp "+" (λ (s) s)) +(test "`+' follows nothing in pattern" pregexp "+" (λ (s) s)) +(test "`+' follows nothing in pattern" byte-regexp #"+" (λ (s) s)) +(test "`+' follows nothing in pattern" byte-pregexp #"+" (λ (s) s)) +(test 3 regexp "+" (λ (s) (+ 1 2))) +(test 3 pregexp "+" (λ (s) (+ 1 2))) +(test 3 byte-regexp #"+" (λ (s) (+ 1 2))) +(test 3 byte-pregexp #"+" (λ (s) (+ 1 2))) + +(test-values '(1 2 3) (lambda () (byte-pregexp #"+" (λ (s) (values 1 2 3))))) + +(err/rt-test (regexp "+" #f) (lambda (exn) (regexp-match? "`[+]' follows nothing in pattern" (exn-message exn)))) ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/racket/src/racket/include/schthread.h b/racket/src/racket/include/schthread.h index 72c1d286c3..29c76b764c 100644 --- a/racket/src/racket/include/schthread.h +++ b/racket/src/racket/include/schthread.h @@ -229,6 +229,7 @@ typedef struct Thread_Local_Variables { rxpos regcodesize_; rxpos regcodemax_; intptr_t regmaxlookback_; + const char *regerrorwho_; Scheme_Object *regerrorproc_; Scheme_Object *regerrorval_; intptr_t rx_buffer_size_; @@ -626,6 +627,7 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL; #define regcodesize XOA (scheme_get_thread_local_variables()->regcodesize_) #define regcodemax XOA (scheme_get_thread_local_variables()->regcodemax_) #define regmaxlookback XOA (scheme_get_thread_local_variables()->regmaxlookback_) +#define regerrorwho XOA (scheme_get_thread_local_variables()->regerrorwho_) #define regerrorproc XOA (scheme_get_thread_local_variables()->regerrorproc_) #define regerrorval XOA (scheme_get_thread_local_variables()->regerrorval_) #define rx_buffer_size XOA (scheme_get_thread_local_variables()->rx_buffer_size_) diff --git a/racket/src/racket/src/regexp.c b/racket/src/racket/src/regexp.c index e6ac3194d6..1c54e766d8 100644 --- a/racket/src/racket/src/regexp.c +++ b/racket/src/racket/src/regexp.c @@ -85,6 +85,7 @@ THREAD_LOCAL_DECL(static rxpos regcodesize); THREAD_LOCAL_DECL(static rxpos regcodemax); THREAD_LOCAL_DECL(static intptr_t regmaxlookback); +THREAD_LOCAL_DECL(static char *regerrorwho); THREAD_LOCAL_DECL(static Scheme_Object *regerrorproc); /* error handler for regexp construction */ THREAD_LOCAL_DECL(static Scheme_Object *regerrorval); /* result of error handler for failed regexp construction */ @@ -129,11 +130,19 @@ READ_ONLY static Scheme_Object *empty_byte_string; static void regerror(char *s) { - if (SCHEME_FALSEP(regerrorproc)) { - scheme_raise_exn(MZEXN_FAIL_CONTRACT, - "regexp: %s", s); - } else { - regerrorval = scheme_apply(regerrorproc, 0, NULL); + if (!regerrorval) { + if (SCHEME_FALSEP(regerrorproc)) { + const char *who = regerrorwho; + regerrorwho = NULL; + scheme_raise_exn(MZEXN_FAIL_CONTRACT, + "%s: %s", + (who ? who : "regexp"), + s); + } else { + Scheme_Object *a[1]; + a[0] = scheme_make_utf8_string(s); + regerrorval = scheme_apply_multi(regerrorproc, 1, a); + } } } @@ -191,6 +200,8 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre, Scheme_Object *handler) regerrorval = NULL; regc(MAGIC); if (reg(0, &flags, 0, 0, PARSE_CASE_SENS | PARSE_SINGLE_LINE | (pcre ? PARSE_PCRE : 0)) == 0) { + if (regerrorval) + return NULL; FAIL("unknown regexp failure"); } @@ -5050,7 +5061,7 @@ static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int if (is_byte) { if (!SCHEME_BYTE_STRINGP(argv[0])) - scheme_wrong_contract(who, "byte?", 0, argc, argv); + scheme_wrong_contract(who, "bytes?", 0, argc, argv); bs = argv[0]; } else { if (!SCHEME_CHAR_STRINGP(argv[0])) @@ -5059,14 +5070,11 @@ static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int } if (argc >= 2) { - if (!SCHEME_PROCP(argv[1])) { - scheme_wrong_contract(who, "(-> any)", 0, argc, argv); - } - scheme_check_proc_arity(who, 0, 1, argc, argv); + if (!scheme_check_proc_arity2(who, 1, 1, argc, argv, 1)) + scheme_wrong_contract(who, "(or/c #f (string? -> any))", 1, argc, argv); handler = argv[1]; - } else { + } else handler = scheme_false; - } s = SCHEME_BYTE_STR_VAL(bs); slen = SCHEME_BYTE_STRTAG_VAL(bs); @@ -5088,8 +5096,10 @@ static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int #endif } + regerrorwho = who; re = (Scheme_Object *)regcomp(s, 0, slen, pcre, handler); - + regerrorwho = NULL; + /* passed a handler and regexp compilation failed */ if (!re) { return regerrorval; @@ -6050,4 +6060,6 @@ void scheme_init_regexp_places() REGISTER_SO(regstr); REGISTER_SO(regbackknown); REGISTER_SO(regbackdepends); + REGISTER_SO(regerrorproc); + REGISTER_SO(regerrorval); }