diff --git a/pkgs/racket-doc/scribblings/reference/regexps.scrbl b/pkgs/racket-doc/scribblings/reference/regexps.scrbl index 9472e06c95..122cba11e1 100644 --- a/pkgs/racket-doc/scribblings/reference/regexps.scrbl +++ b/pkgs/racket-doc/scribblings/reference/regexps.scrbl @@ -198,7 +198,9 @@ Returns @racket[#t] if @racket[v] is a @tech{regexp value} created by otherwise.} -@defproc[(regexp [str string?]) regexp?]{ +@defproc[(regexp [str string?] + [handler (-> any) (λ () (raise (exn:fail:contract ....)))]) + regexp?]{ Takes a string representation of a regular expression (using the syntax in @secref["regexp-syntax"]) and compiles it into a @tech{regexp @@ -208,15 +210,21 @@ is used multiple times, it is faster to compile the string once to a @tech{regexp value} and use it for repeated matches instead of using the string each time. +If @racket[handler] is provided, it is called and its result is returned +if @racket[str] is not a valid representation of a regular expression. + The @racket[object-name] procedure returns the source string for a @tech{regexp value}. @examples[ (regexp "ap*le") (object-name #rx"ap*le") +(regexp "+" (λ () #f)) ]} -@defproc[(pregexp [string string?]) pregexp?]{ +@defproc[(pregexp [string string?] + [handler (-> any) (λ () (raise (exn:fail:contract ....)))]) + pregexp?]{ Like @racket[regexp], except that it uses a slightly different syntax (see @secref["regexp-syntax"]). The result can be used with @@ -226,14 +234,20 @@ Like @racket[regexp], except that it uses a slightly different syntax @examples[ (pregexp "ap*le") (regexp? #px"ap*le") +(pregexp "+" (λ () #f)) ]} -@defproc[(byte-regexp [bstr bytes?]) byte-regexp?]{ +@defproc[(byte-regexp [bstr bytes?] + [handler (-> any) (λ () (raise (exn:fail:contract ....)))]) + byte-regexp?]{ Takes a byte-string representation of a regular expression (using the syntax in @secref["regexp-syntax"]) and compiles it into a byte-@tech{regexp value}. +If @racket[handler] is provided, it is called and its result is returned +if @racket[str] is not a valid representation of a regular expression. + The @racket[object-name] procedure returns the source byte string for a @tech{regexp value}. @@ -241,9 +255,12 @@ returns the source byte string for a @tech{regexp value}. (byte-regexp #"ap*le") (object-name #rx#"ap*le") (eval:error (byte-regexp "ap*le")) +(byte-regexp #"+" (λ () #f)) ]} -@defproc[(byte-pregexp [bstr bytes?]) byte-pregexp?]{ +@defproc[(byte-pregexp [bstr bytes?] + [handler (-> any) (λ () (raise (exn:fail:contract ....)))]) + byte-pregexp?]{ Like @racket[byte-regexp], except that it uses a slightly different syntax (see @secref["regexp-syntax"]). The result can be used with @@ -252,6 +269,7 @@ syntax (see @secref["regexp-syntax"]). The result can be used with @examples[ (byte-pregexp #"ap*le") +(byte-pregexp #"+" (λ () #f)) ]} @defproc*[([(regexp-quote [str string?] [case-sensitive? any/c #t]) string?] diff --git a/pkgs/racket-test-core/tests/racket/basic.rktl b/pkgs/racket-test-core/tests/racket/basic.rktl index 7c3ca8590b..b92448688b 100644 --- a/pkgs/racket-test-core/tests/racket/basic.rktl +++ b/pkgs/racket-test-core/tests/racket/basic.rktl @@ -1364,7 +1364,7 @@ o)) (test (- N M) string-length (get-output-string o))))) -(arity-test regexp 1 1) +(arity-test regexp 1 2) (arity-test regexp? 1 1) (arity-test regexp-match 2 6) (arity-test regexp-match-positions 2 6) diff --git a/pkgs/racket-test-core/tests/racket/rx.rktl b/pkgs/racket-test-core/tests/racket/rx.rktl index 9450a72e18..3574538021 100644 --- a/pkgs/racket-test-core/tests/racket/rx.rktl +++ b/pkgs/racket-test-core/tests/racket/rx.rktl @@ -1784,6 +1784,18 @@ (test-values `(((0 . 3)) #"c") (lambda () (regexp-match-peek-positions-immediate/end #rx".*" (open-input-string "abc")))) +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Test failure handlers + +(test #f regexp "+" (λ () #f)) +(test #f pregexp "+" (λ () #f)) +(test #f byte-regexp #"+" (λ () #f)) +(test #f byte-pregexp #"+" (λ () #f)) +(test 3 regexp "+" (λ () (+ 1 2))) +(test 3 pregexp "+" (λ () (+ 1 2))) +(test 3 byte-regexp #"+" (λ () (+ 1 2))) +(test 3 byte-pregexp #"+" (λ () (+ 1 2))) + ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (report-errs) diff --git a/racket/src/racket/include/schthread.h b/racket/src/racket/include/schthread.h index 2bf55034d5..72c1d286c3 100644 --- a/racket/src/racket/include/schthread.h +++ b/racket/src/racket/include/schthread.h @@ -229,6 +229,8 @@ typedef struct Thread_Local_Variables { rxpos regcodesize_; rxpos regcodemax_; intptr_t regmaxlookback_; + Scheme_Object *regerrorproc_; + Scheme_Object *regerrorval_; intptr_t rx_buffer_size_; rxpos *startp_buffer_cache_; rxpos *endp_buffer_cache_; @@ -624,6 +626,8 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL; #define regcodesize XOA (scheme_get_thread_local_variables()->regcodesize_) #define regcodemax XOA (scheme_get_thread_local_variables()->regcodemax_) #define regmaxlookback XOA (scheme_get_thread_local_variables()->regmaxlookback_) +#define regerrorproc XOA (scheme_get_thread_local_variables()->regerrorproc_) +#define regerrorval XOA (scheme_get_thread_local_variables()->regerrorval_) #define rx_buffer_size XOA (scheme_get_thread_local_variables()->rx_buffer_size_) #define startp_buffer_cache XOA (scheme_get_thread_local_variables()->startp_buffer_cache_) #define endp_buffer_cache XOA (scheme_get_thread_local_variables()->endp_buffer_cache_) diff --git a/racket/src/racket/src/regexp.c b/racket/src/racket/src/regexp.c index e501539f5d..e6ac3194d6 100644 --- a/racket/src/racket/src/regexp.c +++ b/racket/src/racket/src/regexp.c @@ -60,7 +60,7 @@ # define rOPRNGS(o) OPRNGS(o, regstr) # define NEXT_OP(scan) (scan + rNEXT(scan)) -static regexp *regcomp(char *, rxpos, int, int); +static regexp *regcomp(char *, rxpos, int, int, Scheme_Object*); /* static int regexec(regexp *, char *, int, int, rxpos *, rxpos * ...); */ /* @@ -85,6 +85,9 @@ THREAD_LOCAL_DECL(static rxpos regcodesize); THREAD_LOCAL_DECL(static rxpos regcodemax); THREAD_LOCAL_DECL(static intptr_t regmaxlookback); +THREAD_LOCAL_DECL(static Scheme_Object *regerrorproc); /* error handler for regexp construction */ +THREAD_LOCAL_DECL(static Scheme_Object *regerrorval); /* result of error handler for failed regexp construction */ + /* caches to avoid gc */ THREAD_LOCAL_DECL(static intptr_t rx_buffer_size); THREAD_LOCAL_DECL(static rxpos *startp_buffer_cache); @@ -126,8 +129,12 @@ READ_ONLY static Scheme_Object *empty_byte_string; static void regerror(char *s) { - scheme_raise_exn(MZEXN_FAIL_CONTRACT, - "regexp: %s", s); + if (SCHEME_FALSEP(regerrorproc)) { + scheme_raise_exn(MZEXN_FAIL_CONTRACT, + "regexp: %s", s); + } else { + regerrorval = scheme_apply(regerrorproc, 0, NULL); + } } THREAD_LOCAL_DECL(const char *failure_msg_for_read); @@ -158,7 +165,7 @@ regcomperror(char *s) * of the structure of the compiled regexp. */ static regexp * -regcomp(char *expstr, rxpos exp, int explen, int pcre) +regcomp(char *expstr, rxpos exp, int explen, int pcre, Scheme_Object *handler) { regexp *r; rxpos scan, next; @@ -180,6 +187,8 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre) regmaxbackposn = 0; regbackknown = NULL; regbackdepends = NULL; + regerrorproc = handler; + regerrorval = NULL; regc(MAGIC); if (reg(0, &flags, 0, 0, PARSE_CASE_SENS | PARSE_SINGLE_LINE | (pcre ? PARSE_PCRE : 0)) == 0) { FAIL("unknown regexp failure"); @@ -5035,6 +5044,7 @@ int scheme_is_pregexp(Scheme_Object *o) static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int argc, Scheme_Object *argv[]) { Scheme_Object *re, *bs; + Scheme_Object *handler; char *s; int slen; @@ -5048,6 +5058,16 @@ static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int bs = scheme_char_string_to_byte_string(argv[0]); } + if (argc >= 2) { + if (!SCHEME_PROCP(argv[1])) { + scheme_wrong_contract(who, "(-> any)", 0, argc, argv); + } + scheme_check_proc_arity(who, 0, 1, argc, argv); + handler = argv[1]; + } else { + handler = scheme_false; + } + s = SCHEME_BYTE_STR_VAL(bs); slen = SCHEME_BYTE_STRTAG_VAL(bs); @@ -5068,7 +5088,12 @@ static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int #endif } - re = (Scheme_Object *)regcomp(s, 0, slen, pcre); + re = (Scheme_Object *)regcomp(s, 0, slen, pcre, handler); + + /* passed a handler and regexp compilation failed */ + if (!re) { + return regerrorval; + } if (!is_byte) ((regexp *)re)->flags |= REGEXP_IS_UTF8; @@ -5993,10 +6018,10 @@ void scheme_regexp_initialize(Scheme_Env *env) REGISTER_SO(empty_byte_string); empty_byte_string = scheme_alloc_byte_string(0, 0); - GLOBAL_PRIM_W_ARITY("byte-regexp", make_regexp, 1, 1, env); - GLOBAL_PRIM_W_ARITY("regexp", make_utf8_regexp, 1, 1, env); - GLOBAL_PRIM_W_ARITY("byte-pregexp", make_pregexp, 1, 1, env); - GLOBAL_PRIM_W_ARITY("pregexp", make_utf8_pregexp, 1, 1, env); + GLOBAL_PRIM_W_ARITY("byte-regexp", make_regexp, 1, 2, env); + GLOBAL_PRIM_W_ARITY("regexp", make_utf8_regexp, 1, 2, env); + GLOBAL_PRIM_W_ARITY("byte-pregexp", make_pregexp, 1, 2, env); + GLOBAL_PRIM_W_ARITY("pregexp", make_utf8_pregexp, 1, 2, env); GLOBAL_PRIM_W_ARITY("regexp-match", compare, 2, 6, env); GLOBAL_PRIM_W_ARITY("regexp-match/end", compare_end, 2, 7, env); GLOBAL_PRIM_W_ARITY("regexp-match-positions", positions, 2, 6, env);