Add optional failure thunk for regexp and friends

This commit is contained in:
Asumu Takikawa 2016-02-20 23:38:40 -05:00 committed by Matthew Flatt
parent c59e1a2ea9
commit 436fca7134
5 changed files with 73 additions and 14 deletions

View File

@ -198,7 +198,9 @@ Returns @racket[#t] if @racket[v] is a @tech{regexp value} created by
otherwise.}
@defproc[(regexp [str string?]) regexp?]{
@defproc[(regexp [str string?]
[handler (-> any) (λ () (raise (exn:fail:contract ....)))])
regexp?]{
Takes a string representation of a regular expression (using the
syntax in @secref["regexp-syntax"]) and compiles it into a @tech{regexp
@ -208,15 +210,21 @@ is used multiple times, it is faster to compile the string once to a
@tech{regexp value} and use it for repeated matches instead of using the
string each time.
If @racket[handler] is provided, it is called and its result is returned
if @racket[str] is not a valid representation of a regular expression.
The @racket[object-name] procedure returns
the source string for a @tech{regexp value}.
@examples[
(regexp "ap*le")
(object-name #rx"ap*le")
(regexp "+" (λ () #f))
]}
@defproc[(pregexp [string string?]) pregexp?]{
@defproc[(pregexp [string string?]
[handler (-> any) (λ () (raise (exn:fail:contract ....)))])
pregexp?]{
Like @racket[regexp], except that it uses a slightly different syntax
(see @secref["regexp-syntax"]). The result can be used with
@ -226,14 +234,20 @@ Like @racket[regexp], except that it uses a slightly different syntax
@examples[
(pregexp "ap*le")
(regexp? #px"ap*le")
(pregexp "+" (λ () #f))
]}
@defproc[(byte-regexp [bstr bytes?]) byte-regexp?]{
@defproc[(byte-regexp [bstr bytes?]
[handler (-> any) (λ () (raise (exn:fail:contract ....)))])
byte-regexp?]{
Takes a byte-string representation of a regular expression (using the
syntax in @secref["regexp-syntax"]) and compiles it into a
byte-@tech{regexp value}.
If @racket[handler] is provided, it is called and its result is returned
if @racket[str] is not a valid representation of a regular expression.
The @racket[object-name] procedure
returns the source byte string for a @tech{regexp value}.
@ -241,9 +255,12 @@ returns the source byte string for a @tech{regexp value}.
(byte-regexp #"ap*le")
(object-name #rx#"ap*le")
(eval:error (byte-regexp "ap*le"))
(byte-regexp #"+" (λ () #f))
]}
@defproc[(byte-pregexp [bstr bytes?]) byte-pregexp?]{
@defproc[(byte-pregexp [bstr bytes?]
[handler (-> any) (λ () (raise (exn:fail:contract ....)))])
byte-pregexp?]{
Like @racket[byte-regexp], except that it uses a slightly different
syntax (see @secref["regexp-syntax"]). The result can be used with
@ -252,6 +269,7 @@ syntax (see @secref["regexp-syntax"]). The result can be used with
@examples[
(byte-pregexp #"ap*le")
(byte-pregexp #"+" (λ () #f))
]}
@defproc*[([(regexp-quote [str string?] [case-sensitive? any/c #t]) string?]

View File

@ -1364,7 +1364,7 @@
o))
(test (- N M) string-length (get-output-string o)))))
(arity-test regexp 1 1)
(arity-test regexp 1 2)
(arity-test regexp? 1 1)
(arity-test regexp-match 2 6)
(arity-test regexp-match-positions 2 6)

View File

@ -1784,6 +1784,18 @@
(test-values `(((0 . 3)) #"c")
(lambda () (regexp-match-peek-positions-immediate/end #rx".*" (open-input-string "abc"))))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Test failure handlers
(test #f regexp "+" (λ () #f))
(test #f pregexp "+" (λ () #f))
(test #f byte-regexp #"+" (λ () #f))
(test #f byte-pregexp #"+" (λ () #f))
(test 3 regexp "+" (λ () (+ 1 2)))
(test 3 pregexp "+" (λ () (+ 1 2)))
(test 3 byte-regexp #"+" (λ () (+ 1 2)))
(test 3 byte-pregexp #"+" (λ () (+ 1 2)))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(report-errs)

View File

@ -229,6 +229,8 @@ typedef struct Thread_Local_Variables {
rxpos regcodesize_;
rxpos regcodemax_;
intptr_t regmaxlookback_;
Scheme_Object *regerrorproc_;
Scheme_Object *regerrorval_;
intptr_t rx_buffer_size_;
rxpos *startp_buffer_cache_;
rxpos *endp_buffer_cache_;
@ -624,6 +626,8 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL;
#define regcodesize XOA (scheme_get_thread_local_variables()->regcodesize_)
#define regcodemax XOA (scheme_get_thread_local_variables()->regcodemax_)
#define regmaxlookback XOA (scheme_get_thread_local_variables()->regmaxlookback_)
#define regerrorproc XOA (scheme_get_thread_local_variables()->regerrorproc_)
#define regerrorval XOA (scheme_get_thread_local_variables()->regerrorval_)
#define rx_buffer_size XOA (scheme_get_thread_local_variables()->rx_buffer_size_)
#define startp_buffer_cache XOA (scheme_get_thread_local_variables()->startp_buffer_cache_)
#define endp_buffer_cache XOA (scheme_get_thread_local_variables()->endp_buffer_cache_)

View File

@ -60,7 +60,7 @@
# define rOPRNGS(o) OPRNGS(o, regstr)
# define NEXT_OP(scan) (scan + rNEXT(scan))
static regexp *regcomp(char *, rxpos, int, int);
static regexp *regcomp(char *, rxpos, int, int, Scheme_Object*);
/* static int regexec(regexp *, char *, int, int, rxpos *, rxpos * ...); */
/*
@ -85,6 +85,9 @@ THREAD_LOCAL_DECL(static rxpos regcodesize);
THREAD_LOCAL_DECL(static rxpos regcodemax);
THREAD_LOCAL_DECL(static intptr_t regmaxlookback);
THREAD_LOCAL_DECL(static Scheme_Object *regerrorproc); /* error handler for regexp construction */
THREAD_LOCAL_DECL(static Scheme_Object *regerrorval); /* result of error handler for failed regexp construction */
/* caches to avoid gc */
THREAD_LOCAL_DECL(static intptr_t rx_buffer_size);
THREAD_LOCAL_DECL(static rxpos *startp_buffer_cache);
@ -126,8 +129,12 @@ READ_ONLY static Scheme_Object *empty_byte_string;
static void
regerror(char *s)
{
scheme_raise_exn(MZEXN_FAIL_CONTRACT,
"regexp: %s", s);
if (SCHEME_FALSEP(regerrorproc)) {
scheme_raise_exn(MZEXN_FAIL_CONTRACT,
"regexp: %s", s);
} else {
regerrorval = scheme_apply(regerrorproc, 0, NULL);
}
}
THREAD_LOCAL_DECL(const char *failure_msg_for_read);
@ -158,7 +165,7 @@ regcomperror(char *s)
* of the structure of the compiled regexp.
*/
static regexp *
regcomp(char *expstr, rxpos exp, int explen, int pcre)
regcomp(char *expstr, rxpos exp, int explen, int pcre, Scheme_Object *handler)
{
regexp *r;
rxpos scan, next;
@ -180,6 +187,8 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre)
regmaxbackposn = 0;
regbackknown = NULL;
regbackdepends = NULL;
regerrorproc = handler;
regerrorval = NULL;
regc(MAGIC);
if (reg(0, &flags, 0, 0, PARSE_CASE_SENS | PARSE_SINGLE_LINE | (pcre ? PARSE_PCRE : 0)) == 0) {
FAIL("unknown regexp failure");
@ -5035,6 +5044,7 @@ int scheme_is_pregexp(Scheme_Object *o)
static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int argc, Scheme_Object *argv[])
{
Scheme_Object *re, *bs;
Scheme_Object *handler;
char *s;
int slen;
@ -5048,6 +5058,16 @@ static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int
bs = scheme_char_string_to_byte_string(argv[0]);
}
if (argc >= 2) {
if (!SCHEME_PROCP(argv[1])) {
scheme_wrong_contract(who, "(-> any)", 0, argc, argv);
}
scheme_check_proc_arity(who, 0, 1, argc, argv);
handler = argv[1];
} else {
handler = scheme_false;
}
s = SCHEME_BYTE_STR_VAL(bs);
slen = SCHEME_BYTE_STRTAG_VAL(bs);
@ -5068,7 +5088,12 @@ static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int
#endif
}
re = (Scheme_Object *)regcomp(s, 0, slen, pcre);
re = (Scheme_Object *)regcomp(s, 0, slen, pcre, handler);
/* passed a handler and regexp compilation failed */
if (!re) {
return regerrorval;
}
if (!is_byte)
((regexp *)re)->flags |= REGEXP_IS_UTF8;
@ -5993,10 +6018,10 @@ void scheme_regexp_initialize(Scheme_Env *env)
REGISTER_SO(empty_byte_string);
empty_byte_string = scheme_alloc_byte_string(0, 0);
GLOBAL_PRIM_W_ARITY("byte-regexp", make_regexp, 1, 1, env);
GLOBAL_PRIM_W_ARITY("regexp", make_utf8_regexp, 1, 1, env);
GLOBAL_PRIM_W_ARITY("byte-pregexp", make_pregexp, 1, 1, env);
GLOBAL_PRIM_W_ARITY("pregexp", make_utf8_pregexp, 1, 1, env);
GLOBAL_PRIM_W_ARITY("byte-regexp", make_regexp, 1, 2, env);
GLOBAL_PRIM_W_ARITY("regexp", make_utf8_regexp, 1, 2, env);
GLOBAL_PRIM_W_ARITY("byte-pregexp", make_pregexp, 1, 2, env);
GLOBAL_PRIM_W_ARITY("pregexp", make_utf8_pregexp, 1, 2, env);
GLOBAL_PRIM_W_ARITY("regexp-match", compare, 2, 6, env);
GLOBAL_PRIM_W_ARITY("regexp-match/end", compare_end, 2, 7, env);
GLOBAL_PRIM_W_ARITY("regexp-match-positions", positions, 2, 6, env);