adjustments for regexp
failure handler
Pass a string to the handler to describe the problem. Also, fix minor issues (GC registration, contracts and `history` in docs) and make `pregexp`, etc., report compilation errors as `pregexp`, etc.
This commit is contained in:
parent
436fca7134
commit
13ebd0e1c8
|
@ -198,9 +198,10 @@ Returns @racket[#t] if @racket[v] is a @tech{regexp value} created by
|
||||||
otherwise.}
|
otherwise.}
|
||||||
|
|
||||||
|
|
||||||
@defproc[(regexp [str string?]
|
@defproc*[([(regexp [str string?]) regexp?]
|
||||||
[handler (-> any) (λ () (raise (exn:fail:contract ....)))])
|
[(regexp [str string?]
|
||||||
regexp?]{
|
[handler (or/c #f (string? -> any))])
|
||||||
|
any])]{
|
||||||
|
|
||||||
Takes a string representation of a regular expression (using the
|
Takes a string representation of a regular expression (using the
|
||||||
syntax in @secref["regexp-syntax"]) and compiles it into a @tech{regexp
|
syntax in @secref["regexp-syntax"]) and compiles it into a @tech{regexp
|
||||||
|
@ -210,8 +211,11 @@ is used multiple times, it is faster to compile the string once to a
|
||||||
@tech{regexp value} and use it for repeated matches instead of using the
|
@tech{regexp value} and use it for repeated matches instead of using the
|
||||||
string each time.
|
string each time.
|
||||||
|
|
||||||
If @racket[handler] is provided, it is called and its result is returned
|
If @racket[handler] is provided and not @racket[#f], it is called and
|
||||||
if @racket[str] is not a valid representation of a regular expression.
|
its result is returned when @racket[str] is not a valid representation
|
||||||
|
of a regular expression; the argument to @racket[handler] is a string
|
||||||
|
that describes the problem with @racket[str]. If @racket[handler] is
|
||||||
|
@racket[#f] or not provided, then @exnraise[exn:fail:contract].
|
||||||
|
|
||||||
The @racket[object-name] procedure returns
|
The @racket[object-name] procedure returns
|
||||||
the source string for a @tech{regexp value}.
|
the source string for a @tech{regexp value}.
|
||||||
|
@ -219,12 +223,15 @@ the source string for a @tech{regexp value}.
|
||||||
@examples[
|
@examples[
|
||||||
(regexp "ap*le")
|
(regexp "ap*le")
|
||||||
(object-name #rx"ap*le")
|
(object-name #rx"ap*le")
|
||||||
(regexp "+" (λ () #f))
|
(regexp "+" (λ (s) (list s)))
|
||||||
]}
|
]
|
||||||
|
|
||||||
@defproc[(pregexp [string string?]
|
@history[#:changed "6.5.0.1" @elem{Added the @racket[handler] argument.}]}
|
||||||
[handler (-> any) (λ () (raise (exn:fail:contract ....)))])
|
|
||||||
pregexp?]{
|
@defproc*[([(pregexp [str string?]) pregexp?]
|
||||||
|
[(pregexp [str string?]
|
||||||
|
[handler (or/c #f (string? -> any))])
|
||||||
|
any])]{
|
||||||
|
|
||||||
Like @racket[regexp], except that it uses a slightly different syntax
|
Like @racket[regexp], except that it uses a slightly different syntax
|
||||||
(see @secref["regexp-syntax"]). The result can be used with
|
(see @secref["regexp-syntax"]). The result can be used with
|
||||||
|
@ -234,12 +241,15 @@ Like @racket[regexp], except that it uses a slightly different syntax
|
||||||
@examples[
|
@examples[
|
||||||
(pregexp "ap*le")
|
(pregexp "ap*le")
|
||||||
(regexp? #px"ap*le")
|
(regexp? #px"ap*le")
|
||||||
(pregexp "+" (λ () #f))
|
(pregexp "+" (λ (s) (vector s)))
|
||||||
]}
|
]
|
||||||
|
|
||||||
@defproc[(byte-regexp [bstr bytes?]
|
@history[#:changed "6.5.0.1" @elem{Added the @racket[handler] argument.}]}
|
||||||
[handler (-> any) (λ () (raise (exn:fail:contract ....)))])
|
|
||||||
byte-regexp?]{
|
@defproc*[([(byte-regexp [bstr bytes?]) byte-regexp?]
|
||||||
|
[(byte-regexp [bstr bytes?]
|
||||||
|
[handler (or/c #f (string? -> any))])
|
||||||
|
any])]{
|
||||||
|
|
||||||
Takes a byte-string representation of a regular expression (using the
|
Takes a byte-string representation of a regular expression (using the
|
||||||
syntax in @secref["regexp-syntax"]) and compiles it into a
|
syntax in @secref["regexp-syntax"]) and compiles it into a
|
||||||
|
@ -255,12 +265,15 @@ returns the source byte string for a @tech{regexp value}.
|
||||||
(byte-regexp #"ap*le")
|
(byte-regexp #"ap*le")
|
||||||
(object-name #rx#"ap*le")
|
(object-name #rx#"ap*le")
|
||||||
(eval:error (byte-regexp "ap*le"))
|
(eval:error (byte-regexp "ap*le"))
|
||||||
(byte-regexp #"+" (λ () #f))
|
(byte-regexp #"+" (λ (s) (list s)))
|
||||||
]}
|
]
|
||||||
|
|
||||||
@defproc[(byte-pregexp [bstr bytes?]
|
@history[#:changed "6.5.0.1" @elem{Added the @racket[handler] argument.}]}
|
||||||
[handler (-> any) (λ () (raise (exn:fail:contract ....)))])
|
|
||||||
byte-pregexp?]{
|
@defproc*[([(byte-pregexp [bstr bytes?]) byte-pregexp?]
|
||||||
|
[(byte-pregexp [bstr bytes?]
|
||||||
|
[handler (or/c #f (string? -> any))])
|
||||||
|
any])]{
|
||||||
|
|
||||||
Like @racket[byte-regexp], except that it uses a slightly different
|
Like @racket[byte-regexp], except that it uses a slightly different
|
||||||
syntax (see @secref["regexp-syntax"]). The result can be used with
|
syntax (see @secref["regexp-syntax"]). The result can be used with
|
||||||
|
@ -269,8 +282,10 @@ syntax (see @secref["regexp-syntax"]). The result can be used with
|
||||||
|
|
||||||
@examples[
|
@examples[
|
||||||
(byte-pregexp #"ap*le")
|
(byte-pregexp #"ap*le")
|
||||||
(byte-pregexp #"+" (λ () #f))
|
(byte-pregexp #"+" (λ (s) (vector s)))
|
||||||
]}
|
]
|
||||||
|
|
||||||
|
@history[#:changed "6.5.0.1" @elem{Added the @racket[handler] argument.}]}
|
||||||
|
|
||||||
@defproc*[([(regexp-quote [str string?] [case-sensitive? any/c #t]) string?]
|
@defproc*[([(regexp-quote [str string?] [case-sensitive? any/c #t]) string?]
|
||||||
[(regexp-quote [bstr bytes?] [case-sensitive? any/c #t]) bytes?])]{
|
[(regexp-quote [bstr bytes?] [case-sensitive? any/c #t]) bytes?])]{
|
||||||
|
|
|
@ -1787,14 +1787,18 @@
|
||||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; Test failure handlers
|
;; Test failure handlers
|
||||||
|
|
||||||
(test #f regexp "+" (λ () #f))
|
(test "`+' follows nothing in pattern" regexp "+" (λ (s) s))
|
||||||
(test #f pregexp "+" (λ () #f))
|
(test "`+' follows nothing in pattern" pregexp "+" (λ (s) s))
|
||||||
(test #f byte-regexp #"+" (λ () #f))
|
(test "`+' follows nothing in pattern" byte-regexp #"+" (λ (s) s))
|
||||||
(test #f byte-pregexp #"+" (λ () #f))
|
(test "`+' follows nothing in pattern" byte-pregexp #"+" (λ (s) s))
|
||||||
(test 3 regexp "+" (λ () (+ 1 2)))
|
(test 3 regexp "+" (λ (s) (+ 1 2)))
|
||||||
(test 3 pregexp "+" (λ () (+ 1 2)))
|
(test 3 pregexp "+" (λ (s) (+ 1 2)))
|
||||||
(test 3 byte-regexp #"+" (λ () (+ 1 2)))
|
(test 3 byte-regexp #"+" (λ (s) (+ 1 2)))
|
||||||
(test 3 byte-pregexp #"+" (λ () (+ 1 2)))
|
(test 3 byte-pregexp #"+" (λ (s) (+ 1 2)))
|
||||||
|
|
||||||
|
(test-values '(1 2 3) (lambda () (byte-pregexp #"+" (λ (s) (values 1 2 3)))))
|
||||||
|
|
||||||
|
(err/rt-test (regexp "+" #f) (lambda (exn) (regexp-match? "`[+]' follows nothing in pattern" (exn-message exn))))
|
||||||
|
|
||||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
|
|
@ -229,6 +229,7 @@ typedef struct Thread_Local_Variables {
|
||||||
rxpos regcodesize_;
|
rxpos regcodesize_;
|
||||||
rxpos regcodemax_;
|
rxpos regcodemax_;
|
||||||
intptr_t regmaxlookback_;
|
intptr_t regmaxlookback_;
|
||||||
|
const char *regerrorwho_;
|
||||||
Scheme_Object *regerrorproc_;
|
Scheme_Object *regerrorproc_;
|
||||||
Scheme_Object *regerrorval_;
|
Scheme_Object *regerrorval_;
|
||||||
intptr_t rx_buffer_size_;
|
intptr_t rx_buffer_size_;
|
||||||
|
@ -626,6 +627,7 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL;
|
||||||
#define regcodesize XOA (scheme_get_thread_local_variables()->regcodesize_)
|
#define regcodesize XOA (scheme_get_thread_local_variables()->regcodesize_)
|
||||||
#define regcodemax XOA (scheme_get_thread_local_variables()->regcodemax_)
|
#define regcodemax XOA (scheme_get_thread_local_variables()->regcodemax_)
|
||||||
#define regmaxlookback XOA (scheme_get_thread_local_variables()->regmaxlookback_)
|
#define regmaxlookback XOA (scheme_get_thread_local_variables()->regmaxlookback_)
|
||||||
|
#define regerrorwho XOA (scheme_get_thread_local_variables()->regerrorwho_)
|
||||||
#define regerrorproc XOA (scheme_get_thread_local_variables()->regerrorproc_)
|
#define regerrorproc XOA (scheme_get_thread_local_variables()->regerrorproc_)
|
||||||
#define regerrorval XOA (scheme_get_thread_local_variables()->regerrorval_)
|
#define regerrorval XOA (scheme_get_thread_local_variables()->regerrorval_)
|
||||||
#define rx_buffer_size XOA (scheme_get_thread_local_variables()->rx_buffer_size_)
|
#define rx_buffer_size XOA (scheme_get_thread_local_variables()->rx_buffer_size_)
|
||||||
|
|
|
@ -85,6 +85,7 @@ THREAD_LOCAL_DECL(static rxpos regcodesize);
|
||||||
THREAD_LOCAL_DECL(static rxpos regcodemax);
|
THREAD_LOCAL_DECL(static rxpos regcodemax);
|
||||||
THREAD_LOCAL_DECL(static intptr_t regmaxlookback);
|
THREAD_LOCAL_DECL(static intptr_t regmaxlookback);
|
||||||
|
|
||||||
|
THREAD_LOCAL_DECL(static char *regerrorwho);
|
||||||
THREAD_LOCAL_DECL(static Scheme_Object *regerrorproc); /* error handler for regexp construction */
|
THREAD_LOCAL_DECL(static Scheme_Object *regerrorproc); /* error handler for regexp construction */
|
||||||
THREAD_LOCAL_DECL(static Scheme_Object *regerrorval); /* result of error handler for failed regexp construction */
|
THREAD_LOCAL_DECL(static Scheme_Object *regerrorval); /* result of error handler for failed regexp construction */
|
||||||
|
|
||||||
|
@ -129,11 +130,19 @@ READ_ONLY static Scheme_Object *empty_byte_string;
|
||||||
static void
|
static void
|
||||||
regerror(char *s)
|
regerror(char *s)
|
||||||
{
|
{
|
||||||
|
if (!regerrorval) {
|
||||||
if (SCHEME_FALSEP(regerrorproc)) {
|
if (SCHEME_FALSEP(regerrorproc)) {
|
||||||
|
const char *who = regerrorwho;
|
||||||
|
regerrorwho = NULL;
|
||||||
scheme_raise_exn(MZEXN_FAIL_CONTRACT,
|
scheme_raise_exn(MZEXN_FAIL_CONTRACT,
|
||||||
"regexp: %s", s);
|
"%s: %s",
|
||||||
|
(who ? who : "regexp"),
|
||||||
|
s);
|
||||||
} else {
|
} else {
|
||||||
regerrorval = scheme_apply(regerrorproc, 0, NULL);
|
Scheme_Object *a[1];
|
||||||
|
a[0] = scheme_make_utf8_string(s);
|
||||||
|
regerrorval = scheme_apply_multi(regerrorproc, 1, a);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,6 +200,8 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre, Scheme_Object *handler)
|
||||||
regerrorval = NULL;
|
regerrorval = NULL;
|
||||||
regc(MAGIC);
|
regc(MAGIC);
|
||||||
if (reg(0, &flags, 0, 0, PARSE_CASE_SENS | PARSE_SINGLE_LINE | (pcre ? PARSE_PCRE : 0)) == 0) {
|
if (reg(0, &flags, 0, 0, PARSE_CASE_SENS | PARSE_SINGLE_LINE | (pcre ? PARSE_PCRE : 0)) == 0) {
|
||||||
|
if (regerrorval)
|
||||||
|
return NULL;
|
||||||
FAIL("unknown regexp failure");
|
FAIL("unknown regexp failure");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5050,7 +5061,7 @@ static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int
|
||||||
|
|
||||||
if (is_byte) {
|
if (is_byte) {
|
||||||
if (!SCHEME_BYTE_STRINGP(argv[0]))
|
if (!SCHEME_BYTE_STRINGP(argv[0]))
|
||||||
scheme_wrong_contract(who, "byte?", 0, argc, argv);
|
scheme_wrong_contract(who, "bytes?", 0, argc, argv);
|
||||||
bs = argv[0];
|
bs = argv[0];
|
||||||
} else {
|
} else {
|
||||||
if (!SCHEME_CHAR_STRINGP(argv[0]))
|
if (!SCHEME_CHAR_STRINGP(argv[0]))
|
||||||
|
@ -5059,14 +5070,11 @@ static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argc >= 2) {
|
if (argc >= 2) {
|
||||||
if (!SCHEME_PROCP(argv[1])) {
|
if (!scheme_check_proc_arity2(who, 1, 1, argc, argv, 1))
|
||||||
scheme_wrong_contract(who, "(-> any)", 0, argc, argv);
|
scheme_wrong_contract(who, "(or/c #f (string? -> any))", 1, argc, argv);
|
||||||
}
|
|
||||||
scheme_check_proc_arity(who, 0, 1, argc, argv);
|
|
||||||
handler = argv[1];
|
handler = argv[1];
|
||||||
} else {
|
} else
|
||||||
handler = scheme_false;
|
handler = scheme_false;
|
||||||
}
|
|
||||||
|
|
||||||
s = SCHEME_BYTE_STR_VAL(bs);
|
s = SCHEME_BYTE_STR_VAL(bs);
|
||||||
slen = SCHEME_BYTE_STRTAG_VAL(bs);
|
slen = SCHEME_BYTE_STRTAG_VAL(bs);
|
||||||
|
@ -5088,7 +5096,9 @@ static Scheme_Object *do_make_regexp(const char *who, int is_byte, int pcre, int
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
regerrorwho = who;
|
||||||
re = (Scheme_Object *)regcomp(s, 0, slen, pcre, handler);
|
re = (Scheme_Object *)regcomp(s, 0, slen, pcre, handler);
|
||||||
|
regerrorwho = NULL;
|
||||||
|
|
||||||
/* passed a handler and regexp compilation failed */
|
/* passed a handler and regexp compilation failed */
|
||||||
if (!re) {
|
if (!re) {
|
||||||
|
@ -6050,4 +6060,6 @@ void scheme_init_regexp_places()
|
||||||
REGISTER_SO(regstr);
|
REGISTER_SO(regstr);
|
||||||
REGISTER_SO(regbackknown);
|
REGISTER_SO(regbackknown);
|
||||||
REGISTER_SO(regbackdepends);
|
REGISTER_SO(regbackdepends);
|
||||||
|
REGISTER_SO(regerrorproc);
|
||||||
|
REGISTER_SO(regerrorval);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user