allow paths as input to regexp-matching functions
This commit is contained in:
parent
745adb03d1
commit
acb98cef60
|
@ -126,7 +126,12 @@
|
||||||
success-choose failure-k
|
success-choose failure-k
|
||||||
port-success-k port-success-choose port-failure-k
|
port-success-k port-success-choose port-failure-k
|
||||||
need-leftover? peek?)
|
need-leftover? peek?)
|
||||||
(let* ([len (cond [(string? string) (string-length string)]
|
(let* ([string (if (path? string)
|
||||||
|
(if (or (string? pattern) (regexp? pattern))
|
||||||
|
(path->string string)
|
||||||
|
(path->bytes string))
|
||||||
|
string)]
|
||||||
|
[len (cond [(string? string) (string-length string)]
|
||||||
[(bytes? string) (bytes-length string)]
|
[(bytes? string) (bytes-length string)]
|
||||||
[else #f])]
|
[else #f])]
|
||||||
[orig-rx (cond [(bytes? pattern) (byte-regexp pattern)]
|
[orig-rx (cond [(bytes? pattern) (byte-regexp pattern)]
|
||||||
|
@ -143,7 +148,7 @@
|
||||||
(raise-type-error 'name "input port" string))
|
(raise-type-error 'name "input port" string))
|
||||||
(unless (or len (input-port? string))
|
(unless (or len (input-port? string))
|
||||||
(raise-type-error
|
(raise-type-error
|
||||||
'name "string, byte string or input port" string)))
|
'name "string, byte string, path, or input port" string)))
|
||||||
(unless (and (number? start) (exact? start) (integer? start)
|
(unless (and (number? start) (exact? start) (integer? start)
|
||||||
(start . >= . 0))
|
(start . >= . 0))
|
||||||
(raise-type-error 'name "non-negative exact integer" start))
|
(raise-type-error 'name "non-negative exact integer" start))
|
||||||
|
@ -283,7 +288,11 @@
|
||||||
(bytes? pattern)))
|
(bytes? pattern)))
|
||||||
(string->bytes/utf-8 string (char->integer #\?))
|
(string->bytes/utf-8 string (char->integer #\?))
|
||||||
string))
|
string))
|
||||||
(define sub (if (bytes? buf) subbytes substring))
|
(define sub (if (or (bytes? buf) (and (path? string)
|
||||||
|
(or (bytes? pattern)
|
||||||
|
(byte-regexp? pattern))))
|
||||||
|
subbytes
|
||||||
|
substring))
|
||||||
(regexp-loop regexp-split loop start end pattern buf ipre
|
(regexp-loop regexp-split loop start end pattern buf ipre
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
(lambda (start mstart mend ms acc) (cons (sub buf start mstart) acc))
|
(lambda (start mstart mend ms acc) (cons (sub buf start mstart) acc))
|
||||||
|
@ -418,7 +427,11 @@
|
||||||
(bytes? pattern)))
|
(bytes? pattern)))
|
||||||
(string->bytes/utf-8 string (char->integer #\?))
|
(string->bytes/utf-8 string (char->integer #\?))
|
||||||
string))
|
string))
|
||||||
(define sub (if (bytes? buf) subbytes substring))
|
(define sub (if (or (bytes? buf) (and (path? string)
|
||||||
|
(or (bytes? pattern)
|
||||||
|
(byte-regexp? pattern))))
|
||||||
|
subbytes
|
||||||
|
substring))
|
||||||
(regexp-loop regexp-match* loop start end pattern buf ipre
|
(regexp-loop regexp-match* loop start end pattern buf ipre
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
(lambda (start mstart mend ms acc) (cons (sub buf mstart mend) acc))
|
(lambda (start mstart mend ms acc) (cons (sub buf mstart mend) acc))
|
||||||
|
|
|
@ -210,20 +210,26 @@ the start of the input or of a line.}
|
||||||
@section{Regexp Matching}
|
@section{Regexp Matching}
|
||||||
|
|
||||||
@defproc[(regexp-match [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? path? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[output-port (or/c output-port? #f) #f]
|
[output-port (or/c output-port? #f) #f]
|
||||||
[input-prefix bytes? #""])
|
[input-prefix bytes? #""])
|
||||||
(if (and (or (string? pattern) (regexp? pattern))
|
(if (and (or (string? pattern) (regexp? pattern))
|
||||||
(string? input))
|
(or (string? input) (path? input)))
|
||||||
(or/c #f (cons/c string? (listof (or/c string? #f))))
|
(or/c #f (cons/c string? (listof (or/c string? #f))))
|
||||||
(or/c #f (cons/c bytes? (listof (or/c bytes? #f)))))]{
|
(or/c #f (cons/c bytes? (listof (or/c bytes? #f)))))]{
|
||||||
|
|
||||||
Attempts to match @racket[pattern] (a string, byte string, @tech{regexp
|
Attempts to match @racket[pattern] (a string, byte string,
|
||||||
value}, or byte-@tech{regexp value}) once to a portion of @racket[input]. The
|
@tech{regexp value}, or byte-@tech{regexp value}) once to a portion of
|
||||||
matcher finds a portion of @racket[input] that matches and is closest
|
@racket[input]. The matcher finds a portion of @racket[input] that
|
||||||
to the start of the input (after @racket[start-pos]).
|
matches and is closest to the start of the input (after
|
||||||
|
@racket[start-pos]).
|
||||||
|
|
||||||
|
If @racket[input] is a path, it is converted to a byte string with
|
||||||
|
@racket[path->bytes] if @racket[pattern] is a byte string or a
|
||||||
|
byte-based regexp. Otherwise, @racket[input] is converted to a string
|
||||||
|
with @racket[path->string].
|
||||||
|
|
||||||
The optional @racket[start-pos] and @racket[end-pos] arguments select
|
The optional @racket[start-pos] and @racket[end-pos] arguments select
|
||||||
a portion of @racket[input] for matching; the default is the entire
|
a portion of @racket[input] for matching; the default is the entire
|
||||||
|
@ -320,12 +326,12 @@ bytes. To avoid such interleaving, use @racket[regexp-match-peek]
|
||||||
|
|
||||||
|
|
||||||
@defproc[(regexp-match* [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match* [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? path? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[input-prefix bytes? #""])
|
[input-prefix bytes? #""])
|
||||||
(if (and (or (string? pattern) (regexp? pattern))
|
(if (and (or (string? pattern) (regexp? pattern))
|
||||||
(string? input))
|
(or (string? input) (path? input)))
|
||||||
(listof string?)
|
(listof string?)
|
||||||
(listof bytes?))]{
|
(listof bytes?))]{
|
||||||
|
|
||||||
|
@ -381,7 +387,7 @@ fails.}
|
||||||
|
|
||||||
|
|
||||||
@defproc[(regexp-match-positions [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match-positions [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? path? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[output-port (or/c output-port? #f) #f]
|
[output-port (or/c output-port? #f) #f]
|
||||||
|
@ -414,7 +420,7 @@ positions indicate the number of bytes that were read, including
|
||||||
]}
|
]}
|
||||||
|
|
||||||
@defproc[(regexp-match-positions* [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match-positions* [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? path? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[input-prefix bytes? #""])
|
[input-prefix bytes? #""])
|
||||||
|
@ -430,7 +436,7 @@ like @racket[regexp-match*].
|
||||||
|
|
||||||
|
|
||||||
@defproc[(regexp-match? [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match? [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? path? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[output-port (or/c output-port? #f) #f]
|
[output-port (or/c output-port? #f) #f]
|
||||||
|
@ -447,7 +453,7 @@ match succeeds, @racket[#f] otherwise.
|
||||||
|
|
||||||
|
|
||||||
@defproc[(regexp-match-exact? [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match-exact? [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)])
|
[input (or/c string? bytes? path? input-port?)])
|
||||||
boolean?]{
|
boolean?]{
|
||||||
|
|
||||||
Like @racket[regexp-match?], but @racket[#t] is only returned when the
|
Like @racket[regexp-match?], but @racket[#t] is only returned when the
|
||||||
|
@ -553,7 +559,7 @@ Like @racket[regexp-match-peek-positions], but returns multiple matches like
|
||||||
@racket[regexp-match*].}
|
@racket[regexp-match*].}
|
||||||
|
|
||||||
@defproc[(regexp-match/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? path? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[output-port (or/c output-port? #f) #f]
|
[output-port (or/c output-port? #f) #f]
|
||||||
|
@ -561,7 +567,7 @@ Like @racket[regexp-match-peek-positions], but returns multiple matches like
|
||||||
[count exact-nonnegative-integer? 1])
|
[count exact-nonnegative-integer? 1])
|
||||||
(values
|
(values
|
||||||
(if (and (or (string? pattern) (regexp? pattern))
|
(if (and (or (string? pattern) (regexp? pattern))
|
||||||
(string? input))
|
(or (string? input) (path? input)))
|
||||||
(or/c #f (cons/c string? (listof (or/c string? #f))))
|
(or/c #f (cons/c string? (listof (or/c string? #f))))
|
||||||
(or/c #f (cons/c bytes? (listof (or/c bytes? #f)))))
|
(or/c #f (cons/c bytes? (listof (or/c bytes? #f)))))
|
||||||
(or/c #f bytes?))]{
|
(or/c #f bytes?))]{
|
||||||
|
@ -578,7 +584,7 @@ to determine an appropriate value for @racket[count].}
|
||||||
|
|
||||||
@deftogether[(
|
@deftogether[(
|
||||||
@defproc[(regexp-match-positions/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match-positions/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? path? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[input-prefix bytes? #""]
|
[input-prefix bytes? #""]
|
||||||
|
|
|
@ -5000,25 +5000,38 @@ static Scheme_Object *gen_compare(char *name, int pos,
|
||||||
rxpos *startp, *maybep, *endp, prefix_len = 0, prefix_offset = 0, minpos;
|
rxpos *startp, *maybep, *endp, prefix_len = 0, prefix_offset = 0, minpos;
|
||||||
int offset = 0, orig_offset, endset, m, was_non_byte, last_bytes_count = last_bytes;
|
int offset = 0, orig_offset, endset, m, was_non_byte, last_bytes_count = last_bytes;
|
||||||
Scheme_Object *iport, *oport = NULL, *startv = NULL, *endv = NULL, *dropped, *unless_evt = NULL;
|
Scheme_Object *iport, *oport = NULL, *startv = NULL, *endv = NULL, *dropped, *unless_evt = NULL;
|
||||||
Scheme_Object *last_bytes_str = scheme_false;
|
Scheme_Object *last_bytes_str = scheme_false, *srcin;
|
||||||
|
|
||||||
if (SCHEME_TYPE(argv[0]) != scheme_regexp_type
|
if (SCHEME_TYPE(argv[0]) != scheme_regexp_type
|
||||||
&& !SCHEME_BYTE_STRINGP(argv[0])
|
&& !SCHEME_BYTE_STRINGP(argv[0])
|
||||||
&& !SCHEME_CHAR_STRINGP(argv[0]))
|
&& !SCHEME_CHAR_STRINGP(argv[0]))
|
||||||
scheme_wrong_type(name, "regexp, byte-regexp, string, or byte string", 0, argc, argv);
|
scheme_wrong_type(name, "regexp, byte-regexp, string, or byte string", 0, argc, argv);
|
||||||
if ((peek || (!SCHEME_BYTE_STRINGP(argv[1]) && !SCHEME_CHAR_STRINGP(argv[1])))
|
if ((peek || (!SCHEME_BYTE_STRINGP(argv[1]) && !SCHEME_CHAR_STRINGP(argv[1])))
|
||||||
&& !SCHEME_INPUT_PORTP(argv[1]))
|
&& !SCHEME_INPUT_PORTP(argv[1])
|
||||||
scheme_wrong_type(name, peek ? "input-port" : "string, byte string, or input port", 1, argc, argv);
|
&& !SCHEME_PATHP(argv[1]))
|
||||||
|
scheme_wrong_type(name, peek ? "input-port" : "string, byte string, path, or input port", 1, argc, argv);
|
||||||
|
|
||||||
if (SCHEME_CHAR_STRINGP(argv[1])) {
|
srcin = argv[1];
|
||||||
|
if (SCHEME_PATHP(srcin)) {
|
||||||
|
if (SCHEME_BYTE_STRINGP(argv[0])
|
||||||
|
|| (SAME_TYPE(SCHEME_TYPE(argv[0]), scheme_regexp_type)
|
||||||
|
&& !(((regexp *)argv[0])->flags & REGEXP_IS_UTF8)))
|
||||||
|
srcin = scheme_make_sized_byte_string(SCHEME_PATH_VAL(srcin),
|
||||||
|
SCHEME_PATH_LEN(srcin),
|
||||||
|
1);
|
||||||
|
else
|
||||||
|
srcin = scheme_path_to_char_string(srcin);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (SCHEME_CHAR_STRINGP(srcin)) {
|
||||||
iport = NULL;
|
iport = NULL;
|
||||||
endset = SCHEME_CHAR_STRLEN_VAL(argv[1]);
|
endset = SCHEME_CHAR_STRLEN_VAL(srcin);
|
||||||
} else if (SCHEME_INPUT_PORTP(argv[1])) {
|
} else if (SCHEME_INPUT_PORTP(srcin)) {
|
||||||
iport = argv[1];
|
iport = srcin;
|
||||||
endset = -2;
|
endset = -2;
|
||||||
} else {
|
} else {
|
||||||
iport = NULL;
|
iport = NULL;
|
||||||
endset = SCHEME_BYTE_STRLEN_VAL(argv[1]);
|
endset = SCHEME_BYTE_STRLEN_VAL(srcin);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argc > 2) {
|
if (argc > 2) {
|
||||||
|
@ -5027,7 +5040,7 @@ static Scheme_Object *gen_compare(char *name, int pos,
|
||||||
offset = scheme_extract_index(name, 2, argc, argv, len + 1, 0);
|
offset = scheme_extract_index(name, 2, argc, argv, len + 1, 0);
|
||||||
|
|
||||||
if (!iport && (offset > len)) {
|
if (!iport && (offset > len)) {
|
||||||
scheme_out_of_string_range(name, "offset ", argv[2], argv[1], 0, len);
|
scheme_out_of_string_range(name, "offset ", argv[2], srcin, 0, len);
|
||||||
return NULL;
|
return NULL;
|
||||||
} else if (offset < 0) {
|
} else if (offset < 0) {
|
||||||
/* argument was a bignum */
|
/* argument was a bignum */
|
||||||
|
@ -5052,7 +5065,7 @@ static Scheme_Object *gen_compare(char *name, int pos,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
} else if (endset < offset || endset > len) {
|
} else if (endset < offset || endset > len) {
|
||||||
scheme_out_of_string_range(name, "ending ", argv[3], argv[1], offset, len);
|
scheme_out_of_string_range(name, "ending ", argv[3], srcin, offset, len);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
endv = argv[3];
|
endv = argv[3];
|
||||||
|
@ -5117,16 +5130,16 @@ static Scheme_Object *gen_compare(char *name, int pos,
|
||||||
was_non_byte = 0;
|
was_non_byte = 0;
|
||||||
orig_offset = 0; /* extra offset */
|
orig_offset = 0; /* extra offset */
|
||||||
if (!iport) {
|
if (!iport) {
|
||||||
if (SCHEME_BYTE_STRINGP(argv[1]))
|
if (SCHEME_BYTE_STRINGP(srcin))
|
||||||
full_s = SCHEME_BYTE_STR_VAL(argv[1]);
|
full_s = SCHEME_BYTE_STR_VAL(srcin);
|
||||||
else {
|
else {
|
||||||
/* Extract substring and UTF-8 encode: */
|
/* Extract substring and UTF-8 encode: */
|
||||||
int blen;
|
int blen;
|
||||||
blen = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(argv[1]), offset, endset,
|
blen = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(srcin), offset, endset,
|
||||||
NULL, 0,
|
NULL, 0,
|
||||||
0 /* not UTF-16 */);
|
0 /* not UTF-16 */);
|
||||||
full_s = (char *)scheme_malloc_atomic(blen);
|
full_s = (char *)scheme_malloc_atomic(blen);
|
||||||
scheme_utf8_encode(SCHEME_CHAR_STR_VAL(argv[1]), offset, endset,
|
scheme_utf8_encode(SCHEME_CHAR_STR_VAL(srcin), offset, endset,
|
||||||
(unsigned char *)full_s, 0,
|
(unsigned char *)full_s, 0,
|
||||||
0 /* not UTF-16 */);
|
0 /* not UTF-16 */);
|
||||||
orig_offset = offset;
|
orig_offset = offset;
|
||||||
|
@ -5136,7 +5149,7 @@ static Scheme_Object *gen_compare(char *name, int pos,
|
||||||
was_non_byte = 1;
|
was_non_byte = 1;
|
||||||
else {
|
else {
|
||||||
/* Convert orig_offset into encoded bytes */
|
/* Convert orig_offset into encoded bytes */
|
||||||
orig_offset = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(argv[1]), 0, orig_offset,
|
orig_offset = scheme_utf8_encode(SCHEME_CHAR_STR_VAL(srcin), 0, orig_offset,
|
||||||
NULL, 0,
|
NULL, 0,
|
||||||
0);
|
0);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user