PCRE working, many benchmarks in place
svn: r4325
This commit is contained in:
parent
e0d2e1a8f3
commit
2b5360574c
|
@ -26,31 +26,25 @@ exec mzscheme -qu "$0" ${1+"$@"}
|
|||
(lambda ()
|
||||
(copy-port (current-input-port)
|
||||
(current-output-port))))
|
||||
(printf "test \"~a\", /~a/, \"/~a/\", ~a;\n"
|
||||
(printf "test \"~a\", qr/~a/, \"/~a/\", ~a;\n"
|
||||
input rx rx iterations))
|
||||
'truncate)
|
||||
(let ([s (open-output-bytes)])
|
||||
(parameterize ([current-output-port s])
|
||||
(system "perl test.pl"))
|
||||
(parameterize ([current-input-port (open-input-string (get-output-string s))])
|
||||
(read-line)
|
||||
(* 1000 (read)))))
|
||||
|
||||
(define (test-pcre input rx iterations)
|
||||
(let ([s (open-output-bytes)])
|
||||
(parameterize (; [current-output-port s]
|
||||
[current-input-port (open-input-bytes
|
||||
(bytes-append
|
||||
(string->bytes/latin-1 (format "/~a/S\n" rx))
|
||||
input
|
||||
#"\n"))])
|
||||
(system (format "pcretest -t -Q -n ~a" iterations)))
|
||||
(let ([m (regexp-match #rx"Execute time ([0-9.]*)" (get-output-string s))])
|
||||
(if m
|
||||
(* (string->number (cadr m)) iterations)
|
||||
(begin
|
||||
(printf "~a\n" (get-output-string s))
|
||||
#f)))))
|
||||
(let ([pcregexp (dynamic-require "pcre.ss" 'pcregexp)]
|
||||
[pcregexp-match (dynamic-require "pcre.ss" 'pcregexp-match)])
|
||||
(let ([rx (pcregexp rx)])
|
||||
(let ([start (current-inexact-milliseconds)])
|
||||
(let loop ([n iterations])
|
||||
(unless (zero? n)
|
||||
(pcregexp-match rx input)
|
||||
(loop (sub1 n))))
|
||||
(- (current-inexact-milliseconds) start)))))
|
||||
|
||||
(define (random-letters n)
|
||||
(parameterize ([current-pseudo-random-generator (make-pseudo-random-generator)])
|
||||
|
@ -74,71 +68,79 @@ exec mzscheme -qu "$0" ${1+"$@"}
|
|||
(define inputs
|
||||
(add-index
|
||||
(list
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(?s:.*)" 100000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(?s:.*)" 100000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(?s:.*)" 100000)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(?s:.*)" 100000)
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(?m:.*)" 100000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(?m:.*)" 100000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(?m:.*)" 100000)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(?m:.*)" 10000)
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(?s:(.)*)" 100000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000)
|
||||
(list (make-bytes 100 (char->integer #\x)) #"x*" 100000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"x*" 100000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"x*" 10000)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"x*" 1000)
|
||||
(list (make-bytes 100 (char->integer #\x)) #"[xy]*" 100000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"[xy]*" 10000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"[xy]*" 1000)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"[xy]*" 100)
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(?m:(.)*)" 100000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(?m:(.)*)" 100000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(?m:(.)*)" 100000)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(?m:(.)*)" 10000)
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(x)*" 100000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(x)*" 100000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(x)*" 10000)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(x)*" 1000)
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(y|x)*" 10000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(y|x)*" 1000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(y|x)*" 100)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(y|x)*" 10)
|
||||
(list (make-bytes 100 (char->integer #\x)) #"([yz]|x)*" 10000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"([yz]|x)*" 1000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"([yz]|x)*" 100)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"([yz]|x)*" 10)
|
||||
(list (make-bytes 100 (char->integer #\x)) #"([xy])*" 100000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"([xy])*" 10000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"([xy])*" 1000)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"([xy])*" 100)
|
||||
(list (make-bytes 100 (char->integer #\x)) #"((x){2})*" 10000)
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"((x){2})*" 10000)
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"((x){2})*" 100)
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"((x){2})*" 100)
|
||||
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 100000)
|
||||
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 10000)
|
||||
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 1000)
|
||||
(list (bytes-append (random-letters 100) #"NOPE") #"[a-z]*FOOBARBAZ" 1000000)
|
||||
(list (bytes-append (random-letters 1000) #"NOPE") #"[a-z]*FOOBARBAZ" 100000)
|
||||
(list (bytes-append (random-letters 10000) #"NOPE") #"[a-z]*FOOBARBAZ" 10000)
|
||||
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 100000)
|
||||
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 10000)
|
||||
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 1000)
|
||||
(list (bytes-append (random-letters 100) #"NOPE") #"([a-z])*FOOBARBAZ" 1000000)
|
||||
(list (bytes-append (random-letters 1000) #"NOPE") #"([a-z])*FOOBARBAZ" 100000)
|
||||
(list (bytes-append (random-letters 10000) #"NOPE") #"([a-z])*FOOBARBAZ" 10000)
|
||||
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z]|ab)*FOOBARBAZ" 10000)
|
||||
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"([a-z]|ab)*FOOBARBAZ" 1000)
|
||||
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"([a-z]|ab)*FOOBARBAZ" 10)
|
||||
(list (bytes-append (random-letters 100) #"NOPE") #"([a-z]|ab)*FOOBARBAZ" 1000000)
|
||||
(list (bytes-append (random-letters 1000) #"NOPE") #"([a-z]|ab)*FOOBARBAZ" 100000)
|
||||
(list (bytes-append (random-letters 10000) #"NOPE") #"([a-z]|ab)*FOOBARBAZ" 10000)
|
||||
(list (bytes-append (random-letters 100) #"NOPE") #"(?i:[a-z]*FOOBARBAZ)" 1000)
|
||||
(list (bytes-append (random-letters 1000) #"NOPE") #"(?i:[a-z]*FOOBARBAZ)" 10)
|
||||
(list (bytes-append (random-letters 10000) #"NOPE") #"(?i:[a-z]*FOOBARBAZ)" 10))))
|
||||
(list (make-bytes 10 (char->integer #\x)) #"." 1000000 '())
|
||||
(list #"cataract cataract23" #"(cat(a(ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
|
||||
(list #"cataract cataract23" #"(?:cat(?:a(?:ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
|
||||
(list #"cataract cataract23" #"(?i:cat(?:a(?:ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
|
||||
(list #"From abcd Mon Sep 1 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '())
|
||||
(list #"From abcd Sep 01 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '())
|
||||
(list #"foobar is foolish see?" #"foo(?!bar)(.*)" 100000 '())
|
||||
(list #"foobar crowbar etc" #"(?:(?!foo)...|^.{0,2})bar(.*)" 100000 '())
|
||||
(list #"now is the time for all good men to come to the aid of the party" #"^((?>\\w+)|(?>\\s+))*$" 30000 '())
|
||||
(list #"this is not a line with only words and spaces!" #"^((?>\\w+)|(?>\\s+))*$" 30000 '())
|
||||
(list #"yesBABthe AAABquickAAAB brown foxABB" #"yesB([^AB]+|A.)*B" 10000 '())
|
||||
(list #"noBABthe AAABquickAAAB brown foxAB" #"noB([^AB]+|A.)*B" 10 '())
|
||||
(list #"yesBABthe AAABquickAAAB brown foxABB" #"yesB(?:[^AB]+|A.)*B" 10000 '())
|
||||
(list #"noBABthe AAABquickAAAB brown foxAB" #"noB(?:[^AB]+|A.)*B" 10 '())
|
||||
(list #"yesbabthe aaabquickaaab frown foxabb" #"(?i:yesB(?:[^AB]+|A.)*B)" 10000 '())
|
||||
(list #"nobabthe aaabquickaaab frown foxab" #"(?i:noB(?:[^AB]+|A.)*B)" 10 '())
|
||||
(list #"track1.title:TBlah blah blah" #"([^.]*)\\.([^:]*):[T ]+(.*)" 100000 '())
|
||||
(list (make-bytes 1000 (char->integer #\a)) #"^(a|x)\\1*a$" 1000 '())
|
||||
(list (make-bytes 1000 (char->integer #\a)) #"^(a*|x)\\1a$" 1000 '())
|
||||
(list (make-bytes 1000 (char->integer #\a)) #"^(a*|x)\\1a" 1000 '())
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(?s:.*)" 100000 '())
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(?s:.*)" 100000 '())
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(?s:.*)" 100000 '())
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(?m:.*)" 100000 '())
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(?m:.*)" 100000 '())
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(?m:.*)" 100000 '())
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(?m:.*)" 10000 '())
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(?s:(.)*)" 100000 '())
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre))
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000 '(pcre))
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000 '(pcre))
|
||||
(list (make-bytes 100 (char->integer #\x)) #"x*" 100000 '())
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"x*" 100000 '())
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"x*" 10000 '())
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"x*" 1000 '())
|
||||
(list (make-bytes 100 (char->integer #\x)) #"([xy])*" 100000 '())
|
||||
(list (make-bytes 100 (char->integer #\x)) #"[xy]*" 100000 '())
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"[xy]*" 10000 '())
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"[xy]*" 1000 '())
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"[xy]*" 100 '())
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(y|x)*" 10000 '())
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(?:y|x)*" 10000 '())
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(?:y|x)*" 1000 '())
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(?:y|x)*" 100 '())
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(?:y|x)*" 10 '(pcre))
|
||||
(list (make-bytes 100 (char->integer #\x)) #"([yz]|x)*" 10000 '())
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(?:[yz]|x)*" 10000 '())
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(?:[yz]|x)*" 1000 '())
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(?:[yz]|x)*" 100 '())
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(?:[yz]|x)*" 10 '(pcre))
|
||||
(list (make-bytes 100 (char->integer #\x)) #"((x){2})*" 10000 '())
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(x{2})*" 10000 '())
|
||||
(list (make-bytes 100 (char->integer #\x)) #"(?:x{2})*" 10000 '())
|
||||
(list (make-bytes 1000 (char->integer #\x)) #"(?:x{2})*" 10000 '())
|
||||
(list (make-bytes 10000 (char->integer #\x)) #"(?:x{2})*" 100 '())
|
||||
(list (make-bytes 100000 (char->integer #\x)) #"(?:x{2})*" 100 '(pcre))
|
||||
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 100000 '())
|
||||
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 100000 '())
|
||||
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 10000 '())
|
||||
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 1000 '())
|
||||
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARNOPE" 1000000 '())
|
||||
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 1000000 '())
|
||||
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 100000 '(pcre))
|
||||
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 10000 '(pcre))
|
||||
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10000 '())
|
||||
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 1000 '())
|
||||
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10 '())
|
||||
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 1000000 '())
|
||||
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 100000 '(pcre))
|
||||
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 10000 '(pcre))
|
||||
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 10000 '())
|
||||
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl))
|
||||
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl)))))
|
||||
|
||||
(define benchmark-names (map (lambda (t)
|
||||
(string->symbol (car t)))
|
||||
|
@ -233,14 +235,16 @@ exec mzscheme -qu "$0" ${1+"$@"}
|
|||
|
||||
(define (run who which)
|
||||
(let ([t (assoc (symbol->string which) inputs)])
|
||||
(let-values ([(index input rx iterations) (apply values t)])
|
||||
(let-values ([(index input rx iterations skips) (apply values t)])
|
||||
#;
|
||||
(printf "Testing ~a: ~s on ~a iterations of a ~a-byte input\n"
|
||||
who
|
||||
rx
|
||||
iterations
|
||||
(bytes-length input))
|
||||
(let ([ms ((cadr (assoc who testers)) input rx iterations)])
|
||||
(let ([ms (if (memq who skips)
|
||||
#f
|
||||
((cadr (assoc who testers)) input rx iterations))])
|
||||
(rprintf "[~a ~s (~a #f #f) #f]\n"
|
||||
who
|
||||
(string->symbol (format "~a.~a/~a/~a" index rx (bytes-length input) iterations))
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
(require (lib "foreign.ss"))
|
||||
(unsafe!)
|
||||
|
||||
(provide pcregexp
|
||||
pcregexp-match)
|
||||
|
||||
(define pcre-lib (ffi-lib "libpcre"))
|
||||
|
||||
(define pcre-compile
|
||||
|
@ -11,7 +14,7 @@
|
|||
-> _pointer)))
|
||||
(define pcre-study
|
||||
(get-ffi-obj "pcre_study" pcre-lib
|
||||
(_fun _pointer _int _pointer
|
||||
(_fun _pointer _int _bytes
|
||||
-> _pointer)))
|
||||
(define pcre-exec
|
||||
(get-ffi-obj "pcre_exec" pcre-lib
|
||||
|
@ -23,13 +26,9 @@
|
|||
|
||||
(define (pcregexp s)
|
||||
(let* ([pat (pcre-compile s 0 random-vector random-vector #f)]
|
||||
[extra #f #;(pcre-study pat 0 #f)])
|
||||
[extra (pcre-study pat 0 random-vector)])
|
||||
(cons pat extra)))
|
||||
|
||||
(define (pcregexp-match re bytes)
|
||||
(pcre-exec (car re) (cdr re) bytes (bytes-length bytes)
|
||||
0 0 random-vector 10))
|
||||
|
||||
(display (pcregexp-match (pcregexp #".*") #"abc")))
|
||||
|
||||
|
||||
0 0 random-vector 10)))
|
||||
|
|
|
@ -4,12 +4,12 @@ use Time::HiRes qw(time);
|
|||
sub test ($$$$) {
|
||||
local ($x, $pattern, $pstr, $times) = @_;
|
||||
|
||||
print "$pstr $times iterations on " . length($x) . " bytes:\n";
|
||||
# print "Trying $pattern $times iterations on " . length($x) . " bytes:\n";
|
||||
|
||||
$start = time;
|
||||
for ($i = 0; $i < $times; $i++) {
|
||||
$x =~ ${pattern};
|
||||
}
|
||||
print (time - $start);
|
||||
print "\n";
|
||||
$duration = (time - $start);
|
||||
print $duration . "\n";
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user