PCRE working, many benchmarks in place

svn: r4325
This commit is contained in:
Matthew Flatt 2006-09-13 23:05:57 +00:00
parent e0d2e1a8f3
commit 2b5360574c
3 changed files with 96 additions and 93 deletions

View File

@ -26,31 +26,25 @@ exec mzscheme -qu "$0" ${1+"$@"}
(lambda ()
(copy-port (current-input-port)
(current-output-port))))
(printf "test \"~a\", /~a/, \"/~a/\", ~a;\n"
(printf "test \"~a\", qr/~a/, \"/~a/\", ~a;\n"
input rx rx iterations))
'truncate)
(let ([s (open-output-bytes)])
(parameterize ([current-output-port s])
(system "perl test.pl"))
(parameterize ([current-input-port (open-input-string (get-output-string s))])
(read-line)
(* 1000 (read)))))
(define (test-pcre input rx iterations)
(let ([s (open-output-bytes)])
(parameterize (; [current-output-port s]
[current-input-port (open-input-bytes
(bytes-append
(string->bytes/latin-1 (format "/~a/S\n" rx))
input
#"\n"))])
(system (format "pcretest -t -Q -n ~a" iterations)))
(let ([m (regexp-match #rx"Execute time ([0-9.]*)" (get-output-string s))])
(if m
(* (string->number (cadr m)) iterations)
(begin
(printf "~a\n" (get-output-string s))
#f)))))
(let ([pcregexp (dynamic-require "pcre.ss" 'pcregexp)]
[pcregexp-match (dynamic-require "pcre.ss" 'pcregexp-match)])
(let ([rx (pcregexp rx)])
(let ([start (current-inexact-milliseconds)])
(let loop ([n iterations])
(unless (zero? n)
(pcregexp-match rx input)
(loop (sub1 n))))
(- (current-inexact-milliseconds) start)))))
(define (random-letters n)
(parameterize ([current-pseudo-random-generator (make-pseudo-random-generator)])
@ -74,71 +68,79 @@ exec mzscheme -qu "$0" ${1+"$@"}
(define inputs
(add-index
(list
(list (make-bytes 100 (char->integer #\x)) #"(?s:.*)" 100000)
(list (make-bytes 1000 (char->integer #\x)) #"(?s:.*)" 100000)
(list (make-bytes 10000 (char->integer #\x)) #"(?s:.*)" 100000)
(list (make-bytes 100000 (char->integer #\x)) #"(?s:.*)" 100000)
(list (make-bytes 100 (char->integer #\x)) #"(?m:.*)" 100000)
(list (make-bytes 1000 (char->integer #\x)) #"(?m:.*)" 100000)
(list (make-bytes 10000 (char->integer #\x)) #"(?m:.*)" 100000)
(list (make-bytes 100000 (char->integer #\x)) #"(?m:.*)" 10000)
(list (make-bytes 100 (char->integer #\x)) #"(?s:(.)*)" 100000)
(list (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000)
(list (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000)
(list (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000)
(list (make-bytes 100 (char->integer #\x)) #"x*" 100000)
(list (make-bytes 1000 (char->integer #\x)) #"x*" 100000)
(list (make-bytes 10000 (char->integer #\x)) #"x*" 10000)
(list (make-bytes 100000 (char->integer #\x)) #"x*" 1000)
(list (make-bytes 100 (char->integer #\x)) #"[xy]*" 100000)
(list (make-bytes 1000 (char->integer #\x)) #"[xy]*" 10000)
(list (make-bytes 10000 (char->integer #\x)) #"[xy]*" 1000)
(list (make-bytes 100000 (char->integer #\x)) #"[xy]*" 100)
(list (make-bytes 100 (char->integer #\x)) #"(?m:(.)*)" 100000)
(list (make-bytes 1000 (char->integer #\x)) #"(?m:(.)*)" 100000)
(list (make-bytes 10000 (char->integer #\x)) #"(?m:(.)*)" 100000)
(list (make-bytes 100000 (char->integer #\x)) #"(?m:(.)*)" 10000)
(list (make-bytes 100 (char->integer #\x)) #"(x)*" 100000)
(list (make-bytes 1000 (char->integer #\x)) #"(x)*" 100000)
(list (make-bytes 10000 (char->integer #\x)) #"(x)*" 10000)
(list (make-bytes 100000 (char->integer #\x)) #"(x)*" 1000)
(list (make-bytes 100 (char->integer #\x)) #"(y|x)*" 10000)
(list (make-bytes 1000 (char->integer #\x)) #"(y|x)*" 1000)
(list (make-bytes 10000 (char->integer #\x)) #"(y|x)*" 100)
(list (make-bytes 100000 (char->integer #\x)) #"(y|x)*" 10)
(list (make-bytes 100 (char->integer #\x)) #"([yz]|x)*" 10000)
(list (make-bytes 1000 (char->integer #\x)) #"([yz]|x)*" 1000)
(list (make-bytes 10000 (char->integer #\x)) #"([yz]|x)*" 100)
(list (make-bytes 100000 (char->integer #\x)) #"([yz]|x)*" 10)
(list (make-bytes 100 (char->integer #\x)) #"([xy])*" 100000)
(list (make-bytes 1000 (char->integer #\x)) #"([xy])*" 10000)
(list (make-bytes 10000 (char->integer #\x)) #"([xy])*" 1000)
(list (make-bytes 100000 (char->integer #\x)) #"([xy])*" 100)
(list (make-bytes 100 (char->integer #\x)) #"((x){2})*" 10000)
(list (make-bytes 1000 (char->integer #\x)) #"((x){2})*" 10000)
(list (make-bytes 10000 (char->integer #\x)) #"((x){2})*" 100)
(list (make-bytes 100000 (char->integer #\x)) #"((x){2})*" 100)
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 100000)
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 10000)
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 1000)
(list (bytes-append (random-letters 100) #"NOPE") #"[a-z]*FOOBARBAZ" 1000000)
(list (bytes-append (random-letters 1000) #"NOPE") #"[a-z]*FOOBARBAZ" 100000)
(list (bytes-append (random-letters 10000) #"NOPE") #"[a-z]*FOOBARBAZ" 10000)
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 100000)
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 10000)
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 1000)
(list (bytes-append (random-letters 100) #"NOPE") #"([a-z])*FOOBARBAZ" 1000000)
(list (bytes-append (random-letters 1000) #"NOPE") #"([a-z])*FOOBARBAZ" 100000)
(list (bytes-append (random-letters 10000) #"NOPE") #"([a-z])*FOOBARBAZ" 10000)
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z]|ab)*FOOBARBAZ" 10000)
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"([a-z]|ab)*FOOBARBAZ" 1000)
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"([a-z]|ab)*FOOBARBAZ" 10)
(list (bytes-append (random-letters 100) #"NOPE") #"([a-z]|ab)*FOOBARBAZ" 1000000)
(list (bytes-append (random-letters 1000) #"NOPE") #"([a-z]|ab)*FOOBARBAZ" 100000)
(list (bytes-append (random-letters 10000) #"NOPE") #"([a-z]|ab)*FOOBARBAZ" 10000)
(list (bytes-append (random-letters 100) #"NOPE") #"(?i:[a-z]*FOOBARBAZ)" 1000)
(list (bytes-append (random-letters 1000) #"NOPE") #"(?i:[a-z]*FOOBARBAZ)" 10)
(list (bytes-append (random-letters 10000) #"NOPE") #"(?i:[a-z]*FOOBARBAZ)" 10))))
(list (make-bytes 10 (char->integer #\x)) #"." 1000000 '())
(list #"cataract cataract23" #"(cat(a(ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
(list #"cataract cataract23" #"(?:cat(?:a(?:ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
(list #"cataract cataract23" #"(?i:cat(?:a(?:ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
(list #"From abcd Mon Sep 1 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '())
(list #"From abcd Sep 01 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '())
(list #"foobar is foolish see?" #"foo(?!bar)(.*)" 100000 '())
(list #"foobar crowbar etc" #"(?:(?!foo)...|^.{0,2})bar(.*)" 100000 '())
(list #"now is the time for all good men to come to the aid of the party" #"^((?>\\w+)|(?>\\s+))*$" 30000 '())
(list #"this is not a line with only words and spaces!" #"^((?>\\w+)|(?>\\s+))*$" 30000 '())
(list #"yesBABthe AAABquickAAAB brown foxABB" #"yesB([^AB]+|A.)*B" 10000 '())
(list #"noBABthe AAABquickAAAB brown foxAB" #"noB([^AB]+|A.)*B" 10 '())
(list #"yesBABthe AAABquickAAAB brown foxABB" #"yesB(?:[^AB]+|A.)*B" 10000 '())
(list #"noBABthe AAABquickAAAB brown foxAB" #"noB(?:[^AB]+|A.)*B" 10 '())
(list #"yesbabthe aaabquickaaab frown foxabb" #"(?i:yesB(?:[^AB]+|A.)*B)" 10000 '())
(list #"nobabthe aaabquickaaab frown foxab" #"(?i:noB(?:[^AB]+|A.)*B)" 10 '())
(list #"track1.title:TBlah blah blah" #"([^.]*)\\.([^:]*):[T ]+(.*)" 100000 '())
(list (make-bytes 1000 (char->integer #\a)) #"^(a|x)\\1*a$" 1000 '())
(list (make-bytes 1000 (char->integer #\a)) #"^(a*|x)\\1a$" 1000 '())
(list (make-bytes 1000 (char->integer #\a)) #"^(a*|x)\\1a" 1000 '())
(list (make-bytes 1000 (char->integer #\x)) #"(?s:.*)" 100000 '())
(list (make-bytes 10000 (char->integer #\x)) #"(?s:.*)" 100000 '())
(list (make-bytes 100000 (char->integer #\x)) #"(?s:.*)" 100000 '())
(list (make-bytes 100 (char->integer #\x)) #"(?m:.*)" 100000 '())
(list (make-bytes 1000 (char->integer #\x)) #"(?m:.*)" 100000 '())
(list (make-bytes 10000 (char->integer #\x)) #"(?m:.*)" 100000 '())
(list (make-bytes 100000 (char->integer #\x)) #"(?m:.*)" 10000 '())
(list (make-bytes 100 (char->integer #\x)) #"(?s:(.)*)" 100000 '())
(list (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre))
(list (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000 '(pcre))
(list (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000 '(pcre))
(list (make-bytes 100 (char->integer #\x)) #"x*" 100000 '())
(list (make-bytes 1000 (char->integer #\x)) #"x*" 100000 '())
(list (make-bytes 10000 (char->integer #\x)) #"x*" 10000 '())
(list (make-bytes 100000 (char->integer #\x)) #"x*" 1000 '())
(list (make-bytes 100 (char->integer #\x)) #"([xy])*" 100000 '())
(list (make-bytes 100 (char->integer #\x)) #"[xy]*" 100000 '())
(list (make-bytes 1000 (char->integer #\x)) #"[xy]*" 10000 '())
(list (make-bytes 10000 (char->integer #\x)) #"[xy]*" 1000 '())
(list (make-bytes 100000 (char->integer #\x)) #"[xy]*" 100 '())
(list (make-bytes 100 (char->integer #\x)) #"(y|x)*" 10000 '())
(list (make-bytes 100 (char->integer #\x)) #"(?:y|x)*" 10000 '())
(list (make-bytes 1000 (char->integer #\x)) #"(?:y|x)*" 1000 '())
(list (make-bytes 10000 (char->integer #\x)) #"(?:y|x)*" 100 '())
(list (make-bytes 100000 (char->integer #\x)) #"(?:y|x)*" 10 '(pcre))
(list (make-bytes 100 (char->integer #\x)) #"([yz]|x)*" 10000 '())
(list (make-bytes 100 (char->integer #\x)) #"(?:[yz]|x)*" 10000 '())
(list (make-bytes 1000 (char->integer #\x)) #"(?:[yz]|x)*" 1000 '())
(list (make-bytes 10000 (char->integer #\x)) #"(?:[yz]|x)*" 100 '())
(list (make-bytes 100000 (char->integer #\x)) #"(?:[yz]|x)*" 10 '(pcre))
(list (make-bytes 100 (char->integer #\x)) #"((x){2})*" 10000 '())
(list (make-bytes 100 (char->integer #\x)) #"(x{2})*" 10000 '())
(list (make-bytes 100 (char->integer #\x)) #"(?:x{2})*" 10000 '())
(list (make-bytes 1000 (char->integer #\x)) #"(?:x{2})*" 10000 '())
(list (make-bytes 10000 (char->integer #\x)) #"(?:x{2})*" 100 '())
(list (make-bytes 100000 (char->integer #\x)) #"(?:x{2})*" 100 '(pcre))
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 100000 '())
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 100000 '())
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 10000 '())
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 1000 '())
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARNOPE" 1000000 '())
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 1000000 '())
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 100000 '(pcre))
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 10000 '(pcre))
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10000 '())
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 1000 '())
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10 '())
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 1000000 '())
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 100000 '(pcre))
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 10000 '(pcre))
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 10000 '())
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl))
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl)))))
(define benchmark-names (map (lambda (t)
(string->symbol (car t)))
@ -233,14 +235,16 @@ exec mzscheme -qu "$0" ${1+"$@"}
(define (run who which)
(let ([t (assoc (symbol->string which) inputs)])
(let-values ([(index input rx iterations) (apply values t)])
(let-values ([(index input rx iterations skips) (apply values t)])
#;
(printf "Testing ~a: ~s on ~a iterations of a ~a-byte input\n"
who
rx
iterations
(bytes-length input))
(let ([ms ((cadr (assoc who testers)) input rx iterations)])
(let ([ms (if (memq who skips)
#f
((cadr (assoc who testers)) input rx iterations))])
(rprintf "[~a ~s (~a #f #f) #f]\n"
who
(string->symbol (format "~a.~a/~a/~a" index rx (bytes-length input) iterations))

View File

@ -3,6 +3,9 @@
(require (lib "foreign.ss"))
(unsafe!)
(provide pcregexp
pcregexp-match)
(define pcre-lib (ffi-lib "libpcre"))
(define pcre-compile
@ -11,7 +14,7 @@
-> _pointer)))
(define pcre-study
(get-ffi-obj "pcre_study" pcre-lib
(_fun _pointer _int _pointer
(_fun _pointer _int _bytes
-> _pointer)))
(define pcre-exec
(get-ffi-obj "pcre_exec" pcre-lib
@ -23,13 +26,9 @@
(define (pcregexp s)
(let* ([pat (pcre-compile s 0 random-vector random-vector #f)]
[extra #f #;(pcre-study pat 0 #f)])
[extra (pcre-study pat 0 random-vector)])
(cons pat extra)))
(define (pcregexp-match re bytes)
(pcre-exec (car re) (cdr re) bytes (bytes-length bytes)
0 0 random-vector 10))
(display (pcregexp-match (pcregexp #".*") #"abc")))
0 0 random-vector 10)))

View File

@ -4,12 +4,12 @@ use Time::HiRes qw(time);
sub test ($$$$) {
local ($x, $pattern, $pstr, $times) = @_;
print "$pstr $times iterations on " . length($x) . " bytes:\n";
# print "Trying $pattern $times iterations on " . length($x) . " bytes:\n";
$start = time;
for ($i = 0; $i < $times; $i++) {
$x =~ ${pattern};
}
print (time - $start);
print "\n";
$duration = (time - $start);
print $duration . "\n";
}