From 2b5360574cc3f386cdc3f858b42cb375ecd24f79 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 13 Sep 2006 23:05:57 +0000 Subject: [PATCH] PCRE working, many benchmarks in place svn: r4325 --- collects/tests/mzscheme/benchmarks/rx/auto.ss | 170 +++++++++--------- collects/tests/mzscheme/benchmarks/rx/pcre.ss | 13 +- .../mzscheme/benchmarks/rx/perl_prefix.pl | 6 +- 3 files changed, 96 insertions(+), 93 deletions(-) diff --git a/collects/tests/mzscheme/benchmarks/rx/auto.ss b/collects/tests/mzscheme/benchmarks/rx/auto.ss index 771acbb91f..dd5ac25f0e 100644 --- a/collects/tests/mzscheme/benchmarks/rx/auto.ss +++ b/collects/tests/mzscheme/benchmarks/rx/auto.ss @@ -26,31 +26,25 @@ exec mzscheme -qu "$0" ${1+"$@"} (lambda () (copy-port (current-input-port) (current-output-port)))) - (printf "test \"~a\", /~a/, \"/~a/\", ~a;\n" + (printf "test \"~a\", qr/~a/, \"/~a/\", ~a;\n" input rx rx iterations)) 'truncate) (let ([s (open-output-bytes)]) (parameterize ([current-output-port s]) (system "perl test.pl")) (parameterize ([current-input-port (open-input-string (get-output-string s))]) - (read-line) (* 1000 (read))))) (define (test-pcre input rx iterations) - (let ([s (open-output-bytes)]) - (parameterize (; [current-output-port s] - [current-input-port (open-input-bytes - (bytes-append - (string->bytes/latin-1 (format "/~a/S\n" rx)) - input - #"\n"))]) - (system (format "pcretest -t -Q -n ~a" iterations))) - (let ([m (regexp-match #rx"Execute time ([0-9.]*)" (get-output-string s))]) - (if m - (* (string->number (cadr m)) iterations) - (begin - (printf "~a\n" (get-output-string s)) - #f))))) + (let ([pcregexp (dynamic-require "pcre.ss" 'pcregexp)] + [pcregexp-match (dynamic-require "pcre.ss" 'pcregexp-match)]) + (let ([rx (pcregexp rx)]) + (let ([start (current-inexact-milliseconds)]) + (let loop ([n iterations]) + (unless (zero? n) + (pcregexp-match rx input) + (loop (sub1 n)))) + (- (current-inexact-milliseconds) start))))) (define (random-letters n) (parameterize ([current-pseudo-random-generator (make-pseudo-random-generator)]) @@ -74,71 +68,79 @@ exec mzscheme -qu "$0" ${1+"$@"} (define inputs (add-index (list - (list (make-bytes 100 (char->integer #\x)) #"(?s:.*)" 100000) - (list (make-bytes 1000 (char->integer #\x)) #"(?s:.*)" 100000) - (list (make-bytes 10000 (char->integer #\x)) #"(?s:.*)" 100000) - (list (make-bytes 100000 (char->integer #\x)) #"(?s:.*)" 100000) - (list (make-bytes 100 (char->integer #\x)) #"(?m:.*)" 100000) - (list (make-bytes 1000 (char->integer #\x)) #"(?m:.*)" 100000) - (list (make-bytes 10000 (char->integer #\x)) #"(?m:.*)" 100000) - (list (make-bytes 100000 (char->integer #\x)) #"(?m:.*)" 10000) - (list (make-bytes 100 (char->integer #\x)) #"(?s:(.)*)" 100000) - (list (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000) - (list (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000) - (list (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000) - (list (make-bytes 100 (char->integer #\x)) #"x*" 100000) - (list (make-bytes 1000 (char->integer #\x)) #"x*" 100000) - (list (make-bytes 10000 (char->integer #\x)) #"x*" 10000) - (list (make-bytes 100000 (char->integer #\x)) #"x*" 1000) - (list (make-bytes 100 (char->integer #\x)) #"[xy]*" 100000) - (list (make-bytes 1000 (char->integer #\x)) #"[xy]*" 10000) - (list (make-bytes 10000 (char->integer #\x)) #"[xy]*" 1000) - (list (make-bytes 100000 (char->integer #\x)) #"[xy]*" 100) - (list (make-bytes 100 (char->integer #\x)) #"(?m:(.)*)" 100000) - (list (make-bytes 1000 (char->integer #\x)) #"(?m:(.)*)" 100000) - (list (make-bytes 10000 (char->integer #\x)) #"(?m:(.)*)" 100000) - (list (make-bytes 100000 (char->integer #\x)) #"(?m:(.)*)" 10000) - (list (make-bytes 100 (char->integer #\x)) #"(x)*" 100000) - (list (make-bytes 1000 (char->integer #\x)) #"(x)*" 100000) - (list (make-bytes 10000 (char->integer #\x)) #"(x)*" 10000) - (list (make-bytes 100000 (char->integer #\x)) #"(x)*" 1000) - (list (make-bytes 100 (char->integer #\x)) #"(y|x)*" 10000) - (list (make-bytes 1000 (char->integer #\x)) #"(y|x)*" 1000) - (list (make-bytes 10000 (char->integer #\x)) #"(y|x)*" 100) - (list (make-bytes 100000 (char->integer #\x)) #"(y|x)*" 10) - (list (make-bytes 100 (char->integer #\x)) #"([yz]|x)*" 10000) - (list (make-bytes 1000 (char->integer #\x)) #"([yz]|x)*" 1000) - (list (make-bytes 10000 (char->integer #\x)) #"([yz]|x)*" 100) - (list (make-bytes 100000 (char->integer #\x)) #"([yz]|x)*" 10) - (list (make-bytes 100 (char->integer #\x)) #"([xy])*" 100000) - (list (make-bytes 1000 (char->integer #\x)) #"([xy])*" 10000) - (list (make-bytes 10000 (char->integer #\x)) #"([xy])*" 1000) - (list (make-bytes 100000 (char->integer #\x)) #"([xy])*" 100) - (list (make-bytes 100 (char->integer #\x)) #"((x){2})*" 10000) - (list (make-bytes 1000 (char->integer #\x)) #"((x){2})*" 10000) - (list (make-bytes 10000 (char->integer #\x)) #"((x){2})*" 100) - (list (make-bytes 100000 (char->integer #\x)) #"((x){2})*" 100) - (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 100000) - (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 10000) - (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 1000) - (list (bytes-append (random-letters 100) #"NOPE") #"[a-z]*FOOBARBAZ" 1000000) - (list (bytes-append (random-letters 1000) #"NOPE") #"[a-z]*FOOBARBAZ" 100000) - (list (bytes-append (random-letters 10000) #"NOPE") #"[a-z]*FOOBARBAZ" 10000) - (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 100000) - (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 10000) - (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 1000) - (list (bytes-append (random-letters 100) #"NOPE") #"([a-z])*FOOBARBAZ" 1000000) - (list (bytes-append (random-letters 1000) #"NOPE") #"([a-z])*FOOBARBAZ" 100000) - (list (bytes-append (random-letters 10000) #"NOPE") #"([a-z])*FOOBARBAZ" 10000) - (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z]|ab)*FOOBARBAZ" 10000) - (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"([a-z]|ab)*FOOBARBAZ" 1000) - (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"([a-z]|ab)*FOOBARBAZ" 10) - (list (bytes-append (random-letters 100) #"NOPE") #"([a-z]|ab)*FOOBARBAZ" 1000000) - (list (bytes-append (random-letters 1000) #"NOPE") #"([a-z]|ab)*FOOBARBAZ" 100000) - (list (bytes-append (random-letters 10000) #"NOPE") #"([a-z]|ab)*FOOBARBAZ" 10000) - (list (bytes-append (random-letters 100) #"NOPE") #"(?i:[a-z]*FOOBARBAZ)" 1000) - (list (bytes-append (random-letters 1000) #"NOPE") #"(?i:[a-z]*FOOBARBAZ)" 10) - (list (bytes-append (random-letters 10000) #"NOPE") #"(?i:[a-z]*FOOBARBAZ)" 10)))) + (list (make-bytes 10 (char->integer #\x)) #"." 1000000 '()) + (list #"cataract cataract23" #"(cat(a(ract|tonic)|erpillar)) \\1()2(3)" 100000 '()) + (list #"cataract cataract23" #"(?:cat(?:a(?:ract|tonic)|erpillar)) \\1()2(3)" 100000 '()) + (list #"cataract cataract23" #"(?i:cat(?:a(?:ract|tonic)|erpillar)) \\1()2(3)" 100000 '()) + (list #"From abcd Mon Sep 1 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '()) + (list #"From abcd Sep 01 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '()) + (list #"foobar is foolish see?" #"foo(?!bar)(.*)" 100000 '()) + (list #"foobar crowbar etc" #"(?:(?!foo)...|^.{0,2})bar(.*)" 100000 '()) + (list #"now is the time for all good men to come to the aid of the party" #"^((?>\\w+)|(?>\\s+))*$" 30000 '()) + (list #"this is not a line with only words and spaces!" #"^((?>\\w+)|(?>\\s+))*$" 30000 '()) + (list #"yesBABthe AAABquickAAAB brown foxABB" #"yesB([^AB]+|A.)*B" 10000 '()) + (list #"noBABthe AAABquickAAAB brown foxAB" #"noB([^AB]+|A.)*B" 10 '()) + (list #"yesBABthe AAABquickAAAB brown foxABB" #"yesB(?:[^AB]+|A.)*B" 10000 '()) + (list #"noBABthe AAABquickAAAB brown foxAB" #"noB(?:[^AB]+|A.)*B" 10 '()) + (list #"yesbabthe aaabquickaaab frown foxabb" #"(?i:yesB(?:[^AB]+|A.)*B)" 10000 '()) + (list #"nobabthe aaabquickaaab frown foxab" #"(?i:noB(?:[^AB]+|A.)*B)" 10 '()) + (list #"track1.title:TBlah blah blah" #"([^.]*)\\.([^:]*):[T ]+(.*)" 100000 '()) + (list (make-bytes 1000 (char->integer #\a)) #"^(a|x)\\1*a$" 1000 '()) + (list (make-bytes 1000 (char->integer #\a)) #"^(a*|x)\\1a$" 1000 '()) + (list (make-bytes 1000 (char->integer #\a)) #"^(a*|x)\\1a" 1000 '()) + (list (make-bytes 1000 (char->integer #\x)) #"(?s:.*)" 100000 '()) + (list (make-bytes 10000 (char->integer #\x)) #"(?s:.*)" 100000 '()) + (list (make-bytes 100000 (char->integer #\x)) #"(?s:.*)" 100000 '()) + (list (make-bytes 100 (char->integer #\x)) #"(?m:.*)" 100000 '()) + (list (make-bytes 1000 (char->integer #\x)) #"(?m:.*)" 100000 '()) + (list (make-bytes 10000 (char->integer #\x)) #"(?m:.*)" 100000 '()) + (list (make-bytes 100000 (char->integer #\x)) #"(?m:.*)" 10000 '()) + (list (make-bytes 100 (char->integer #\x)) #"(?s:(.)*)" 100000 '()) + (list (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre)) + (list (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000 '(pcre)) + (list (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000 '(pcre)) + (list (make-bytes 100 (char->integer #\x)) #"x*" 100000 '()) + (list (make-bytes 1000 (char->integer #\x)) #"x*" 100000 '()) + (list (make-bytes 10000 (char->integer #\x)) #"x*" 10000 '()) + (list (make-bytes 100000 (char->integer #\x)) #"x*" 1000 '()) + (list (make-bytes 100 (char->integer #\x)) #"([xy])*" 100000 '()) + (list (make-bytes 100 (char->integer #\x)) #"[xy]*" 100000 '()) + (list (make-bytes 1000 (char->integer #\x)) #"[xy]*" 10000 '()) + (list (make-bytes 10000 (char->integer #\x)) #"[xy]*" 1000 '()) + (list (make-bytes 100000 (char->integer #\x)) #"[xy]*" 100 '()) + (list (make-bytes 100 (char->integer #\x)) #"(y|x)*" 10000 '()) + (list (make-bytes 100 (char->integer #\x)) #"(?:y|x)*" 10000 '()) + (list (make-bytes 1000 (char->integer #\x)) #"(?:y|x)*" 1000 '()) + (list (make-bytes 10000 (char->integer #\x)) #"(?:y|x)*" 100 '()) + (list (make-bytes 100000 (char->integer #\x)) #"(?:y|x)*" 10 '(pcre)) + (list (make-bytes 100 (char->integer #\x)) #"([yz]|x)*" 10000 '()) + (list (make-bytes 100 (char->integer #\x)) #"(?:[yz]|x)*" 10000 '()) + (list (make-bytes 1000 (char->integer #\x)) #"(?:[yz]|x)*" 1000 '()) + (list (make-bytes 10000 (char->integer #\x)) #"(?:[yz]|x)*" 100 '()) + (list (make-bytes 100000 (char->integer #\x)) #"(?:[yz]|x)*" 10 '(pcre)) + (list (make-bytes 100 (char->integer #\x)) #"((x){2})*" 10000 '()) + (list (make-bytes 100 (char->integer #\x)) #"(x{2})*" 10000 '()) + (list (make-bytes 100 (char->integer #\x)) #"(?:x{2})*" 10000 '()) + (list (make-bytes 1000 (char->integer #\x)) #"(?:x{2})*" 10000 '()) + (list (make-bytes 10000 (char->integer #\x)) #"(?:x{2})*" 100 '()) + (list (make-bytes 100000 (char->integer #\x)) #"(?:x{2})*" 100 '(pcre)) + (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 100000 '()) + (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 100000 '()) + (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 10000 '()) + (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 1000 '()) + (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARNOPE" 1000000 '()) + (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 1000000 '()) + (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 100000 '(pcre)) + (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 10000 '(pcre)) + (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10000 '()) + (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 1000 '()) + (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10 '()) + (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 1000000 '()) + (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 100000 '(pcre)) + (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 10000 '(pcre)) + (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 10000 '()) + (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl)) + (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl))))) (define benchmark-names (map (lambda (t) (string->symbol (car t))) @@ -233,14 +235,16 @@ exec mzscheme -qu "$0" ${1+"$@"} (define (run who which) (let ([t (assoc (symbol->string which) inputs)]) - (let-values ([(index input rx iterations) (apply values t)]) + (let-values ([(index input rx iterations skips) (apply values t)]) #; (printf "Testing ~a: ~s on ~a iterations of a ~a-byte input\n" who rx iterations (bytes-length input)) - (let ([ms ((cadr (assoc who testers)) input rx iterations)]) + (let ([ms (if (memq who skips) + #f + ((cadr (assoc who testers)) input rx iterations))]) (rprintf "[~a ~s (~a #f #f) #f]\n" who (string->symbol (format "~a.~a/~a/~a" index rx (bytes-length input) iterations)) diff --git a/collects/tests/mzscheme/benchmarks/rx/pcre.ss b/collects/tests/mzscheme/benchmarks/rx/pcre.ss index 8fd95d87f3..46d1019f82 100644 --- a/collects/tests/mzscheme/benchmarks/rx/pcre.ss +++ b/collects/tests/mzscheme/benchmarks/rx/pcre.ss @@ -3,6 +3,9 @@ (require (lib "foreign.ss")) (unsafe!) + (provide pcregexp + pcregexp-match) + (define pcre-lib (ffi-lib "libpcre")) (define pcre-compile @@ -11,7 +14,7 @@ -> _pointer))) (define pcre-study (get-ffi-obj "pcre_study" pcre-lib - (_fun _pointer _int _pointer + (_fun _pointer _int _bytes -> _pointer))) (define pcre-exec (get-ffi-obj "pcre_exec" pcre-lib @@ -23,13 +26,9 @@ (define (pcregexp s) (let* ([pat (pcre-compile s 0 random-vector random-vector #f)] - [extra #f #;(pcre-study pat 0 #f)]) + [extra (pcre-study pat 0 random-vector)]) (cons pat extra))) (define (pcregexp-match re bytes) (pcre-exec (car re) (cdr re) bytes (bytes-length bytes) - 0 0 random-vector 10)) - - (display (pcregexp-match (pcregexp #".*") #"abc"))) - - + 0 0 random-vector 10))) diff --git a/collects/tests/mzscheme/benchmarks/rx/perl_prefix.pl b/collects/tests/mzscheme/benchmarks/rx/perl_prefix.pl index 86a135abdd..c7dde89ec3 100644 --- a/collects/tests/mzscheme/benchmarks/rx/perl_prefix.pl +++ b/collects/tests/mzscheme/benchmarks/rx/perl_prefix.pl @@ -4,12 +4,12 @@ use Time::HiRes qw(time); sub test ($$$$) { local ($x, $pattern, $pstr, $times) = @_; - print "$pstr $times iterations on " . length($x) . " bytes:\n"; + # print "Trying $pattern $times iterations on " . length($x) . " bytes:\n"; $start = time; for ($i = 0; $i < $times; $i++) { $x =~ ${pattern}; } - print (time - $start); - print "\n"; + $duration = (time - $start); + print $duration . "\n"; }