refine rx benchmark suite

svn: r4356
This commit is contained in:
Matthew Flatt 2006-09-17 05:08:27 +00:00
parent 093919aaac
commit 335ead2dae

View File

@ -10,27 +10,35 @@ exec mzscheme -qu "$0" ${1+"$@"}
(lib "date.ss")
"../common/cmdline.ss")
;; test-mz : input rx iterations -> real
;; Applies `regexp-match-positions' with `rx' to `input' exactly
;; `iterations' times, discarding every match result, and returns the
;; difference between `current-inexact-milliseconds' readings taken
;; immediately before and immediately after the loop.
(define (test-mz input rx iterations)
  (let ([began (current-inexact-milliseconds)])
    ;; Count down to zero; only the elapsed time matters, so the body's
    ;; result is ignored.
    (do ([remaining iterations (sub1 remaining)])
        ((zero? remaining))
      (regexp-match-positions rx input))
    (- (current-inexact-milliseconds) began)))
(define (test-mzscheme input rx iterations)
(let ([rx (byte-pregexp rx)])
(collect-garbage)
(let ([start (current-inexact-milliseconds)])
(let loop ([n iterations])
(unless (zero? n)
(regexp-match-positions rx input)
(loop (sub1 n))))
(- (current-inexact-milliseconds) start))))
(test-mz input (byte-pregexp rx) iterations))
;; test-mzscheme-j : input rx iterations -> real
;; Compiles the byte string `rx' with `byte-pregexp' while the
;; `eval-jit-enabled' parameter is set to #f, then hands the compiled
;; pattern to `test-mz' for timing. Only the compilation happens inside
;; the `parameterize'; the timed loop runs outside of it.
(define (test-mzscheme-j input rx iterations)
  (let ([compiled (parameterize ([eval-jit-enabled #f])
                    (byte-pregexp rx))])
    (test-mz input compiled iterations)))
(define (test-mzunicode input rx iterations)
;; Note: input is left as bytes, to avoid overhead of UTF-8
;; conversion.
(let ([rx (pregexp (bytes->string/latin-1 rx))])
(collect-garbage)
(let ([start (current-inexact-milliseconds)])
(let loop ([n iterations])
(unless (zero? n)
(regexp-match-positions rx input)
(loop (sub1 n))))
(- (current-inexact-milliseconds) start))))
(test-mz input
(pregexp (bytes->string/latin-1 rx))
iterations))
;; test-mzunicode-j : input rx iterations -> real
;; Converts the byte string `rx' to a string via Latin-1 decoding and
;; compiles it with `pregexp', both while the `eval-jit-enabled'
;; parameter is set to #f, then hands the compiled pattern to `test-mz'
;; for timing. `input' is passed through unchanged. The timed loop runs
;; outside the `parameterize'.
(define (test-mzunicode-j input rx iterations)
  (let ([compiled (parameterize ([eval-jit-enabled #f])
                    (let ([pattern-text (bytes->string/latin-1 rx)])
                      (pregexp pattern-text)))])
    (test-mz input compiled iterations)))
;; encode-newlines : bytes -> bytes
;; Replaces every newline byte in `rx' with the two bytes backslash and
;; `n'. The replacement literal doubles the backslash because a
;; backslash is an escape character inside a `regexp-replace*' insert
;; string.
(define (encode-newlines rx)
  (let ([newline-pattern #rx#"\n"]
        [escaped-newline #"\\\\n"])
    (regexp-replace* newline-pattern rx escaped-newline)))
@ -119,17 +127,21 @@ exec mzscheme -qu "$0" ${1+"$@"}
(list 'simple #"track1.title:TBlah blah blah" #"([^.]*)\\.([^:]*):[T ]+(.*)" 100000 '())
(list 'far (bytes-append (make-bytes 10000 (char->integer #\a)) #"z") #"z" 10000 '())
(list 'farzq (bytes-append (make-bytes 1000 (char->integer #\a)) #"z") #"[zq]" 10000 '())
(list 'farzqalt (bytes-append (make-bytes 1000 (char->integer #\a)) #"z") #"z|q" 10000 '())
(list 'farzqalt (bytes-append (make-bytes 1000 (char->integer #\a)) #"z") #"z|q" 100000 '())
(list 'fard (bytes-append (make-bytes 1000 (char->integer #\a)) #"0") #"\\d" 10000 '())
(list 'farw (bytes-append (make-bytes 1000 (char->integer #\space)) #"_") #"\\w" 10000 '())
(list 'farnz (bytes-append (make-bytes 10000 (char->integer #\a)) #"z") #"(?:)z" 1000 '())
(list 'backtrack (bytes-append (make-bytes 300 (char->integer #\a)) #"zay") #"a*y" 1000 '())
(list 'backtrack (bytes-append (make-bytes 300 (char->integer #\a)) #"zay") #"a*?z" 10000 '())
(list 'backtrack (bytes-append (make-bytes 300 (char->integer #\a)) #"zay") #"a*?y" 1000 '())
(list 'alts #"cataract cataract23" #"(cat(a(ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
(list 'alts #"cataract cataract23" #"(cat(?:a(?:ract|tonic)|erpillar)) \\1(?:)23" 100000 '())
(list 'alts #"caterpillar caterpillar23" #"(cat(?:a(?:ract|tonic)|erpillar)) \\1(?:)23" 100000 '())
(list 'alts #"cataract cataract23" #"(?i:(cat(?:a(?:ract|tonic)|erpillar))) \\1(?:)23" 100000 '())
(list 'alts #"cataract23" #"(cat(a(ract|tonic)|erpillar))2(3)" 100000 '())
(list 'alts #"cataract23" #"(?:cat(?:a(?:ract|tonic)|erpillar))23" 100000 '())
(list 'alts #"caterpillar23" #"(?:cat(?:a(?:ract|tonic)|erpillar))23" 100000 '())
(list 'alts #"cataract23" #"(?i:(?:cat(?:a(?:ract|tonic)|erpillar)))23" 100000 '())
(list 'backref #"cataract cataract23" #"(cat(a(ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
(list 'backref #"cataract cataract23" #"(cat(?:a(?:ract|tonic)|erpillar)) \\1(?:)23" 100000 '())
(list 'backref #"caterpillar caterpillar23" #"(cat(?:a(?:ract|tonic)|erpillar)) \\1(?:)23" 100000 '())
(list 'backref #"cataract cataract23" #"(?i:(cat(?:a(?:ract|tonic)|erpillar))) \\1(?:)23" 100000 '())
(list 'digits #"From abcd Mon Sep 1 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '())
(list 'digits #"From abcd Sep 01 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '())
(list 'lines (bytes-append (make-bytes 100 (char->integer #\x)) #"\na\nb\nc\nxxxxxxxx") #"(?m:^a(?:$)[^a]^b(?:$)[^a]^c(?:$))" 10000 '())
@ -146,22 +158,24 @@ exec mzscheme -qu "$0" ${1+"$@"}
(list 'escape #"noBABthe AAABquickAAAB brown foxAB" #"noB([^AB]+|A.)*B" 10 '())
(list 'escape #"yesBABthe AAABquickAAAB brown foxABB" #"yesB(?:[^AB]+|A.)*B" 10000 '())
(list 'escape #"noBABthe AAABquickAAAB brown foxAB" #"noB(?:[^AB]+|A.)*B" 10 '())
(list 'escape #"yesbabthe aaabquickaaab frown foxabb" #"(?i:yesB(?:[^AB]+|A.)*B)" 10000 '())
(list 'escape #"yesbabthe aaabquickaaab frown foxabb" #"(?i:yesB(?:[^AB]+|A.)*B)" 100000 '())
(list 'escape #"nobabthe aaabquickaaab frown foxab" #"(?i:noB(?:[^AB]+|A.)*B)" 10 '())
(list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x|a)\\1*x$" 1000 '())
(list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x*|a)\\1x$" 1000 '())
(list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x*|a)\\1x" 1000 '())
(list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x*?|a)\\1x$" 1000 '())
(list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x*|a)\\1x" 10000 '())
(list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x{499})\\1x" 10000 '())
(list 'stress-any (make-bytes 1000 (char->integer #\x)) #"(?s:.*)" 100000 '())
(list 'stress-any (make-bytes 10000 (char->integer #\x)) #"(?s:.*)" 100000 '(mzunicode))
(list 'stress-any (make-bytes 100000 (char->integer #\x)) #"(?s:.*)" 100000 '(mzunicode))
(list 'stress-any (make-bytes 10000 (char->integer #\x)) #"(?s:.*)" 100000 '(mzunicode mzunicode-j))
(list 'stress-any (make-bytes 100000 (char->integer #\x)) #"(?s:.*)" 100000 '(mzunicode mzunicode-j))
(list 'stress-nonlf (make-bytes 100 (char->integer #\x)) #"(?m:.*)" 100000 '())
(list 'stress-nonlf (make-bytes 1000 (char->integer #\x)) #"(?m:.*)" 100000 '(mzunicode))
(list 'stress-nonlf (make-bytes 10000 (char->integer #\x)) #"(?m:.*)" 100000 '(mzunicode))
(list 'stress-nonlf (make-bytes 100000 (char->integer #\x)) #"(?m:.*)" 10000 '(mzunicode))
(list 'stress-nonlf (make-bytes 1000 (char->integer #\x)) #"(?m:.*)" 100000 '(mzunicode mzunicode-j))
(list 'stress-nonlf (make-bytes 10000 (char->integer #\x)) #"(?m:.*)" 100000 '(mzunicode mzunicode-j))
(list 'stress-nonlf (make-bytes 100000 (char->integer #\x)) #"(?m:.*)" 10000 '(mzunicode mzunicode-j))
(list 'stress-anysave (make-bytes 100 (char->integer #\x)) #"(?s:(.)*)" 100000 '())
(list 'stress-anysave (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre python mzunicode))
(list 'stress-anysave (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000 '(pcre python mzunicode))
(list 'stress-anysave (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000 '(pcre python mzunicode))
(list 'stress-anysave (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre python mzunicode mzunicode-j))
(list 'stress-anysave (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre python mzunicode mzunicode-j))
(list 'stress-anysave (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre python mzunicode mzunicode-j))
(list 'stress-xs (make-bytes 100 (char->integer #\x)) #"x*" 100000 '())
(list 'stress-xs (make-bytes 1000 (char->integer #\x)) #"x*" 100000 '())
(list 'stress-xs (make-bytes 10000 (char->integer #\x)) #"x*" 10000 '())
@ -212,9 +226,13 @@ exec mzscheme -qu "$0" ${1+"$@"}
(list 'perl test-perl)
(list 'python test-python)
(list 'pcre test-pcre)
(list 'mzunicode test-mzunicode)))
(list 'mzscheme-j test-mzscheme-j)
(list 'mzunicode test-mzunicode)
(list 'mzunicode-j test-mzunicode-j)))
(define non-defaults (list 'mzunicode))
(define non-defaults (list 'mzscheme-j
'mzunicode
'mzunicode-j))
;; Extract command-line arguments --------------------