python support for rx benchmarking

svn: r4329
This commit is contained in:
Matthew Flatt 2006-09-14 00:35:39 +00:00
parent b486f3c36a
commit 08f06adfc2
2 changed files with 57 additions and 26 deletions

View File

@ -35,6 +35,26 @@ exec mzscheme -qu "$0" ${1+"$@"}
(parameterize ([current-input-port (open-input-string (get-output-string s))])
(* 1000 (read)))))
(define (pythonize rx)
  ;; Python rejects the scoped-flag group syntax (?i:...), so every such
  ;; opener (flag i, m, or s) is rewritten as a non-capturing group that
  ;; begins with the inline flag: (?:(?i)...
  (let ([scoped-flag-opener #rx#"[(][?]([ims]):"]
        [inline-flag-opener #"(?:(?\\1)"])
    (regexp-replace* scoped-flag-opener rx inline-flag-opener)))
;; Benchmark one regexp with the external `python` command.
;; Writes "test.py" (the contents of "python_prefix.py" followed by a single
;; `test(...)` call), runs it through `system`, and returns the number the
;; script printed multiplied by 1000 (presumably seconds -> milliseconds;
;; confirm against the other testers).
;;   input      -- subject text, spliced between double quotes with ~a
;;   rx         -- byte-string pattern; passed through `pythonize` first
;;   iterations -- emitted as the third argument of the `test(...)` call
;; NOTE(review): `input` is inserted without any escaping, so a subject that
;; contains a double quote or a backslash would presumably produce a broken
;; script -- confirm against the benchmark inputs.
(define (test-python input rx iterations)
(with-output-to-file "test.py"
(lambda ()
;; Start the generated script with the shared Python prelude.
(with-input-from-file "python_prefix.py"
(lambda ()
(copy-port (current-input-port)
(current-output-port))))
;; Append the call that runs the benchmark; ~s writes the pattern as a
;; quoted, escaped string literal.
(printf "test(~s, \"~a\", ~a)\n"
(bytes->string/latin-1 (pythonize rx)) input iterations))
;; 'truncate: replace the contents of any test.py left by an earlier run.
'truncate)
;; Capture everything the Python process prints ...
(let ([s (open-output-bytes)])
(parameterize ([current-output-port s])
(system "python test.py"))
;; ... then parse the captured text as one datum and scale it by 1000.
(parameterize ([current-input-port (open-input-string (get-output-string s))])
(* 1000 (read)))))
(define (test-pcre input rx iterations)
(let ([pcregexp (dynamic-require "pcre.ss" 'pcregexp)]
[pcregexp-match (dynamic-require "pcre.ss" 'pcregexp-match)])
@ -70,14 +90,14 @@ exec mzscheme -qu "$0" ${1+"$@"}
(list
(list (make-bytes 10 (char->integer #\x)) #"." 1000000 '())
(list #"cataract cataract23" #"(cat(a(ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
(list #"cataract cataract23" #"(?:cat(?:a(?:ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
(list #"cataract cataract23" #"(?i:cat(?:a(?:ract|tonic)|erpillar)) \\1()2(3)" 100000 '())
(list #"cataract cataract23" #"(cat(?:a(?:ract|tonic)|erpillar)) \\1(?:)23" 100000 '())
(list #"cataract cataract23" #"(?i:(cat(?:a(?:ract|tonic)|erpillar))) \\1(?:)23" 100000 '())
(list #"From abcd Mon Sep 1 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '())
(list #"From abcd Sep 01 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '())
(list #"foobar is foolish see?" #"foo(?!bar)(.*)" 100000 '())
(list #"foobar crowbar etc" #"(?:(?!foo)...|^.{0,2})bar(.*)" 100000 '())
(list #"now is the time for all good men to come to the aid of the party" #"^((?>\\w+)|(?>\\s+))*$" 30000 '())
(list #"this is not a line with only words and spaces!" #"^((?>\\w+)|(?>\\s+))*$" 30000 '())
(list #"now is the time for all good men to come to the aid of the party" #"^((?>\\w+)|(?>\\s+))*$" 30000 '(python))
(list #"this is not a line with only words and spaces!" #"^((?>\\w+)|(?>\\s+))*$" 30000 '(python))
(list #"yesBABthe AAABquickAAAB brown foxABB" #"yesB([^AB]+|A.)*B" 10000 '())
(list #"noBABthe AAABquickAAAB brown foxAB" #"noB([^AB]+|A.)*B" 10 '())
(list #"yesBABthe AAABquickAAAB brown foxABB" #"yesB(?:[^AB]+|A.)*B" 10000 '())
@ -96,9 +116,9 @@ exec mzscheme -qu "$0" ${1+"$@"}
(list (make-bytes 10000 (char->integer #\x)) #"(?m:.*)" 100000 '())
(list (make-bytes 100000 (char->integer #\x)) #"(?m:.*)" 10000 '())
(list (make-bytes 100 (char->integer #\x)) #"(?s:(.)*)" 100000 '())
(list (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre))
(list (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000 '(pcre))
(list (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000 '(pcre))
(list (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre python))
(list (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000 '(pcre python))
(list (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000 '(pcre python))
(list (make-bytes 100 (char->integer #\x)) #"x*" 100000 '())
(list (make-bytes 1000 (char->integer #\x)) #"x*" 100000 '())
(list (make-bytes 10000 (char->integer #\x)) #"x*" 10000 '())
@ -110,37 +130,37 @@ exec mzscheme -qu "$0" ${1+"$@"}
(list (make-bytes 100000 (char->integer #\x)) #"[xy]*" 100 '())
(list (make-bytes 100 (char->integer #\x)) #"(y|x)*" 10000 '())
(list (make-bytes 100 (char->integer #\x)) #"(?:y|x)*" 10000 '())
(list (make-bytes 1000 (char->integer #\x)) #"(?:y|x)*" 1000 '())
(list (make-bytes 10000 (char->integer #\x)) #"(?:y|x)*" 100 '())
(list (make-bytes 100000 (char->integer #\x)) #"(?:y|x)*" 10 '(pcre))
(list (make-bytes 1000 (char->integer #\x)) #"(?:y|x)*" 1000 '(python))
(list (make-bytes 10000 (char->integer #\x)) #"(?:y|x)*" 100 '(python))
(list (make-bytes 100000 (char->integer #\x)) #"(?:y|x)*" 10 '(pcre python))
(list (make-bytes 100 (char->integer #\x)) #"([yz]|x)*" 10000 '())
(list (make-bytes 100 (char->integer #\x)) #"(?:[yz]|x)*" 10000 '())
(list (make-bytes 1000 (char->integer #\x)) #"(?:[yz]|x)*" 1000 '())
(list (make-bytes 10000 (char->integer #\x)) #"(?:[yz]|x)*" 100 '())
(list (make-bytes 100000 (char->integer #\x)) #"(?:[yz]|x)*" 10 '(pcre))
(list (make-bytes 1000 (char->integer #\x)) #"(?:[yz]|x)*" 1000 '(python))
(list (make-bytes 10000 (char->integer #\x)) #"(?:[yz]|x)*" 100 '(python))
(list (make-bytes 100000 (char->integer #\x)) #"(?:[yz]|x)*" 10 '(pcre python))
(list (make-bytes 100 (char->integer #\x)) #"((x){2})*" 10000 '())
(list (make-bytes 100 (char->integer #\x)) #"(x{2})*" 10000 '())
(list (make-bytes 100 (char->integer #\x)) #"(?:x{2})*" 10000 '())
(list (make-bytes 1000 (char->integer #\x)) #"(?:x{2})*" 10000 '())
(list (make-bytes 10000 (char->integer #\x)) #"(?:x{2})*" 100 '())
(list (make-bytes 100000 (char->integer #\x)) #"(?:x{2})*" 100 '(pcre))
(list (make-bytes 1000 (char->integer #\x)) #"(?:x{2})*" 10000 '(python))
(list (make-bytes 10000 (char->integer #\x)) #"(?:x{2})*" 100 '(python))
(list (make-bytes 100000 (char->integer #\x)) #"(?:x{2})*" 100 '(pcre python))
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 100000 '())
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 100000 '())
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 10000 '())
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 1000 '())
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARNOPE" 1000000 '())
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 1000000 '())
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 100000 '(pcre))
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 10000 '(pcre))
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARNOPE" 1000000 '(python))
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 1000000 '(python))
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 100000 '(pcre python))
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 10000 '(pcre python))
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10000 '())
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 1000 '())
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10 '())
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 1000000 '())
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 100000 '(pcre))
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 10000 '(pcre))
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10 '(python))
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 1000000 '(python))
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 100000 '(pcre python))
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 10000 '(pcre python))
(list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 10000 '())
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl))
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl)))))
(list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl python))
(list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl python)))))
(define benchmark-names (map (lambda (t)
(string->symbol (car t)))
@ -149,6 +169,7 @@ exec mzscheme -qu "$0" ${1+"$@"}
;; Association list from an engine's symbolic name to the procedure that
;; benchmarks a regexp with that engine.
(define testers
  `((mzscheme ,test-mzscheme)
    (perl ,test-perl)
    (python ,test-python)
    (pcre ,test-pcre)))
;; Extract command-line arguments --------------------

View File

@ -0,0 +1,10 @@
import re
import time
def test(rx, input, iterations):
crx = re.compile(rx)
start = time.time()
for i in range(0, iterations):
re.search(crx, input)
print(time.time() - start)