From 08f06adfc247b795ab16df533401d71732c6b001 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Thu, 14 Sep 2006 00:35:39 +0000 Subject: [PATCH] python support for rx benchmarking svn: r4329 --- collects/tests/mzscheme/benchmarks/rx/auto.ss | 73 ++++++++++++------- .../mzscheme/benchmarks/rx/python_prefix.py | 10 +++ 2 files changed, 57 insertions(+), 26 deletions(-) create mode 100644 collects/tests/mzscheme/benchmarks/rx/python_prefix.py diff --git a/collects/tests/mzscheme/benchmarks/rx/auto.ss b/collects/tests/mzscheme/benchmarks/rx/auto.ss index 2f212b7024..b51ac2a69e 100755 --- a/collects/tests/mzscheme/benchmarks/rx/auto.ss +++ b/collects/tests/mzscheme/benchmarks/rx/auto.ss @@ -35,6 +35,26 @@ exec mzscheme -qu "$0" ${1+"$@"} (parameterize ([current-input-port (open-input-string (get-output-string s))]) (* 1000 (read))))) + (define (pythonize rx) + ;; Python doesn't recognize (?i:...), so we convert to (?:(?i)...) + (regexp-replace* #rx#"[(][?]([ims]):" rx #"(?:(?\\1)")) + + (define (test-python input rx iterations) + (with-output-to-file "test.py" + (lambda () + (with-input-from-file "python_prefix.py" + (lambda () + (copy-port (current-input-port) + (current-output-port)))) + (printf "test(~s, \"~a\", ~a)\n" + (bytes->string/latin-1 (pythonize rx)) input iterations)) + 'truncate) + (let ([s (open-output-bytes)]) + (parameterize ([current-output-port s]) + (system "python test.py")) + (parameterize ([current-input-port (open-input-string (get-output-string s))]) + (* 1000 (read))))) + (define (test-pcre input rx iterations) (let ([pcregexp (dynamic-require "pcre.ss" 'pcregexp)] [pcregexp-match (dynamic-require "pcre.ss" 'pcregexp-match)]) @@ -70,14 +90,14 @@ exec mzscheme -qu "$0" ${1+"$@"} (list (list (make-bytes 10 (char->integer #\x)) #"." 
1000000 '()) (list #"cataract cataract23" #"(cat(a(ract|tonic)|erpillar)) \\1()2(3)" 100000 '()) - (list #"cataract cataract23" #"(?:cat(?:a(?:ract|tonic)|erpillar)) \\1()2(3)" 100000 '()) - (list #"cataract cataract23" #"(?i:cat(?:a(?:ract|tonic)|erpillar)) \\1()2(3)" 100000 '()) + (list #"cataract cataract23" #"(cat(?:a(?:ract|tonic)|erpillar)) \\1(?:)23" 100000 '()) + (list #"cataract cataract23" #"(?i:(cat(?:a(?:ract|tonic)|erpillar))) \\1(?:)23" 100000 '()) (list #"From abcd Mon Sep 1 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '()) (list #"From abcd Sep 01 12:33:02 1997" #"^From\\s+\\S+\\s+([a-zA-Z]{3}\\s+){2}\\d{1,2}\\s+\\d\\d:\\d\\d" 100000 '()) (list #"foobar is foolish see?" #"foo(?!bar)(.*)" 100000 '()) (list #"foobar crowbar etc" #"(?:(?!foo)...|^.{0,2})bar(.*)" 100000 '()) - (list #"now is the time for all good men to come to the aid of the party" #"^((?>\\w+)|(?>\\s+))*$" 30000 '()) - (list #"this is not a line with only words and spaces!" #"^((?>\\w+)|(?>\\s+))*$" 30000 '()) + (list #"now is the time for all good men to come to the aid of the party" #"^((?>\\w+)|(?>\\s+))*$" 30000 '(python)) + (list #"this is not a line with only words and spaces!" 
#"^((?>\\w+)|(?>\\s+))*$" 30000 '(python)) (list #"yesBABthe AAABquickAAAB brown foxABB" #"yesB([^AB]+|A.)*B" 10000 '()) (list #"noBABthe AAABquickAAAB brown foxAB" #"noB([^AB]+|A.)*B" 10 '()) (list #"yesBABthe AAABquickAAAB brown foxABB" #"yesB(?:[^AB]+|A.)*B" 10000 '()) @@ -96,9 +116,9 @@ exec mzscheme -qu "$0" ${1+"$@"} (list (make-bytes 10000 (char->integer #\x)) #"(?m:.*)" 100000 '()) (list (make-bytes 100000 (char->integer #\x)) #"(?m:.*)" 10000 '()) (list (make-bytes 100 (char->integer #\x)) #"(?s:(.)*)" 100000 '()) - (list (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre)) - (list (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000 '(pcre)) - (list (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000 '(pcre)) + (list (make-bytes 1000 (char->integer #\x)) #"(?s:(.)*)" 100000 '(pcre python)) + (list (make-bytes 10000 (char->integer #\x)) #"(?s:(.)*)" 10000 '(pcre python)) + (list (make-bytes 100000 (char->integer #\x)) #"(?s:(.)*)" 1000 '(pcre python)) (list (make-bytes 100 (char->integer #\x)) #"x*" 100000 '()) (list (make-bytes 1000 (char->integer #\x)) #"x*" 100000 '()) (list (make-bytes 10000 (char->integer #\x)) #"x*" 10000 '()) @@ -110,37 +130,37 @@ exec mzscheme -qu "$0" ${1+"$@"} (list (make-bytes 100000 (char->integer #\x)) #"[xy]*" 100 '()) (list (make-bytes 100 (char->integer #\x)) #"(y|x)*" 10000 '()) (list (make-bytes 100 (char->integer #\x)) #"(?:y|x)*" 10000 '()) - (list (make-bytes 1000 (char->integer #\x)) #"(?:y|x)*" 1000 '()) - (list (make-bytes 10000 (char->integer #\x)) #"(?:y|x)*" 100 '()) - (list (make-bytes 100000 (char->integer #\x)) #"(?:y|x)*" 10 '(pcre)) + (list (make-bytes 1000 (char->integer #\x)) #"(?:y|x)*" 1000 '(python)) + (list (make-bytes 10000 (char->integer #\x)) #"(?:y|x)*" 100 '(python)) + (list (make-bytes 100000 (char->integer #\x)) #"(?:y|x)*" 10 '(pcre python)) (list (make-bytes 100 (char->integer #\x)) #"([yz]|x)*" 10000 '()) (list (make-bytes 100 (char->integer #\x)) #"(?:[yz]|x)*" 10000 
'()) - (list (make-bytes 1000 (char->integer #\x)) #"(?:[yz]|x)*" 1000 '()) - (list (make-bytes 10000 (char->integer #\x)) #"(?:[yz]|x)*" 100 '()) - (list (make-bytes 100000 (char->integer #\x)) #"(?:[yz]|x)*" 10 '(pcre)) + (list (make-bytes 1000 (char->integer #\x)) #"(?:[yz]|x)*" 1000 '(python)) + (list (make-bytes 10000 (char->integer #\x)) #"(?:[yz]|x)*" 100 '(python)) + (list (make-bytes 100000 (char->integer #\x)) #"(?:[yz]|x)*" 10 '(pcre python)) (list (make-bytes 100 (char->integer #\x)) #"((x){2})*" 10000 '()) (list (make-bytes 100 (char->integer #\x)) #"(x{2})*" 10000 '()) (list (make-bytes 100 (char->integer #\x)) #"(?:x{2})*" 10000 '()) - (list (make-bytes 1000 (char->integer #\x)) #"(?:x{2})*" 10000 '()) - (list (make-bytes 10000 (char->integer #\x)) #"(?:x{2})*" 100 '()) - (list (make-bytes 100000 (char->integer #\x)) #"(?:x{2})*" 100 '(pcre)) + (list (make-bytes 1000 (char->integer #\x)) #"(?:x{2})*" 10000 '(python)) + (list (make-bytes 10000 (char->integer #\x)) #"(?:x{2})*" 100 '(python)) + (list (make-bytes 100000 (char->integer #\x)) #"(?:x{2})*" 100 '(pcre python)) (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARBAZ" 100000 '()) (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 100000 '()) (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 10000 '()) (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARBAZ" 1000 '()) - (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARNOPE" 1000000 '()) - (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 1000000 '()) - (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 100000 '(pcre)) - (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 10000 '(pcre)) + (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"([a-z])*FOOBARNOPE" 1000000 '(python)) + (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 
1000000 '(python)) + (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 100000 '(pcre python)) + (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"[a-z]*FOOBARNOPE" 10000 '(pcre python)) (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10000 '()) (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 1000 '()) - (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10 '()) - (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 1000000 '()) - (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 100000 '(pcre)) - (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 10000 '(pcre)) + (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARBAZ" 10 '(python)) + (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 1000000 '(python)) + (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 100000 '(pcre python)) + (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?:[a-z]|ab)*FOOBARNOPE" 10000 '(pcre python)) (list (bytes-append (random-letters 100) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 10000 '()) - (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl)) - (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl))))) + (list (bytes-append (random-letters 1000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl python)) + (list (bytes-append (random-letters 10000) #"FOOBARBAZ") #"(?i:[a-z]*FOOBARNOPE)" 1000 '(pcre perl python))))) (define benchmark-names (map (lambda (t) (string->symbol (car t))) @@ -149,6 +169,7 @@ exec mzscheme -qu "$0" ${1+"$@"} (define testers (list (list 'mzscheme test-mzscheme) (list 'perl test-perl) + (list 'python test-python) (list 'pcre test-pcre))) ;; Extract 
command-line arguments --------------------
diff --git a/collects/tests/mzscheme/benchmarks/rx/python_prefix.py b/collects/tests/mzscheme/benchmarks/rx/python_prefix.py
new file mode 100644
index 0000000000..8b0f2fc99f
--- /dev/null
+++ b/collects/tests/mzscheme/benchmarks/rx/python_prefix.py
@@ -0,0 +1,10 @@
+import re
+import time
+
+def test(rx, input, iterations):
+    """Print seconds taken to search `input` for `rx` `iterations` times."""
+    search = re.compile(rx).search  # bound once: the loop times only matching
+    start = time.time()
+    for _ in range(iterations):
+        search(input)
+    print(time.time() - start)