From 6e02663d2db06cdd9a9acc6c3ff9114fb04e31da Mon Sep 17 00:00:00 2001 From: Eli Barzilay Date: Sun, 20 Dec 2009 09:49:31 +0000 Subject: [PATCH] use scheme/base, move from mzlib/md5 to file/md5 svn: r17365 original commit: bbb4228d44ef44376734192d62be0888182470cf --- collects/mzlib/md5.ss | 474 +----------------------------------------- 1 file changed, 3 insertions(+), 471 deletions(-) diff --git a/collects/mzlib/md5.ss b/collects/mzlib/md5.ss index 973d358..d1e0060 100644 --- a/collects/mzlib/md5.ss +++ b/collects/mzlib/md5.ss @@ -1,471 +1,3 @@ -(module md5 mzscheme - - (provide md5) - - ;;; Copyright (c) 2005-2009, PLT Scheme Inc. - ;;; Copyright (c) 2002, Jens Axel Soegaard - ;;; - ;;; Permission to copy this software, in whole or in part, to use this - ;;; software for any lawful purpose, and to redistribute this software - ;;; is hereby granted. - ;;; - ;;; md5.scm -- Jens Axel Soegaard, 16 oct 2002 - - ;;; Summary - ;; This is an implementation of the md5 message-digest algorithm - ;; in R5RS Scheme. The algorithm takes an arbitrary byte-string or - ;; an input port, and returns a 128-bit "fingerprint" byte string. - ;; The algorithm was invented by Ron Rivest, RSA Security, INC. - ;; Reference: RFC 1321, - - ;;; History - ; 14-10-2002 /jas - ; - Bored. Initial attempt. Done. Well, except for faulty output. - ; 15-10-2002 /jas - ; - It works at last - ; 16-10-2002 /jas - ; - Added R5RS support - ; 16-02-2003 / lth - ; - Removed let-values implementation because Larceny has it already - ; - Implemented Larceny versions of many bit primitives (note, 0.52 - ; or later required due to bignum bug) - ; - Removed most 'personal idiosyncrasies' to give the compiler a fair - ; chance to inline primitives and improve performance some. - ; Performance in the interpreter is still really quite awful. - ; - Wrapped entire procedure in a big LET to protect the namespace - ; - Some cleanup of repeated computations - ; - Moved test code to separate file - ; 17-02-2003 / lth - ; - Removed some of the indirection, for a 30% speedup in Larceny's - ; interpreter. Running in the interpreter on my Dell Inspiron 4000 - ; I get a fingerprint of "Lib/Common/bignums-be.sch" in about 63ms, - ; which is slow but adequate. (The compiled version is not much - ; faster -- most time is spent in bignum manipulation, which is - ; compiled in either case. To do this well we must either operate - ; on the bignum representation or redo the algorithm to use - ; fixnums only.) - ; 01-12-2003 / lth - ; - Reimplemented word arithmetic to use two 16-bit fixnums boxed in - ; a cons cell. In Petit Larceny's interpreter this gives a speedup - ; of a factor of almost eight, and in addition this change translates - ; well to other Scheme systems that support bit operations on fixnums. - ; Only 17-bit (signed) fixnums are required. - ; 23-12-2003 / jas - ; - Trivial port to PLT. Rewrote the word macro to syntax-rules. - ; Larceny primitives written as syntax-rules macros exanding - ; to their PLT name. - - ; 5-5-2005 / Greg Pettyjohn - ; - It was failing for strings of length 56 bytes i.e. when the length - ; in bits was congruent 448 modulo 512. Changed step 1 to fix this. - ; According to RFC 1321, the message should still be padded in this - ; case. - - ; 23-12-2005 / Jepri - ; - Mucked around with the insides to get it to read from a port - ; - Now it accepts a port or a string as input - ; - Doesn't explode when handed large strings anymore - ; - Now much slower - - ; 10-2-2006 / Matthew - ; - Cleaned up a little - ; - Despite comment above, it seems consistently faster - - ; 11-5-2006 / Eli - ; - Cleaned up a lot, removed Larceny-isms - ; - Heavy optimization: not consing anything throughout the loop - - ; 17-9-2007 / Eli - ; - making raw output possible - - - ;;; Word aritmetic (32 bit) - ;; Terminology - ;; word: 32 bit unsigned integer - ;; byte: 8 bit unsigned integer - - ;; Words are represented as a cons where the car holds the high 16 - ;; bits and the cdr holds the low 16 bits. Most good Scheme systems - ;; will have fixnums that hold at least 16 bits as well as fast - ;; allocation, so this has a fair chance at beating bignums for - ;; performance. - - ;; (word c) turns into a quoted pair '(hi . lo) if c is a literal number. - ;; can create a new word, compute one at compile-time etc - (define-syntax (word stx) - (syntax-case stx () - ;; normal version (checks, allocates) - [(word #:new c) - #'(let ([n c]) - (if (<= 0 n 4294967296) - (mcons (quotient n 65536) (remainder n 65536)) - (error 'word "out of range: ~e" n)))] - ;; use when the number is known to be in range (allocates, no check) - [(word #:new+safe c) - #'(let ([n c]) (mcons (quotient n 65536) (remainder n 65536)))] - ;; default form: compute at compile-time if possible - [(word c) - (let ([n (syntax-e #'c)]) - (if (integer? n) - (if (<= 0 n 4294967295) - (syntax-local-lift-expression - #`(mcons #,(quotient n 65536) #,(remainder n 65536))) - (raise-syntax-error #f "constant number out of range" stx)) - #'(word #:new c)))])) - - ;; destructive operations to save on consing - - ;; destructive cons - (define (cons! p x y) - (set-mcar! p x) - (set-mcdr! p y)) - - ;; a := b - (define (word=! a b) - (cons! a (mcar b) (mcdr b))) - - ;; a := a + b - (define (word+=! a b) - (let ([t1 (+ (mcar a) (mcar b))] - [t2 (+ (mcdr a) (mcdr b))]) - (cons! a - (bitwise-and (+ t1 (arithmetic-shift t2 -16)) 65535) - (bitwise-and t2 65535)))) - - (define word<<word-vector! : vector byte-string -> void - (define (bytes->word-vector! result l-raw) - ;; assumption: always getting a byte-string with 64 places - ;; (unless (eq? 64 (bytes-length l-raw)) - ;; (error 'bytes->word-vector! "something bad happened")) - (let loop ([n 15]) - (when (<= 0 n) - (let ([m (arithmetic-shift n 2)]) - (cons! (vector-ref result n) - (+ (bytes-ref l-raw (+ 2 m)) - (arithmetic-shift (bytes-ref l-raw (+ 3 m)) 8)) - (+ (bytes-ref l-raw m) - (arithmetic-shift (bytes-ref l-raw (+ 1 m)) 8)))) - (loop (sub1 n))))) - - (define empty-port (open-input-bytes #"")) - - ;; List Helper - ;; read-block! : a-port done-n (vector word) -> (values vector a-port done-n) - ;; reads 512 bytes from the port, writes them into the `result' vector of 16 - ;; 32-bit words when the port is exhausted it returns #f for the port and the - ;; last few bytes padded - (define (read-block! a-port done result) - (define-syntax write-words! - (syntax-rules () - [(_ done buf) (bytes->word-vector! result (step2 (* 8 done) buf))])) - (let ([l-raw (read-bytes 512/8 a-port)]) - (cond - ;; File size was a multiple of 512 bits, or we're doing one more round - ;; to add the correct padding from the short case - [(eof-object? l-raw) - (write-words! done - (if (zero? (modulo done 512/8)) - ;; The file is a multiple of 512 or was 0, so there hasn't been a - ;; chance to add the 1-bit pad, so we need to do a full pad - (step1 #"") - ;; We only enter this block when the previous block didn't have - ;; enough room to fit the 64-bit file length, so we just add 448 - ;; bits of zeros and then the 64-bit file length (step2) - (make-bytes 448/8 0))) - (values #f done)] - ;; We read exactly 512 bits, the algorithm proceeds as usual - [(eq? (bytes-length l-raw) 512/8) - (bytes->word-vector! result l-raw) - (values a-port (+ done (bytes-length l-raw)))] - ;; We read less than 512 bits, so the file has ended. - [else - (let ([done (+ done (bytes-length l-raw))]) - (write-words! done (step1 l-raw)) - (values - (if (> (* 8 (bytes-length l-raw)) 446) - ;; However, we don't have enough room to add the correct trailer, - ;; so we add what we can, then go for one more round which will - ;; automatically fall into the (eof-object? case) - empty-port - ;; Returning a longer vector than we should, luckily it doesn't - ;; matter. We read less than 512 bits and there is enough room - ;; for the correct trailer. Add trailer and bail - #f) - done))]))) - - - ;; MD5 - ;; The algorithm consists of four steps an encoding the result. - ;; All we need to do, is to call them in order. - ;; md5 : string [bool] -> string - - (define md5 - (case-lambda - [(a-thing) (md5 a-thing #t)] - [(a-thing hex-encode?) - (let ([a-port - (cond [(bytes? a-thing) (open-input-bytes a-thing)] - [(input-port? a-thing) a-thing] - [else (raise-type-error 'md5 "input-port or bytes" - a-thing)])]) - (encode (step4 a-port) hex-encode?))])) - - ;; Step 1 - Append Padding Bits - ;; The message is padded so the length (in bits) becomes 448 modulo 512. - ;; We allways append a 1 bit and then append the proper numbber of 0's. - ;; NB: 448 bits is 14 words and 512 bits is 16 words - ;; step1 : bytes -> bytes - - (define (step1 message) - (let* ([nbytes (modulo (- 448/8 (bytes-length message)) 512/8)] - [nbytes (if (zero? nbytes) 512/8 nbytes)]) - (bytes-append message - #"\x80" ; the 1 bit byte => one less 0 bytes to append - (make-bytes (sub1 nbytes) 0)))) - - ;; Step 2 - Append Length - ;; A 64 bit representation of the bit length b of the message before - ;; the padding of step 1 is appended. Lower word first. - ;; step2 : number bytes -> bytes - ;; org-len is the length of the original message in number of bits - - (define (step2 len padded-message) - (bytes-append padded-message (integer->integer-bytes len 8 #f #f))) - - ;; Step 3 - Initialize MD Buffer - ;; These magic constants are used to initialize the loop - ;; in step 4. - ;; - ;; word A: 01 23 45 67 - ;; word B: 89 ab cd ef - ;; word C: fe dc ba 98 - ;; word D: 76 54 32 10 - - ;; Step 4 - Process Message in 16-Word Blocks - ;; For each 16 word block, go through a round one to four. - ;; step4 : input-port -> (list word word word word) - - ;; Step 3 :-) (magic constants) - (define (step4 a-port) - ;; X is always a vector of 16 words (it changes in read-block!) - (define X - (vector (mcons 0 0) (mcons 0 0) (mcons 0 0) (mcons 0 0) (mcons 0 0) (mcons 0 0) - (mcons 0 0) (mcons 0 0) (mcons 0 0) (mcons 0 0) (mcons 0 0) (mcons 0 0) - (mcons 0 0) (mcons 0 0) (mcons 0 0) (mcons 0 0))) - (define A (word #:new+safe #x67452301)) - (define B (word #:new+safe #xefcdab89)) - (define C (word #:new+safe #x98badcfe)) - (define D (word #:new+safe #x10325476)) - (define AA (mcons 0 0)) - (define BB (mcons 0 0)) - (define CC (mcons 0 0)) - (define DD (mcons 0 0)) - (define tmp (mcons 0 0)) - (let loop ([a-port a-port] [done 0]) - (if (not a-port) - (list A B C D) - (let-values ([(b-port done) (read-block! a-port done X)]) - (define-syntax step - (syntax-rules () - [(_ a b c d e f g h) - #| This is the `no GC version' (aka C-in-Scheme) of this: - (set! a (word+ b (word<<< (word+ (word+ a (e b c d)) - (word+ (vector-ref X f) - (word h))) - g))) - |# - (begin (e tmp b c d) - (word+=! a tmp) - (word+=! a (vector-ref X f)) - (word+=! a (word h)) - (word<<exact (floor (* 4294967296 (abs (sin i)))))) - ;; for i from 1 to 64 - (step A B C D F 0 7 3614090360) - (step D A B C F 1 12 3905402710) - (step C D A B F 2 17 606105819) - (step B C D A F 3 22 3250441966) - (step A B C D F 4 7 4118548399) - (step D A B C F 5 12 1200080426) - (step C D A B F 6 17 2821735955) - (step B C D A F 7 22 4249261313) - (step A B C D F 8 7 1770035416) - (step D A B C F 9 12 2336552879) - (step C D A B F 10 17 4294925233) - (step B C D A F 11 22 2304563134) - (step A B C D F 12 7 1804603682) - (step D A B C F 13 12 4254626195) - (step C D A B F 14 17 2792965006) - (step B C D A F 15 22 1236535329) - ;;--- - (step A B C D G 1 5 4129170786) - (step D A B C G 6 9 3225465664) - (step C D A B G 11 14 643717713) - (step B C D A G 0 20 3921069994) - (step A B C D G 5 5 3593408605) - (step D A B C G 10 9 38016083) - (step C D A B G 15 14 3634488961) - (step B C D A G 4 20 3889429448) - (step A B C D G 9 5 568446438) - (step D A B C G 14 9 3275163606) - (step C D A B G 3 14 4107603335) - (step B C D A G 8 20 1163531501) - (step A B C D G 13 5 2850285829) - (step D A B C G 2 9 4243563512) - (step C D A B G 7 14 1735328473) - (step B C D A G 12 20 2368359562) - ;;--- - (step A B C D H 5 4 4294588738) - (step D A B C H 8 11 2272392833) - (step C D A B H 11 16 1839030562) - (step B C D A H 14 23 4259657740) - (step A B C D H 1 4 2763975236) - (step D A B C H 4 11 1272893353) - (step C D A B H 7 16 4139469664) - (step B C D A H 10 23 3200236656) - (step A B C D H 13 4 681279174) - (step D A B C H 0 11 3936430074) - (step C D A B H 3 16 3572445317) - (step B C D A H 6 23 76029189) - (step A B C D H 9 4 3654602809) - (step D A B C H 12 11 3873151461) - (step C D A B H 15 16 530742520) - (step B C D A H 2 23 3299628645) - ;;--- - (step A B C D II 0 6 4096336452) - (step D A B C II 7 10 1126891415) - (step C D A B II 14 15 2878612391) - (step B C D A II 5 21 4237533241) - (step A B C D II 12 6 1700485571) - (step D A B C II 3 10 2399980690) - (step C D A B II 10 15 4293915773) - (step B C D A II 1 21 2240044497) - (step A B C D II 8 6 1873313359) - (step D A B C II 15 10 4264355552) - (step C D A B II 6 15 2734768916) - (step B C D A II 13 21 1309151649) - (step A B C D II 4 6 4149444226) - (step D A B C II 11 10 3174756917) - (step C D A B II 2 15 718787259) - (step B C D A II 9 21 3951481745) - ;;--- - (word+=! A AA) (word+=! B BB) (word+=! C CC) (word+=! D DD) - ;;--- - (loop b-port done))))) - - ;; Each round consists of the application of the following - ;; basic functions. They functions on a word bitwise, as follows. - ;; F(X,Y,Z) = XY v not(X) Z (NB: or can be replaced with + in F) - ;; G(X,Y,Z) = XZ v Y not(Z) - ;; H(X,Y,Z) = X xor Y xor Z - ;; I(X,Y,Z) = Y xor (X v not(Z)) - - #| These functions used to be simple, for example: - (define (F x y z) - (word-or (word-and x y) (word-and (word-not x) z))) - but we don't want to allocate stuff for each operation, so we add an - output pair for each of these functions (the `t' argument). However, this - means that if we want to avoid consing, we need either a few such - pre-allocated `register' values... The solution is to use a macro that - will perform an operation on the cars, cdrs, and set the result into the - target pair. Works only because these operations are symmetrical in their - use of the two halves. - - |# - - (define-syntax cons-op! - (syntax-rules () - [(cons-op! t (x ...) body) - (cons! t (let ([x (mcar x)] ...) body) (let ([x (mcdr x)] ...) body))])) - - (define (F t x y z) - (cons-op! t (x y z) - (bitwise-and (bitwise-ior (bitwise-and x y) - (bitwise-and (bitwise-not x) z)) - 65535))) - - (define (G t x y z) - (cons-op! t (x y z) - (bitwise-and (bitwise-ior (bitwise-and x z) - (bitwise-and y (bitwise-not z))) - 65535))) - - (define (H t x y z) - (cons-op! t (x y z) (bitwise-xor x y z))) - - (define (II t x y z) - (cons-op! t (x y z) - (bitwise-and (bitwise-xor y (bitwise-ior x (bitwise-not z))) - 65535))) - - ;; Step 5 - Encoding - ;; To finish up, we convert the word to hexadecimal string - ;; - and make sure they end up in order. - ;; encode : (list word word word word) bool -> byte-string - - (define hex-digits #(48 49 50 51 52 53 54 55 56 57 97 98 99 100 101 102)) - ;; word->digits : word -> bytes-string, - ;; returns a little endian result, but each byte is hi half and then lo half - (define (word->digits w) - (let ([digit (lambda (n b) - (vector-ref hex-digits - (bitwise-and (arithmetic-shift n (- b)) 15)))] - [lo (mcdr w)] [hi (mcar w)]) - (bytes (digit lo 4) (digit lo 0) (digit lo 12) (digit lo 8) - (digit hi 4) (digit hi 0) (digit hi 12) (digit hi 8)))) - (define (word->bytes w) - (bytes-append (integer->integer-bytes (mcdr w) 2 #f #f) - (integer->integer-bytes (mcar w) 2 #f #f))) - (define (encode l hex-encode?) - (apply bytes-append (map (if hex-encode? word->digits word->bytes) l))) - -;(define (md5-test) -; (if (and (equal? (md5 "") -; "d41d8cd98f00b204e9800998ecf8427e") -; (equal? (md5 "a") -; "0cc175b9c0f1b6a831c399e269772661") -; (equal? (md5 "abc") -; "900150983cd24fb0d6963f7d28e17f72") -; (equal? (md5 "message digest") -; "f96b697d7cb7938d525a2f31aaf161d0") -; (equal? (md5 "abcdefghijklmnopqrstuvwxyz") -; "c3fcd3d76192e4007dfb496cca67e13b") -; (equal? (md5 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") -; "d174ab98d277d9f5a5611c2c9f419d9f") -; (equal? (md5 "12345678901234567890123456789012345678901234567890123456789012345678901234567890") -; "57edf4a22be3c955ac49da2e2107b67a")) -; 'passed -; 'failed)) -; -;(md5-test) - ) +#lang scheme/base +(require file/md5) +(provide (all-from-out file/md5))