improve PS/AFM support with uniXXXX parsing and per-font glyphlist.txt

svn: r1570
This commit is contained in:
Matthew Flatt 2005-12-09 17:05:09 +00:00
parent 4f8db6d8d4
commit 682d43cb54
2 changed files with 227 additions and 27 deletions

View File

@ -0,0 +1,175 @@
(module ttf-to-glyphlist mzscheme
;; http://developer.apple.com/fonts/TTRefMan/RM06/Chap6.html
;; http://www.microsoft.com/typography/otspec/otff.htm
(require (lib "etc.ss")
(lib "list.ss")
(lib "cmdline.ss"))
(define (read-fixed p)
(integer-bytes->integer (read-bytes 4 p) #f #t))
(define (read-short p)
(integer-bytes->integer (read-bytes 2 p) #t #t))
(define (read-ushort p)
(integer-bytes->integer (read-bytes 2 p) #f #t))
(define (read-ulong p)
(integer-bytes->integer (read-bytes 4 p) #f #t))
(define (parse-ttf p)
(begin-with-definitions
(unless (= (read-fixed p) #x10000)
(error "Doesn't start with snft version 1.0"))
(define num-tables (read-short p))
(read-short p)
(read-short p)
(read-short p)
(define-struct table (tag checksum offset length))
(define (read-table)
(make-table (read-bytes 4 p)
(read-ulong p)
(read-ulong p)
(read-ulong p)))
(define tables
(build-list num-tables (lambda (_) (read-table))))
(define cmaps
(filter (lambda (t) (bytes=? #"cmap" (table-tag t)))
tables))
(define posts
(filter (lambda (t) (bytes=? #"post" (table-tag t)))
tables))
(define-struct encoding (platform encoding offset))
(define-struct segment (start end delta offset) #f)
(print-struct #t)
(define (read-cmap t)
(file-position p (table-offset t))
(unless (= (read-ushort p) 0)
(error "cmap table is not version 0"))
(let ([count (read-ushort p)])
(let ([encodings (build-list
count
(lambda (_)
(make-encoding (read-ushort p)
(read-ushort p)
(read-ulong p))))])
(for-each (lambda (e)
(file-position p (+ (table-offset t) (encoding-offset e)))
;; (printf "~a ~a\n" (encoding-platform e) (encoding-encoding e))
(cond
[(or (and (= (encoding-platform e) 0)
(= (encoding-encoding e) 3))
(and (= (encoding-platform e) 3)
(= (encoding-encoding e) 1)))
(let ([format (read-ushort p)])
(unless (= format 4)
(error "mapping sub-table is not format 4"))
(read-ushort p) ; length in bytes
(read-ushort p) ; version
(let ([count (/ (read-ushort p) 2)])
(read-ushort p) ; searchrange
(read-ushort p) ; entry selector
(read-ushort p) ; rangeshift
(let ([ends (build-list count
(lambda (_) (read-ushort p)))]
[_ (read-ushort p)]
[starts (build-list count
(lambda (_) (read-ushort p)))]
[deltas (build-list count
(lambda (_) (read-short p)))]
[offsets (build-list count
(lambda (_) (read-ushort p)))])
(let ([segments (map make-segment starts ends deltas offsets)])
(let loop ([segments segments][prev-end 0])
(unless (null? segments)
(let ([s (car segments)])
;; (printf "~s\n" s)
(unless (zero? (segment-offset s))
(file-position p
(+ (table-offset t) (encoding-offset e)
(segment-offset s))))
(let loop ([pos (max (add1 prev-end) (segment-start s))])
(unless (pos . > . (segment-end s))
(let ([char pos])
(map-unicode char
(modulo
(+ (if (zero? (segment-offset s))
char
(read-ushort p))
(segment-delta s))
65536)))
(loop (add1 pos))))
(loop (cdr segments) (segment-end s)))))))))]))
encodings)
(void))))
(define name-map (make-hash-table 'equal))
(define (read-name pos names)
(or (vector-ref names pos)
(begin
(when (positive? pos)
(read-name (sub1 pos) names))
(vector-set! names pos (let ([n (read-byte p)])
(read-bytes n p)))
(vector-ref names pos))))
(define (read-post t)
(file-position p (table-offset t))
(if (= (read-fixed p) #x20000)
(begin
(read-bytes 28 p) ; header
(let ([count (read-ushort p)])
(let ([idxs (build-list count
(lambda (_) (read-ushort p)))]
[names (make-vector count #f)])
(let loop ([idxs idxs][glyph 0])
(unless (null? idxs)
(let ([idx (car idxs)])
(when (idx . > . 257)
(let ([name (read-name (- idx 258) names)])
(hash-table-put! name-map glyph name))))
(loop (cdr idxs) (add1 glyph)))))))
(error "post table is not version 2")))
(for-each read-post posts)
(define rename-table (make-hash-table 'equal))
(define (map-unicode u g)
(let ([name (hash-table-get name-map g (lambda () #f))])
(when name
(printf "~a;~x\n" name u)
(hash-table-put! rename-table name u))))
(for-each read-cmap cmaps)))
(define filename
(command-line
"ttf-to-glyphlist"
(current-command-line-arguments)
[args (ttf-file)
ttf-file]))
(let ([p (open-input-file filename)])
(dynamic-wind
void
(lambda () (parse-ttf p))
(lambda () (close-input-port p)))))

View File

@ -71,12 +71,11 @@
;; table has been loaded.
(define got-long-name-list? #f)
;; Reads the Adobe char name -> Unicode table
(define (read-names! gl.txt long?)
(define (read-glyph-names gl.txt)
(let ([ht (make-hash-table 'equal)])
(with-handlers ([exn:fail? report-exn])
(call-with-input-file*
(find-path (current-ps-afm-file-paths) gl.txt)
gl.txt
(lambda (i)
(let loop ()
(let ([l (read-bytes-line i)])
@ -88,21 +87,46 @@
(cadr m)
(bytes->number (caddr m) 16))))
(loop)))))))
(set! adobe-name-to-code-point ht)
(set! got-long-name-list? long?)))
ht))
;; Maps Adbobe char name to Unicode, loading the table as necesary
(define (find-unicode name)
(unless adobe-name-to-code-point
(read-names! "glyphshortlist.txt" #f))
(hash-table-get adobe-name-to-code-point
name
(lambda ()
(if got-long-name-list?
#f
(begin
(read-names! "glyphlist.txt" #t)
(find-unicode name))))))
;; Reads a font-specific glyphname mapping
(define (read-font-glyphnames file)
(let-values ([(base name dir?) (split-path file)])
(let ([file (build-path base
(bytes->path
(bytes-append
(path->bytes (path-replace-suffix name #""))
#"-glyphlist.txt")))])
(if (file-exists? file)
;; Read glyph names:
(read-glyph-names file)
;; Make empty hash table:
(make-hash-table)))))
;; Reads the Adobe char name -> Unicode table
(define (read-names! gl.txt long?)
(set! adobe-name-to-code-point (read-glyph-names
(find-path (current-ps-afm-file-paths) gl.txt)))
(set! got-long-name-list? long?))
;; Maps Adobe char name to Unicode, loading the table as necesary
(define (find-unicode font-glyphnames name)
(hash-table-get
font-glyphnames
name
(lambda ()
(unless adobe-name-to-code-point
(read-names! "glyphshortlist.txt" #f))
(hash-table-get adobe-name-to-code-point
name
(lambda ()
(if got-long-name-list?
(let ([m (regexp-match #rx#"^uni([0-9a-fA-Z]+)" name)])
(and m
(string->number (bytes->string/latin-1 (cadr m)) 16)))
(begin
(read-names! "glyphlist.txt" #t)
(find-unicode font-glyphnames name))))))))
;; ------------------------------------------------------------
@ -239,7 +263,8 @@
[kern-pairs null]
[char-set #f]
[char-set-name #f]
[is-cid? #f])
[is-cid? #f]
[font-glyphnames (read-font-glyphnames file)])
(parameterize ([read-case-sensitive #f])
(call-with-input-file*
file
@ -271,7 +296,7 @@
(eq? n 'ch))
(let ([v (read i)]
[rest (read-bytes-line i)])
(let ([nm (regexp-match #rx#"; *N +([a-zA-Z0-9]+) *;" rest)]
(let ([nm (regexp-match #rx#"; *N +([a-zA-Z0-9_.-]+) *;" rest)]
[wm (regexp-match #rx#"; *W.?X +([0-9]+) *;" rest)])
(when (or (and (eq? n 'c)
(integer? v))
@ -287,7 +312,7 @@
achars
(if (and char-set is-cid?)
(hash-table-get char-set name (lambda () 0))
(find-unicode name))
(find-unicode font-glyphnames name))
(make-achar
(let ([v (if (eq? n 'c)
v
@ -297,7 +322,7 @@
name
v))
(or (and wm (bytes->number (cadr wm))) 500)
(extract-ligatures rest)))))))
(extract-ligatures font-glyphnames rest)))))))
(loop)))))]
[(startkernpairs)
(let ([cnt (read i)])
@ -311,8 +336,8 @@
(read i))]
[amt (read i)])
(read-bytes-line i)
(let ([v1 (find-unicode (string->bytes/utf-8 (symbol->string v1)))]
[v2 (find-unicode (string->bytes/utf-8 (symbol->string v2)))])
(let ([v1 (find-unicode font-glyphnames (string->bytes/utf-8 (symbol->string v1)))]
[v2 (find-unicode font-glyphnames (string->bytes/utf-8 (symbol->string v2)))])
(set! kern-pairs (cons (list v1 v2 amt) kern-pairs))))
(loop)))))]
[else (read-bytes-line i)])
@ -335,15 +360,15 @@
(and char-set #t)
char-set-name))))
(define (extract-ligatures rest)
(define (extract-ligatures font-glyphnames rest)
(let ([m (regexp-match #rx#"; *L +([a-zA-Z0-9-]+) +([a-zA-Z0-9-]+)(.*)$" rest)])
(if m
(let ([next (cadr m)]
[ligature (caddr m)]
[rest (cadddr m)])
(cons (cons (find-unicode next)
(find-unicode ligature))
(extract-ligatures rest)))
(cons (cons (find-unicode font-glyphnames next)
(find-unicode font-glyphnames ligature))
(extract-ligatures font-glyphnames rest)))
null)))
(define fonts (make-hash-table 'equal))