From e5ebb45c066c9409b5dd2e50ab66e9188335df8b Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Mon, 23 Jun 2014 06:25:26 +0100 Subject: [PATCH] Scribble: improve Unicode handling for Latex/PDF output Normalize strings to composed form before splitting a string into characters to handle individually. original commit: d970d5aaead2bfbae0651b583f79e6ac719c7d39 --- pkgs/scribble-pkgs/scribble-lib/scribble/latex-render.rkt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkgs/scribble-pkgs/scribble-lib/scribble/latex-render.rkt b/pkgs/scribble-pkgs/scribble-lib/scribble/latex-render.rkt index f225023a..863790c0 100644 --- a/pkgs/scribble-pkgs/scribble-lib/scribble/latex-render.rkt +++ b/pkgs/scribble-pkgs/scribble-lib/scribble/latex-render.rkt @@ -823,6 +823,10 @@ [(#\}) (display "\\%7d")] [else (display c)]))] [else + ;; Start by normalizing to "combined" form, so that Racket characters + ;; are closer to Unicode characters (e.g., ä is one character, instead + ;; of a combining character followed by "a"). + (let ([s (string-normalize-nfc s)]) (let ([len (string-length s)]) (let loop ([i 0]) (unless (= i len) @@ -1082,7 +1086,7 @@ c)] [else c])])]) c)]))) - (loop (add1 i)))))])) + (loop (add1 i))))))])) (define/private (box-character c)