From 264ec7279046c68e24c3997dc773a11773f5a748 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Fri, 16 Aug 2019 11:03:37 +0200 Subject: [PATCH] cache bytes converter used for locale conversions On platforms other than Windows and MacOS, locale encoding (including path <-> string conversion) opened a converter for each separate operation. That can be slow on some OSes, so cache converters used for locale conversions. Relevant to #2781 --- racket/src/io/converter/main.rkt | 9 ++ racket/src/io/locale/cache.rkt | 80 +++++++++++++++++ racket/src/io/locale/collate.rkt | 10 ++- racket/src/io/locale/recase.rkt | 6 +- racket/src/io/locale/string.rkt | 11 ++- racket/src/io/main.rkt | 3 + racket/src/racket/include/schthread.h | 6 ++ racket/src/racket/src/env.c | 1 + racket/src/racket/src/schpriv.h | 1 + racket/src/racket/src/string.c | 118 ++++++++++++++++++++++---- racket/src/rktio/rktio.def | 1 + racket/src/rktio/rktio.h | 3 + racket/src/rktio/rktio.inc | 1 + racket/src/rktio/rktio.rktl | 5 ++ racket/src/rktio/rktio_convert.c | 5 ++ 15 files changed, 235 insertions(+), 25 deletions(-) create mode 100644 racket/src/io/locale/cache.rkt diff --git a/racket/src/io/converter/main.rkt b/racket/src/io/converter/main.rkt index 0c82e6034e..6b8a61e35e 100644 --- a/racket/src/io/converter/main.rkt +++ b/racket/src/io/converter/main.rkt @@ -12,6 +12,9 @@ bytes-convert bytes-convert-end) +(module+ reset + (provide bytes-reset-converter)) + (struct bytes-converter ([c #:mutable] [custodian-reference #:mutable])) @@ -229,3 +232,9 @@ (define err (and (= converted RKTIO_CONVERT_ERROR) (rktio_get_last_error rktio))) (values in-consumed out-produced err)])) + +;; in atomic mode +(define (bytes-reset-converter converter) + (define c (bytes-converter-c converter)) + (unless (utf-8-converter? 
c) + (rktio_convert_reset rktio c))) diff --git a/racket/src/io/locale/cache.rkt b/racket/src/io/locale/cache.rkt new file mode 100644 index 0000000000..e2c44f8584 --- /dev/null +++ b/racket/src/io/locale/cache.rkt @@ -0,0 +1,80 @@ +#lang racket/base +(require "../host/thread.rkt" + "../host/place-local.rkt" + "../converter/main.rkt" + (submod "../converter/main.rkt" reset) + "ucs-4.rkt") + +(provide convert-cache-init! + + bytes-open-converter/cached-to + bytes-open-converter/cached-to2 + bytes-open-converter/cached-from + bytes-close-converter/cached-to + bytes-close-converter/cached-to2 + bytes-close-converter/cached-from) + +(struct cache (enc to to2 from) + #:mutable + #:authentic) + +(define (new-cache) (cache #f #f #f #f)) + +(define-place-local local-cache (new-cache)) + +(define (convert-cache-init!) + (set! local-cache (new-cache))) + +(define (cache-clear! get update!) + (define c (get local-cache)) + (update! local-cache #f) + (when c + (bytes-close-converter c))) + +(define (cache-lookup! enc get update!) + (atomically + (and (equal? enc (cache-enc local-cache)) + (let ([c (get local-cache)]) + (when c + (update! local-cache #f)) + c)))) + +(define (cache-save! c enc get update!) + (atomically + (unless (equal? enc (cache-enc local-cache)) + (cache-clear! cache-to set-cache-to!) + (cache-clear! cache-to2 set-cache-to2!) + (cache-clear! cache-from set-cache-from!) + (set-cache-enc! local-cache enc)) + (cond + [(get local-cache) + (bytes-close-converter c)] + [else + (bytes-reset-converter c) ; must be in atomic mode + (update! local-cache c)]))) + +;; ---------------------------------------- + +(define (bytes-open-converter/cached-to enc) + (or (cache-lookup! enc cache-to set-cache-to!) + (bytes-open-converter ucs-4-encoding enc))) + +(define (bytes-open-converter/cached-to2 enc) + (or (cache-lookup! enc cache-to2 set-cache-to2!) + (bytes-open-converter ucs-4-encoding enc))) + +(define (bytes-open-converter/cached-from enc) + (or (cache-lookup! 
enc cache-from set-cache-from!) + (bytes-open-converter enc "UTF-8"))) + +(define (bytes-close-converter/cached-to c enc) + (or (cache-save! c enc cache-to set-cache-to!) + (bytes-close-converter c))) + +(define (bytes-close-converter/cached-to2 c enc) + (or (cache-save! c enc cache-to2 set-cache-to2!) + (bytes-close-converter c))) + +(define (bytes-close-converter/cached-from c enc) + (or (cache-save! c enc cache-from set-cache-from!) + (bytes-close-converter c))) diff --git a/racket/src/io/locale/collate.rkt b/racket/src/io/locale/collate.rkt index 85351c1c17..bc308a7a60 100644 --- a/racket/src/io/locale/collate.rkt +++ b/racket/src/io/locale/collate.rkt @@ -4,6 +4,7 @@ "../host/rktio.rkt" "../string/utf-16-encode.rkt" "../converter/main.rkt" + "cache.rkt" "parameter.rkt" "string.rkt" "recase.rkt" @@ -93,10 +94,11 @@ (define c2 #f) (define in-bstr1 (string->bytes/ucs-4 s1 0 (string-length s1))) (define in-bstr2 (string->bytes/ucs-4 s2 0 (string-length s2))) + (define enc (locale-string-encoding)) (dynamic-wind (lambda () - (set! c1 (bytes-open-converter ucs-4-encoding (locale-string-encoding))) - (set! c2 (bytes-open-converter ucs-4-encoding (locale-string-encoding)))) + (set! c1 (bytes-open-converter/cached-to enc)) + (set! 
c2 (bytes-open-converter/cached-to2 enc))) (lambda () (let loop ([pos1 0] [pos2 0] [end1 (bytes-length in-bstr1)] [end2 (bytes-length in-bstr2)]) (define-values (bstr1 in-used1 status1) @@ -154,5 +156,5 @@ [else (loop pos1 pos2 (+ pos1 len) (+ pos2 len))])]))) (lambda () - (bytes-close-converter c1) - (bytes-close-converter c2)))])) + (bytes-close-converter/cached-to c1 enc) + (bytes-close-converter/cached-to2 c2 enc)))])) diff --git a/racket/src/io/locale/recase.rkt b/racket/src/io/locale/recase.rkt index c247ebe1a9..03e0321b56 100644 --- a/racket/src/io/locale/recase.rkt +++ b/racket/src/io/locale/recase.rkt @@ -6,6 +6,7 @@ "../string/utf-16-encode.rkt" "../string/utf-16-decode.rkt" "../converter/main.rkt" + "cache.rkt" "parameter.rkt" "string.rkt" "nul-char.rkt" @@ -64,9 +65,10 @@ ;; encoding-error bytes alone. (define c #f) (define in-bstr (string->bytes/ucs-4 s 0 (string-length s))) + (define enc (locale-string-encoding)) (dynamic-wind (lambda () - (set! c (bytes-open-converter ucs-4-encoding (locale-string-encoding)))) + (set! c (bytes-open-converter/cached-to enc))) (lambda () (let loop ([pos 0]) (cond @@ -94,7 +96,7 @@ (apply string-append ls err-s r) (list* ls err-s r))])]))) (lambda () - (bytes-close-converter c)))])) + (bytes-close-converter/cached-to c enc)))])) ;; in atomic mode ;; Assumes that the locale is sync'ed diff --git a/racket/src/io/locale/string.rkt b/racket/src/io/locale/string.rkt index 8312a8f81f..188ddcbac7 100644 --- a/racket/src/io/locale/string.rkt +++ b/racket/src/io/locale/string.rkt @@ -3,6 +3,7 @@ "../string/convert.rkt" "../string/utf-8-decode.rkt" "../converter/main.rkt" + "cache.rkt" "parameter.rkt" "ucs-4.rkt") @@ -20,9 +21,10 @@ (string->bytes/utf-8 str err-byte start end)] [else (define c #f) + (define enc (locale-string-encoding)) (dynamic-wind (lambda () - (set! c (bytes-open-converter ucs-4-encoding (locale-string-encoding)))) + (set! 
c (bytes-open-converter/cached-to enc))) (lambda () (define in-bstr (string->bytes/ucs-4 str start end)) (let loop ([pos 0]) @@ -51,7 +53,7 @@ (apply bytes-append (cons bstr (cons err-bstr r))) (cons bstr (cons err-bstr r)))])]))) (lambda () - (bytes-close-converter c)))])) + (bytes-close-converter/cached-to c enc)))])) (define/who (bytes->string/locale in-bstr [err-char #f] [start 0] [end (and (bytes? in-bstr) (bytes-length in-bstr))]) @@ -65,9 +67,10 @@ (bytes->string/utf-8 in-bstr err-char start end)] [else (define c #f) + (define enc (locale-string-encoding)) (dynamic-wind (lambda () - (set! c (bytes-open-converter (locale-string-encoding) "UTF-8"))) + (set! c (bytes-open-converter/cached-from enc))) (lambda () (let loop ([pos 0]) (define-values (bstr in-used status) @@ -95,4 +98,4 @@ (bytes->string/utf-8 (apply bytes-append (cons bstr (cons err-bstr r)))) (cons bstr (cons err-bstr r)))])]))) (lambda () - (bytes-close-converter c)))])) + (bytes-close-converter/cached-from c enc)))])) diff --git a/racket/src/io/main.rkt b/racket/src/io/main.rkt index 50cf463ea7..9dbc73d0c6 100644 --- a/racket/src/io/main.rkt +++ b/racket/src/io/main.rkt @@ -30,6 +30,8 @@ place-init) (only-in "sandman/ltps.rkt" shared-ltps-place-init!) + (only-in "locale/cache.rkt" + convert-cache-init!) "port/place.rkt") (provide (all-from-out "port/main.rkt") @@ -60,6 +62,7 @@ (define (io-place-init! in-fd out-fd err-fd cust plumber) (rktio-place-init!) + (convert-cache-init!) (logger-init!) (shared-ltps-place-init!) (install-error-value->string-handler!) 
diff --git a/racket/src/racket/include/schthread.h b/racket/src/racket/include/schthread.h index 374cd412a9..fe8b2ec510 100644 --- a/racket/src/racket/include/schthread.h +++ b/racket/src/racket/include/schthread.h @@ -321,6 +321,9 @@ typedef struct Thread_Local_Variables { int num_minor_garbage_collections_; int locale_on_; void *current_locale_name_ptr_; + char *cached_locale_encoding_name_; + struct rktio_converter_t *cached_locale_to_converter_; + struct rktio_converter_t *cached_locale_from_converter_; int gensym_counter_; struct Scheme_Object *dummy_input_port_; struct Scheme_Object *dummy_output_port_; @@ -706,6 +709,9 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL; #define num_minor_garbage_collections XOA (scheme_get_thread_local_variables()->num_minor_garbage_collections_) #define locale_on XOA (scheme_get_thread_local_variables()->locale_on_) #define current_locale_name_ptr XOA (scheme_get_thread_local_variables()->current_locale_name_ptr_) +#define cached_locale_encoding_name XOA (scheme_get_thread_local_variables()->cached_locale_encoding_name_) +#define cached_locale_to_converter XOA (scheme_get_thread_local_variables()->cached_locale_to_converter_) +#define cached_locale_from_converter XOA (scheme_get_thread_local_variables()->cached_locale_from_converter_) #define gensym_counter XOA (scheme_get_thread_local_variables()->gensym_counter_) #define dummy_input_port XOA (scheme_get_thread_local_variables()->dummy_input_port_) #define dummy_output_port XOA (scheme_get_thread_local_variables()->dummy_output_port_) diff --git a/racket/src/racket/src/env.c b/racket/src/racket/src/env.c index 4e44c9b479..0898091f82 100644 --- a/racket/src/racket/src/env.c +++ b/racket/src/racket/src/env.c @@ -586,6 +586,7 @@ void scheme_place_instance_destroy(int force) GC_destruct_child_gc(); #endif scheme_free_all_code(); + scheme_clear_locale_cache(); rktio_destroy(scheme_rktio); } diff --git a/racket/src/racket/src/schpriv.h b/racket/src/racket/src/schpriv.h index 
adb984089d..dc5f20cb5a 100644 --- a/racket/src/racket/src/schpriv.h +++ b/racket/src/racket/src/schpriv.h @@ -436,6 +436,7 @@ void scheme_register_network_evts(); void scheme_free_dynamic_extensions(void); void scheme_free_all_code(void); +void scheme_clear_locale_cache(void); XFORM_NONGCING int scheme_is_multithreaded(int now); diff --git a/racket/src/racket/src/string.c b/racket/src/racket/src/string.c index 7519783339..47b57f751d 100644 --- a/racket/src/racket/src/string.c +++ b/racket/src/racket/src/string.c @@ -166,6 +166,8 @@ static char *string_to_from_locale(int to_bytes, intptr_t *olen, int perm, int *no_cvt); +static void cache_locale_or_close(int to_bytes, rktio_converter_t *cd, char *le); + #define portable_isspace(x) (((x) < 128) && isspace(x)) ROSYM static Scheme_Object *sys_symbol; @@ -184,6 +186,10 @@ SHARED_OK static Scheme_Object *banner_str; THREAD_LOCAL_DECL(static Scheme_Object *fs_change_props); +THREAD_LOCAL_DECL(static char *cached_locale_encoding_name); +THREAD_LOCAL_DECL(struct rktio_converter_t *cached_locale_to_converter); +THREAD_LOCAL_DECL(struct rktio_converter_t *cached_locale_from_converter); + READ_ONLY static Scheme_Object *complete_symbol, *continues_symbol, *aborts_symbol, *error_symbol; READ_ONLY Scheme_Object *scheme_string_p_proc; @@ -2571,6 +2577,18 @@ static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[]) return s; } +static void do_convert_close(rktio_converter_t *cd, int cache_mode, const char *to_e, const char *from_e) +/* If `cache_mode` is -1, then `to_e` needs to be freed (or cached). + If `cache_mode` is 1, then `from_e` needs to be freed (or cached). 
*/ +{ + if (cache_mode == -1) + cache_locale_or_close(1, cd, (char *)to_e); + else if (cache_mode == 1) + cache_locale_or_close(0, cd, (char *)from_e); + else if (!cache_mode) + rktio_converter_close(scheme_rktio, cd); +} + static char *do_convert(rktio_converter_t *cd, /* if !cd and either from_e or to_e can be NULL, then reset_locale() must have been called */ @@ -2598,7 +2616,7 @@ static char *do_convert(rktio_converter_t *cd, 1 for more avail */ int *status) { - int dip, dop, close_it = 0, mz_utf8 = 0; + int dip, dop, close_it = 0, cache_mode = 0, mz_utf8 = 0; intptr_t il, ol, r; GC_CAN_IGNORE char *ip, *op; @@ -2611,6 +2629,12 @@ static char *do_convert(rktio_converter_t *cd, if (!cd) { if (rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED) { char *tmp_from_e = NULL, *tmp_to_e = NULL; + + if (!to_e && !strcmp(from_e, MZ_UCS4_NAME)) + cache_mode = -1; + else if (!from_e && !strcmp(to_e, MZ_UCS4_NAME)) + cache_mode = 1; + if (!from_e) { tmp_from_e = rktio_locale_encoding(scheme_rktio); from_e = tmp_from_e; @@ -2619,10 +2643,23 @@ static char *do_convert(rktio_converter_t *cd, tmp_to_e = rktio_locale_encoding(scheme_rktio); to_e = tmp_to_e; } - cd = rktio_converter_open(scheme_rktio, to_e, from_e); + + if ((cache_mode == -1) + && cached_locale_to_converter + && !strcmp(to_e, cached_locale_encoding_name)) { + cd = cached_locale_to_converter; + cached_locale_to_converter = NULL; + } else if ((cache_mode == 1) + && cached_locale_from_converter + && !strcmp(from_e, cached_locale_encoding_name)) { + cd = cached_locale_from_converter; + cached_locale_from_converter = NULL; + } else { + cd = rktio_converter_open(scheme_rktio, to_e, from_e); + } close_it = 1; - if (tmp_from_e) free(tmp_from_e); - if (tmp_to_e) free(tmp_to_e); + if (tmp_from_e && ((cache_mode != 1) || !cd)) free(tmp_from_e); + if (tmp_to_e && ((cache_mode != -1) || !cd)) free(tmp_to_e); } else if (to_from_utf8) { /* Assume UTF-8 */ mz_utf8 = 1; @@ -2737,7 +2774,7 @@ static char 
*do_convert(rktio_converter_t *cd, } else { *status = 1; if (close_it) - rktio_converter_close(scheme_rktio, cd); + do_convert_close(cd, cache_mode, to_e, from_e); while (extra--) { out[od + dop + extra] = 0; } @@ -2748,7 +2785,7 @@ static char *do_convert(rktio_converter_t *cd, if (icerr == RKTIO_ERROR_CONVERT_BAD_SEQUENCE) *status = -2; if (close_it) - rktio_converter_close(scheme_rktio, cd); + do_convert_close(cd, cache_mode, to_e, from_e); while (extra--) { out[od + dop + extra] = 0; } @@ -2766,7 +2803,7 @@ static char *do_convert(rktio_converter_t *cd, } else { *status = 0; if (close_it) - rktio_converter_close(scheme_rktio, cd); + do_convert_close(cd, cache_mode, to_e, from_e); while (extra--) { out[od + dop + extra] = 0; } @@ -2792,18 +2829,32 @@ static char *string_to_from_locale(int to_bytes, rktio_converter_t *cd; le = rktio_locale_encoding(scheme_rktio); - if (to_bytes) - cd = rktio_converter_open(scheme_rktio, le, MZ_UCS4_NAME); - else - cd = rktio_converter_open(scheme_rktio, MZ_UCS4_NAME, le); - free(le); - + if (cached_locale_encoding_name + && !strcmp(le, cached_locale_encoding_name) + && (to_bytes ? cached_locale_to_converter : cached_locale_from_converter)) { + if (to_bytes) { + cd = cached_locale_to_converter; + cached_locale_to_converter = NULL; + } else { + cd = cached_locale_from_converter; + cached_locale_from_converter = NULL; + } + } else { + if (to_bytes) + cd = rktio_converter_open(scheme_rktio, le, MZ_UCS4_NAME); + else + cd = rktio_converter_open(scheme_rktio, MZ_UCS4_NAME, le); + } + if (!cd) { + free(le); *no_cvt = 1; return NULL; } *no_cvt = 0; + status = 0; + while (len) { /* We might have conversion errors... 
*/ c = do_convert(cd, NULL, NULL, 0, @@ -2818,6 +2869,7 @@ static char *string_to_from_locale(int to_bytes, if ((perm < 0) && (used < len)) { rktio_converter_close(scheme_rktio, cd); + free(le); return NULL; } @@ -2832,7 +2884,7 @@ static char *string_to_from_locale(int to_bytes, *olen = (clen >> 2); ((mzchar *)c)[*olen] = 0; } - rktio_converter_close(scheme_rktio, cd); + cache_locale_or_close(to_bytes, cd, le); return c; } @@ -2864,7 +2916,7 @@ static char *string_to_from_locale(int to_bytes, } } - rktio_converter_close(scheme_rktio, cd); + cache_locale_or_close(to_bytes, cd, le); if (to_bytes) { parts = append_all_byte_strings_backwards(parts); @@ -2879,6 +2931,42 @@ static char *string_to_from_locale(int to_bytes, } } +void cache_locale_or_close(int to_bytes, rktio_converter_t *cd, char *le) +{ + if (to_bytes ? cached_locale_to_converter : cached_locale_from_converter) { + rktio_converter_close(scheme_rktio, cd); + free(le); + } else { + if (!cached_locale_encoding_name || strcmp(le, cached_locale_encoding_name)) { + scheme_clear_locale_cache(); + cached_locale_encoding_name = le; + } else + free(le); + + rktio_convert_reset(scheme_rktio, cd); + if (to_bytes) + cached_locale_to_converter = cd; + else + cached_locale_from_converter = cd; + } +} + +void scheme_clear_locale_cache(void) +{ + if (cached_locale_encoding_name) { + if (cached_locale_to_converter) { + rktio_converter_close(scheme_rktio, cached_locale_to_converter); + cached_locale_to_converter = NULL; + } + if (cached_locale_from_converter) { + rktio_converter_close(scheme_rktio, cached_locale_from_converter); + cached_locale_from_converter = NULL; + } + free(cached_locale_encoding_name); + cached_locale_encoding_name = NULL; + } +} + static char *locale_recase(int to_up, /* in must be null-terminated, iilen doesn't include it */ char *in, int id, int iilen, diff --git a/racket/src/rktio/rktio.def b/racket/src/rktio/rktio.def index dd339ec4da..5c0e965399 100644 --- a/racket/src/rktio/rktio.def +++ 
b/racket/src/rktio/rktio.def @@ -176,6 +176,7 @@ rktio_converter_open rktio_converter_close rktio_convert rktio_convert_in +rktio_convert_reset rktio_locale_recase rktio_recase_utf16 rktio_locale_strcoll diff --git a/racket/src/rktio/rktio.h b/racket/src/rktio/rktio.h index 09003eca38..846f2e8c97 100644 --- a/racket/src/rktio/rktio.h +++ b/racket/src/rktio/rktio.h @@ -1119,6 +1119,9 @@ RKTIO_EXTERN rktio_convert_result_t *rktio_convert_in(rktio_t *rktio, doesn't return a NULL result; instead, `converted` in the result reports the error. */ +RKTIO_EXTERN void rktio_convert_reset(rktio_t *rktio, rktio_converter_t *cvt); +/* Resets a converter to its initial state. */ + RKTIO_EXTERN_NOERR char *rktio_locale_recase(rktio_t *rktio, rktio_bool_t to_up, rktio_const_string_t in); diff --git a/racket/src/rktio/rktio.inc b/racket/src/rktio/rktio.inc index 91774411b4..12814a5918 100644 --- a/racket/src/rktio/rktio.inc +++ b/racket/src/rktio/rktio.inc @@ -176,6 +176,7 @@ Sforeign_symbol("rktio_converter_open", (void *)rktio_converter_open); Sforeign_symbol("rktio_converter_close", (void *)rktio_converter_close); Sforeign_symbol("rktio_convert", (void *)rktio_convert); Sforeign_symbol("rktio_convert_in", (void *)rktio_convert_in); +Sforeign_symbol("rktio_convert_reset", (void *)rktio_convert_reset); Sforeign_symbol("rktio_locale_recase", (void *)rktio_locale_recase); Sforeign_symbol("rktio_recase_utf16", (void *)rktio_recase_utf16); Sforeign_symbol("rktio_locale_strcoll", (void *)rktio_locale_strcoll); diff --git a/racket/src/rktio/rktio.rktl b/racket/src/rktio/rktio.rktl index d4f36f87c9..a27178576a 100644 --- a/racket/src/rktio/rktio.rktl +++ b/racket/src/rktio/rktio.rktl @@ -1261,6 +1261,11 @@ ((*ref char) out) (intptr_t out_start) (intptr_t out_end))) +(define-function + () + void + rktio_convert_reset + (((ref rktio_t) rktio) ((ref rktio_converter_t) cvt))) (define-function () (ref char) diff --git a/racket/src/rktio/rktio_convert.c b/racket/src/rktio/rktio_convert.c index 
6b7c37161f..0eb814ae7a 100644 --- a/racket/src/rktio/rktio_convert.c +++ b/racket/src/rktio/rktio_convert.c @@ -507,6 +507,11 @@ rktio_convert_result_t *rktio_convert_in(rktio_t *rktio, return r; } +void rktio_convert_reset(rktio_t *rktio, rktio_converter_t *cvt) +{ + (void)iconv(cvt->cd, NULL, NULL, NULL, NULL); +} + /*============================================================*/ /* Case conversion */ /*============================================================*/