cache bytes converter used for locale conversions

On platforms other than Windows and MacOS, locale encoding (including
path <-> string conversion) opened a converter for each separate
operation. That can be slow on some OSes, so cache converters used for
locale conversions.

Relevant to #2781
This commit is contained in:
Matthew Flatt 2019-08-16 11:03:37 +02:00
parent ee9de07744
commit 264ec72790
15 changed files with 235 additions and 25 deletions

View File

@ -12,6 +12,9 @@
bytes-convert
bytes-convert-end)
(module+ reset
(provide bytes-reset-converter))
(struct bytes-converter ([c #:mutable]
[custodian-reference #:mutable]))
@ -229,3 +232,9 @@
(define err (and (= converted RKTIO_CONVERT_ERROR)
(rktio_get_last_error rktio)))
(values in-consumed out-produced err)]))
;; in atomic mode
;; Puts `converter` back into its initial conversion state so that it
;; can be reused. Nothing is done when the underlying converter
;; satisfies `utf-8-converter?`; otherwise the reset is delegated to
;; `rktio_convert_reset`.
(define (bytes-reset-converter converter)
  (let ([underlying (bytes-converter-c converter)])
    (when (not (utf-8-converter? underlying))
      (rktio_convert_reset rktio underlying))))

View File

@ -0,0 +1,80 @@
#lang racket/base
(require "../host/thread.rkt"
"../host/place-local.rkt"
"../converter/main.rkt"
(submod "../converter/main.rkt" reset)
"ucs-4.rkt")
(provide convert-cache-init!
bytes-open-converter/cached-to
bytes-open-converter/cached-to2
bytes-open-converter/cached-from
bytes-close-converter/cached-to
bytes-close-converter/cached-to2
bytes-close-converter/cached-from)
;; A small cache of idle converters for a single locale encoding:
;;  enc  : the encoding that the cached converters were opened for, or #f
;;  to   : an idle converter from `ucs-4-encoding` to `enc`, or #f
;;  to2  : a second idle converter from `ucs-4-encoding` to `enc`, or #f
;;         (used where two such converters are open at the same time)
;;  from : an idle converter from `enc` to "UTF-8", or #f
(struct cache (enc to to2 from)
#:mutable
#:authentic)
;; Creates an empty cache: no encoding recorded and no converters held
(define (new-cache) (cache #f #f #f #f))
;; The cache is declared with `define-place-local`, and it is replaced
;; with a fresh, empty cache by `convert-cache-init!`
(define-place-local local-cache (new-cache))
;; Installs a fresh, empty cache; converters held by the previous cache
;; value are not closed here
(define (convert-cache-init!)
(set! local-cache (new-cache)))
;; Empties one slot of the cache, where `get` and `update!` are the
;; accessor and mutator for that slot, and closes the converter that
;; the slot held, if any. The slot is emptied before the converter is
;; closed. Used by `cache-save!` within its `atomically` region.
(define (cache-clear! get update!)
  (let ([old-converter (get local-cache)])
    (update! local-cache #f)
    (when old-converter
      (bytes-close-converter old-converter))))
;; Takes a converter out of the cache slot selected by `get` and
;; `update!`, but only if the cache currently holds converters for
;; `enc`. On a hit, the slot is emptied so that the converter cannot be
;; handed out twice. Returns the converter, or #f if the encoding does
;; not match or the slot is empty.
(define (cache-lookup! enc get update!)
  (atomically
   (cond
     [(equal? enc (cache-enc local-cache))
      (define cached (get local-cache))
      (when cached
        (update! local-cache #f))
      cached]
     [else #f])))
;; Disposes of converter `c`, which was opened for encoding `enc`, by
;; either parking it in the cache slot selected by `get` and `update!`
;; or closing it. Either way, the caller must not use `c` afterward.
(define (cache-save! c enc get update!)
  (atomically
   ;; A change of encoding invalidates everything in the cache, so
   ;; close all old converters before recording the new encoding
   (when (not (equal? enc (cache-enc local-cache)))
     (cache-clear! cache-to set-cache-to!)
     (cache-clear! cache-to2 set-cache-to2!)
     (cache-clear! cache-from set-cache-from!)
     (set-cache-enc! local-cache enc))
   (if (get local-cache)
       ;; The slot is already occupied, so `c` is surplus
       (bytes-close-converter c)
       (begin
         (bytes-reset-converter c) ; must be in atomic mode
         (update! local-cache c)))))
;; ----------------------------------------
;; Returns a converter from `ucs-4-encoding` to `enc`, reusing the one
;; in the cache's `to` slot when available and opening a new one
;; otherwise. The result is whatever `bytes-open-converter` returns
;; when a new converter has to be opened.
(define (bytes-open-converter/cached-to enc)
  (let ([cached (cache-lookup! enc cache-to set-cache-to!)])
    (if cached
        cached
        (bytes-open-converter ucs-4-encoding enc))))
;; Like `bytes-open-converter/cached-to`, but draws on the cache's
;; `to2` slot, so that a caller can hold two converters from
;; `ucs-4-encoding` to `enc` at the same time.
(define (bytes-open-converter/cached-to2 enc)
  (let ([cached (cache-lookup! enc cache-to2 set-cache-to2!)])
    (if cached
        cached
        (bytes-open-converter ucs-4-encoding enc))))
;; Returns a converter from `enc` to "UTF-8", reusing the one in the
;; cache's `from` slot when available and opening a new one otherwise.
(define (bytes-open-converter/cached-from enc)
  (let ([cached (cache-lookup! enc cache-from set-cache-from!)])
    (if cached
        cached
        (bytes-open-converter enc "UTF-8"))))
;; Releases converter `c`, which was obtained for encoding `enc` from
;; `bytes-open-converter/cached-to`. `cache-save!` takes care of `c`
;; on every path, either parking it in the cache's `to` slot or
;; closing it, so `c` must not be closed again here.
(define (bytes-close-converter/cached-to c enc)
  (cache-save! c enc cache-to set-cache-to!))
;; Releases converter `c`, which was obtained for encoding `enc` from
;; `bytes-open-converter/cached-to2`. `cache-save!` takes care of `c`
;; on every path, either parking it in the cache's `to2` slot or
;; closing it, so `c` must not be closed again here.
(define (bytes-close-converter/cached-to2 c enc)
  (cache-save! c enc cache-to2 set-cache-to2!))
;; Releases converter `c`, which was obtained for encoding `enc` from
;; `bytes-open-converter/cached-from`. `cache-save!` takes care of `c`
;; on every path, either parking it in the cache's `from` slot or
;; closing it, so `c` must not be closed again here.
(define (bytes-close-converter/cached-from c enc)
  (cache-save! c enc cache-from set-cache-from!))

View File

@ -4,6 +4,7 @@
"../host/rktio.rkt"
"../string/utf-16-encode.rkt"
"../converter/main.rkt"
"cache.rkt"
"parameter.rkt"
"string.rkt"
"recase.rkt"
@ -93,10 +94,11 @@
(define c2 #f)
(define in-bstr1 (string->bytes/ucs-4 s1 0 (string-length s1)))
(define in-bstr2 (string->bytes/ucs-4 s2 0 (string-length s2)))
(define enc (locale-string-encoding))
(dynamic-wind
(lambda ()
(set! c1 (bytes-open-converter ucs-4-encoding (locale-string-encoding)))
(set! c2 (bytes-open-converter ucs-4-encoding (locale-string-encoding))))
(set! c1 (bytes-open-converter/cached-to enc))
(set! c2 (bytes-open-converter/cached-to2 enc)))
(lambda ()
(let loop ([pos1 0] [pos2 0] [end1 (bytes-length in-bstr1)] [end2 (bytes-length in-bstr2)])
(define-values (bstr1 in-used1 status1)
@ -154,5 +156,5 @@
[else
(loop pos1 pos2 (+ pos1 len) (+ pos2 len))])])))
(lambda ()
(bytes-close-converter c1)
(bytes-close-converter c2)))]))
(bytes-close-converter/cached-to c1 enc)
(bytes-close-converter/cached-to2 c2 enc)))]))

View File

@ -6,6 +6,7 @@
"../string/utf-16-encode.rkt"
"../string/utf-16-decode.rkt"
"../converter/main.rkt"
"cache.rkt"
"parameter.rkt"
"string.rkt"
"nul-char.rkt"
@ -64,9 +65,10 @@
;; encoding-error bytes alone.
(define c #f)
(define in-bstr (string->bytes/ucs-4 s 0 (string-length s)))
(define enc (locale-string-encoding))
(dynamic-wind
(lambda ()
(set! c (bytes-open-converter ucs-4-encoding (locale-string-encoding))))
(set! c (bytes-open-converter/cached-to enc)))
(lambda ()
(let loop ([pos 0])
(cond
@ -94,7 +96,7 @@
(apply string-append ls err-s r)
(list* ls err-s r))])])))
(lambda ()
(bytes-close-converter c)))]))
(bytes-close-converter/cached-to c enc)))]))
;; in atomic mode
;; Assumes that the locale is sync'ed

View File

@ -3,6 +3,7 @@
"../string/convert.rkt"
"../string/utf-8-decode.rkt"
"../converter/main.rkt"
"cache.rkt"
"parameter.rkt"
"ucs-4.rkt")
@ -20,9 +21,10 @@
(string->bytes/utf-8 str err-byte start end)]
[else
(define c #f)
(define enc (locale-string-encoding))
(dynamic-wind
(lambda ()
(set! c (bytes-open-converter ucs-4-encoding (locale-string-encoding))))
(set! c (bytes-open-converter/cached-to enc)))
(lambda ()
(define in-bstr (string->bytes/ucs-4 str start end))
(let loop ([pos 0])
@ -51,7 +53,7 @@
(apply bytes-append (cons bstr (cons err-bstr r)))
(cons bstr (cons err-bstr r)))])])))
(lambda ()
(bytes-close-converter c)))]))
(bytes-close-converter/cached-to c enc)))]))
(define/who (bytes->string/locale in-bstr [err-char #f] [start 0] [end (and (bytes? in-bstr)
(bytes-length in-bstr))])
@ -65,9 +67,10 @@
(bytes->string/utf-8 in-bstr err-char start end)]
[else
(define c #f)
(define enc (locale-string-encoding))
(dynamic-wind
(lambda ()
(set! c (bytes-open-converter (locale-string-encoding) "UTF-8")))
(set! c (bytes-open-converter/cached-from enc)))
(lambda ()
(let loop ([pos 0])
(define-values (bstr in-used status)
@ -95,4 +98,4 @@
(bytes->string/utf-8 (apply bytes-append (cons bstr (cons err-bstr r))))
(cons bstr (cons err-bstr r)))])])))
(lambda ()
(bytes-close-converter c)))]))
(bytes-close-converter/cached-from c enc)))]))

View File

@ -30,6 +30,8 @@
place-init)
(only-in "sandman/ltps.rkt"
shared-ltps-place-init!)
(only-in "locale/cache.rkt"
convert-cache-init!)
"port/place.rkt")
(provide (all-from-out "port/main.rkt")
@ -60,6 +62,7 @@
(define (io-place-init! in-fd out-fd err-fd cust plumber)
(rktio-place-init!)
(convert-cache-init!)
(logger-init!)
(shared-ltps-place-init!)
(install-error-value->string-handler!)

View File

@ -321,6 +321,9 @@ typedef struct Thread_Local_Variables {
int num_minor_garbage_collections_;
int locale_on_;
void *current_locale_name_ptr_;
char *cached_locale_encoding_name_;
struct rktio_converter_t *cached_locale_to_converter_;
struct rktio_converter_t *cached_locale_from_converter_;
int gensym_counter_;
struct Scheme_Object *dummy_input_port_;
struct Scheme_Object *dummy_output_port_;
@ -706,6 +709,9 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL;
#define num_minor_garbage_collections XOA (scheme_get_thread_local_variables()->num_minor_garbage_collections_)
#define locale_on XOA (scheme_get_thread_local_variables()->locale_on_)
#define current_locale_name_ptr XOA (scheme_get_thread_local_variables()->current_locale_name_ptr_)
#define cached_locale_encoding_name XOA (scheme_get_thread_local_variables()->cached_locale_encoding_name_)
#define cached_locale_to_converter XOA (scheme_get_thread_local_variables()->cached_locale_to_converter_)
#define cached_locale_from_converter XOA (scheme_get_thread_local_variables()->cached_locale_from_converter_)
#define gensym_counter XOA (scheme_get_thread_local_variables()->gensym_counter_)
#define dummy_input_port XOA (scheme_get_thread_local_variables()->dummy_input_port_)
#define dummy_output_port XOA (scheme_get_thread_local_variables()->dummy_output_port_)

View File

@ -586,6 +586,7 @@ void scheme_place_instance_destroy(int force)
GC_destruct_child_gc();
#endif
scheme_free_all_code();
scheme_clear_locale_cache();
rktio_destroy(scheme_rktio);
}

View File

@ -436,6 +436,7 @@ void scheme_register_network_evts();
void scheme_free_dynamic_extensions(void);
void scheme_free_all_code(void);
void scheme_clear_locale_cache(void);
XFORM_NONGCING int scheme_is_multithreaded(int now);

View File

@ -166,6 +166,8 @@ static char *string_to_from_locale(int to_bytes,
intptr_t *olen, int perm,
int *no_cvt);
static void cache_locale_or_close(int to_bytes, rktio_converter_t *cd, char *le);
#define portable_isspace(x) (((x) < 128) && isspace(x))
ROSYM static Scheme_Object *sys_symbol;
@ -184,6 +186,10 @@ SHARED_OK static Scheme_Object *banner_str;
THREAD_LOCAL_DECL(static Scheme_Object *fs_change_props);
THREAD_LOCAL_DECL(static char *cached_locale_encoding_name);
THREAD_LOCAL_DECL(struct rktio_converter_t *cached_locale_to_converter);
THREAD_LOCAL_DECL(struct rktio_converter_t *cached_locale_from_converter);
READ_ONLY static Scheme_Object *complete_symbol, *continues_symbol, *aborts_symbol, *error_symbol;
READ_ONLY Scheme_Object *scheme_string_p_proc;
@ -2571,6 +2577,18 @@ static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[])
return s;
}
static void do_convert_close(rktio_converter_t *cd, int cache_mode, const char *to_e, const char *from_e)
/* Disposes of a converter that `do_convert` obtained itself, either
   returning it to the locale cache or closing it.
   If `cache_mode` is -1, then `to_e` needs to be freed (or cached).
   If `cache_mode` is 1, then `from_e` needs to be freed (or cached).
   In both of those cases, the encoding name is handed along with `cd`
   to cache_locale_or_close(), which frees or keeps it. If `cache_mode`
   is 0, the converter is simply closed and neither name is touched. */
{
  switch (cache_mode) {
  case -1:
    /* `to_e` is the locale encoding: a "to bytes" converter */
    cache_locale_or_close(1, cd, (char *)to_e);
    break;
  case 1:
    /* `from_e` is the locale encoding: a "from bytes" converter */
    cache_locale_or_close(0, cd, (char *)from_e);
    break;
  case 0:
    rktio_converter_close(scheme_rktio, cd);
    break;
  default:
    break;
  }
}
static char *do_convert(rktio_converter_t *cd,
/* if !cd and either from_e or to_e can be NULL, then
reset_locale() must have been called */
@ -2598,7 +2616,7 @@ static char *do_convert(rktio_converter_t *cd,
1 for more avail */
int *status)
{
int dip, dop, close_it = 0, mz_utf8 = 0;
int dip, dop, close_it = 0, cache_mode = 0, mz_utf8 = 0;
intptr_t il, ol, r;
GC_CAN_IGNORE char *ip, *op;
@ -2611,6 +2629,12 @@ static char *do_convert(rktio_converter_t *cd,
if (!cd) {
if (rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED) {
char *tmp_from_e = NULL, *tmp_to_e = NULL;
if (!to_e && !strcmp(from_e, MZ_UCS4_NAME))
cache_mode = -1;
else if (!from_e && !strcmp(to_e, MZ_UCS4_NAME))
cache_mode = 1;
if (!from_e) {
tmp_from_e = rktio_locale_encoding(scheme_rktio);
from_e = tmp_from_e;
@ -2619,10 +2643,23 @@ static char *do_convert(rktio_converter_t *cd,
tmp_to_e = rktio_locale_encoding(scheme_rktio);
to_e = tmp_to_e;
}
cd = rktio_converter_open(scheme_rktio, to_e, from_e);
if ((cache_mode == -1)
&& cached_locale_to_converter
&& !strcmp(to_e, cached_locale_encoding_name)) {
cd = cached_locale_to_converter;
cached_locale_to_converter = NULL;
} else if ((cache_mode == 1)
&& cached_locale_from_converter
&& !strcmp(from_e, cached_locale_encoding_name)) {
cd = cached_locale_from_converter;
cached_locale_from_converter = NULL;
} else {
cd = rktio_converter_open(scheme_rktio, to_e, from_e);
}
close_it = 1;
if (tmp_from_e) free(tmp_from_e);
if (tmp_to_e) free(tmp_to_e);
if (tmp_from_e && ((cache_mode != 1) || !cd)) free(tmp_from_e);
if (tmp_to_e && ((cache_mode != -1) || !cd)) free(tmp_to_e);
} else if (to_from_utf8) {
/* Assume UTF-8 */
mz_utf8 = 1;
@ -2737,7 +2774,7 @@ static char *do_convert(rktio_converter_t *cd,
} else {
*status = 1;
if (close_it)
rktio_converter_close(scheme_rktio, cd);
do_convert_close(cd, cache_mode, to_e, from_e);
while (extra--) {
out[od + dop + extra] = 0;
}
@ -2748,7 +2785,7 @@ static char *do_convert(rktio_converter_t *cd,
if (icerr == RKTIO_ERROR_CONVERT_BAD_SEQUENCE)
*status = -2;
if (close_it)
rktio_converter_close(scheme_rktio, cd);
do_convert_close(cd, cache_mode, to_e, from_e);
while (extra--) {
out[od + dop + extra] = 0;
}
@ -2766,7 +2803,7 @@ static char *do_convert(rktio_converter_t *cd,
} else {
*status = 0;
if (close_it)
rktio_converter_close(scheme_rktio, cd);
do_convert_close(cd, cache_mode, to_e, from_e);
while (extra--) {
out[od + dop + extra] = 0;
}
@ -2792,18 +2829,32 @@ static char *string_to_from_locale(int to_bytes,
rktio_converter_t *cd;
le = rktio_locale_encoding(scheme_rktio);
if (to_bytes)
cd = rktio_converter_open(scheme_rktio, le, MZ_UCS4_NAME);
else
cd = rktio_converter_open(scheme_rktio, MZ_UCS4_NAME, le);
free(le);
if (cached_locale_encoding_name
&& !strcmp(le, cached_locale_encoding_name)
&& (to_bytes ? cached_locale_to_converter : cached_locale_from_converter)) {
if (to_bytes) {
cd = cached_locale_to_converter;
cached_locale_to_converter = NULL;
} else {
cd = cached_locale_from_converter;
cached_locale_from_converter = NULL;
}
} else {
if (to_bytes)
cd = rktio_converter_open(scheme_rktio, le, MZ_UCS4_NAME);
else
cd = rktio_converter_open(scheme_rktio, MZ_UCS4_NAME, le);
}
if (!cd) {
free(le);
*no_cvt = 1;
return NULL;
}
*no_cvt = 0;
status = 0;
while (len) {
/* We might have conversion errors... */
c = do_convert(cd, NULL, NULL, 0,
@ -2818,6 +2869,7 @@ static char *string_to_from_locale(int to_bytes,
if ((perm < 0) && (used < len)) {
rktio_converter_close(scheme_rktio, cd);
free(le);
return NULL;
}
@ -2832,7 +2884,7 @@ static char *string_to_from_locale(int to_bytes,
*olen = (clen >> 2);
((mzchar *)c)[*olen] = 0;
}
rktio_converter_close(scheme_rktio, cd);
cache_locale_or_close(to_bytes, cd, le);
return c;
}
@ -2864,7 +2916,7 @@ static char *string_to_from_locale(int to_bytes,
}
}
rktio_converter_close(scheme_rktio, cd);
cache_locale_or_close(to_bytes, cd, le);
if (to_bytes) {
parts = append_all_byte_strings_backwards(parts);
@ -2879,6 +2931,42 @@ static char *string_to_from_locale(int to_bytes,
}
}
static void cache_locale_or_close(int to_bytes, rktio_converter_t *cd, char *le)
/* Disposes of converter `cd`, which converts to (if `to_bytes`) or
   from (otherwise) the locale encoding named by `le`: the converter is
   either reset and parked in the cache for reuse, or it is closed.

   Takes ownership of both `cd` and `le`. The string `le` is either
   installed as `cached_locale_encoding_name` or freed, so the caller
   must not use either argument afterward.

   The encoding comparison is performed before looking at the slot, so
   that converters left over from a previous locale encoding are always
   evicted. Checking the slot first would let stale converters that
   occupy both slots block caching for the new encoding indefinitely. */
{
  if (!cached_locale_encoding_name || strcmp(le, cached_locale_encoding_name)) {
    /* Encoding changed (or nothing cached yet): drop all old converters
       and record the new encoding name, keeping `le` */
    scheme_clear_locale_cache();
    cached_locale_encoding_name = le;
  } else
    free(le);

  if (to_bytes ? cached_locale_to_converter : cached_locale_from_converter) {
    /* Slot already holds a converter for this encoding; `cd` is surplus */
    rktio_converter_close(scheme_rktio, cd);
  } else {
    /* Return the converter to its initial state before parking it */
    rktio_convert_reset(scheme_rktio, cd);
    if (to_bytes)
      cached_locale_to_converter = cd;
    else
      cached_locale_from_converter = cd;
  }
}
void scheme_clear_locale_cache(void)
/* Empties the locale-converter cache: closes whichever cached
   converters are present and frees the cached encoding name. Does
   nothing at all when no encoding name is cached. */
{
  if (!cached_locale_encoding_name)
    return;

  if (cached_locale_to_converter) {
    rktio_converter_close(scheme_rktio, cached_locale_to_converter);
    cached_locale_to_converter = NULL;
  }

  if (cached_locale_from_converter) {
    rktio_converter_close(scheme_rktio, cached_locale_from_converter);
    cached_locale_from_converter = NULL;
  }

  free(cached_locale_encoding_name);
  cached_locale_encoding_name = NULL;
}
static char *locale_recase(int to_up,
/* in must be null-terminated, iilen doesn't include it */
char *in, int id, int iilen,

View File

@ -176,6 +176,7 @@ rktio_converter_open
rktio_converter_close
rktio_convert
rktio_convert_in
rktio_convert_reset
rktio_locale_recase
rktio_recase_utf16
rktio_locale_strcoll

View File

@ -1119,6 +1119,9 @@ RKTIO_EXTERN rktio_convert_result_t *rktio_convert_in(rktio_t *rktio,
doesn't return a NULL result; instead, `converted` in the result
reports the error. */
RKTIO_EXTERN void rktio_convert_reset(rktio_t *rktio, rktio_converter_t *cvt);
/* Resets a converter to its initial state. */
RKTIO_EXTERN_NOERR char *rktio_locale_recase(rktio_t *rktio,
rktio_bool_t to_up,
rktio_const_string_t in);

View File

@ -176,6 +176,7 @@ Sforeign_symbol("rktio_converter_open", (void *)rktio_converter_open);
Sforeign_symbol("rktio_converter_close", (void *)rktio_converter_close);
Sforeign_symbol("rktio_convert", (void *)rktio_convert);
Sforeign_symbol("rktio_convert_in", (void *)rktio_convert_in);
Sforeign_symbol("rktio_convert_reset", (void *)rktio_convert_reset);
Sforeign_symbol("rktio_locale_recase", (void *)rktio_locale_recase);
Sforeign_symbol("rktio_recase_utf16", (void *)rktio_recase_utf16);
Sforeign_symbol("rktio_locale_strcoll", (void *)rktio_locale_strcoll);

View File

@ -1261,6 +1261,11 @@
((*ref char) out)
(intptr_t out_start)
(intptr_t out_end)))
(define-function
()
void
rktio_convert_reset
(((ref rktio_t) rktio) ((ref rktio_converter_t) cvt)))
(define-function
()
(ref char)

View File

@ -507,6 +507,11 @@ rktio_convert_result_t *rktio_convert_in(rktio_t *rktio,
return r;
}
/* Resets the converter `cvt` to its initial state, so that a converter
   that has already been used can be reused. The reset is requested by
   calling iconv() on the converter's descriptor with NULL for all of
   the buffer arguments; the result of that call is deliberately
   discarded. The `rktio` argument is not used here. */
void rktio_convert_reset(rktio_t *rktio, rktio_converter_t *cvt)
{
(void)iconv(cvt->cd, NULL, NULL, NULL, NULL);
}
/*============================================================*/
/* Case conversion */
/*============================================================*/