From 6a543f4783f9698172f936806c0a28f4ce9df961 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 21 Jun 2017 10:10:35 -0600 Subject: [PATCH] rktio: add iconv --- racket/src/configure | 67 +- racket/src/racket/configure.ac | 37 +- racket/src/racket/mzconfig.h.in | 3 - racket/src/racket/src/file.c | 36 +- racket/src/racket/src/string.c | 671 ++++---------------- racket/src/rktio/Makefile.in | 4 + racket/src/rktio/configure | 54 +- racket/src/rktio/configure.ac | 22 +- racket/src/rktio/rktio.h | 117 +++- racket/src/rktio/rktio_config.h.in | 6 + racket/src/rktio/rktio_convert.c | 670 +++++++++++++++++++ racket/src/rktio/rktio_envvars.c | 2 +- racket/src/rktio/rktio_error.c | 7 +- racket/src/rktio/rktio_fs.c | 2 +- racket/src/rktio/rktio_main.c | 2 +- racket/src/rktio/rktio_private.h | 10 +- racket/src/rktio/rktio_wide.c | 27 +- racket/src/worksp/librktio/librktio.vcproj | 4 + racket/src/worksp/librktio/librktio.vcxproj | 1 + 19 files changed, 1071 insertions(+), 671 deletions(-) create mode 100644 racket/src/rktio/rktio_convert.c diff --git a/racket/src/configure b/racket/src/configure index 9e1b296e8b..ae08514a4a 100755 --- a/racket/src/configure +++ b/racket/src/configure @@ -5103,32 +5103,8 @@ fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $using_gnu_cpp" >&5 $as_echo "$using_gnu_cpp" >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for nl_langinfo (CODESET)" >&5 -$as_echo_n "checking for nl_langinfo (CODESET)... " >&6; } -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -#include -int -main () -{ -char *codeset = nl_langinfo (CODESET); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - -$as_echo "#define HAVE_CODESET 1" >>confdefs.h - - have_codeset=yes -else - have_codeset=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $have_codeset" >&5 -$as_echo "$have_codeset" >&6; } - +# Although rktio takes care of iconv, we need to know whether +# to link to it iconv_lib_flag="" if test "${skip_iconv_check}" = "no" ; then if test "${enable_iconv}" = "yes" ; then @@ -5463,40 +5439,8 @@ $as_echo_n "checking $msg... " >&6; } iconv_usage_result="$enable_iconv$iconv_lib_flag" { $as_echo "$as_me:${as_lineno-$LINENO}: result: $iconv_usage_result" >&5 $as_echo "$iconv_usage_result" >&6; } - if test "${enable_iconv}" = "no" ; then - MZOPTIONS="$MZOPTIONS -DMZ_NO_ICONV" - fi fi - msg="for mbsrtowcs" -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $msg" >&5 -$as_echo_n "checking $msg... " >&6; } -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - - #include - #include - int main() { - mbstate_t state; - char *src = "X"; - bzero(&state, sizeof(mbstate_t)); - mbsrtowcs(0, &src, 0, &state); - return 0; - } -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - mbsrtowcs=yes -else - mbsrtowcs=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -if test "$mbsrtowcs" = "no" ; then - MZOPTIONS="$MZOPTIONS -DNO_MBTOWC_FUNCTIONS" -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mbsrtowcs" >&5 -$as_echo "$mbsrtowcs" >&6; } - if test "${check_for_mprotect}" = "yes" ; then msg="for mmap and mprotect" { $as_echo "$as_me:${as_lineno-$LINENO}: checking $msg" >&5 @@ -6892,6 +6836,13 @@ else SUB_CONFIGURE_EXTRAS="$SUB_CONFIGURE_EXTRAS --disable-pthread" fi +# Make sure the --enable-iconv result is propagated: +if test "${enable_iconv}" = "yes" ; then + SUB_CONFIGURE_EXTRAS="$SUB_CONFIGURE_EXTRAS --enable-iconv" +else + SUB_CONFIGURE_EXTRAS="$SUB_CONFIGURE_EXTRAS --disable-iconv" +fi + FOREIGNTARGET= FOREIGN_IF_USED="FOREIGN_NOT_USED" if test -d "${srcdir}/foreign" && test "${enable_foreign}" = "yes" ; then diff --git a/racket/src/racket/configure.ac b/racket/src/racket/configure.ac index d738ff7579..eb2f91c2a5 100644 --- a/racket/src/racket/configure.ac +++ b/racket/src/racket/configure.ac @@ -1068,14 +1068,8 @@ if test "$using_gnu_cpp" = "yes" ; then fi AC_MSG_RESULT($using_gnu_cpp) -AC_MSG_CHECKING([for nl_langinfo (CODESET)]) -AC_TRY_LINK([#include ], - [char *codeset = nl_langinfo (CODESET);], - AC_DEFINE(HAVE_CODESET,1,[Have nl_langinfo (CODESET)]) - have_codeset=yes, - have_codeset=no) -AC_MSG_RESULT($have_codeset) - +# Although rktio takes care of iconv, we need to know whether +# to link to it iconv_lib_flag="" if test "${skip_iconv_check}" = "no" ; then if test "${enable_iconv}" = "yes" ; then @@ -1113,28 +1107,8 @@ if test "${skip_iconv_check}" = "no" ; then AC_MSG_CHECKING($msg) iconv_usage_result="$enable_iconv$iconv_lib_flag" AC_MSG_RESULT($iconv_usage_result) - if 
test "${enable_iconv}" = "no" ; then - MZOPTIONS="$MZOPTIONS -DMZ_NO_ICONV" - fi fi -[ msg="for mbsrtowcs" ] -AC_MSG_CHECKING($msg) -AC_LINK_IFELSE([AC_LANG_SOURCE([ - #include - #include - int main() { - mbstate_t state; - char *src = "X"; - bzero(&state, sizeof(mbstate_t)); - mbsrtowcs(0, &src, 0, &state); - return 0; - }])], mbsrtowcs=yes, mbsrtowcs=no) -if test "$mbsrtowcs" = "no" ; then - MZOPTIONS="$MZOPTIONS -DNO_MBTOWC_FUNCTIONS" -fi -AC_MSG_RESULT($mbsrtowcs) - if test "${check_for_mprotect}" = "yes" ; then [ msg="for mmap and mprotect" ] AC_MSG_CHECKING($msg) @@ -1825,6 +1799,13 @@ else SUB_CONFIGURE_EXTRAS="$SUB_CONFIGURE_EXTRAS --disable-pthread" fi +# Make sure the --enable-iconv result is propagated: +if test "${enable_iconv}" = "yes" ; then + SUB_CONFIGURE_EXTRAS="$SUB_CONFIGURE_EXTRAS --enable-iconv" +else + SUB_CONFIGURE_EXTRAS="$SUB_CONFIGURE_EXTRAS --disable-iconv" +fi + FOREIGNTARGET= FOREIGN_IF_USED="FOREIGN_NOT_USED" if test -d "${srcdir}/foreign" && test "${enable_foreign}" = "yes" ; then diff --git a/racket/src/racket/mzconfig.h.in b/racket/src/racket/mzconfig.h.in index db3a0bff6c..f60f4ec3c2 100644 --- a/racket/src/racket/mzconfig.h.in +++ b/racket/src/racket/mzconfig.h.in @@ -44,9 +44,6 @@ typedef unsigned long uintptr_t; /* Direction of stack growth: 1 = up, -1 = down, 0 = unknown. */ #undef STACK_DIRECTION -/* Whether nl_langinfo works. */ -#undef HAVE_CODESET - /* Whether __attribute__ ((noinline)) works. 
*/ #undef MZ_USE_NOINLINE diff --git a/racket/src/racket/src/file.c b/racket/src/racket/src/file.c index 7fb71d5120..3408279556 100644 --- a/racket/src/racket/src/file.c +++ b/racket/src/racket/src/file.c @@ -5392,29 +5392,23 @@ void scheme_set_addon_dir(Scheme_Object *p) #ifdef DOS_FILE_SYSTEM -static wchar_t *dlldir; - -wchar_t *scheme_get_dll_path(wchar_t *s) -{ - if (dlldir) { - int len1, len2; - wchar_t *p; - len1 = wc_strlen(dlldir); - len2 = wc_strlen(s); - p = (wchar_t *)scheme_malloc_atomic((len1 + len2 + 2) * sizeof(wchar_t)); - memcpy(p, dlldir, len1 * sizeof(wchar_t)); - if (p[len1 - 1] != '\\') { - p[len1++] = '\\'; - } - memcpy(p + len1, s, (len2 + 1) * sizeof(wchar_t)); - return p; - } else - return s; -} - void scheme_set_dll_path(wchar_t *p) { - dlldir = p; + rktio_set_dll_path(p); +} + +wchar_t *scheme_get_dll_path(wchar_t *p) +{ + wchar_t *r, *r2; + intptr_t len; + r = rktio_get_dll_path(p); + if (!r) + return p; + len = wcslen(r) + 1; + r2 = scheme_malloc_atomic(sizeof(wchar_t) * len); + memcpy(r2, r, sizeof(wchar_t) * len); + free(r); + return r2; } #endif diff --git a/racket/src/racket/src/string.c b/racket/src/racket/src/string.c index 3df4504d19..d6beac752d 100644 --- a/racket/src/racket/src/string.c +++ b/racket/src/racket/src/string.c @@ -33,25 +33,6 @@ #else # include #endif -#ifndef DONT_USE_LOCALE -# include -# ifdef MZ_NO_ICONV -# define USE_ICONV_DLL -# endif -# ifndef USE_ICONV_DLL -# include -# include -# endif -# include -# include -# ifdef MACOS_UNICODE_SUPPORT -# include -# include -# endif -# ifdef WINDOWS_UNICODE_SUPPORT -# include -# endif -#endif #ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH # include "schsys.h" @@ -63,87 +44,6 @@ #include "schustr.inc" -#ifdef USE_ICONV_DLL -typedef intptr_t iconv_t; -typedef int *(*errno_proc_t)(); -typedef size_t (*iconv_proc_t)(iconv_t cd, - char **inbuf, size_t *inbytesleft, - char **outbuf, size_t *outbytesleft); -typedef iconv_t (*iconv_open_proc_t)(const char *tocode, const char *fromcode); 
-typedef void (*iconv_close_proc_t)(iconv_t cd); -typedef char *(*locale_charset_proc_t)(); -static errno_proc_t iconv_errno; -static iconv_proc_t iconv; -static iconv_open_proc_t iconv_open; -static iconv_close_proc_t iconv_close; -static locale_charset_proc_t locale_charset; /* Not used, currently */ -#define mzCHK_PROC(x) x -static int get_iconv_errno(void) -{ - int *a; - a = iconv_errno(); - return *a; -} -# undef HAVE_CODESET -# define HAVE_CODESET 1 -# define CODESET 0 -# define ICONV_errno get_iconv_errno() -static int iconv_ready = 0; -static void init_iconv() -{ -# ifdef MZ_NO_ICONV -# else - HMODULE m; - m = LoadLibraryW(scheme_get_dll_path(L"iconv.dll")); - if (!m) - m = LoadLibraryW(scheme_get_dll_path(L"libiconv.dll")); - if (!m) - m = LoadLibraryW(scheme_get_dll_path(L"libiconv-2.dll")); - if (!m) - m = LoadLibrary("iconv.dll"); - if (!m) - m = LoadLibrary("libiconv.dll"); - if (!m) - m = LoadLibrary("libiconv-2.dll"); - if (m) { - iconv = (iconv_proc_t)GetProcAddress(m, "libiconv"); - iconv_open = (iconv_open_proc_t)GetProcAddress(m, "libiconv_open"); - iconv_close = (iconv_close_proc_t)GetProcAddress(m, "libiconv_close"); - locale_charset = (locale_charset_proc_t)GetProcAddress(m, "locale_charset"); - /* Make sure we have all of them or none: */ - if (!iconv || !iconv_open || !iconv_close) { - iconv = NULL; - iconv_open = NULL; - iconv_close = NULL; - } - } - if (iconv) { - iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno"); - if (!iconv_errno) { - /* The iconv.dll distributed with Racket links to msvcrt.dll. - It's a slighly dangerous assumption that whatever iconv we - found also uses msvcrt.dll. 
*/ - m = LoadLibrary("msvcrt.dll"); - if (m) { - iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno"); - if (!iconv_errno) { - iconv = NULL; - iconv_open = NULL; - iconv_close = NULL; - } - } - } - } -# endif - iconv_ready = 1; -} -#else -# define ICONV_errno errno -# define iconv_ready 1 -# define mzCHK_PROC(x) 1 -static void init_iconv() { } -#endif - #ifdef MACOS_UNICODE_SUPPORT # define mzLOCALE_IS_UTF_8(s) (!s || !(*s)) #endif @@ -151,7 +51,7 @@ static void init_iconv() { } # define mzLOCALE_IS_UTF_8(s) (!s || !(*s)) #endif #ifndef mzLOCALE_IS_UTF_8 -# define mzLOCALE_IS_UTF_8(s) !mzCHK_PROC(iconv_open) +# define mzLOCALE_IS_UTF_8(s) (!(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED)) #endif #define mzICONV_KIND 0 @@ -163,95 +63,13 @@ typedef struct Scheme_Converter { Scheme_Object so; short closed; short kind; - iconv_t cd; + rktio_converter_t *cd; int permissive; Scheme_Custodian_Reference *mref; } Scheme_Converter; /* locals */ -/* These two locale variables are only valid when reset_locale() - is called after continuation marks (and hence parameterization) - may have changed. Similarly, setlocale() is only up-to-date - when reset_locale() has been called. 
*/ -THREAD_LOCAL_DECL(static int locale_on); -THREAD_LOCAL_DECL(static void *current_locale_name_ptr); -static void reset_locale(void); - -#define current_locale_name ((const mzchar *)current_locale_name_ptr) - -static const mzchar empty_char_string[1] = { 0 }; -static const mzchar xes_char_string[2] = { 0x78787878, 0 }; - -#ifdef USE_ICONV_DLL -static char *nl_langinfo(int which) -{ - int i; - - reset_locale(); - if (!current_locale_name) - current_locale_name_ptr = empty_char_string; - - if ((current_locale_name[0] == 'C') - && !current_locale_name[1]) - return "US-ASCII"; - - for (i = 0; current_locale_name[i]; i++) { - if (current_locale_name[i] == '.') { - if (current_locale_name[i + 1]) { - int len, j; - char *enc; - i++; - len = scheme_char_strlen(current_locale_name) - i; - enc = (char *)scheme_malloc_atomic(2 + len + 1); - - /* Check whether the encoding is numberic, in which case - we add "CP" in front to make it an encoding name */ - for (j = i; current_locale_name[j]; j++) { - if (current_locale_name[j] > 127) - break; - if (!isdigit(current_locale_name[j])) - break; - } - if (!current_locale_name[j]) { - j = 2; - memcpy(enc, "CP", j); - } else { - j = 0; - } - - while (current_locale_name[i]) { - if (current_locale_name[i] > 127) - return "UTF-8"; - enc[j++] = current_locale_name[i++]; - } - enc[j] = 0; - return enc; - } - } - } - - return "UTF-8"; -} -#endif - -#ifdef DONT_USE_LOCALE -# define mz_iconv_nl_langinfo() "" -#else -static char *mz_iconv_nl_langinfo(){ - char *s; -# if HAVE_CODESET - s = nl_langinfo(CODESET); -# else - s = NULL; -# endif - if (!s) - return ""; - else - return s; -} -#endif - static Scheme_Object *make_string (int argc, Scheme_Object *argv[]); static Scheme_Object *string (int argc, Scheme_Object *argv[]); static Scheme_Object *string_p (int argc, Scheme_Object *argv[]); @@ -398,6 +216,19 @@ READ_ONLY Scheme_Object *scheme_byte_string_p_proc; READ_ONLY static int cross_compile_mode; +/* These two locale variables are only 
valid when reset_locale() + is called after continuation marks (and hence parameterization) + may have changed. Similarly, setlocale() is only up-to-date + when reset_locale() has been called. */ +THREAD_LOCAL_DECL(static int locale_on); +THREAD_LOCAL_DECL(static void *current_locale_name_ptr); +static void reset_locale(void); + +#define current_locale_name ((const mzchar *)current_locale_name_ptr) + +static const mzchar empty_char_string[1] = { 0 }; +static const mzchar xes_char_string[2] = { 0x78787878, 0 }; + void scheme_init_string (Scheme_Env *env) { @@ -1375,9 +1206,9 @@ do_byte_string_to_char_string_locale(const char *who, intptr_t olen; reset_locale(); - if (!iconv_ready) init_iconv(); - if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open)) + if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on + || !(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED)) return do_byte_string_to_char_string(who, bstr, istart, ifinish, perm, 1); if (istart < ifinish) { @@ -1507,9 +1338,9 @@ do_char_string_to_byte_string_locale(const char *who, intptr_t olen; reset_locale(); - if (!iconv_ready) init_iconv(); - if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on || !mzCHK_PROC(iconv_open)) + if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on + || !(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED)) return do_char_string_to_byte_string(cstr, istart, ifinish, 1); if (istart < ifinish) { @@ -2700,101 +2531,34 @@ static Scheme_Object *current_locale(int argc, Scheme_Object *argv[]) static Scheme_Object *locale_string_encoding(int argc, Scheme_Object *argv[]) { + char *enc; + Scheme_Object *s; + reset_locale(); if (mzLOCALE_IS_UTF_8(current_locale_name) || !locale_on) return scheme_make_utf8_string("UTF-8"); - -#if HAVE_CODESET - return scheme_make_utf8_string(nl_langinfo(CODESET)); -#else - /* nl_langinfo doesn't work, so just make up something */ - return scheme_make_utf8_string("UTF-8"); -#endif + + 
enc = rktio_locale_encoding(scheme_rktio); + s = scheme_make_utf8_string(enc); + free(enc); + + return s; } static Scheme_Object *system_language_country(int argc, Scheme_Object *argv[]) { -#ifdef MACOS_UNICODE_SUPPORT - /* Mac OS X */ - CFLocaleRef l; - CFStringRef s; - int len; - char *r; + char *lc; + Scheme_Object *s; - l = CFLocaleCopyCurrent(); - s = CFLocaleGetIdentifier(l); + lc = rktio_system_language_country(scheme_rktio); + s = scheme_make_utf8_string(lc); + free(lc); - len = CFStringGetLength(s); - r = (char *)scheme_malloc_atomic(len * 6 + 1); - CFStringGetCString(s, r, len * 6 + 1, kCFStringEncodingUTF8); - - CFRelease(l); - - return scheme_make_sized_utf8_string(r, 5); -#else -# ifdef WINDOWS_UNICODE_SUPPORT - /* Windows */ - LCID l; - int llen, clen; - char *lang, *country, *s; - l = GetUserDefaultLCID(); - - llen = GetLocaleInfo(l, LOCALE_SENGLANGUAGE, NULL, 0); - lang = (char *)scheme_malloc_atomic(llen); - GetLocaleInfo(l, LOCALE_SENGLANGUAGE, lang, llen); - if (llen) - llen -= 1; /* drop nul terminator */ - - clen = GetLocaleInfo(l, LOCALE_SENGCOUNTRY, NULL, 0); - country = (char *)scheme_malloc_atomic(clen); - GetLocaleInfo(l, LOCALE_SENGCOUNTRY, country, clen); - if (clen) - clen -= 1; /* drop nul terminator */ - - s = (char *)scheme_malloc_atomic(clen + llen + 1); - memcpy(s, lang, llen); - memcpy(s + 1 + llen, country, clen); - s[llen] = '_'; - - return scheme_make_sized_utf8_string(s, llen + 1 + clen); -# else - /* Unix */ - char *s; - - s = getenv("LC_ALL"); - if (!s) - s = getenv("LC_CTYPE"); - if (!s) - s = getenv("LANG"); - - if (s) { - /* Check that the environment variable has the form - xx_XX[.ENC] */ - if ((s[0] >= 'a') && (s[0] <= 'z') - && (s[1] >= 'a') && (s[1] <= 'z') - && (s[2] == '_') - && (s[3] >= 'A') && (s[3] <= 'Z') - && (s[4] >= 'A') && (s[4] <= 'Z') - && (!s[5] || s[5] == '.')) { - /* Good */ - } else - s = NULL; - } - - if (!s) - s = "en_US"; - - return scheme_make_sized_utf8_string(s, 5); -# endif -#endif + return s; } 
-#ifndef DONT_USE_LOCALE - -#define ICONV_ARG_CAST /* empty */ - -static char *do_convert(iconv_t cd, - /* if cd == -1 and either from_e or to_e can be NULL, then +static char *do_convert(rktio_converter_t *cd, + /* if !cd and either from_e or to_e can be NULL, then reset_locale() must have been called */ const char *from_e, const char *to_e, /* 1 => UCS-4 -> UTF-8; 2 => UTF-8 -> UCS-4; 0 => other */ @@ -2821,7 +2585,7 @@ static char *do_convert(iconv_t cd, int *status) { int dip, dop, close_it = 0, mz_utf8 = 0; - size_t il, ol, r; + intptr_t il, ol, r; GC_CAN_IGNORE char *ip, *op; /* Defaults: */ @@ -2830,22 +2594,28 @@ static char *do_convert(iconv_t cd, *oilen = 0; *oolen = 0; - if (cd == (iconv_t)-1) { - if (!iconv_ready) init_iconv(); - if (mzCHK_PROC(iconv_open)) { - if (!from_e) - from_e = mz_iconv_nl_langinfo(); - if (!to_e) - to_e = mz_iconv_nl_langinfo(); - cd = iconv_open(to_e, from_e); + if (!cd) { + if (rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED) { + char *tmp_from_e = NULL, *tmp_to_e = NULL; + if (!from_e) { + tmp_from_e = rktio_locale_encoding(scheme_rktio); + from_e = tmp_from_e; + } + if (!to_e) { + tmp_to_e = rktio_locale_encoding(scheme_rktio); + to_e = tmp_to_e; + } + cd = rktio_converter_open(scheme_rktio, to_e, from_e); close_it = 1; + if (tmp_from_e) free(tmp_from_e); + if (tmp_to_e) free(tmp_to_e); } else if (to_from_utf8) { /* Assume UTF-8 */ mz_utf8 = 1; } } - if ((cd == (iconv_t)-1) && !mz_utf8) { + if (!cd && !mz_utf8) { if (out) { while (extra--) { out[extra] = 0; @@ -2916,9 +2686,9 @@ static char *do_convert(iconv_t cd, if ((r == -1) || (r == -2)) { r = (size_t)-1; - icerr = EILSEQ; + icerr = RKTIO_ERROR_CONVERT_BAD_SEQUENCE; } else if (r == -3) { - icerr = E2BIG; + icerr = RKTIO_ERROR_CONVERT_NOT_ENOUGH_SPACE; r = (size_t)-1; } else icerr = 0; @@ -2926,11 +2696,11 @@ static char *do_convert(iconv_t cd, } else { ip = in XFORM_OK_PLUS id + dip; op = out XFORM_OK_PLUS od + dop; - r = iconv(cd, ICONV_ARG_CAST &ip, 
&il, &op, &ol); + r = rktio_convert(scheme_rktio, cd, &ip, &il, &op, &ol); dip = ip - (in XFORM_OK_PLUS id); dop = op - (out XFORM_OK_PLUS od); ip = op = NULL; - icerr = ICONV_errno; + icerr = rktio_get_last_error(scheme_rktio); } /* Record how many chars processed, now */ @@ -2939,8 +2709,8 @@ static char *do_convert(iconv_t cd, *oolen = dop; /* Got all the chars? */ - if (r == (size_t)-1) { - if (icerr == E2BIG) { + if (r == RKTIO_CONVERT_ERROR) { + if (icerr == RKTIO_ERROR_CONVERT_NOT_ENOUGH_SPACE) { if (grow) { /* Double the string size and try again */ char *naya; @@ -2953,7 +2723,7 @@ static char *do_convert(iconv_t cd, } else { *status = 1; if (close_it) - iconv_close(cd); + rktio_converter_close(scheme_rktio, cd); while (extra--) { out[od + dop + extra] = 0; } @@ -2961,10 +2731,10 @@ static char *do_convert(iconv_t cd, } } else { /* Either EINVAL (premature end) or EILSEQ (bad sequence) */ - if (icerr == EILSEQ) + if (icerr == RKTIO_ERROR_CONVERT_BAD_SEQUENCE) *status = -2; if (close_it) - iconv_close(cd); + rktio_converter_close(scheme_rktio, cd); while (extra--) { out[od + dop + extra] = 0; } @@ -2982,7 +2752,7 @@ static char *do_convert(iconv_t cd, } else { *status = 0; if (close_it) - iconv_close(cd); + rktio_converter_close(scheme_rktio, cd); while (extra--) { out[od + dop + extra] = 0; } @@ -3002,18 +2772,19 @@ static char *string_to_from_locale(int to_bytes, reset_locale() has been called */ { Scheme_Object *parts = scheme_null, *one; - char *c; + char *c, *le; intptr_t clen, used; int status; - iconv_t cd; - - if (!iconv_ready) init_iconv(); + rktio_converter_t *cd; + le = rktio_locale_encoding(scheme_rktio); if (to_bytes) - cd = iconv_open(mz_iconv_nl_langinfo(), MZ_UCS4_NAME); + cd = rktio_converter_open(scheme_rktio, le, MZ_UCS4_NAME); else - cd = iconv_open(MZ_UCS4_NAME, mz_iconv_nl_langinfo()); - if (cd == (iconv_t)-1) { + cd = rktio_converter_open(scheme_rktio, MZ_UCS4_NAME, le); + free(le); + + if (!cd) { *no_cvt = 1; return NULL; } @@ 
-3032,7 +2803,7 @@ static char *string_to_from_locale(int to_bytes, used >>= 2; if ((perm < 0) && (used < len)) { - iconv_close(cd); + rktio_converter_close(scheme_rktio, cd); return NULL; } @@ -3047,7 +2818,7 @@ static char *string_to_from_locale(int to_bytes, *olen = (clen >> 2); ((mzchar *)c)[*olen] = 0; } - iconv_close(cd); + rktio_converter_close(scheme_rktio, cd); return c; } @@ -3079,7 +2850,7 @@ static char *string_to_from_locale(int to_bytes, } } - iconv_close(cd); + rktio_converter_close(scheme_rktio, cd); if (to_bytes) { parts = append_all_byte_strings_backwards(parts); @@ -3102,106 +2873,20 @@ static char *locale_recase(int to_up, intptr_t *oolen) /* Assumes that reset_locale() has been called */ { -#ifdef NO_MBTOWC_FUNCTIONS - /* No wide-char functions... - The C library's toupper and tolower is supposed to be - locale-sensitive. It can't be right for characters that are - encoded in multiple bytes, but probably it will do the right - thing in common cases. */ - int i; - - /* First, copy "in" to "out" */ - if (iilen + 1 >= iolen) { - out = (char *)scheme_malloc_atomic(iilen + 1); - od = 0; - } - memcpy(out + od, in + id, iilen); - out[od + iilen] = 0; - *oolen = iilen; - - /* Re-case chars in "out" */ - for (i = 0; i < iilen; i++) { - char t; - t = (to_up) ? toupper(out[od+i]) : tolower(out[od+i]); - out[od+i] = t; - } - - return out; -#else - /* To change the case, convert the string to multibyte, re-case the - multibyte, then convert back. 
*/ -# define MZ_WC_BUF_SIZE 32 - GC_CAN_IGNORE mbstate_t state; - size_t wl, ml; - wchar_t *wc, *ws, wcbuf[MZ_WC_BUF_SIZE], cwc; - const char *s; - unsigned int j; - /* The "n" versions are apparently not too standard: */ -# define mz_mbsnrtowcs(t, f, fl, tl, s) mbsrtowcs(t, f, tl, s) -# define mz_wcsnrtombs(t, f, fl, tl, s) wcsrtombs(t, f, tl, s) - - /* ----- to wide char ---- */ - - /* Get length */ - memset(&state, 0, sizeof(mbstate_t)); - s = in XFORM_OK_PLUS id; - wl = mz_mbsnrtowcs(NULL, &s, iilen, 0, &state); - s = NULL; - - /* Allocate space */ - if (wl < MZ_WC_BUF_SIZE) { - wc = wcbuf; + char *s, *s2; + intptr_t len; + s = rktio_locale_recase(scheme_rktio, to_up, in XFORM_OK_PLUS id); + len = strlen(s); + if ((len+1) < iolen) { + memcpy(out XFORM_OK_PLUS od, s, len+1); + s2 = out; } else { - wc = (wchar_t *)scheme_malloc_atomic(sizeof(wchar_t) * (wl + 1)); + s2 = scheme_malloc_atomic(len+1); + memcpy(s2, s, len+1); } - - /* Convert */ - memset(&state, 0, sizeof(mbstate_t)); - s = in XFORM_OK_PLUS id; - (void)mz_mbsnrtowcs(wc, &s, iilen, wl + 1, &state); - s = NULL; - - wc[wl] = 0; /* just in case */ - - /* ---- re-case ---- */ - - if (to_up) { - for (j = 0; j < wl; j++) { - cwc = towupper(wc[j]); - wc[j] = cwc; - } - } else { - for (j = 0; j < wl; j++) { - cwc = towlower(wc[j]); - wc[j] = cwc; - } - } - - /* ---- back to multibyte ---- */ - - /* Measure */ - memset(&state, 0, sizeof(mbstate_t)); - ws = wc; - ml = mz_wcsnrtombs(NULL, (const wchar_t **)&ws, wl, 0, &state); - ws = NULL; - - /* Allocate space */ - *oolen = ml; - if (ml + 1 >= (unsigned int)iolen) { - out = (char *)scheme_malloc_atomic(ml + 1); - od = 0; - } - - /* Convert */ - memset(&state, 0, sizeof(mbstate_t)); - ws = wc; - (void)mz_wcsnrtombs(out + od, (const wchar_t **)&ws, wl, ml + 1, &state); - ws = NULL; - - out[od + ml] = 0; - - return out; -#endif + free(s); + *oolen = len; + return s2; } int mz_locale_strcoll(char *s1, int d1, int l1, char *s2, int d2, int l2, int cvt_case) @@ 
-3236,13 +2921,13 @@ int mz_locale_strcoll(char *s1, int d1, int l1, char *s2, int d2, int l2, int cv l1 = origl1; l2 = origl2; while (1) { - c1 = do_convert((iconv_t)-1, MZ_UCS4_NAME, NULL, 1, + c1 = do_convert(NULL, MZ_UCS4_NAME, NULL, 1, s1, d1 * 4, 4 * l1, buf1, 0, MZ_SC_BUF_SIZE - 1, 1 /* grow */, 0, 1 /* terminator size */, &used1, &clen1, &status); - c2 = do_convert((iconv_t)-1, MZ_UCS4_NAME, NULL, 1, + c2 = do_convert(NULL, MZ_UCS4_NAME, NULL, 1, s2, d2 * 4, 4 * l2, buf2, 0, MZ_SC_BUF_SIZE - 1, 1 /* grow */, 0, 1 /* terminator size */, @@ -3302,7 +2987,7 @@ int mz_locale_strcoll(char *s1, int d1, int l1, char *s2, int d2, int l2, int cv } /* Collate, finally. */ - status = strcoll(c1, c2); + status = rktio_locale_strcoll(scheme_rktio, c1, c2); /* If one is bigger than the other, we're done. */ if (status) @@ -3341,43 +3026,14 @@ int mz_locale_strcoll(char *s1, int d1, int l1, char *s2, int d2, int l2, int cv } } -#ifdef MACOS_UNICODE_SUPPORT int mz_native_strcoll(char *s1, int d1, int l1, char *s2, int d2, int l2, int cvt_case) /* The s1 and s2 arguments are actually UTF-16. */ { - CFStringRef str1, str2; - CFComparisonResult r; - - str1 = CFStringCreateWithBytes(NULL, (unsigned char *)s1 XFORM_OK_PLUS (d1 * 2), (l1 * 2), - kCFStringEncodingUnicode, FALSE); - str2 = CFStringCreateWithBytes(NULL, (unsigned char *)s2 XFORM_OK_PLUS (d2 * 2), (l2 * 2), - kCFStringEncodingUnicode, FALSE); - - r = CFStringCompare(str1, str2, (kCFCompareLocalized - | (cvt_case ? kCFCompareCaseInsensitive : 0))); - - CFRelease(str1); - CFRelease(str2); - - return (int)r; + return rktio_strcoll_utf16(scheme_rktio, + (rktio_char16_t *)s1 XFORM_OK_PLUS d1, l1, + (rktio_char16_t *)s2 XFORM_OK_PLUS d2, l2, + cvt_case); } -#endif - -#ifdef WINDOWS_UNICODE_SUPPORT -int mz_native_strcoll(char *s1, int d1, int l1, char *s2, int d2, int l2, int cvt_case) - /* The s1 and s2 arguments are actually UTF-16. */ -{ - int r; - - r = CompareStringW(LOCALE_USER_DEFAULT, - ((cvt_case ? 
NORM_IGNORECASE : 0) - | NORM_IGNOREKANATYPE - | NORM_IGNOREWIDTH), - (wchar_t *)s1 + d1, l1, (wchar_t *)s2 + d2, l2); - - return r - 2; -} -#endif typedef int (*strcoll_proc)(char *s1, int d1, int l1, char *s2, int d2, int l2, int cvt_case); @@ -3387,12 +3043,12 @@ int do_locale_comp(const char *who, const mzchar *us1, intptr_t ul1, const mzcha int v, endres, utf16 = 0; GC_CAN_IGNORE strcoll_proc mz_strcoll = mz_locale_strcoll; -#if defined(MACOS_UNICODE_SUPPORT) || defined(WINDOWS_UNICODE_SUPPORT) - if (current_locale_name && !*current_locale_name) { + if (current_locale_name + && !*current_locale_name + && (rktio_convert_properties(scheme_rktio) & RKTIO_CONVERT_STRCOLL_UTF16)) { utf16 = 1; mz_strcoll = mz_native_strcoll; } -#endif if (utf16) { us1 = (mzchar *)scheme_ucs4_to_utf16(us1, 0, ul1, NULL, 0, &ul1, 1); @@ -3453,7 +3109,6 @@ int do_locale_comp(const char *who, const mzchar *us1, intptr_t ul1, const mzcha return endres; } - mzchar *do_locale_recase(int to_up, mzchar *in, int delta, int len, intptr_t *olen) { Scheme_Object *parts = scheme_null; @@ -3463,7 +3118,7 @@ mzchar *do_locale_recase(int to_up, mzchar *in, int delta, int len, intptr_t *ol while (len) { /* We might have conversion errors... 
*/ - c = do_convert((iconv_t)-1, MZ_UCS4_NAME, NULL, 1, + c = do_convert(NULL, MZ_UCS4_NAME, NULL, 1, (char *)in, 4 * delta, 4 * len, buf, 0, MZ_SC_BUF_SIZE - 1, 1 /* grow */, 0, 1 /* terminator size */, @@ -3480,7 +3135,7 @@ mzchar *do_locale_recase(int to_up, mzchar *in, int delta, int len, intptr_t *ol if (!c) clen = 0; - c = do_convert((iconv_t)-1, NULL, MZ_UCS4_NAME, 2, + c = do_convert(NULL, NULL, MZ_UCS4_NAME, 2, c, 0, clen, NULL, 0, 0, 1 /* grow */, 0, sizeof(mzchar) /* terminator size */, @@ -3514,59 +3169,22 @@ mzchar *do_locale_recase(int to_up, mzchar *in, int delta, int len, intptr_t *ol return SCHEME_CHAR_STR_VAL(parts); } -#ifdef MACOS_UNICODE_SUPPORT mzchar *do_native_recase(int to_up, mzchar *in, int delta, int len, intptr_t *olen) - /* The in argument is actually UTF-16. */ +/* The in argument is actually UTF-16. */ { - CFMutableStringRef mstr; - CFStringRef str; - GC_CAN_IGNORE CFRange rng; - char *result; + rktio_char16_t *s, *s2; + intptr_t ol; - str = CFStringCreateWithBytes(NULL, ((unsigned char *)in) XFORM_OK_PLUS (delta * 2), (len * 2), - kCFStringEncodingUnicode, FALSE); - mstr = CFStringCreateMutableCopy(NULL, 0, str); - CFRelease(str); + s = rktio_recase_utf16(scheme_rktio, to_up, (rktio_char16_t *)in XFORM_OK_PLUS delta, len, &ol); - if (to_up) - CFStringUppercase(mstr, NULL); - else - CFStringLowercase(mstr, NULL); + s2 = scheme_malloc_atomic(sizeof(rktio_char16_t) * (ol+1)); + memcpy(s2, s, sizeof(rktio_char16_t) * (ol+1)); + free(s); + + *olen = ol; - len = CFStringGetLength(mstr); - *olen = len; - - result = (char *)scheme_malloc_atomic((len + 1) * 2); - - rng = CFRangeMake(0, len); - CFStringGetCharacters(mstr, rng, (UniChar *)result); - CFRelease(mstr); - - ((UniChar *)result)[len] = 0; - - return (mzchar *)result; + return (mzchar *)s2; } -#endif - -#ifdef WINDOWS_UNICODE_SUPPORT -mzchar *do_native_recase(int to_up, mzchar *in, int delta, int len, intptr_t *olen) - /* The in argument is actually UTF-16. 
*/ -{ - char *result; - - result = (char *)scheme_malloc_atomic((len + 1) * 2); - memcpy(result, ((char *)in) + (2 * delta), len * 2); - ((wchar_t*)result)[len] = 0; - - if (to_up) - CharUpperBuffW((wchar_t *)result, len); - else - CharLowerBuffW((wchar_t *)result, len); - - *olen = len; - return (mzchar *)result; -} -#endif typedef mzchar *(*recase_proc)(int to_up, mzchar *in, int delta, int len, intptr_t *olen); @@ -3580,19 +3198,19 @@ static Scheme_Object *mz_recase(const char *who, int to_up, mzchar *us, intptr_t reset_locale(); -#if defined(MACOS_UNICODE_SUPPORT) || defined(WINDOWS_UNICODE_SUPPORT) - if (current_locale_name && !*current_locale_name) { + if (current_locale_name + && !*current_locale_name + && (rktio_convert_properties(scheme_rktio) & RKTIO_CONVERT_RECASE_UTF16)) { utf16 = 1; mz_do_recase = do_native_recase; } -#endif if (utf16) { us = (mzchar *)scheme_ucs4_to_utf16(us, 0, ulen, NULL, 0, &ulen, 1); ((short *)us)[ulen] = 0; } - /* If there are nulls in the string, then we have to make multiple + /* If there are nuls in the string, then we have to make multiple calls to mz_do_recase */ i = 0; while (1) { @@ -3634,8 +3252,6 @@ static Scheme_Object *mz_recase(const char *who, int to_up, mzchar *us, intptr_t return append_all_strings_backwards(parts); } -#endif - static Scheme_Object * unicode_recase(const char *who, int to_up, int argc, Scheme_Object *argv[]) { @@ -3678,47 +3294,31 @@ static void reset_locale(void) if (locale_on) { name = SCHEME_CHAR_STR_VAL(v); -#ifndef DONT_USE_LOCALE if ((current_locale_name != name) && (!current_locale_name || mz_char_strcmp("result-locale", current_locale_name, scheme_char_strlen(current_locale_name), name, SCHEME_CHAR_STRLEN_VAL(v), 0, 1))) { - /* We only need CTYPE and COLLATE; two calls seem to be much - faster than one call with ALL */ char *n, buf[32]; n = scheme_utf8_encode_to_buffer(name, SCHEME_CHAR_STRLEN_VAL(v), buf, 32); - if (!setlocale(LC_CTYPE, n)) - setlocale(LC_CTYPE, "C"); - if 
(!setlocale(LC_COLLATE, n)) - setlocale(LC_COLLATE, "C"); + rktio_set_locale(scheme_rktio, n); } -#endif + current_locale_name_ptr = (void *)name; } } char *scheme_push_c_numeric_locale() { -#ifndef DONT_USE_LOCALE - GC_CAN_IGNORE char *prev; - prev = setlocale(LC_NUMERIC, NULL); - if (!prev || !strcmp(prev, "C")) - return NULL; - else - return setlocale(LC_NUMERIC, "C"); -#endif + return rktio_push_c_numeric_locale(scheme_rktio); } void scheme_pop_c_numeric_locale(char *prev) { -#ifndef DONT_USE_LOCALE - if (prev) - setlocale(LC_NUMERIC, prev); -#endif + rktio_pop_c_numeric_locale(scheme_rktio, prev); } static int find_special_casing(int ch) @@ -4419,14 +4019,12 @@ static int mz_char_strcmp(const char *who, const mzchar *str1, intptr_t l1, cons if (size_shortcut && (l1 != l2)) return 1; -#ifndef DONT_USE_LOCALE if (use_locale) { reset_locale(); if (locale_on) { return do_locale_comp(who, str1, l1, str2, l2, 0); } } -#endif if (l1 > l2) { l1 = l2; @@ -4461,14 +4059,12 @@ static int mz_char_strcmp_ci(const char *who, const mzchar *str1, intptr_t l1, c if (size_shortcut && (l1 != l2)) return 1; -#ifndef DONT_USE_LOCALE if (use_locale) { reset_locale(); if (locale_on) { return do_locale_comp(who, str1, l1, str2, l2, 1); } } -#endif p1 = sp1 = 0; p2 = sp2 = 0; @@ -4559,8 +4155,8 @@ static void close_converter(Scheme_Object *o, void *data) if (!c->closed) { c->closed = 1; if (c->kind == mzICONV_KIND) { - iconv_close(c->cd); - c->cd = (iconv_t)-1; + rktio_converter_close(scheme_rktio, c->cd); + c->cd = NULL; } if (c->mref) { scheme_remove_managed(c->mref, (Scheme_Object *)c); @@ -4572,14 +4168,12 @@ static void close_converter(Scheme_Object *o, void *data) Scheme_Object *scheme_open_converter(const char *from_e, const char *to_e) { Scheme_Converter *c; - iconv_t cd; + rktio_converter_t *cd; int kind; int permissive; int need_regis = 1; Scheme_Custodian_Reference *mref; - if (!iconv_ready) init_iconv(); - if (!*to_e || !*from_e) reset_locale(); @@ -4594,7 +4188,7 @@ 
Scheme_Object *scheme_open_converter(const char *from_e, const char *to_e) permissive = 0xFFFD; else permissive = 0; - cd = (iconv_t)-1; + cd = NULL; need_regis = (*to_e && *from_e); } else if ((!strcmp(from_e, "platform-UTF-8") || !strcmp(from_e, "platform-UTF-8-permissive")) @@ -4604,30 +4198,37 @@ Scheme_Object *scheme_open_converter(const char *from_e, const char *to_e) permissive = 0xFFFD; else permissive = 0; - cd = (iconv_t)-1; + cd = NULL; need_regis = 0; } else if (!strcmp(from_e, "platform-UTF-16") && !strcmp(to_e, "platform-UTF-8")) { kind = mzUTF16_TO_UTF8_KIND; permissive = 0; - cd = (iconv_t)-1; + cd = NULL; need_regis = 0; } else { - if (!iconv_ready) init_iconv(); + char *tmp_from_e = NULL, *tmp_to_e = NULL; - if (!mzCHK_PROC(iconv_open)) + if (!(rktio_convert_properties(scheme_rktio) & RKTIO_CONVERTER_SUPPORTED)) return scheme_false; if (!*from_e || !*to_e) reset_locale(); - if (!*from_e) - from_e = mz_iconv_nl_langinfo(); - if (!*to_e) - to_e = mz_iconv_nl_langinfo(); - cd = iconv_open(to_e, from_e); + if (!*from_e) { + tmp_from_e = rktio_locale_encoding(scheme_rktio); + from_e = tmp_from_e; + } + if (!*to_e) { + tmp_to_e = rktio_locale_encoding(scheme_rktio); + to_e = tmp_to_e; + } + cd = rktio_converter_open(scheme_rktio, to_e, from_e); - if (cd == (iconv_t)-1) + if (tmp_from_e) free(tmp_from_e); + if (tmp_to_e) free(tmp_to_e); + + if (!cd) return scheme_false; kind = mzICONV_KIND; diff --git a/racket/src/rktio/Makefile.in b/racket/src/rktio/Makefile.in index ec40f7aaf0..1f7b0f0969 100644 --- a/racket/src/rktio/Makefile.in +++ b/racket/src/rktio/Makefile.in @@ -28,6 +28,7 @@ OBJS = rktio_fs.@LTO@ \ rktio_shellex.@LTO@ \ rktio_time.@LTO@ \ rktio_syslog.@LTO@ \ + rktio_convert.@LTO@ \ rktio_error.@LTO@ \ rktio_hash.@LTO@ \ rktio_wide.@LTO@ \ @@ -85,6 +86,9 @@ rktio_time.@LTO@: $(srcdir)/rktio_time.c $(RKTIO_HEADERS) rktio_syslog.@LTO@: $(srcdir)/rktio_syslog.c $(RKTIO_HEADERS) $(CC) $(CFLAGS) -I$(srcdir) -I. 
-o rktio_syslog.@LTO@ -c $(srcdir)/rktio_syslog.c +rktio_convert.@LTO@: $(srcdir)/rktio_convert.c $(RKTIO_HEADERS) + $(CC) $(CFLAGS) -I$(srcdir) -I. -o rktio_convert.@LTO@ -c $(srcdir)/rktio_convert.c + rktio_error.@LTO@: $(srcdir)/rktio_error.c $(RKTIO_HEADERS) $(CC) $(CFLAGS) -I$(srcdir) -I. -o rktio_error.@LTO@ -c $(srcdir)/rktio_error.c diff --git a/racket/src/rktio/configure b/racket/src/rktio/configure index 6aedd63677..b89bf4d1ab 100755 --- a/racket/src/rktio/configure +++ b/racket/src/rktio/configure @@ -694,6 +694,8 @@ ac_subst_files='' ac_user_opts=' enable_option_checking enable_shared +enable_pthread +enable_iconv ' ac_precious_vars='build_alias host_alias @@ -1317,6 +1319,8 @@ Optional Features: --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --enable-shared create shared libraries (ok, but not recommended) + --enable-pthread link with pthreads (usually auto-enabled if needed) + --enable-iconv use iconv (usually auto-enabled) Some influential environment variables: CC C compiler command @@ -2243,6 +2247,20 @@ if test "${enable_shared+set}" = set; then : enableval=$enable_shared; fi +# Check whether --enable-pthread was given. +if test "${enable_pthread+set}" = set; then : + enableval=$enable_pthread; +fi + +# Check whether --enable-iconv was given. +if test "${enable_iconv+set}" = set; then : + enableval=$enable_iconv; +fi + + +if test "${enable_iconv}" = "" ; then + enable_iconv=yes +fi ###### Autoconfigure ####### @@ -3921,9 +3939,39 @@ $as_echo_n "checking $msg...
" >&6; } iconv_usage_result="$enable_iconv$iconv_lib_flag" { $as_echo "$as_me:${as_lineno-$LINENO}: result: $iconv_usage_result" >&5 $as_echo "$iconv_usage_result" >&6; } - if test "${enable_iconv}" = "no" ; then - CFLAGS="$CFLAGS -DRKTIO_NO_ICONV" - fi +fi +if test "${enable_iconv}" = "no" ; then + +$as_echo "#define RKTIO_NO_ICONV 1" >>confdefs.h + +fi + +if test "${enable_iconv}" = "yes" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for nl_langinfo (CODESET)" >&5 +$as_echo_n "checking for nl_langinfo (CODESET)... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +char *codeset = nl_langinfo (CODESET); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + +$as_echo "#define RKTIO_HAVE_CODESET 1" >>confdefs.h + + have_codeset=yes +else + have_codeset=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $have_codeset" >&5 +$as_echo "$have_codeset" >&6; } fi msg="for mbsrtowcs" diff --git a/racket/src/rktio/configure.ac b/racket/src/rktio/configure.ac index 97d1884c52..8e4b116c16 100644 --- a/racket/src/rktio/configure.ac +++ b/racket/src/rktio/configure.ac @@ -11,6 +11,12 @@ AC_CONFIG_AUX_DIR(../lt) AC_CANONICAL_SYSTEM AC_ARG_ENABLE(shared, [ --enable-shared create shared libraries (ok, but not recommended)]) +AC_ARG_ENABLE(pthread, [ --enable-pthread link with pthreads (usually auto-enabled if needed)]) +AC_ARG_ENABLE(iconv, [ --enable-iconv use iconv (usually auto-enabled)]) + +if test "${enable_iconv}" = "" ; then + enable_iconv=yes +fi ###### Autoconfigure ####### @@ -156,9 +162,19 @@ if test "${skip_iconv_check}" = "no" ; then AC_MSG_CHECKING($msg) iconv_usage_result="$enable_iconv$iconv_lib_flag" AC_MSG_RESULT($iconv_usage_result) - if test "${enable_iconv}" = "no" ; then - CFLAGS="$CFLAGS -DRKTIO_NO_ICONV" - fi +fi +if test "${enable_iconv}" = "no" ; then + AC_DEFINE(RKTIO_NO_ICONV,1,[Do not
use iconv]) +fi + +if test "${enable_iconv}" = "yes" ; then + AC_MSG_CHECKING([for nl_langinfo (CODESET)]) + AC_TRY_LINK([#include ], + [char *codeset = nl_langinfo (CODESET);], + AC_DEFINE(RKTIO_HAVE_CODESET,1,[Have nl_langinfo (CODESET)]) + have_codeset=yes, + have_codeset=no) + AC_MSG_RESULT($have_codeset) fi [ msg="for mbsrtowcs" ] diff --git a/racket/src/rktio/rktio.h b/racket/src/rktio/rktio.h index d9bba5edaf..7d8686fb09 100644 --- a/racket/src/rktio/rktio.h +++ b/racket/src/rktio/rktio.h @@ -72,6 +72,9 @@ Thread and signal conventions: # define RKTIO_EXTERN extern #endif +/*************************************************/ +/* Initialization and general datatypes */ + typedef struct rktio_t rktio_t; /* A rktio_t value represents an instance of the Racket I/O system. Almost every `rktio_...` function takes it as the first argument. */ @@ -101,6 +104,24 @@ typedef int rktio_tri_t; typedef int rktio_bool_t; /* 0 or 1. */ +typedef unsigned short rktio_char16_t; +/* A UTF-16 code unit. A `rktio_char16_t *` is meant to be the same as + `wchar_t *` on Windows. */ + +/*************************************************/ +/* DLL paths */ + +RKTIO_EXTERN void rktio_set_dll_path(rktio_char16_t *p); +/* Sets a path to search for loading DLLs, such as `iconv` on Windows. + This function departs from all the usual conventions: the given + path is in wide-character format, it's not copied, and it's not + specific to a `rktio_t` instance. */ + +RKTIO_EXTERN rktio_char16_t *rktio_get_dll_path(rktio_char16_t *p); +/* Combines a path previously registered with `rktio_set_dll_path` with + the given filename. The result is allocated (and should be + deallocated) as usual.
*/ + /*************************************************/ /* Reading and writing files */ @@ -878,11 +899,10 @@ rktio_ok_t rktio_shell_execute(rktio_t *rktio, /*************************************************/ /* Path conversion */ -void *rktio_path_to_wide_path(rktio_t *rktio, const char *p); -char *rktio_wide_path_to_path(rktio_t *rktio, const void *wp); +rktio_char16_t *rktio_path_to_wide_path(rktio_t *rktio, const char *p); +char *rktio_wide_path_to_path(rktio_t *rktio, const rktio_char16_t *wp); /* Convert to/from the OS's native path representation. These - functions are useful only on Windows, where each `void *` - is actually a `wchar_t*`. The `rktio_path_to_wide_path` + functions are useful only on Windows. The `rktio_path_to_wide_path` function can fail and report `RKTIO_ERROR_INVALID_PATH`. */ /*************************************************/ @@ -904,6 +924,91 @@ enum { RKTIO_LOG_DEBUG }; +/*************************************************/ +/* Encoding conversion */ + +int rktio_convert_properties(rktio_t *rktio); +/* Returns a combination of the following flags. */ + +#define RKTIO_CONVERTER_SUPPORTED (1 << 0) +#define RKTIO_CONVERT_STRCOLL_UTF16 (1 << 1) +#define RKTIO_CONVERT_RECASE_UTF16 (1 << 2) + +typedef struct rktio_converter_t rktio_converter_t; + +rktio_converter_t *rktio_converter_open(rktio_t *rktio, const char *to_enc, const char *from_enc); +/* Creates an encoding converter. */ + +void rktio_converter_close(rktio_t *rktio, rktio_converter_t *cvt); +/* Destroys an encoding converter. */ + +intptr_t rktio_convert(rktio_t *rktio, + rktio_converter_t *cvt, + char **in, intptr_t *in_left, + char **out, intptr_t *out_left); +/* Converts some bytes, following the iconv protocol: each consumed byte + increments `*in` and decrements `*in_left`, and each produced byte + increments `*out` and decrements `*out_left`.
In case of an error, + the result is `RKTIO_CONVERT_ERROR` and the last error is set to + one of `RKTIO_ERROR_CONVERT_NOT_ENOUGH_SPACE`, + `RKTIO_ERROR_CONVERT_BAD_SEQUENCE`, `RKTIO_ERROR_CONVERT_PREMATURE_END`, or `RKTIO_ERROR_CONVERT_OTHER` + --- but an error indicates something within `in` or `out`, + and some bytes may have been successfully converted even if an + error is reported. */ + +#define RKTIO_CONVERT_ERROR (-1) + +char *rktio_locale_recase(rktio_t *rktio, + rktio_bool_t to_up, + char *in); +/* Upcases (if `to_up`) or downcases (if `!to_up`) the content of `in` + using the current locale's encoding and case conversion. */ + +rktio_char16_t *rktio_recase_utf16(rktio_t *rktio, + rktio_bool_t to_up, rktio_char16_t *s1, + intptr_t len, intptr_t *olen); +/* Converts the case of a string encoded in UTF-16 for the system's + default locale if the OS provided direct support for it. The + `RKTIO_CONVERT_RECASE_UTF16` property from + `rktio_convert_properties` reports whether this function will work. + Takes and optionally returns a length (`olen` can be NULL), but the + UTF-16 sequence is expected to have no nuls. */ + +int rktio_locale_strcoll(rktio_t *rktio, char *s1, char *s2); +/* Returns a negative number if `s1` is less than `s2` by the current locale's + comparison, positive if `s1` is greater, and 0 if the strings + are equal. */ + +int rktio_strcoll_utf16(rktio_t *rktio, + rktio_char16_t *s1, intptr_t l1, + rktio_char16_t *s2, intptr_t l2, + rktio_bool_t cvt_case); +/* Compares two strings encoded in UTF-16 for the system's default + locale if the OS provided direct support for it. The + `RKTIO_CONVERT_STRCOLL_UTF16` property from + `rktio_convert_properties` reports whether this function will work. + Takes lengths, but the UTF-16 sequences are expected to have + no nuls. */ + +char *rktio_locale_encoding(rktio_t *rktio); +/* Returns the name of the current locale's encoding.
*/ + +void rktio_set_locale(rktio_t *rktio, char *name); +/* Sets the current locale, which affects string comparisons and + conversions. It can also affect the C library's character-property + predicates and number printing/parsing. The empty string + corresponds to the OS's native locale. */ + +char *rktio_push_c_numeric_locale(rktio_t *rktio); +void rktio_pop_c_numeric_locale(rktio_t *rktio, char *prev); +/* Use this pair of functions to temporarily switch the locale to the C + locale for number parsing and printing. The result of the first + function is deallocated when passed to the second function. */ + +char *rktio_system_language_country(rktio_t *rktio); +/* Returns the current system's language and country in a 5-character + format such as "en_US". */ + /*************************************************/ /* Errors */ @@ -948,6 +1053,10 @@ enum { RKTIO_ERROR_TIME_OUT_OF_RANGE, RKTIO_ERROR_NO_SUCH_ENVVAR, RKTIO_ERROR_SHELL_EXECUTE_FAILED, + RKTIO_ERROR_CONVERT_NOT_ENOUGH_SPACE, + RKTIO_ERROR_CONVERT_BAD_SEQUENCE, + RKTIO_ERROR_CONVERT_PREMATURE_END, + RKTIO_ERROR_CONVERT_OTHER, }; RKTIO_EXTERN void rktio_set_last_error(rktio_t *rktio, int kind, int errid); diff --git a/racket/src/rktio/rktio_config.h.in b/racket/src/rktio/rktio_config.h.in index 75adbd25d7..4d410959ef 100644 --- a/racket/src/rktio/rktio_config.h.in +++ b/racket/src/rktio/rktio_config.h.in @@ -46,8 +46,14 @@ typedef unsigned long long rktio_uint64_t; /* Whether getaddrinfo() is available: */ #undef HAVE_GETADDRINFO +/* Whether nl_langinfo works.
*/ +#undef RKTIO_HAVE_CODESET + /* In case you want to avoid dynamic sizing of `fd_set` arrays: */ #undef RKTIO_STATIC_FDSET_SIZE /* In case you want to use fcntl for file locks */ #undef RKTIO_USE_FCNTL_AND_FORK_FOR_FILE_LOCKS + +/* In case iconv is not available: */ +#undef RKTIO_NO_ICONV diff --git a/racket/src/rktio/rktio_convert.c b/racket/src/rktio/rktio_convert.c new file mode 100644 index 0000000000..f64e0d3fbe --- /dev/null +++ b/racket/src/rktio/rktio_convert.c @@ -0,0 +1,670 @@ +#include "rktio.h" +#include "rktio_private.h" +#include +#include + +#ifdef OS_X +# define MACOS_UNICODE_SUPPORT +#endif + +#include +#include +#if !defined(RKTIO_SYSTEM_WINDOWS) && !defined(RKTIO_NO_ICONV) +# include +# include +#endif +#include +#include +#ifdef MACOS_UNICODE_SUPPORT +# include +# include +#endif + +/*============================================================*/ +/* Using iconv via a DLL */ +/*============================================================*/ + +#ifdef RKTIO_NO_ICONV + +# define HAVE_CODESET 0 +# define ICONV_errno 0 +# define RKTIO_CHK_PROC(x) 0 +# define iconv_ready 1 + +typedef intptr_t iconv_t; +static size_t iconv(iconv_t cd, char **in, size_t *in_left, char **out, size_t *out_left) { return (size_t)-1; } +static iconv_t iconv_open(const char *to, const char *from) { return -1; } +static void iconv_close(iconv_t cd) { } +static void init_iconv() { } + +#elif defined(RKTIO_SYSTEM_WINDOWS) + +static wchar_t *dlldir; + +typedef intptr_t iconv_t; +typedef int *(*errno_proc_t)(); +typedef size_t (*iconv_proc_t)(iconv_t cd, + char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft); +typedef iconv_t (*iconv_open_proc_t)(const char *tocode, const char *fromcode); +typedef void (*iconv_close_proc_t)(iconv_t cd); +typedef char *(*locale_charset_proc_t)(); +static errno_proc_t iconv_errno; +static iconv_proc_t iconv; +static iconv_open_proc_t iconv_open; +static iconv_close_proc_t iconv_close; +static locale_charset_proc_t 
locale_charset; /* Not used, currently */ + +static int get_iconv_errno(void) +{ + int *a; + a = iconv_errno(); + return *a; +} + +# define HAVE_CODESET 1 +# define CODESET 0 +# define ICONV_errno get_iconv_errno() +# define RKTIO_CHK_PROC(x) x +static int iconv_ready = 0; + +static void init_iconv() +{ + HMODULE m; + wchar_t *p; + + p = rktio_get_dll_path(L"iconv.dll"); + if (p) { + m = LoadLibraryW(p); + free(p); + } else + m = NULL; + + if (!m) { + p = rktio_get_dll_path(L"libiconv.dll"); + if (p) { + m = LoadLibraryW(p); + free(p); + } else + m = NULL; + } + + if (!m) { + p = rktio_get_dll_path(L"libiconv-2.dll"); + if (p) { + m = LoadLibraryW(p); + free(p); + } else + m = NULL; + } + + if (!m) + m = LoadLibraryW(L"iconv.dll"); + if (!m) + m = LoadLibraryW(L"libiconv.dll"); + if (!m) + m = LoadLibraryW(L"libiconv-2.dll"); + + if (m) { + iconv = (iconv_proc_t)GetProcAddress(m, "libiconv"); + iconv_open = (iconv_open_proc_t)GetProcAddress(m, "libiconv_open"); + iconv_close = (iconv_close_proc_t)GetProcAddress(m, "libiconv_close"); + locale_charset = (locale_charset_proc_t)GetProcAddress(m, "locale_charset"); + /* Make sure we have all of them or none: */ + if (!iconv || !iconv_open || !iconv_close) { + iconv = NULL; + iconv_open = NULL; + iconv_close = NULL; + } + } + + if (iconv) { + iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno"); + if (!iconv_errno) { + /* The iconv.dll distributed with Racket links to msvcrt.dll. + It's a slighly dangerous assumption that whatever iconv we + found also uses msvcrt.dll. 
*/ + m = LoadLibraryW(L"msvcrt.dll"); + if (m) { + iconv_errno = (errno_proc_t)GetProcAddress(m, "_errno"); + if (!iconv_errno) { + iconv = NULL; + iconv_open = NULL; + iconv_close = NULL; + } + } + } + } + + iconv_ready = 1; +} + +rktio_char16_t *rktio_get_dll_path(rktio_char16_t *s) +{ + if (dlldir) { + int len1, len2; + wchar_t *p; + len1 = wcslen(dlldir); + len2 = wcslen(s); + p = malloc((len1 + len2 + 2) * sizeof(wchar_t)); + memcpy(p, dlldir, len1 * sizeof(wchar_t)); + if (p[len1 - 1] != '\\') { + p[len1++] = '\\'; + } + memcpy(p + len1, s, (len2 + 1) * sizeof(wchar_t)); + return p; + } else + return NULL; +} + +void rktio_set_dll_path(rktio_char16_t *p) +{ + dlldir = p; +} + +#else + +# ifdef RKTIO_HAVE_CODESET +# define HAVE_CODESET 1 +# else +# define HAVE_CODESET 0 +# endif + +# include +# define ICONV_errno errno +# define iconv_ready 1 +# define RKTIO_CHK_PROC(x) 1 +static void init_iconv() { } + +void rktio_set_dll_path(rktio_char16_t *p) { } +rktio_char16_t *rktio_get_dll_path(rktio_char16_t *s) { return NULL; } + +#endif + +/*============================================================*/ +/* Properties */ +/*============================================================*/ + +int rktio_convert_properties(rktio_t *rktio) +{ + int flags = 0; + + if (!iconv_ready) init_iconv(); + + if (RKTIO_CHK_PROC(iconv_errno)) + flags = RKTIO_CONVERTER_SUPPORTED; + +#if defined(MACOS_UNICODE_SUPPORT) || defined(RKTIO_SYSTEM_WINDOWS) + flags |= (RKTIO_CONVERT_STRCOLL_UTF16 | RKTIO_CONVERT_RECASE_UTF16); +#endif + + return flags; +} + +/*============================================================*/ +/* Current locale */ +/*============================================================*/ + +void rktio_set_locale(rktio_t *rktio, char *name) +{ + /* We only need CTYPE and COLLATE; two calls seem to be much + faster than one call with ALL */ + if (name) { + if (!setlocale(LC_CTYPE, name)) + setlocale(LC_CTYPE, "C"); + if (!setlocale(LC_COLLATE, name)) + setlocale(LC_COLLATE,
"C"); + } else { + setlocale(LC_CTYPE, "C"); + setlocale(LC_COLLATE, "C"); + } +} + +char *rktio_push_c_numeric_locale(rktio_t *rktio) +{ + char *prev; + prev = setlocale(LC_NUMERIC, NULL); + if (!prev || !strcmp(prev, "C")) + return NULL; + else + return setlocale(LC_NUMERIC, "C"); +} + +void rktio_pop_c_numeric_locale(rktio_t *rktio, char *prev) +{ + if (prev) + setlocale(LC_NUMERIC, prev); +} + +/*============================================================*/ +/* Current locale's encoding */ +/*============================================================*/ + +#ifdef RKTIO_SYSTEM_WINDOWS + +static char *nl_langinfo_dup() +{ + int i; + char *current_locale_name; + + current_locale_name = setlocale(LC_NUMERIC, NULL); + if (!current_locale_name) + current_locale_name = ""; + + if ((current_locale_name[0] == 'C') + && !current_locale_name[1]) + return MSC_IZE(strdup)("US-ASCII"); + + for (i = 0; current_locale_name[i]; i++) { + if (current_locale_name[i] == '.') { + if (current_locale_name[i + 1]) { + int len, j; + char *enc; + i++; + len = strlen(current_locale_name) - i; + enc = malloc(2 + len + 1); + + /* Check whether the encoding is numeric, in which case + we add "CP" in front to make it an encoding name */ + for (j = i; current_locale_name[j]; j++) { + if (current_locale_name[j] > 127) + break; + if (!isdigit(current_locale_name[j])) + break; + } + if (!current_locale_name[j]) { + j = 2; + memcpy(enc, "CP", j); + } else { + j = 0; + } + + while (current_locale_name[i]) { + if (current_locale_name[i] > 127) { + free(enc); + return MSC_IZE(strdup)("UTF-8"); + } + enc[j++] = current_locale_name[i++]; + } + enc[j] = 0; + return enc; + } + } + } + + return MSC_IZE(strdup)("UTF-8"); +} + +#else + +static char *nl_langinfo_dup() +{ + char *s; +# if HAVE_CODESET + s = nl_langinfo(CODESET); +# else + /* nl_langinfo doesn't work, so just make up something */ + s = "UTF-8"; +# endif + + return MSC_IZE(strdup)(s); +} + +#endif + +char
*rktio_locale_encoding(rktio_t *rktio) +{ + return nl_langinfo_dup(); +} + +/*============================================================*/ + +char *rktio_system_language_country(rktio_t *rktio) +{ +#ifdef MACOS_UNICODE_SUPPORT + /* Mac OS */ + CFLocaleRef l; + CFStringRef s; + int len; + char *r; + + l = CFLocaleCopyCurrent(); + s = CFLocaleGetIdentifier(l); + + len = CFStringGetLength(s); + r = malloc(len * 6 + 1); + CFStringGetCString(s, r, len * 6 + 1, kCFStringEncodingUTF8); + + CFRelease(l); + + r[5] = 0; + + return r; +#elif defined(RKTIO_SYSTEM_WINDOWS) + /* Windows */ + LCID l; + int llen, clen; + char *lang, *country, *s; + l = GetUserDefaultLCID(); + + llen = GetLocaleInfo(l, LOCALE_SENGLANGUAGE, NULL, 0); + lang = malloc(llen); + GetLocaleInfo(l, LOCALE_SENGLANGUAGE, lang, llen); + if (llen) + llen -= 1; /* drop nul terminator */ + + clen = GetLocaleInfo(l, LOCALE_SENGCOUNTRY, NULL, 0); + country = malloc(clen); + GetLocaleInfo(l, LOCALE_SENGCOUNTRY, country, clen); + if (clen) + clen -= 1; /* drop nul terminator */ + + s = malloc(clen + llen + 2); + memcpy(s, lang, llen); + memcpy(s + 1 + llen, country, clen); + s[llen] = '_'; + s[clen + llen + 1] = 0; + + free(lang); + free(country); + + return s; +#else + /* Unix */ + char *s; + + s = getenv("LC_ALL"); + if (!s) + s = getenv("LC_CTYPE"); + if (!s) + s = getenv("LANG"); + + if (s) { + /* Check that the environment variable has the form + xx_XX[.ENC] */ + if ((s[0] >= 'a') && (s[0] <= 'z') + && (s[1] >= 'a') && (s[1] <= 'z') + && (s[2] == '_') + && (s[3] >= 'A') && (s[3] <= 'Z') + && (s[4] >= 'A') && (s[4] <= 'Z') + && (!s[5] || s[5] == '.')) { + /* Good */ + } else + s = NULL; + } + + if (!s) + s = "en_US"; + + return MSC_IZE(strdup)(s); +#endif +} + +/*============================================================*/ +/* Converters */ +/*============================================================*/ + +struct rktio_converter_t { + iconv_t cd; +}; + +rktio_converter_t *rktio_converter_open(rktio_t 
*rktio, const char *to_enc, const char *from_enc) +{ + iconv_t cd; + rktio_converter_t *cvt; + + if (!iconv_ready) init_iconv(); + + cd = iconv_open(to_enc, from_enc); + if (cd == (iconv_t)-1) { + errno = ICONV_errno; + get_posix_error(); + return NULL; + } + + cvt = malloc(sizeof(rktio_converter_t)); + cvt->cd = cd; + return cvt; +} + +void rktio_converter_close(rktio_t *rktio, rktio_converter_t *cvt) +{ + iconv_close(cvt->cd); + free(cvt); +} + +intptr_t rktio_convert(rktio_t *rktio, + rktio_converter_t *cvt, + char **in, intptr_t *in_left, + char **out, intptr_t *out_left) +{ + size_t il = *in_left, ol = *out_left, r; + int icerr; + + r = iconv(cvt->cd, in, &il, out, &ol); + + *in_left = il; + *out_left = ol; + + if (r == (size_t)-1) { + icerr = ICONV_errno; + + if (icerr == E2BIG) + set_racket_error(RKTIO_ERROR_CONVERT_NOT_ENOUGH_SPACE); + else if (icerr == EILSEQ) + set_racket_error(RKTIO_ERROR_CONVERT_BAD_SEQUENCE); + else if (icerr == EINVAL) + set_racket_error(RKTIO_ERROR_CONVERT_PREMATURE_END); + else + set_racket_error(RKTIO_ERROR_CONVERT_OTHER); + return RKTIO_CONVERT_ERROR; + } + + return (intptr_t)r; +} + +/*============================================================*/ +/* Case conversion */ +/*============================================================*/ + +char *rktio_locale_recase(rktio_t *rktio, + rktio_bool_t to_up, + char *in) +{ + char *out; + +#ifdef NO_MBTOWC_FUNCTIONS + /* No wide-char functions... + The C library's toupper and tolower is supposed to be + locale-sensitive. It can't be right for characters that are + encoded in multiple bytes, but probably it will do the right + thing in common cases. */ + intptr_t iilen = strlen(in); + int i; + + /* First, copy "in" to "out" */ + out = malloc(iilen + 1); + memcpy(out, in, iilen); + out[iilen] = 0; + + /* Re-case chars in "out" */ + for (i = 0; i < iilen; i++) { + char t; + t = (to_up) ? 
toupper(out[i]) : tolower(out[i]); + out[i] = t; + } + + return out; +#else + /* To change the case, convert the string to multibyte, re-case the + multibyte, then convert back. */ +# define RKTIO_WC_BUF_SIZE 32 + mbstate_t state; + size_t wl, ml; + wchar_t *wc, *ws, wcbuf[RKTIO_WC_BUF_SIZE], cwc; + const char *s; + unsigned int j; + /* The "n" versions are apparently not too standard: */ +# define mz_mbsnrtowcs(t, f, fl, tl, s) mbsrtowcs(t, f, tl, s) +# define mz_wcsnrtombs(t, f, fl, tl, s) wcsrtombs(t, f, tl, s) + + /* ----- to wide char ---- */ + + /* Get length */ + memset(&state, 0, sizeof(mbstate_t)); + s = in; + wl = mz_mbsnrtowcs(NULL, &s, iilen, 0, &state); + s = NULL; + + /* Allocate space */ + if (wl < RKTIO_WC_BUF_SIZE) { + wc = wcbuf; + } else { + wc = malloc(sizeof(wchar_t) * (wl + 1)); + } + + /* Convert */ + memset(&state, 0, sizeof(mbstate_t)); + s = in; + (void)mz_mbsnrtowcs(wc, &s, iilen, wl + 1, &state); + s = NULL; + + wc[wl] = 0; /* just in case */ + + /* ---- re-case ---- */ + + if (to_up) { + for (j = 0; j < wl; j++) { + cwc = towupper(wc[j]); + wc[j] = cwc; + } + } else { + for (j = 0; j < wl; j++) { + cwc = towlower(wc[j]); + wc[j] = cwc; + } + } + + /* ---- back to multibyte ---- */ + + /* Measure */ + memset(&state, 0, sizeof(mbstate_t)); + ws = wc; + ml = mz_wcsnrtombs(NULL, (const wchar_t **)&ws, wl, 0, &state); + ws = NULL; + + /* Allocate space */ + out = malloc(ml + 1); + + /* Convert */ + memset(&state, 0, sizeof(mbstate_t)); + ws = wc; + (void)mz_wcsnrtombs(out, (const wchar_t **)&ws, wl, ml + 1, &state); + ws = NULL; + + out[ml] = 0; + + if (wc != wcbuf) free(wc); + + return out; +#endif +} + +rktio_char16_t *rktio_recase_utf16(rktio_t *rktio, rktio_bool_t to_up, rktio_char16_t *s1, intptr_t l1, intptr_t *olen) +{ +#ifdef MACOS_UNICODE_SUPPORT + CFMutableStringRef mstr; + CFStringRef str; + CFRange rng; + rktio_char16_t *result; + intptr_t len; + + str = CFStringCreateWithBytes(NULL, (unsigned char *)s1, (l1 * 
sizeof(rktio_char16_t)), kCFStringEncodingUnicode, FALSE); + mstr = CFStringCreateMutableCopy(NULL, 0, str); + CFRelease(str); + + if (to_up) + CFStringUppercase(mstr, NULL); + else + CFStringLowercase(mstr, NULL); + + len = CFStringGetLength(mstr); + + result = malloc((len + 1) * sizeof(rktio_char16_t)); + + rng = CFRangeMake(0, len); + CFStringGetCharacters(mstr, rng, (UniChar *)result); + CFRelease(mstr); + + result[len] = 0; + + if (olen) + *olen = len; + + return result; +#elif defined(RKTIO_SYSTEM_WINDOWS) + rktio_char16_t *result; + + result = malloc((l1 + 1) * sizeof(rktio_char16_t)); + memcpy(result, s1, l1 * sizeof(rktio_char16_t)); + result[l1] = 0; + + if (to_up) + CharUpperBuffW((wchar_t *)result, l1); + else + CharLowerBuffW((wchar_t *)result, l1); + + if (olen) + *olen = l1; + + return result; +#else + return NULL; +#endif +} + +/*============================================================*/ +/* Native string comparison */ +/*============================================================*/ + +int rktio_locale_strcoll(rktio_t *rktio, char *s1, char *s2) +{ + return strcoll(s1, s2); +} + +int rktio_strcoll_utf16(rktio_t *rktio, + rktio_char16_t *s1, intptr_t l1, + rktio_char16_t *s2, intptr_t l2, + rktio_bool_t cvt_case) +{ +#ifdef MACOS_UNICODE_SUPPORT + CFStringRef str1, str2; + CFComparisonResult r; + + str1 = CFStringCreateWithBytes(NULL, (unsigned char *)s1, (l1 * sizeof(rktio_char16_t)), + kCFStringEncodingUnicode, FALSE); + str2 = CFStringCreateWithBytes(NULL, (unsigned char *)s2, (l2 * sizeof(rktio_char16_t)), + kCFStringEncodingUnicode, FALSE); + + r = CFStringCompare(str1, str2, (kCFCompareLocalized + | (cvt_case ? kCFCompareCaseInsensitive : 0))); + + CFRelease(str1); + CFRelease(str2); + + return (int)r; +#elif defined(RKTIO_SYSTEM_WINDOWS) + int r; + r = CompareStringW(LOCALE_USER_DEFAULT, + ((cvt_case ? 
NORM_IGNORECASE : 0) + | NORM_IGNOREKANATYPE + | NORM_IGNOREWIDTH), + (wchar_t *)s1, l1, (wchar_t *)s2, l2); + + return r - 2; +#else + return 0; +#endif +} + + diff --git a/racket/src/rktio/rktio_envvars.c b/racket/src/rktio/rktio_envvars.c index d15bc51304..6e28b2e0c7 100644 --- a/racket/src/rktio/rktio_envvars.c +++ b/racket/src/rktio/rktio_envvars.c @@ -217,7 +217,7 @@ rktio_envvars_t *rktio_envvars(rktio_t *rktio) p = ea[i]; for (j = 0; p[j] && p[j] != '='; j++) { } - envvars->names[i] = MSC_IZE(strndup)(p, j); + envvars->names[i] = rktio_strndup(p, j); envvars->vals[i] = MSC_IZE(strdup)(p+j+1); } diff --git a/racket/src/rktio/rktio_error.c b/racket/src/rktio/rktio_error.c index 7dd1128890..a71c080287 100644 --- a/racket/src/rktio/rktio_error.c +++ b/racket/src/rktio/rktio_error.c @@ -1,8 +1,5 @@ #include "rktio.h" #include "rktio_private.h" -#ifdef RKTIO_SYSTEM_WINDOWS -# include -#endif #include #include @@ -38,6 +35,10 @@ err_str_t err_strs[] = { { RKTIO_ERROR_TIME_OUT_OF_RANGE, "time value out-of-range for date conversion" }, { RKTIO_ERROR_NO_SUCH_ENVVAR, "no value as an environment variable" }, { RKTIO_ERROR_SHELL_EXECUTE_FAILED, "ShellExecute failed" }, + { RKTIO_ERROR_CONVERT_NOT_ENOUGH_SPACE, "encoding conversion needs more output space" }, + { RKTIO_ERROR_CONVERT_BAD_SEQUENCE, "ill-formed input encountered in encoding conversion" }, + { RKTIO_ERROR_CONVERT_PREMATURE_END, "input encoding ended prematurely" }, + { RKTIO_ERROR_CONVERT_OTHER, "encoding conversion encountered an error" }, { 0, NULL } }; diff --git a/racket/src/rktio/rktio_fs.c b/racket/src/rktio/rktio_fs.c index bbb6fc22d8..3795c8c69d 100644 --- a/racket/src/rktio/rktio_fs.c +++ b/racket/src/rktio/rktio_fs.c @@ -1533,7 +1533,7 @@ char *rktio_directory_list_step(rktio_t *rktio, rktio_directory_list_t *dl) continue; # endif - return strndup(e->d_name, nlen); + return rktio_strndup(e->d_name, nlen); } rktio_directory_list_stop(rktio, dl); diff --git a/racket/src/rktio/rktio_main.c 
b/racket/src/rktio/rktio_main.c index 833bfff905..480ad62c8c 100644 --- a/racket/src/rktio/rktio_main.c +++ b/racket/src/rktio/rktio_main.c @@ -51,7 +51,7 @@ void rktio_destroy(rktio_t *rktio) } /* Useful on Windows to make sure everyone is using the same malloc() - and fre(): */ + and free(): */ void rktio_free(void *p) { free(p); diff --git a/racket/src/rktio/rktio_private.h b/racket/src/rktio/rktio_private.h index a4da11158c..f2798ab7a3 100644 --- a/racket/src/rktio/rktio_private.h +++ b/racket/src/rktio/rktio_private.h @@ -91,13 +91,13 @@ struct rktio_t { wchar_t *wide_buffer; #endif -#ifdef RKTIO_SYSTEM_WINDOWS - HANDLE hEventLog; -#endif - #ifdef RKTIO_USE_FCNTL_AND_FORK_FOR_FILE_LOCKS struct rktio_hash_t *locked_fd_process_map; #endif + +#ifdef RKTIO_SYSTEM_WINDOWS + HANDLE hEventLog; +#endif }; /*========================================================================*/ @@ -323,3 +323,5 @@ void rktio_syslog_clean(rktio_t* rktio); #else # define BIG_OFF_T_IZE(n) n #endif + +char *rktio_strndup(char *s, intptr_t len); diff --git a/racket/src/rktio/rktio_wide.c b/racket/src/rktio/rktio_wide.c index 6350280d9e..a965413836 100644 --- a/racket/src/rktio/rktio_wide.c +++ b/racket/src/rktio/rktio_wide.c @@ -1,5 +1,6 @@ #include "rktio.h" #include "rktio_private.h" +#include #include /* For converting byte strings to and from "wide" strings on Windows. 
*/ @@ -7,14 +8,16 @@ #ifdef RKTIO_SYSTEM_UNIX void rktio_init_wide(rktio_t *rktio) { } -void *rktio_path_to_wide_path(rktio_t *rktio, const char *p) +rktio_char16_t *rktio_path_to_wide_path(rktio_t *rktio, const char *p) { - return strdup(p); + set_racket_error(RKTIO_ERROR_UNSUPPORTED); + return NULL; } -char *rktio_wide_path_to_path(rktio_t *rktio, const void *wp) +char *rktio_wide_path_to_path(rktio_t *rktio, const rktio_char16_t *wp) { - return strdup((char *)wp); + set_racket_error(RKTIO_ERROR_UNSUPPORTED); + return NULL; } #endif @@ -320,14 +323,26 @@ char *rktio_convert_from_wchar(const wchar_t *ws, int free_given) return s; } -void *rktio_path_to_wide_path(rktio_t *rktio, const char *p) +rktio_char16_t *rktio_path_to_wide_path(rktio_t *rktio, const char *p) { return WIDE_PATH_copy(p); } -char *rktio_wide_path_to_path(rktio_t *rktio, const void *wp) +char *rktio_wide_path_to_path(rktio_t *rktio, const rktio_char16_t *wp) { return NARROW_PATH_copy(wp); } #endif + +/*============================================================*/ + +/* The same as strndup(), but sometimes strndup() is missing */ +char *rktio_strndup(char *s, intptr_t len) +{ + char *s2; + s2 = malloc(len + 1); + memcpy(s2, s, len); + s2[len] = 0; + return s2; +} diff --git a/racket/src/worksp/librktio/librktio.vcproj b/racket/src/worksp/librktio/librktio.vcproj index 109f62b7b8..ca0f3cd121 100644 --- a/racket/src/worksp/librktio/librktio.vcproj +++ b/racket/src/worksp/librktio/librktio.vcproj @@ -166,6 +166,10 @@ RelativePath="..\..\rktio\rktio_syslog.c" > + + diff --git a/racket/src/worksp/librktio/librktio.vcxproj b/racket/src/worksp/librktio/librktio.vcxproj index 8a8a7612fa..acb76a18ba 100644 --- a/racket/src/worksp/librktio/librktio.vcxproj +++ b/racket/src/worksp/librktio/librktio.vcxproj @@ -130,6 +130,7 @@ +