Added Unicode support to Windows console I/O

original commit: e7e638e871ac4b46a84149dda93aae8741683e0a
This commit is contained in:
Neal Alexander 2020-01-09 19:34:58 +01:00 committed by Bob Burger
parent cec5dce677
commit e7bb4def71
4 changed files with 259 additions and 34 deletions

View File

@ -68,6 +68,11 @@ static IBOOL s_ee_init_term(void) {
return init_status; return init_status;
} }
/* Returns nonzero when the UTF-16 code unit 'uc' lies in the surrogate range
   0xD800..0xDFFF (high or low surrogate), and zero otherwise. */
static int utf16_is_surrogate(WORD uc) {
  const unsigned int unit = uc;
  return unit >= 0xd800u && unit <= 0xdfffu;
}
/* returns char, eof, #t (winched), or #f (nothing ready), the latter /* returns char, eof, #t (winched), or #f (nothing ready), the latter
only if blockp is false */ only if blockp is false */
static ptr s_ee_read_char(IBOOL blockp) { static ptr s_ee_read_char(IBOOL blockp) {
@ -104,13 +109,13 @@ static ptr s_ee_read_char(IBOOL blockp) {
tc = get_thread_context(); tc = get_thread_context();
if (DISABLECOUNT(tc) == FIX(0)) { if (DISABLECOUNT(tc) == FIX(0)) {
deactivate_thread(tc); deactivate_thread(tc);
succ = ReadConsoleInput(hStdin, irInBuf, 1, &cNumRead); succ = ReadConsoleInputW(hStdin, irInBuf, 1, &cNumRead);
reactivate_thread(tc); reactivate_thread(tc);
} else { } else {
succ = ReadConsoleInput(hStdin, irInBuf, 1, &cNumRead); succ = ReadConsoleInputW(hStdin, irInBuf, 1, &cNumRead);
} }
#else /* PTHREADS */ #else /* PTHREADS */
succ = ReadConsoleInput(hStdin, irInBuf, 1, &cNumRead); succ = ReadConsoleInputW(hStdin, irInBuf, 1, &cNumRead);
#endif /* PTHREADS */ #endif /* PTHREADS */
@ -125,15 +130,22 @@ static ptr s_ee_read_char(IBOOL blockp) {
KEY_EVENT_RECORD ker = irInBuf[0].Event.KeyEvent; KEY_EVENT_RECORD ker = irInBuf[0].Event.KeyEvent;
rptcnt = ker.wRepeatCount; rptcnt = ker.wRepeatCount;
if (ker.bKeyDown) { if (ker.bKeyDown) {
char c; WCHAR c;
if (c = ker.uChar.AsciiChar) { if (c = ker.uChar.UnicodeChar) {
/* translate ^@ 2) and ^<space> to nul */ /* translate ^@ 2) and ^<space> to nul */
if (c == 0x20 && (ker.dwControlKeyState & (LEFT_CTRL_PRESSED|RIGHT_CTRL_PRESSED))) if (c == 0x20 && (ker.dwControlKeyState
& (LEFT_CTRL_PRESSED|RIGHT_CTRL_PRESSED))) {
buf[0] = 0; buf[0] = 0;
else
buf[0] = c;
buflen = 1; buflen = 1;
} else if (utf16_is_surrogate(c)) {
return Schar('\0');
} else {
return Schar(c);
}
} else { } else {
switch (ker.wVirtualKeyCode) { switch (ker.wVirtualKeyCode) {
case VK_DELETE: case VK_DELETE:
@ -508,11 +520,35 @@ static ptr s_ee_get_clipboard(void) {
ptr x = S_G.null_string; ptr x = S_G.null_string;
if (OpenClipboard((HWND)0)) { if (OpenClipboard((HWND)0)) {
HANDLE h = GetClipboardData(CF_TEXT); HANDLE h = GetClipboardData(CF_UNICODETEXT);
if (h != (HANDLE *)0) { if (h != (HANDLE *)0) {
char *s = (char *)GlobalLock(h); wchar_t *s = (wchar_t *)GlobalLock(h);
if (s != (char *)0) x = Sstring(s);
if (s != NULL) {
int sz8 = WideCharToMultiByte(CP_UTF8,
WC_ERR_INVALID_CHARS,
s, -1,
NULL,
0, NULL, NULL);
if (sz8 > 0) {
unsigned char *buf = (unsigned char*) malloc(sz8);
if (buf != NULL) {
if (WideCharToMultiByte(CP_UTF8,
WC_ERR_INVALID_CHARS,
s, -1,
buf,
sz8, NULL, NULL)) {
x = Sstring_utf8(buf, sz8 - 1);
}
free(buf);
}
}
}
GlobalUnlock(h); GlobalUnlock(h);
} }
CloseClipboard(); CloseClipboard();
@ -521,9 +557,9 @@ static ptr s_ee_get_clipboard(void) {
return x; return x;
} }
static void s_ee_write_char(wchar_t c) { static void s_ee_write_char(wchar_t c) { // TODO: utf-32 chars?
if (c > 255) c = '?'; DWORD n;
putchar(c); WriteConsoleW(hStdout, &c, 1, &n, NULL);
} }
#else /* WIN32 */ #else /* WIN32 */

View File

@ -396,6 +396,14 @@ extern int S_windows_stat64(const char *pathname, struct STATBUF *buffer);
extern int S_windows_system(const char *command); extern int S_windows_system(const char *command);
extern int S_windows_unlink(const char *pathname); extern int S_windows_unlink(const char *pathname);
extern char *S_windows_getcwd(char *buffer, int maxlen); extern char *S_windows_getcwd(char *buffer, int maxlen);
extern int S_windows_stdin_read(unsigned char *buf, int size);
extern int S_windows_stdout_write(unsigned char *buf, int size);
extern int S_windows_stderr_write(unsigned char *buf, int size);
extern int S_windows_console_write_utf8(HANDLE h, unsigned char *b_ptr, int b_n, int *written);
extern int S_windows_console_read_utf8(HANDLE h, unsigned char *output, int want, int *got);
#endif /* WIN32 */ #endif /* WIN32 */
#ifdef FEATURE_EXPEDITOR #ifdef FEATURE_EXPEDITOR

View File

@ -417,6 +417,40 @@ ptr S_close_fd(ptr file, IBOOL gzflag) {
#define IO_SIZE_T size_t #define IO_SIZE_T size_t
#endif /* WIN32 */ #endif /* WIN32 */
/* Reads up to 'size' bytes from 'fd' into 'buf' and stores the result of the
   underlying read call in '*m'.
   On Windows, fd 0 is routed through S_windows_stdin_read with the console
   control handler toggled around the call; every other descriptor (and every
   descriptor on other platforms) goes through READ under FD_EINTR_GUARD.
   Returns the flag produced by FD_EINTR_GUARD, or, on the Windows stdin
   path, 1 if either SetConsoleCtrlHandler call fails and 0 otherwise. */
int os_read(ptr tc, INT fd, unsigned char *buf, IO_SIZE_T size, int *m) {
  int flag = 0;

#ifdef WIN32
  if (fd == 0) {
    // There may be a race condition or other problem with this signal mask.
    // It originally sat deeper down, right next to the win32 API call, but
    // there it failed to mask the signal for reasons that were never
    // understood.
    if (!SetConsoleCtrlHandler(NULL, TRUE)) {
      *m = -1;
      return 1;
    }

    *m = S_windows_stdin_read(buf, size);

    // Undo the change made above; failing to do so is reported as an error
    // even though '*m' already holds the read result.
    return SetConsoleCtrlHandler(NULL, FALSE) ? 0 : 1;
  }
#endif /* WIN32 */

  FD_EINTR_GUARD(*m >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)),
                 flag,
                 *m = READ(fd, buf, size));
  return flag;
}
/* Returns string on error, #!eof on end-of-file and integer-count otherwise */ /* Returns string on error, #!eof on end-of-file and integer-count otherwise */
ptr S_bytevector_read(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) { ptr S_bytevector_read(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) {
INT saved_errno = 0; INT saved_errno = 0;
@ -433,33 +467,20 @@ ptr S_bytevector_read(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) {
#endif #endif
LOCKandDEACTIVATE(tc, bv) LOCKandDEACTIVATE(tc, bv)
#ifdef WIN32
if (!gzflag && fd == 0) {
DWORD error_code;
SetConsoleCtrlHandler(NULL, TRUE);
SetLastError(0);
m = _read(0, &BVIT(bv,start), (IO_SIZE_T)count);
error_code = GetLastError();
SetConsoleCtrlHandler(NULL, FALSE);
if (m == 0 && error_code == 0x3e3) {
KEYBOARDINTERRUPTPENDING(tc) = Strue;
SOMETHINGPENDING(tc) = Strue;
}
} else
#endif /* WIN32 */
{ {
if (!gzflag) { if (!gzflag) {
FD_EINTR_GUARD( int len = 0;
m >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)), flag, flag = os_read(tc, fd, &BVIT(bv, start), (IO_SIZE_T)count, &len);
m = READ(fd,&BVIT(bv,start),(IO_SIZE_T)count)); m = len;
} else { } else {
GZ_EINTR_GUARD( GZ_EINTR_GUARD(
1, m >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)), 1, m >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)),
flag, gzfile, flag, gzfile,
m = S_glzread(gzfile, &BVIT(bv,start), (GZ_IO_SIZE_T)count)); m = S_glzread(gzfile, &BVIT(bv,start), (GZ_IO_SIZE_T)count));
} }
}
saved_errno = errno; saved_errno = errno;
}
REACTIVATEandUNLOCK(tc, bv) REACTIVATEandUNLOCK(tc, bv)
if (Sboolean_value(KEYBOARDINTERRUPTPENDING(tc))) { if (Sboolean_value(KEYBOARDINTERRUPTPENDING(tc))) {
@ -535,6 +556,21 @@ ptr S_bytevector_read_nb(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag)
#endif /* WIN32 */ #endif /* WIN32 */
} }
/* Writes 'size' bytes from 'buf' to 'fd' and returns the result of the
   underlying write call. On Windows, descriptors 1 and 2 are dispatched to
   the dedicated stdout/stderr writers; everything else uses WRITE. */
int os_write(INT fd, unsigned char *buf, IO_SIZE_T size) {
#ifdef WIN32
  switch (fd) {
    case 1:
      return S_windows_stdout_write(buf, size);
    case 2:
      return S_windows_stderr_write(buf, size);
    default:
      break;
  }
#endif
  return WRITE(fd, buf, size);
}
ptr S_bytevector_write(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) { ptr S_bytevector_write(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) {
iptr i, s, c; iptr i, s, c;
ptr tc = get_thread_context(); ptr tc = get_thread_context();
@ -552,6 +588,7 @@ ptr S_bytevector_write(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) {
/* if we could know that fd is nonblocking, we wouldn't need to deactivate. /* if we could know that fd is nonblocking, we wouldn't need to deactivate.
we could test ioctl, but some other thread could change it before we actually we could test ioctl, but some other thread could change it before we actually
get around to writing. */ get around to writing. */
LOCKandDEACTIVATE(tc, bv) LOCKandDEACTIVATE(tc, bv)
if (gzflag) { if (gzflag) {
/* strangely, gzwrite returns 0 on error */ /* strangely, gzwrite returns 0 on error */
@ -561,7 +598,7 @@ ptr S_bytevector_write(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) {
i = S_glzwrite(gzfile, &BVIT(bv,s), (GZ_IO_SIZE_T)cx)); i = S_glzwrite(gzfile, &BVIT(bv,s), (GZ_IO_SIZE_T)cx));
} else { } else {
FD_EINTR_GUARD(i >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)), FD_EINTR_GUARD(i >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)),
flag, i = WRITE(fd, &BVIT(bv,s), (IO_SIZE_T)cx)); flag, i = os_write(fd, &BVIT(bv,s), (IO_SIZE_T)cx));
} }
saved_errno = errno; saved_errno = errno;
REACTIVATEandUNLOCK(tc, bv) REACTIVATEandUNLOCK(tc, bv)
@ -616,7 +653,7 @@ ptr S_put_byte(ptr file, INT byte, IBOOL gzflag) {
i = S_glzwrite(gzfile, buf, 1)); i = S_glzwrite(gzfile, buf, 1));
} else { } else {
FD_EINTR_GUARD(i >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)), FD_EINTR_GUARD(i >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)),
flag, i = WRITE(fd, buf, 1)); flag, i = os_write(fd, buf, 1));
} }
saved_errno = errno; saved_errno = errno;
REACTIVATE(tc) REACTIVATE(tc)

View File

@ -33,6 +33,7 @@ void S_machine_init() {
Sregister_symbol("(windows)PutRegistry", (void *)s_PutRegistry); Sregister_symbol("(windows)PutRegistry", (void *)s_PutRegistry);
Sregister_symbol("(windows)RemoveRegistry", (void *)s_RemoveRegistry); Sregister_symbol("(windows)RemoveRegistry", (void *)s_RemoveRegistry);
Sregister_symbol("(windows)ErrorString", (void *)s_ErrorString); Sregister_symbol("(windows)ErrorString", (void *)s_ErrorString);
SetConsoleOutputCP(CP_UTF8);
} }
INT S_getpagesize() { INT S_getpagesize() {
@ -495,3 +496,146 @@ char *Sgetenv(const char *name) {
} }
} }
} }
// Console

// Number of wchar_t elements in the static UTF-16 buffer that console input
// is read into (see _windows_console_read_utf8_chunk).
#define WINDOWS_CONSOLE_STATIC_INPUT_LEN 1024
// Multiplier used to size the UTF-8 buffers relative to the UTF-16 input
// buffer; presumably the maximum number of UTF-8 bytes per code point.
#define UTF8_MAX_CODEPOINT 4
// Status codes returned by _windows_console_read_utf8_chunk and
// S_windows_console_read_utf8.
#define WINDOWS_CONSOLE_ERROR -1
#define WINDOWS_CONSOLE_EOF -2
#define WINDOWS_CONSOLE_SUCCESS 0
/* Reads one chunk of input from the console handle 'h' with ReadConsoleW and
   converts it from UTF-16 to UTF-8 into 'buf'.

   h     console input handle
   buf   destination for the UTF-8 bytes
   size  capacity of 'buf' in bytes; must be at least
         WINDOWS_CONSOLE_STATIC_INPUT_LEN * UTF8_MAX_CODEPOINT
   got   receives the number of UTF-8 bytes stored (0 on any failure)

   Returns WINDOWS_CONSOLE_SUCCESS when at least one byte was produced,
   WINDOWS_CONSOLE_EOF when the console returned no characters, and
   WINDOWS_CONSOLE_ERROR on bad arguments or a failed read/conversion. */
static int _windows_console_read_utf8_chunk(HANDLE h, unsigned char *buf,
                                            int size, int *got) {
  // 'utf16_buf' should be locked by the caller, and 'buf' must have the
  // correct size relative to 'utf16_buf'.
  static wchar_t utf16_buf[WINDOWS_CONSOLE_STATIC_INPUT_LEN];

  if (h == INVALID_HANDLE_VALUE || got == NULL || buf == NULL)
    return WINDOWS_CONSOLE_ERROR;
  *got = 0;
  if (size < (WINDOWS_CONSOLE_STATIC_INPUT_LEN * UTF8_MAX_CODEPOINT))
    return WINDOWS_CONSOLE_ERROR;

  // ReadConsoleW takes its limit as a count of characters (UTF-16 code
  // units), not bytes. Passing 'sizeof utf16_buf' would allow the console to
  // write twice as many wchar_t as the buffer holds.
  DWORD utf16_len = 0;
  if (!ReadConsoleW(h, utf16_buf, WINDOWS_CONSOLE_STATIC_INPUT_LEN,
                    &utf16_len, NULL))
    return WINDOWS_CONSOLE_ERROR;
  else if (utf16_len == 0)
    return WINDOWS_CONSOLE_EOF;
  else if (utf16_len > INT_MAX)
    return WINDOWS_CONSOLE_ERROR;

  // NOTE(review): WC_ERR_INVALID_CHARS makes the conversion fail on an
  // unpaired surrogate, so a surrogate pair that straddles two chunks would
  // presumably be reported as an error -- confirm whether this can occur.
  int utf8_len = WideCharToMultiByte(CP_UTF8,
                                     WC_ERR_INVALID_CHARS,
                                     utf16_buf, (int)utf16_len,
                                     (LPSTR)buf,
                                     size, NULL, NULL);
  if (utf8_len <= 0)
    return WINDOWS_CONSOLE_ERROR;

  *got = utf8_len;
  return WINDOWS_CONSOLE_SUCCESS;
}
/* Copies up to 'want' bytes of UTF-8 console input into 'output' and stores
   the number of bytes copied in '*got'.

   Input is fetched one chunk at a time into a static buffer; bytes that do
   not fit in 'output' are kept there and handed out by later calls before
   the console is read again. The static state is guarded by an SRW lock
   held for the duration of the refill and copy.

   Returns WINDOWS_CONSOLE_SUCCESS (also when 'want' <= 0, with '*got' == 0),
   WINDOWS_CONSOLE_ERROR on bad arguments, or whatever non-success status
   the chunk reader reported. */
int S_windows_console_read_utf8(HANDLE h, unsigned char *output, int want,
                                int *got) {
  // Regardless of the size of 'want', no more than
  // WINDOWS_CONSOLE_STATIC_INPUT_LEN * UTF8_MAX_CODEPOINT bytes are
  // delivered per call.
  static SRWLOCK lock = SRWLOCK_INIT;
  static int surplus = 0;
  static unsigned char surplus_buf[WINDOWS_CONSOLE_STATIC_INPUT_LEN
                                   * UTF8_MAX_CODEPOINT];
  int status = WINDOWS_CONSOLE_SUCCESS;

  if (got == NULL || output == NULL || h == INVALID_HANDLE_VALUE)
    return WINDOWS_CONSOLE_ERROR;
  *got = 0;
  if (want <= 0)
    return WINDOWS_CONSOLE_SUCCESS;

  AcquireSRWLockExclusive(&lock);

  // Refill only once everything buffered earlier has been handed out.
  if (surplus <= 0)
    status = _windows_console_read_utf8_chunk(h, surplus_buf,
                                              sizeof surplus_buf,
                                              &surplus);

  if (status == WINDOWS_CONSOLE_SUCCESS) {
    int n = (surplus < want) ? surplus : want;
    if (n < 0)
      n = 0;

    memcpy(output, surplus_buf, n);
    surplus -= n;
    *got = n;

    // Slide the undelivered tail to the front for the next call.
    if (surplus > 0)
      memmove(surplus_buf, surplus_buf + n, surplus);
  }

  ReleaseSRWLockExclusive(&lock);
  return status;
}
// Having previously set the code page to UTF-8, we can output directly.

/* Writes 'size' bytes from 'buf' to file descriptor 2 (stderr) with _write
   and returns _write's result unchanged. */
int S_windows_stderr_write(unsigned char *buf, int size) {
return _write(2, buf, size);
}
/* Writes 'size' bytes from 'buf' to file descriptor 1 (stdout) with _write
   and returns _write's result unchanged. */
int S_windows_stdout_write(unsigned char *buf, int size) {
return _write(1, buf, size);
}
// Despite having set the code page to UTF-8, we cant use it to read due to
// windows bugs. We fall back to using the UTF-16 api, buffering the stream
// to avoid splitting codepoints.
int S_windows_stdin_read(unsigned char *buf, int size) {
const HANDLE h = GetStdHandle(STD_INPUT_HANDLE);
if (h == INVALID_HANDLE_VALUE || size <= 0)
return WINDOWS_CONSOLE_ERROR;
const int type = GetFileType(h);
switch (type) {
case FILE_TYPE_CHAR: {
int n = 0;
const int ret = S_windows_console_read_utf8(h, buf, size, &n);
switch (ret) {
case WINDOWS_CONSOLE_EOF : return 0;
case WINDOWS_CONSOLE_SUCCESS : return n;
default : return -1;
}
}
case FILE_TYPE_DISK:
case FILE_TYPE_PIPE:
return _read(0, buf, size);
default:
return -1;
}
}