added unicode support to windows console i/o

original commit: e7e638e871ac4b46a84149dda93aae8741683e0a
This commit is contained in:
Neal Alexander 2020-01-09 19:34:58 +01:00 committed by Bob Burger
parent cec5dce677
commit e7bb4def71
4 changed files with 259 additions and 34 deletions

View File

@ -68,6 +68,11 @@ static IBOOL s_ee_init_term(void) {
return init_status;
}
/* Return 1 when 'uc' is a UTF-16 surrogate code unit (0xD800..0xDFFF,
   i.e. either half of a surrogate pair), otherwise 0. */
static int utf16_is_surrogate(WORD uc) {
  const unsigned int first_surrogate = 0xd800u;
  const unsigned int last_surrogate = 0xdfffu;
  return uc >= first_surrogate && uc <= last_surrogate;
}
/* returns char, eof, #t (winched), or #f (nothing ready), the latter
only if blockp is false */
static ptr s_ee_read_char(IBOOL blockp) {
@ -104,13 +109,13 @@ static ptr s_ee_read_char(IBOOL blockp) {
tc = get_thread_context();
if (DISABLECOUNT(tc) == FIX(0)) {
deactivate_thread(tc);
succ = ReadConsoleInput(hStdin, irInBuf, 1, &cNumRead);
succ = ReadConsoleInputW(hStdin, irInBuf, 1, &cNumRead);
reactivate_thread(tc);
} else {
succ = ReadConsoleInput(hStdin, irInBuf, 1, &cNumRead);
succ = ReadConsoleInputW(hStdin, irInBuf, 1, &cNumRead);
}
#else /* PTHREADS */
succ = ReadConsoleInput(hStdin, irInBuf, 1, &cNumRead);
succ = ReadConsoleInputW(hStdin, irInBuf, 1, &cNumRead);
#endif /* PTHREADS */
@ -125,15 +130,22 @@ static ptr s_ee_read_char(IBOOL blockp) {
KEY_EVENT_RECORD ker = irInBuf[0].Event.KeyEvent;
rptcnt = ker.wRepeatCount;
if (ker.bKeyDown) {
char c;
WCHAR c;
if (c = ker.uChar.AsciiChar) {
if (c = ker.uChar.UnicodeChar) {
/* translate ^@ 2) and ^<space> to nul */
if (c == 0x20 && (ker.dwControlKeyState & (LEFT_CTRL_PRESSED|RIGHT_CTRL_PRESSED)))
if (c == 0x20 && (ker.dwControlKeyState
& (LEFT_CTRL_PRESSED|RIGHT_CTRL_PRESSED))) {
buf[0] = 0;
else
buf[0] = c;
buflen = 1;
} else if (utf16_is_surrogate(c)) {
return Schar('\0');
} else {
return Schar(c);
}
} else {
switch (ker.wVirtualKeyCode) {
case VK_DELETE:
@ -508,11 +520,35 @@ static ptr s_ee_get_clipboard(void) {
ptr x = S_G.null_string;
if (OpenClipboard((HWND)0)) {
HANDLE h = GetClipboardData(CF_TEXT);
HANDLE h = GetClipboardData(CF_UNICODETEXT);
if (h != (HANDLE *)0) {
char *s = (char *)GlobalLock(h);
if (s != (char *)0) x = Sstring(s);
wchar_t *s = (wchar_t *)GlobalLock(h);
if (s != NULL) {
int sz8 = WideCharToMultiByte(CP_UTF8,
WC_ERR_INVALID_CHARS,
s, -1,
NULL,
0, NULL, NULL);
if (sz8 > 0) {
unsigned char *buf = (unsigned char*) malloc(sz8);
if (buf != NULL) {
if (WideCharToMultiByte(CP_UTF8,
WC_ERR_INVALID_CHARS,
s, -1,
buf,
sz8, NULL, NULL)) {
x = Sstring_utf8(buf, sz8 - 1);
}
free(buf);
}
}
}
GlobalUnlock(h);
}
CloseClipboard();
@ -521,9 +557,9 @@ static ptr s_ee_get_clipboard(void) {
return x;
}
static void s_ee_write_char(wchar_t c) {
if (c > 255) c = '?';
putchar(c);
/* Write a single wide character to the console via hStdout.
   The result of WriteConsoleW is not inspected (best effort). */
static void s_ee_write_char(wchar_t c) { // TODO: utf-32 chars?
  const wchar_t unit[1] = { c };
  DWORD written;
  WriteConsoleW(hStdout, unit, 1, &written, NULL);
}
#else /* WIN32 */

View File

@ -396,6 +396,14 @@ extern int S_windows_stat64(const char *pathname, struct STATBUF *buffer);
extern int S_windows_system(const char *command);
extern int S_windows_unlink(const char *pathname);
extern char *S_windows_getcwd(char *buffer, int maxlen);
/* Read up to 'size' bytes from standard input into 'buf'.  Returns the
   number of bytes read, 0 at end of file, or -1 on error. */
extern int S_windows_stdin_read(unsigned char *buf, int size);
/* Write 'size' bytes from 'buf' to file descriptor 1 / 2 respectively and
   return the result of the underlying _write call. */
extern int S_windows_stdout_write(unsigned char *buf, int size);
extern int S_windows_stderr_write(unsigned char *buf, int size);
/* NOTE(review): no definition of this function is visible alongside this
   declaration -- confirm its contract against the implementation. */
extern int S_windows_console_write_utf8(HANDLE h, unsigned char *b_ptr, int b_n, int *written);
/* Copy up to 'want' bytes of UTF-8 console input from 'h' into 'output',
   storing the byte count in '*got'.  Returns a WINDOWS_CONSOLE_* status
   code rather than a byte count. */
extern int S_windows_console_read_utf8(HANDLE h, unsigned char *output, int want, int *got);
#endif /* WIN32 */
#ifdef FEATURE_EXPEDITOR

View File

@ -417,6 +417,40 @@ ptr S_close_fd(ptr file, IBOOL gzflag) {
#define IO_SIZE_T size_t
#endif /* WIN32 */
/* Read up to 'size' bytes from 'fd' into 'buf', storing the result of the
 * read (byte count, or a negative value on failure) in '*m'.
 *
 * On WIN32, fd 0 is routed through S_windows_stdin_read with the console
 * control handler masked for the duration of the read; the return value is
 * 1 if masking or unmasking fails and 0 otherwise.  For every other
 * descriptor the read goes through READ under FD_EINTR_GUARD and the
 * guard's flag is returned.
 */
int os_read(ptr tc, INT fd, unsigned char *buf, IO_SIZE_T size, int *m) {
#ifdef WIN32
  if (fd == 0) {
    // There may be a race condition or other problem with this signal mask.
    // Originally it was placed deeper down, right next to the win32 API
    // call, but this failed to mask the signal for some confusing reason.
    if (SetConsoleCtrlHandler(NULL, TRUE)) {
      *m = S_windows_stdin_read(buf, size);
      /* report failure to restore the handler even though *m is set */
      return SetConsoleCtrlHandler(NULL, FALSE) ? 0 : 1;
    }
    *m = -1;
    return 1;
  }
#endif /* WIN32 */

  int flag = 0;
  FD_EINTR_GUARD(*m >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)),
                 flag,
                 *m = READ(fd, buf, size));
  return flag;
}
/* Returns string on error, #!eof on end-of-file and integer-count otherwise */
ptr S_bytevector_read(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) {
INT saved_errno = 0;
@ -433,33 +467,20 @@ ptr S_bytevector_read(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) {
#endif
LOCKandDEACTIVATE(tc, bv)
#ifdef WIN32
if (!gzflag && fd == 0) {
DWORD error_code;
SetConsoleCtrlHandler(NULL, TRUE);
SetLastError(0);
m = _read(0, &BVIT(bv,start), (IO_SIZE_T)count);
error_code = GetLastError();
SetConsoleCtrlHandler(NULL, FALSE);
if (m == 0 && error_code == 0x3e3) {
KEYBOARDINTERRUPTPENDING(tc) = Strue;
SOMETHINGPENDING(tc) = Strue;
}
} else
#endif /* WIN32 */
{
if (!gzflag) {
FD_EINTR_GUARD(
m >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)), flag,
m = READ(fd,&BVIT(bv,start),(IO_SIZE_T)count));
int len = 0;
flag = os_read(tc, fd, &BVIT(bv, start), (IO_SIZE_T)count, &len);
m = len;
} else {
GZ_EINTR_GUARD(
1, m >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)),
flag, gzfile,
m = S_glzread(gzfile, &BVIT(bv,start), (GZ_IO_SIZE_T)count));
}
}
saved_errno = errno;
}
REACTIVATEandUNLOCK(tc, bv)
if (Sboolean_value(KEYBOARDINTERRUPTPENDING(tc))) {
@ -535,6 +556,21 @@ ptr S_bytevector_read_nb(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag)
#endif /* WIN32 */
}
/* Write 'size' bytes from 'buf' to 'fd' and return the writer's result.
 * On WIN32, descriptors 1 and 2 are dispatched to the dedicated
 * stdout/stderr writers; everything else goes through WRITE.
 */
int os_write(INT fd, unsigned char *buf, IO_SIZE_T size) {
#ifdef WIN32
  switch (fd) {
    case 1:
      return S_windows_stdout_write(buf, size);
    case 2:
      return S_windows_stderr_write(buf, size);
    default:
      break;
  }
#endif
  return WRITE(fd, buf, size);
}
ptr S_bytevector_write(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) {
iptr i, s, c;
ptr tc = get_thread_context();
@ -552,6 +588,7 @@ ptr S_bytevector_write(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) {
/* if we could know that fd is nonblocking, we wouldn't need to deactivate.
we could test ioctl, but some other thread could change it before we actually
get around to writing. */
LOCKandDEACTIVATE(tc, bv)
if (gzflag) {
/* strangely, gzwrite returns 0 on error */
@ -561,7 +598,7 @@ ptr S_bytevector_write(ptr file, ptr bv, iptr start, iptr count, IBOOL gzflag) {
i = S_glzwrite(gzfile, &BVIT(bv,s), (GZ_IO_SIZE_T)cx));
} else {
FD_EINTR_GUARD(i >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)),
flag, i = WRITE(fd, &BVIT(bv,s), (IO_SIZE_T)cx));
flag, i = os_write(fd, &BVIT(bv,s), (IO_SIZE_T)cx));
}
saved_errno = errno;
REACTIVATEandUNLOCK(tc, bv)
@ -616,7 +653,7 @@ ptr S_put_byte(ptr file, INT byte, IBOOL gzflag) {
i = S_glzwrite(gzfile, buf, 1));
} else {
FD_EINTR_GUARD(i >= 0 || Sboolean_value(KEYBOARDINTERRUPTPENDING(tc)),
flag, i = WRITE(fd, buf, 1));
flag, i = os_write(fd, buf, 1));
}
saved_errno = errno;
REACTIVATE(tc)

View File

@ -33,6 +33,7 @@ void S_machine_init() {
Sregister_symbol("(windows)PutRegistry", (void *)s_PutRegistry);
Sregister_symbol("(windows)RemoveRegistry", (void *)s_RemoveRegistry);
Sregister_symbol("(windows)ErrorString", (void *)s_ErrorString);
SetConsoleOutputCP(CP_UTF8);
}
INT S_getpagesize() {
@ -495,3 +496,146 @@ char *Sgetenv(const char *name) {
}
}
}
// Console

// Capacity, in UTF-16 code units, of the static console input buffer.
#define WINDOWS_CONSOLE_STATIC_INPUT_LEN 1024
// Number of UTF-8 bytes budgeted per UTF-16 code unit when sizing buffers.
#define UTF8_MAX_CODEPOINT 4
// Status codes returned by the console read helpers.  The negative values
// are parenthesized so they expand safely inside any expression.
#define WINDOWS_CONSOLE_ERROR (-1)
#define WINDOWS_CONSOLE_EOF (-2)
#define WINDOWS_CONSOLE_SUCCESS 0
/*
 * Read one chunk of input from console handle 'h' with ReadConsoleW and
 * convert it to UTF-8 in 'buf'.
 *
 *   h     console input handle
 *   buf   destination for the UTF-8 bytes
 *   size  capacity of 'buf' in bytes; must be at least
 *         WINDOWS_CONSOLE_STATIC_INPUT_LEN * UTF8_MAX_CODEPOINT
 *   got   receives the number of UTF-8 bytes stored (0 on any failure)
 *
 * Returns WINDOWS_CONSOLE_SUCCESS, WINDOWS_CONSOLE_EOF when the console
 * delivered no characters, or WINDOWS_CONSOLE_ERROR on bad arguments, a
 * failed read, or a failed conversion.
 */
static int _windows_console_read_utf8_chunk(HANDLE h, unsigned char *buf,
                                            int size, int *got) {
  // 'utf16_buf' should be locked by the caller, and 'buf' must have the
  // correct size relative to 'utf16_buf'.
  static wchar_t utf16_buf[WINDOWS_CONSOLE_STATIC_INPUT_LEN];
  const DWORD utf16_cap = (DWORD)(sizeof utf16_buf / sizeof utf16_buf[0]);

  if (h == INVALID_HANDLE_VALUE || got == NULL || buf == NULL)
    return WINDOWS_CONSOLE_ERROR;
  *got = 0;
  if (size < (WINDOWS_CONSOLE_STATIC_INPUT_LEN * UTF8_MAX_CODEPOINT))
    return WINDOWS_CONSOLE_ERROR;

  DWORD utf16_len = 0;
  // ReadConsoleW expects the number of characters to read, not the buffer
  // size in bytes; passing 'sizeof utf16_buf' would permit a write past the
  // end of the buffer.
  if (!ReadConsoleW(h, utf16_buf, utf16_cap, &utf16_len, NULL))
    return WINDOWS_CONSOLE_ERROR;
  else if (utf16_len == 0)
    return WINDOWS_CONSOLE_EOF;
  else if (utf16_len > utf16_cap)
    return WINDOWS_CONSOLE_ERROR;

  // WC_ERR_INVALID_CHARS makes the conversion fail on malformed UTF-16
  // instead of silently substituting a replacement character.
  int utf8_len = WideCharToMultiByte(CP_UTF8,
                                     WC_ERR_INVALID_CHARS,
                                     utf16_buf, (int)utf16_len,
                                     (char *)buf,
                                     size, NULL, NULL);
  if (utf8_len <= 0)
    return WINDOWS_CONSOLE_ERROR;
  *got = utf8_len;
  return WINDOWS_CONSOLE_SUCCESS;
}
/*
 * Hand out up to 'want' bytes of UTF-8 console input from handle 'h'.
 *
 * Input is pulled from the console one chunk at a time into a static
 * surplus buffer; bytes that do not fit in 'output' stay buffered for the
 * next call.  Regardless of the size of 'want', the maximum which can be
 * read per call is WINDOWS_CONSOLE_STATIC_INPUT_LEN * UTF8_MAX_CODEPOINT.
 * Access to the static state is serialized with a slim reader/writer lock.
 *
 * '*got' receives the number of bytes copied.  Returns
 * WINDOWS_CONSOLE_SUCCESS, or the status reported by the chunk reader when
 * refilling the buffer fails.
 */
int S_windows_console_read_utf8(HANDLE h, unsigned char *output, int want,
                                int *got) {
  static SRWLOCK lock = SRWLOCK_INIT;
  static int surplus = 0;
  static unsigned char surplus_buf[WINDOWS_CONSOLE_STATIC_INPUT_LEN
                                   * UTF8_MAX_CODEPOINT];
  int status = WINDOWS_CONSOLE_SUCCESS;

  if (h == INVALID_HANDLE_VALUE || got == NULL || output == NULL)
    return WINDOWS_CONSOLE_ERROR;
  *got = 0;
  if (want <= 0)
    return WINDOWS_CONSOLE_SUCCESS;

  AcquireSRWLockExclusive(&lock);

  /* refill only when nothing is left over from a previous call */
  if (surplus <= 0)
    status = _windows_console_read_utf8_chunk(h, surplus_buf,
                                              sizeof surplus_buf,
                                              &surplus);

  if (status == WINDOWS_CONSOLE_SUCCESS) {
    const int take = max(0, min(surplus, want));
    memcpy(output, surplus_buf, take);
    surplus -= take;
    *got = take;
    /* slide the unread remainder to the front of the buffer */
    if (surplus > 0)
      memmove(surplus_buf, surplus_buf + take, surplus);
  }

  ReleaseSRWLockExclusive(&lock);
  return status;
}
// Having previously set the code page to UTF-8, we can output directly.
/* Write 'size' bytes from 'buf' to file descriptor 2 and return the
   result of _write. */
int S_windows_stderr_write(unsigned char *buf, int size) {
  const int stderr_fd = 2;
  return _write(stderr_fd, buf, size);
}
/* Write 'size' bytes from 'buf' to file descriptor 1 and return the
   result of _write. */
int S_windows_stdout_write(unsigned char *buf, int size) {
  const int stdout_fd = 1;
  return _write(stdout_fd, buf, size);
}
// Despite having set the code page to UTF-8, we can't use it to read due to
// Windows bugs. We fall back to using the UTF-16 API, buffering the stream
// to avoid splitting codepoints.
int S_windows_stdin_read(unsigned char *buf, int size) {
const HANDLE h = GetStdHandle(STD_INPUT_HANDLE);
if (h == INVALID_HANDLE_VALUE || size <= 0)
return WINDOWS_CONSOLE_ERROR;
const int type = GetFileType(h);
switch (type) {
case FILE_TYPE_CHAR: {
int n = 0;
const int ret = S_windows_console_read_utf8(h, buf, size, &n);
switch (ret) {
case WINDOWS_CONSOLE_EOF : return 0;
case WINDOWS_CONSOLE_SUCCESS : return n;
default : return -1;
}
}
case FILE_TYPE_DISK:
case FILE_TYPE_PIPE:
return _read(0, buf, size);
default:
return -1;
}
}