Windows: Unicode I/O for console ports
Use WriteConsoleW and ReadConsoleW to support Unicode input and output for a console independent of its code page. Closes #2087
This commit is contained in:
parent
4d04a77cb3
commit
f5f4e6cf03
60
pkgs/racket-test-core/tests/racket/draw-box.rkt
Normal file
60
pkgs/racket-test-core/tests/racket/draw-box.rkt
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
#lang racket/base
|
||||||
|
|
||||||
|
;; This test is intended for checking Unicode output to a Windows
|
||||||
|
;; console --- because that requires yet another decoding/encoding
|
||||||
|
;; layer within rktio to use WriteConsoleW --- so its output is
|
||||||
|
;; intended to be inspected directly.
|
||||||
|
|
||||||
|
(define H 100)
|
||||||
|
(define W 40)
|
||||||
|
|
||||||
|
(define MAX-FLUSH-SPAN 10)
|
||||||
|
|
||||||
|
(define (go content-char)
|
||||||
|
|
||||||
|
(define tl #\u2554) (define h #\u2550) (define tr #\u2557)
|
||||||
|
(define v #\u2551)
|
||||||
|
(define bl #\u255A) (define br #\u255D)
|
||||||
|
|
||||||
|
(define (draw)
|
||||||
|
(display (string tl))
|
||||||
|
(display (make-string (- W 2) h))
|
||||||
|
(display (string tr))
|
||||||
|
(newline)
|
||||||
|
|
||||||
|
(for ([i (- H 2)])
|
||||||
|
(display v)
|
||||||
|
(display (make-string (- W 2) content-char))
|
||||||
|
(display v)
|
||||||
|
(newline))
|
||||||
|
|
||||||
|
(display (string bl))
|
||||||
|
(display (make-string (- W 2) h))
|
||||||
|
(display (string br))
|
||||||
|
(newline))
|
||||||
|
|
||||||
|
;; ----------------------------------------
|
||||||
|
;; Direct to console:
|
||||||
|
(draw)
|
||||||
|
|
||||||
|
;; ----------------------------------------
|
||||||
|
;; Write UTF-8 byte string piecewise (to check that
|
||||||
|
;; incomplete encodings are buffered)
|
||||||
|
|
||||||
|
(define o (open-output-bytes))
|
||||||
|
(parameterize ([current-output-port o])
|
||||||
|
(draw))
|
||||||
|
|
||||||
|
(define bstr (get-output-bytes o))
|
||||||
|
(let loop ([i 0])
|
||||||
|
(unless (= i (bytes-length bstr))
|
||||||
|
(define span (min (add1 (random MAX-FLUSH-SPAN))
|
||||||
|
(- (bytes-length bstr) i)))
|
||||||
|
(write-bytes bstr (current-output-port) i (+ i span))
|
||||||
|
(flush-output)
|
||||||
|
(loop (+ i span)))))
|
||||||
|
|
||||||
|
(go #\uA9) ; copyright (2 bytes in UTF-8)
|
||||||
|
(go #\u3BB) ; lambda (3 bytes in UTF-8)
|
||||||
|
(go #\U24b6) ; white-circle "A"
|
||||||
|
(go #\U1F150) ; black-circle "A" (4 bytes in UTF-8; surrogate pair in UTF-16)
|
|
@ -43,6 +43,8 @@ struct rktio_fd_t {
|
||||||
char *buffer; /* shared with reading thread */
|
char *buffer; /* shared with reading thread */
|
||||||
int has_pending_byte; /* for text-mode input, may be dropped by a following lf */
|
int has_pending_byte; /* for text-mode input, may be dropped by a following lf */
|
||||||
int pending_byte; /* for text-mode input, either a CR waiting to decode, or byte that didn't fit */
|
int pending_byte; /* for text-mode input, either a CR waiting to decode, or byte that didn't fit */
|
||||||
|
int leftover_len; /* for bytes that should be written, but that form a UTF-8 encoding prefix */
|
||||||
|
char leftover[6];
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -129,6 +131,11 @@ static intptr_t adjust_input_text_for_pending_cr(rktio_fd_t *rfd, char *buffer,
|
||||||
static const char *adjust_output_text(const char *buffer, intptr_t *towrite);
|
static const char *adjust_output_text(const char *buffer, intptr_t *towrite);
|
||||||
static intptr_t recount_output_text(const char *orig_buffer, const char *buffer, intptr_t wrote);
|
static intptr_t recount_output_text(const char *orig_buffer, const char *buffer, intptr_t wrote);
|
||||||
|
|
||||||
|
static wchar_t *convert_output_wtext(const char *buffer, intptr_t *_towrite,
|
||||||
|
int *_can_leftover, int *_keep_leftover,
|
||||||
|
int leftover_len, char *leftover);
|
||||||
|
static intptr_t recount_output_wtext(wchar_t *w_buffer, intptr_t winwrote);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*========================================================================*/
|
/*========================================================================*/
|
||||||
|
@ -1021,15 +1028,18 @@ static void deinit_read_fd(rktio_t *rktio, rktio_fd_t *rfd, int full_close)
|
||||||
static long WINAPI WindowsFDReader(Win_FD_Input_Thread *th)
|
static long WINAPI WindowsFDReader(Win_FD_Input_Thread *th)
|
||||||
{
|
{
|
||||||
DWORD toget, got;
|
DWORD toget, got;
|
||||||
int perma_eof = 0;
|
int perma_eof = 0, ft, is_console = 0;
|
||||||
HANDLE eof_wait = NULL;
|
HANDLE eof_wait = NULL;
|
||||||
|
|
||||||
if (GetFileType((HANDLE)th->fd) == FILE_TYPE_PIPE) {
|
ft = GetFileType((HANDLE)th->fd);
|
||||||
|
|
||||||
|
if (ft == FILE_TYPE_PIPE) {
|
||||||
/* Reading from a pipe will return early when data is available. */
|
/* Reading from a pipe will return early when data is available. */
|
||||||
toget = RKTIO_FD_BUFFSIZE;
|
toget = RKTIO_FD_BUFFSIZE;
|
||||||
} else {
|
} else {
|
||||||
/* Non-pipe: get one char at a time: */
|
/* Non-pipe: get one char at a time: */
|
||||||
toget = 1;
|
toget = 1;
|
||||||
|
is_console = (ft == FILE_TYPE_CHAR);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!perma_eof && !th->err) {
|
while (!perma_eof && !th->err) {
|
||||||
|
@ -1044,8 +1054,19 @@ static long WINAPI WindowsFDReader(Win_FD_Input_Thread *th)
|
||||||
/* Spurious wake-up? */
|
/* Spurious wake-up? */
|
||||||
ReleaseSemaphore(th->lock_sema, 1, NULL);
|
ReleaseSemaphore(th->lock_sema, 1, NULL);
|
||||||
} else {
|
} else {
|
||||||
|
int ok;
|
||||||
ReleaseSemaphore(th->lock_sema, 1, NULL);
|
ReleaseSemaphore(th->lock_sema, 1, NULL);
|
||||||
if (ReadFile(th->fd, th->buffer, toget, &got, NULL)) {
|
if (!is_console)
|
||||||
|
ok = ReadFile(th->fd, th->buffer, toget, &got, NULL);
|
||||||
|
else {
|
||||||
|
# define CONSOLE_BUFFER_IN_SIZE 16
|
||||||
|
wchar_t w_buffer[CONSOLE_BUFFER_IN_SIZE];
|
||||||
|
ok = ReadConsoleW(th->fd, w_buffer, CONSOLE_BUFFER_IN_SIZE, &got, NULL);
|
||||||
|
if (ok) {
|
||||||
|
got = WideCharToMultiByte(CP_UTF8, 0, w_buffer, got, th->buffer, RKTIO_FD_BUFFSIZE, NULL, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ok) {
|
||||||
WaitForSingleObject(th->lock_sema, INFINITE);
|
WaitForSingleObject(th->lock_sema, INFINITE);
|
||||||
th->avail = got;
|
th->avail = got;
|
||||||
th->offset = 0;
|
th->offset = 0;
|
||||||
|
@ -1172,19 +1193,42 @@ intptr_t rktio_write(rktio_t *rktio, rktio_fd_t *rfd, const char *buffer, intptr
|
||||||
|
|
||||||
/* If we try to write too much at once, the result
|
/* If we try to write too much at once, the result
|
||||||
is ERROR_NOT_ENOUGH_MEMORY (as opposed to a partial write). */
|
is ERROR_NOT_ENOUGH_MEMORY (as opposed to a partial write). */
|
||||||
int ok;
|
int ok, to_console, can_leftover = 0, keep_leftover = 0;
|
||||||
intptr_t towrite = len;
|
intptr_t towrite = len;
|
||||||
const char *orig_buffer = buffer;
|
const char *orig_buffer = buffer;
|
||||||
|
wchar_t *w_buffer = NULL;
|
||||||
|
DWORD max_winwrote;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
if (rfd->modes & RKTIO_OPEN_TEXT)
|
if (rfd->modes & RKTIO_OPEN_TEXT)
|
||||||
buffer = adjust_output_text(buffer, &towrite);
|
buffer = adjust_output_text(buffer, &towrite);
|
||||||
|
|
||||||
|
max_winwrote = towrite;
|
||||||
|
|
||||||
|
to_console = rktio_fd_is_terminal(rktio, rfd);
|
||||||
|
if (to_console) {
|
||||||
|
/* Decode UTF-8 and write a chunk on a character boundary. */
|
||||||
|
w_buffer = convert_output_wtext(buffer, &towrite,
|
||||||
|
&can_leftover, &keep_leftover,
|
||||||
|
rfd->leftover_len, rfd->leftover);
|
||||||
|
}
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
ok = WriteFile((HANDLE)rfd->fd, buffer, towrite, &winwrote, NULL);
|
if (!to_console)
|
||||||
|
ok = WriteFile((HANDLE)rfd->fd, buffer, towrite, &winwrote, NULL);
|
||||||
|
else {
|
||||||
|
if (towrite)
|
||||||
|
ok = WriteConsoleW((HANDLE)rfd->fd, w_buffer, towrite, &winwrote, NULL);
|
||||||
|
else {
|
||||||
|
/* can happen if can_leftover is > 0 */
|
||||||
|
ok = 1;
|
||||||
|
winwrote = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!ok)
|
if (!ok)
|
||||||
err = GetLastError();
|
err = GetLastError();
|
||||||
|
|
||||||
if (!ok && (err == ERROR_NOT_ENOUGH_MEMORY)) {
|
if (!ok && (err == ERROR_NOT_ENOUGH_MEMORY)) {
|
||||||
towrite = towrite >> 1;
|
towrite = towrite >> 1;
|
||||||
if (towrite && (buffer != orig_buffer)) {
|
if (towrite && (buffer != orig_buffer)) {
|
||||||
|
@ -1205,6 +1249,27 @@ intptr_t rktio_write(rktio_t *rktio, rktio_fd_t *rfd, const char *buffer, intptr
|
||||||
return RKTIO_WRITE_ERROR;
|
return RKTIO_WRITE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (to_console) {
|
||||||
|
/* Convert wchar count to byte count, taking into account leftovers */
|
||||||
|
int wrote_all = (winwrote == towrite);
|
||||||
|
if (winwrote) {
|
||||||
|
/* Recounting only works right if the output was well-formed
|
||||||
|
UTF-8. Weird things happen otherwise... but we guard against
|
||||||
|
external inconsistency with the `max_winwrote` check below. */
|
||||||
|
winwrote = recount_output_wtext(w_buffer, winwrote);
|
||||||
|
winwrote -= rfd->leftover_len;
|
||||||
|
rfd->leftover_len = 0;
|
||||||
|
}
|
||||||
|
if (wrote_all && can_leftover) {
|
||||||
|
memcpy(rfd->leftover + keep_leftover, buffer + winwrote, can_leftover);
|
||||||
|
rfd->leftover_len = can_leftover + keep_leftover;
|
||||||
|
winwrote += can_leftover;
|
||||||
|
}
|
||||||
|
free(w_buffer);
|
||||||
|
if (winwrote > max_winwrote)
|
||||||
|
winwrote = max_winwrote;
|
||||||
|
}
|
||||||
|
|
||||||
if (buffer != orig_buffer) {
|
if (buffer != orig_buffer) {
|
||||||
/* Convert converted count back to original count: */
|
/* Convert converted count back to original count: */
|
||||||
winwrote = recount_output_text(orig_buffer, buffer, winwrote);
|
winwrote = recount_output_text(orig_buffer, buffer, winwrote);
|
||||||
|
@ -1484,6 +1549,108 @@ static intptr_t recount_output_text(const char *orig_buffer, const char *buffer,
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static wchar_t *convert_output_wtext(const char *buffer, intptr_t *_towrite,
|
||||||
|
int *_can_leftover, int *_keep_leftover,
|
||||||
|
int leftover_len, char *leftover)
|
||||||
|
{
|
||||||
|
/* Figure out how many bytes we can convert to complete wide
|
||||||
|
characters. To avoid quadratic behavior overall, we'll limit the
|
||||||
|
number of bytes.
|
||||||
|
|
||||||
|
The given `leftover_len` and `leftover` is a prefix on `buffer`.
|
||||||
|
If the tail (after writing all other bytes) is an incomplete
|
||||||
|
UTF-8 prefix, report the prefix length in `_can_leftover`. */
|
||||||
|
intptr_t i, count, len = *_towrite;
|
||||||
|
char *src_buffer;
|
||||||
|
wchar_t *dest_buffer;
|
||||||
|
int want, span = 0;
|
||||||
|
|
||||||
|
if (leftover_len) {
|
||||||
|
/* Assume that leftover is a valid prefix: */
|
||||||
|
int v = ((unsigned char *)leftover)[0];
|
||||||
|
if ((v & 0xF8) == 0xF0)
|
||||||
|
span = 4;
|
||||||
|
else if ((v & 0xF0) == 0xE0)
|
||||||
|
span = 3;
|
||||||
|
else
|
||||||
|
span = 2;
|
||||||
|
} else
|
||||||
|
span = 0;
|
||||||
|
want = span - leftover_len;
|
||||||
|
|
||||||
|
for (i = 0, count = 0; (i < len) && (count < 1024); i++) {
|
||||||
|
int v = ((unsigned char *)buffer)[i];
|
||||||
|
if (want) {
|
||||||
|
if ((v & 0xC0) == 0x80) {
|
||||||
|
/* valid continuation byte */
|
||||||
|
want--;
|
||||||
|
if (!want) {
|
||||||
|
count++;
|
||||||
|
if (span == 4)
|
||||||
|
count++; /* surrogate pair */
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* not a valid continuation byte */
|
||||||
|
count++;
|
||||||
|
want = 0;
|
||||||
|
--i; /* retry byte */
|
||||||
|
}
|
||||||
|
} else if (!(v & 0x80)) {
|
||||||
|
count++;
|
||||||
|
} else if ((v & 0xF8) == 0xF0) {
|
||||||
|
span = 4;
|
||||||
|
want = 3;
|
||||||
|
} else if ((v & 0xF0) == 0xE0) {
|
||||||
|
span = 3;
|
||||||
|
want = 2;
|
||||||
|
} else {
|
||||||
|
span = 2;
|
||||||
|
want = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((i == len) && (want > 0)) {
|
||||||
|
/* consuming all input, so set leftover */
|
||||||
|
int keep = span - want;
|
||||||
|
if (i >= keep) {
|
||||||
|
*_can_leftover = keep;
|
||||||
|
i -= keep;
|
||||||
|
*_keep_leftover = 0;
|
||||||
|
} else {
|
||||||
|
*_can_leftover = (keep - leftover_len);
|
||||||
|
*_keep_leftover = leftover_len;
|
||||||
|
i = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
*_can_leftover = 0;
|
||||||
|
*_keep_leftover = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (leftover_len) {
|
||||||
|
src_buffer = malloc(i + leftover_len);
|
||||||
|
memcpy(src_buffer, leftover, leftover_len);
|
||||||
|
memcpy(src_buffer + leftover_len, buffer, i);
|
||||||
|
i += leftover_len;
|
||||||
|
} else
|
||||||
|
src_buffer = (char *)buffer;
|
||||||
|
|
||||||
|
dest_buffer = (wchar_t *)malloc(sizeof(wchar_t) * count);
|
||||||
|
|
||||||
|
if (count > 0)
|
||||||
|
count = MultiByteToWideChar(CP_UTF8, 0, src_buffer, i, dest_buffer, count);
|
||||||
|
*_towrite = count;
|
||||||
|
|
||||||
|
if (leftover_len)
|
||||||
|
free(src_buffer);
|
||||||
|
|
||||||
|
return dest_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
static intptr_t recount_output_wtext(wchar_t *w_buffer, intptr_t winwrote)
|
||||||
|
{
|
||||||
|
return WideCharToMultiByte(CP_UTF8, 0, w_buffer, winwrote, NULL, 0, NULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
static void deinit_write_fd(rktio_t *rktio, rktio_fd_t *rfd, int full_close)
|
static void deinit_write_fd(rktio_t *rktio, rktio_fd_t *rfd, int full_close)
|
||||||
{
|
{
|
||||||
if (rfd->oth) {
|
if (rfd->oth) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user