diff --git a/pkgs/racket-test-core/tests/racket/draw-box.rkt b/pkgs/racket-test-core/tests/racket/draw-box.rkt new file mode 100644 index 0000000000..39b615a6aa --- /dev/null +++ b/pkgs/racket-test-core/tests/racket/draw-box.rkt @@ -0,0 +1,60 @@ +#lang racket/base + +;; This test is intended for checking Unicode output to a Windows +;; console --- because that requires yet another decoding/encoding +;; layer within rktio to use WriteConsoleW --- so its output is +;; intended to be inspected directly. + +(define H 100) +(define W 40) + +(define MAX-FLUSH-SPAN 10) + +(define (go content-char) + + (define tl #\u2554) (define h #\u2550) (define tr #\u2557) + (define v #\u2551) + (define bl #\u255A) (define br #\u255D) + + (define (draw) + (display (string tl)) + (display (make-string (- W 2) h)) + (display (string tr)) + (newline) + + (for ([i (- H 2)]) + (display v) + (display (make-string (- W 2) content-char)) + (display v) + (newline)) + + (display (string bl)) + (display (make-string (- W 2) h)) + (display (string br)) + (newline)) + + ;; ---------------------------------------- + ;; Direct to console: + (draw) + + ;; ---------------------------------------- + ;; Write UTF-8 byte string piecewise (to check that + ;; incomplete encodings are buffered) + + (define o (open-output-bytes)) + (parameterize ([current-output-port o]) + (draw)) + + (define bstr (get-output-bytes o)) + (let loop ([i 0]) + (unless (= i (bytes-length bstr)) + (define span (min (add1 (random MAX-FLUSH-SPAN)) + (- (bytes-length bstr) i))) + (write-bytes bstr (current-output-port) i (+ i span)) + (flush-output) + (loop (+ i span))))) + +(go #\uA9) ; copyright (2 bytes in UTF-8) +(go #\u3BB) ; lambda (3 bytes in UTF-8) +(go #\U24b6) ; white-circle "A" +(go #\U1F150) ; black-circle "A" (2 bytes in UTF-8; surrogate pair in UTF-16) diff --git a/racket/src/rktio/rktio_fd.c b/racket/src/rktio/rktio_fd.c index d396f8266b..469d400d87 100644 --- a/racket/src/rktio/rktio_fd.c +++ b/racket/src/rktio/rktio_fd.c @@ -43,6 +43,8 @@ struct rktio_fd_t { char *buffer; /* shared with reading thread */ int has_pending_byte; /* for text-mode input, may be dropped by a following lf */ int pending_byte; /* for text-mode input, either a CR waiting to decode, or byte that didn't fit */ + int leftover_len; /* for bytes that should be written, but that form a UTF-8 encoding prefix */ + char leftover[6]; #endif }; @@ -129,6 +131,11 @@ static intptr_t adjust_input_text_for_pending_cr(rktio_fd_t *rfd, char *buffer, static const char *adjust_output_text(const char *buffer, intptr_t *towrite); static intptr_t recount_output_text(const char *orig_buffer, const char *buffer, intptr_t wrote); +static wchar_t *convert_output_wtext(const char *buffer, intptr_t *_towrite, + int *_can_leftover, int *_keep_leftover, + int leftover_len, char *leftover); +static intptr_t recount_output_wtext(wchar_t *w_buffer, intptr_t winwrote); + #endif /*========================================================================*/ @@ -1021,15 +1028,18 @@ static void deinit_read_fd(rktio_t *rktio, rktio_fd_t *rfd, int full_close) static long WINAPI WindowsFDReader(Win_FD_Input_Thread *th) { DWORD toget, got; - int perma_eof = 0; + int perma_eof = 0, ft, is_console = 0; HANDLE eof_wait = NULL; - if (GetFileType((HANDLE)th->fd) == FILE_TYPE_PIPE) { + ft = GetFileType((HANDLE)th->fd); + + if (ft == FILE_TYPE_PIPE) { /* Reading from a pipe will return early when data is available. */ toget = RKTIO_FD_BUFFSIZE; } else { /* Non-pipe: get one char at a time: */ toget = 1; + is_console = (ft == FILE_TYPE_CHAR); } while (!perma_eof && !th->err) { @@ -1044,8 +1054,19 @@ static long WINAPI WindowsFDReader(Win_FD_Input_Thread *th) /* Spurious wake-up? */ ReleaseSemaphore(th->lock_sema, 1, NULL); } else { + int ok; ReleaseSemaphore(th->lock_sema, 1, NULL); - if (ReadFile(th->fd, th->buffer, toget, &got, NULL)) { + if (!is_console) + ok = ReadFile(th->fd, th->buffer, toget, &got, NULL); + else { +# define CONSOLE_BUFFER_IN_SIZE 16 + wchar_t w_buffer[CONSOLE_BUFFER_IN_SIZE]; + ok = ReadConsoleW(th->fd, w_buffer, CONSOLE_BUFFER_IN_SIZE, &got, NULL); + if (ok) { + got = WideCharToMultiByte(CP_UTF8, 0, w_buffer, got, th->buffer, RKTIO_FD_BUFFSIZE, NULL, 0); + } + } + if (ok) { WaitForSingleObject(th->lock_sema, INFINITE); th->avail = got; th->offset = 0; @@ -1172,19 +1193,42 @@ intptr_t rktio_write(rktio_t *rktio, rktio_fd_t *rfd, const char *buffer, intptr /* If we try to write too much at once, the result is ERROR_NOT_ENOUGH_MEMORY (as opposed to a partial write). */ - int ok; + int ok, to_console, can_leftover = 0, keep_leftover = 0; intptr_t towrite = len; const char *orig_buffer = buffer; + wchar_t *w_buffer = NULL; + DWORD max_winwrote; int err; if (rfd->modes & RKTIO_OPEN_TEXT) buffer = adjust_output_text(buffer, &towrite); + + max_winwrote = towrite; + + to_console = rktio_fd_is_terminal(rktio, rfd); + if (to_console) { + /* Decode UTF-8 and write a chunk on a character boundary. */ + w_buffer = convert_output_wtext(buffer, &towrite, + &can_leftover, &keep_leftover, + rfd->leftover_len, rfd->leftover); + } while (1) { - ok = WriteFile((HANDLE)rfd->fd, buffer, towrite, &winwrote, NULL); + if (!to_console) + ok = WriteFile((HANDLE)rfd->fd, buffer, towrite, &winwrote, NULL); + else { + if (towrite) + ok = WriteConsoleW((HANDLE)rfd->fd, w_buffer, towrite, &winwrote, NULL); + else { + /* can happend if can_leftover is > 0 */ + ok = 1; + winwrote = 0; + } + } + if (!ok) err = GetLastError(); - + if (!ok && (err == ERROR_NOT_ENOUGH_MEMORY)) { towrite = towrite >> 1; if (towrite && (buffer != orig_buffer)) { @@ -1205,6 +1249,27 @@ intptr_t rktio_write(rktio_t *rktio, rktio_fd_t *rfd, const char *buffer, intptr return RKTIO_WRITE_ERROR; } + if (to_console) { + /* Convert wchar count to byte count, taking into account leftovers */ + int wrote_all = (winwrote == towrite); + if (winwrote) { + /* Recounting only works right if the outptu was well-formed + UTF-8. Weird things happen otherwise... but we guard against + external inconsistency with the `max_winwrote` check below. */ + winwrote = recount_output_wtext(w_buffer, winwrote); + winwrote -= rfd->leftover_len; + rfd->leftover_len = 0; + } + if (wrote_all && can_leftover) { + memcpy(rfd->leftover + keep_leftover, buffer + winwrote, can_leftover); + rfd->leftover_len = can_leftover + keep_leftover; + winwrote += can_leftover; + } + free(w_buffer); + if (winwrote > max_winwrote) + winwrote = max_winwrote; + } + if (buffer != orig_buffer) { /* Convert converted count back to original count: */ winwrote = recount_output_text(orig_buffer, buffer, winwrote); @@ -1484,6 +1549,108 @@ static intptr_t recount_output_text(const char *orig_buffer, const char *buffer, return i; } +static wchar_t *convert_output_wtext(const char *buffer, intptr_t *_towrite, + int *_can_leftover, int *_keep_leftover, + int leftover_len, char *leftover) +{ + /* Figure out how many bytes we can convert to complete wide + characters. To avoid quadratic behavior overall, we'll limit the + number of bytes. + + The given `leftover_len` and `leftover` is a prefix on `buffer`. + If the tail (after writing all other bytes) is an incomplete + UTF-8 prefix, report the prefix length in `_can_leftover`. */ + intptr_t i, count, len = *_towrite; + char *src_buffer; + wchar_t *dest_buffer; + int want, span = 0; + + if (leftover_len) { + /* Assume that leftover is a valid prefix: */ + int v = ((unsigned char *)leftover)[0]; + if ((v & 0xF8) == 0xF0) + span = 4; + else if ((v & 0xF0) == 0xE0) + span = 3; + else + span = 2; + } else + span = 0; + want = span - leftover_len; + + for (i = 0, count = 0; (i < len) && (count < 1024); i++) { + int v = ((unsigned char *)buffer)[i]; + if (want) { + if ((v & 0xC0) == 0x80) { + /* valid continuation byte */ + want--; + if (!want) { + count++; + if (span == 4) + count++; /* surrogate pair */ + } + } else { + /* not a valid continuation byte */ + count++; + want = 0; + --i; /* retry byte */ + } + } else if (!(v & 0x80)) { + count++; + } else if ((v & 0xF8) == 0xF0) { + span = 4; + want = 3; + } else if ((v & 0xF0) == 0xE0) { + span = 3; + want = 2; + } else { + span = 2; + want = 1; + } + } + + if ((i == len) && (want > 0)) { + /* consuming all input, so set leftover */ + int keep = span - want; + if (i >= keep) { + *_can_leftover = keep; + i -= keep; + *_keep_leftover = 0; + } else { + *_can_leftover = (keep - leftover_len); + *_keep_leftover = leftover_len; + i = 0; + } + } else { + *_can_leftover = 0; + *_keep_leftover = 0; + } + + if (leftover_len) { + src_buffer = malloc(i + leftover_len); + memcpy(src_buffer, leftover, leftover_len); + memcpy(src_buffer + leftover_len, buffer, i); + i += leftover_len; + } else + src_buffer = (char *)buffer; + + dest_buffer = (wchar_t *)malloc(sizeof(wchar_t) * count); + + if (count > 0) + count = MultiByteToWideChar(CP_UTF8, 0, src_buffer, i, dest_buffer, count); + *_towrite = count; + + if (leftover_len) + free(src_buffer); + + return dest_buffer; +} + +static intptr_t recount_output_wtext(wchar_t *w_buffer, intptr_t winwrote) +{ + return WideCharToMultiByte(CP_UTF8, 0, w_buffer, winwrote, NULL, 0, NULL, 0); +} + static void deinit_write_fd(rktio_t *rktio, rktio_fd_t *rfd, int full_close) { if (rfd->oth) {