windows: repair command parsing, especially when building with MinGW

Related #3277
This commit is contained in:
Matthew Flatt 2020-06-01 21:14:28 -06:00
parent 606a8c52aa
commit 774bfd9139
16 changed files with 202 additions and 186 deletions

View File

@ -17,6 +17,9 @@
(define tmpfile (build-path (find-system-path 'temp-dir) "cattmp"))
(define tmpfile2 (build-path (find-system-path 'temp-dir) "cattmp2"))
(unless cat
(error "\"cat\" executable not found"))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; process* tests
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -617,6 +620,46 @@
(test "hello" get-output-string out)
(test "goodbye" get-output-string err))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Check Windows command-line parsing
(when (eq? 'windows (system-type))
  ;; Starts `self` with `launch-args` (everything that follows the
  ;; executable in a `subprocess` call), closes the child's stdin,
  ;; checks that the first datum the child writes to stdout is
  ;; `expected`, and then waits for the child to exit.
  (define (check-child-output expected . launch-args)
    (define-values (sp i o no-e)
      (apply subprocess #f #f (current-error-port) self launch-args))
    (close-output-port o)
    (test expected read i)
    (subprocess-wait sp)
    (close-input-port i))
  ;; `cmdline-str` is raw command-line text for a single argument;
  ;; `result-str` is the string the child should receive for it.
  (define (try-arg cmdline-str result-str)
    ;; Decoding: pass the text verbatim via 'exact mode, quoting any
    ;; spaces in the path to `self` so that it stays a single token
    (check-child-output result-str
                        'exact
                        (string-append (regexp-replace* #rx" " (path->string self) "\" \"")
                                       " -l racket/base"
                                       " -e \"(write (vector-ref (current-command-line-arguments) 0))\""
                                       " " cmdline-str))
    ;; Check encoding by `subprocess`, too: the decoded string given
    ;; as a regular argument must arrive unchanged
    (check-child-output result-str
                        "-l" "racket/base"
                        "-e" "(write (vector-ref (current-command-line-arguments) 0))"
                        result-str))
  (try-arg "x" "x")
  (try-arg "\"x\"" "x")
  (try-arg "\"a \"\"b\"\" c\"" "a \"b\" c")
  (try-arg "\"a \"\"b\"\" c" "a \"b\" c")
  (try-arg "\"a\\\"" "a\"")
  (try-arg "a\\\"" "a\"")
  (try-arg "a\\\"b" "a\"b")
  (try-arg "a\\\\\"b" "a\\b")
  (try-arg "a\\\\\\\"b" "a\\\"b")
  (try-arg "a\\\\\\\\\"b" "a\\\\b")
  (try-arg "a\\\\\\\\\\\"b" "a\\\\\"b"))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(for ([f (list tmpfile tmpfile2)] #:when (file-exists? f)) (delete-file f))

View File

@ -319,7 +319,7 @@ raw_racketcs.exe: main.o MemoryModule.o rres.o
raw_gracketcs.exe: grmain.o MemoryModule.o grres.o
$(CC) $(CFLAGS) -mwindows -o raw_gracketcs.exe grmain.o MemoryModule.o grres.o $(LDFLAGS)
MINGW_LIBS = -lshell32 -luser32 -lole32 -lrpcrt4 -luuid -lws2_32 -ladvapi32
MINGW_LIBS = -lshell32 -luser32 -lole32 -lrpcrt4 -luuid -lws2_32 -ladvapi32 -lwinmm
raw_libracketcs.dll: boot.o $(BOOT_OBJ_DEPS) libres.o
$(CC) $(CFLAGS) --shared -o raw_libracketcs.dll $(BOOT_OBJS) libres.o $(LDFLAGS) rktio/librktio.a $(MINGW_LIBS) -static-libgcc $(LIBS)

View File

@ -276,7 +276,7 @@ static int scheme_utf8_encode(unsigned int *path, int zero_offset, int len,
return WideCharToMultiByte(CP_UTF8, 0, (wchar_t *)path, len, dest, dest_len, NULL, NULL);
}
# include "../start/parse_cmdl.inc"
# include "../start/cmdl_to_argv.inc"
# undef USE_GENERIC_GET_SELF_PATH
#endif

View File

@ -121,9 +121,7 @@ int IsFlag(LPCTSTR cmd, LPCTSTR flag)
#define DLL_RELATIVE_PATH L"."
#include "../start/delayed.inc"
#define ASSUME_ASCII_COMMAND_LINE
#define GC_CAN_IGNORE
#include "../start/parse_cmdl.inc"
#include "../start/cmdl_to_argv.inc"
/////////////////////////////////////////////////////////////////////////////
//

View File

@ -239,7 +239,7 @@ mzsj86g.o: $(srcdir)/src/mzsj86g.S
lib/libracketxxxxxxx.dll: lib/libmzgcxxxxxxx.dll libracket.@LIBSFX@ mzsj86g.o $(LIBRKTIO)
mkdir -p lib
@MZLINKER@ -shared -o lib/libracketxxxxxxx.dll mzsj86g.o -Wl,--output-def -Wl,libracket.def -Wl,--whole-archive libracket.@LIBSFX@ -Wl,--no-whole-archive $(LIBRKTIO) @LDFLAGS@ -lshell32 -luser32 -lws2_32 lib/libmzgcxxxxxxx.dll -static-libgcc @LIBS@
@MZLINKER@ -shared -o lib/libracketxxxxxxx.dll mzsj86g.o -Wl,--output-def -Wl,libracket.def -Wl,--whole-archive libracket.@LIBSFX@ -Wl,--no-whole-archive $(LIBRKTIO) @LDFLAGS@ -lshell32 -luser32 -lws2_32 -lwinmm lib/libmzgcxxxxxxx.dll -static-libgcc @LIBS@
libracket.dll.a: lib/libracketxxxxxxx.dll
@DLLTOOL@ --def libracket.def -D libracketxxxxxxx.dll --output-exp libracketxxxxxxx.exp --output-lib libracketxxxxxxx.lib --output-delaylib libracket.dll.a
@ -278,6 +278,7 @@ DEF_C_DIRS = $(DEF_COLLECTS_DIR) $(DEF_CONFIG_DIR)
MAIN_HEADER_DEPS = $(srcdir)/include/scheme.h $(srcdir)/include/schthread.h $(srcdir)/sconfig.h \
$(srcdir)/src/stypes.h $(srcdir)/cmdline.inc $(srcdir)/../start/parse_cmdl.inc \
$(srcdir)/../start/cmdl_to_argv.inc \
$(srcdir)/../start/config.inc $(srcdir)/../start/delayed.inc \
$(srcdir)/../start/embedded_dll.inc

View File

@ -21,7 +21,7 @@ CC = @CC@
PLAIN_CC = @PLAIN_CC@
CFLAGS = @CFLAGS@ @COMPFLAGS@
CPPFLAGS = @PREFLAGS@
ALL_CFLAGS = $(CFLAGS) $(CPPFLAGS) -I$(builddir)/.. -I$(srcdir)/../include -I$(srcdir)/../src @MZOPTIONS@
ALL_CFLAGS = $(CFLAGS) $(CPPFLAGS) -I$(builddir)/.. -I$(srcdir)/../include -I$(srcdir)/../src -I$(srcdir)/../../start @MZOPTIONS@
dynlib@NOT_MINGW@:
$(MAKE) ../mzdyn.o
@ -63,7 +63,9 @@ dynexmpl.o: $(srcdir)/dynexmpl.c $(HEADERS)
../starter@NOT_MINGW@@EXE_SUFFIX@: $(srcdir)/../../start/ustart.c
$(PLAIN_CC) $(ALL_CFLAGS) -o ../starter@EXE_SUFFIX@ $(srcdir)/../../start/ustart.c
../starter@MINGW@@EXE_SUFFIX@: $(srcdir)/../../start/start.c ../mrstarter@EXE_SUFFIX@ sres.o
PARSE_CMDL = $(srcdir)/../../start/parse_cmdl.inc
../starter@MINGW@@EXE_SUFFIX@: $(srcdir)/../../start/start.c $(PARSE_CMDL) ../mrstarter@EXE_SUFFIX@ sres.o
$(PLAIN_CC) $(ALL_CFLAGS) -o ../starter@EXE_SUFFIX@ $(srcdir)/../../start/start.c sres.o
../mrstarter@EXE_SUFFIX@: smrres.o
$(PLAIN_CC) $(ALL_CFLAGS) -mwindows -DMRSTART -o ../mrstarter@EXE_SUFFIX@ $(srcdir)/../../start/start.c smrres.o

View File

@ -379,7 +379,9 @@ $(XSRCDIR)/main.c: $(XFORMDEP)
@INCLUDEDEP@ foreign.d
@INCLUDEDEP@ main.d
$(XSRCDIR)/mzobj.cxx: $(XFORMDEP) $(srcdir)/../../mzcom/mzobj.cxx
CMDL_DEPS = $(srcdir)/../../start/parse_cmdl.inc $(srcdir)/../../start/cmdl_to_argv.inc
$(XSRCDIR)/mzobj.cxx: $(XFORMDEP) $(srcdir)/../../mzcom/mzobj.cxx $(CMDL_DEPS)
$(XFORM_NOPRECOMP) $(XSRCDIR)/mzobj.cxx $(DEF_C_DIRS) $(srcdir)/../../mzcom/mzobj.cxx
salloc.@LTO@: $(XSRCDIR)/salloc.c
@ -621,7 +623,7 @@ MACLIBRKT_LINK_static = $(MACLIBRKT_LIBS)
../lib/libracket3mxxxxxxx.dll: ../libracket3m.@LIBSFX@ ../mzsj86g.o $(LIBRKTIO)
mkdir -p ../lib
@MZLINKER@ -shared -o ../lib/libracket3mxxxxxxx.dll ../mzsj86g.o -Wl,--output-def -Wl,libracket3m.def -Wl,--whole-archive ../libracket3m.@LIBSFX@ -Wl,--no-whole-archive $(LIBRKTIO) @LDFLAGS@ -lshell32 -luser32 -lws2_32 -static-libgcc @LIBS@
@MZLINKER@ -shared -o ../lib/libracket3mxxxxxxx.dll ../mzsj86g.o -Wl,--output-def -Wl,libracket3m.def -Wl,--whole-archive ../libracket3m.@LIBSFX@ -Wl,--no-whole-archive $(LIBRKTIO) @LDFLAGS@ -lshell32 -luser32 -lws2_32 -lwinmm -static-libgcc @LIBS@
libracket3m.dll.a: ../lib/libracket3mxxxxxxx.dll
@DLLTOOL@ --def libracket3m.def -D libracket3mxxxxxxx.dll --output-exp libracket3mxxxxxxx.exp --output-lib libracket3mxxxxxxx.lib --output-delaylib libracket3m.dll.a

View File

@ -281,7 +281,7 @@ START_XFORM_SKIP;
# endif
#if defined(__MINGW32__) || defined(WINMAIN_ALREADY)
# include "../start/parse_cmdl.inc"
# include "../start/cmdl_to_argv.inc"
#endif
#ifdef DOS_FILE_SYSTEM

View File

@ -3971,16 +3971,6 @@ Scheme_Object *scheme_open_output_file_with_mode(const char *name, const char *w
return scheme_do_open_output_file((char *)who, 0, 3, a, 0, 0);
}
#ifdef WINDOWS_FILE_HANDLES
/* Returns 1 when the Windows handle `fd` is reported as a disk file by
   GetFileType(), and 0 otherwise. */
static int win_seekable(intptr_t fd)
{
  /* SetFilePointer() requires "a file stored on a seeking device".
     There is no obvious direct test for that, so "regular file"
     serves as the approximation. */
  if (GetFileType((HANDLE)fd) == FILE_TYPE_DISK)
    return 1;
  return 0;
}
#endif
static Scheme_Object *
do_file_position(const char *who, int argc, Scheme_Object *argv[], int can_false)
{

View File

@ -7,9 +7,7 @@ extern intptr_t _tls_index;
# define REGISTER_TLS_INDEX_ARG 0
# endif
# ifdef __MINGW32__
static __thread void *tls_space;
# else
# ifndef __MINGW32__
static __declspec(thread) void *tls_space;
# endif

View File

@ -0,0 +1,67 @@
#include "parse_cmdl.inc"
/* Converts `len` UTF-16 code units at `wa`, plus the one that follows
   them, to a freshly malloc()ed UTF-8 string. The callers in this file
   pass a `len` for which `wa[len]` is the terminator, which is what
   makes the result NUL-terminated.

   Returns an empty string if the conversion fails, and NULL only if
   memory cannot be allocated. The caller owns the result. */
static char *wchar_to_char(wchar_t *wa, int len)
{
  char *r;
  int blen;

  /* First pass: a zero-sized destination asks for the required byte
     count; `len+1` makes the terminator part of the conversion */
  blen = WideCharToMultiByte(CP_UTF8, 0, wa, len+1, NULL, 0, NULL, NULL);

  if (blen > 0) {
    /* The cast is kept because this file is also included from C++ */
    r = (char *)malloc(blen);
    if (!r)
      return NULL;
    if (WideCharToMultiByte(CP_UTF8, 0, wa, len+1, r, blen, NULL, NULL) > 0)
      return r;
    free(r);
  }

  /* Conversion failed: hand back a valid empty string instead of a
     buffer that was never written */
  r = (char *)malloc(1);
  if (r)
    r[0] = 0;

  return r;
}
/* Builds a UTF-8 argument vector for the current process.

   The command line comes from GetCommandLineW() and is split by
   parse_command_line(), which is handed that buffer directly. Slot 0 of
   the result is the executable path reported by GetModuleFileNameW().

   _argc: receives the number of entries in the result, including slot 0.
   _normalized_path: if non-NULL, receives a UTF-8 copy of the executable
     path after CharLowerBuffW() has been applied to it.

   If the executable path cannot be obtained, slot 0 and the normalized
   path are both empty strings. The returned array is malloc()ed and
   ends with a NULL entry after the last argument. */
static char **cmdline_to_argv(int *_argc, char **_normalized_path)
{
  LPWSTR m_lpCmdLine;
  int argc, j, l;
  wchar_t **w_argv = NULL;
  int argv_len = 0;
  char **argv, *normalized_path = NULL;

  m_lpCmdLine = GetCommandLineW();

  /* Output starts at slot 1 and the executable token is skipped, so
     only slots 1 and up of `w_argv` are used below */
  argc = parse_command_line(1, &w_argv, &argv_len, m_lpCmdLine, 1);

  argv = (char **)malloc((argc + 1) * sizeof(char *));
  argv[0] = NULL;
  for (j = 1; j < argc; j++) {
    int len;
    for (len = 0; w_argv[j][len] != 0; len++);
    argv[j] = wchar_to_char(w_argv[j], len);
  }
  argv[argc] = NULL;

  /* argv[0] should be the name of the executable, but Windows doesn't
     specify really where this name comes from, so we get it from
     GetModuleFileName, just in case */
  {
    int name_len = 1024;
    while (1) {
      wchar_t *my_name;
      my_name = (wchar_t *)malloc(sizeof(wchar_t) * name_len);
      l = GetModuleFileNameW(NULL, my_name, name_len);
      if (!l) {
        /* Lookup failed; the fallback below fills in slot 0 */
        free(my_name);
        break;
      } else if (l < name_len) {
        argv[0] = wchar_to_char(my_name, l);
        CharLowerBuffW(my_name, l);
        normalized_path = wchar_to_char(my_name, l);
        free(my_name);
        break;
      } else {
        /* The path filled the whole buffer, so it may have been cut
           short: retry with twice the space */
        free(my_name);
        name_len = name_len * 2;
      }
    }
  }

  if (!argv[0]) {
    /* No executable path available: use empty strings so that callers
       never see an uninitialized pointer. An array is used instead of
       a wide string literal because this file is also included from
       C++. */
    wchar_t empty[1] = { 0 };
    argv[0] = wchar_to_char(empty, 0);
    normalized_path = wchar_to_char(empty, 0);
  }

  *_argc = argc;
  if (_normalized_path)
    *_normalized_path = normalized_path;

  return argv;
}

View File

@ -97,7 +97,7 @@ static void *in_memory_open(const char *name, int as_global)
CloseHandle(fd);
if (got != len)
fprintf(stderr, "partial load %d vs %ld\n", got, len);
fprintf(stderr, "partial load %ld vs %ld\n", got, len);
loaded_h = MemoryLoadLibraryEx(p, len,
MemoryDefaultAlloc, MemoryDefaultFree,
@ -105,7 +105,7 @@ static void *in_memory_open(const char *name, int as_global)
MemoryDefaultFreeLibrary, NULL);
if (noisy_embedded) {
if (!loaded_h) {
fprintf(stderr, "failed %s %d\n", name, GetLastError());
fprintf(stderr, "failed %s %ld\n", name, GetLastError());
} else
fprintf(stderr, "ok %s\n", name);
}

View File

@ -1,45 +1,65 @@
/* Windows command-line parsing */
/*
#ifndef ASSUME_ASCII_COMMAND_LINE
static char *wchar_to_char(wchar_t *wa, int len)
Windows command-line parsing:
This command-line parser is meant to be consistent with the MSVC
library for MSVC 2008 and later. It's mostly used when compiling
with MinGW, but it's also used for parsing arguments embedded in a
starter executable.
The parser is based on Microsoft documentation plus the missing
parsing rule reported at
http://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
To quote from that page:
The missing undocumented rule has to do with how doubledouble quotes ("") are handled:
[...]
After 2008
A double quote encountered outside a double quoted block starts a double quoted block.
A double quote encountered inside a double quoted block:
not followed by another double quote ends the double quoted block.
followed immediately by another double quote (e.g. ""), a single double quote is
added to the output, and the double quoted block continues.
Note that the CommandLineToArgv() function from shell32 does not
parse the same way, especially in its handling of quotes.
*/
/* Note: modifies `buf`, and result array points into `buf` */
static int parse_command_line(int pos, /* starting pos for output in `_command` */
wchar_t ***_command, /* initially `_command_len` in size */
int *_command_len, /* returns number of allocated slots */
wchar_t *buf, /* command line to parse */
int skip_exe) /* handle an executable name (which is parsed differently)? */
{
char *a;
int l;
wchar_t *parse, *created, *write;
int maxargs = *_command_len;
int findquote = 0; /* i.e., inside a quoted block? */
wchar_t **command = *_command;
l = scheme_utf8_encode((unsigned int *)wa, 0, len,
NULL, 0,
1 /* UTF-16 */);
a = (char *)malloc(l + 1);
scheme_utf8_encode((unsigned int *)wa, 0, len,
(unsigned char *)a, 0,
1 /* UTF-16 */);
a[l] = 0;
return a;
}
#endif
static int parse_command_line(char ***_command, char *buf)
{
GC_CAN_IGNORE unsigned char *parse, *created, *write;
int maxargs;
int findquote = 0;
char **command;
int count = 0;
maxargs = 49;
command = (char **)malloc((maxargs + 1) * sizeof(char *));
if (maxargs == 0) {
maxargs = 49;
command = (wchar_t **)malloc((maxargs + 1) * sizeof(wchar_t *));
}
parse = created = write = (unsigned char *)buf;
parse = created = write = buf;
while (*parse) {
int did_create = 0;
while (*parse && isspace(*parse)) { parse++; }
while (*parse && (!isspace(*parse) || findquote)) {
while (*parse && (*parse < 128) && isspace(*parse)) parse++;
while (*parse && ((*parse > 128) || !isspace(*parse) || findquote)) {
if (*parse== '"') {
findquote = !findquote;
did_create = 1;
} else if (*parse== '\\') {
GC_CAN_IGNORE unsigned char *next;
if (!skip_exe && findquote && (parse[1] == '"')) {
parse++;
*(write++) = '"';
} else {
findquote = !findquote;
did_create = 1;
}
} else if (!skip_exe && *parse== '\\') {
wchar_t *next;
for (next = parse; *next == '\\'; next++) { }
if (*next == '"') {
/* Special handling: */
@ -63,78 +83,24 @@ static int parse_command_line(char ***_command, char *buf)
*(write++) = 0;
if (*created || did_create) {
command[count++] = (char *)created;
if (count == maxargs) {
char **c2;
c2 = (char **)malloc(((2 * maxargs) + 1) * sizeof(char *));
memcpy(c2, command, maxargs * sizeof(char *));
maxargs *= 2;
if (skip_exe > 0) {
--skip_exe;
} else {
command[pos++] = (wchar_t *)created;
if (pos == maxargs) {
wchar_t **c2;
c2 = (wchar_t **)malloc(((2 * maxargs) + 1) * sizeof(wchar_t *));
memcpy(c2, command, maxargs * sizeof(wchar_t *));
maxargs *= 2;
}
}
}
created = write;
}
command[count] = NULL;
command[pos] = NULL;
*_command = command;
*_command_len = maxargs;
return count;
}
static char **cmdline_to_argv(int *_argc, char **_normalized_path)
{
#ifndef ASSUME_ASCII_COMMAND_LINE
LPWSTR m_lpCmdLine;
int j, l;
#endif
int argc;
char *a, **argv, *normalized_path;
#ifdef ASSUME_ASCII_COMMAND_LINE
a = GetCommandLine();
#else
m_lpCmdLine = GetCommandLineW();
for (j = 0; m_lpCmdLine[j]; j++) {
}
a = wchar_to_char(m_lpCmdLine, j);
#endif
argc = parse_command_line(&argv, a);
#ifndef ASSUME_ASCII_COMMAND_LINE
/* argv[0] should be the name of the executable, but Windows doesn't
specify really where this name comes from, so we get it from
GetModuleFileName, just in case */
{
int name_len = 1024;
while (1) {
wchar_t *my_name;
my_name = (wchar_t *)malloc(sizeof(wchar_t) * name_len);
l = GetModuleFileNameW(NULL, my_name, name_len);
if (!l) {
name_len = GetLastError();
free(my_name);
my_name = NULL;
break;
} else if (l < name_len) {
a = wchar_to_char(my_name, l);
argv[0] = a;
CharLowerBuffW(my_name, l);
normalized_path = wchar_to_char(my_name, l);
free(my_name);
break;
} else {
free(my_name);
name_len = name_len * 2;
}
}
}
#else
normalized_path = "?";
#endif
*_argc = argc;
if (_normalized_path)
*_normalized_path = normalized_path;
return argv;
return pos;
}

View File

@ -32,7 +32,6 @@
#endif
#define MAXCOMMANDLEN 1024
#define MAX_ARGS 100
#if defined(_MSC_VER) || defined(__MINGW32__)
# define MSC_IZE(x) _ ## x
@ -121,6 +120,8 @@ static void wc_strcat(wchar_t *dest, const wchar_t *src)
wc_strcpy(dest, src);
}
#include "parse_cmdl.inc"
static wchar_t *protect(wchar_t *s)
{
wchar_t *naya;
@ -171,57 +172,6 @@ static wchar_t *protect(wchar_t *s)
return s;
}
/* Splits `buf` in place into arguments separated by ASCII whitespace.

   Double quotes toggle a quoted region in which whitespace does not
   end an argument; the quotes themselves are dropped. A run of
   backslashes directly before a double quote is halved, and an odd
   leftover backslash turns that quote into a literal character. Any
   other backslash is copied as-is.

   Pointers to the non-empty arguments are stored in `command` starting
   at index `count`, except for the first `skip` of them, which are
   parsed but not stored. Parsing stops as soon as `maxargs` slots are
   in use. Returns the updated count; the stored pointers point into
   `buf`. */
static int parse_command_line(int count, wchar_t **command,
			      wchar_t *buf, int maxargs, int skip)
{
  wchar_t *src = buf;       /* next character to read */
  wchar_t *dst = buf;       /* next position to write */
  wchar_t *arg_start = buf; /* start of the argument being built */
  int in_quotes = 0;

  while (*src) {
    /* Drop ASCII whitespace in front of the argument */
    while (*src && (*src < 128) && isspace(*src))
      src++;

    /* Copy one argument; whitespace ends it only outside of quotes */
    for (; *src; src++) {
      if (!(*src > 128) && isspace(*src) && !in_quotes)
        break;

      if (*src == '"') {
        in_quotes = !in_quotes;
        continue;
      }

      if (*src == '\\') {
        int slashes = 0;
        while (src[slashes] == '\\')
          slashes++;
        if (src[slashes] == '"') {
          /* Backslashes leading up to a quote: emit one per pair, and
             let an unpaired one escape the quote */
          int k;
          for (k = slashes / 2; k > 0; k--)
            *(dst++) = '\\';
          src += (slashes - 1);
          if (slashes % 2) {
            *(dst++) = '\"';
            src++;
          }
          continue;
        }
      }

      *(dst++) = *src;
    }

    /* Step over the delimiter, if any, and terminate the argument */
    if (*src)
      src++;
    *(dst++) = 0;

    if (*arg_start) {
      if (skip) {
        skip--;
      } else {
        command[count++] = arg_start;
        if (count == maxargs)
          return count;
      }
    }
    arg_start = dst;
  }

  return count;
}
static wchar_t *make_command_line(int argc, wchar_t **argv)
{
int i, len = 0;
@ -272,7 +222,8 @@ int wmain(int argc_in, wchar_t **argv_in)
#endif
{
wchar_t go[MAXCOMMANDLEN * 2];
wchar_t *args[MAX_ARGS + 1];
wchar_t **args = NULL;
int args_len = 0;
wchar_t *command_line;
int count, i, cl_len;
struct MSC_IZE(stat) st;
@ -291,8 +242,7 @@ int wmain(int argc_in, wchar_t **argv_in)
variant = copy_string(variant);
#endif
count = 1;
count = parse_command_line(count, args, input, MAX_ARGS, 0);
count = parse_command_line(1, &args, &args_len, input, 0);
/* exedir can be relative to the current executable */
if ((exedir[0] == '\\')
@ -351,7 +301,7 @@ int wmain(int argc_in, wchar_t **argv_in)
buf = (wchar_t *)malloc((wc_strlen(m_lpCmdLine) + 1) * sizeof(wchar_t));
memcpy(buf, m_lpCmdLine, (wc_strlen(m_lpCmdLine) + 1) * sizeof(wchar_t));
count = parse_command_line(count, args, buf, MAX_ARGS, 1);
count = parse_command_line(count, &args, &args_len, buf, 1);
}
#else
{

View File

@ -77,8 +77,6 @@ static BOOL CALLBACK CheckWindow(HWND wnd, LPARAM param)
200,
&result);
printf("try %p result %d\n", wnd, result);
if (ok == 0)
return TRUE; /* ignore and continue */
if ((intptr_t)result == 79) {

View File

@ -17,6 +17,7 @@ CSDIR = ..\..\cs\c
MAIN_DEPS = ..\..\start\config.inc \
..\..\start\parse_cmdl.inc \
..\..\start\cmdl_to_argv.inc \
..\..\start\delayed.inc \
..\..\start\win_single.inc \
..\..\start\embedded_dll.inc