diff --git a/c/externs.h b/c/externs.h index e02a6d2a2f..d3cd85bb74 100644 --- a/c/externs.h +++ b/c/externs.h @@ -109,6 +109,7 @@ extern int S_fasl_intern_rtd(ptr *x); extern ptr S_to_vfasl PROTO((ptr v)); extern ptr S_vfasl PROTO((ptr bv, void *stream, iptr len)); extern ptr S_vfasl_to PROTO((ptr v)); +extern IBOOL S_vfasl_can_combinep(ptr v); /* flushcache.c */ extern void S_record_code_mod PROTO((ptr tc, uptr addr, uptr bytes)); diff --git a/c/fasl.c b/c/fasl.c index 00858432b7..8c6e81c5e8 100644 --- a/c/fasl.c +++ b/c/fasl.c @@ -237,7 +237,7 @@ static void ppc32_set_jump PROTO((void *address, uptr item, IBOOL callp)); static uptr ppc32_get_jump PROTO((void *address)); #endif /* PPC32 */ #ifdef X86_64 -static void x86_64_set_jump PROTO((void *address, uptr item, IBOOL callp, IBOOL force_abs)); +static void x86_64_set_jump PROTO((void *address, uptr item, IBOOL callp)); static uptr x86_64_get_jump PROTO((void *address)); #endif /* X86_64 */ #ifdef SPARC64 @@ -463,6 +463,12 @@ static ptr fasl_entry(ptr tc, unbufFaslFile uf) { ffo.size = uf_uptrin(uf); if (ty == fasl_type_vfasl_size) { + if (S_vfasl_boot_mode == -1) { + ptr pre = S_cputime(); + Scompact_heap(); + S_vfasl_boot_mode = 1; + printf("pre compact %ld\n", UNFIX(S_cputime()) - UNFIX(pre)); + } x = S_vfasl((ptr)0, uf, ffo.size); } else { ffo.buf = buf; @@ -1192,7 +1198,7 @@ void S_set_code_obj(who, typ, p, n, x, o) char *who; IFASLCODE typ; iptr n, o; p address = (void *)((uptr)p + n); item = (uptr)x + o; - switch (typ & ~reloc_force_abs) { + switch (typ) { case reloc_abs: *(uptr *)address = item; break; @@ -1226,10 +1232,10 @@ void S_set_code_obj(who, typ, p, n, x, o) char *who; IFASLCODE typ; iptr n, o; p #endif /* I386 */ #ifdef X86_64 case reloc_x86_64_jump: - x86_64_set_jump(address, item, 0, typ & reloc_force_abs); + x86_64_set_jump(address, item, 0); break; case reloc_x86_64_call: - x86_64_set_jump(address, item, 1, typ & reloc_force_abs); + x86_64_set_jump(address, item, 1); break; #endif /* X86_64 */ #ifdef SPARC64 @@ -1269,7 +1275,7 @@ ptr S_get_code_obj(typ, p, n, o) IFASLCODE typ; iptr n, o; ptr p; { void *address; uptr item; address = (void *)((uptr)p + n); - switch (typ & ~reloc_force_abs) { + switch (typ) { case reloc_abs: item = *(uptr *)address; break; @@ -1447,9 +1453,9 @@ static uptr ppc32_get_jump(void *address) { #endif /* PPC32 */ #ifdef X86_64 -static void x86_64_set_jump(void *address, uptr item, IBOOL callp, IBOOL force_abs) { +static void x86_64_set_jump(void *address, uptr item, IBOOL callp) { I64 disp = (I64)item - ((I64)address + 5); /* 5 = size of call instruction */ - if ((I32)disp == disp && !force_abs) { + if ((I32)disp == disp) { *(octet *)address = callp ? 0xE8 : 0xE9; /* call or jmp disp32 opcode */ *(I32 *)((uptr)address + 1) = (I32)disp; *((octet *)address + 5) = 0x90; /* nop */ diff --git a/c/globals.h b/c/globals.h index b3bf0b53a5..d71f0ddc75 100644 --- a/c/globals.h +++ b/c/globals.h @@ -26,6 +26,7 @@ EXTERN ptr S_child_processes[static_generation+1]; /* scheme.c */ EXTERN IBOOL S_boot_time; +EXTERN int S_vfasl_boot_mode; EXTERN IBOOL S_errors_to_console; EXTERN ptr S_threads; EXTERN uptr S_nthreads; diff --git a/c/prim5.c b/c/prim5.c index 6a9cb94385..a2ac55bc5e 100644 --- a/c/prim5.c +++ b/c/prim5.c @@ -1548,6 +1548,7 @@ void S_prim5_init() { Sforeign_symbol("(cs)bv_fasl_read", (void *)S_bv_fasl_read); Sforeign_symbol("(cs)to_vfasl", (void *)S_to_vfasl); Sforeign_symbol("(cs)vfasl_to", (void *)S_vfasl_to); + Sforeign_symbol("(cs)vfasl_can_combinep", (void *)S_vfasl_can_combinep); Sforeign_symbol("(cs)s_decode_float", (void *)s_decode_float); Sforeign_symbol("(cs)new_open_input_fd", (void *)S_new_open_input_fd); diff --git a/c/scheme.c b/c/scheme.c index ebf30b4eeb..2427c8b1b1 100644 --- a/c/scheme.c +++ b/c/scheme.c @@ -25,6 +25,10 @@ #endif #include +extern iptr vfasl_load_time; +extern iptr vfasl_fix_time; +extern iptr vfasl_relocs; + static INT boot_count; static IBOOL verbose; @@ -884,7 +888,11 @@ static void load(tc, n, base) ptr tc; iptr n; IBOOL base; { i = 0; while (i++ < LOADSKIP && S_boot_read(bd[n].file, bd[n].path) != Seof_object); + ptr pre = S_cputime(); + uptr reading = 0; + while ((x = S_boot_read(bd[n].file, bd[n].path)) != Seof_object) { + reading += UNFIX(S_cputime()) - UNFIX(pre); if (loadecho) { printf("%ld: ", (long)i); fflush(stdout); @@ -917,8 +925,11 @@ static void load(tc, n, base) ptr tc; iptr n; IBOOL base; { fflush(stdout); } i += 1; + pre = S_cputime(); } + printf("load %ld\n", reading); + S_G.load_binary = Sfalse; gzclose(bd[n].file); } @@ -1116,6 +1127,8 @@ extern void Sbuild_heap(kernel, custom_init) const char *kernel; void (*custom_i } } + S_vfasl_boot_mode = -1; /* to static generation after compacting initial */ + if (boot_count != 0) { INT i = 0; @@ -1142,8 +1155,16 @@ extern void Sbuild_heap(kernel, custom_init) const char *kernel; void (*custom_i while (i < boot_count) load(tc, i++, 0); } + S_vfasl_boot_mode = 0; + + printf("vfasl %ld %ld / %ld\n", vfasl_load_time, vfasl_fix_time, vfasl_relocs); + + ptr pre = S_cputime(); + if (boot_count != 0) Scompact_heap(); + printf("compact %ld\n", UNFIX(S_cputime()) - UNFIX(pre)); + /* complete the initialization on the Scheme side */ p = S_symbol_value(S_intern((const unsigned char *)"$scheme-init")); if (!Sprocedurep(p)) { diff --git a/c/vfasl.c b/c/vfasl.c index 145c225d9d..843060b616 100644 --- a/c/vfasl.c +++ b/c/vfasl.c @@ -16,6 +16,47 @@ #include "system.h" +iptr vfasl_load_time; +iptr vfasl_fix_time; +iptr vfasl_relocs; + +/* + + vfasl ("very fast load") format, where "data" corresponds to an + image to load into memory, "table" is metadata to relocate that + data, and the fixed-size header determines the overall size. The + data can be loaded directly to the static generation on boot, since + it's organized into pieces that should reside in a particular + space. + + [vfasl_header] + _ + / [symbol] ... -> space_symbol + / [rtd] ... -> space_pure +d | [closure] ... -> space_pure +a | [impure] ... -> space_impure +t | [pure_typed] ... -> space_pure_typed +a | [impure_record] ... -> space_impure_record + | [code] ... -> space_code + \ [data] ... -> space_data + \_ [reloc] ... -> (not kept for direct-to-static) + _ +t / [symbol reference offset] ... +a / [rtd reference offset] ... +b | [singleton reference offset] ... +l \ +e \_ [bitmap of pointers to relocate] + + + The bitmap at the end has one bit for each pointer-sized word in + the data, but it's shorter than the "data" size (divided by the + pointer size then divided by 8 bits per byte) because the trailing + zeros for data are omitted. The bitmap doesn't include some fixups + that are handled more directly, such as for code and its + relocations. + +*/ + typedef uptr vfoff; typedef struct vfasl_header { @@ -29,37 +70,59 @@ typedef struct vfasl_header { vfoff rtd_offset; # define rtd_end_offset closure_offset vfoff closure_offset; -# define closure_end_offset code_offset +# define closure_end_offset impure_offset + vfoff impure_offset; +# define impure_end_offset pure_typed_offset + vfoff pure_typed_offset; +# define pure_typed_object_end_offset impure_record_offset + vfoff impure_record_offset; +# define impure_record_end_offset code_offset vfoff code_offset; -# define code_end_offset other_offset - vfoff other_offset; +# define code_end_offset data_offset + vfoff data_offset; +# define data_end_offset reloc_offset + vfoff reloc_offset; +# define reloc_end_offset data_size vfoff symref_count; vfoff rtdref_count; vfoff singletonref_count; } vfasl_header; -/* vfasl format, where the fixed-size header determines the rest of the - size: +enum { + /* The order of these spaces needs to match vfasl_header: */ + vspace_symbol, + vspace_rtd, + vspace_closure, + vspace_impure, + vspace_pure_typed, + vspace_impure_record, + /* rest rest are at then end to make the pointer bitmap + end with zeros (that can be dropped): */ + vspace_code, + vspace_data, + vspace_reloc, /* can be dropped after direct to static generation */ + vspaces_count +}; - [vfasl_header] - _ -d / [symbol] ... -a / [rtd] ... -t | [closure] ... -a \ [code] ... - \_ [other] ... +/* Needs to match order above: */ +static ISPC vspace_spaces[] = { + space_symbol, + space_pure, /* rtd */ + space_pure, /* closure */ + space_impure, + space_pure_typed_object, + space_impure_record, + space_code, + space_data, + space_data /* reloc --- but not really, since relocs are never in static */ +}; -t / [vfoff: symbol reference offset] ... -a / [vfoff: rtd reference offset] ... -b | [vfoff: singleton reference offset] ... -l \ -e \_ [bitmap of pointer offsets] - -*/ +/************************************************************/ +/* Encode-time data structures */ -/* Many chunks per vspace on first pass, one per vspace on second - pass: */ +/* During encoding, we use many chunks per vspace on first pass, one + per vspace on second pass: */ typedef struct vfasl_chunk { ptr bytes; uptr length; @@ -74,20 +137,6 @@ struct vfasl_count_and_chunk { vfasl_chunk *first; }; -enum { - /* The order of these spaces matters: */ - vspace_symbol, - vspace_rtd, - vspace_closure, - vspace_code, - /* The rest of the spaces are "other" */ - vspace_array, - vspace_typed, - vspace_reloc, - vspace_data, /* at end, so pointer bitmap ends with zeros */ - vspaces_count -}; - typedef struct vfasl_info { ptr base_addr; /* address to make relocations relative to */ @@ -109,6 +158,8 @@ typedef struct vfasl_info { octet *ptr_bitmap; struct vfasl_hash_table *graph; + + IBOOL installs_library_entry; /* to determine whether vfasls can be combined */ } vfasl_info; #define ptr_add(p, n) ((ptr)((uptr)(p) + (n))) @@ -126,7 +177,8 @@ static uptr sweep_code_object(vfasl_info *vfi, ptr co); static uptr sweep_record(vfasl_info *vfi, ptr co); static uptr sweep(vfasl_info *vfi, ptr p); -static void relink_code(ptr co, ptr sym_base, ptr dest_base); +static void relink_code(ptr co, ptr sym_base, ptr *vspaces, uptr *vspace_offsets, IBOOL to_static); +static ptr find_pointer_from_offset(uptr p_off, ptr *vspaces, uptr *vspace_offsets); static void vfasl_relocate(vfasl_info *vfi, ptr *ppp); static ptr vfasl_relocate_help(vfasl_info *vfi, ptr pp); @@ -138,6 +190,7 @@ static void vfasl_register_forward(vfasl_info *vfi, ptr pp, ptr p); static ptr vfasl_lookup_forward(vfasl_info *vfi, ptr p); static void fasl_init_entry_tables(); +static void vfasl_check_install_library_entry(vfasl_info *vfi, ptr name); static int detect_singleton(ptr p); static ptr lookup_singleton(int which); @@ -152,17 +205,24 @@ static void sort_offsets(vfoff *p, vfoff len); #define vfasl_fail(vfi, what) S_error("vfasl", "cannot encode " what) -#define print_stats(args) /* printf args */ +/************************************************************/ +/* Loading */ ptr S_vfasl(ptr bv, void *stream, iptr input_len) { ptr tc = get_thread_context(); vfasl_header header; + ptr vspaces[vspaces_count]; + uptr vspace_offsets[vspaces_count+1], vspace_deltas[vspaces_count]; ptr data, table; vfoff *symrefs, *rtdrefs, *singletonrefs; octet *bm, *bm_end; iptr used_len; - + int s; + IBOOL to_static = 0; + + ptr pre = S_cputime(); + used_len = sizeof(header); if (used_len > input_len) S_error("fasl-read", "input length mismatch"); @@ -178,21 +238,68 @@ ptr S_vfasl(ptr bv, void *stream, iptr input_len) if (used_len > input_len) S_error("fasl-read", "input length mismatch"); + vspace_offsets[vspace_symbol] = 0; + vspace_offsets[vspace_rtd] = header.rtd_offset; + vspace_offsets[vspace_closure] = header.closure_offset; + vspace_offsets[vspace_impure] = header.impure_offset; + vspace_offsets[vspace_pure_typed] = header.pure_typed_offset; + vspace_offsets[vspace_impure_record] = header.impure_record_offset; + vspace_offsets[vspace_code] = header.code_offset; + vspace_offsets[vspace_data] = header.data_offset; + vspace_offsets[vspace_reloc] = header.reloc_offset; + vspace_offsets[vspaces_count] = header.data_size; + if (bv) { ptr base_addr = &BVIT(bv, sizeof(vfasl_header)); thread_find_room(tc, typemod, header.data_size, data); memcpy(data, base_addr, header.data_size); table = ptr_add(base_addr, header.data_size); } else { - thread_find_room(tc, typemod, header.data_size, data); - if (S_fasl_stream_read(stream, data, header.data_size) < 0) - S_error("fasl-read", "input truncated"); + if (S_vfasl_boot_mode > 0) { + for (s = 0; s < vspaces_count; s++) { + uptr sz = vspace_offsets[s+1] - vspace_offsets[s]; + if (sz > 0) { + if (s == vspace_reloc) { + thread_find_room(tc, typemod, sz, vspaces[s]); + } else { + find_room(vspace_spaces[s], static_generation, typemod, sz, vspaces[s]); + } + if (S_fasl_stream_read(stream, vspaces[s], sz) < 0) + S_error("fasl-read", "input truncated"); + } else + vspaces[s] = (ptr)0; + } + for (s = vspaces_count - 1; s--; ) { + if (!vspaces[s]) + vspaces[s] = vspaces[s+1]; + } + data = (ptr)0; /* => initialize below */ + to_static = 1; + } else { + thread_find_room(tc, typemod, header.data_size, data); + if (S_fasl_stream_read(stream, data, header.data_size) < 0) + S_error("fasl-read", "input truncated"); + } thread_find_room(tc, typemod, ptr_align(header.table_size), table); if (S_fasl_stream_read(stream, table, header.table_size) < 0) S_error("fasl-read", "input truncated"); } + if (data) { + for (s = 0; s < vspaces_count; s++) { + vspaces[s] = ptr_add(data, vspace_offsets[s]); + vspace_deltas[s] = (uptr)data; + } + } else { + data = vspaces[0]; + for (s = 0; s < vspaces_count; s++) + vspace_deltas[s] = (uptr)ptr_subtract(vspaces[s], vspace_offsets[s]); + } + + vfasl_load_time += UNFIX(S_cputime()) - UNFIX(pre); + pre = S_cputime(); + symrefs = table; rtdrefs = ptr_add(symrefs, header.symref_count * sizeof(vfoff)); singletonrefs = ptr_add(rtdrefs, header.rtdref_count * sizeof(vfoff)); @@ -213,21 +320,50 @@ ptr S_vfasl(ptr bv, void *stream, iptr input_len) header.rtd_end_offset - header.rtd_offset, header.closure_end_offset - header.closure_offset, header.code_end_offset - header.code_offset, - header.data_size - header.other_offset, + ((header.code_offset - header.closure_end_offset) + + (header.data_size - header.code_end_offset)), header.table_size, header.symref_count * sizeof(vfoff), header.rtdref_count * sizeof(vfoff), header.singletonref_count * sizeof(vfoff)); - /* Fix up pointers. The content `data` initially has all pointers - relative to the start of the data, so add the `data` address - to all pointers. */ + /* We have to convert an offset relative to the start of data in the + vfasl format to an offset relative to an individual space, at + least for target generations other than 0. Rely on the fact that + the spaces and the references are both sorted. */ +#define SPACE_OFFSET_DECLS \ + int s2 = 0; \ + uptr offset2 = vspace_offsets[s2]; \ + uptr next_offset2 = vspace_offsets[s2+1] +#define INC_SPACE_OFFSET(off) \ + do { \ + while ((off) >= next_offset2) { \ + s2++; \ + offset2 = next_offset2; \ + next_offset2 = vspace_offsets[s2+1]; \ + } \ + } while (0) +#define SPACE_PTR(off) ptr_add(vspaces[s2], (off) - offset2) + + /* Fix up pointers. The initial content has all pointers relative to + the start of the data. If the data were all still contiguous, + we'd add the `data` address to all pointers. Since the spaces may + be disconnected, though, add `vspace_deltas[s]` for the right + `s`. */ { - ptr *p = data; + SPACE_OFFSET_DECLS; + uptr p_off = 0; while (bm != bm_end) { - octet m; - m = *bm; -# define MAYBE_FIXUP(i) if (m & (1 << i)) ((uptr *)p)[i] += (uptr)data + octet m = *bm; +# define MAYBE_FIXUP(i) \ + if (m & (1 << i)) { \ + ptr *p3; \ + INC_SPACE_OFFSET(p_off); \ + p3 = SPACE_PTR(p_off); \ + *p3 = find_pointer_from_offset((uptr)*p3, vspaces, vspace_offsets); \ + } \ + p_off += sizeof(uptr); + MAYBE_FIXUP(0); MAYBE_FIXUP(1); MAYBE_FIXUP(2); @@ -236,8 +372,8 @@ ptr S_vfasl(ptr bv, void *stream, iptr input_len) MAYBE_FIXUP(5); MAYBE_FIXUP(6); MAYBE_FIXUP(7); + # undef MAYBE_FIXUP - p += byte_bits; bm++; } } @@ -246,18 +382,22 @@ ptr S_vfasl(ptr bv, void *stream, iptr input_len) This needs to be before interning symbols, in case "" is a symbol name. */ { + SPACE_OFFSET_DECLS; vfoff i; for (i = 0; i < header.singletonref_count; i++) { + uptr r_off; ptr *ref; - ref = ptr_add(data, singletonrefs[i]); + r_off = singletonrefs[i]; + INC_SPACE_OFFSET(r_off); + ref = SPACE_PTR(r_off); *ref = lookup_singleton(UNFIX(*ref)); } } /* Intern symbols */ { - ptr sym = TYPE(data, type_symbol); - ptr end_syms = TYPE(ptr_add(data, header.sym_end_offset), type_symbol); + ptr sym = TYPE(vspaces[vspace_symbol], type_symbol); + ptr end_syms = TYPE(ptr_add(vspaces[vspace_symbol], header.sym_end_offset), type_symbol); if (sym != end_syms) { tc_mutex_acquire() @@ -283,12 +423,15 @@ ptr S_vfasl(ptr bv, void *stream, iptr input_len) /* Replace symbol references with interned references */ { - ptr syms = data; + SPACE_OFFSET_DECLS; + ptr syms = vspaces[vspace_symbol]; vfoff i; for (i = 0; i < header.symref_count; i++) { - uptr sym_pos; + uptr p2_off, sym_pos; ptr p2, sym, val; - p2 = ptr_add(data, symrefs[i]); + p2_off = symrefs[i]; + INC_SPACE_OFFSET(p2_off); + p2 = SPACE_PTR(p2_off); sym_pos = UNFIX(*(ptr **)p2); sym = TYPE(ptr_add(syms, sym_pos * size_symbol), type_symbol); if ((val = SYMVAL(sym)) != sunbound) @@ -299,9 +442,10 @@ ptr S_vfasl(ptr bv, void *stream, iptr input_len) /* Intern rtds */ if (header.rtd_offset < header.rtd_end_offset) { - ptr rtd = TYPE(ptr_add(data, header.rtd_offset), type_typed_object); - ptr rtd_end = TYPE(ptr_add(data, header.rtd_end_offset), type_typed_object); - + ptr rtd = TYPE(ptr_add(vspaces[vspace_rtd], header.rtd_offset - vspace_offsets[vspace_rtd]), + type_typed_object); + ptr rtd_end = ptr_add(rtd, header.rtd_end_offset - header.rtd_offset); + /* first one corresponds to base_rtd */ RECORDINSTTYPE(rtd) = S_G.base_rtd; RECORDDESCUID(rtd) = S_G.base_rtd; @@ -315,7 +459,7 @@ ptr S_vfasl(ptr bv, void *stream, iptr input_len) RECORDINSTTYPE(rtd) = S_G.base_rtd; - /* fixup type and parent before continuing, relying on parents being earlier in `rtd`s */ + /* fixup parent before continuing, relying on parents being earlier in `rtd`s */ parent_rtd = RECORDDESCPARENT(rtd); if (parent_rtd != Sfalse) { ptr parent_uid = RECORDDESCUID(parent_rtd); @@ -337,10 +481,14 @@ ptr S_vfasl(ptr bv, void *stream, iptr input_len) /* Replace rtd references to interned references */ { + SPACE_OFFSET_DECLS; vfoff i; for (i = 0; i < header.rtdref_count; i++) { + uptr r_off; ptr *ref, rtd, uid; - ref = ptr_add(data, rtdrefs[i]); + r_off = rtdrefs[i]; + INC_SPACE_OFFSET(r_off); + ref = SPACE_PTR(r_off); rtd = *ref; uid = RECORDDESCUID(rtd); if (!Ssymbolp(uid)) { @@ -352,35 +500,38 @@ ptr S_vfasl(ptr bv, void *stream, iptr input_len) /* Fix code pointers on closures */ { - ptr cl = TYPE(ptr_add(data, header.closure_offset), type_closure); - ptr end_closures = TYPE(ptr_add(data, header.closure_end_offset), type_closure); + ptr cl = TYPE(ptr_add(vspaces[vspace_closure], header.closure_offset - vspace_offsets[vspace_closure]), + type_closure); + ptr end_closures = ptr_add(cl, header.closure_end_offset - header.closure_offset); + uptr code_delta = vspace_deltas[vspace_code]; while (cl != end_closures) { ptr code = CLOSCODE(cl); - code = ptr_add(code, (uptr)data); + code = ptr_add(code, code_delta); SETCLOSCODE(cl,code); cl = ptr_add(cl, size_closure(CLOSLEN(cl))); } } /* Fix code via relocations */ - { - - ptr sym_base = data; - ptr code = TYPE(ptr_add(data, header.code_offset), type_typed_object); - ptr code_end = TYPE(ptr_add(data, header.code_end_offset), type_typed_object); + { + ptr sym_base = vspaces[vspace_symbol]; + ptr code = TYPE(vspaces[vspace_code], type_typed_object); + ptr code_end = ptr_add(code, header.code_end_offset - header.code_offset); while (code != code_end) { - relink_code(code, sym_base, data); + relink_code(code, sym_base, vspaces, vspace_offsets, to_static); code = ptr_add(code, size_code(CODELEN(code))); } } + vfasl_fix_time += UNFIX(S_cputime()) - UNFIX(pre); + /* Turn result offset into a value, unboxing if it's a box (which supports a symbol result, for example). */ { ptr v; ITYPE t; - v = ptr_add(data, header.result_offset); + v = find_pointer_from_offset(header.result_offset, vspaces, vspace_offsets); if (((t = TYPEBITS(v)) == type_typed_object) && TYPEP(TYPEFIELD(v), mask_box, type_box)) v = Sunbox(v); @@ -394,6 +545,53 @@ ptr S_vfasl_to(ptr bv) return S_vfasl(bv, (ptr)0, Sbytevector_length(bv)); } +/************************************************************/ +/* Saving */ + +static void vfasl_init(vfasl_info *vfi) { + int s; + + vfi->base_addr = (ptr)0; + vfi->sym_count = 0; + vfi->symref_count = 0; + vfi->symrefs = (ptr)0; + vfi->base_rtd = S_G.base_rtd; + vfi->rtdref_count = 0; + vfi->rtdrefs = (ptr)0; + vfi->singletonref_count = 0; + vfi->singletonrefs = (ptr)0; + vfi->graph = make_vfasl_hash_table(); + vfi->ptr_bitmap = (ptr)0; + vfi->installs_library_entry = 0; + + for (s = 0; s < vspaces_count; s++) { + vfasl_chunk *c; + + c = malloc(sizeof(vfasl_chunk)); + c->bytes = (ptr)0; + c->length = 0; + c->used = 0; + c->swept = 0; + c->next = (ptr)0; + + vfi->spaces[s].first = c; + vfi->spaces[s].total_bytes = 0; + } +} + +static void vfasl_free_chunks(vfasl_info *vfi) { + int s; + for (s = 0; s < vspaces_count; s++) { + vfasl_chunk *c, *next; + for (c = vfi->spaces[s].first; c; c = next) { + next = c->next; + if (c->bytes) + free(c->bytes); + free(c); + } + } +} + ptr S_to_vfasl(ptr v) { vfasl_info *vfi; @@ -420,44 +618,13 @@ ptr S_to_vfasl(ptr v) vfi = malloc(sizeof(vfasl_info)); - vfi->base_addr = (ptr)0; - vfi->sym_count = 0; - vfi->symref_count = 0; - vfi->symrefs = (ptr)0; - vfi->base_rtd = S_G.base_rtd; - vfi->rtdref_count = 0; - vfi->rtdrefs = (ptr)0; - vfi->singletonref_count = 0; - vfi->singletonrefs = (ptr)0; - vfi->graph = make_vfasl_hash_table(); - vfi->ptr_bitmap = (ptr)0; + vfasl_init(vfi); /* First pass: determine sizes */ - for (s = 0; s < vspaces_count; s++) { - vfasl_chunk *c; - - c = malloc(sizeof(vfasl_chunk)); - c->bytes = (ptr)0; - c->length = 0; - c->used = 0; - c->swept = 0; - c->next = (ptr)0; - - vfi->spaces[s].first = c; - vfi->spaces[s].total_bytes = 0; - } - (void)vfasl_copy_all(vfi, v); - for (s = 0; s < vspaces_count; s++) { - vfasl_chunk *c, *next; - for (c = vfi->spaces[s].first; c; c = next) { - next = c->next; - free(c->bytes); - free(c); - } - } + vfasl_free_chunks(vfi); free_vfasl_hash_table(vfi->graph); @@ -480,8 +647,12 @@ ptr S_to_vfasl(ptr v) header.rtd_offset = vfi->spaces[vspace_symbol].total_bytes; header.closure_offset = header.rtd_offset + vfi->spaces[vspace_rtd].total_bytes; - header.code_offset = header.closure_offset + vfi->spaces[vspace_closure].total_bytes; - header.other_offset = header.code_offset + vfi->spaces[vspace_code].total_bytes; + header.impure_offset = header.closure_offset + vfi->spaces[vspace_closure].total_bytes; + header.pure_typed_offset = header.impure_offset + vfi->spaces[vspace_impure].total_bytes; + header.impure_record_offset = header.pure_typed_offset + vfi->spaces[vspace_pure_typed].total_bytes; + header.code_offset = header.impure_record_offset + vfi->spaces[vspace_impure_record].total_bytes; + header.data_offset = header.code_offset + vfi->spaces[vspace_code].total_bytes; + header.reloc_offset = header.data_offset + vfi->spaces[vspace_data].total_bytes; header.symref_count = vfi->symref_count; header.rtdref_count = vfi->rtdref_count; @@ -503,7 +674,7 @@ ptr S_to_vfasl(ptr v) vfi->base_addr = p; - /* Set pointers to vspaces based on sizes frm first pass */ + /* Set pointers to vspaces based on sizes from first pass */ for (s = 0; s < vspaces_count; s++) { vfasl_chunk *c; @@ -585,10 +756,11 @@ ptr S_to_vfasl(ptr v) sort_offsets(vfi->symrefs, vfi->symref_count); sort_offsets(vfi->rtdrefs, vfi->rtdref_count); sort_offsets(vfi->singletonrefs, vfi->singletonref_count); - + for (s = 0; s < vspaces_count; s++) { - free(vfi->spaces[s].first); + free(vfi->spaces[s].first->bytes = (ptr)0); } + vfasl_free_chunks(vfi); free_vfasl_hash_table(vfi->graph); @@ -597,6 +769,41 @@ ptr S_to_vfasl(ptr v) return bv; } +/* If compiled code uses `$install-library-entry`, then it can't be + combined into a single vfasled object, because the installation + needs to be evaluated for laster vfasls. Recognize a non-combinable + value as anything that references the C entry or even mentions the + symbol `$install-library-entry` (as defined in "library.ss"). If + non-boot code mentions the symbol `$install-library-entry`, it just + isn't as optimal. + + This is an expensive test, since we perform half of a vfasl + encoding to look for `$install-library-entry`. */ +IBOOL S_vfasl_can_combinep(ptr v) +{ + IBOOL installs; + vfasl_info *vfi; + + fasl_init_entry_tables(); + + /* Run a "first pass" */ + + vfi = malloc(sizeof(vfasl_info)); + vfasl_init(vfi); + (void)vfasl_copy_all(vfi, v); + vfasl_free_chunks(vfi); + free_vfasl_hash_table(vfi->graph); + + installs = vfi->installs_library_entry; + + free(vfi); + + return !installs; +} + +/************************************************************/ +/* Traversals for saving */ + static ptr vfasl_copy_all(vfasl_info *vfi, ptr v) { seginfo *si; int s; @@ -628,7 +835,7 @@ static ptr vfasl_copy_all(vfasl_info *vfi, ptr v) { pp = ptr_add(pp, sweep(vfi, TYPE((ptr)pp, type_closure))); } break; - case vspace_array: + case vspace_impure: while (pp < pp_end) { vfasl_relocate(vfi, pp); pp = ptr_add(pp, sizeof(ptr)); @@ -636,7 +843,8 @@ static ptr vfasl_copy_all(vfasl_info *vfi, ptr v) { break; case vspace_rtd: case vspace_code: - case vspace_typed: + case vspace_pure_typed: + case vspace_impure_record: while (pp < pp_end) { pp = ptr_add(pp, sweep(vfi, TYPE((ptr)pp, type_typed_object))); } @@ -759,8 +967,14 @@ static ptr copy(vfasl_info *vfi, ptr pp, seginfo *si) { } s = vspace_rtd; - } else - s = vspace_typed; + } else { + /* See gc.c for original rationale but the fine-grained + choices only matter when loading into the static + generation, so we make */ + s = (RECORDDESCMPM(rtd) == FIX(0) + ? vspace_pure_typed + : vspace_impure_record); + } n = size_record_inst(UNFIX(RECORDDESCSIZE(rtd))); @@ -773,8 +987,10 @@ static ptr copy(vfasl_info *vfi, ptr pp, seginfo *si) { iptr len, n; len = Svector_length(pp); n = size_vector(len); - FIND_ROOM(vfi, vspace_typed, type_typed_object, n, p); + FIND_ROOM(vfi, vspace_impure, type_typed_object, n, p); copy_ptrs(type_typed_object, p, pp, n); + /* pad if necessary */ + if ((len & 1) == 0) INITVECTIT(p, len) = FIX(0); } else if (TYPEP(tf, mask_string, type_string)) { iptr n; n = size_string(Sstring_length(pp)); @@ -794,19 +1010,27 @@ static ptr copy(vfasl_info *vfi, ptr pp, seginfo *si) { vfasl_fail(vfi, "tlc"); return (ptr)0; } else if (TYPEP(tf, mask_box, type_box)) { - FIND_ROOM(vfi, vspace_typed, type_typed_object, size_box, p); + FIND_ROOM(vfi, vspace_impure, type_typed_object, size_box, p); BOXTYPE(p) = (iptr)tf; INITBOXREF(p) = Sunbox(pp); } else if ((iptr)tf == type_ratnum) { - FIND_ROOM(vfi, vspace_typed, type_typed_object, size_ratnum, p); + /* note: vspace_impure is suboptimal for loading into static + generation, but these will be rare in boot code */ + FIND_ROOM(vfi, vspace_impure, type_typed_object, size_ratnum, p); RATTYPE(p) = type_ratnum; RATNUM(p) = RATNUM(pp); RATDEN(p) = RATDEN(pp); + /* pad */ + ((void **)UNTYPE(p, type_typed_object))[3] = (ptr)0; } else if ((iptr)tf == type_exactnum) { - FIND_ROOM(vfi, vspace_typed, type_typed_object, size_exactnum, p); + /* note: vspace_impure is suboptimal for loading into static + generation, but these will be rare in boot code */ + FIND_ROOM(vfi, vspace_impure, type_typed_object, size_exactnum, p); EXACTNUM_TYPE(p) = type_exactnum; EXACTNUM_REAL_PART(p) = EXACTNUM_REAL_PART(pp); EXACTNUM_IMAG_PART(p) = EXACTNUM_IMAG_PART(pp); + /* pad */ + ((void **)UNTYPE(p, type_typed_object))[3] = (ptr)0; } else if ((iptr)tf == type_inexactnum) { FIND_ROOM(vfi, vspace_data, type_typed_object, size_inexactnum, p); INEXACTNUM_TYPE(p) = type_inexactnum; @@ -825,17 +1049,8 @@ static ptr copy(vfasl_info *vfi, ptr pp, seginfo *si) { n = size_code(CODELEN(pp)); FIND_ROOM(vfi, vspace_code, type_typed_object, n, p); copy_ptrs(type_typed_object, p, pp, n); - if (CODERELOC(pp) == (ptr)0) { - /* We only get here if we're vfasling code that belongs in - the static generation. */ - ptr l; iptr ln; - ln = size_reloc_table(0); - FIND_ROOM(vfi, vspace_reloc, typemod, ln, l); - RELOCSIZE(l) = 0; - RELOCCODE(l) = p; - CODERELOC(p) = l; - vfasl_register_pointer(vfi, &CODERELOC(p)); - } + if (CODERELOC(pp) == (ptr)0) + vfasl_fail(vfi, "code without relocation"); } else if ((iptr)tf == type_rtd_counts) { /* prune counts, since GC will recreate as needed */ return Sfalse; @@ -854,7 +1069,7 @@ static ptr copy(vfasl_info *vfi, ptr pp, seginfo *si) { vfasl_fail(vfi, "weakpair"); return (ptr)0; } else { - FIND_ROOM(vfi, vspace_array, type_pair, size_pair, p); + FIND_ROOM(vfi, vspace_impure, type_pair, size_pair, p); } INITCAR(p) = Scar(pp); INITCDR(p) = Scdr(pp); @@ -864,6 +1079,9 @@ static ptr copy(vfasl_info *vfi, ptr pp, seginfo *si) { if (CODETYPE(code) & (code_flag_continuation << code_flags_offset)) { vfasl_fail(vfi, "continuation"); return (ptr)0; + } else if (CODETYPE(code) & (code_flag_mutable_closure << code_flags_offset)) { + vfasl_fail(vfi, "mutable closure"); + return (ptr)0; } else { iptr len, n; len = CLOSLEN(pp); @@ -880,6 +1098,8 @@ static ptr copy(vfasl_info *vfi, ptr pp, seginfo *si) { INITSYMSPLIST(p) = Snil; INITSYMNAME(p) = SYMNAME(pp); INITSYMHASH(p) = SYMHASH(pp); + if (Sstringp(SYMNAME(pp))) + vfasl_check_install_library_entry(vfi, SYMNAME(pp)); } else if (t == type_flonum) { FIND_ROOM(vfi, vspace_data, type_flonum, size_flonum, p); FLODAT(p) = FLODAT(pp); @@ -1062,6 +1282,10 @@ static uptr sweep_record(vfasl_info *vfi, ptr x) return size_record_inst(UNFIX(RECORDDESCSIZE(rtd))); } + +/*************************************************************/ +/* Code and relocation handling for save and load */ + #define VFASL_RELOC_TAG_BITS 3 #define VFASL_RELOC_C_ENTRY_TAG 1 @@ -1116,6 +1340,8 @@ static uptr sweep_code_object(vfasl_info *vfi, ptr co) { if ((which_singleton = detect_singleton(obj))) { obj = FIX(VFASL_RELOC_SINGLETON(which_singleton)); } else if ((pos = vfasl_hash_table_ref(S_G.c_entries, obj))) { + if ((uptr)pos == CENTRY_install_library_entry) + vfi->installs_library_entry = 1; obj = FIX(VFASL_RELOC_C_ENTRY(pos)); } else if ((pos = vfasl_hash_table_ref(S_G.library_entries, obj))) { obj = FIX(VFASL_RELOC_LIBRARY_ENTRY(pos)); @@ -1133,29 +1359,35 @@ static uptr sweep_code_object(vfasl_info *vfi, ptr co) { obj = (ptr)ptr_diff(obj, vfi->base_addr); } - S_set_code_obj("vfasl", RELOC_TYPE(entry) | reloc_force_abs, co, a, obj, item_off); + S_set_code_obj("vfasl", reloc_abs, co, a, obj, item_off); } - RELOCCODE(t) = co; - CODERELOC(co) = t; - - vfasl_register_pointer(vfi, &RELOCCODE(t)); - vfasl_register_pointer(vfi, &CODERELOC(co)); + RELOCCODE(t) = (ptr)ptr_diff(co, vfi->base_addr); + CODERELOC(co) = (ptr)ptr_diff(t, vfi->base_addr); + /* no vfasl_register_pointer, since relink_code can handle it */ return size_code(CODELEN(co)); } -static void relink_code(ptr co, ptr sym_base, ptr dest_base) { +static void relink_code(ptr co, ptr sym_base, ptr *vspaces, uptr *vspace_offsets, IBOOL to_static) { ptr t; iptr a, m, n; t = CODERELOC(co); + t = ptr_add(vspaces[vspace_reloc], (uptr)t - vspace_offsets[vspace_reloc]); + + if (to_static) + CODERELOC(co) = (ptr)0; + else { + CODERELOC(co) = t; + RELOCCODE(t) = co; + } m = RELOCSIZE(t); a = 0; n = 0; while (n < m) { uptr entry, item_off, code_off; ptr obj; - + entry = RELOCIT(t, n); n += 1; if (RELOC_EXTENDED_FORMAT(entry)) { item_off = RELOCIT(t, n); n += 1; @@ -1165,7 +1397,7 @@ static void relink_code(ptr co, ptr sym_base, ptr dest_base) { code_off = RELOC_CODE_OFFSET(entry); } a += code_off; - obj = S_get_code_obj(RELOC_TYPE(entry) | reloc_force_abs, co, a, item_off); + obj = S_get_code_obj(reloc_abs, co, a, item_off); if (IMMEDIATE(obj)) { if (Sfixnump(obj)) { @@ -1193,7 +1425,8 @@ static void relink_code(ptr co, ptr sym_base, ptr dest_base) { } } else { uptr offset = (uptr)obj; - obj = ptr_add(dest_base, offset); + + obj = find_pointer_from_offset(offset, vspaces, vspace_offsets); if ((TYPEBITS(obj) == type_typed_object) && (TYPEFIELD(obj) == S_G.base_rtd)) { /* Similar to symbols: potentially replace with interned */ @@ -1209,6 +1442,18 @@ static void relink_code(ptr co, ptr sym_base, ptr dest_base) { } } +static ptr find_pointer_from_offset(uptr p_off, ptr *vspaces, uptr *vspace_offsets) +{ + int s = 0; + ITYPE t = TYPEBITS(p_off); + + p_off = (uptr)UNTYPE(p_off, t); + while (p_off >= vspace_offsets[s+1]) + s++; + + return TYPE(ptr_add(vspaces[s], p_off - vspace_offsets[s]), t); +} + /*************************************************************/ static void fasl_init_entry_tables() @@ -1239,6 +1484,20 @@ static void fasl_init_entry_tables() tc_mutex_release() } +static void vfasl_check_install_library_entry(vfasl_info *vfi, ptr name) +{ + const char *ile = "$install-library-entry"; + iptr len = Sstring_length(name), i; + + for (i = 0; i < len; i++) { + if (Sstring_ref(name, i) != (unsigned)ile[i]) + return; + } + + if (!ile[i]) + vfi->installs_library_entry = 1; +} + /*************************************************************/ static int detect_singleton(ptr p) { diff --git a/s/compile.ss b/s/compile.ss index 878dae4ae6..20eec02c62 100644 --- a/s/compile.ss +++ b/s/compile.ss @@ -1614,7 +1614,8 @@ (do-make-boot-header who out machine bootfiles))) (set-who! vfasl-convert-file - (let ([->vfasl (foreign-procedure "(cs)to_vfasl" (scheme-object) scheme-object)]) + (let ([->vfasl (foreign-procedure "(cs)to_vfasl" (scheme-object) scheme-object)] + [vfasl-can-combine? (foreign-procedure "(cs)vfasl_can_combinep" (scheme-object) boolean)]) (lambda (in-file out-file bootfile*) (let ([op ($open-file-output-port who out-file (if (compile-compressed) @@ -1626,15 +1627,28 @@ (emit-boot-header op (constant machine-type) bootfile*)) (let ([ip ($open-file-input-port who in-file (file-options compressed))]) (on-reset (close-port ip) - (let loop () - (let ([x (fasl-read ip)]) - (unless (eof-object? x) - (emit-header op (constant machine-type)) - (let ([bv (->vfasl x)]) - (put-u8 op (constant fasl-type-vfasl-size)) - (put-uptr op (bytevector-length bv)) - (put-bytevector op bv)) - (loop)))) + (let* ([write-out (lambda (x) + (emit-header op (constant machine-type)) + (let ([bv (->vfasl x)]) + (put-u8 op (constant fasl-type-vfasl-size)) + (put-uptr op (bytevector-length bv)) + (put-bytevector op bv)))] + [write-out-accum (lambda (accum) + (unless (null? accum) + (write-out (list->vector (reverse accum)))))]) + (let loop ([accum '()]) + (let ([x (fasl-read ip)]) + (cond + [(eof-object? x) + (write-out-accum accum)] + [(not (vfasl-can-combine? x)) + (write-out-accum accum) + (write-out x) + (loop '())] + [(vector? x) + (loop (append (reverse (vector->list x)) accum))] + [else + (loop (cons x accum))])))) (close-port ip))) (close-port op)))))))) diff --git a/s/library.ss b/s/library.ss index 261fc4e45b..4a69847c47 100644 --- a/s/library.ss +++ b/s/library.ss @@ -74,6 +74,7 @@ ($hand-coded 'nonprocedure-code))) (define $foreign-entry ($hand-coded '$foreign-entry-procedure)) +;; The name `$install-library-entry` is special to `vfasl-can-combine?` (define $install-library-entry ($hand-coded '$install-library-entry-procedure))