Merged changes from libffi and libffi_msvc.

svn: r30
2005-05-31 22:10:51 +00:00 · 2005-05-31 22:10:51 +00:00 · c37ac535b1
commit c37ac535b1
parent a559d96009
34 changed files with 4202 additions and 2531 deletions
--- a/src/foreign/gcc/libffi/ChangeLog
+++ b/src/foreign/gcc/libffi/ChangeLog
@ -1,3 +1,229 @@
+2005-05-17  Kelley Cook  <kcook@gcc.gnu.org>
+
+	* configure.ac: Use AC_C_BIGENDIAN instead of AC_C_BIGENDIAN_CROSS.
+	Use AC_CHECK_SIZEOF instead of AC_COMPILE_CHECK_SIZEOF.
+	* Makefile.am (ACLOCAL_AMFLAGS): Remove -I ../config.
+	* aclocal.m4, configure, fficonfig.h.in, Makefile.in,
+	include/Makefile.in, testsuite/Makefile.in: Regenerate.
+	
+2005-05-09  Mike Stump  <mrs@apple.com>
+
+	* configure: Regenerate.
+
+2005-05-08  Richard Henderson  <rth@redhat.com>
+
+	PR libffi/21285
+	* src/alpha/osf.S: Update unwind info to match code.
+
+2005-05-04  Andreas Degert <ad@papyrus-gmbh.de>
+	    Richard Henderson  <rth@redhat.com>
+
+	* src/x86/ffi64.c (ffi_prep_cif_machdep): Save sse-used flag in
+	bit 11 of flags.
+	(ffi_call): Mask return type field.  Pass ssecount to ffi_call_unix64.
+	(ffi_prep_closure): Set carry bit if sse-used flag set.
+	* src/x86/unix64.S (ffi_call_unix64): Add ssecount argument.
+	Only load sse registers if ssecount non-zero.
+	(ffi_closure_unix64): Only save sse registers if carry set on entry.
+
+2005-04-29  Ralf Corsepius  <ralf.corsepius@rtems.org>
+
+	* configure.ac: Add i*86-*-rtems*, sparc*-*-rtems*,
+	powerpc-*rtems*, arm*-*-rtems*, sh-*-rtems*.
+	* configure: Regenerate.
+
+2005-04-20  Hans-Peter Nilsson  <hp@axis.com>
+
+	* testsuite/lib/libffi-dg.exp (libffi-dg-test-1): In regsub use,
+	have Tcl8.3-compatible intermediate variable.
+
+2005-04-18  Simon Posnjak <simon.posnjak@siol.net> 
+	    Hans-Peter Nilsson  <hp@axis.com>
+
+	* Makefile.am: Add CRIS support.
+	* configure.ac: Likewise.
+	* Makefile.in, configure, testsuite/Makefile.in,
+	include/Makefile.in: Regenerate.
+	* src/cris: New directory.
+	* src/cris/ffi.c, src/cris/sysv.S, src/cris/ffitarget.h: New files.
+	* src/prep_cif.c (ffi_prep_cif): Wrap in #ifndef __CRIS__.
+
+	* testsuite/lib/libffi-dg.exp (libffi-dg-test-1): Replace \n with
+	\r?\n in output tests.
+
+2005-04-12  Mike Stump  <mrs@apple.com>
+
+	* configure: Regenerate.
+
+2005-03-30  Hans Boehm  <Hans.Boehm@hp.com>
+
+	* src/ia64/ffitarget.h (ffi_arg): Use long long instead of DI.
+	
+2005-03-30  Steve Ellcey  <sje@cup.hp.com>
+
+	* src/ia64/ffitarget.h (ffi_arg) ADD DI attribute.
+	(ffi_sarg) Ditto.
+	* src/ia64/unix.S (ffi_closure_unix): Extend gp
+	to 64 bits in ILP32 mode.
+	Load 64 bits even for short data.
+
+2005-03-23  Mike Stump  <mrs@apple.com>
+
+	* src/powerpc/darwin.S: Update for -m64 multilib.
+	* src/powerpc/darwin_closure.S: Likewise.
+
+2005-03-21  Zack Weinberg  <zack@codesourcery.com>
+
+	* configure.ac: Do not invoke TL_AC_GCC_VERSION.
+	Do not set tool_include_dir.
+	* aclocal.m4, configure, Makefile.in, testsuite/Makefile.in:
+	Regenerate.
+	* include/Makefile.am: Set gcc_version and toollibffidir.
+	* include/Makefile.in: Regenerate.
+
+2005-02-22  Andrew Haley  <aph@redhat.com>
+
+	* src/powerpc/ffi.c (ffi_prep_cif_machdep): Bump alignment to
+	odd-numbered register pairs for 64-bit integer types.
+
+2005-02-23  Andreas Tobler  <a.tobler@schweiz.ch>
+
+	PR libffi/20104
+	* testsuite/libffi.call/return_ll1.c: New test case.
+
+2005-02-11  Janis Johnson  <janis187@us.ibm.com>
+
+	* testsuite/libffi.call/cls_align_longdouble.c: Remove dg-options.
+	* testsuite/libffi.call/float.c: Ditto.
+	* testsuite/libffi.call/float2.c: Ditto.
+	* testsuite/libffi.call/float3.c: Ditto.
+
+2005-02-08  Andreas Tobler  <a.tobler@schweiz.ch>
+
+	* src/frv/ffitarget.h: Remove PPC stuff which does not belong to frv.
+
+2005-01-12  Eric Botcazou  <ebotcazou@libertysurf.fr>
+
+	* testsuite/libffi.special/special.exp (cxx_options): Add
+	-shared-libgcc.
+
+2004-12-31  Richard Henderson  <rth@redhat.com>
+
+	* src/types.c (FFI_AGGREGATE_TYPEDEF): Remove.
+	(FFI_TYPEDEF): Rename from FFI_INTEGRAL_TYPEDEF.  Replace size and
+	offset parameters with a type parameter; deduce size and structure
+	alignment.  Update all users.
+
+2004-12-31  Richard Henderson  <rth@redhat.com>
+
+	* src/types.c (FFI_TYPE_POINTER): Define with sizeof.
+	(FFI_TYPE_LONGDOUBLE): Fix for ia64.
+	* src/ia64/ffitarget.h (struct ffi_ia64_trampoline_struct): Move
+	into ffi_prep_closure.
+	* src/ia64/ia64_flags.h, src/ia64/ffi.c, src/ia64/unix.S: Rewrite
+	from scratch.
+
+2004-12-27  Richard Henderson  <rth@redhat.com>
+
+	* src/x86/unix64.S: Fix typo in unwind info.
+
+2004-12-25  Richard Henderson  <rth@redhat.com>
+
+	* src/x86/ffi64.c (struct register_args): Rename from stackLayout.
+	(enum x86_64_reg_class): Add X86_64_COMPLEX_X87_CLASS.
+	(merge_classes): Check for it.
+	(SSE_CLASS_P): New.
+	(classify_argument): Pass byte_offset by value; perform all updates
+	inside struct case.
+	(examine_argument): Add classes argument; handle
+	X86_64_COMPLEX_X87_CLASS.
+	(ffi_prep_args): Merge into ...
+	(ffi_call): ... here.  Share stack frame with ffi_call_unix64.
+	(ffi_prep_cif_machdep): Setup cif->flags for proper structure return.
+	(ffi_fill_return_value): Remove.
+	(ffi_prep_closure): Remove dead assert.
+	(ffi_closure_unix64_inner): Rename from ffi_closure_UNIX64_inner.
+	Rewrite to use struct register_args instead of va_list.  Create
+	flags for handling structure returns.
+	* src/x86/unix64.S: Remove dead strings.
+	(ffi_call_unix64): Rename from ffi_call_UNIX64.  Rewrite to share
+	stack frame with ffi_call.  Handle structure returns properly.
+	(float2sse, floatfloat2sse, double2sse): Remove.
+	(sse2float, sse2double, sse2floatfloat): Remove.
+	(ffi_closure_unix64): Rename from ffi_closure_UNIX64.  Rewrite
+	to handle structure returns properly.
+
+2004-12-08  David Edelsohn  <edelsohn@gnu.org>
+
+	* Makefile.am (AM_MAKEFLAGS): Remove duplicate LIBCFLAGS and
+	PICFLAG.
+	* Makefile.in: Regenerated.
+
+2004-12-02  Richard Sandiford  <rsandifo@redhat.com>
+
+	* configure.ac: Use TL_AC_GCC_VERSION to set gcc_version.
+	* configure, aclocal.m4, Makefile.in: Regenerate.
+	* include/Makefile.in, testsuite/Makefile.in: Regenerate.
+
+2004-11-29  Kelley Cook  <kcook@gcc.gnu.org>
+
+	* configure: Regenerate for libtool change.
+
+2004-11-25  Kelley Cook  <kcook@gcc.gnu.org>
+
+	* configure: Regenerate for libtool reversion.
+
+2004-11-24  Kelley Cook  <kcook@gcc.gnu.org>
+
+	* configure: Regenerate for libtool change.
+
+2004-11-23  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>
+
+	* testsuite/lib/libffi-dg.exp: Use new procs in target-libpath.exp.
+
+2004-11-23  Richard Sandiford  <rsandifo@redhat.com>
+
+	* src/mips/o32.S (ffi_call_O32, ffi_closure_O32): Use jalr instead
+	of jal.  Use an absolute encoding for the frame information.
+
+2004-11-23  Kelley Cook  <kcook@gcc.gnu.org>
+
+	* Makefile.am: Remove no-dependencies.  Add ACLOCAL_AMFLAGS.
+	* acinclude.m4: Delete logic for sincludes.
+	* aclocal.m4, Makefile.in, configure: Regenerate.
+	* include/Makefile: Likewise.
+	* testsuite/Makefile: Likewise.
+
+2004-11-22  Eric Botcazou  <ebotcazou@libertysurf.fr>
+
+	* src/sparc/ffi.c (ffi_prep_closure): Align doubles and 64-bit integers
+	on a 8-byte boundary.
+	* src/sparc/v8.S (ffi_closure_v8): Reserve frame space for arguments.
+
+2004-10-27  Richard Earnshaw  <rearnsha@arm.com>
+
+	* src/arm/ffi.c (ffi_prep_cif_machdep): Handle functions that return
+	long long values.  Round stack allocation to a multiple of 8 bytes
+	for ATPCS compatibility.
+	* src/arm/sysv.S (ffi_call_SYSV): Rework to avoid use of APCS register
+	names.  Handle returning long long types.  Add Thumb and interworking
+	support.  Improve soft-float code.
+
+2004-10-27  Richard Earnshaw  <rearnsha@arm.com>
+
+	* testsuite/lib/libffi-dg.exp (load_gcc_lib): New function.
+	(libffi_exit): New function.
+	(libffi_init): Build the testglue wrapper if needed.
+
+2004-10-25  Eric Botcazou  <ebotcazou@libertysurf.fr>
+
+	PR other/18138
+	* testsuite/lib/libffi-dg.exp: Accept more than one multilib libgcc.
+
+2004-10-25  Kazuhiro Inaoka <inaoka.kazuhiro@renesas.com>
+
+	* src/m32r/ffitarget.h (FFI_CLOSURES): Set to 0.
+
 2004-10-20  Kaz Kojima  <kkojima@gcc.gnu.org>

 	* src/sh/sysv.S (ffi_call_SYSV): Don't align for double data.
@ -169,7 +395,7 @@
 	integer (O32 ABI only).
 	(ffi_prep_closure): new function.
 	(ffi_closure_mips_inner_O32): new function.
-	* src/mips/ffitarget.h: Define `FFI_CLOSURES' and 
+	* src/mips/ffitarget.h: Define `FFI_CLOSURES' and
 	`FFI_TRAMPOLINE_SIZE' appropriately if the ABI is o32.
 	* src/mips/o32.S (ffi_call_O32): add labels for .eh_frame. Return
 	64 bit integers correctly.
@ -249,7 +475,7 @@
 	* src/powerpc/darwin.S: Go through a non-lazy pointer for initial
 	FDE location.
 	* src/powerpc/darwin_closure.S: Likewise.
-	
+
 2004-04-24  Andreas Tobler  <a.tobler@schweiz.ch>

 	* testsuite/libffi.call/cls_multi_schar.c (main): Fix initialization
@ -1059,7 +1285,7 @@

 2003-03-03  Andreas Tobler <a.tobler@schweiz.ch>

-        * src/powerpc/darwin_closure.S: Recode to fit dynamic libraries.
+	* src/powerpc/darwin_closure.S: Recode to fit dynamic libraries.

 2003-02-06  Andreas Tobler <a.tobler@schweiz.ch>

@ -1078,12 +1304,12 @@
 2003-01-31  Christian Cornelssen  <ccorn@cs.tu-berlin.de>,
 	    Andreas Schwab  <schwab@suse.de>

-        * configure.in: Adjust command to source config-ml.in to account
+	* configure.in: Adjust command to source config-ml.in to account
 	for changes to the libffi_basedir definition.
 	(libffi_basedir): Remove ${srcdir} from value and include trailing
 	slash if nonempty.

-        * configure: Regenerate.
+	* configure: Regenerate.

 2003-01-29  Franz Sirl  <Franz.Sirl-kernel@lauterbach.com>

@ -1132,12 +1358,12 @@
 2003-01-13 Andreas Tobler <a.tobler@schweiz.ch>

 	* libffi/src/ffitest.c
-         add closure testcases
+	 add closure testcases

 2003-01-13 Kevin B. Hendricks <khendricks@ivey.uwo.ca>

 	* libffi/src/powerpc/ffi.c
-         fix alignment bug for float (4 byte aligned iso 8 byte)
+	 fix alignment bug for float (4 byte aligned iso 8 byte)

 2003-01-09  Geoffrey Keating  <geoffk@apple.com>

--- a/src/foreign/gcc/libffi/Makefile.am
+++ b/src/foreign/gcc/libffi/Makefile.am
@ -1,6 +1,7 @@
 ## Process this with automake to create Makefile.in

-AUTOMAKE_OPTIONS = foreign no-dependencies subdir-objects
+AUTOMAKE_OPTIONS = foreign subdir-objects
+ACLOCAL_AMFLAGS = -I ..

 # ELI: removed testsuite
 SUBDIRS = include
@ -8,6 +9,7 @@ SUBDIRS = include
 EXTRA_DIST = LICENSE ChangeLog.v1 \
 	src/alpha/ffi.c src/alpha/osf.S src/alpha/ffitarget.h \
 	src/arm/ffi.c src/arm/sysv.S src/arm/ffitarget.h \
+	src/cris/ffi.c src/cris/sysv.S src/cris/ffitarget.h \
 	src/mips/ffi.c src/mips/n32.S src/mips/o32.S \
 	src/mips/ffitarget.h \
 	src/m32r/ffi.c src/m32r/sysv.S src/m32r/ffitarget.h \
@ -66,9 +68,7 @@ AM_MAKEFLAGS = \
 	"CC=$(CC)" \
 	"CXX=$(CXX)" \
 	"LD=$(LD)" \
-	"LIBCFLAGS=$(LIBCFLAGS)" \
 	"NM=$(NM)" \
-	"PICFLAG=$(PICFLAG)" \
 	"RANLIB=$(RANLIB)" \
 	"DESTDIR=$(DESTDIR)"

@ -121,6 +121,9 @@ endif
 if ARM
 nodist_libffi_la_SOURCES += src/arm/sysv.S src/arm/ffi.c
 endif
+if LIBFFI_CRIS
+nodist_libffi_la_SOURCES += src/cris/sysv.S src/cris/ffi.c
+endif
 if FRV
 nodist_libffi_la_SOURCES += src/frv/eabi.S src/frv/ffi.c
 endif
--- a/src/foreign/gcc/libffi/Makefile.in
+++ b/src/foreign/gcc/libffi/Makefile.in
--- a/src/foreign/gcc/libffi/acinclude.m4
+++ b/src/foreign/gcc/libffi/acinclude.m4
@ -1,13 +1,3 @@
-sinclude(../libtool.m4)
-dnl The lines below arrange for aclocal not to bring libtool.m4
-dnl AC_PROG_LIBTOOL into aclocal.m4, while still arranging for automake
-dnl to add a definition of LIBTOOL to Makefile.in.
-ifelse(yes,no,[
-AC_DEFUN([AC_PROG_LIBTOOL],)
-AC_DEFUN([AM_PROG_LIBTOOL],)
-AC_SUBST(LIBTOOL)
-])
-
 # mmap(2) blacklisting.  Some platforms provide the mmap library routine
 # but don't support all of the features we need from it.
 AC_DEFUN([AC_FUNC_MMAP_BLACKLIST],
@ -100,5 +90,3 @@ if test $ac_cv_func_mmap_anon = yes; then
 	    [Define if mmap with MAP_ANON(YMOUS) works.])
 fi
 ])
-
-sinclude(../config/accross.m4)
--- a/src/foreign/gcc/libffi/aclocal.m4
+++ b/src/foreign/gcc/libffi/aclocal.m4
@ -1,4 +1,4 @@
-# generated automatically by aclocal 1.8.3 -*- Autoconf -*-
+# generated automatically by aclocal 1.9.3 -*- Autoconf -*-

 # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
 # Free Software Foundation, Inc.
@ -33,14 +33,14 @@
 # ----------------------------
 # Automake X.Y traces this macro to ensure aclocal.m4 has been
 # generated from the m4 files accompanying Automake X.Y.
-AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version="1.8"])
+AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version="1.9"])

 # AM_SET_CURRENT_AUTOMAKE_VERSION
 # -------------------------------
 # Call AM_AUTOMAKE_VERSION so it can be traced.
 # This function is AC_REQUIREd by AC_INIT_AUTOMAKE.
 AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-	 [AM_AUTOMAKE_VERSION([1.8.3])])
+	 [AM_AUTOMAKE_VERSION([1.9.3])])

 # Figure out how to run the assembler.             -*- Autoconf -*-

@ -140,7 +140,7 @@ am_aux_dir=`cd $ac_aux_dir && pwd`

 # AM_CONDITIONAL                                              -*- Autoconf -*-

-# Copyright (C) 1997, 2000, 2001, 2003 Free Software Foundation, Inc.
+# Copyright (C) 1997, 2000, 2001, 2003, 2004 Free Software Foundation, Inc.

 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@ -177,8 +177,8 @@ else
 fi
 AC_CONFIG_COMMANDS_PRE(
 [if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
-  AC_MSG_ERROR([conditional "$1" was never defined.
-Usually this means the macro was only invoked conditionally.])
+  AC_MSG_ERROR([[conditional "$1" was never defined.
+Usually this means the macro was only invoked conditionally.]])
 fi])])

 # serial 7						-*- Autoconf -*-
@ -298,9 +298,14 @@ AC_CACHE_CHECK([dependency style of $depcc],
       grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 &&
       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
      # icc doesn't choke on unknown options, it will just issue warnings
-      # (even with -Werror).  So we grep stderr for any message
-      # that says an option was ignored.
-      if grep 'ignoring option' conftest.err >/dev/null 2>&1; then :; else
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
        am_cv_$1_dependencies_compiler_type=$depmode
        break
      fi
@ -346,7 +351,8 @@ AC_SUBST([AMDEPBACKSLASH])

 # Generate code to set up dependency tracking.   -*- Autoconf -*-

-# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
+#   Free Software Foundation, Inc.

 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@ -382,27 +388,21 @@ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
  else
    continue
  fi
-  grep '^DEP_FILES *= *[[^ @%:@]]' < "$mf" > /dev/null || continue
-  # Extract the definition of DEP_FILES from the Makefile without
-  # running `make'.
+  # Extract the definition of DEPDIR, am__include, and am__quote
+  # from the Makefile without running `make'.
  DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
  test -z "$DEPDIR" && continue
+  am__include=`sed -n 's/^am__include = //p' < "$mf"`
+  test -z "am__include" && continue
+  am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
  # When using ansi2knr, U may be empty or an underscore; expand it
  U=`sed -n 's/^U = //p' < "$mf"`
-  test -d "$dirpart/$DEPDIR" || mkdir "$dirpart/$DEPDIR"
-  # We invoke sed twice because it is the simplest approach to
-  # changing $(DEPDIR) to its actual value in the expansion.
-  for file in `sed -n '
-    /^DEP_FILES = .*\\\\$/ {
-      s/^DEP_FILES = //
-      :loop
-	s/\\\\$//
-	p
-	n
-	/\\\\$/ b loop
-      p
-    }
-    /^DEP_FILES = / s/^DEP_FILES = //p' < "$mf" | \
+  # Find all dependency output files, they are included files with
+  # $(DEPDIR) in their names.  We invoke sed twice because it is the
+  # simplest approach to changing $(DEPDIR) to its actual value in the
+  # expansion.
+  for file in `sed -n "
+    s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
       sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
    # Make sure the directory exists.
    test -f "$dirpart/$file" && continue
@ -433,7 +433,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
 # This macro actually does too much some checks are only needed if
 # your package does certain things.  But this isn't really a big deal.

-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
 # Free Software Foundation, Inc.

 # This program is free software; you can redistribute it and/or modify
@ -509,7 +509,6 @@ AM_MISSING_PROG(AUTOCONF, autoconf)
 AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
 AM_MISSING_PROG(AUTOHEADER, autoheader)
 AM_MISSING_PROG(MAKEINFO, makeinfo)
-AM_MISSING_PROG(AMTAR, tar)
 AM_PROG_INSTALL_SH
 AM_PROG_INSTALL_STRIP
 AC_REQUIRE([AM_PROG_MKDIR_P])dnl
@ -518,7 +517,9 @@ AC_REQUIRE([AM_PROG_MKDIR_P])dnl
 AC_REQUIRE([AC_PROG_AWK])dnl
 AC_REQUIRE([AC_PROG_MAKE_SET])dnl
 AC_REQUIRE([AM_SET_LEADING_DOT])dnl
-
+_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
+              [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
+	      		     [_AM_PROG_TAR([v7])])])
 _AM_IF_OPTION([no-dependencies],,
 [AC_PROVIDE_IFELSE([AC_PROG_CC],
                  [_AM_DEPENDENCIES(CC)],
@ -841,13 +842,21 @@ fi
 # this.)
 AC_DEFUN([AM_PROG_MKDIR_P],
 [if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then
-  # Keeping the `.' argument allows $(mkdir_p) to be used without
-  # argument.  Indeed, we sometimes output rules like
+  # We used to keep the `.' as first argument, in order to
+  # allow $(mkdir_p) to be used without argument.  As in
  #   $(mkdir_p) $(somedir)
-  # where $(somedir) is conditionally defined.
-  # (`test -n '$(somedir)' && $(mkdir_p) $(somedir)' is a more
-  # expensive solution, as it forces Make to start a sub-shell.)
-  mkdir_p='mkdir -p -- .'
+  # where $(somedir) is conditionally defined.  However this is wrong
+  # for two reasons:
+  #  1. if the package is installed by a user who cannot write `.'
+  #     make install will fail,
+  #  2. the above comment should most certainly read
+  #     $(mkdir_p) $(DESTDIR)$(somedir)
+  #     so it does not work when $(somedir) is undefined and
+  #     $(DESTDIR) is not.
+  #  To support the latter case, we have to write
+  #     test -z "$(somedir)" || $(mkdir_p) $(DESTDIR)$(somedir),
+  #  so the `.' trick is pointless.
+  mkdir_p='mkdir -p --'
 else
  # On NextStep and OpenStep, the `mkdir' command does not
  # recognize any option.  It will interpret all options as
@ -1082,4 +1091,113 @@ fi
 INSTALL_STRIP_PROGRAM="\${SHELL} \$(install_sh) -c -s"
 AC_SUBST([INSTALL_STRIP_PROGRAM])])

+# Check how to create a tarball.                            -*- Autoconf -*-
+
+# Copyright (C) 2004  Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# serial 1
+
+
+# _AM_PROG_TAR(FORMAT)
+# --------------------
+# Check how to create a tarball in format FORMAT.
+# FORMAT should be one of `v7', `ustar', or `pax'.
+#
+# Substitute a variable $(am__tar) that is a command
+# writing to stdout a FORMAT-tarball containing the directory
+# $tardir.
+#     tardir=directory && $(am__tar) > result.tar
+#
+# Substitute a variable $(am__untar) that extracts such
+# a tarball read from stdin.
+#     $(am__untar) < result.tar
+AC_DEFUN([_AM_PROG_TAR],
+[# Always define AMTAR for backward compatibility.
+AM_MISSING_PROG([AMTAR], [tar])
+m4_if([$1], [v7],
+     [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
+     [m4_case([$1], [ustar],, [pax],,
+              [m4_fatal([Unknown tar format])])
+AC_MSG_CHECKING([how to create a $1 tar archive])
+# Loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
+_am_tools=${am_cv_prog_tar_$1-$_am_tools}
+# Do not fold the above two lines into one, because Tru64 sh and
+# Solaris sh will not grok spaces in the rhs of `-'.
+for _am_tool in $_am_tools
+do
+  case $_am_tool in
+  gnutar)
+    for _am_tar in tar gnutar gtar;
+    do
+      AM_RUN_LOG([$_am_tar --version]) && break
+    done
+    am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
+    am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
+    am__untar="$_am_tar -xf -"
+    ;;
+  plaintar)
+    # Must skip GNU tar: if it does not support --format= it doesn't create
+    # ustar tarball either.
+    (tar --version) >/dev/null 2>&1 && continue
+    am__tar='tar chf - "$$tardir"'
+    am__tar_='tar chf - "$tardir"'
+    am__untar='tar xf -'
+    ;;
+  pax)
+    am__tar='pax -L -x $1 -w "$$tardir"'
+    am__tar_='pax -L -x $1 -w "$tardir"'
+    am__untar='pax -r'
+    ;;
+  cpio)
+    am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
+    am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
+    am__untar='cpio -i -H $1 -d'
+    ;;
+  none)
+    am__tar=false
+    am__tar_=false
+    am__untar=false
+    ;;
+  esac
+
+  # If the value was cached, stop now.  We just wanted to have am__tar
+  # and am__untar set.
+  test -n "${am_cv_prog_tar_$1}" && break
+
+  # tar/untar a dummy directory, and stop if the command works
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  echo GrepMe > conftest.dir/file
+  AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
+  rm -rf conftest.dir
+  if test -s conftest.tar; then
+    AM_RUN_LOG([$am__untar <conftest.tar])
+    grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
+  fi
+done
+rm -rf conftest.dir
+
+AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
+AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+AC_SUBST([am__tar])
+AC_SUBST([am__untar])
+]) # _AM_PROG_TAR
+
+m4_include([../libtool.m4])
 m4_include([acinclude.m4])
--- a/src/foreign/gcc/libffi/configure
+++ b/src/foreign/gcc/libffi/configure
--- a/src/foreign/gcc/libffi/configure.ac
+++ b/src/foreign/gcc/libffi/configure.ac
@ -46,6 +46,7 @@ i*86-*-solaris*) TARGET=X86; TARGETDIR=x86;;
 i*86-*-beos*) TARGET=X86; TARGETDIR=x86;;
 i*86-*-freebsd* | i*86-*-kfreebsd*-gnu) TARGET=X86; TARGETDIR=x86;;
 i*86-*-netbsdelf* | i*86-*-knetbsd*-gnu) TARGET=X86; TARGETDIR=x86;;
+i*86-*-rtems*) TARGET=X86; TARGETDIR=x86;;
 i*86-*-win32*) TARGET=X86_WIN32; TARGETDIR=x86;;
 i*86-*-cygwin*) TARGET=X86_WIN32; TARGETDIR=x86;;
 i*86-*-mingw*) TARGET=X86_WIN32; TARGETDIR=x86;;
@ -53,6 +54,7 @@ frv-*-*) TARGET=FRV; TARGETDIR=frv;;
 sparc-sun-4*) TARGET=SPARC; TARGETDIR=sparc;;
 sparc*-sun-*) TARGET=SPARC; TARGETDIR=sparc;;
 sparc-*-linux* | sparc-*-netbsdelf* | sparc-*-knetbsd*-gnu) TARGET=SPARC; TARGETDIR=sparc;;
+sparc*-*-rtems*) TARGET=SPARC; TARGETDIR=sparc;;
 sparc64-*-linux* | sparc64-*-netbsd* | sparc64-*-knetbsd*-gnu) TARGET=SPARC; TARGETDIR=sparc;;
 alpha*-*-linux* | alpha*-*-osf* | alpha*-*-freebsd* | alpha*-*-kfreebsd*-gnu | alpha*-*-netbsd* | alpha*-*-knetbsd*-gnu) TARGET=ALPHA; TARGETDIR=alpha;;
 ia64*-*-*) TARGET=IA64; TARGETDIR=ia64;;
@ -65,13 +67,17 @@ powerpc*-*-linux* | powerpc-*-sysv*) TARGET=POWERPC; TARGETDIR=powerpc;;
 powerpc-*-beos*) TARGET=POWERPC; TARGETDIR=powerpc;;
 powerpc-*-darwin*) TARGET=POWERPC_DARWIN; TARGETDIR=powerpc;;
 powerpc-*-aix*) TARGET=POWERPC_AIX; TARGETDIR=powerpc;;
+powerpc*-*-rtems*) TARGET=POWERPC; TARGETDIR=powerpc;;
 rs6000-*-aix*) TARGET=POWERPC_AIX; TARGETDIR=powerpc;;
 arm*-*-linux-*) TARGET=ARM; TARGETDIR=arm;;
 arm*-*-netbsdelf* | arm*-*-knetbsd*-gnu) TARGET=ARM; TARGETDIR=arm;;
+arm*-*-rtems*) TARGET=ARM; TARGETDIR=arm;;
+cris-*-*) TARGET=LIBFFI_CRIS; TARGETDIR=cris;;
 s390-*-linux-*) TARGET=S390; TARGETDIR=s390;;
 s390x-*-linux-*) TARGET=S390; TARGETDIR=s390;;
 x86_64-*-linux* | x86_64-*-freebsd* | x86_64-*-kfreebsd*-gnu) TARGET=X86_64; TARGETDIR=x86;;
 sh-*-linux* | sh[[34]]*-*-linux*) TARGET=SH; TARGETDIR=sh;;
+sh-*-rtems*) TARGET=SH; TARGETDIR=sh;;
 sh64-*-linux* | sh5*-*-linux*) TARGET=SH64; TARGETDIR=sh64;;
 hppa-*-linux* | parisc-*-linux*) TARGET=PA; TARGETDIR=pa;;
 esac
@ -95,6 +101,7 @@ AM_CONDITIONAL(POWERPC, test x$TARGET = xPOWERPC)
 AM_CONDITIONAL(POWERPC_AIX, test x$TARGET = xPOWERPC_AIX)
 AM_CONDITIONAL(POWERPC_DARWIN, test x$TARGET = xPOWERPC_DARWIN)
 AM_CONDITIONAL(ARM, test x$TARGET = xARM)
+AM_CONDITIONAL(LIBFFI_CRIS, test x$TARGET = xLIBFFI_CRIS)
 AM_CONDITIONAL(FRV, test x$TARGET = xFRV)
 AM_CONDITIONAL(S390, test x$TARGET = xS390)
 AM_CONDITIONAL(X86_64, test x$TARGET = xX86_64)
@ -111,8 +118,8 @@ AC_HEADER_STDC
 AC_CHECK_FUNCS(memcpy)
 AC_FUNC_ALLOCA

-AC_COMPILE_CHECK_SIZEOF(double)
-AC_COMPILE_CHECK_SIZEOF(long double)
+AC_CHECK_SIZEOF(double)
+AC_CHECK_SIZEOF(long double)

 # Also AC_SUBST this variable for ffi.h.
 HAVE_LONG_DOUBLE=0
@ -124,7 +131,7 @@ if test $ac_cv_sizeof_double != $ac_cv_sizeof_long_double; then
 fi
 AC_SUBST(HAVE_LONG_DOUBLE)

-AC_C_BIGENDIAN_CROSS
+AC_C_BIGENDIAN

 if test x$TARGET = xSPARC; then
    AC_CACHE_CHECK([assembler and linker support unaligned pc related relocs],
@ -226,17 +233,6 @@ esac
 AC_SUBST(toolexecdir)
 AC_SUBST(toolexeclibdir)

-#Figure out where generated headers like ffitarget.h get installed.
-changequote(,)dnl
-gcc_version_trigger=${srcdir}/../gcc/version.c
-gcc_version_full=`grep version_string ${gcc_version_trigger} | sed -e 's/.*\"\([^\"]*\)\".*/\1/'`
-gcc_version=`echo ${gcc_version_full} | sed -e 's/\([^ ]*\) .*/\1/'`
-tool_include_dir='$(libdir)/gcc/$(target_alias)/'${gcc_version}/include
-changequote([,])dnl
-AC_SUBST(tool_include_dir)
-AC_SUBST(gcc_version)
-
-
 if test "${multilib}" = "yes"; then
  multilib_arg="--enable-multilib"
 else
--- a/src/foreign/gcc/libffi/fficonfig.h.in
+++ b/src/foreign/gcc/libffi/fficonfig.h.in
@ -1,8 +1,5 @@
 /* fficonfig.h.in.  Generated from configure.ac by autoheader.  */

-/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */
-#undef BYTEORDER
-
 /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
   systems. This function is required for `alloca.c' support on those systems.
   */
@ -88,10 +85,6 @@
 /* Define to 1 if you have the <unistd.h> header file. */
 #undef HAVE_UNISTD_H

-/* Define if the host machine stores words of multi-word integers in
-   big-endian order. */
-#undef HOST_WORDS_BIG_ENDIAN
-
 /* Define to 1 if your C compiler doesn't accept -c and -o together. */
 #undef NO_MINUS_C_MINUS_O

@ -113,10 +106,10 @@
 /* Define to the version of this package. */
 #undef PACKAGE_VERSION

-/* The number of bytes in type double */
+/* The size of a `double', as computed by sizeof. */
 #undef SIZEOF_DOUBLE

-/* The number of bytes in type long double */
+/* The size of a `long double', as computed by sizeof. */
 #undef SIZEOF_LONG_DOUBLE

 /* If using the C implementation of alloca, define if you know the
@ -137,5 +130,6 @@
 /* Version number of package */
 #undef VERSION

-/* whether byteorder is bigendian */
+/* Define to 1 if your processor stores words with the most significant byte
+   first (like Motorola and SPARC, unlike Intel and VAX). */
 #undef WORDS_BIGENDIAN
--- a/src/foreign/gcc/libffi/include/Makefile.am
+++ b/src/foreign/gcc/libffi/include/Makefile.am
@ -9,5 +9,8 @@ hackdir=$(includedir)

 hack_DATA= ffi.h

-toollibffidir = @tool_include_dir@/libffi
+# Where generated headers like ffitarget.h get installed.
+gcc_version   := $(shell cat $(top_srcdir)/../gcc/BASE-VER)
+toollibffidir := $(libdir)/gcc/$(target_alias)/$(gcc_version)/include/libffi
+
 toollibffi_HEADERS = ffitarget.h
--- a/src/foreign/gcc/libffi/include/Makefile.in
+++ b/src/foreign/gcc/libffi/include/Makefile.in
@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.8.3 from Makefile.am.
+# Makefile.in generated by automake 1.9.3 from Makefile.am.
 # @configure_input@

 # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
@ -42,9 +42,8 @@ subdir = include
 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
 	$(srcdir)/ffi.h.in $(toollibffi_HEADERS)
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-	$(top_srcdir)/../libtool.m4 $(top_srcdir)/../config/accross.m4 \
-	$(top_srcdir)/configure.ac
+am__aclocal_m4_deps = $(top_srcdir)/../libtool.m4 \
+	$(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
 	$(ACLOCAL_M4)
 mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
@ -52,6 +51,12 @@ CONFIG_HEADER = $(top_builddir)/fficonfig.h
 CONFIG_CLEAN_FILES = ffi.h ffitarget.h
 SOURCES =
 DIST_SOURCES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
 am__installdirs = "$(DESTDIR)$(hackdir)" "$(DESTDIR)$(toollibffidir)"
 hackDATA_INSTALL = $(INSTALL_DATA)
 DATA = $(hack_DATA)
@ -98,6 +103,8 @@ INSTALL_DATA = @INSTALL_DATA@
 INSTALL_PROGRAM = @INSTALL_PROGRAM@
 INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LIBFFI_CRIS_FALSE = @LIBFFI_CRIS_FALSE@
+LIBFFI_CRIS_TRUE = @LIBFFI_CRIS_TRUE@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
@ -162,6 +169,8 @@ am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
 am__include = @am__include@
 am__leading_dot = @am__leading_dot@
 am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
 bindir = @bindir@
 build = @build@
 build_alias = @build_alias@
@ -170,7 +179,6 @@ build_os = @build_os@
 build_vendor = @build_vendor@
 datadir = @datadir@
 exec_prefix = @exec_prefix@
-gcc_version = @gcc_version@
 host = @host@
 host_alias = @host_alias@
 host_cpu = @host_cpu@
@ -196,7 +204,6 @@ target_alias = @target_alias@
 target_cpu = @target_cpu@
 target_os = @target_os@
 target_vendor = @target_vendor@
-tool_include_dir = @tool_include_dir@
 toolexecdir = @toolexecdir@
 toolexeclibdir = @toolexeclibdir@
 AUTOMAKE_OPTIONS = foreign
@ -204,7 +211,10 @@ DISTCLEANFILES = ffitarget.h
 EXTRA_DIST = ffi.h.in ffi_common.h
 hackdir = $(includedir)
 hack_DATA = ffi.h
-toollibffidir = @tool_include_dir@/libffi
+
+# Where generated headers like ffitarget.h get installed.
+gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER)
+toollibffidir := $(libdir)/gcc/$(target_alias)/$(gcc_version)/include/libffi
 toollibffi_HEADERS = ffitarget.h
 all: all-am

@ -255,7 +265,7 @@ install-hackDATA: $(hack_DATA)
 	test -z "$(hackdir)" || $(mkdir_p) "$(DESTDIR)$(hackdir)"
 	@list='$(hack_DATA)'; for p in $$list; do \
 	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
-	  f="`echo $$p | sed -e 's|^.*/||'`"; \
+	  f=$(am__strip_dir) \
 	  echo " $(hackDATA_INSTALL) '$$d$$p' '$(DESTDIR)$(hackdir)/$$f'"; \
 	  $(hackDATA_INSTALL) "$$d$$p" "$(DESTDIR)$(hackdir)/$$f"; \
 	done
@ -263,7 +273,7 @@ install-hackDATA: $(hack_DATA)
 uninstall-hackDATA:
 	@$(NORMAL_UNINSTALL)
 	@list='$(hack_DATA)'; for p in $$list; do \
-	  f="`echo $$p | sed -e 's|^.*/||'`"; \
+	  f=$(am__strip_dir) \
 	  echo " rm -f '$(DESTDIR)$(hackdir)/$$f'"; \
 	  rm -f "$(DESTDIR)$(hackdir)/$$f"; \
 	done
@ -272,7 +282,7 @@ install-toollibffiHEADERS: $(toollibffi_HEADERS)
 	test -z "$(toollibffidir)" || $(mkdir_p) "$(DESTDIR)$(toollibffidir)"
 	@list='$(toollibffi_HEADERS)'; for p in $$list; do \
 	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
-	  f="`echo $$p | sed -e 's|^.*/||'`"; \
+	  f=$(am__strip_dir) \
 	  echo " $(toollibffiHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(toollibffidir)/$$f'"; \
 	  $(toollibffiHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(toollibffidir)/$$f"; \
 	done
@ -280,7 +290,7 @@ install-toollibffiHEADERS: $(toollibffi_HEADERS)
 uninstall-toollibffiHEADERS:
 	@$(NORMAL_UNINSTALL)
 	@list='$(toollibffi_HEADERS)'; for p in $$list; do \
-	  f="`echo $$p | sed -e 's|^.*/||'`"; \
+	  f=$(am__strip_dir) \
 	  echo " rm -f '$(DESTDIR)$(toollibffidir)/$$f'"; \
 	  rm -f "$(DESTDIR)$(toollibffidir)/$$f"; \
 	done
@ -305,9 +315,11 @@ TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
 	  done | \
 	  $(AWK) '    { files[$$0] = 1; } \
 	       END { for (i in files) print i; }'`; \
-	test -z "$(ETAGS_ARGS)$$tags$$unique" \
-	  || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	     $$tags $$unique
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
+	fi
 ctags: CTAGS
 CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
 		$(TAGS_FILES) $(LISP)
@ -384,7 +396,7 @@ mostlyclean-generic:
 clean-generic:

 distclean-generic:
-	-rm -f $(CONFIG_CLEAN_FILES)
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
 	-test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES)

 maintainer-clean-generic:
--- a/src/foreign/gcc/libffi/src/alpha/osf.S
+++ b/src/foreign/gcc/libffi/src/alpha/osf.S
@ -3,7 +3,7 @@
   
   Alpha/OSF Foreign Function Interface 

-   $Id: osf.S,v 1.2 2004/10/25 07:21:13 eli Exp $
+   $Id: osf.S,v 1.1.1.1 1998/11/29 16:48:16 green Exp $

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
@ -49,12 +49,10 @@ $LFB1:
 	addq	$16,$17,$1
 	mov	$16, $30
 	stq	$26, 0($1)
-$LCFI0:
 	stq	$15, 8($1)
-$LCFI1:
 	stq	$18, 16($1)
 	mov	$1, $15
-$LCFI2:
+$LCFI1:
 	.prologue 0

 	stq	$19, 24($1)
@ -84,6 +82,7 @@ $LCFI2:
 	ldq	$19, 24($15)
 	ldq	$18, 16($15)
 	ldq	$26, 0($15)
+$LCFI2:
 	beq	$19, $noretval

 	# Store the return value out in the proper type.
@ -94,22 +93,26 @@ $LCFI2:
 	cmpeq	$18, FFI_TYPE_DOUBLE, $3
 	bne	$3, $retdouble

+	.align	3
 $noretval:
 	ldq	$15, 8($15)
 	ret

+	.align	4
 $retint:
 	stq	$0, 0($19)
 	nop
 	ldq	$15, 8($15)
 	ret

+	.align	4
 $retfloat:
 	sts	$f0, 0($19)
 	nop
 	ldq	$15, 8($15)
 	ret

+	.align	4
 $retdouble:
 	stt	$f0, 0($19)
 	nop
@ -295,61 +298,62 @@ $load_table:
 #ifdef __ELF__
 	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
 __FRAME_BEGIN__:
-	.4byte	$LECIE1-$LSCIE1	 # Length of Common Information Entry
+	.4byte	$LECIE1-$LSCIE1	# Length of Common Information Entry
 $LSCIE1:
-	.4byte	0x0	 # CIE Identifier Tag
-	.byte	0x1	 # CIE Version
-	.ascii "zR\0"	 # CIE Augmentation
-	.byte	0x1	 # uleb128 0x1; CIE Code Alignment Factor
-	.byte	0x78	 # sleb128 -8; CIE Data Alignment Factor
-	.byte	0x1a	 # CIE RA Column
-	.byte	0x1	 # uleb128 0x1; Augmentation size
-	.byte	0x1b	 # FDE Encoding (pcrel sdata4)
-	.byte	0xc	 # DW_CFA_def_cfa
-	.byte	0x1e	 # uleb128 0x1e
-	.byte	0x0	 # uleb128 0x0
+	.4byte	0x0		# CIE Identifier Tag
+	.byte	0x1		# CIE Version
+	.ascii "zR\0"		# CIE Augmentation
+	.byte	0x1		# uleb128 0x1; CIE Code Alignment Factor
+	.byte	0x78		# sleb128 -8; CIE Data Alignment Factor
+	.byte	26		# CIE RA Column
+	.byte	0x1		# uleb128 0x1; Augmentation size
+	.byte	0x1b		# FDE Encoding (pcrel sdata4)
+	.byte	0xc		# DW_CFA_def_cfa
+	.byte	30		# uleb128 column 30
+	.byte	0		# uleb128 offset 0
 	.align 3
 $LECIE1:
 $LSFDE1:
-	.4byte	$LEFDE1-$LASFDE1	 # FDE Length
+	.4byte	$LEFDE1-$LASFDE1		# FDE Length
 $LASFDE1:
-	.4byte	$LASFDE1-__FRAME_BEGIN__	 # FDE CIE offset
-	.4byte	$LFB1-.	 # FDE initial location
-	.4byte	$LFE1-$LFB1	 # FDE address range
-	.byte	0x0	 # uleb128 0x0; Augmentation size
-	.byte	0x4	 # DW_CFA_advance_loc4
-	.4byte	$LCFI0-$LFB1
-	.byte	0xe	 # DW_CFA_def_cfa_offset
-	.byte	0x30	 # uleb128 0x30
-	.byte	0x4	 # DW_CFA_advance_loc4
-	.4byte	$LCFI1-$LCFI0
-	.byte	0x9a	 # DW_CFA_offset, column 0x1a
-	.byte	0x6	 # uleb128 0x6
-	.byte	0x8f	 # DW_CFA_offset, column 0xf
-	.byte	0x5	 # uleb128 0x5
-	.byte	0x4	 # DW_CFA_advance_loc4
+	.4byte	$LASFDE1-__FRAME_BEGIN__	# FDE CIE offset
+	.4byte	$LFB1-.		# FDE initial location
+	.4byte	$LFE1-$LFB1	# FDE address range
+	.byte	0x0		# uleb128 0x0; Augmentation size
+
+	.byte	0x4		# DW_CFA_advance_loc4
+	.4byte	$LCFI1-$LFB1
+	.byte	0x9a		# DW_CFA_offset, column 26
+	.byte	4		# uleb128 4*-8
+	.byte	0x8f		# DW_CFA_offset, column 15
+	.byte	0x3		# uleb128 3*-8
+	.byte	0xc		# DW_CFA_def_cfa
+	.byte	15		# uleb128 column 15
+	.byte	32		# uleb128 offset 32
+
+	.byte	0x4		# DW_CFA_advance_loc4
 	.4byte	$LCFI2-$LCFI1
-	.byte	0xc	 # DW_CFA_def_cfa
-	.byte	0xf	 # uleb128 0xf
-	.byte	0x30	 # uleb128 0x30
+	.byte	0xda		# DW_CFA_restore, column 26
 	.align 3
 $LEFDE1:

 $LSFDE3:
-	.4byte	$LEFDE3-$LASFDE3	 # FDE Length
+	.4byte	$LEFDE3-$LASFDE3		# FDE Length
 $LASFDE3:
-	.4byte	$LASFDE3-__FRAME_BEGIN__	 # FDE CIE offset
-	.4byte	$LFB2-.	 # FDE initial location
-	.4byte	$LFE2-$LFB2	 # FDE address range
-	.byte	0x0	 # uleb128 0x0; Augmentation size
-	.byte	0x4	 # DW_CFA_advance_loc4
+	.4byte	$LASFDE3-__FRAME_BEGIN__	# FDE CIE offset
+	.4byte	$LFB2-.		# FDE initial location
+	.4byte	$LFE2-$LFB2	# FDE address range
+	.byte	0x0		# uleb128 0x0; Augmentation size
+
+	.byte	0x4		# DW_CFA_advance_loc4
 	.4byte	$LCFI5-$LFB2
-	.byte	0xe	 # DW_CFA_def_cfa_offset
-	.byte	0x90,0x1	 # uleb128 0x90
-	.byte	0x4	 # DW_CFA_advance_loc4
+	.byte	0xe		# DW_CFA_def_cfa_offset
+	.byte	0x80,0x1	# uleb128 128
+
+	.byte	0x4		# DW_CFA_advance_loc4
 	.4byte	$LCFI6-$LCFI5
-	.byte	0x9a	 # DW_CFA_offset, column 0x1a
-	.byte	0x12	 # uleb128 0x12
+	.byte	0x9a		# DW_CFA_offset, column 26
+	.byte	16		# uleb128 offset 16*-8
 	.align 3
 $LEFDE3:
 #endif
--- a/src/foreign/gcc/libffi/src/arm/ffi.c
+++ b/src/foreign/gcc/libffi/src/arm/ffi.c
@ -108,6 +108,11 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
 /* Perform machine dependent cif processing */
 ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
 {
+  /* Round the stack up to a multiple of 8 bytes.  This isn't needed 
+     everywhere, but it is on some platforms, and it doesn't harm anything
+     when it isn't needed.  */
+  cif->bytes = (cif->bytes + 7) & ~7;
+
  /* Set the return type flag */
  switch (cif->rtype->type)
    {
@ -118,6 +123,11 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
      cif->flags = (unsigned) cif->rtype->type;
      break;

+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_UINT64:
+      cif->flags = (unsigned) FFI_TYPE_SINT64;
+      break;
+
    default:
      cif->flags = FFI_TYPE_INT;
      break;
--- a/src/foreign/gcc/libffi/src/arm/sysv.S
+++ b/src/foreign/gcc/libffi/src/arm/sysv.S
@ -40,87 +40,169 @@
 #endif
 #define ENTRY(x) .globl CNAME(x); .type CNAME(x),%function; CNAME(x):
 #endif
+
+#ifdef __ELF__
+#define LSYM(x) .x
+#else
+#define LSYM(x) x
+#endif
+
+/* We need a better way of testing for this, but for now, this is all 
+   we can do.  */
+@ This selects the minimum architecture level required.
+#define __ARM_ARCH__ 3
+
+#if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
+# undef __ARM_ARCH__
+# define __ARM_ARCH__ 4
+#endif
+        
+#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+	|| defined(__ARM_ARCH_5TEJ__)
+# undef __ARM_ARCH__
+# define __ARM_ARCH__ 5
+#endif
+
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+        || defined(__ARM_ARCH_6ZK__)
+# undef __ARM_ARCH__
+# define __ARM_ARCH__ 6
+#endif
+
+#if __ARM_ARCH__ >= 5
+# define call_reg(x)	blx	x
+#elif defined (__ARM_ARCH_4T__)
+# define call_reg(x)	mov	lr, pc ; bx	x
+# if defined(__thumb__) || defined(__THUMB_INTERWORK__)
+#  define __INTERWORKING__
+# endif
+#else
+# define call_reg(x)	mov	lr, pc ; mov	pc, x
+#endif
+
+#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+.macro	ARM_FUNC_START name
+	.text
+	.align 0
+	.thumb
+	.thumb_func
+	ENTRY(\name)
+	bx	pc
+	nop
+	.arm
+/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
+   directly from other local arm routines.  */
+_L__\name:		
+.endm
+#else
+.macro	ARM_FUNC_START name
+	.text
+	.align 0
+	.arm
+	ENTRY(\name)
+.endm
+#endif
+
+.macro	RETLDM	regs=, cond=, dirn=ia
+#if defined (__INTERWORKING__)
+	.ifc "\regs",""
+	ldr\cond	lr, [sp], #4
+	.else
+	ldm\cond\dirn	sp!, {\regs, lr}
+	.endif
+	bx\cond	lr
+#else
+	.ifc "\regs",""
+	ldr\cond	pc, [sp], #4
+	.else
+	ldm\cond\dirn	sp!, {\regs, pc}
+	.endif
+#endif
+.endm
+
+
+	@ r0:   ffi_prep_args
+	@ r1:   &ecif
+	@ r2:   cif->bytes
+	@ r3:   cif->flags
+	@ sp+0: ecif.rvalue
+	@ sp+4: fn
+
+	@ This assumes we are using gas.
+ARM_FUNC_START ffi_call_SYSV
+	@ Save registers
+        stmfd	sp!, {r0-r3, fp, lr}
+	mov	fp, sp
+
+	@ Make room for all of the new args.
+	sub	sp, fp, r2
+
+	@ Place all of the ffi_prep_args in position
+	mov	ip, r0
+	mov	r0, sp
+	@     r1 already set
+
+	@ Call ffi_prep_args(stack, &ecif)
+	call_reg(ip)
+
+	@ move first 4 parameters in registers
+	ldmia	sp, {r0-r3}
+
+	@ and adjust stack
+	ldr	ip, [fp, #8]
+        cmp	ip, #16
+	movhs	ip, #16
+        add	sp, sp, ip
+
+	@ call (fn) (...)
+	ldr	ip, [fp, #28]
+	call_reg(ip)
 	
-.text
+	@ Remove the space we pushed for the args
+	mov	sp, fp

-	# a1:   ffi_prep_args
-	# a2:   &ecif
-	# a3:   cif->bytes
-	# a4:   fig->flags
-	# sp+0: ecif.rvalue
-	# sp+4: fn
+	@ Load r2 with the pointer to storage for the return value
+	ldr	r2, [sp, #24]

-	# This assumes we are using gas.
-ENTRY(ffi_call_SYSV)
-	# Save registers
-        stmfd sp!, {a1-a4, fp, lr}
-	mov   fp, sp
+	@ Load r3 with the return type code 
+	ldr	r3, [sp, #12]

-	# Make room for all of the new args.
-	sub   sp, fp, a3
+	@ If the return value pointer is NULL, assume no return value.
+	cmp	r2, #0
+	beq	LSYM(Lepilogue)

-	# Place all of the ffi_prep_args in position
-	mov   ip, a1
-	mov   a1, sp
-	#     a2 already set
-
-	# And call
-	mov   lr, pc
-	mov   pc, ip
-
-	# move first 4 parameters in registers
-	ldr   a1, [sp, #0]
-	ldr   a2, [sp, #4]
-	ldr   a3, [sp, #8]
-        ldr   a4, [sp, #12]
-
-	# and adjust stack
-	ldr   ip, [fp, #8]
-        cmp   ip, #16
-	movge ip, #16
-        add   sp, sp, ip
-
-	# call function
-	mov   lr, pc
-	ldr   pc, [fp, #28]
-
-	# Remove the space we pushed for the args
-	mov   sp, fp
-
-	# Load a3 with the pointer to storage for the return value
-	ldr   a3, [sp, #24]
-
-	# Load a4 with the return type code 
-	ldr   a4, [sp, #12]
-
-	# If the return value pointer is NULL, assume no return value.
-	cmp   a3, #0
-	beq   epilogue
-
-# return INT
-	cmp   a4, #FFI_TYPE_INT
-	streq a1, [a3]
-	beq   epilogue
-
-# return FLOAT
-	cmp     a4, #FFI_TYPE_FLOAT
+@ return INT
+	cmp	r3, #FFI_TYPE_INT
 #ifdef __SOFTFP__
-	streq	a1, [a3]
-#else
-	stfeqs  f0, [a3]
+	cmpne	r3, #FFI_TYPE_FLOAT
 #endif
-	beq     epilogue
+	streq	r0, [r2]
+	beq	LSYM(Lepilogue)

-# return DOUBLE or LONGDOUBLE
-	cmp     a4, #FFI_TYPE_DOUBLE
+	@ return INT64
+	cmp	r3, #FFI_TYPE_SINT64
 #ifdef __SOFTFP__
-	stmeqia	a3, {a1, a2}
-#else
-	stfeqd  f0, [a3]
+	cmpne	r3, #FFI_TYPE_DOUBLE
+#endif
+	stmeqia	r2, {r0, r1}
+
+#ifndef __SOFTFP__
+	beq	LSYM(Lepilogue)
+
+@ return FLOAT
+	cmp	r3, #FFI_TYPE_FLOAT
+	stfeqs	f0, [r2]
+	beq	LSYM(Lepilogue)
+
+@ return DOUBLE or LONGDOUBLE
+	cmp	r3, #FFI_TYPE_DOUBLE
+	stfeqd	f0, [r2]
 #endif

-epilogue:
-        ldmfd sp!, {a1-a4, fp, pc}
+LSYM(Lepilogue):
+	RETLDM	"r0-r3,fp"

 .ffi_call_SYSV_end:
        .size    CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV)
--- a/src/foreign/gcc/libffi/src/cris/ffi.c
+++ b/src/foreign/gcc/libffi/src/cris/ffi.c
@ -0,0 +1,381 @@
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 1998 Cygnus Solutions
+           Copyright (c) 2004 Simon Posnjak
+	   Copyright (c) 2005 Axis Communications AB
+
+   CRIS Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL SIMON POSNJAK BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG)
+
+static ffi_status
+initialize_aggregate_packed_struct (ffi_type * arg)
+{
+  ffi_type **ptr;
+
+  FFI_ASSERT (arg != NULL);
+
+  FFI_ASSERT (arg->elements != NULL);
+  FFI_ASSERT (arg->size == 0);
+  FFI_ASSERT (arg->alignment == 0);
+
+  ptr = &(arg->elements[0]);
+
+  while ((*ptr) != NULL)
+    {
+      if (((*ptr)->size == 0)
+	  && (initialize_aggregate_packed_struct ((*ptr)) != FFI_OK))
+	return FFI_BAD_TYPEDEF;
+
+      FFI_ASSERT (ffi_type_test ((*ptr)));
+
+      arg->size += (*ptr)->size;
+
+      arg->alignment = (arg->alignment > (*ptr)->alignment) ?
+	arg->alignment : (*ptr)->alignment;
+
+      ptr++;
+    }
+
+  if (arg->size == 0)
+    return FFI_BAD_TYPEDEF;
+  else
+    return FFI_OK;
+}
+
+/* Marshal the arguments described by ECIF into the outgoing argument
+   block at STACK.  ffi_call hands this function to ffi_call_SYSV,
+   which calls it back once room for the arguments has been made.
+
+   Every argument gets a slot, in call order: integers narrower than
+   int are sign- or zero-extended to a full int, structs of up to 4
+   (or 8) bytes are copied by value into a 4- (or 8-) byte slot, and
+   larger structs are copied to an area that follows all the slots,
+   with a pointer to the copy stored in the slot.
+
+   Returns the total size in bytes of the by-reference struct copies.  */
+
+int
+ffi_prep_args (char *stack, extended_cif * ecif)
+{
+  unsigned int i;
+  unsigned int struct_count = 0;
+  unsigned int slot_bytes = 0;
+  void **p_argv;
+  char *argp;
+  ffi_type **p_arg;
+
+  argp = stack;
+
+  p_argv = ecif->avalue;
+
+  /* Work out how much room the argument slots themselves take, so that
+     the by-reference struct copies can be placed after the last slot.
+     Assuming 4 bytes per argument is wrong as soon as one argument
+     occupies an 8-byte slot: the copies would then overlap the slots.
+     The sizes here must mirror the slot sizes used in the loop below.  */
+  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+       (i != 0); i--, p_arg++)
+    {
+      size_t sz = (*p_arg)->size;
+
+      if ((*p_arg)->type == FFI_TYPE_STRUCT)
+	slot_bytes += (sz <= 4) ? 4 : (sz <= 8) ? 8 : sizeof (void *);
+      else
+	slot_bytes += (sz < sizeof (int)) ? sizeof (int) : sz;
+    }
+
+  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+       (i != 0); i--, p_arg++)
+    {
+      size_t z;
+
+      switch ((*p_arg)->type)
+	{
+	case FFI_TYPE_STRUCT:
+	  {
+	    z = (*p_arg)->size;
+	    if (z <= 4)
+	      {
+		/* Small struct: by value in one 4-byte slot.  */
+		memcpy (argp, *p_argv, z);
+		z = 4;
+	      }
+	    else if (z <= 8)
+	      {
+		/* Medium struct: by value in one 8-byte slot.  */
+		memcpy (argp, *p_argv, z);
+		z = 8;
+	      }
+	    else
+	      {
+		/* Large struct: copy it behind the slot area and pass a
+		   pointer to the copy.  */
+		unsigned int uiLocOnStack;
+		z = sizeof (void *);
+		uiLocOnStack = slot_bytes + struct_count;
+		struct_count = struct_count + (*p_arg)->size;
+		*(unsigned int *) argp =
+		  (unsigned int) (UINT32 *) (stack + uiLocOnStack);
+		memcpy ((stack + uiLocOnStack), *p_argv, (*p_arg)->size);
+	      }
+	    break;
+	  }
+	default:
+	  z = (*p_arg)->size;
+	  if (z < sizeof (int))
+	    {
+	      /* Widen sub-int integers to a full int slot.  */
+	      switch ((*p_arg)->type)
+		{
+		case FFI_TYPE_SINT8:
+		  *(signed int *) argp = (signed int) *(SINT8 *) (*p_argv);
+		  break;
+
+		case FFI_TYPE_UINT8:
+		  *(unsigned int *) argp =
+		    (unsigned int) *(UINT8 *) (*p_argv);
+		  break;
+
+		case FFI_TYPE_SINT16:
+		  *(signed int *) argp = (signed int) *(SINT16 *) (*p_argv);
+		  break;
+
+		case FFI_TYPE_UINT16:
+		  *(unsigned int *) argp =
+		    (unsigned int) *(UINT16 *) (*p_argv);
+		  break;
+
+		default:
+		  FFI_ASSERT (0);
+		}
+	      z = sizeof (int);
+	    }
+	  else if (z == sizeof (int))
+	    *(unsigned int *) argp = (unsigned int) *(UINT32 *) (*p_argv);
+	  else
+	    memcpy (argp, *p_argv, z);
+	  break;
+	}
+      p_argv++;
+      argp += z;
+    }
+
+  return (struct_count);
+}
+
+ffi_status
+ffi_prep_cif (ffi_cif * cif,
+	      ffi_abi abi, unsigned int nargs,
+	      ffi_type * rtype, ffi_type ** atypes)
+{
+  unsigned bytes = 0;
+  unsigned int i;
+  ffi_type **ptr;
+
+  FFI_ASSERT (cif != NULL);
+  FFI_ASSERT ((abi > FFI_FIRST_ABI) && (abi <= FFI_DEFAULT_ABI));
+
+  cif->abi = abi;
+  cif->arg_types = atypes;
+  cif->nargs = nargs;
+  cif->rtype = rtype;
+
+  cif->flags = 0;
+
+  if ((cif->rtype->size == 0)
+      && (initialize_aggregate_packed_struct (cif->rtype) != FFI_OK))
+    return FFI_BAD_TYPEDEF;
+
+  FFI_ASSERT_VALID_TYPE (cif->rtype);
+
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+      if (((*ptr)->size == 0)
+	  && (initialize_aggregate_packed_struct ((*ptr)) != FFI_OK))
+	return FFI_BAD_TYPEDEF;
+
+      FFI_ASSERT_VALID_TYPE (*ptr);
+
+      if (((*ptr)->alignment - 1) & bytes)
+	bytes = ALIGN (bytes, (*ptr)->alignment);
+      if ((*ptr)->type == FFI_TYPE_STRUCT)
+	{
+	  if ((*ptr)->size > 8)
+	    {
+	      bytes += (*ptr)->size;
+	      bytes += sizeof (void *);
+	    }
+	  else
+	    {
+	      if ((*ptr)->size > 4)
+		bytes += 8;
+	      else
+		bytes += 4;
+	    }
+	}
+      else
+	bytes += STACK_ARG_SIZE ((*ptr)->size);
+    }
+
+  cif->bytes = bytes;
+
+  return ffi_prep_cif_machdep (cif);
+}
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif * cif)
+{
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_VOID:
+    case FFI_TYPE_STRUCT:
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_UINT64:
+      cif->flags = (unsigned) cif->rtype->type;
+      break;
+
+    default:
+      cif->flags = FFI_TYPE_INT;
+      break;
+    }
+
+  return FFI_OK;
+}
+
+extern void ffi_call_SYSV (int (*)(char *, extended_cif *),
+			   extended_cif *,
+			   unsigned, unsigned, unsigned *, void (*fn) ())
+     __attribute__ ((__visibility__ ("hidden")));
+
+void
+ffi_call (ffi_cif * cif, void (*fn) (), void *rvalue, void **avalue)
+{
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+
+  if ((rvalue == NULL) && (cif->rtype->type == FFI_TYPE_STRUCT))
+    {
+      ecif.rvalue = alloca (cif->rtype->size);
+    }
+  else
+    ecif.rvalue = rvalue;
+
+  switch (cif->abi)
+    {
+    case FFI_SYSV:
+      ffi_call_SYSV (ffi_prep_args, &ecif, cif->bytes,
+		     cif->flags, ecif.rvalue, fn);
+      break;
+    default:
+      FFI_ASSERT (0);
+      break;
+    }
+}
+
+/* Because the following variables are not exported outside libffi, we
+   mark them hidden.  */
+
+/* Assembly code for the jump stub.  */
+extern const char ffi_cris_trampoline_template[]
+ __attribute__ ((__visibility__ ("hidden")));
+
+/* Offset into ffi_cris_trampoline_template of where to put the
+   ffi_prep_closure_inner function.  */
+extern const int ffi_cris_trampoline_fn_offset
+ __attribute__ ((__visibility__ ("hidden")));
+
+/* Offset into ffi_cris_trampoline_template of where to put the
+   closure data.  */
+extern const int ffi_cris_trampoline_closure_offset
+ __attribute__ ((__visibility__ ("hidden")));
+
+/* This function is sibling-called (jumped to) by the closure
+   trampoline.  We get R10..R13 at PARAMS[0..3] and a copy of [SP] at
+   PARAMS[4] to simplify handling of a straddling parameter.  A copy
+   of R9 is at PARAMS[5] and SP at PARAMS[6].  These parameters are
+   put at the appropriate place in CLOSURE which is then executed and
+   the return value is passed back to the caller.  */
+
+static unsigned long long
+ffi_prep_closure_inner (void **params, ffi_closure* closure)
+{
+  char *register_args = (char *) params;
+  void *struct_ret = params[5];
+  char *stack_args = params[6];
+  char *ptr = register_args;
+  ffi_cif *cif = closure->cif;
+  ffi_type **arg_types = cif->arg_types;
+
+  /* Room for one pointer per argument.  */
+  void **avalue = alloca (closure->cif->nargs * sizeof(void *));
+  int i;
+  int doing_regs;
+  long long llret = 0;
+
+  /* Find the address of each argument.  */
+  for (i = 0, doing_regs = 1; i < cif->nargs; i++)
+    {
+      /* Types up to and including 8 bytes go by-value.  */
+      if (arg_types[i]->size <= 4)
+	{
+	  avalue[i] = ptr;
+	  ptr += 4;
+	}
+      else if (arg_types[i]->size <= 8)
+	{
+	  avalue[i] = ptr;
+	  ptr += 8;
+	}
+      else
+	{
+	  FFI_ASSERT (arg_types[i]->type == FFI_TYPE_STRUCT);
+
+	  /* Passed by-reference, so copy the pointer.  */
+	  avalue[i] = *(void **) ptr;
+	  ptr += 4;
+	}
+
+      /* If we've handled more arguments than fit in registers, start
+	 looking at those passed on the stack.  Step over the
+	 first one if we had a straddling parameter.  */
+      if (doing_regs && ptr >= register_args + 4*4)
+	{
+	  ptr = stack_args + ((ptr > register_args + 4*4) ? 4 : 0);
+	  doing_regs = 0;
+	}
+    }
+
+  /* Invoke the closure.  */
+  (closure->fun) (cif,
+
+		  cif->rtype->type == FFI_TYPE_STRUCT
+		  /* The caller allocated space for the return
+		     structure, and passed a pointer to this space in
+		     R9.  */
+		  ? struct_ret
+
+		  /* We take advantage of being able to ignore that
+		     the high part isn't set if the return value is
+		     not in R10:R11, but in R10 only.  */
+		  : (void *) &llret,
+
+		  avalue, closure->user_data);
+
+  return llret;
+}
+
+/* API function: Prepare the trampoline.  */
+
+ffi_status
+ffi_prep_closure (ffi_closure* closure,
+		  ffi_cif* cif,
+		  void (*fun)(ffi_cif *, void *, void **, void*),
+		  void *user_data)
+{
+  void *innerfn = ffi_prep_closure_inner;
+  FFI_ASSERT (cif->abi == FFI_SYSV);
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+  memcpy (closure->tramp, ffi_cris_trampoline_template,
+	  FFI_CRIS_TRAMPOLINE_CODE_PART_SIZE);
+  memcpy (closure->tramp + ffi_cris_trampoline_fn_offset,
+	  &innerfn, sizeof (void *));
+  memcpy (closure->tramp + ffi_cris_trampoline_closure_offset,
+	  &closure, sizeof (void *));
+
+  return FFI_OK;
+}
--- a/src/foreign/gcc/libffi/src/cris/ffitarget.h
+++ b/src/foreign/gcc/libffi/src/cris/ffitarget.h
@ -0,0 +1,50 @@
+/* -----------------------------------------------------------------*-C-*-
+   ffitarget.h - Copyright (c) 1996-2003  Red Hat, Inc.
+   Target configuration macros for CRIS.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+
+   ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_ASM
+typedef unsigned long          ffi_arg;
+typedef signed long            ffi_sarg;
+
+typedef enum ffi_abi {
+  FFI_FIRST_ABI = 0,
+  FFI_SYSV,
+  FFI_DEFAULT_ABI = FFI_SYSV,
+  FFI_LAST_ABI = FFI_DEFAULT_ABI + 1
+} ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_CRIS_TRAMPOLINE_CODE_PART_SIZE 36
+#define FFI_CRIS_TRAMPOLINE_DATA_PART_SIZE (7*4)
+#define FFI_TRAMPOLINE_SIZE \
+ (FFI_CRIS_TRAMPOLINE_CODE_PART_SIZE + FFI_CRIS_TRAMPOLINE_DATA_PART_SIZE)
+#define FFI_NATIVE_RAW_API 0
+
+#endif
--- a/src/foreign/gcc/libffi/src/cris/sysv.S
+++ b/src/foreign/gcc/libffi/src/cris/sysv.S
@ -0,0 +1,215 @@
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 2004 Simon Posnjak
+	    Copyright (c) 2005 Axis Communications AB
+
+   CRIS Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL SIMON POSNJAK BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <ffi.h>
+#define CONCAT(x,y) x ## y
+#define XCONCAT(x,y) CONCAT (x, y)
+#define L(x) XCONCAT (__USER_LABEL_PREFIX__, x)
+
+	.text
+
+	;; OK, when we get called we should have this (according to
+	;; AXIS ETRAX 100LX Programmer's Manual chapter 6.3).
+	;;
+	;; R10:	 ffi_prep_args (func. pointer)
+	;; R11:  &ecif
+	;; R12:  cif->bytes
+	;; R13:  cif->flags
+	;; sp+0: ecif.rvalue
+	;; sp+4: fn (function pointer to the function that we need to call)
+
+	.globl  L(ffi_call_SYSV)
+	.type   L(ffi_call_SYSV),@function
+	.hidden	L(ffi_call_SYSV)
+
+	;; Layout relative to r8 once the five pushes below are done:
+	;;   [r8+0]  cif->flags (saved r13)   [r8+4]  saved r8
+	;;   [r8+8]  saved r7                 [r8+12] saved r6
+	;;   [r8+16] saved srp                [r8+20] ecif.rvalue
+	;;   [r8+24] fn
+
+L(ffi_call_SYSV):
+	;; Save the regs to the stack.
+	push $srp
+	;; Used for stack pointer saving.
+	push $r6
+	;; Used for function address pointer.
+	push $r7
+	;; Used for stack pointer saving.
+	push $r8
+	;; We save cif->flags to the stack; we will need them after we
+	;; call The Function.
+	push $r13
+
+	;; Saving current stack pointer.
+	move.d $sp,$r8
+	move.d $sp,$r6
+
+	;; Move address of ffi_prep_args to r13.
+	move.d $r10,$r13
+
+	;; Make room on the stack for the args of fn.
+	sub.d  $r12,$sp
+
+	;; Function int ffi_prep_args(char *stack, extended_cif *ecif) parameters are:
+	;; 	r10 <-- stack pointer
+	;; 	r11 <-- &ecif (already there)
+	move.d $sp,$r10
+
+	;; Call the function.
+	jsr $r13
+
+	;; Save the size of the structures which are passed on stack
+	;; (the return value of ffi_prep_args).
+	move.d $r10,$r7
+
+	;; Move first four args in to r10..r13.
+	move.d [$sp+0],$r10
+	move.d [$sp+4],$r11
+	move.d [$sp+8],$r12
+	move.d [$sp+12],$r13
+
+	;; Adjust the stack and check if any parameters are given on stack.
+	addq 16,$sp
+	sub.d $r7,$r6
+	cmp.d $sp,$r6
+
+	bpl go_on
+	nop
+
+go_on_no_params_on_stack:
+	move.d $r6,$sp
+
+go_on:
+	;; Discover if we need to put rval address in to r9.
+	move.d [$r8+0],$r7
+	cmpq FFI_TYPE_STRUCT,$r7
+	bne call_now
+	nop
+
+	;; Move rval address to $r9.
+	move.d [$r8+20],$r9
+
+call_now:
+	;; Move address of The Function in to r7.
+	move.d [$r8+24],$r7
+
+	;; Call The Function.
+	jsr $r7
+
+	;; Reset stack.
+	move.d $r8,$sp
+
+	;; Load rval type (cif->flags) in to r13.
+	pop $r13
+
+	;; Detect rval type.
+	cmpq FFI_TYPE_VOID,$r13
+	beq epilogue
+
+	cmpq FFI_TYPE_STRUCT,$r13
+	beq epilogue
+
+	cmpq FFI_TYPE_DOUBLE,$r13
+	beq return_double_or_longlong
+
+	cmpq FFI_TYPE_UINT64,$r13
+	beq return_double_or_longlong
+
+	cmpq FFI_TYPE_SINT64,$r13
+	beq return_double_or_longlong
+	nop
+
+	;; Just return the 32 bit value.
+	ba return
+	nop
+
+return_double_or_longlong:
+	;; Store r10 to the first word of the rval and r11 to the second.
+	move.d [$sp+16],$r13
+	move.d $r10,[$r13]
+	addq 4,$r13
+	move.d $r11,[$r13]
+	ba epilogue
+	nop
+
+return:
+	;; Store r10 to the rval.
+	move.d [$sp+16],$r13
+	move.d $r10,[$r13]
+
+epilogue:
+	pop $r8
+	pop $r7
+	pop $r6
+	Jump [$sp+]
+
+	;; Use the same L() decoration as the label definition, so that
+	;; .size names the symbol that was actually defined.
+	.size   L(ffi_call_SYSV),.-L(ffi_call_SYSV)
+
+/* Save R10..R13 into an array, somewhat like varargs.  Copy the next
+   argument too, to simplify handling of any straddling parameter.
+   Save R9 and SP after those.  Jump to function handling the rest.
+   This is only a template: it is copied into each closure, where the
+   addresses of the handling function and of the closure are filled in.  */
+
+	.globl	L(ffi_cris_trampoline_template)
+	.type	L(ffi_cris_trampoline_template),@function
+	.hidden	L(ffi_cris_trampoline_template)
+
+L(ffi_cris_trampoline_template):
+0:
+	/* The value we get for "PC" is right after the prefix instruction,
+	   two bytes from the beginning, i.e. 0b+2. */
+	/* Store R10 in the first word of the data part (label 2).  */
+	move.d $r10,[$pc+2f-(0b+2)]
+	move.d $pc,$r10
+1:
+	/* Point R10 at the second word of the data part, then store
+	   R11..R13, the word at [SP], R9 and SP after the saved R10.  */
+	addq 2f-1b+4,$r10
+	move.d $r11,[$r10+]
+	move.d $r12,[$r10+]
+	move.d $r13,[$r10+]
+	move.d [$sp],$r11
+	move.d $r11,[$r10+]
+	move.d $r9,[$r10+]
+	move.d $sp,[$r10+]
+	/* Rewind R10 to the start of the data part.  */
+	subq FFI_CRIS_TRAMPOLINE_DATA_PART_SIZE,$r10
+	/* The 0 operands of the next two instructions are placeholders;
+	   ffi_cris_trampoline_closure_offset and
+	   ffi_cris_trampoline_fn_offset give the offsets of the words
+	   to overwrite.  */
+	move.d 0,$r11
+3:
+        jump 0
+2:
+	/* Use the same L() decoration as the label definition.  */
+	.size	L(ffi_cris_trampoline_template),.-0b
+
+/* This macro creates a constant usable as "extern const int \name" in
+   C from within libffi, when \name has no prefix decoration.  */
+
+	.macro const name,value
+	.globl	\name
+	.type	\name,@object
+	.hidden	\name
+\name:
+	.dword  \value
+	.size	\name,4
+	.endm
+
+/* Constants for offsets within the trampoline.  We could do this with
+   just symbols, avoiding memory contents and memory accesses, but the
+   C usage code would look a bit stranger.  */
+
+	const L(ffi_cris_trampoline_fn_offset),2b-4-0b
+	const L(ffi_cris_trampoline_closure_offset),3b-4-0b
--- a/src/foreign/gcc/libffi/src/frv/eabi.S
+++ b/src/foreign/gcc/libffi/src/frv/eabi.S
@ -3,7 +3,7 @@
   
   FR-V Assembly glue.

-   $Id: eabi.S,v 1.1 2004/10/25 07:21:13 eli Exp $
+   $Id: sysv.S,v 1.1.1.1 1998/11/29 16:48:16 green Exp $

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
--- a/src/foreign/gcc/libffi/src/frv/ffitarget.h
+++ b/src/foreign/gcc/libffi/src/frv/ffitarget.h
@ -28,10 +28,6 @@

 /* ---- System specific configurations ----------------------------------- */

-#if defined (POWERPC) && defined (__powerpc64__)
-#define POWERPC64
-#endif
-
 #ifndef LIBFFI_ASM
 typedef unsigned long          ffi_arg;
 typedef signed long            ffi_sarg;
@ -44,29 +40,6 @@ typedef enum ffi_abi {
  FFI_DEFAULT_ABI = FFI_EABI,
 #endif

-#ifdef POWERPC
-  FFI_SYSV,
-  FFI_GCC_SYSV,
-  FFI_LINUX64,
-# ifdef POWERPC64
-  FFI_DEFAULT_ABI = FFI_LINUX64,
-# else
-  FFI_DEFAULT_ABI = FFI_GCC_SYSV,
-# endif
-#endif
-
-#ifdef POWERPC_AIX
-  FFI_AIX,
-  FFI_DARWIN,
-  FFI_DEFAULT_ABI = FFI_AIX,
-#endif
-
-#ifdef POWERPC_DARWIN
-  FFI_AIX,
-  FFI_DARWIN,
-  FFI_DEFAULT_ABI = FFI_DARWIN,
-#endif
-
  FFI_LAST_ABI = FFI_DEFAULT_ABI + 1
 } ffi_abi;
 #endif
@ -78,11 +51,10 @@ typedef enum ffi_abi {

 #ifdef __FRV_FDPIC__
 /* Trampolines are 8 4-byte instructions long.  */
-#define FFI_TRAMPOLINE_SIZE (8*4) 
+#define FFI_TRAMPOLINE_SIZE (8*4)
 #else
 /* Trampolines are 5 4-byte instructions long.  */
-#define FFI_TRAMPOLINE_SIZE (5*4) 
+#define FFI_TRAMPOLINE_SIZE (5*4)
 #endif

 #endif
-
--- a/src/foreign/gcc/libffi/src/ia64/ffi.c
+++ b/src/foreign/gcc/libffi/src/ia64/ffi.c
--- a/src/foreign/gcc/libffi/src/ia64/ffitarget.h
+++ b/src/foreign/gcc/libffi/src/ia64/ffitarget.h
@ -27,8 +27,8 @@
 #define LIBFFI_TARGET_H

 #ifndef LIBFFI_ASM
-typedef unsigned long          ffi_arg;
-typedef signed long            ffi_sarg;
+typedef unsigned long long          ffi_arg;
+typedef signed long long            ffi_sarg;

 typedef enum ffi_abi {
  FFI_FIRST_ABI = 0,
@ -45,14 +45,5 @@ typedef enum ffi_abi {
 				/* can be interpreted as a C function	*/
 				/* descriptor:				*/

-#ifndef LIBFFI_ASM
-struct ffi_ia64_trampoline_struct {
-    void * code_pointer;	/* Pointer to ffi_closure_UNIX	*/
-    void * fake_gp;		/* Pointer to closure, installed as gp	*/
-    void * real_gp;		/* Real gp value, reinstalled by 	*/
-				/* ffi_closure_UNIX.			*/
-};
-#endif
-
 #endif

--- a/src/foreign/gcc/libffi/src/ia64/ia64_flags.h
+++ b/src/foreign/gcc/libffi/src/ia64/ia64_flags.h
@ -25,38 +25,15 @@
   OTHER DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */

+/* "Type" codes used between assembly and C.  When used as a part of
+   a cif->flags value, the low byte will be these extra type codes,
+   and bits 8-31 will be the actual size of the type.  */

-/* Homogeneous Floating Point Aggregates (HFAs) which are returned	*/
-/* in FP registers.  The least significant bits specify the size in 	*/
-/* words.								*/
-#define FFI_IS_FLOAT_FP_AGGREGATE 0x1000
-#define FFI_IS_DOUBLE_FP_AGGREGATE 0x0800
-#define FLOAT_FP_AGGREGATE_BIT 12
-#define DOUBLE_FP_AGGREGATE_BIT 11
+/* Small structures containing N words in integer registers.  */
+#define FFI_IA64_TYPE_SMALL_STRUCT	(FFI_TYPE_LAST + 1)

-/* Small structures containing N words.  If N=1, they are returned	*/
-/* as though they were integers.					*/
-#define FFI_IS_SMALL_STRUCT2	0x40 /* Struct > 8, <=16 bytes	*/
-#define FFI_IS_SMALL_STRUCT3	0x41 /* Struct > 16 <= 24 bytes	*/
-#define FFI_IS_SMALL_STRUCT4	0x42 /* Struct > 24, <=32 bytes	*/
-
-/* Flag values identifying particularly simple cases, which are 	*/
-/* handled specially.  We treat functions as simple if they take all	*/
-/* arguments can be passed as 32 or 64 bit integer quantities, there is	*/
-/* either no return value or it can be treated as a 64bit integer, and	*/
-/* if there are at most 2 arguments.					*/
-/* This is OR'ed with the normal flag values.				*/
-#define FFI_SIMPLE_V 0x10000	/* () -> X	*/
-#define FFI_SIMPLE_I 0x20000	/* (int) -> X	*/
-#define FFI_SIMPLE_L 0x30000	/* (long) -> X	*/
-#define FFI_SIMPLE_II 0x40000	/* (int,int) -> X	*/
-#define FFI_SIMPLE_IL 0x50000	/* (int,long) -> X	*/
-#define FFI_SIMPLE_LI 0x60000	/* (long,int) -> X	*/
-#define FFI_SIMPLE_LL 0x70000	/* (long,long) -> X	*/
-
-/* Mask for all of the FFI_SIMPLE bits:	*/
-#define FFI_SIMPLE 0xf0000
-
-/* An easy way to build FFI_SIMPLE flags from FFI_SIMPLE_V:	*/
-#define FFI_ADD_LONG_ARG(flag) (((flag) << 1) | 0x10000)
-#define FFI_ADD_INT_ARG(flag) ((flag) << 1)
+/* Homogeneous Floating Point Aggregates (HFAs) which are returned
+   in FP registers.  */
+#define FFI_IA64_TYPE_HFA_FLOAT		(FFI_TYPE_LAST + 2)
+#define FFI_IA64_TYPE_HFA_DOUBLE	(FFI_TYPE_LAST + 3)
+#define FFI_IA64_TYPE_HFA_LDOUBLE	(FFI_TYPE_LAST + 4)
--- a/src/foreign/gcc/libffi/src/ia64/unix.S
+++ b/src/foreign/gcc/libffi/src/ia64/unix.S
@ -33,295 +33,523 @@
 #include <ffi.h>
 #include "ia64_flags.h"

-/* parameters:	*/
-#define callback	in0
-#define ecifp		in1
-#define bytes		in2
-#define flags		in3
-#define raddr		in4
-#define fn		in5
-
-#define FLOAT_SZ	8 /* in-memory size of fp operands	*/
-
-/* Allocate an ia64_args structure on the stack; call ffi_prep_args	*/
-/* to fill it in with argument values; copy those to the real 		*/
-/* registers, leaving overflow arguments on the stack.  Then call fn	*/
-/* and move the result from registers into *raddr.			*/
 	.pred.safe_across_calls p1-p5,p16-p63
 .text
+
+/* int ffi_call_unix (struct ia64_args *stack, PTR64 rvalue,
+		      void (*fn)(), int flags);
+ */
+
        .align 16
-        .global ffi_call_unix
-        .proc ffi_call_unix
+        .global	ffi_call_unix
+        .proc	ffi_call_unix
 ffi_call_unix:
 	.prologue
-	.save	ar.pfs,r38 /* loc0 */
-	alloc   loc0=ar.pfs,6,6,8,0
-	.save	rp,loc1
-	mov 	loc1=b0;
-	.vframe	loc5
-	mov	loc5=sp;
+	/* A bit of trickiness.  We actually share a stack frame with ffi_call.
+	   Rely on the fact that ffi_call uses a vframe and don't bother
+	   tracking one here at all.  */
+	.fframe	0
+	.save	ar.pfs, r36 // loc0
+	alloc   loc0 = ar.pfs, 4, 3, 8, 0
+	.save	rp, loc1
+	mov 	loc1 = b0
 	.body
-	sub	sp=sp,bytes
-	mov	loc4=r1		/* Save gp 	*/
-	ld8	r8=[callback],8	/* code address of callback	*/
+	add	r16 = 16, in0
+	mov	loc2 = gp
+	mov	r8 = in1
 	;;
-	mov 	out0=sp
-	mov	out1=ecifp
-	mov	out2=bytes
-	ld8	r1=[callback]	/* Set up gp for callback.  Unnecessary? */
-	mov	b6=r8
-	;;
-	br.call.sptk.many b0 = b6	/* call ffi_prep_args		*/
-	cmp.eq	p6,p0=0,r8		/* r8 nonzero ==> need fp regs	*/
- 	;;
-(p6)	add	loc2=32+8*FLOAT_SZ,sp
-(p6)	br.cond.dptk.many	fp_done
-	;;	/* Quiets warning; needed?	*/
-	add	loc2=32,sp
-	add	loc3=32+FLOAT_SZ,sp
-	;;
-	ldfd	f8=[loc2],2*FLOAT_SZ
-	ldfd	f9=[loc3],2*FLOAT_SZ
-	;;
-	ldfd	f10=[loc2],2*FLOAT_SZ
-	ldfd	f11=[loc3],2*FLOAT_SZ
-	;;
-	ldfd	f12=[loc2],2*FLOAT_SZ
-	ldfd	f13=[loc3],2*FLOAT_SZ
-	;;
-	ldfd	f14=[loc2],2*FLOAT_SZ
-	ldfd	f15=[loc3]
-	;;
-fp_done:
-	add	r9=16,sp	/* Pointer to r8_contents	*/
-	/* loc2 points at first integer register value.  */
-	add	loc3=8,loc2
-	;;
-	ld8	r8=[r9]		/* Just in case we return large struct */
-	ld8	out0=[loc2],16
-	ld8	out1=[loc3],16
-	;;
-	ld8	out2=[loc2],16
-	ld8	out3=[loc3],16
-	;;
-	ld8	out4=[loc2],16
-	ld8	out5=[loc3],16
-	;;
-	ld8	out6=[loc2]
-	ld8	out7=[loc3]
-        /* Set sp to 16 bytes below the first stack parameter.  This    */
-        /* is the value currently in loc2.                              */
-	mov	sp=loc2
-	
-	ld8 	r8=[fn],8
-	;;
-	ld8	r1=[fn]		/* Set up gp */
-	mov	b6=r8;;
-	br.call.sptk.many b0 = b6	/* call fn	*/
-	
-	/* Handle return value. */
-	cmp.eq	p6,p0=0,raddr
-	cmp.eq	p7,p0=FFI_TYPE_INT,flags
-	cmp.eq	p10,p0=FFI_IS_SMALL_STRUCT2,flags
-	cmp.eq	p11,p0=FFI_IS_SMALL_STRUCT3,flags
-	cmp.eq	p12,p0=FFI_IS_SMALL_STRUCT4,flags
-	;;
-(p6) 	br.cond.dpnt.few done		/* Dont copy ret values if raddr = 0 */
-(p7)	br.cond.dptk.few copy1
-(p10)	br.cond.dpnt.few copy2
-(p11)	br.cond.dpnt.few copy3
-(p12)	br.cond.dpnt.few copy4
-	cmp.eq	p8,p0=FFI_TYPE_FLOAT,flags
-	cmp.eq	p9,p0=FFI_TYPE_DOUBLE,flags
-	tbit.nz	p6,p0=flags,FLOAT_FP_AGGREGATE_BIT
-	tbit.nz	p7,p0=flags,DOUBLE_FP_AGGREGATE_BIT
-	;;
-(p8)	stfs	[raddr]=f8
-(p9)	stfd	[raddr]=f8
-	;;
-	.label_state 1
-(p6)	br.cond.dpnt.few handle_float_hfa
-(p7)	br.cond.dpnt.few handle_double_hfa
-	br done

-copy4:
-	add	loc3=24,raddr
+	/* Load up all of the argument registers.  */
+	ldf.fill f8 = [in0], 32
+	ldf.fill f9 = [r16], 32
 	;;
-	st8	[loc3]=r11
-copy3:
-	add	loc3=16,raddr
+	ldf.fill f10 = [in0], 32
+	ldf.fill f11 = [r16], 32
 	;;
-	st8	[loc3]=r10
-copy2:
-	add	loc3=8,raddr
+	ldf.fill f12 = [in0], 32
+	ldf.fill f13 = [r16], 32
+	;;
+	ldf.fill f14 = [in0], 32
+	ldf.fill f15 = [r16], 24
+	;;
+	ld8	out0 = [in0], 16
+	ld8	out1 = [r16], 16
+	;;
+	ld8	out2 = [in0], 16
+	ld8	out3 = [r16], 16
+	;;
+	ld8	out4 = [in0], 16
+	ld8	out5 = [r16], 16
+	;;
+	ld8	out6 = [in0]
+	ld8	out7 = [r16]
 	;;
-	st8	[loc3]=r9
-copy1:
-	st8	[raddr]=r8
-	/* In the big struct case, raddr was passed as an argument.	*/
-	/* In the void case there was nothing to do.			*/

-done:
-	mov	r1=loc4		/* Restore gp	*/
+	/* Deallocate the register save area from the stack frame.  */
+	mov	sp = in0
+
+	/* Call the target function.  */
+	ld8	r16 = [in2], 8
+	;;
+	ld8	gp = [in2]
+	mov	b6 = r16
+	br.call.sptk.many b0 = b6
+	;;
+
+	/* Dispatch to handle return value.  */
+	mov	gp = loc2
+	zxt1	r16 = in3
+	;;
+	mov	ar.pfs = loc0
+	addl	r18 = @ltoffx(.Lst_table), gp
+	;;
+	ld8.mov	r18 = [r18], .Lst_table
+	mov	b0 = loc1
+	;;
+	shladd	r18 = r16, 3, r18
+	;;
+	ld8	r17 = [r18]
+	shr	in3 = in3, 8
+	;;
+	add	r17 = r17, r18
+	;;
+	mov	b6 = r17
+	br	b6
+	;;
+
+.Lst_void:
+	br.ret.sptk.many b0
+	;;
+.Lst_uint8:
+	zxt1	r8 = r8
+	;;
+	st8	[in1] = r8
+	br.ret.sptk.many b0
+	;;
+.Lst_sint8:
+	sxt1	r8 = r8
+	;;
+	st8	[in1] = r8
+	br.ret.sptk.many b0
+	;;
+.Lst_uint16:
+	zxt2	r8 = r8
+	;;
+	st8	[in1] = r8
+	br.ret.sptk.many b0
+	;;
+.Lst_sint16:
+	sxt2	r8 = r8
+	;;
+	st8	[in1] = r8
+	br.ret.sptk.many b0
+	;;
+.Lst_uint32:
+	zxt4	r8 = r8
+	;;
+	st8	[in1] = r8
+	br.ret.sptk.many b0
+	;;
+.Lst_sint32:
+	sxt4	r8 = r8
+	;;
+	st8	[in1] = r8
+	br.ret.sptk.many b0
+	;;
+.Lst_int64:
+	st8	[in1] = r8
+	br.ret.sptk.many b0
+	;;
+.Lst_float:
+	stfs	[in1] = f8
+	br.ret.sptk.many b0
+	;;
+.Lst_double:
+	stfd	[in1] = f8
+	br.ret.sptk.many b0
+	;;
+.Lst_ldouble:
+	stfe	[in1] = f8
+	br.ret.sptk.many b0
+	;;
+
+.Lst_small_struct:
+	/* Small structure returned in r8-r11.  At this point in3 holds the
+	   structure size in bytes (flags >> 8) and in1 the destination
+	   address.  Spill the live return registers into a 32-byte stack
+	   buffer starting at sp, then memcpy exactly in3 bytes from that
+	   buffer to the destination.  */
+	add	sp = -16, sp
+	cmp.lt	p6, p0 = 8, in3
+	cmp.lt	p7, p0 = 16, in3
+	cmp.lt	p8, p0 = 24, in3
+	;;
+	add	r16 = 8, sp
+	add	r17 = 16, sp
+	add	r18 = 24, sp
+	;;
+	st8	[sp] = r8
+(p6)	st8	[r16] = r9
+	mov	out0 = in1
+(p7)	st8	[r17] = r10
+(p8)	st8	[r18] = r11
+	mov	out1 = sp
+	mov	out2 = in3
+	;;
+	/* The ia64 software conventions give the callee the 16 bytes at
+	   [sp, sp+16) as scratch space, and the lazy PLT resolver may use
+	   it the first time memcpy is called.  The spill buffer starts at
+	   the current sp, so move sp below the buffer before the call or
+	   the first 16 bytes of the structure can be clobbered.  sp is not
+	   restored here: this routine shares ffi_call's frame, which
+	   restores sp itself.  */
+	add	sp = -16, sp
+	br.call.sptk.many b0 = memcpy#
+	;;
 	mov	ar.pfs = loc0
 	mov	b0 = loc1
-	.restore sp
-	mov	sp = loc5
+	mov	gp = loc2
 	br.ret.sptk.many b0

-handle_double_hfa:
-	.body
-	.copy_state 1
-	/* Homogeneous floating point array of doubles is returned in	*/
-	/* registers f8-f15.  Save one at a time to return area.	*/
-	and	flags=0xf,flags	/* Retrieve size	*/
+.Lst_hfa_float:
+	add	r16 = 4, in1
+	cmp.lt	p6, p0 = 4, in3
 	;;
-	cmp.eq	p6,p0=2,flags
-	cmp.eq	p7,p0=3,flags
-	cmp.eq	p8,p0=4,flags
-	cmp.eq	p9,p0=5,flags
-	cmp.eq	p10,p0=6,flags
-	cmp.eq	p11,p0=7,flags
-	cmp.eq	p12,p0=8,flags
+	stfs	[in1] = f8, 8
+(p6)	stfs	[r16] = f9, 8
+	cmp.lt	p7, p0 = 8, in3
+	cmp.lt	p8, p0 = 12, in3
 	;;
-(p6)	br.cond.dptk.few	dhfa2
-(p7)	br.cond.dptk.few	dhfa3
-(p8)	br.cond.dptk.few	dhfa4
-(p9)	br.cond.dptk.few	dhfa5
-(p10)	br.cond.dptk.few	dhfa6
-(p11)	br.cond.dptk.few	dhfa7
-dhfa8:	add 	loc3=7*8,raddr
+(p7)	stfs	[in1] = f10, 8
+(p8)	stfs	[r16] = f11, 8
+	cmp.lt	p9, p0 = 16, in3
+	cmp.lt	p10, p0 = 20, in3
 	;;
-	stfd	[loc3]=f15
-dhfa7:	add 	loc3=6*8,raddr
+(p9)	stfs	[in1] = f12, 8
+(p10)	stfs	[r16] = f13, 8
+	cmp.lt	p6, p0 = 24, in3
+	cmp.lt	p7, p0 = 28, in3
 	;;
-	stfd	[loc3]=f14
-dhfa6:	add 	loc3=5*8,raddr
+(p6)	stfs	[in1] = f14
+(p7)	stfs	[r16] = f15
+	br.ret.sptk.many b0
 	;;
-	stfd	[loc3]=f13
-dhfa5:	add 	loc3=4*8,raddr
-	;;
-	stfd	[loc3]=f12
-dhfa4:	add 	loc3=3*8,raddr
-	;;
-	stfd	[loc3]=f11
-dhfa3:	add 	loc3=2*8,raddr
-	;;
-	stfd	[loc3]=f10
-dhfa2:	add 	loc3=1*8,raddr
-	;;
-	stfd	[loc3]=f9
-	stfd	[raddr]=f8
-	br	done

-handle_float_hfa:
-	/* Homogeneous floating point array of floats is returned in	*/
-	/* registers f8-f15.  Save one at a time to return area.	*/
-	and	flags=0xf,flags	/* Retrieve size	*/
+.Lst_hfa_double:
+	add	r16 = 8, in1
+	cmp.lt	p6, p0 = 8, in3
 	;;
-	cmp.eq	p6,p0=2,flags
-	cmp.eq	p7,p0=3,flags
-	cmp.eq	p8,p0=4,flags
-	cmp.eq	p9,p0=5,flags
-	cmp.eq	p10,p0=6,flags
-	cmp.eq	p11,p0=7,flags
-	cmp.eq	p12,p0=8,flags
+	stfd	[in1] = f8, 16
+(p6)	stfd	[r16] = f9, 16
+	cmp.lt	p7, p0 = 16, in3
+	cmp.lt	p8, p0 = 24, in3
 	;;
-(p6)	br.cond.dptk.few	shfa2
-(p7)	br.cond.dptk.few	shfa3
-(p8)	br.cond.dptk.few	shfa4
-(p9)	br.cond.dptk.few	shfa5
-(p10)	br.cond.dptk.few	shfa6
-(p11)	br.cond.dptk.few	shfa7
-shfa8:	add 	loc3=7*4,raddr
+(p7)	stfd	[in1] = f10, 16
+(p8)	stfd	[r16] = f11, 16
+	cmp.lt	p9, p0 = 32, in3
+	cmp.lt	p10, p0 = 40, in3
 	;;
-	stfd	[loc3]=f15
-shfa7:	add 	loc3=6*4,raddr
+(p9)	stfd	[in1] = f12, 16
+(p10)	stfd	[r16] = f13, 16
+	cmp.lt	p6, p0 = 48, in3
+	cmp.lt	p7, p0 = 56, in3
 	;;
-	stfd	[loc3]=f14
-shfa6:	add 	loc3=5*4,raddr
+(p6)	stfd	[in1] = f14
+(p7)	stfd	[r16] = f15
+	br.ret.sptk.many b0
 	;;
-	stfd	[loc3]=f13
-shfa5:	add 	loc3=4*4,raddr
+
+.Lst_hfa_ldouble:
+	add	r16 = 16, in1
+	cmp.lt	p6, p0 = 16, in3
 	;;
-	stfd	[loc3]=f12
-shfa4:	add 	loc3=3*4,raddr
+	stfe	[in1] = f8, 32
+(p6)	stfe	[r16] = f9, 32
+	cmp.lt	p7, p0 = 32, in3
+	cmp.lt	p8, p0 = 48, in3
 	;;
-	stfd	[loc3]=f11
-shfa3:	add 	loc3=2*4,raddr
+(p7)	stfe	[in1] = f10, 32
+(p8)	stfe	[r16] = f11, 32
+	cmp.lt	p9, p0 = 64, in3
+	cmp.lt	p10, p0 = 80, in3
 	;;
-	stfd	[loc3]=f10
-shfa2:	add 	loc3=1*4,raddr
+(p9)	stfe	[in1] = f12, 32
+(p10)	stfe	[r16] = f13, 32
+	cmp.lt	p6, p0 = 96, in3
+	cmp.lt	p7, p0 = 112, in3
+	;;
+(p6)	stfe	[in1] = f14
+(p7)	stfe	[r16] = f15
+	br.ret.sptk.many b0
 	;;
-	stfd	[loc3]=f9
-	stfd	[raddr]=f8
-	br	done

        .endp ffi_call_unix

-
-	.pred.safe_across_calls p1-p5,p16-p63
-.text
        .align 16
-        .global ffi_closure_UNIX
-        .proc ffi_closure_UNIX
-ffi_closure_UNIX:
-	.prologue
-	.save 	ar.pfs,r40 /* loc0 */
-	alloc   loc0=ar.pfs,8,3,2,0
-	.save	rp,loc1
-	mov	loc1=b0
-	.vframe	loc2
-	mov	loc2=sp
-	/* Retrieve closure pointer and real gp.	*/
-	mov	out0=gp
-	add	gp=16,gp
-	;;
-	ld8	gp=[gp]
-	/* Reserve a structia64_args on the stack such that arguments	*/
-	/* past the first 8 are automatically placed in the right	*/
-	/* slot.  Note that when we start the sp points at 2 8-byte	*/
-	/* scratch words, followed by the extra arguments.		*/
-#	define BASIC_ARGS_SZ (8*FLOAT_SZ+8*8+2*8)
-#	define FIRST_FP_OFFSET (4*8)
-	add	r14=-(BASIC_ARGS_SZ-FIRST_FP_OFFSET),sp
-	add	r15=-(BASIC_ARGS_SZ-FIRST_FP_OFFSET-FLOAT_SZ),sp
-	add	sp=-BASIC_ARGS_SZ,sp
-	/* r14 points to fp_regs[0], r15 points to fp_regs[1]	*/
-	;;
-	stfd	[r14]=f8,2*FLOAT_SZ
-	stfd	[r15]=f9,2*FLOAT_SZ
-	;;
-	stfd	[r14]=f10,2*FLOAT_SZ
-	stfd	[r15]=f11,2*FLOAT_SZ
-	;;
-	stfd	[r14]=f12,2*FLOAT_SZ
-	stfd	[r15]=f13,2*FLOAT_SZ
-	;;
-	stfd	[r14]=f14,2*FLOAT_SZ
-	stfd	[r15]=f15,FLOAT_SZ+8
-	;;
-	/* r14 points to first parameter register area, r15 to second. */
-	st8	[r14]=in0,2*8
-	st8	[r15]=in1,2*8
-	;;
-	st8	[r14]=in2,2*8
-	st8	[r15]=in3,2*8
-	;;
-	st8	[r14]=in4,2*8
-	st8	[r15]=in5,2*8
-	;;
-	st8	[r14]=in6,2*8
-	st8	[r15]=in7,2*8
-	/* Call ffi_closure_UNIX_inner */
-	mov	out1=sp
-	br.call.sptk.many b0=ffi_closure_UNIX_inner
-	;;
-	mov	b0=loc1
-	mov 	ar.pfs=loc0
-	.restore sp
-	mov	sp=loc2
-	br.ret.sptk.many b0
-	.endp ffi_closure_UNIX
-	
+        .global ffi_closure_unix
+        .proc ffi_closure_unix

+#define FRAME_SIZE	(8*16 + 8*8 + 8*16)
+
+ffi_closure_unix:
+	.prologue
+	.save	ar.pfs, r40 // loc0
+	alloc   loc0 = ar.pfs, 8, 4, 4, 0
+	.fframe	FRAME_SIZE
+	add	r12 = -FRAME_SIZE, r12
+	.save	rp, loc1
+	mov	loc1 = b0
+	.save	ar.unat, loc2
+	mov	loc2 = ar.unat
+	.body
+
+	/* Retrieve closure pointer and real gp.  */
+#ifdef _ILP32
+	addp4	out0 = 0, gp
+	addp4	gp = 16, gp
+#else
+	mov	out0 = gp
+	add	gp = 16, gp
+#endif
+	;;
+	ld8	gp = [gp]
+
+	/* Spill all of the possible argument registers.  */
+	add	r16 = 16 + 8*16, sp
+	add	r17 = 16 + 8*16 + 16, sp
+	;;
+	stf.spill [r16] = f8, 32
+	stf.spill [r17] = f9, 32
+	mov	loc3 = gp
+	;;
+	stf.spill [r16] = f10, 32
+	stf.spill [r17] = f11, 32
+	;;
+	stf.spill [r16] = f12, 32
+	stf.spill [r17] = f13, 32
+	;;
+	stf.spill [r16] = f14, 32
+	stf.spill [r17] = f15, 24
+	;;
+	.mem.offset 0, 0
+	st8.spill [r16] = in0, 16
+	.mem.offset 8, 0
+	st8.spill [r17] = in1, 16
+	add	out1 = 16 + 8*16, sp
+	;;
+	.mem.offset 0, 0
+	st8.spill [r16] = in2, 16
+	.mem.offset 8, 0
+	st8.spill [r17] = in3, 16
+	add	out2 = 16, sp
+	;;
+	.mem.offset 0, 0
+	st8.spill [r16] = in4, 16
+	.mem.offset 8, 0
+	st8.spill [r17] = in5, 16
+	mov	out3 = r8
+	;;
+	.mem.offset 0, 0
+	st8.spill [r16] = in6
+	.mem.offset 8, 0
+	st8.spill [r17] = in7
+
+	/* Invoke ffi_closure_unix_inner for the hard work.  */
+	br.call.sptk.many b0 = ffi_closure_unix_inner
+	;;
+
+	/* Dispatch to handle return value.  */
+	mov	gp = loc3
+	zxt1	r16 = r8
+	;;
+	addl	r18 = @ltoffx(.Lld_table), gp
+	mov	ar.pfs = loc0
+	;;
+	ld8.mov	r18 = [r18], .Lld_table
+	mov	b0 = loc1
+	;;
+	shladd	r18 = r16, 3, r18
+	mov	ar.unat = loc2
+	;;
+	ld8	r17 = [r18]
+	shr	r8 = r8, 8
+	;;
+	add	r17 = r17, r18
+	add	r16 = 16, sp
+	;;
+	mov	b6 = r17
+	br	b6
+	;;
+	.label_state 1
+
+.Lld_void:
+	.restore sp
+	add	sp = FRAME_SIZE, sp
+	br.ret.sptk.many b0
+	;;
+.Lld_int:
+	.body
+	.copy_state 1
+	ld8	r8 = [r16]
+	.restore sp
+	add	sp = FRAME_SIZE, sp
+	br.ret.sptk.many b0
+	;;
+.Lld_float:
+	.body
+	.copy_state 1
+	ldfs	f8 = [r16]
+	.restore sp
+	add	sp = FRAME_SIZE, sp
+	br.ret.sptk.many b0
+	;;
+.Lld_double:
+	.body
+	.copy_state 1
+	ldfd	f8 = [r16]
+	.restore sp
+	add	sp = FRAME_SIZE, sp
+	br.ret.sptk.many b0
+	;;
+.Lld_ldouble:
+	.body
+	.copy_state 1
+	ldfe	f8 = [r16]
+	.restore sp
+	add	sp = FRAME_SIZE, sp
+	br.ret.sptk.many b0
+	;;
+
+.Lld_small_struct:
+	.body
+	.copy_state 1
+	add	r17 = 8, r16
+	cmp.lt	p6, p0 = 8, r8
+	cmp.lt	p7, p0 = 16, r8
+	cmp.lt	p8, p0 = 24, r8
+	;;
+	ld8	r8 = [r16], 16
+(p6)	ld8	r9 = [r17], 16
+	;;
+(p7)	ld8	r10 = [r16]
+(p8)	ld8	r11 = [r17]
+	.restore sp
+	add	sp = FRAME_SIZE, sp
+	br.ret.sptk.many b0
+	;;
+
+.Lld_hfa_float:
+	.body
+	.copy_state 1
+	add	r17 = 4, r16
+	cmp.lt	p6, p0 = 4, r8
+	;;
+	ldfs	f8 = [r16], 8
+(p6)	ldfs	f9 = [r17], 8
+	cmp.lt	p7, p0 = 8, r8
+	cmp.lt	p8, p0 = 12, r8
+	;;
+(p7)	ldfs	f10 = [r16], 8
+(p8)	ldfs	f11 = [r17], 8
+	cmp.lt	p9, p0 = 16, r8
+	cmp.lt	p10, p0 = 20, r8
+	;;
+(p9)	ldfs	f12 = [r16], 8
+(p10)	ldfs	f13 = [r17], 8
+	cmp.lt	p6, p0 = 24, r8
+	cmp.lt	p7, p0 = 28, r8
+	;;
+(p6)	ldfs	f14 = [r16]
+(p7)	ldfs	f15 = [r17]
+	.restore sp
+	add	sp = FRAME_SIZE, sp
+	br.ret.sptk.many b0
+	;;
+
+.Lld_hfa_double:
+	.body
+	.copy_state 1
+	add	r17 = 8, r16
+	cmp.lt	p6, p0 = 8, r8
+	;;
+	ldfd	f8 = [r16], 16
+(p6)	ldfd	f9 = [r17], 16
+	cmp.lt	p7, p0 = 16, r8
+	cmp.lt	p8, p0 = 24, r8
+	;;
+(p7)	ldfd	f10 = [r16], 16
+(p8)	ldfd	f11 = [r17], 16
+	cmp.lt	p9, p0 = 32, r8
+	cmp.lt	p10, p0 = 40, r8
+	;;
+(p9)	ldfd	f12 = [r16], 16
+(p10)	ldfd	f13 = [r17], 16
+	cmp.lt	p6, p0 = 48, r8
+	cmp.lt	p7, p0 = 56, r8
+	;;
+(p6)	ldfd	f14 = [r16]
+(p7)	ldfd	f15 = [r17]
+	.restore sp
+	add	sp = FRAME_SIZE, sp
+	br.ret.sptk.many b0
+	;;
+
+.Lld_hfa_ldouble:
+	.body
+	.copy_state 1
+	add	r17 = 16, r16
+	cmp.lt	p6, p0 = 16, r8
+	;;
+	ldfe	f8 = [r16], 32
+(p6)	ldfe	f9 = [r17], 32
+	cmp.lt	p7, p0 = 32, r8
+	cmp.lt	p8, p0 = 48, r8
+	;;
+(p7)	ldfe	f10 = [r16], 32
+(p8)	ldfe	f11 = [r17], 32
+	cmp.lt	p9, p0 = 64, r8
+	cmp.lt	p10, p0 = 80, r8
+	;;
+(p9)	ldfe	f12 = [r16], 32
+(p10)	ldfe	f13 = [r17], 32
+	cmp.lt	p6, p0 = 96, r8
+	cmp.lt	p7, p0 = 112, r8
+	;;
+(p6)	ldfe	f14 = [r16]
+(p7)	ldfe	f15 = [r17]
+	.restore sp
+	add	sp = FRAME_SIZE, sp
+	br.ret.sptk.many b0
+	;;
+
+	.endp	ffi_closure_unix
+
+	/* Return-value dispatch table for ffi_call_unix.
+
+	   Each entry is an 8-byte self-relative (@pcrel) offset to one of
+	   the .Lst_* store routines.  ffi_call_unix indexes the table with
+	   the low byte of its flags argument (scaled by 8), loads the
+	   entry, and adds the entry's own address to obtain the branch
+	   target.  The entry order must therefore match the numeric values
+	   of the FFI_TYPE_* codes named in the trailing comments, followed
+	   by the FFI_IA64_TYPE_* codes, which are defined as
+	   FFI_TYPE_LAST + 1 ... + 4 (assumes FFI_TYPE_LAST is
+	   FFI_TYPE_POINTER -- TODO confirm against ffi.h).
+
+	   FFI_TYPE_STRUCT maps to .Lst_void, i.e. nothing is stored here;
+	   presumably the callee has already written the structure through
+	   the rvalue pointer passed in r8.  */
+	.section .rodata
+	.align	8
+.Lst_table:
+	data8	@pcrel(.Lst_void)		// FFI_TYPE_VOID
+	data8	@pcrel(.Lst_sint32)		// FFI_TYPE_INT
+	data8	@pcrel(.Lst_float)		// FFI_TYPE_FLOAT
+	data8	@pcrel(.Lst_double)		// FFI_TYPE_DOUBLE
+	data8	@pcrel(.Lst_ldouble)		// FFI_TYPE_LONGDOUBLE
+	data8	@pcrel(.Lst_uint8)		// FFI_TYPE_UINT8
+	data8	@pcrel(.Lst_sint8)		// FFI_TYPE_SINT8
+	data8	@pcrel(.Lst_uint16)		// FFI_TYPE_UINT16
+	data8	@pcrel(.Lst_sint16)		// FFI_TYPE_SINT16
+	data8	@pcrel(.Lst_uint32)		// FFI_TYPE_UINT32
+	data8	@pcrel(.Lst_sint32)		// FFI_TYPE_SINT32
+	data8	@pcrel(.Lst_int64)		// FFI_TYPE_UINT64
+	data8	@pcrel(.Lst_int64)		// FFI_TYPE_SINT64
+	data8	@pcrel(.Lst_void)		// FFI_TYPE_STRUCT
+	data8	@pcrel(.Lst_int64)		// FFI_TYPE_POINTER
+	data8 	@pcrel(.Lst_small_struct)	// FFI_IA64_TYPE_SMALL_STRUCT
+	data8	@pcrel(.Lst_hfa_float)		// FFI_IA64_TYPE_HFA_FLOAT
+	data8	@pcrel(.Lst_hfa_double)		// FFI_IA64_TYPE_HFA_DOUBLE
+	data8	@pcrel(.Lst_hfa_ldouble)	// FFI_IA64_TYPE_HFA_LDOUBLE
+
+
+	/* Return-value dispatch table for ffi_closure_unix.
+
+	   Same layout as a jump table of 8-byte self-relative (@pcrel)
+	   offsets: ffi_closure_unix indexes it with the low byte of the
+	   value returned in r8 by ffi_closure_unix_inner (scaled by 8),
+	   loads the entry, and adds the entry's own address to obtain the
+	   address of the .Lld_* routine that loads the return registers
+	   from the frame.  The entry order must match the numeric values of
+	   the type codes named in the trailing comments.  All integer and
+	   pointer types share .Lld_int, which loads a full 8 bytes into r8;
+	   FFI_TYPE_STRUCT maps to .Lld_void, which loads nothing.  */
+.Lld_table:
+	data8	@pcrel(.Lld_void)		// FFI_TYPE_VOID
+	data8	@pcrel(.Lld_int)		// FFI_TYPE_INT
+	data8	@pcrel(.Lld_float)		// FFI_TYPE_FLOAT
+	data8	@pcrel(.Lld_double)		// FFI_TYPE_DOUBLE
+	data8	@pcrel(.Lld_ldouble)		// FFI_TYPE_LONGDOUBLE
+	data8	@pcrel(.Lld_int)		// FFI_TYPE_UINT8
+	data8	@pcrel(.Lld_int)		// FFI_TYPE_SINT8
+	data8	@pcrel(.Lld_int)		// FFI_TYPE_UINT16
+	data8	@pcrel(.Lld_int)		// FFI_TYPE_SINT16
+	data8	@pcrel(.Lld_int)		// FFI_TYPE_UINT32
+	data8	@pcrel(.Lld_int)		// FFI_TYPE_SINT32
+	data8	@pcrel(.Lld_int)		// FFI_TYPE_UINT64
+	data8	@pcrel(.Lld_int)		// FFI_TYPE_SINT64
+	data8	@pcrel(.Lld_void)		// FFI_TYPE_STRUCT
+	data8	@pcrel(.Lld_int)		// FFI_TYPE_POINTER
+	data8 	@pcrel(.Lld_small_struct)	// FFI_IA64_TYPE_SMALL_STRUCT
+	data8	@pcrel(.Lld_hfa_float)		// FFI_IA64_TYPE_HFA_FLOAT
+	data8	@pcrel(.Lld_hfa_double)		// FFI_IA64_TYPE_HFA_DOUBLE
+	data8	@pcrel(.Lld_hfa_ldouble)	// FFI_IA64_TYPE_HFA_LDOUBLE
--- a/src/foreign/gcc/libffi/src/m32r/ffitarget.h
+++ b/src/foreign/gcc/libffi/src/m32r/ffitarget.h
@ -41,7 +41,7 @@ typedef enum ffi_abi
  } ffi_abi;
 #endif

-#define FFI_CLOSURES 		1
+#define FFI_CLOSURES 		0
 #define FFI_TRAMPOLINE_SIZE	24
 #define FFI_NATIVE_RAW_API 	0

--- a/src/foreign/gcc/libffi/src/mips/o32.S
+++ b/src/foreign/gcc/libffi/src/mips/o32.S
@ -73,7 +73,7 @@ sixteen:
 	ADDU	a0, $sp, 4 * FFI_SIZEOF_ARG
 	ADDU	a3, $fp, SIZEOF_FRAME + 3*FFI_SIZEOF_ARG

-	jal	t9
+	jalr	t9
 	
 	REG_L	t0, SIZEOF_FRAME + 3*FFI_SIZEOF_ARG($fp)  # load the flags word
 	add	t2, t0, 0			   # and copy it into t2
@ -141,7 +141,7 @@ call_it:
 	beqz	t1, noretval

 	bne     t2, FFI_TYPE_INT, retlonglong
-	jal	t9
+	jalr	t9
 	REG_L	t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
 	REG_S	v0, 0(t0)
 	b	epilogue
@ -149,7 +149,7 @@ call_it:
 retlonglong:
 	# Really any 64-bit int, signed or not.
 	bne	t2, FFI_TYPE_UINT64, retfloat
-	jal	t9
+	jalr	t9
 	REG_L	t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
 	REG_S	v1, 4(t0)
 	REG_S	v0, 0(t0)
@ -157,20 +157,20 @@ retlonglong:

 retfloat:
 	bne     t2, FFI_TYPE_FLOAT, retdouble
-	jal	t9
+	jalr	t9
 	REG_L	t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
 	s.s	$f0, 0(t0)
 	b	epilogue

 retdouble:	
 	bne	t2, FFI_TYPE_DOUBLE, noretval
-	jal	t9
+	jalr	t9
 	REG_L	t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
 	s.d	$f0, 0(t0)
 	b	epilogue
 	
 noretval:	
-	jal	t9
+	jalr	t9
 	
 	# Epilogue
 epilogue:	
@ -255,7 +255,7 @@ $LCFI7:
 	addu	$5, $fp, SIZEOF_FRAME2 -  6*FFI_SIZEOF_ARG
 	addu	$6, $fp, SIZEOF_FRAME2 +  0*FFI_SIZEOF_ARG
 	addu	$7, $fp, SIZEOF_FRAME2 - 10*FFI_SIZEOF_ARG
-	jal	$31, $25
+	jalr	$31, $25

 	# Load the return value into the appropriate register.
 	move	$8, $2
@ -306,7 +306,7 @@ $LSCIE0:
 	.sleb128 4	 # CIE Data Alignment Factor
 	.byte	0x1f	 # CIE RA Column
 	.uleb128 0x1	 # Augmentation size
-	.byte	0x1b	 # FDE Encoding (pcrel sdata4)
+	.byte	0x00	 # FDE Encoding (absptr)
 	.byte	0xc	 # DW_CFA_def_cfa
 	.uleb128 0x1d
 	.uleb128 0x0
@ -316,7 +316,7 @@ $LSFDE0:
 	.4byte	$LEFDE0-$LASFDE0	 # FDE Length
 $LASFDE0:
 	.4byte	$LASFDE0-$Lframe0	 # FDE CIE offset
-	.4byte	$LFB0-.	 # FDE initial location
+	.4byte	$LFB0	 # FDE initial location
 	.4byte	$LFE0-$LFB0	 # FDE address range
 	.uleb128 0x0	 # Augmentation size
 	.byte	0x4	 # DW_CFA_advance_loc4
@ -342,7 +342,7 @@ $LSFDE1:
 	.4byte	$LEFDE1-$LASFDE1	 # FDE Length
 $LASFDE1:
 	.4byte	$LASFDE1-$Lframe0	 # FDE CIE offset
-	.4byte	$LFB1-.	 # FDE initial location
+	.4byte	$LFB1	 # FDE initial location
 	.4byte	$LFE1-$LFB1	 # FDE address range
 	.uleb128 0x0	 # Augmentation size
 	.byte	0x4	 # DW_CFA_advance_loc4
--- a/src/foreign/gcc/libffi/src/powerpc/darwin.S
+++ b/src/foreign/gcc/libffi/src/powerpc/darwin.S
@ -24,6 +24,16 @@
   OTHER DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */

+#if defined(__ppc64__)
+#define MODE_CHOICE(x, y) y
+#else
+#define MODE_CHOICE(x, y) x
+#endif
+
+#define g_long  MODE_CHOICE(long, quad)         /* usage is ".g_long" */
+
+#define LOG2_GPR_BYTES  MODE_CHOICE(2,3)        /* log2(GPR_BYTES) */
+
 #define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
@ -190,7 +200,7 @@ LSCIE1:
 	.byte	0xc	; DW_CFA_def_cfa
 	.byte	0x1	; uleb128 0x1
 	.byte	0x0	; uleb128 0x0
-	.align	2
+	.align	LOG2_GPR_BYTES
 LECIE1:
 .globl _ffi_call_DARWIN.eh
 _ffi_call_DARWIN.eh:
@ -199,9 +209,9 @@ LSFDE1:
 	.long	L$set$1	; FDE Length
 LASFDE1:
 	.long	LASFDE1-EH_frame1 ; FDE CIE offset
-	.long	LLFB0$non_lazy_ptr-.	; FDE initial location
+	.g_long	LLFB0$non_lazy_ptr-.	; FDE initial location
 	.set	L$set$3,LFE1-LFB0
-	.long	L$set$3	; FDE address range
+	.g_long	L$set$3	; FDE address range
 	.byte   0x0     ; uleb128 0x0; Augmentation size
 	.byte	0x4	; DW_CFA_advance_loc4
 	.set	L$set$4,LCFI0-LFB1
@ -227,9 +237,9 @@ LASFDE1:
 	.long	L$set$6
 	.byte	0xd	; DW_CFA_def_cfa_register
 	.byte	0x1c	; uleb128 0x1c
-	.align 2
+	.align LOG2_GPR_BYTES
 LEFDE1:
 .data
-	.align 2
+	.align LOG2_GPR_BYTES
 LLFB0$non_lazy_ptr:
-	.long   LFB0
+	.g_long LFB0
--- a/src/foreign/gcc/libffi/src/powerpc/darwin_closure.S
+++ b/src/foreign/gcc/libffi/src/powerpc/darwin_closure.S
@ -27,13 +27,25 @@
 #define LIBFFI_ASM
 #define L(x) x

+#if defined(__ppc64__)
+#define MODE_CHOICE(x, y) y
+#else
+#define MODE_CHOICE(x, y) x
+#endif
+
+#define lgu     MODE_CHOICE(lwzu, ldu)
+
+#define g_long  MODE_CHOICE(long, quad)         /* usage is ".g_long" */
+
+#define LOG2_GPR_BYTES  MODE_CHOICE(2,3)        /* log2(GPR_BYTES) */
+
 	.file	"darwin_closure.S"
 .text
-	.align 2
+	.align LOG2_GPR_BYTES
 .globl _ffi_closure_ASM

 .text
-	.align 2
+	.align LOG2_GPR_BYTES
 _ffi_closure_ASM:
 LFB1:
 	mflr	r0		/* extract return address  */
@ -234,7 +246,7 @@ Lfinish:
 /* END(ffi_closure_ASM)  */

 .data
-.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
 EH_frame1:
 	.set	L$set$0,LECIE1-LSCIE1
 	.long	L$set$0	; Length of Common Information Entry
@ -250,7 +262,7 @@ LSCIE1:
 	.byte	0xc	; DW_CFA_def_cfa
 	.byte	0x1	; uleb128 0x1
 	.byte	0x0	; uleb128 0x0
-	.align	2
+	.align	LOG2_GPR_BYTES
 LECIE1:
 .globl _ffi_closure_ASM.eh
 _ffi_closure_ASM.eh:
@ -260,9 +272,9 @@ LSFDE1:

 LASFDE1:
 	.long	LASFDE1-EH_frame1	; FDE CIE offset
-	.long	LLFB1$non_lazy_ptr-.	; FDE initial location
+	.g_long	LLFB1$non_lazy_ptr-.	; FDE initial location
 	.set	L$set$3,LFE1-LFB1
-	.long	L$set$3	; FDE address range
+	.g_long	L$set$3	; FDE address range
 	.byte   0x0     ; uleb128 0x0; Augmentation size
 	.byte	0x4	; DW_CFA_advance_loc4
 	.set	L$set$3,LCFI1-LCFI0
@ -275,14 +287,15 @@ LASFDE1:
 	.byte   0x11    ; DW_CFA_offset_extended_sf
 	.byte	0x41	; uleb128 0x41
 	.byte   0x7e    ; sleb128 -2
-	.align	2
+	.align	LOG2_GPR_BYTES
 LEFDE1:
 .data
-	.align	2
+	.align	LOG2_GPR_BYTES
 LDFCM0:
 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
-	.align	2
+	.align	LOG2_GPR_BYTES
 Lffi_closure_helper_DARWIN$stub:
+#if 1
 	.indirect_symbol _ffi_closure_helper_DARWIN
 	mflr	r0
 	bcl	20,31,LO$ffi_closure_helper_DARWIN
@ -290,15 +303,15 @@ LO$ffi_closure_helper_DARWIN:
 	mflr	r11
 	addis	r11,r11,ha16(L_ffi_closure_helper_DARWIN$lazy_ptr - LO$ffi_closure_helper_DARWIN)
 	mtlr	r0
-	lwzu	r12,lo16(L_ffi_closure_helper_DARWIN$lazy_ptr - LO$ffi_closure_helper_DARWIN)(r11)
+	lgu	r12,lo16(L_ffi_closure_helper_DARWIN$lazy_ptr - LO$ffi_closure_helper_DARWIN)(r11)
 	mtctr	r12
 	bctr
-.data
 .lazy_symbol_pointer
 L_ffi_closure_helper_DARWIN$lazy_ptr:
 	.indirect_symbol _ffi_closure_helper_DARWIN
-	.long   dyld_stub_binding_helper
+	.g_long dyld_stub_binding_helper
+#endif
 .data
-	.align 2
+	.align LOG2_GPR_BYTES
 LLFB1$non_lazy_ptr:
-	.long LFB1
+	.g_long LFB1
--- a/src/foreign/gcc/libffi/src/powerpc/ffi.c
+++ b/src/foreign/gcc/libffi/src/powerpc/ffi.c
@ -573,10 +573,14 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
 	    /* 'long long' arguments are passed as two words, but
 	       either both words must fit in registers or both go
 	       on the stack.  If they go on the stack, they must
-	       be 8-byte-aligned.  */
+	       be 8-byte-aligned.  
+
+	       Also, only certain register pairs can be used for
+	       passing long long int -- specifically (r3,r4), (r5,r6),
+	       (r7,r8), (r9,r10).
+	    */
 	    if (intarg_count == NUM_GPR_ARG_REGISTERS-1
-		|| (intarg_count >= NUM_GPR_ARG_REGISTERS
-		    && intarg_count%2 != 0))
+		|| intarg_count%2 != 0)
 	      intarg_count++;
 	    intarg_count += 2;
 	    break;
--- a/src/foreign/gcc/libffi/src/prep_cif.c
+++ b/src/foreign/gcc/libffi/src/prep_cif.c
@ -81,6 +81,11 @@ static ffi_status initialize_aggregate(/*@out@*/ ffi_type *arg)
  /*@=usedef@*/
 }

+#ifndef __CRIS__
+/* The CRIS ABI specifies structure elements to have byte
+   alignment only, so it completely overrides this function,
+   which assumes "natural" alignment and padding.  */
+
 /* Perform machine independent ffi_cif preparation, then call
   machine dependent routine. */

@ -158,3 +163,4 @@ ffi_status ffi_prep_cif(/*@out@*/ /*@partial@*/ ffi_cif *cif,
  /* Perform machine dependent cif processing */
  return ffi_prep_cif_machdep(cif);
 }
+#endif /* not __CRIS__ */
--- a/src/foreign/gcc/libffi/src/sparc/ffi.c
+++ b/src/foreign/gcc/libffi/src/sparc/ffi.c
@ -470,7 +470,7 @@ ffi_prep_closure (ffi_closure* closure,

 int
 ffi_closure_sparc_inner_v8(ffi_closure *closure,
-  void *rvalue, unsigned long *gpr)
+  void *rvalue, unsigned long *gpr, unsigned long *scratch)
 {
  ffi_cif *cif;
  ffi_type **arg_types;
@ -505,6 +505,19 @@ ffi_closure_sparc_inner_v8(ffi_closure *closure,
 	  /* Straight copy of invisible reference.  */
 	  avalue[i] = (void *)gpr[argn++];
 	}
+      else if ((arg_types[i]->type == FFI_TYPE_DOUBLE
+	       || arg_types[i]->type == FFI_TYPE_SINT64
+	       || arg_types[i]->type == FFI_TYPE_UINT64)
+	       /* gpr is 8-byte aligned.  */
+	       && (argn % 2) != 0)
+	{
+	  /* Align on an 8-byte boundary.  */
+	  scratch[0] = gpr[argn];
+	  scratch[1] = gpr[argn+1];
+	  avalue[i] = scratch;
+	  scratch -= 2;
+	  argn += 2;
+	}
      else
 	{
 	  /* Always right-justify.  */
--- a/src/foreign/gcc/libffi/src/sparc/v8.S
+++ b/src/foreign/gcc/libffi/src/sparc/v8.S
@ -115,7 +115,15 @@ ffi_closure_v8:
 		.register	%g2, #scratch
 #endif
 .LLFB2:
-	save	%sp, -STACKFRAME, %sp
+	! Reserve frame space for all arguments in case
+	! we need to align them on an 8-byte boundary.
+	ld	[%g2+FFI_TRAMPOLINE_SIZE], %g1
+	ld	[%g1+4], %g1
+	sll	%g1, 3, %g1
+	add	%g1, STACKFRAME, %g1
+	! %g1 == STACKFRAME + 8*nargs
+	neg	%g1
+	save	%sp, %g1, %sp
 .LLCFI1:

 	! Store all of the potential argument registers in va_list format.
@ -129,8 +137,9 @@ ffi_closure_v8:
 	! Call ffi_closure_sparc_inner to do the bulk of the work.
 	mov	%g2, %o0
 	add	%fp, -8, %o1
+	add	%fp,  64, %o2
 	call	ffi_closure_sparc_inner_v8
-	 add	%fp,  64, %o2
+	 add	%fp, -16, %o3

 	! Load up the return value in the proper type.
 	! See ffi_prep_cif_machdep for the list of cases.
@ -157,6 +166,7 @@ ffi_closure_v8:
 	be	done2

 	! FFI_TYPE_SINT64
+	! FFI_TYPE_UINT64
 	ld	[%fp-4], %i1

 integer:
--- a/src/foreign/gcc/libffi/src/types.c
+++ b/src/foreign/gcc/libffi/src/types.c
@ -28,86 +28,33 @@

 /* Type definitions */

-#define FFI_INTEGRAL_TYPEDEF(n, s, a, t) ffi_type ffi_type_##n = { s, a, t, NULL }
-#define FFI_AGGREGATE_TYPEDEF(n, e) ffi_type ffi_type_##n = { 0, 0, FFI_TYPE_STRUCT, e }
+#define FFI_TYPEDEF(name, type, id)		\
+struct struct_align_##name {			\
+  char c;					\
+  type x;					\
+};						\
+ffi_type ffi_type_##name = {			\
+  sizeof(type),					\
+  offsetof(struct struct_align_##name, x),	\
+  id, NULL					\
+}

 /* Size and alignment are fake here. They must not be 0. */
-FFI_INTEGRAL_TYPEDEF(void, 1, 1, FFI_TYPE_VOID);
+ffi_type ffi_type_void = {
+  1, 1, FFI_TYPE_VOID, NULL
+};

-FFI_INTEGRAL_TYPEDEF(uint8, 1, 1, FFI_TYPE_UINT8);
-FFI_INTEGRAL_TYPEDEF(sint8, 1, 1, FFI_TYPE_SINT8);
-FFI_INTEGRAL_TYPEDEF(uint16, 2, 2, FFI_TYPE_UINT16);
-FFI_INTEGRAL_TYPEDEF(sint16, 2, 2, FFI_TYPE_SINT16);
-FFI_INTEGRAL_TYPEDEF(uint32, 4, 4, FFI_TYPE_UINT32);
-FFI_INTEGRAL_TYPEDEF(sint32, 4, 4, FFI_TYPE_SINT32);
-FFI_INTEGRAL_TYPEDEF(float, 4, 4, FFI_TYPE_FLOAT);
+FFI_TYPEDEF(uint8, UINT8, FFI_TYPE_UINT8);
+FFI_TYPEDEF(sint8, SINT8, FFI_TYPE_SINT8);
+FFI_TYPEDEF(uint16, UINT16, FFI_TYPE_UINT16);
+FFI_TYPEDEF(sint16, SINT16, FFI_TYPE_SINT16);
+FFI_TYPEDEF(uint32, UINT32, FFI_TYPE_UINT32);
+FFI_TYPEDEF(sint32, SINT32, FFI_TYPE_SINT32);
+FFI_TYPEDEF(uint64, UINT64, FFI_TYPE_UINT64);
+FFI_TYPEDEF(sint64, SINT64, FFI_TYPE_SINT64);

-#if defined ALPHA || defined SPARC64 || defined X86_64 || defined S390X \
-    || defined IA64 || defined POWERPC64
-
-FFI_INTEGRAL_TYPEDEF(pointer, 8, 8, FFI_TYPE_POINTER);
-
-#else
-
-FFI_INTEGRAL_TYPEDEF(pointer, 4, 4, FFI_TYPE_POINTER);
-
-#endif
-
-#if defined X86 || defined ARM || defined M68K
-
-FFI_INTEGRAL_TYPEDEF(uint64, 8, 4, FFI_TYPE_UINT64);
-FFI_INTEGRAL_TYPEDEF(sint64, 8, 4, FFI_TYPE_SINT64);
-
-#elif defined SH
-
-FFI_INTEGRAL_TYPEDEF(uint64, 8, 4, FFI_TYPE_UINT64);
-FFI_INTEGRAL_TYPEDEF(sint64, 8, 4, FFI_TYPE_SINT64);
-
-#else
-
-FFI_INTEGRAL_TYPEDEF(uint64, 8, 8, FFI_TYPE_UINT64);
-FFI_INTEGRAL_TYPEDEF(sint64, 8, 8, FFI_TYPE_SINT64);
-
-#endif
-
-
-#if defined X86 || defined X86_WIN32 || defined M68K
-
-#ifdef X86_WIN32
-FFI_INTEGRAL_TYPEDEF(double, 8, 8, FFI_TYPE_DOUBLE);
-#else
-FFI_INTEGRAL_TYPEDEF(double, 8, 4, FFI_TYPE_DOUBLE);
-#endif
-FFI_INTEGRAL_TYPEDEF(longdouble, 12, 4, FFI_TYPE_LONGDOUBLE);
-
-#elif defined ARM || defined SH || defined POWERPC_AIX || defined M32R
-
-FFI_INTEGRAL_TYPEDEF(double, 8, 4, FFI_TYPE_DOUBLE);
-FFI_INTEGRAL_TYPEDEF(longdouble, 8, 4, FFI_TYPE_LONGDOUBLE);
-
-#elif defined POWERPC_DARWIN
-
-FFI_INTEGRAL_TYPEDEF(double, 8, 4, FFI_TYPE_DOUBLE);
-FFI_INTEGRAL_TYPEDEF(longdouble, 16, 16, FFI_TYPE_LONGDOUBLE);
-
-#elif defined SPARC
-
-FFI_INTEGRAL_TYPEDEF(double, 8, 8, FFI_TYPE_DOUBLE);
-#ifdef SPARC64
-FFI_INTEGRAL_TYPEDEF(longdouble, 16, 16, FFI_TYPE_LONGDOUBLE);
-#else
-FFI_INTEGRAL_TYPEDEF(longdouble, 16, 8, FFI_TYPE_LONGDOUBLE);
-#endif
-
-#elif defined X86_64 || defined POWERPC64
-
-FFI_INTEGRAL_TYPEDEF(double, 8, 8, FFI_TYPE_DOUBLE);
-FFI_INTEGRAL_TYPEDEF(longdouble, 16, 16, FFI_TYPE_LONGDOUBLE);
-
-#else
-
-FFI_INTEGRAL_TYPEDEF(double, 8, 8, FFI_TYPE_DOUBLE);
-FFI_INTEGRAL_TYPEDEF(longdouble, 8, 8, FFI_TYPE_LONGDOUBLE);
-
-#endif
+FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER);

+FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT);
+FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE);
+FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE);
--- a/src/foreign/gcc/libffi/src/x86/ffi64.c
+++ b/src/foreign/gcc/libffi/src/x86/ffi64.c
@ -29,22 +29,20 @@
 #include <stdlib.h>
 #include <stdarg.h>

-/* ffi_prep_args is called by the assembly routine once stack space
-   has been allocated for the function's arguments */
-
 #ifdef __x86_64__

 #define MAX_GPR_REGS 6
 #define MAX_SSE_REGS 8
-typedef struct
+
+struct register_args
 {
  /* Registers for argument passing.  */
-  long gpr[MAX_GPR_REGS];
+  UINT64 gpr[MAX_GPR_REGS];
  __int128_t sse[MAX_SSE_REGS];
+};

-  /* Stack space for arguments.  */
-  char argspace[0];
-} stackLayout;
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+			     void *raddr, void (*fnaddr)(), unsigned ssecount);

 /* All reference to register classes here is identical to the code in
   gcc/config/i386/i386.c. Do *not* change one without the other.  */
@ -55,8 +53,7 @@ typedef struct
   use SF or DFmode move instead of DImode to avoid reformating penalties.

   Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
-   whenever possible (upper half does contain padding).
- */
+   whenever possible (upper half does contain padding).  */
 enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
@ -68,11 +65,14 @@ enum x86_64_reg_class
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
+    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

 #define MAX_CLASSES 4

+#define SSE_CLASS_P(X)	((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
+
 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */
@ -106,9 +106,14 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

-  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
-  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
-      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
+  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+     MEMORY is used.  */
+  if (class1 == X86_64_X87_CLASS
+      || class1 == X86_64_X87UP_CLASS
+      || class1 == X86_64_COMPLEX_X87_CLASS
+      || class2 == X86_64_X87_CLASS
+      || class2 == X86_64_X87UP_CLASS
+      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
@ -125,11 +130,8 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
 */
 static int
 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
-		   int *byte_offset)
+		   size_t byte_offset)
 {
-  /* First, align to the right place.  */
-  *byte_offset = ALIGN(*byte_offset, type->alignment);
-
  switch (type->type)
    {
    case FFI_TYPE_UINT8:
@ -141,13 +143,13 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
    case FFI_TYPE_POINTER:
-      if (((*byte_offset) % 8 + type->size) <= 4)
+      if (byte_offset + type->size <= 4)
 	classes[0] = X86_64_INTEGERSI_CLASS;
      else
 	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case FFI_TYPE_FLOAT:
-      if (((*byte_offset) % 8) == 0)
+      if (byte_offset == 0)
 	classes[0] = X86_64_SSESF_CLASS;
      else
 	classes[0] = X86_64_SSE_CLASS;
@ -175,22 +177,23 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
 	  classes[i] = X86_64_NO_CLASS;

 	/* Merge the fields of structure.  */
-	for (ptr=type->elements; (*ptr)!=NULL; ptr++)
+	for (ptr = type->elements; *ptr != NULL; ptr++)
 	  {
 	    int num;

-	    num = classify_argument (*ptr, subclasses, byte_offset);
+	    byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
+
+	    num = classify_argument (*ptr, subclasses, byte_offset % 8);
 	    if (num == 0)
 	      return 0;
 	    for (i = 0; i < num; i++)
 	      {
-		int pos = *byte_offset / 8;
+		int pos = byte_offset / 8;
 		classes[i + pos] =
 		  merge_classes (subclasses[i], classes[i + pos]);
 	      }

-	    if ((*ptr)->type != FFI_TYPE_STRUCT)
-	      *byte_offset += (*ptr)->size;
+	    byte_offset += (*ptr)->size;
 	  }

 	/* Final merger cleanup.  */
@ -222,155 +225,198 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
 }

 /* Examine the argument and return set number of register required in each
-   class.  Return 0 iff parameter should be passed in memory.  */
+   class.  Return zero iff parameter should be passed in memory, otherwise
+   the number of registers.  */
+
 static int
-examine_argument (ffi_type *type, int in_return, int *int_nregs,int *sse_nregs)
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+		  _Bool in_return, int *pngpr, int *pnsse)
 {
-  enum x86_64_reg_class class[MAX_CLASSES];
-  int offset = 0;
-  int n;
-
-  n = classify_argument (type, class, &offset);
+  int i, n, ngpr, nsse;

+  n = classify_argument (type, classes, 0);
  if (n == 0)
    return 0;

-  *int_nregs = 0;
-  *sse_nregs = 0;
-  for (n--; n>=0; n--)
-    switch (class[n])
+  ngpr = nsse = 0;
+  for (i = 0; i < n; ++i)
+    switch (classes[i])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
-	(*int_nregs)++;
+	ngpr++;
 	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
-	(*sse_nregs)++;
+	nsse++;
 	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
 	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
-	if (!in_return)
-	  return 0;
-	break;
+      case X86_64_COMPLEX_X87_CLASS:
+	return in_return != 0;
      default:
 	abort ();
      }
-  return 1;
+
+  *pngpr = ngpr;
+  *pnsse = nsse;
+
+  return n;
 }

-/* Functions to load floats and double to an SSE register placeholder.  */
-extern void float2sse (float, __int128_t *);
-extern void double2sse (double, __int128_t *);
-extern void floatfloat2sse (void *, __int128_t *);
+/* Perform machine dependent cif processing.  */

-/* Functions to put the floats and doubles back.  */
-extern float sse2float (__int128_t *);
-extern double sse2double (__int128_t *);
-extern void sse2floatfloat(__int128_t *, void *);
-
-/*@-exportheader@*/
-void
-ffi_prep_args (stackLayout *stack, extended_cif *ecif)
-/*@=exportheader@*/
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
 {
-  int gprcount, ssecount, i, g, s;
-  void **p_argv;
-  void *argp = &stack->argspace;
-  ffi_type **p_arg;
+  int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  size_t bytes;

-  /* First check if the return value should be passed in memory. If so,
-     pass the pointer as the first argument.  */
  gprcount = ssecount = 0;
-  if (ecif->cif->rtype->type != FFI_TYPE_VOID 
-      && examine_argument (ecif->cif->rtype, 1, &g, &s) == 0)
-    stack->gpr[gprcount++] = (long) ecif->rvalue;

-  for (i=ecif->cif->nargs, p_arg=ecif->cif->arg_types, p_argv = ecif->avalue;
-       i!=0; i--, p_arg++, p_argv++)
+  flags = cif->rtype->type;
+  if (flags != FFI_TYPE_VOID)
    {
-      int in_register = 0;
-
-      switch ((*p_arg)->type)
+      n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
 	{
-	case FFI_TYPE_SINT8:
-	case FFI_TYPE_SINT16:
-	case FFI_TYPE_SINT32:
-	case FFI_TYPE_SINT64:
-	case FFI_TYPE_UINT8:
-	case FFI_TYPE_UINT16:
-	case FFI_TYPE_UINT32:
-	case FFI_TYPE_UINT64:
-	case FFI_TYPE_POINTER:
-	  if (gprcount < MAX_GPR_REGS)
-	    {
-	      stack->gpr[gprcount] = 0;
-	      stack->gpr[gprcount++] = *(long long *)(*p_argv);
-	      in_register = 1;
-	    }
-	  break;
-
-	case FFI_TYPE_FLOAT:
-	  if (ssecount < MAX_SSE_REGS)
-	    {
-	      float2sse (*(float *)(*p_argv), &stack->sse[ssecount++]);
-	      in_register = 1;
-	    }
-	  break;
-
-	case FFI_TYPE_DOUBLE:
-	  if (ssecount < MAX_SSE_REGS)
-	    {
-	      double2sse (*(double *)(*p_argv), &stack->sse[ssecount++]);
-	      in_register = 1;
-	    }
-	  break;
+	  /* The return value is passed in memory.  A pointer to that
+	     memory is the first argument.  Allocate a register for it.  */
+	  gprcount++;
+	  /* We don't have to do anything in asm for the return.  */
+	  flags = FFI_TYPE_VOID;
 	}
-
-      if (in_register)
-	continue;
-
-      /* Either all places in registers where filled, or this is a
-	 type that potentially goes into a memory slot.  */
-      if (examine_argument (*p_arg, 0, &g, &s) == 0
-	  || gprcount + g > MAX_GPR_REGS || ssecount + s > MAX_SSE_REGS)
+      else if (flags == FFI_TYPE_STRUCT)
 	{
-	  /* Pass this argument in memory.  */
-	  argp = (void *)ALIGN(argp, (*p_arg)->alignment);
-	  /* Stack arguments are *always* at least 8 byte aligned.  */
-	  argp = (void *)ALIGN(argp, 8);
-	  memcpy (argp, *p_argv, (*p_arg)->size);
-	  argp += (*p_arg)->size;
+	  /* Mark which registers the result appears in.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+	  if (sse0 && !sse1)
+	    flags |= 1 << 8;
+	  else if (!sse0 && sse1)
+	    flags |= 1 << 9;
+	  else if (sse0 && sse1)
+	    flags |= 1 << 10;
+	  /* Mark the true size of the structure.  */
+	  flags |= cif->rtype->size << 12;
+	}
+    }
+
+  /* Go over all arguments and determine the way they should be passed.
+     If it's in a register and there is space for it, let that be so. If
+     not, add its size to the stack byte count.  */
+  for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
+    {
+      if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = cif->arg_types[i]->alignment;
+
+	  if (align < 8)
+	    align = 8;
+
+	  bytes = ALIGN(bytes, align);
+	  bytes += cif->arg_types[i]->size;
 	}
      else
 	{
-	  /* All easy cases are eliminated. Now fire the big guns.  */
+	  gprcount += ngpr;
+	  ssecount += nsse;
+	}
+    }
+  if (ssecount)
+    flags |= 1 << 11;
+  cif->flags = flags;
+  cif->bytes = bytes;

-	  enum x86_64_reg_class classes[MAX_CLASSES];
-	  int offset = 0, j, num;
-	  void *a;
+  return FFI_OK;
+}

-	  num = classify_argument (*p_arg, classes, &offset);
-	  for (j=0, a=*p_argv; j<num; j++, a+=8)
+void
+ffi_call (ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
+{
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  char *stack, *argp;
+  ffi_type **arg_types;
+  int gprcount, ssecount, ngpr, nsse, i, avn;
+  _Bool ret_in_memory;
+  struct register_args *reg_args;
+
+  /* Can't call 32-bit mode from 64-bit mode.  */
+  FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+  /* If the return value is a struct and we don't have a return value
+     address then we need to make one.  Note the setting of flags to
+     VOID above in ffi_prep_cif_machdep.  */
+  ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
+		   && (cif->flags & 0xff) == FFI_TYPE_VOID);
+  if (rvalue == NULL && ret_in_memory)
+    rvalue = alloca (cif->rtype->size);
+
+  /* Allocate the space for the arguments, plus 4 words of temp space.  */
+  stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+  reg_args = (struct register_args *) stack;
+  argp = stack + sizeof (struct register_args);
+
+  gprcount = ssecount = 0;
+
+  /* If the return value is passed in memory, add the pointer as the
+     first integer argument.  */
+  if (ret_in_memory)
+    reg_args->gpr[gprcount++] = (long) rvalue;
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  for (i = 0; i < avn; ++i)
+    {
+      size_t size = arg_types[i]->size;
+      int n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  memcpy (argp, avalue[i], size);
+	  argp += size;
+	}
+      else
+	{
+	  /* The argument is passed entirely in registers.  */
+	  char *a = (char *) avalue[i];
+	  int j;
+
+	  for (j = 0; j < n; j++, a += 8, size -= 8)
 	    {
 	      switch (classes[j])
 		{
 		case X86_64_INTEGER_CLASS:
 		case X86_64_INTEGERSI_CLASS:
-		  stack->gpr[gprcount++] = *(long long *)a;
+		  reg_args->gpr[gprcount] = 0;
+		  memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+		  gprcount++;
 		  break;
 		case X86_64_SSE_CLASS:
-		  floatfloat2sse (a, &stack->sse[ssecount++]);
+		case X86_64_SSEDF_CLASS:
+		  reg_args->sse[ssecount++] = *(UINT64 *) a;
 		  break;
 		case X86_64_SSESF_CLASS:
-		  float2sse (*(float *)a, &stack->sse[ssecount++]);
-		  break;
-		case X86_64_SSEDF_CLASS:
-		  double2sse (*(double *)a, &stack->sse[ssecount++]);
+		  reg_args->sse[ssecount++] = *(UINT32 *) a;
 		  break;
 		default:
 		  abort();
@ -378,203 +424,13 @@ ffi_prep_args (stackLayout *stack, extended_cif *ecif)
 	    }
 	}
    }
+
+  ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+		   cif->flags, rvalue, fn, ssecount);
 }

-/* Perform machine dependent cif processing.  */
-ffi_status
-ffi_prep_cif_machdep (ffi_cif *cif)
-{
-  int gprcount, ssecount, i, g, s;

-  gprcount = ssecount = 0;
-
-  /* Reset the byte count. We handle this size estimation here.  */
-  cif->bytes = 0;
-
-  /* If the return value should be passed in memory, pass the pointer
-     as the first argument. The actual memory isn't allocated here.  */
-  if (cif->rtype->type != FFI_TYPE_VOID 
-      && examine_argument (cif->rtype, 1, &g, &s) == 0)
-    gprcount = 1;
-
-  /* Go over all arguments and determine the way they should be passed.
-     If it's in a register and there is space for it, let that be so. If
-     not, add it's size to the stack byte count.  */
-  for (i=0; i<cif->nargs; i++)
-    {
-      if (examine_argument (cif->arg_types[i], 0, &g, &s) == 0
-	  || gprcount + g > MAX_GPR_REGS || ssecount + s > MAX_SSE_REGS)
-	{
-	  /* This is passed in memory. First align to the basic type.  */
-	  cif->bytes = ALIGN(cif->bytes, cif->arg_types[i]->alignment);
-
-	  /* Stack arguments are *always* at least 8 byte aligned.  */
-	  cif->bytes = ALIGN(cif->bytes, 8);
-
-	  /* Now add the size of this argument.  */
-	  cif->bytes += cif->arg_types[i]->size;
-	}
-      else
-	{
-	  gprcount += g;
-	  ssecount += s;
-	}
-    }
-
-  /* Set the flag for the closures return.  */
-    switch (cif->rtype->type)
-    {
-    case FFI_TYPE_VOID:
-    case FFI_TYPE_STRUCT:
-    case FFI_TYPE_SINT64:
-    case FFI_TYPE_FLOAT:
-    case FFI_TYPE_DOUBLE:
-    case FFI_TYPE_LONGDOUBLE:
-      cif->flags = (unsigned) cif->rtype->type;
-      break;
-
-    case FFI_TYPE_UINT64:
-      cif->flags = FFI_TYPE_SINT64;
-      break;
-
-    default:
-      cif->flags = FFI_TYPE_INT;
-      break;
-    }
-
-  return FFI_OK;
-}
-
-typedef struct
-{
-  long gpr[2];
-  __int128_t sse[2];
-  long double st0;
-} return_value;
-
-void
-ffi_fill_return_value (return_value *rv, extended_cif *ecif)
-{
-  enum x86_64_reg_class classes[MAX_CLASSES];
-  int i = 0, num;
-  long *gpr = rv->gpr;
-  __int128_t *sse = rv->sse;
-  signed char sc;
-  signed short ss;
-
-  /* This is needed because of the way x86-64 handles signed short
-     integers.  */
-  switch (ecif->cif->rtype->type)
-    {
-    case FFI_TYPE_SINT8:
-      sc = *(signed char *)gpr;
-      *(long long *)ecif->rvalue = (long long)sc;
-      return;
-    case FFI_TYPE_SINT16:
-      ss = *(signed short *)gpr;
-      *(long long *)ecif->rvalue = (long long)ss;
-      return;
-    default:
-      /* Just continue.  */
-      ;
-    }
-
-  num = classify_argument (ecif->cif->rtype, classes, &i);
-
-  if (num == 0)
-    /* Return in memory.  */
-    ecif->rvalue = (void *) rv->gpr[0];
-  else if (num == 2 && classes[0] == X86_64_X87_CLASS &&
-	classes[1] == X86_64_X87UP_CLASS)
-    /* This is a long double (this is easiest to handle this way instead
-       of an eightbyte at a time as in the loop below.  */
-    *((long double *)ecif->rvalue) = rv->st0;
-  else
-    {
-      void *a;
-
-      for (i=0, a=ecif->rvalue; i<num; i++, a+=8)
-	{
-	  switch (classes[i])
-	    {
-	    case X86_64_INTEGER_CLASS:
-	    case X86_64_INTEGERSI_CLASS:
-	      *(long long *)a = *gpr;
-	      gpr++;
-	      break;
-	    case X86_64_SSE_CLASS:
-	      sse2floatfloat (sse++, a);
-	      break;
-	    case X86_64_SSESF_CLASS:
-	      *(float *)a = sse2float (sse++);
-	      break;
-	    case X86_64_SSEDF_CLASS:
-	      *(double *)a = sse2double (sse++);
-	      break;
-	    default:
-	      abort();
-	    }
-	}
-    }
-}
-
-/*@-declundef@*/
-/*@-exportheader@*/
-extern void ffi_call_UNIX64(void (*)(stackLayout *, extended_cif *),
-			    void (*) (return_value *, extended_cif *),
-			    /*@out@*/ extended_cif *, 
-			    unsigned, /*@out@*/ unsigned *, void (*fn)());
-/*@=declundef@*/
-/*@=exportheader@*/
-
-void ffi_call(/*@dependent@*/ ffi_cif *cif, 
-	      void (*fn)(), 
-	      /*@out@*/ void *rvalue, 
-	      /*@dependent@*/ void **avalue)
-{
-  extended_cif ecif;
-  int dummy;
-
-  ecif.cif = cif;
-  ecif.avalue = avalue;
-  
-  /* If the return value is a struct and we don't have a return	*/
-  /* value address then we need to make one		        */
-
-  if ((rvalue == NULL) && 
-      (examine_argument (cif->rtype, 1, &dummy, &dummy) == 0))
-    {
-      /*@-sysunrecog@*/
-      ecif.rvalue = alloca(cif->rtype->size);
-      /*@=sysunrecog@*/
-    }
-  else
-    ecif.rvalue = rvalue;
-    
-  /* Stack must always be 16byte aligned. Make it so.  */
-  cif->bytes = ALIGN(cif->bytes, 16);
-  
-  switch (cif->abi) 
-    {
-    case FFI_SYSV:
-      /* Calling 32bit code from 64bit is not possible  */
-      FFI_ASSERT(0);
-      break;
-
-    case FFI_UNIX64:
-      /*@-usedef@*/
-      ffi_call_UNIX64 (ffi_prep_args, ffi_fill_return_value, &ecif,
-		       cif->bytes, ecif.rvalue, fn);
-      /*@=usedef@*/
-      break;
-
-    default:
-      FFI_ASSERT(0);
-      break;
-    }
-}
-
-extern void ffi_closure_UNIX64(void);
+extern void ffi_closure_unix64(void);

 ffi_status
 ffi_prep_closure (ffi_closure* closure,
@ -584,16 +440,19 @@ ffi_prep_closure (ffi_closure* closure,
 {
  volatile unsigned short *tramp;

-  /* FFI_ASSERT (cif->abi == FFI_OSF);  */
-
  tramp = (volatile unsigned short *) &closure->tramp[0];
+
  tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
+  *(void * volatile *) &tramp[1] = ffi_closure_unix64;
  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
-  tramp[10] = 0xff49;		/* jmp *%r11	*/
-  tramp[11] = 0x00e3;
-  *(void * volatile *) &tramp[1] = ffi_closure_UNIX64;
  *(void * volatile *) &tramp[6] = closure;

+  /* Set the carry bit iff the function uses any sse registers.
+     This is clc or stc, together with the first byte of the jmp.  */
+  tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
+
+  tramp[11] = 0xe3ff;			/* jmp *%r11    */
+
  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;
@ -602,107 +461,109 @@ ffi_prep_closure (ffi_closure* closure,
 }

 int
-ffi_closure_UNIX64_inner(ffi_closure *closure, va_list l, void *rp)
+ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
+			 struct register_args *reg_args, char *argp)
 {
  ffi_cif *cif;
  void **avalue;
  ffi_type **arg_types;
-  long i, avn, argn;
+  long i, avn;
+  int gprcount, ssecount, ngpr, nsse;
+  int ret;

  cif = closure->cif;
  avalue = alloca(cif->nargs * sizeof(void *));
+  gprcount = ssecount = 0;

-  argn = 0;
+  ret = cif->rtype->type;
+  if (ret != FFI_TYPE_VOID)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
+	{
+	  /* The return value goes in memory.  Arrange for the closure
+	     return value to go directly back to the original caller.  */
+	  rvalue = (void *) reg_args->gpr[gprcount++];
+	  /* We don't have to do anything in asm for the return.  */
+	  ret = FFI_TYPE_VOID;
+	}
+      else if (ret == FFI_TYPE_STRUCT && n == 2)
+	{
+	  /* Mark which register the second word of the structure goes in.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = SSE_CLASS_P (classes[1]);
+	  if (!sse0 && sse1)
+	    ret |= 1 << 8;
+	  else if (sse0 && !sse1)
+	    ret |= 1 << 9;
+	}
+    }

-  i = 0;
  avn = cif->nargs;
  arg_types = cif->arg_types;
  
-  /* Grab the addresses of the arguments from the stack frame.  */
-  while (i < avn)
+  for (i = 0; i < avn; ++i)
    {
-      switch (arg_types[i]->type)
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      int n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
 	{
-	case FFI_TYPE_SINT8:
-	case FFI_TYPE_UINT8:
-	case FFI_TYPE_SINT16:
-	case FFI_TYPE_UINT16:
-	case FFI_TYPE_SINT32:
-	case FFI_TYPE_UINT32:
-	case FFI_TYPE_SINT64:
-	case FFI_TYPE_UINT64:
-	case FFI_TYPE_POINTER:
-	  {
-	    if (l->gp_offset > 48-8)
-	      {
-		avalue[i] = l->overflow_arg_area;
-		l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
-	      }
-	    else
-	      {
-		avalue[i] = (char *)l->reg_save_area + l->gp_offset;
-		l->gp_offset += 8;
-	      }
-	  }
-	  break;
+	  long align = arg_types[i]->alignment;

-	case FFI_TYPE_STRUCT:
-	  /* FIXME  */
-	  FFI_ASSERT(0);
-	  break;
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;

-	case FFI_TYPE_DOUBLE:
-	  {
-	    if (l->fp_offset > 176-16)
-	      {
-		avalue[i] = l->overflow_arg_area;
-		l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
-	      }
-	    else
-	      {
-		avalue[i] = (char *)l->reg_save_area + l->fp_offset;
-		l->fp_offset += 16;
-	      }
-	  }
-#if DEBUG_FFI
-	  fprintf (stderr, "double arg %d = %g\n", i, *(double *)avalue[i]);
-#endif
-	  break;
-	  
-	case FFI_TYPE_FLOAT:
-	  {
-	    if (l->fp_offset > 176-16)
-	      {
-		avalue[i] = l->overflow_arg_area;
-		l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
-	      }
-	    else
-	      {
-		avalue[i] = (char *)l->reg_save_area + l->fp_offset;
-		l->fp_offset += 16;
-	      }
-	  }
-#if DEBUG_FFI
-	  fprintf (stderr, "float arg %d = %g\n", i, *(float *)avalue[i]);
-#endif
-	  break;
-	  
-	default:
-	  FFI_ASSERT(0);
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  avalue[i] = argp;
+	  argp += arg_types[i]->size;
 	}
+      /* If the argument is in a single register, or two consecutive
+	 integer registers, then we can use that address directly.  */
+      else if (n == 1
+	       || (n == 2 && !(SSE_CLASS_P (classes[0])
+			       || SSE_CLASS_P (classes[1]))))
+	{
+	  /* SSE pairs are repacked below: sse[] slots are 16 bytes wide.  */
+	  if (SSE_CLASS_P (classes[0]))
+	    {
+	      avalue[i] = &reg_args->sse[ssecount];
+	      ssecount += n;
+	    }
+	  else
+	    {
+	      avalue[i] = &reg_args->gpr[gprcount];
+	      gprcount += n;
+	    }
+	}
+      /* Otherwise, allocate space to make them consecutive.  */
+      else
+	{
+	  char *a = alloca (16);
+	  int j;

-      argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
-      i++;
+	  avalue[i] = a;
+	  for (j = 0; j < n; j++, a += 8)
+	    {
+	      if (SSE_CLASS_P (classes[j]))
+		memcpy (a, &reg_args->sse[ssecount++], 8);
+	      else
+		memcpy (a, &reg_args->gpr[gprcount++], 8);
+	    }
+	}
    }

  /* Invoke the closure.  */
-  (closure->fun) (cif, rp, avalue, closure->user_data);
+  closure->fun (cif, rvalue, avalue, closure->user_data);

-  /* FIXME: Structs not supported.  */
-  FFI_ASSERT(cif->rtype->type != FFI_TYPE_STRUCT);
-
-  /* Tell ffi_closure_UNIX64 how to perform return type promotions.  */
-
-  return cif->rtype->type;
+  /* Tell assembly how to perform return type promotions.  */
+  return ret;
 }
-#endif /* ifndef __x86_64__ */
+
+#endif /* __x86_64__ */
--- a/src/foreign/gcc/libffi/src/x86/unix64.S
+++ b/src/foreign/gcc/libffi/src/x86/unix64.S
@ -28,276 +28,385 @@
 #include <fficonfig.h>
 #include <ffi.h>

-	.section	.rodata
-.LC0:
-	.string	"asm in progress %lld\n"
-.LC1:
-	.string	"asm in progress\n"
 .text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+	            void *raddr, void (*fnaddr)(), unsigned ssecount);
+
+   A bit of trickiness here -- ARGS+BYTES is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.  */
+
 	.align	2
-.globl ffi_call_UNIX64
-        .type	ffi_call_UNIX64,@function
+	.globl	ffi_call_unix64
+	.type	ffi_call_unix64,@function

-ffi_call_UNIX64:
-.LFB1:
-        pushq	%rbp
-.LCFI0:
-        movq	%rsp, %rbp
-.LCFI1:
-	/* Save all arguments */
-	subq	$48, %rsp
-.LCFI2:
-	movq	%rdi, -8(%rbp)		/* ffi_prep_args	 */
-	movq	%rsi, -16(%rbp)		/* ffi_fill_return_value */
-	movq	%rdx, -24(%rbp)		/* ecif			 */
-	movq	%rcx, -32(%rbp)		/* cif->bytes		 */
-	movq	%r8, -40(%rbp)		/* ecif.rvalue		 */
-	movq	%r9, -48(%rbp)		/* fn			 */
+ffi_call_unix64:
+.LUW0:
+	/* Register roles on entry (System V argument order):
+	     %rdi = args (register area), %rsi = bytes, %rdx = flags,
+	     %rcx = raddr, %r8 = fnaddr, %r9d = ssecount.
+	   The local frame is built at args+bytes rather than pushed:
+	     0(frame)  = flags
+	     8(frame)  = raddr
+	     16(frame) = caller's %rbp
+	     24(frame) = return address (copied from the top of stack).  */
+	movq	(%rsp), %r10		/* Load return address.  */
+	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
+	movq	%rdx, (%rax)		/* Save flags.  */
+	movq	%rcx, 8(%rax)		/* Save raddr.  */
+	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
+	movq	%r10, 24(%rax)		/* Relocate return address.  */
+	movq	%rax, %rbp		/* Finalize local stack frame.  */
+.LUW1:
+	/* The argument registers are about to be overwritten with the
+	   callee's arguments, so keep what is still needed in scratch
+	   registers.  %eax is tested later to decide whether the SSE
+	   registers must be loaded.  */
+	movq	%rdi, %r10		/* Save a copy of the register area. */
+	movq	%r8, %r11		/* Save a copy of the target fn.  */
+	movl	%r9d, %eax		/* Set number of SSE registers.  */

-	/* Make room for all of the new args and the register args */
-	addl	$176, %ecx
-.LCFI3:
-	subq	%rcx, %rsp
-.LCFI4:
-	/* Setup the call to ffi_prep_args.  */
-	movq	%rdi, %rax		/* &ffi_prep_args	*/
-	movq	%rsp, %rdi		/* stackLayout		*/
-	movq	%rdx, %rsi		/* ecif			*/
-	call	*%rax			/* ffi_prep_args(stackLayout, ecif);*/ 
+	/* Load up all argument registers.  */
+	/* The six integer argument slots are the first 48 bytes of the
+	   register area addressed by %r10, 8 bytes each.  */
+	movq	(%r10), %rdi
+	movq	8(%r10), %rsi
+	movq	16(%r10), %rdx
+	movq	24(%r10), %rcx
+	movq	32(%r10), %r8
+	movq	40(%r10), %r9
+	/* Only visit .Lload_sse when ssecount (in %eax) is non-zero;
+	   that out-of-line path jumps back to .Lret_from_load_sse.  */
+	testl	%eax, %eax
+	jnz	.Lload_sse
+.Lret_from_load_sse:

-	/* ffi_prep_args have put all the register contents into the  */
-	/* stackLayout struct. Now put the register values in place.  */
-	movq	(%rsp), %rdi
-	movq	8(%rsp), %rsi
-	movq	16(%rsp), %rdx
-	movq	24(%rsp), %rcx
-	movq	32(%rsp), %r8
-	movq	40(%rsp), %r9
-	movaps	48(%rsp), %xmm0
-	movaps	64(%rsp), %xmm1
-	movaps	80(%rsp), %xmm2
-	movaps	96(%rsp), %xmm3
-	movaps	112(%rsp), %xmm4
-	movaps	128(%rsp), %xmm5
-	movaps	144(%rsp), %xmm6
-	movaps	160(%rsp), %xmm7
+	/* Deallocate the reg arg area.  */
+	leaq	176(%r10), %rsp

-	/* Remove space for stackLayout so stack arguments are placed
-	   correctly for the call.  */
-.LCFI5:
-	addq	$176, %rsp
-.LCFI6:
 	/* Call the user function.  */
-	call	*-48(%rbp)
+	call	*%r11

-	/* Make stack space for the return_value struct.  */
-	subq	$64, %rsp
+	/* Deallocate stack arg area; local stack frame in redzone.  */
+	leaq	24(%rbp), %rsp

-	/* Fill in all potential return values to this struct.  */
-	movq	%rax, (%rsp)
-	movq	%rdx, 8(%rsp)
-	movaps	%xmm0, 16(%rsp)
-	movaps	%xmm1, 32(%rsp)
-	fstpt	48(%rsp)
+	movq	0(%rbp), %rcx		/* Reload flags.  */
+	movq	8(%rbp), %rdi		/* Reload raddr.  */
+	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
+.LUW2:

-	/* Now call ffi_fill_return_value.  */
-	movq	%rsp, %rdi		/* struct return_value	  */
-	movq	-24(%rbp), %rsi		/* ecif			  */
-	movq	-16(%rbp), %rax		/* &ffi_fill_return_value */
-	call	*%rax			/* call it		  */
+	/* The first byte of the flags contains the FFI_TYPE.  */
+	/* Dispatch through .Lstore_table: each entry is a signed 32-bit
+	   offset relative to the table base, so load the base
+	   PC-relatively, fetch entry [type], sign-extend it and add the
+	   base back to obtain the handler address.  The handlers expect
+	   raddr in %rdi and the remaining flag bits in %ecx.  */
+	movzbl	%cl, %r10d
+	leaq	.Lstore_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10

-	/* And the work is done.  */
-        leave
-        ret
-.LFE1:
-.ffi_call_UNIX64_end:
-        .size    ffi_call_UNIX64,.ffi_call_UNIX64_end-ffi_call_UNIX64
+	.section .rodata
+.Lstore_table:
+	/* Jump table used by ffi_call_unix64 to store the callee's return
+	   value.  It is indexed by the FFI_TYPE code held in the low byte
+	   of the flags; each entry is the handler's offset from the start
+	   of this table.  The entry order presumably has to match the
+	   numeric FFI_TYPE_* values in ffi.h -- confirm when adding types.
+	   FFI_TYPE_INT shares the SINT32 handler, and every 64-bit integer
+	   type, including pointers, shares .Lst_int64.  */
+	.long	.Lst_void-.Lstore_table		/* FFI_TYPE_VOID */
+	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_INT */
+	.long	.Lst_float-.Lstore_table	/* FFI_TYPE_FLOAT */
+	.long	.Lst_double-.Lstore_table	/* FFI_TYPE_DOUBLE */
+	.long	.Lst_ldouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lst_uint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+	.long	.Lst_sint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+	.long	.Lst_uint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+	.long	.Lst_sint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+	.long	.Lst_uint32-.Lstore_table	/* FFI_TYPE_UINT32 */
+	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_SINT32 */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_UINT64 */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_SINT64 */
+	.long	.Lst_struct-.Lstore_table	/* FFI_TYPE_STRUCT */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_POINTER */

-.text
-	.align	2
-.globl float2sse
-        .type	float2sse,@function
-float2sse:
-	/* Save the contents of this sse-float in a pointer.  */
-	movaps	%xmm0, (%rdi)
+	.text
+	.align 2
+.Lst_void:
+	/* Nothing to store for a void result; return to ffi_call's
+	   caller through the relocated return address.  */
+	ret
+	.align 2
+
+.Lst_uint8:
+	/* Zero-extend the 8-bit result in %al and store it as a full
+	   8-byte word at raddr (%rdi).  */
+	movzbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_sint8:
+	/* Sign-extend the 8-bit result in %al and store it as a full
+	   8-byte word at raddr (%rdi).  */
+	movsbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_uint16:
+	/* Zero-extend the 16-bit result in %ax and store it as a full
+	   8-byte word at raddr (%rdi).  */
+	movzwq	%ax, %rax
+	movq	%rax, (%rdi)
+	/* Return here.  Without this the code falls through into
+	   .Lst_sint16, which sign-extends %ax and stores again, so any
+	   unsigned result >= 0x8000 would be written back with the upper
+	   48 bits set.  */
+	ret
+	.align 2
+.Lst_sint16:
+	/* Sign-extend the 16-bit result in %ax and store it as a full
+	   8-byte word at raddr (%rdi).  */
+	movswq	%ax, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_uint32:
+	/* Writing %eax clears the upper half of %rax, i.e. zero-extends
+	   the 32-bit result; store it as a full 8-byte word at raddr
+	   (%rdi).  */
+	movl	%eax, %eax
+	movq	%rax, (%rdi)
+	/* Return here.  Without this the code falls through into
+	   .Lst_sint32, whose cltq sign-extends %eax and stores again, so
+	   any unsigned result >= 0x80000000 would be written back with the
+	   upper 32 bits set.  */
+	ret
+	.align 2
+.Lst_sint32:
+	/* Sign-extend the 32-bit result in %eax to 64 bits (cltq) and
+	   store it as a full 8-byte word at raddr (%rdi).  Also used for
+	   FFI_TYPE_INT, per .Lstore_table.  */
+	cltq
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_int64:
+	/* 64-bit integers and pointers need no extension: store %rax
+	   unchanged at raddr (%rdi).  */
+	movq	%rax, (%rdi)
 	ret

-	.align	2
-.globl floatfloat2sse
-        .type	floatfloat2sse,@function
-floatfloat2sse:
-	/* Save the contents of these two sse-floats in a pointer.  */
-	movq	(%rdi), %xmm0
-	movaps	%xmm0, (%rsi)
+	.align 2
+.Lst_float:
+	/* Store the low 4 bytes of %xmm0 (a single float) at raddr
+	   (%rdi).  */
+	movss	%xmm0, (%rdi)
+	ret
+	.align 2
+.Lst_double:
+	/* Store the low 8 bytes of %xmm0 (a double) at raddr (%rdi).  */
+	movsd	%xmm0, (%rdi)
+	ret
+.Lst_ldouble:
+	/* Pop the x87 top-of-stack and store it at raddr (%rdi) in the
+	   80-bit extended format.  */
+	fstpt	(%rdi)
 	ret

-	.align	2
-.globl double2sse
-        .type	double2sse,@function
-double2sse:
-	/* Save the contents of this sse-double in a pointer.  */
-	movaps	%xmm0, (%rdi)
+	.align 2
+.Lst_struct:
+	/* Store a struct that was returned in registers.  On entry
+	   %rdi = raddr and %ecx = flags.  The two eightbytes are gathered
+	   into %rax (first) and %rdx (second), spilled to a scratch area,
+	   and then exactly "size" bytes are copied to raddr.  */
+	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */
+
+	/* We have to locate the values now, and since we don't want to
+	   write too much data into the user's return value, we spill the
+	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
+	   control where the values are located.  Only one of the three
+	   bits will be set; see ffi_prep_cif_machdep for the pattern.  */
+	movd	%xmm0, %r10
+	movd	%xmm1, %r11
+	/* Each testl sets ZF for the cmovnz instructions that follow it;
+	   cmov itself leaves the flags alone, so one test can guard two
+	   moves.
+	     bit 8:  value arrived as %xmm0/%rax  -> rdx = rax, rax = xmm0
+	     bit 9:  value arrived as %rax/%xmm0  -> rdx = xmm0
+	     bit 10: value arrived as %xmm0/%xmm1 -> rax = xmm0, rdx = xmm1
+	     none:   value is already in %rax/%rdx.  */
+	testl	$0x100, %ecx
+	cmovnz	%rax, %rdx
+	cmovnz	%r10, %rax
+	testl	$0x200, %ecx
+	cmovnz	%r10, %rdx
+	testl	$0x400, %ecx
+	cmovnz	%r10, %rax
+	cmovnz	%r11, %rdx
+	movq	%rax, (%rsi)
+	movq	%rdx, 8(%rsi)
+
+	/* Bits 12-31 contain the true size of the structure.  Copy from
+	   the scratch area to the true destination.  */
+	/* After the shift %rcx holds the byte count; rep movsb copies
+	   that many bytes from (%rsi) to (%rdi).  This relies on the
+	   direction flag being clear, as the ABI requires at calls.  */
+	shrl	$12, %ecx
+	rep movsb
 	ret

-	.align	2
-.globl sse2float
-        .type	sse2float,@function
-sse2float:
-	/* Save the contents of this sse-float in a pointer.  */
-	movaps	(%rdi), %xmm0
-	ret
+	/* Many times we can avoid loading any SSE registers at all.
+	   It's not worth an indirect jump to load the exact set of
+	   SSE registers needed; zero or all is a good compromise.  */
+	.align 2
+.LUW3:
+.Lload_sse:
+	/* Out-of-line path of ffi_call_unix64: load all eight SSE
+	   argument registers from the 16-byte slots that follow the six
+	   integer slots in the register area (%r10 + 48 onwards), then
+	   rejoin the main path.  movdqa faults on unaligned addresses, so
+	   this assumes the register area is 16-byte aligned -- it is
+	   allocated by the C caller, which is not visible here.  */
+	movdqa	48(%r10), %xmm0
+	movdqa	64(%r10), %xmm1
+	movdqa	80(%r10), %xmm2
+	movdqa	96(%r10), %xmm3
+	movdqa	112(%r10), %xmm4
+	movdqa	128(%r10), %xmm5
+	movdqa	144(%r10), %xmm6
+	movdqa	160(%r10), %xmm7
+	jmp	.Lret_from_load_sse
+
+.LUW4:
+	.size    ffi_call_unix64,.-ffi_call_unix64

 	.align	2
-.globl sse2double
-        .type	sse2double,@function
-sse2double:
-	/* Save the contents of this pointer in a sse-double.  */
-	movaps	(%rdi), %xmm0
-	ret
+	.globl ffi_closure_unix64
+	.type	ffi_closure_unix64,@function

-	.align	2
-.globl sse2floatfloat
-        .type	sse2floatfloat,@function
-sse2floatfloat:
-	/* Save the contents of this pointer in two sse-floats.  */
-	movaps	(%rdi), %xmm0
-	movq	%xmm0, (%rsi)
-	ret
+ffi_closure_unix64:
+.LUW5:
+	/* The carry flag is set by the trampoline iff SSE registers
+	   are used.  Don't clobber it before the branch instruction.  */
+	/* That is why the frame is allocated with leaq rather than subq:
+	   lea and mov leave the flags untouched.  Frame layout after the
+	   200-byte allocation:
+	     0(%rsp)   .. 47(%rsp)   six integer argument registers
+	     48(%rsp)  .. 175(%rsp)  %xmm0-%xmm7 (filled by .Lsave_sse)
+	     176(%rsp) .. 199(%rsp)  return value buffer
+	     200(%rsp)               return address
+	     208(%rsp) ..            caller's stack arguments
+	   Given an ABI-conformant entry (%rsp = 8 mod 16), %rsp is 16-byte
+	   aligned after the adjustment, as both movdqa and the call
+	   below need.  */
+	leaq    -200(%rsp), %rsp
+.LUW6:
+	movq	%rdi, (%rsp)
+	movq    %rsi, 8(%rsp)
+	movq    %rdx, 16(%rsp)
+	movq    %rcx, 24(%rsp)
+	movq    %r8, 32(%rsp)
+	movq    %r9, 40(%rsp)
+	jc      .Lsave_sse
+.Lret_from_save_sse:

-	.align	2
-.globl ffi_closure_UNIX64
-        .type	ffi_closure_UNIX64,@function
-
-ffi_closure_UNIX64:
-.LFB2:
-        pushq   %rbp
-.LCFI10:
-        movq    %rsp, %rbp
-.LCFI11:
-        subq    $240, %rsp
-.LCFI12:
-	movq	%rdi, -176(%rbp)
-        movq    %rsi, -168(%rbp)
-        movq    %rdx, -160(%rbp)
-        movq    %rcx, -152(%rbp)
-        movq    %r8, -144(%rbp)
-        movq    %r9, -136(%rbp)
-        /* FIXME: We can avoid all this stashing of XMM registers by
-	   (in ffi_prep_closure) computing the number of
-	   floating-point args and moving it into %rax before calling
-	   this function.  Once this is done, uncomment the next few
-	   lines and only the essential XMM registers will be written
-	   to memory.  This is a significant saving.  */
-/*         movzbl  %al, %eax  */
-/*         movq    %rax, %rdx */
-/*         leaq    0(,%rdx,4), %rax */
-/*         leaq    2f(%rip), %rdx */
-/*         subq    %rax, %rdx */
-        leaq    -1(%rbp), %rax
-/*         jmp     *%rdx */
-        movaps  %xmm7, -15(%rax)
-        movaps  %xmm6, -31(%rax)
-        movaps  %xmm5, -47(%rax)
-        movaps  %xmm4, -63(%rax)
-        movaps  %xmm3, -79(%rax)
-        movaps  %xmm2, -95(%rax)
-        movaps  %xmm1, -111(%rax)
-        movaps  %xmm0, -127(%rax)
-2:
-        movl    %edi, -180(%rbp)
-        movl    $0, -224(%rbp)
-        movl    $48, -220(%rbp)
-        leaq    16(%rbp), %rax
-        movq    %rax, -216(%rbp)
-        leaq    -176(%rbp), %rdx
-        movq    %rdx, -208(%rbp)
-        leaq    -224(%rbp), %rsi
+	/* Set up the four arguments for ffi_closure_unix64_inner:
+	     %rdi = value handed over in %r10 (presumably the closure
+	            pointer loaded by the trampoline -- confirm there)
+	     %rsi = 176(%rsp), the return value buffer in this frame
+	     %rdx = %rsp, the saved register area
+	     %rcx = 208(%rsp), the first stack argument of our caller
+	   The helper's int result comes back in %eax and selects the
+	   return-value load path afterwards.  */
 	movq	%r10, %rdi
+	leaq	176(%rsp), %rsi
 	movq	%rsp, %rdx
+	leaq	208(%rsp), %rcx
+	call	ffi_closure_unix64_inner@PLT

-	cmpl	$FFI_TYPE_FLOAT, %eax
-	je	1f
-	cmpl	$FFI_TYPE_DOUBLE, %eax
-	je	2f
-	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
-	je	3f
-	cmpl	$FFI_TYPE_STRUCT, %eax
-	je	4f
-	popq	%rax
-        leave
-        ret
-1:
-2:
-3:	
-	movaps	-240(%rbp), %xmm0
-        leave
-        ret
-4:
-	leave
+	/* Deallocate stack frame early; return value is now in redzone.  */
+	/* The buffer that was at 176(%rsp) is at -24(%rsp) from here on
+	   (176 - 200), which is the address every .Lld_* handler reads.  */
+	addq	$200, %rsp
+.LUW7:
+
+	/* The first byte of the return value contains the FFI_TYPE.  */
+	/* "Return value" here is the int returned in %eax by
+	   ffi_closure_unix64_inner.  Dispatch through .Lload_table, whose
+	   entries are signed 32-bit offsets from the table base; %eax is
+	   left intact because .Lld_struct tests further bits of it.  */
+	movzbl	%al, %r10d
+	leaq	.Lload_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+	.section .rodata
+.Lload_table:
+	/* Jump table used by ffi_closure_unix64 to move the closure's
+	   return value from the buffer into the return registers.  It is
+	   indexed by the FFI_TYPE code in the low byte of the value
+	   returned by ffi_closure_unix64_inner; each entry is the
+	   handler's offset from the start of this table.  The entry order
+	   presumably has to match the numeric FFI_TYPE_* values in ffi.h
+	   -- confirm when adding types.  Signed and unsigned integers of
+	   the same width share one handler.  */
+	.long	.Lld_void-.Lload_table		/* FFI_TYPE_VOID */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_INT */
+	.long	.Lld_float-.Lload_table		/* FFI_TYPE_FLOAT */
+	.long	.Lld_double-.Lload_table	/* FFI_TYPE_DOUBLE */
+	.long	.Lld_ldouble-.Lload_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lld_int8-.Lload_table		/* FFI_TYPE_UINT8 */
+	.long	.Lld_int8-.Lload_table		/* FFI_TYPE_SINT8 */
+	.long	.Lld_int16-.Lload_table		/* FFI_TYPE_UINT16 */
+	.long	.Lld_int16-.Lload_table		/* FFI_TYPE_SINT16 */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_UINT32 */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_SINT32 */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_UINT64 */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_SINT64 */
+	.long	.Lld_struct-.Lload_table	/* FFI_TYPE_STRUCT */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_POINTER */
+
+	.text
+	.align 2
+.Lld_void:
+	/* Void closure result: no register needs to be loaded.  */
 	ret
-.LFE2:	
-		
-        .section        .eh_frame,EH_FRAME_FLAGS,@progbits
-.Lframe0:
-        .long   .LECIE1-.LSCIE1
+
+	.align 2
+.Lld_int8:
+	/* Load the first byte of the return buffer, zero-extended, into
+	   %eax.  Used for both UINT8 and SINT8 (see .Lload_table).
+	   NOTE(review): the signed case is not sign-extended here;
+	   presumably the caller only consumes %al -- confirm.  */
+	movzbl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int16:
+	/* Load the first two bytes of the return buffer, zero-extended,
+	   into %eax.  Used for both UINT16 and SINT16 (see .Lload_table).
+	   NOTE(review): the signed case is not sign-extended here;
+	   presumably the caller only consumes %ax -- confirm.  */
+	movzwl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int32:
+	/* Load the first four bytes of the return buffer into %eax; the
+	   32-bit write clears the upper half of %rax.  */
+	movl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int64:
+	/* Load the first eight bytes of the return buffer into %rax
+	   (64-bit integers and pointers).  */
+	movq	-24(%rsp), %rax
+	ret
+
+	.align 2
+.Lld_float:
+	/* Load a single float from the return buffer into %xmm0.  */
+	movss	-24(%rsp), %xmm0
+	ret
+	.align 2
+.Lld_double:
+	/* Load a double from the return buffer into %xmm0.  */
+	movsd	-24(%rsp), %xmm0
+	ret
+	.align 2
+.Lld_ldouble:
+	/* Push the 80-bit extended value from the return buffer onto the
+	   x87 stack, where a long double result is returned.  */
+	fldt	-24(%rsp)
+	ret
+
+	.align 2
+.Lld_struct:
+	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
+	   both rdx and xmm1 with the second word.  For the remaining,
+	   bit 8 set means xmm0 gets the second word, and bit 9 means
+	   that rax gets the second word.  */
+	/* The first word is at -24(%rsp) and the second at -16(%rsp).
+	   %rcx is a temporary for the value destined for %xmm0.  The
+	   second testl must read %eax before %rax is overwritten; the
+	   movq in between does not change the flags, so the final cmovnz
+	   still sees the result of that test.  */
+	movq	-24(%rsp), %rcx
+	movq	-16(%rsp), %rdx
+	movq	-16(%rsp), %xmm1
+	testl	$0x100, %eax
+	cmovnz	%rdx, %rcx
+	movd	%rcx, %xmm0
+	testl	$0x200, %eax
+	movq	-24(%rsp), %rax
+	cmovnz	%rdx, %rax
+	ret
+
+	/* See the comment above .Lload_sse; the same logic applies here.  */
+	.align 2
+.LUW8:
+.Lsave_sse:
+	/* Out-of-line path of ffi_closure_unix64, taken when the carry
+	   flag was set on entry: spill all eight SSE argument registers
+	   into the 16-byte slots at 48(%rsp)..175(%rsp), directly after
+	   the six saved integer registers, then rejoin the main path.
+	   movdqa requires 16-byte aligned slots, so this relies on %rsp
+	   being 16-byte aligned after the frame allocation.  */
+	movdqa	%xmm0, 48(%rsp)
+	movdqa	%xmm1, 64(%rsp)
+	movdqa	%xmm2, 80(%rsp)
+	movdqa	%xmm3, 96(%rsp)
+	movdqa	%xmm4, 112(%rsp)
+	movdqa	%xmm5, 128(%rsp)
+	movdqa	%xmm6, 144(%rsp)
+	movdqa	%xmm7, 160(%rsp)
+	jmp	.Lret_from_save_sse
+
+.LUW9:
+	.size	ffi_closure_unix64,.-ffi_closure_unix64
+
+	.section	.eh_frame,"a",@progbits
+.Lframe1:
+	.long	.LECIE1-.LSCIE1		/* CIE Length */
 .LSCIE1:
-        .long   0x0
-        .byte   0x1
-        .string "zR"
-        .uleb128 0x1
-        .sleb128 -8
-        .byte   0x10
-        .uleb128 0x1
-        .byte   0x1b
-        .byte   0xc
-        .uleb128 0x7
-        .uleb128 0x8
-        .byte   0x90
-        .uleb128 0x1
-        .align 8
+	.long	0			/* CIE Identifier Tag */
+	.byte	1			/* CIE Version */
+	.ascii "zR\0"			/* CIE Augmentation */
+	.uleb128 1			/* CIE Code Alignment Factor */
+	.sleb128 -8			/* CIE Data Alignment Factor */
+	.byte	0x10			/* CIE RA Column */
+	.uleb128 1			/* Augmentation size */
+	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.uleb128 7
+	.uleb128 8
+	.byte	0x80+16			/* DW_CFA_offset, %rip offset 1*-8 */
+	.uleb128 1
+	.align 8
 .LECIE1:
 .LSFDE1:
-	.long	.LEFDE1-.LASFDE1
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
 .LASFDE1:
-        .long   .LASFDE1-.Lframe0
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+	.long	.LUW0-.			/* FDE initial location */
+	.long	.LUW4-.LUW0		/* FDE address range */
+	.uleb128 0x0			/* Augmentation size */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW1-.LUW0
+
+	/* New stack frame based off rbp.  This is an itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-8, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
+	.uleb128 6
+	.uleb128 32
+	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
+	.uleb128 2
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW2-.LUW1
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.uleb128 7
+	.uleb128 8
+	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW3-.LUW2
+	.byte	0xb			/* DW_CFA_restore_state */

-        .long   .LFB1-.
-        .long   .LFE1-.LFB1
-        .uleb128 0x0
-        .byte   0x4		# DW_CFA_advance_loc4
-        .long   .LCFI0-.LFB1
-        .byte   0xe		# DW_CFA_def_cfa_offset
-        .uleb128 0x10
-        .byte   0x86		# DW_CFA_offset: r6 at cfa-16
-        .uleb128 0x2
-        .byte   0x4		# DW_CFA_advance_loc4
-        .long   .LCFI1-.LCFI0
-        .byte   0x86		# DW_CFA_offset: r6 at cfa-16
-        .uleb128 0x2
-        .byte   0xd		# DW_CFA_def_cfa_reg: r6
-        .uleb128 0x6
 	.align 8
 .LEFDE1:
 .LSFDE3:
-        .long   .LEFDE3-.LASFDE3        # FDE Length
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
 .LASFDE3:
-        .long   .LASFDE3-.Lframe0       # FDE CIE offset
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+	.long	.LUW5-.			/* FDE initial location */
+	.long	.LUW9-.LUW5		/* FDE address range */
+	.uleb128 0x0			/* Augmentation size */

-        .long   .LFB2-. # FDE initial location
-        .long   .LFE2-.LFB2     # FDE address range
-        .uleb128 0x0    # Augmentation size
-        .byte   0x4     # DW_CFA_advance_loc4
-        .long   .LCFI10-.LFB2
-        .byte   0xe     # DW_CFA_def_cfa_offset
-        .uleb128 0x10
-        .byte   0x86    # DW_CFA_offset, column 0x6
-        .uleb128 0x2
-        .byte   0x4     # DW_CFA_advance_loc4
-        .long   .LCFI11-.LCFI10
-        .byte   0xd     # DW_CFA_def_cfa_register
-        .uleb128 0x6
-        .align 8
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW6-.LUW5
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.uleb128 208
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW7-.LUW6
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.uleb128 8
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW8-.LUW7
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align 8
 .LEFDE3:

-#endif /* __x86_64__  */
+#endif /* __x86_64__ */
--- a/src/foreign/libffi_msvc/prep_cif.c
+++ b/src/foreign/libffi_msvc/prep_cif.c
@ -116,6 +116,9 @@ ffi_status ffi_prep_cif(/*@out@*/ /*@partial@*/ ffi_cif *cif,
 #if !defined M68K && !defined __x86_64__ && !defined S390
  /* Make space for the return structure pointer */
  if (cif->rtype->type == FFI_TYPE_STRUCT
+      /* MSVC returns small structures in registers.  But we use a different
+      workaround: pretend the return type is int32 or int64, and convert the
+      result to a structure afterwards. */
 #ifdef SPARC
      && (cif->abi != FFI_V9 || cif->rtype->size > 32)
 #endif