1  /* -----------------------------------------------------------------------
       2     ffi_linux64.c - Copyright (C) 2013 IBM
       3                     Copyright (C) 2011 Anthony Green
       4                     Copyright (C) 2011 Kyle Moffett
       5                     Copyright (C) 2008 Red Hat, Inc
       6                     Copyright (C) 2007, 2008 Free Software Foundation, Inc
       7                     Copyright (c) 1998 Geoffrey Keating
       8  
       9     PowerPC Foreign Function Interface
      10  
      11     Permission is hereby granted, free of charge, to any person obtaining
      12     a copy of this software and associated documentation files (the
      13     ``Software''), to deal in the Software without restriction, including
      14     without limitation the rights to use, copy, modify, merge, publish,
      15     distribute, sublicense, and/or sell copies of the Software, and to
      16     permit persons to whom the Software is furnished to do so, subject to
      17     the following conditions:
      18  
      19     The above copyright notice and this permission notice shall be included
      20     in all copies or substantial portions of the Software.
      21  
      22     THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
      23     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      24     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      25     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
      26     OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
      27     ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      28     OTHER DEALINGS IN THE SOFTWARE.
      29     ----------------------------------------------------------------------- */
      30  
      31  #include "ffi.h"
      32  
      33  #ifdef POWERPC64
      34  #include "ffi_common.h"
      35  #include "ffi_powerpc.h"
      36  
      37  
      38  /* About the LINUX64 ABI.  */
      39  enum {
      40    NUM_GPR_ARG_REGISTERS64 = 8,
      41    NUM_FPR_ARG_REGISTERS64 = 13,
      42    NUM_VEC_ARG_REGISTERS64 = 12,
      43  };
      44  enum { ASM_NEEDS_REGISTERS64 = 4 };
      45  
      46  
      47  #if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
      48  /* Adjust size of ffi_type_longdouble.  */
      49  void FFI_HIDDEN
      50  ffi_prep_types_linux64 (ffi_abi abi)
      51  {
      52    if ((abi & (FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128)) == FFI_LINUX)
      53      {
      54        ffi_type_longdouble.size = 8;
      55        ffi_type_longdouble.alignment = 8;
      56      }
      57    else
      58      {
      59        ffi_type_longdouble.size = 16;
      60        ffi_type_longdouble.alignment = 16;
      61      }
      62  }
      63  #endif
      64  
      65  
      66  static unsigned int
      67  discover_homogeneous_aggregate (ffi_abi abi,
      68                                  const ffi_type *t,
      69                                  unsigned int *elnum)
      70  {
      71    switch (t->type)
      72      {
      73  #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
      74      case FFI_TYPE_LONGDOUBLE:
      75        /* 64-bit long doubles are equivalent to doubles. */
      76        if ((abi & FFI_LINUX_LONG_DOUBLE_128) == 0)
      77          {
      78            *elnum = 1;
      79            return FFI_TYPE_DOUBLE;
      80          }
      81        /* IBM extended precision values use unaligned pairs
      82           of FPRs, but according to the ABI must be considered
      83           distinct from doubles. They are also limited to a
      84           maximum of four members in a homogeneous aggregate. */
      85        else if ((abi & FFI_LINUX_LONG_DOUBLE_IEEE128) == 0)
      86          {
      87            *elnum = 2;
      88            return FFI_TYPE_LONGDOUBLE;
      89          }
      90        /* Fall through. */
      91  #endif
      92      case FFI_TYPE_FLOAT:
      93      case FFI_TYPE_DOUBLE:
      94        *elnum = 1;
      95        return (int) t->type;
      96  
      97      case FFI_TYPE_STRUCT:;
      98        {
      99  	unsigned int base_elt = 0, total_elnum = 0;
     100  	ffi_type **el = t->elements;
     101  	while (*el)
     102  	  {
     103  	    unsigned int el_elt, el_elnum = 0;
     104  	    el_elt = discover_homogeneous_aggregate (abi, *el, &el_elnum);
     105  	    if (el_elt == 0
     106  		|| (base_elt && base_elt != el_elt))
     107  	      return 0;
     108  	    base_elt = el_elt;
     109  	    total_elnum += el_elnum;
     110  #if _CALL_ELF == 2
     111  	    if (total_elnum > 8)
     112  	      return 0;
     113  #else
     114  	    if (total_elnum > 1)
     115  	      return 0;
     116  #endif
     117  	    el++;
     118  	  }
     119  	*elnum = total_elnum;
     120  	return base_elt;
     121        }
     122  
     123      default:
     124        return 0;
     125      }
     126  }
     127  
     128  
     129  /* Perform machine dependent cif processing.
             Rejects long double ABI flags that this build cannot honour, then
             classifies the return value and every argument in order to derive
             cif->flags and cif->bytes (the frame size, rounded up to a multiple
             of 16).  Returns FFI_BAD_ABI for an unsupported flag combination
             and FFI_OK otherwise.  */
     130  static ffi_status
     131  ffi_prep_cif_linux64_core (ffi_cif *cif)
     132  {
     133    ffi_type **ptr;
     134    unsigned bytes;
            /* intarg_count is measured in 8-byte parameter slots; fparg_count
               and vecarg_count count FP and vector register uses.  */
     135    unsigned i, fparg_count = 0, intarg_count = 0, vecarg_count = 0;
     136    unsigned flags = cif->flags;
     137    unsigned elt, elnum, rtype;
     138  
     139  #if FFI_TYPE_LONGDOUBLE == FFI_TYPE_DOUBLE
     140    /* If compiled without long double support... */
     141    if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0 ||
     142        (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
     143      return FFI_BAD_ABI;
     144  #elif !defined(__VEC__)
     145    /* If compiled without vector register support (used by assembly)... */
     146    if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
     147      return FFI_BAD_ABI;
     148  #else
     149    /* If the IEEE128 flag is set, but long double is only 64 bits wide... */
     150    if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) == 0 &&
     151        (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
     152      return FFI_BAD_ABI;
     153  #endif
     154  
     155    /* The machine-independent calculation of cif->bytes doesn't work
     156       for us.  Redo the calculation.  */
     157  #if _CALL_ELF == 2
     158    /* Space for backchain, CR, LR, TOC and the asm's temp regs.  */
     159    bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long);
     160  
     161    /* Space for the general registers.  */
     162    bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long);
     163  #else
     164    /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
     165       regs.  */
     166    bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);
     167  
     168    /* Space for the mandatory parm save area and general registers.  */
     169    bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
     170  #endif
     171  
     172    /* Return value handling.  */
     173    rtype = cif->rtype->type;
            /* With _CALL_ELF == 2 the struct case below jumps back to this label
               after replacing rtype with the element type of a homogeneous
               aggregate, so the scalar cases set the remaining flags.  */
     174  #if _CALL_ELF == 2
     175  homogeneous:
     176  #endif
     177    switch (rtype)
     178      {
     179  #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
     180      case FFI_TYPE_LONGDOUBLE:
     181        if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
     182          {
     183            flags |= FLAG_RETURNS_VEC;
     184            break;
     185          }
     186        if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
     187  	flags |= FLAG_RETURNS_128BITS;
     188        /* Fall through.  */
     189  #endif
     190      case FFI_TYPE_DOUBLE:
     191        flags |= FLAG_RETURNS_64BITS;
     192        /* Fall through.  */
     193      case FFI_TYPE_FLOAT:
     194        flags |= FLAG_RETURNS_FP;
     195        break;
     196  
     197      case FFI_TYPE_UINT128:
     198        flags |= FLAG_RETURNS_128BITS;
     199        /* Fall through.  */
     200      case FFI_TYPE_UINT64:
     201      case FFI_TYPE_SINT64:
     202      case FFI_TYPE_POINTER:
     203        flags |= FLAG_RETURNS_64BITS;
     204        break;
     205  
     206      case FFI_TYPE_STRUCT:
     207  #if _CALL_ELF == 2
     208        elt = discover_homogeneous_aggregate (cif->abi, cif->rtype, &elnum);
     209        if (elt)
     210          {
     211            flags |= FLAG_RETURNS_SMST;
     212            rtype = elt;
     213            goto homogeneous;
     214          }
                /* Other structs of at most 16 bytes are also flagged as small
                   structs and need no hidden pointer.  */
     215        if (cif->rtype->size <= 16)
     216          {
     217            flags |= FLAG_RETURNS_SMST;
     218            break;
     219          }
     220  #endif
                /* The struct is returned through memory: reserve one parameter
                   slot for the result address (ffi_prep_args64 stores
                   ecif->rvalue there when FLAG_RETVAL_REFERENCE is set).  */
     221        intarg_count++;
     222        flags |= FLAG_RETVAL_REFERENCE;
     223        /* Fall through.  */
     224      case FFI_TYPE_VOID:
     225        flags |= FLAG_RETURNS_NOTHING;
     226        break;
     227  
     228      default:
     229        /* Returns 32-bit integer, or similar.  Nothing to do here.  */
     230        break;
     231      }
     232  
            /* Count the parameter slots and registers consumed by each argument
               and note whether any argument overflows its register class.  */
     233    for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
     234      {
     235        unsigned int align;
     236  
     237        switch ((*ptr)->type)
     238  	{
     239  #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
     240  	case FFI_TYPE_LONGDOUBLE:
     241            if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
     242              {
     243                vecarg_count++;
     244                /* Align to 16 bytes, plus the 16-byte argument. */
     245                intarg_count = (intarg_count + 3) & ~0x1;
     246                if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
     247                  flags |= FLAG_ARG_NEEDS_PSAVE;
     248                break;
     249              }
                  /* A 128-bit non-IEEE long double is counted once here and once
                     more by the shared double/float code below, i.e. as two
                     FPRs and two parameter slots.  */
     250  	  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
     251  	    {
     252  	      fparg_count++;
     253  	      intarg_count++;
     254  	    }
     255  	  /* Fall through.  */
     256  #endif
     257  	case FFI_TYPE_DOUBLE:
     258  	case FFI_TYPE_FLOAT:
     259  	  fparg_count++;
     260  	  intarg_count++;
     261  	  if (fparg_count > NUM_FPR_ARG_REGISTERS64)
     262  	    flags |= FLAG_ARG_NEEDS_PSAVE;
     263  	  break;
     264  
     265  	case FFI_TYPE_STRUCT:
     266  	  if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
     267  	    {
     268  	      align = (*ptr)->alignment;
     269  	      if (align > 16)
     270  		align = 16;
                      /* Convert the byte alignment into 8-byte slots.  */
     271  	      align = align / 8;
     272  	      if (align > 1)
     273  		intarg_count = FFI_ALIGN (intarg_count, align);
     274  	    }
                  /* A struct always consumes parameter slots for its full size,
                     even when its members are also counted against the FP or
                     vector registers below.  */
     275  	  intarg_count += ((*ptr)->size + 7) / 8;
     276  	  elt = discover_homogeneous_aggregate (cif->abi, *ptr, &elnum);
     277  #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
     278            if (elt == FFI_TYPE_LONGDOUBLE &&
     279                (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
     280              {
     281                vecarg_count += elnum;
     282                if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
     283                  flags |= FLAG_ARG_NEEDS_PSAVE;
     284                break;
     285              }
     286  	  else
     287  #endif
     288  	  if (elt)
     289  	    {
     290  	      fparg_count += elnum;
     291  	      if (fparg_count > NUM_FPR_ARG_REGISTERS64)
     292  		flags |= FLAG_ARG_NEEDS_PSAVE;
     293  	    }
     294  	  else
     295  	    {
     296  	      if (intarg_count > NUM_GPR_ARG_REGISTERS64)
     297  		flags |= FLAG_ARG_NEEDS_PSAVE;
     298  	    }
     299  	  break;
     300  
     301  	case FFI_TYPE_POINTER:
     302  	case FFI_TYPE_UINT64:
     303  	case FFI_TYPE_SINT64:
     304  	case FFI_TYPE_INT:
     305  	case FFI_TYPE_UINT32:
     306  	case FFI_TYPE_SINT32:
     307  	case FFI_TYPE_UINT16:
     308  	case FFI_TYPE_SINT16:
     309  	case FFI_TYPE_UINT8:
     310  	case FFI_TYPE_SINT8:
     311  	  /* Everything else is passed as a 8-byte word in a GPR, either
     312  	     the object itself or a pointer to it.  */
     313  	  intarg_count++;
     314  	  if (intarg_count > NUM_GPR_ARG_REGISTERS64)
     315  	    flags |= FLAG_ARG_NEEDS_PSAVE;
     316  	  break;
     317  	default:
     318  	  FFI_ASSERT (0);
     319  	}
     320      }
     321  
     322    if (fparg_count != 0)
     323      flags |= FLAG_FP_ARGUMENTS;
     324    if (intarg_count > 4)
     325      flags |= FLAG_4_GPR_ARGUMENTS;
     326    if (vecarg_count != 0)
     327      flags |= FLAG_VEC_ARGUMENTS;
     328  
     329    /* Space for the FPR registers, if needed.  */
     330    if (fparg_count != 0)
     331      bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
     332    /* Space for the vector registers, if needed, aligned to 16 bytes. */
     333    if (vecarg_count != 0) {
     334      bytes = (bytes + 15) & ~0xF;
     335      bytes += NUM_VEC_ARG_REGISTERS64 * sizeof (float128);
     336    }
     337  
     338    /* Stack space.  */
            /* With _CALL_ELF == 2 a parameter save area is added only when
               FLAG_ARG_NEEDS_PSAVE is set, and then for every slot.  Otherwise
               the eight mandatory slots were already counted above and only
               the slots beyond them are added.  */
     339  #if _CALL_ELF == 2
     340    if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0)
     341      bytes += intarg_count * sizeof (long);
     342  #else
     343    if (intarg_count > NUM_GPR_ARG_REGISTERS64)
     344      bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
     345  #endif
     346  
     347    /* The stack space allocated needs to be a multiple of 16 bytes.  */
     348    bytes = (bytes + 15) & ~0xF;
     349  
     350    cif->flags = flags;
     351    cif->bytes = bytes;
     352  
     353    return FFI_OK;
     354  }
     355  
     356  ffi_status FFI_HIDDEN
     357  ffi_prep_cif_linux64 (ffi_cif *cif)
     358  {
     359    if ((cif->abi & FFI_LINUX) != 0)
     360      cif->nfixedargs = cif->nargs;
     361  #if _CALL_ELF != 2
     362    else if (cif->abi == FFI_COMPAT_LINUX64)
     363      {
     364        /* This call is from old code.  Don't touch cif->nfixedargs
     365  	 since old code will be using a smaller cif.  */
     366        cif->flags |= FLAG_COMPAT;
     367        /* Translate to new abi value.  */
     368        cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
     369      }
     370  #endif
     371    else
     372      return FFI_BAD_ABI;
     373    return ffi_prep_cif_linux64_core (cif);
     374  }
     375  
     376  ffi_status FFI_HIDDEN
     377  ffi_prep_cif_linux64_var (ffi_cif *cif,
     378  			  unsigned int nfixedargs,
     379  			  unsigned int ntotalargs MAYBE_UNUSED)
     380  {
     381    if ((cif->abi & FFI_LINUX) != 0)
     382      cif->nfixedargs = nfixedargs;
     383  #if _CALL_ELF != 2
     384    else if (cif->abi == FFI_COMPAT_LINUX64)
     385      {
     386        /* This call is from old code.  Don't touch cif->nfixedargs
     387  	 since old code will be using a smaller cif.  */
     388        cif->flags |= FLAG_COMPAT;
     389        /* Translate to new abi value.  */
     390        cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
     391      }
     392  #endif
     393    else
     394      return FFI_BAD_ABI;
     395  #if _CALL_ELF == 2
     396    cif->flags |= FLAG_ARG_NEEDS_PSAVE;
     397  #endif
     398    return ffi_prep_cif_linux64_core (cif);
     399  }
     400  
     401  
     402  /* ffi_prep_args64 is called by the assembly routine once stack space
     403     has been allocated for the function's arguments.

             ECIF supplies the cif (frame size, flags, argument types) together
             with the argument values and the return-value address.  STACK is
             the lowest address of the new frame, so STACK + cif->bytes is the
             previous backchain pointer.  Each argument is copied into the GPR,
             FPR or vector register image and/or the parameter save area
             according to its type.
     404  
     405     The stack layout we want looks like this:
     406  
     407     |   Ret addr from ffi_call_LINUX64	8bytes	|	higher addresses
     408     |--------------------------------------------|
     409     |   CR save area			8bytes	|
     410     |--------------------------------------------|
     411     |   Previous backchain pointer	8	|	stack pointer here
     412     |--------------------------------------------|<+ <<<	on entry to
     413     |   Saved r28-r31			4*8	| |	ffi_call_LINUX64
     414     |--------------------------------------------| |
     415     |   GPR registers r3-r10		8*8	| |
     416     |--------------------------------------------| |
     417     |   FPR registers f1-f13 (optional)	13*8	| |
     418     |--------------------------------------------| |
     419     |   VEC registers v2-v13 (optional)  12*16   | |
     420     |--------------------------------------------| |
     421     |   Parameter save area		        | |
     422     |--------------------------------------------| |
     423     |   TOC save area			8	| |
     424     |--------------------------------------------| |	stack	|
     425     |   Linker doubleword		8	| |	grows	|
     426     |--------------------------------------------| |	down	V
     427     |   Compiler doubleword		8	| |
     428     |--------------------------------------------| |	lower addresses
     429     |   Space for callee's LR		8	| |
     430     |--------------------------------------------| |
     431     |   CR save area			8	| |
     432     |--------------------------------------------| |	stack pointer here
     433     |   Current backchain pointer	8	|-/	during
     434     |--------------------------------------------|   <<<	ffi_call_LINUX64
     435  
     436  */
     437  
     438  void FFI_HIDDEN
     439  ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
     440  {
     441    const unsigned long bytes = ecif->cif->bytes;
     442    const unsigned long flags = ecif->cif->flags;
     443  
            /* One cursor type that can be stepped in bytes, doublewords, floats,
               doubles or 16-byte quantities, or read as an integer for
               alignment arithmetic.  */
     444    typedef union
     445    {
     446      char *c;
     447      unsigned long *ul;
     448      float *f;
     449      double *d;
     450      float128 *f128;
     451      size_t p;
     452    } valp;
     453  
     454    /* 'stacktop' points at the previous backchain pointer.  */
     455    valp stacktop;
     456  
     457    /* 'next_arg' points at the space for gpr3, and grows upwards as
     458       we use GPR registers, then continues at rest.  */
     459    valp gpr_base;
     460    valp gpr_end;
     461    valp rest;
     462    valp next_arg;
     463  
     464    /* 'fpr_base' points at the space for f1, and grows upwards as
     465       we use FPR registers.  */
     466    valp fpr_base;
     467    unsigned int fparg_count;
     468  
     469    /* 'vec_base' points at the space for v2, and grows upwards as
     470       we use vector registers.  */
     471    valp vec_base;
     472    unsigned int vecarg_count;
     473  
     474    unsigned int i, words, nargs, nfixedargs;
     475    ffi_type **ptr;
     476    double double_tmp;
            /* Typed views of the cursor over ecif->avalue; each entry points at
               one argument value.  */
     477    union
     478    {
     479      void **v;
     480      char **c;
     481      signed char **sc;
     482      unsigned char **uc;
     483      signed short **ss;
     484      unsigned short **us;
     485      signed int **si;
     486      unsigned int **ui;
     487      unsigned long **ul;
     488      float **f;
     489      double **d;
     490      float128 **f128;
     491    } p_argv;
     492    unsigned long gprvalue;
     493    unsigned long align;
     494  
     495    stacktop.c = (char *) stack + bytes;
            /* The GPR image lies directly below the saved-register slots that
               end at stacktop.  */
     496    gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
     497    gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
            /* Arguments that overflow the GPR image continue in the parameter
               save area, past its first eight doublewords; that area follows
               the 4 (_CALL_ELF == 2) or 6 header doublewords at the bottom of
               the frame.  */
     498  #if _CALL_ELF == 2
     499    rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64;
     500  #else
     501    rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
     502  #endif
     503    fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
     504    fparg_count = 0;
     505    /* Place the vector args below the FPRs, if used, else the GPRs. */
     506    if (ecif->cif->flags & FLAG_FP_ARGUMENTS)
     507      vec_base.p = fpr_base.p & ~0xF;
     508    else
     509      vec_base.p = gpr_base.p;
     510    vec_base.f128 -= NUM_VEC_ARG_REGISTERS64;
     511    vecarg_count = 0;
     512    next_arg.ul = gpr_base.ul;
     513  
     514    /* Check that everything starts aligned properly.  */
     515    FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
     516    FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
     517    FFI_ASSERT (((unsigned long) gpr_base.c & 0xF) == 0);
     518    FFI_ASSERT (((unsigned long) gpr_end.c  & 0xF) == 0);
     519    FFI_ASSERT (((unsigned long) vec_base.c & 0xF) == 0);
     520    FFI_ASSERT ((bytes & 0xF) == 0);
     521  
     522    /* Deal with return values that are actually pass-by-reference.  */
     523    if (flags & FLAG_RETVAL_REFERENCE)
     524      *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue;
     525  
     526    /* Now for the arguments.  */
     527    p_argv.v = ecif->avalue;
     528    nargs = ecif->cif->nargs;
            /* A cif flagged FLAG_COMPAT comes from old code using a smaller cif,
               so its nfixedargs field is not read and every argument is
               treated as fixed.  */
     529  #if _CALL_ELF != 2
     530    nfixedargs = (unsigned) -1;
     531    if ((flags & FLAG_COMPAT) == 0)
     532  #endif
     533      nfixedargs = ecif->cif->nfixedargs;
     534    for (ptr = ecif->cif->arg_types, i = 0;
     535         i < nargs;
     536         i++, ptr++, p_argv.v++)
     537      {
     538        unsigned int elt, elnum;
     539  
     540        switch ((*ptr)->type)
     541  	{
     542  #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
     543  	case FFI_TYPE_LONGDOUBLE:
     544            if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
     545              {
     546                next_arg.p = FFI_ALIGN (next_arg.p, 16);
     547                if (next_arg.ul == gpr_end.ul)
     548                  next_arg.ul = rest.ul;
                      /* A fixed argument goes to a vector register slot while
                         any remain; the parameter cursor advances by 16 bytes
                         either way.  */
     549                if (vecarg_count < NUM_VEC_ARG_REGISTERS64 && i < nfixedargs)
     550  		memcpy (vec_base.f128++, *p_argv.f128, sizeof (float128));
     551                else
     552  		memcpy (next_arg.f128, *p_argv.f128, sizeof (float128));
     553                if (++next_arg.f128 == gpr_end.f128)
     554                  next_arg.f128 = rest.f128;
     555                vecarg_count++;
     556                FFI_ASSERT (__LDBL_MANT_DIG__ == 113);
     557                FFI_ASSERT (flags & FLAG_VEC_ARGUMENTS);
     558                break;
     559              }
     560  	  if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
     561  	    {
                      /* Passed as two consecutive doubles, each placed the same
                         way as a plain double argument.  */
     562  	      double_tmp = (*p_argv.d)[0];
     563  	      if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
     564  		{
     565  		  *fpr_base.d++ = double_tmp;
     566  # if _CALL_ELF != 2
     567  		  if ((flags & FLAG_COMPAT) != 0)
     568  		    *next_arg.d = double_tmp;
     569  # endif
     570  		}
     571  	      else
     572  		*next_arg.d = double_tmp;
     573  	      if (++next_arg.ul == gpr_end.ul)
     574  		next_arg.ul = rest.ul;
     575  	      fparg_count++;
     576  	      double_tmp = (*p_argv.d)[1];
     577  	      if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
     578  		{
     579  		  *fpr_base.d++ = double_tmp;
     580  # if _CALL_ELF != 2
     581  		  if ((flags & FLAG_COMPAT) != 0)
     582  		    *next_arg.d = double_tmp;
     583  # endif
     584  		}
     585  	      else
     586  		*next_arg.d = double_tmp;
     587  	      if (++next_arg.ul == gpr_end.ul)
     588  		next_arg.ul = rest.ul;
     589  	      fparg_count++;
     590  	      FFI_ASSERT (__LDBL_MANT_DIG__ == 106);
     591  	      FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
     592  	      break;
     593  	    }
     594  	  /* Fall through.  */
     595  #endif
     596  	case FFI_TYPE_DOUBLE:
     597  #if _CALL_ELF != 2
     598  	do_double:
     599  #endif
     600  	  double_tmp = **p_argv.d;
     601  	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
     602  	    {
     603  	      *fpr_base.d++ = double_tmp;
                      /* FLAG_COMPAT cifs additionally get the value in the
                         parameter slot.  */
     604  #if _CALL_ELF != 2
     605  	      if ((flags & FLAG_COMPAT) != 0)
     606  		*next_arg.d = double_tmp;
     607  #endif
     608  	    }
     609  	  else
     610  	    *next_arg.d = double_tmp;
     611  	  if (++next_arg.ul == gpr_end.ul)
     612  	    next_arg.ul = rest.ul;
     613  	  fparg_count++;
     614  	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
     615  	  break;
     616  
     617  	case FFI_TYPE_FLOAT:
     618  #if _CALL_ELF != 2
     619  	do_float:
     620  #endif
                  /* The FPR image holds the value widened to double; a parameter
                     slot holds it as a float in the second word of the
                     doubleword unless __LITTLE_ENDIAN__ is defined, in which
                     case it is the first word.  */
     621  	  double_tmp = **p_argv.f;
     622  	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
     623  	    {
     624  	      *fpr_base.d++ = double_tmp;
     625  #if _CALL_ELF != 2
     626  	      if ((flags & FLAG_COMPAT) != 0)
     627  		{
     628  # ifndef __LITTLE_ENDIAN__
     629  		  next_arg.f[1] = (float) double_tmp;
     630  # else
     631  		  next_arg.f[0] = (float) double_tmp;
     632  # endif
     633  		}
     634  #endif
     635  	    }
     636  	  else
     637  	    {
     638  # ifndef __LITTLE_ENDIAN__
     639  	      next_arg.f[1] = (float) double_tmp;
     640  # else
     641  	      next_arg.f[0] = (float) double_tmp;
     642  # endif
     643  	    }
     644  	  if (++next_arg.ul == gpr_end.ul)
     645  	    next_arg.ul = rest.ul;
     646  	  fparg_count++;
     647  	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
     648  	  break;
     649  
     650  	case FFI_TYPE_STRUCT:
     651  	  if ((ecif->cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
     652  	    {
     653  	      align = (*ptr)->alignment;
     654  	      if (align > 16)
     655  		align = 16;
     656  	      if (align > 1)
     657                  {
     658                    next_arg.p = FFI_ALIGN (next_arg.p, align);
     659                    if (next_arg.ul == gpr_end.ul)
     660                      next_arg.ul = rest.ul;
     661                  }
     662  	    }
                  /* Homogeneous floating-point aggregates are distributed member
                     by member; every other struct is copied as raw bytes.  */
     663  	  elt = discover_homogeneous_aggregate (ecif->cif->abi, *ptr, &elnum);
     664  	  if (elt)
     665  	    {
     666  #if _CALL_ELF == 2
     667  	      union {
     668  		void *v;
     669  		float *f;
     670  		double *d;
     671  		float128 *f128;
     672  	      } arg;
     673  
     674  	      arg.v = *p_argv.v;
     675  #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
     676                if (elt == FFI_TYPE_LONGDOUBLE &&
     677                    (ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
     678                  {
     679                    do
     680                      {
     681                        if (vecarg_count < NUM_VEC_ARG_REGISTERS64
     682                            && i < nfixedargs)
     683  		        memcpy (vec_base.f128++, arg.f128++, sizeof (float128));
     684                        else
     685  		        memcpy (next_arg.f128, arg.f128++, sizeof (float128));
     686                        if (++next_arg.f128 == gpr_end.f128)
     687                          next_arg.f128 = rest.f128;
     688                        vecarg_count++;
     689                      }
     690                    while (--elnum != 0);
     691                  }
     692                else
     693  #endif
     694  	      if (elt == FFI_TYPE_FLOAT)
     695  		{
     696  		  do
     697  		    {
     698  		      double_tmp = *arg.f++;
     699  		      if (fparg_count < NUM_FPR_ARG_REGISTERS64
     700  			  && i < nfixedargs)
     701  			*fpr_base.d++ = double_tmp;
     702  		      else
     703  			*next_arg.f = (float) double_tmp;
     704  		      if (++next_arg.f == gpr_end.f)
     705  			next_arg.f = rest.f;
     706  		      fparg_count++;
     707  		    }
     708  		  while (--elnum != 0);
                          /* An odd number of floats leaves the cursor in the
                             middle of a doubleword; skip the unused half.  */
     709  		  if ((next_arg.p & 7) != 0)
     710                      if (++next_arg.f == gpr_end.f)
     711                        next_arg.f = rest.f;
     712  		}
     713  	      else
     714  		do
     715  		  {
     716  		    double_tmp = *arg.d++;
     717  		    if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
     718  		      *fpr_base.d++ = double_tmp;
     719  		    else
     720  		      *next_arg.d = double_tmp;
     721  		    if (++next_arg.d == gpr_end.d)
     722  		      next_arg.d = rest.d;
     723  		    fparg_count++;
     724  		  }
     725  		while (--elnum != 0);
     726  #else
                      /* Without _CALL_ELF == 2 only aggregates of a single unit
                         are reported, so the value is passed exactly like the
                         bare scalar.  */
     727  	      if (elt == FFI_TYPE_FLOAT)
     728  		goto do_float;
     729  	      else
     730  		goto do_double;
     731  #endif
     732  	    }
     733  	  else
     734  	    {
     735  	      words = ((*ptr)->size + 7) / 8;
                      /* The struct starts inside the GPR image but does not fit:
                         copy the leading part there and the remainder to the
                         start of the overflow area.  */
     736  	      if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
     737  		{
     738  		  size_t first = gpr_end.c - next_arg.c;
     739  		  memcpy (next_arg.c, *p_argv.c, first);
     740  		  memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
     741  		  next_arg.c = rest.c + words * 8 - first;
     742  		}
     743  	      else
     744  		{
     745  		  char *where = next_arg.c;
     746  
     747  #ifndef __LITTLE_ENDIAN__
     748  		  /* Structures with size less than eight bytes are passed
     749  		     left-padded.  */
     750  		  if ((*ptr)->size < 8)
     751  		    where += 8 - (*ptr)->size;
     752  #endif
     753  		  memcpy (where, *p_argv.c, (*ptr)->size);
     754  		  next_arg.ul += words;
     755  		  if (next_arg.ul == gpr_end.ul)
     756  		    next_arg.ul = rest.ul;
     757  		}
     758  	    }
     759  	  break;
     760  
                /* Integer and pointer arguments are loaded through the pointer
                   type matching their width and signedness, converted to
                   unsigned long, and stored as one doubleword at putgpr.  */
     761  	case FFI_TYPE_UINT8:
     762  	  gprvalue = **p_argv.uc;
     763  	  goto putgpr;
     764  	case FFI_TYPE_SINT8:
     765  	  gprvalue = **p_argv.sc;
     766  	  goto putgpr;
     767  	case FFI_TYPE_UINT16:
     768  	  gprvalue = **p_argv.us;
     769  	  goto putgpr;
     770  	case FFI_TYPE_SINT16:
     771  	  gprvalue = **p_argv.ss;
     772  	  goto putgpr;
     773  	case FFI_TYPE_UINT32:
     774  	  gprvalue = **p_argv.ui;
     775  	  goto putgpr;
     776  	case FFI_TYPE_INT:
     777  	case FFI_TYPE_SINT32:
     778  	  gprvalue = **p_argv.si;
     779  	  goto putgpr;
     780  
     781  	case FFI_TYPE_UINT64:
     782  	case FFI_TYPE_SINT64:
     783  	case FFI_TYPE_POINTER:
     784  	  gprvalue = **p_argv.ul;
     785  	putgpr:
     786  	  *next_arg.ul++ = gprvalue;
     787  	  if (next_arg.ul == gpr_end.ul)
     788  	    next_arg.ul = rest.ul;
     789  	  break;
     790  	}
     791      }
     792  
            /* Unless FLAG_4_GPR_ARGUMENTS is set, the cursor must still be
               within the first four GPR slots.  */
     793    FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS
     794  	      || (next_arg.ul >= gpr_base.ul
     795  		  && next_arg.ul <= gpr_base.ul + 4));
     796  }
     797  
     798  
     799  #if _CALL_ELF == 2
     800  #define MIN_CACHE_LINE_SIZE 8
     801  
     802  static void
     803  flush_icache (char *wraddr, char *xaddr, int size)
     804  {
     805    int i;
     806    for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
     807      __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
     808  		      : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
     809    __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
     810  		    : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
     811  		    : "memory");
     812  }
     813  #endif
     814  
     815  
     816  ffi_status FFI_HIDDEN
     817  ffi_prep_closure_loc_linux64 (ffi_closure *closure,
     818  			      ffi_cif *cif,
     819  			      void (*fun) (ffi_cif *, void *, void **, void *),
     820  			      void *user_data,
     821  			      void *codeloc)
     822  {
     823  #if _CALL_ELF == 2
     824    unsigned int *tramp = (unsigned int *) &closure->tramp[0];
     825  
     826    if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
     827      return FFI_BAD_ABI;
     828  
     829    tramp[0] = 0xe96c0018;	/* 0:	ld	11,2f-0b(12)	*/
     830    tramp[1] = 0xe98c0010;	/*	ld	12,1f-0b(12)	*/
     831    tramp[2] = 0x7d8903a6;	/*	mtctr	12		*/
     832    tramp[3] = 0x4e800420;	/*	bctr			*/
     833  				/* 1:	.quad	function_addr	*/
     834  				/* 2:	.quad	context		*/
     835    *(void **) &tramp[4] = (void *) ffi_closure_LINUX64;
     836    *(void **) &tramp[6] = codeloc;
     837    flush_icache ((char *) tramp, (char *) codeloc, 4 * 4);
     838  #else
     839    void **tramp = (void **) &closure->tramp[0];
     840  
     841    if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
     842      return FFI_BAD_ABI;
     843  
     844    /* Copy function address and TOC from ffi_closure_LINUX64 OPD.  */
     845    memcpy (&tramp[0], (void **) ffi_closure_LINUX64, sizeof (void *));
     846    tramp[1] = codeloc;
     847    memcpy (&tramp[2], (void **) ffi_closure_LINUX64 + 1, sizeof (void *));
     848  #endif
     849  
     850    closure->cif = cif;
     851    closure->fun = fun;
     852    closure->user_data = user_data;
     853  
     854    return FFI_OK;
     855  }
     856  
     857  
/* C half of closure entry.  Builds an array with one pointer per
   argument described by CIF, each pointing at the place where that
   argument was saved (parameter save area, FPR save area or vector
   save area), calls FUN with it, and returns a code that tells
   ffi_closure_LINUX64 how to handle the return value.

   The save areas are modified in place in several cases: floats are
   narrowed from double, homogeneous aggregates are repacked, and a
   long double split between the last FPR and the stack is moved to
   the stack.  */
int FFI_HIDDEN
ffi_closure_helper_LINUX64 (ffi_cif *cif,
			    void (*fun) (ffi_cif *, void *, void **, void *),
			    void *user_data,
			    void *rvalue,
			    unsigned long *pst,
                            ffi_dblfl *pfr,
                            float128 *pvec)
{
  /* rvalue is the pointer to space for return value in closure assembly */
  /* pst is the pointer to parameter save area
     (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
  /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
  /* pvec is the pointer to where v2-v13 are stored in ffi_closure_LINUX64 */

  void **avalue;
  ffi_type **arg_types;
  unsigned long i, avn, nfixedargs;
  /* One-past-the-end markers for the FPR and vector save areas; pfr and
     pvec are advanced as registers are consumed and compared against
     these.  */
  ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
  float128 *end_pvec = pvec + NUM_VEC_ARG_REGISTERS64;
  unsigned long align;

  /* One pointer per argument, allocated on this function's stack.  */
  avalue = alloca (cif->nargs * sizeof (void *));

  /* Copy the caller's structure return value address so that the
     closure returns the data directly to the caller.  The address is
     taken from the first parameter save slot, which is then skipped.  */
  if (cif->rtype->type == FFI_TYPE_STRUCT
      && (cif->flags & FLAG_RETURNS_SMST) == 0)
    {
      rvalue = (void *) *pst;
      pst++;
    }

  i = 0;
  avn = cif->nargs;
  /* Outside ELFv2, nfixedargs starts out as (unsigned) -1 and is only
     replaced by cif->nfixedargs when FLAG_COMPAT is clear.  Under
     ELFv2 it is always cif->nfixedargs.  Floating-point and vector
     registers are only consulted for arguments with i < nfixedargs.  */
#if _CALL_ELF != 2
  nfixedargs = (unsigned) -1;
  if ((cif->flags & FLAG_COMPAT) == 0)
#endif
    nfixedargs = cif->nfixedargs;
  arg_types = cif->arg_types;

  /* Grab the addresses of the arguments from the stack frame.  */
  while (i < avn)
    {
      unsigned int elt, elnum;

      switch (arg_types[i]->type)
	{
	/* For the three sub-doubleword integer groups below: when
	   __LITTLE_ENDIAN__ is not defined the value is addressed at
	   the high-address end of its 8-byte slot.  When it is defined
	   the group's body is compiled out and control falls through
	   to the 64-bit case, which uses the slot address unchanged.  */
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT8:
#ifndef __LITTLE_ENDIAN__
	  avalue[i] = (char *) pst + 7;
	  pst++;
	  break;
#endif

	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT16:
#ifndef __LITTLE_ENDIAN__
	  avalue[i] = (char *) pst + 6;
	  pst++;
	  break;
#endif

	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT32:
#ifndef __LITTLE_ENDIAN__
	  avalue[i] = (char *) pst + 4;
	  pst++;
	  break;
#endif

	case FFI_TYPE_SINT64:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_POINTER:
	  avalue[i] = pst;
	  pst++;
	  break;

	case FFI_TYPE_STRUCT:
	  /* With FFI_LINUX_STRUCT_ALIGN set, round pst up to the
	     struct's alignment, capped at 16 bytes.  */
	  if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
	    {
	      align = arg_types[i]->alignment;
	      if (align > 16)
		align = 16;
	      if (align > 1)
		pst = (unsigned long *) FFI_ALIGN ((size_t) pst, align);
	    }
	  /* discover_homogeneous_aggregate is defined elsewhere; here a
	     non-zero result is used as the element type and elnum as
	     the element count.  */
	  elt = discover_homogeneous_aggregate (cif->abi, arg_types[i], &elnum);
	  if (elt)
	    {
#if _CALL_ELF == 2
	      /* Two cursors over the same memory, viewed as whichever
		 element type is being copied.  */
	      union {
		void *v;
		unsigned long *ul;
		float *f;
		double *d;
		float128 *f128;
		size_t p;
	      } to, from;

	      /* Repackage the aggregate from its parts.  The
		 aggregate size is not greater than the space taken by
		 the registers so store back to the register/parameter
		 save arrays.  */
	      /* Destination: the register save array when all elnum
		 elements fit in the registers still unused, otherwise
		 the parameter save area.  */
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
              if (elt == FFI_TYPE_LONGDOUBLE &&
                  (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
                {
                  if (pvec + elnum <= end_pvec)
                    to.v = pvec;
                  else
                    to.v = pst;
                }
              else
#endif
	      if (pfr + elnum <= end_pfr)
		to.v = pfr;
	      else
		to.v = pst;

	      avalue[i] = to.v;
	      from.ul = pst;
	      /* Each loop below copies one element per iteration: from
		 the next saved register while registers remain and the
		 argument is a fixed one, otherwise from the parameter
		 save area.  Both cursors advance every iteration
		 regardless of which source was used.  */
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
              if (elt == FFI_TYPE_LONGDOUBLE &&
                  (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
                {
                  do
                    {
                      if (pvec < end_pvec && i < nfixedargs)
		        memcpy (to.f128, pvec++, sizeof (float128));
                      else
		        memcpy (to.f128, from.f128, sizeof (float128));
                      to.f128++;
                      from.f128++;
                    }
                  while (--elnum != 0);
                }
              else
#endif
	      if (elt == FFI_TYPE_FLOAT)
		{
		  do
		    {
		      if (pfr < end_pfr && i < nfixedargs)
			{
			  /* Saved FPR contents are read as double and
			     narrowed to float here.  */
			  *to.f = (float) pfr->d;
			  pfr++;
			}
		      else
			*to.f = *from.f;
		      to.f++;
		      from.f++;
		    }
		  while (--elnum != 0);
		}
	      else
		{
		  do
		    {
		      if (pfr < end_pfr && i < nfixedargs)
			{
			  *to.d = pfr->d;
			  pfr++;
			}
		      else
			*to.d = *from.d;
		      to.d++;
		      from.d++;
		    }
		  while (--elnum != 0);
		}
#else
	      /* Outside ELFv2 the aggregate is handled by the scalar
		 float/double code further down; those paths advance
		 pst by one slot themselves and do not return here.  */
	      if (elt == FFI_TYPE_FLOAT)
		goto do_float;
	      else
		goto do_double;
#endif
	    }
	  else
	    {
#ifndef __LITTLE_ENDIAN__
	      /* Structures with size less than eight bytes are passed
		 left-padded.  */
	      if (arg_types[i]->size < 8)
		avalue[i] = (char *) pst + 8 - arg_types[i]->size;
	      else
#endif
		avalue[i] = pst;
	    }
	  /* Skip the struct's space in the parameter save area, rounded
	     up to whole 8-byte slots.  */
	  pst += (arg_types[i]->size + 7) / 8;
	  break;

#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
	case FFI_TYPE_LONGDOUBLE:
          if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
            {
              /* Bump pst by one slot if it is not 16-byte aligned,
                 then take the value from the next saved vector
                 register if one is left and the argument is fixed,
                 otherwise from the parameter save area.  Two slots
                 are consumed either way.  */
              if (((unsigned long) pst & 0xF) != 0)
                ++pst;
              if (pvec < end_pvec && i < nfixedargs)
                avalue[i] = pvec++;
              else
                avalue[i] = pst;
              pst += 2;
              break;
            }
          else if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
	    {
	      /* This format occupies two consecutive FPR save entries
		 when two are still available.  */
	      if (pfr + 1 < end_pfr && i + 1 < nfixedargs)
		{
		  avalue[i] = pfr;
		  pfr += 2;
		}
	      else
		{
		  if (pfr < end_pfr && i < nfixedargs)
		    {
		      /* Passed partly in f13 and partly on the stack.
			 Move it all to the stack.  */
		      *pst = *(unsigned long *) pfr;
		      pfr++;
		    }
		  avalue[i] = pst;
		}
	      pst += 2;
	      break;
	    }
	  /* Neither 128-bit long double flag is set: treat the value as
	     a double.  */
	  /* Fall through.  */
#endif
	case FFI_TYPE_DOUBLE:
#if _CALL_ELF != 2
	do_double:
#endif
	  /* On the outgoing stack all values are aligned to 8 */
	  /* there are 13 64bit floating point registers */

	  /* pst advances even when the value is taken from the FPR
	     save area.  */
	  if (pfr < end_pfr && i < nfixedargs)
	    {
	      avalue[i] = pfr;
	      pfr++;
	    }
	  else
	    avalue[i] = pst;
	  pst++;
	  break;

	case FFI_TYPE_FLOAT:
#if _CALL_ELF != 2
	do_float:
#endif
	  if (pfr < end_pfr && i < nfixedargs)
	    {
	      /* Float values are stored as doubles in the
		 ffi_closure_LINUX64 code.  Fix them here.  */
	      pfr->f = (float) pfr->d;
	      avalue[i] = pfr;
	      pfr++;
	    }
	  else
	    {
	      /* Taken from the parameter save area: when
		 __LITTLE_ENDIAN__ is not defined the float is
		 addressed in the second half of its 8-byte slot.  */
#ifndef __LITTLE_ENDIAN__
	      avalue[i] = (char *) pst + 4;
#else
	      avalue[i] = pst;
#endif
	    }
	  pst++;
	  break;

	default:
	  /* Any other argument type is unexpected here.  */
	  FFI_ASSERT (0);
	}

      i++;
    }

  /* Hand the collected argument pointers to the user's handler.  */
  (*fun) (cif, rvalue, avalue, user_data);

  /* Tell ffi_closure_LINUX64 how to perform return type promotions.  */
  if ((cif->flags & FLAG_RETURNS_SMST) != 0)
    {
      /* Small struct: with neither the FP nor the VEC flag set, the
	 code is FFI_V2_TYPE_SMALL_STRUCT offset by the struct size
	 minus one; otherwise it names the homogeneous element kind.  */
      if ((cif->flags & (FLAG_RETURNS_FP | FLAG_RETURNS_VEC)) == 0)
	return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
      else if ((cif->flags & FLAG_RETURNS_VEC) != 0)
        return FFI_V2_TYPE_VECTOR_HOMOG;
      else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
	return FFI_V2_TYPE_DOUBLE_HOMOG;
      else
	return FFI_V2_TYPE_FLOAT_HOMOG;
    }
  if ((cif->flags & FLAG_RETURNS_VEC) != 0)
    return FFI_V2_TYPE_VECTOR;
  /* Everything else is reported by its plain libffi type code.  */
  return cif->rtype->type;
}
    1153  #endif