(root)/
gcc-13.2.0/
libffi/
src/
aarch64/
ffi.c
       1  /* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
       2  
       3  Permission is hereby granted, free of charge, to any person obtaining
       4  a copy of this software and associated documentation files (the
       5  ``Software''), to deal in the Software without restriction, including
       6  without limitation the rights to use, copy, modify, merge, publish,
       7  distribute, sublicense, and/or sell copies of the Software, and to
       8  permit persons to whom the Software is furnished to do so, subject to
       9  the following conditions:
      10  
      11  The above copyright notice and this permission notice shall be
      12  included in all copies or substantial portions of the Software.
      13  
      14  THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
      15  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
      16  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
      17  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
      18  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
      19  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      20  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
      21  
      22  #if defined(__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)
      23  #include <stdio.h>
      24  #include <stdlib.h>
      25  #include <stdint.h>
      26  #include <fficonfig.h>
      27  #include <ffi.h>
      28  #include <ffi_common.h>
      29  #include "internal.h"
      30  #ifdef _WIN32
      31  #define WIN32_LEAN_AND_MEAN
      32  #include <windows.h> /* FlushInstructionCache */
      33  #endif
      34  #include <tramp.h>
      35  
      36  /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
      37     all further uses in this file will refer to the 128-bit type.  */
      38  #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
      39  # if FFI_TYPE_LONGDOUBLE != 4
      40  #  error FFI_TYPE_LONGDOUBLE out of date
      41  # endif
      42  #else
      43  # undef FFI_TYPE_LONGDOUBLE
      44  # define FFI_TYPE_LONGDOUBLE 4
      45  #endif
      46  
/* Overlay of one 64-bit value with its two 32-bit halves, used to view
   the contents of a floating-point register slot either way.  */
union _d
{
  UINT64 d;
  UINT32 s[2];
};
      52  
/* One 128-bit vector register image, 16-byte aligned so it can be
   transferred with q-register loads/stores.  */
struct _v
{
  union _d d[2] __attribute__((aligned(16)));
};
      57  
/* Image of the argument-passing registers saved around a call: the
   vector (v) argument registers followed by the general (x) argument
   registers.  NOTE(review): the layout presumably must match the
   companion assembly (ffi_call_SYSV et al.) -- confirm against sysv.S,
   which is not visible here.  */
struct call_context
{
  struct _v v[N_V_ARG_REG];
  UINT64 x[N_X_ARG_REG];
};
      63  
      64  #if FFI_EXEC_TRAMPOLINE_TABLE
      65  
      66  #ifdef __MACH__
      67  #ifdef HAVE_PTRAUTH
      68  #include <ptrauth.h>
      69  #endif
      70  #include <mach/vm_param.h>
      71  #endif
      72  
      73  #else
      74  
      75  #if defined (__clang__) && defined (__APPLE__)
      76  extern void sys_icache_invalidate (void *start, size_t len);
      77  #endif
      78  
/* Flush the instruction cache over the byte range [start, end) after
   writing trampoline code, using whichever primitive this platform
   provides.  Compile-time error if none is available.  */
static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#elif defined (_WIN32)
  FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start);
#else
#error "Missing builtin to flush instruction cache"
#endif
}
      92  
      93  #endif
      94  
      95  /* A subroutine of is_vfp_type.  Given a structure type, return the type code
      96     of the first non-structure element.  Recurse for structure elements.
      97     Return -1 if the structure is in fact empty, i.e. no nested elements.  */
      98  
      99  static int
     100  is_hfa0 (const ffi_type *ty)
     101  {
     102    ffi_type **elements = ty->elements;
     103    int i, ret = -1;
     104  
     105    if (elements != NULL)
     106      for (i = 0; elements[i]; ++i)
     107        {
     108          ret = elements[i]->type;
     109          if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
     110            {
     111              ret = is_hfa0 (elements[i]);
     112              if (ret < 0)
     113                continue;
     114            }
     115          break;
     116        }
     117  
     118    return ret;
     119  }
     120  
     121  /* A subroutine of is_vfp_type.  Given a structure type, return true if all
     122     of the non-structure elements are the same as CANDIDATE.  */
     123  
     124  static int
     125  is_hfa1 (const ffi_type *ty, int candidate)
     126  {
     127    ffi_type **elements = ty->elements;
     128    int i;
     129  
     130    if (elements != NULL)
     131      for (i = 0; elements[i]; ++i)
     132        {
     133          int t = elements[i]->type;
     134          if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
     135            {
     136              if (!is_hfa1 (elements[i], candidate))
     137                return 0;
     138            }
     139          else if (t != candidate)
     140            return 0;
     141        }
     142  
     143    return 1;
     144  }
     145  
/* Determine if TY may be allocated to the FP registers.  This is both an
   fp scalar type as well as an homogenous floating point aggregate (HFA).
   That is, a structure consisting of 1 to 4 members of all the same type,
   where that type is an fp scalar.

   Returns non-zero iff TY is an HFA.  The result is the AARCH64_RET_*
   constant for the type.  */

static int
is_vfp_type (const ffi_type *ty)
{
  ffi_type **elements;
  int candidate, i;
  size_t size, ele_count;

  /* Quickest tests first.  */
  candidate = ty->type;
  switch (candidate)
    {
    default:
      return 0;
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
      /* An fp scalar counts as a one-element "aggregate".  */
      ele_count = 1;
      goto done;
    case FFI_TYPE_COMPLEX:
      /* A complex is two scalars of the underlying fp type.  */
      candidate = ty->elements[0]->type;
      switch (candidate)
	{
	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	  ele_count = 2;
	  goto done;
	}
      return 0;
    case FFI_TYPE_STRUCT:
      break;
    }

  /* No HFA types are smaller than 4 bytes, or larger than 64 bytes.  */
  size = ty->size;
  if (size < 4 || size > 64)
    return 0;

  /* Find the type of the first non-structure member.  */
  elements = ty->elements;
  candidate = elements[0]->type;
  if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
    {
      /* Skip over leading empty nested aggregates until a scalar is
	 found.  NOTE(review): the loop has no explicit bound; it
	 presumably relies on the struct containing at least one
	 non-empty member -- confirm callers guarantee this.  */
      for (i = 0; ; ++i)
        {
          candidate = is_hfa0 (elements[i]);
          if (candidate >= 0)
            break;
        }
    }

  /* If the first member is not a floating point type, it's not an HFA.
     Also quickly re-check the size of the structure: it must be an
     exact multiple of the element size.  */
  switch (candidate)
    {
    case FFI_TYPE_FLOAT:
      ele_count = size / sizeof(float);
      if (size != ele_count * sizeof(float))
        return 0;
      break;
    case FFI_TYPE_DOUBLE:
      ele_count = size / sizeof(double);
      if (size != ele_count * sizeof(double))
        return 0;
      break;
    case FFI_TYPE_LONGDOUBLE:
      ele_count = size / sizeof(long double);
      if (size != ele_count * sizeof(long double))
        return 0;
      break;
    default:
      return 0;
    }
  if (ele_count > 4)
    return 0;

  /* Finally, make sure that all scalar elements are the same type.  */
  for (i = 0; elements[i]; ++i)
    {
      int t = elements[i]->type;
      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
        {
          if (!is_hfa1 (elements[i], candidate))
            return 0;
        }
      else if (t != candidate)
        return 0;
    }

  /* All tests succeeded.  Encode the result: candidate * 4 plus
     (4 - count).  Presumably this matches the AARCH64_RET_S1..Q4
     encoding in internal.h -- confirm there.  */
 done:
  return candidate * 4 + (4 - (int)ele_count);
}
     247  
/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS. */

struct arg_state
{
  unsigned ngrn;                /* Next general-purpose register number. */
  unsigned nsrn;                /* Next vector register number. */
  size_t nsaa;                  /* Next stack offset. */

#if defined (__APPLE__)
  unsigned allocating_variadic; /* Nonzero once laying out variadic args. */
#endif
};
     264  
     265  /* Initialize a procedure call argument marshalling state.  */
     266  static void
     267  arg_init (struct arg_state *state)
     268  {
     269    state->ngrn = 0;
     270    state->nsrn = 0;
     271    state->nsaa = 0;
     272  #if defined (__APPLE__)
     273    state->allocating_variadic = 0;
     274  #endif
     275  }
     276  
     277  /* Allocate an aligned slot on the stack and return a pointer to it.  */
     278  static void *
     279  allocate_to_stack (struct arg_state *state, void *stack,
     280  		   size_t alignment, size_t size)
     281  {
     282    size_t nsaa = state->nsaa;
     283  
     284    /* Round up the NSAA to the larger of 8 or the natural
     285       alignment of the argument's type.  */
     286  #if defined (__APPLE__)
     287    if (state->allocating_variadic && alignment < 8)
     288      alignment = 8;
     289  #else
     290    if (alignment < 8)
     291      alignment = 8;
     292  #endif
     293      
     294    nsaa = FFI_ALIGN (nsaa, alignment);
     295    state->nsaa = nsaa + size;
     296  
     297    return (char *)stack + nsaa;
     298  }
     299  
     300  static ffi_arg
     301  extend_integer_type (void *source, int type)
     302  {
     303    switch (type)
     304      {
     305      case FFI_TYPE_UINT8:
     306        return *(UINT8 *) source;
     307      case FFI_TYPE_SINT8:
     308        return *(SINT8 *) source;
     309      case FFI_TYPE_UINT16:
     310        return *(UINT16 *) source;
     311      case FFI_TYPE_SINT16:
     312        return *(SINT16 *) source;
     313      case FFI_TYPE_UINT32:
     314        return *(UINT32 *) source;
     315      case FFI_TYPE_INT:
     316      case FFI_TYPE_SINT32:
     317        return *(SINT32 *) source;
     318      case FFI_TYPE_UINT64:
     319      case FFI_TYPE_SINT64:
     320        return *(UINT64 *) source;
     321        break;
     322      case FFI_TYPE_POINTER:
     323        return *(uintptr_t *) source;
     324      default:
     325        abort();
     326      }
     327  }
     328  
#if defined(_MSC_VER)
void extend_hfa_type (void *dest, void *src, int h);
#else
/* Scatter the packed HFA at SRC into vector registers v16-v19 and
   store them, one 16-byte slot each, at DEST.  H is the AARCH64_RET_*
   code from is_vfp_type, selecting both element kind (S/D/Q) and
   element count (1-4).

   The asm jumps into a table of fixed-size entries -- three
   instructions (12 bytes) per H value, hence the f * 12 operand --
   then each entry loads its registers and branches to the shared
   store tail (labels 4/3/2/1).  */
static void
extend_hfa_type (void *dest, void *src, int h)
{
  /* Offset of H's entry relative to the first table entry (S4).  */
  ssize_t f = h - AARCH64_RET_S4;
  void *x0;

  asm volatile (
	"adr	%0, 0f\n"
"	add	%0, %0, %1\n"
"	br	%0\n"
"0:	ldp	s16, s17, [%3]\n"	/* S4 */
"	ldp	s18, s19, [%3, #8]\n"
"	b	4f\n"
"	ldp	s16, s17, [%3]\n"	/* S3 */
"	ldr	s18, [%3, #8]\n"
"	b	3f\n"
"	ldp	s16, s17, [%3]\n"	/* S2 */
"	b	2f\n"
"	nop\n"
"	ldr	s16, [%3]\n"		/* S1 */
"	b	1f\n"
"	nop\n"
"	ldp	d16, d17, [%3]\n"	/* D4 */
"	ldp	d18, d19, [%3, #16]\n"
"	b	4f\n"
"	ldp	d16, d17, [%3]\n"	/* D3 */
"	ldr	d18, [%3, #16]\n"
"	b	3f\n"
"	ldp	d16, d17, [%3]\n"	/* D2 */
"	b	2f\n"
"	nop\n"
"	ldr	d16, [%3]\n"		/* D1 */
"	b	1f\n"
"	nop\n"
"	ldp	q16, q17, [%3]\n"	/* Q4 */
"	ldp	q18, q19, [%3, #32]\n"
"	b	4f\n"
"	ldp	q16, q17, [%3]\n"	/* Q3 */
"	ldr	q18, [%3, #32]\n"
"	b	3f\n"
"	ldp	q16, q17, [%3]\n"	/* Q2 */
"	b	2f\n"
"	nop\n"
"	ldr	q16, [%3]\n"		/* Q1 */
"	b	1f\n"
"4:	str	q19, [%2, #48]\n"
"3:	str	q18, [%2, #32]\n"
"2:	str	q17, [%2, #16]\n"
"1:	str	q16, [%2]"
    : "=&r"(x0)
    : "r"(f * 12), "r"(dest), "r"(src)
    : "memory", "v16", "v17", "v18", "v19");
}
#endif
     386  
#if defined(_MSC_VER)
void* compress_hfa_type (void *dest, void *src, int h);
#else
/* Inverse of extend_hfa_type: gather an HFA whose elements occupy one
   16-byte register slot each at REG and pack them contiguously at
   DEST, using st2/st3/st4 lane stores to interleave from v16-v19.
   Returns the address of the packed value: DEST, possibly adjusted
   within the slot for big-endian single-element cases.  */
static void *
compress_hfa_type (void *dest, void *reg, int h)
{
  switch (h)
    {
    case AARCH64_RET_S1:
      if (dest == reg)
	{
#ifdef __AARCH64EB__
	  /* On big-endian the float sits in the high word of the slot;
	     step past the 12 bytes of padding (GNU void* arithmetic).  */
	  dest += 12;
#endif
	}
      else
	*(float *)dest = *(float *)reg;
      break;
    case AARCH64_RET_S2:
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.s, v17.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_S3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.s, v17.s, v18.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_S4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    case AARCH64_RET_D1:
      if (dest == reg)
	{
#ifdef __AARCH64EB__
	  /* Big-endian: the double is in the high half of the slot.  */
	  dest += 8;
#endif
	}
      else
	*(double *)dest = *(double *)reg;
      break;
    case AARCH64_RET_D2:
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.d, v17.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_D3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.d, v17.d, v18.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_D4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    default:
      /* Q elements already fill whole 16-byte slots: a straight copy
	 of (4 - count) slots suffices.  */
      if (dest != reg)
	return memcpy (dest, reg, 16 * (4 - (h & 3)));
      break;
    }
  return dest;
}
#endif
     459  
     460  /* Either allocate an appropriate register for the argument type, or if
     461     none are available, allocate a stack slot and return a pointer
     462     to the allocated space.  */
     463  
     464  static void *
     465  allocate_int_to_reg_or_stack (struct call_context *context,
     466  			      struct arg_state *state,
     467  			      void *stack, size_t size)
     468  {
     469    if (state->ngrn < N_X_ARG_REG)
     470      return &context->x[state->ngrn++];
     471  
     472    state->ngrn = N_X_ARG_REG;
     473    return allocate_to_stack (state, stack, size, size);
     474  }
     475  
/* Machine-dependent cif preparation: classify the return type into an
   AARCH64_RET_* code, record whether any argument needs the vector
   registers, and round the argument area up to 16-byte alignment.  */
ffi_status FFI_HIDDEN
ffi_prep_cif_machdep (ffi_cif *cif)
{
  ffi_type *rtype = cif->rtype;
  size_t bytes = cif->bytes;
  int flags, i, n;

  /* Map the return type to its AARCH64_RET_* code.  */
  switch (rtype->type)
    {
    case FFI_TYPE_VOID:
      flags = AARCH64_RET_VOID;
      break;
    case FFI_TYPE_UINT8:
      flags = AARCH64_RET_UINT8;
      break;
    case FFI_TYPE_UINT16:
      flags = AARCH64_RET_UINT16;
      break;
    case FFI_TYPE_UINT32:
      flags = AARCH64_RET_UINT32;
      break;
    case FFI_TYPE_SINT8:
      flags = AARCH64_RET_SINT8;
      break;
    case FFI_TYPE_SINT16:
      flags = AARCH64_RET_SINT16;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      flags = AARCH64_RET_SINT32;
      break;
    case FFI_TYPE_SINT64:
    case FFI_TYPE_UINT64:
      flags = AARCH64_RET_INT64;
      break;
    case FFI_TYPE_POINTER:
      /* Pointer width decides between 32- and 64-bit returns (ILP32).  */
      flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
      break;

    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
    case FFI_TYPE_STRUCT:
    case FFI_TYPE_COMPLEX:
      /* HFAs and fp scalars return in vector registers.  */
      flags = is_vfp_type (rtype);
      if (flags == 0)
	{
	  size_t s = rtype->size;
	  if (s > 16)
	    {
	      /* Large aggregate: returned in memory.  Reserve 8 extra
		 bytes -- presumably for stashing the hidden result
		 pointer; confirm against the companion assembly.  */
	      flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
	      bytes += 8;
	    }
	  else if (s == 16)
	    flags = AARCH64_RET_INT128;
	  else if (s == 8)
	    flags = AARCH64_RET_INT64;
	  else
	    /* Odd-sized small aggregate: read back as int128 into a
	       scratch buffer, then copy only the low S bytes.  */
	    flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
	}
      break;

    default:
      abort();
    }

  /* If any argument needs the vector registers, the call stubs must
     also populate/save them.  */
  for (i = 0, n = cif->nargs; i < n; i++)
    if (is_vfp_type (cif->arg_types[i]))
      {
	flags |= AARCH64_FLAG_ARG_V;
	break;
      }

  /* Round the stack up to a multiple of the stack alignment requirement. */
  cif->bytes = (unsigned) FFI_ALIGN(bytes, 16);
  cif->flags = flags;
#if defined (__APPLE__)
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}
     558  
#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls: record how
   many leading arguments are fixed, so ffi_call_int can force all
   variadic arguments to the stack (see the aarch64_nfixedargs check
   there).  NTOTALARGS is unused.  */
ffi_status FFI_HIDDEN
ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs,
			 unsigned int ntotalargs)
{
  ffi_status status = ffi_prep_cif_machdep (cif);
  cif->aarch64_nfixedargs = nfixedargs;
  return status;
}
#else
/* Non-Apple variadic preparation: just tag the cif as variadic so the
   marshalling code can apply variadic rules (e.g. FFI_WIN64 HFAs in
   x-registers).  NFIXEDARGS and NTOTALARGS are unused here.  */
ffi_status FFI_HIDDEN
ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs, unsigned int ntotalargs)
{
  ffi_status status = ffi_prep_cif_machdep (cif);
  cif->flags |= AARCH64_FLAG_VARARG;
  return status;
}
#endif /* __APPLE__ */
     578  
     579  extern void ffi_call_SYSV (struct call_context *context, void *frame,
     580  			   void (*fn)(void), void *rvalue, int flags,
     581  			   void *closure) FFI_HIDDEN;
     582  
/* Call a function with the provided arguments and capture the return
   value.  Marshals AVALUE into a call_context plus a stack argument
   area per the classification rules above, then transfers control to
   the assembly stub ffi_call_SYSV.  */
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
	      void **avalue, void *closure)
{
  struct call_context *context;
  void *stack, *frame, *rvalue;
  struct arg_state state;
  size_t stack_bytes, rtype_size, rsize;
  int i, nargs, flags, isvariadic = 0;
  ffi_type *rtype;

  flags = cif->flags;
  rtype = cif->rtype;
  rtype_size = rtype->size;
  stack_bytes = cif->bytes;

  /* Strip the variadic marker; it only influences marshalling here.  */
  if (flags & AARCH64_FLAG_VARARG)
  {
    isvariadic = 1;
    flags &= ~AARCH64_FLAG_VARARG;
  }

  /* If the target function returns a structure via hidden pointer,
     then we cannot allow a null rvalue.  Otherwise, mash a null
     rvalue to void return type.  */
  rsize = 0;
  if (flags & AARCH64_RET_IN_MEM)
    {
      if (orig_rvalue == NULL)
	rsize = rtype_size;
    }
  else if (orig_rvalue == NULL)
    /* Keep only the ARG_V bit; the return becomes AARCH64_RET_VOID (0).  */
    flags &= AARCH64_FLAG_ARG_V;
  else if (flags & AARCH64_RET_NEED_COPY)
    /* Scratch buffer for the int128 read-back of odd-sized structs.  */
    rsize = 16;

  /* Allocate consecutive stack for everything we'll need.
     The frame uses 40 bytes for: lr, fp, rvalue, flags, sp */
  context = alloca (sizeof(struct call_context) + stack_bytes + 40 + rsize);
  stack = context + 1;
  frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes);
  rvalue = (rsize ? (void*)((uintptr_t)frame + 40) : orig_rvalue);

  arg_init (&state);
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      size_t s = ty->size;
      void *a = avalue[i];
      int h, t;

      t = ty->type;
      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	/* If the argument is a basic type the argument is allocated to an
	   appropriate register, or if none are available, to the stack.  */
	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	do_pointer:
	  {
	    ffi_arg ext = extend_integer_type (a, t);
	    if (state.ngrn < N_X_ARG_REG)
	      context->x[state.ngrn++] = ext;
	    else
	      {
		void *d = allocate_to_stack (&state, stack, ty->alignment, s);
		state.ngrn = N_X_ARG_REG;
		/* Note that the default abi extends each argument
		   to a full 64-bit slot, while the iOS abi allocates
		   only enough space. */
#ifdef __APPLE__
		memcpy(d, a, s);
#else
		*(ffi_arg *)d = ext;
#endif
	      }
	  }
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  {
	    void *dest;

	    h = is_vfp_type (ty);
	    if (h)
	      {
              /* HFA: ELEMS scalar members, per the AARCH64_RET_* encoding.  */
              int elems = 4 - (h & 3);
              /* Windows variadic calls pass HFAs in x-registers instead
                 of v-registers.  */
              if (cif->abi == FFI_WIN64 && isvariadic)
              {
                if (state.ngrn + elems <= N_X_ARG_REG)
                {
                  dest = &context->x[state.ngrn];
                  state.ngrn += elems;
                  extend_hfa_type(dest, a, h);
                  break;
                }
                /* NOTE(review): this sets NSRN rather than NGRN even
                   though the x-registers were being consumed here --
                   looks like it should be state.ngrn; confirm upstream.  */
                state.nsrn = N_X_ARG_REG;
                dest = allocate_to_stack(&state, stack, ty->alignment, s);
              }
              else
              {
                if (state.nsrn + elems <= N_V_ARG_REG)
                {
                  dest = &context->v[state.nsrn];
                  state.nsrn += elems;
                  extend_hfa_type (dest, a, h);
                  break;
                }
                /* Vector registers exhausted: no more V allocation.  */
                state.nsrn = N_V_ARG_REG;
                dest = allocate_to_stack (&state, stack, ty->alignment, s);
              }
	      }
	    else if (s > 16)
	      {
		/* If the argument is a composite type that is larger than 16
		   bytes, then the argument has been copied to memory, and
		   the argument is replaced by a pointer to the copy.  */
		a = &avalue[i];
		t = FFI_TYPE_POINTER;
		s = sizeof (void *);
		goto do_pointer;
	      }
	    else
	      {
		size_t n = (s + 7) / 8;
		if (state.ngrn + n <= N_X_ARG_REG)
		  {
		    /* If the argument is a composite type and the size in
		       double-words is not more than the number of available
		       X registers, then the argument is copied into
		       consecutive X registers.  */
		    dest = &context->x[state.ngrn];
                      state.ngrn += (unsigned int)n;
		  }
		else
		  {
		    /* Otherwise, there are insufficient X registers. Further
		       X register allocations are prevented, the NSAA is
		       adjusted and the argument is copied to memory at the
		       adjusted NSAA.  */
		    state.ngrn = N_X_ARG_REG;
		    dest = allocate_to_stack (&state, stack, ty->alignment, s);
		  }
		}
	      memcpy (dest, a, s);
	    }
	  break;

	default:
	  abort();
	}

#if defined (__APPLE__)
      /* Once past the fixed arguments, force everything variadic to
	 the stack per the darwin ABI.  */
      if (i + 1 == cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;
	  state.allocating_variadic = 1;
	}
#endif
    }

  ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);

  /* Copy the low bytes out of the int128 scratch buffer, if used.  */
  if (flags & AARCH64_RET_NEED_COPY)
    memcpy (orig_rvalue, rvalue, rtype_size);
}
     768  
/* Public entry point: invoke FN per CIF with arguments AVALUE, storing
   any return value at RVALUE.  No closure (static chain) is passed.  */
void
ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
{
  ffi_call_int (cif, fn, rvalue, avalue, NULL);
}
     774  
     775  #if FFI_CLOSURES
     776  
#ifdef FFI_GO_CLOSURES
/* As ffi_call, but additionally passes CLOSURE through to
   ffi_call_SYSV -- presumably delivered to the callee as the static
   chain for Go closures; confirm in the assembly stub.  */
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
	     void **avalue, void *closure)
{
  ffi_call_int (cif, fn, rvalue, avalue, closure);
}
#endif /* FFI_GO_CLOSURES */
     785  
     786  /* Build a trampoline.  */
     787  
     788  extern void ffi_closure_SYSV (void) FFI_HIDDEN;
     789  extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
     790  #if defined(FFI_EXEC_STATIC_TRAMP)
     791  extern void ffi_closure_SYSV_alt (void) FFI_HIDDEN;
     792  extern void ffi_closure_SYSV_V_alt (void) FFI_HIDDEN;
     793  #endif
     794  
/* Initialize CLOSURE so that calling CODELOC invokes
   FUN(cif, rvalue, avalue, USER_DATA).  Depending on the build, this
   either fills in a trampoline-table config slot (Mach-O), points a
   static trampoline at the closure, or emits a small dynamic
   trampoline and flushes the instruction cache over it.  */
ffi_status
ffi_prep_closure_loc (ffi_closure *closure,
                      ffi_cif* cif,
                      void (*fun)(ffi_cif*,void*,void**,void*),
                      void *user_data,
                      void *codeloc)
{
  if (cif->abi != FFI_SYSV && cif->abi != FFI_WIN64)
    return FFI_BAD_ABI;

  /* Entry stub: the _V variant is used when arguments occupy the
     vector registers (AARCH64_FLAG_ARG_V).  */
  void (*start)(void);

  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_closure_SYSV_V;
  else
    start = ffi_closure_SYSV;

#if FFI_EXEC_TRAMPOLINE_TABLE
#ifdef __MACH__
#ifdef HAVE_PTRAUTH
  /* Strip the pointer-authentication signature before doing address
     arithmetic on codeloc.  */
  codeloc = ptrauth_auth_data(codeloc, ptrauth_key_function_pointer, 0);
#endif
  /* The config page sits one page below the trampoline page; slot 0 is
     the closure, slot 1 the entry stub.  */
  void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
  config[0] = closure;
  config[1] = start;
#endif
#else
  /* 12 bytes of code; the remaining 4 bytes pad to the 8-byte-aligned
     64-bit target address stored at tramp+16 below.  */
  static const unsigned char trampoline[16] = {
    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
    0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
  };
  char *tramp = closure->tramp;

#if defined(FFI_EXEC_STATIC_TRAMP)
  if (ffi_tramp_is_present(closure))
    {
      /* Initialize the static trampoline's parameters. */
      if (start == ffi_closure_SYSV_V)
          start = ffi_closure_SYSV_V_alt;
      else
          start = ffi_closure_SYSV_alt;
      ffi_tramp_set_parms (closure->ftramp, start, closure);
      goto out;
    }
#endif

  /* Initialize the dynamic trampoline. */
  memcpy (tramp, trampoline, sizeof(trampoline));

  /* Target address loaded by the "ldr x16, tramp+16" above.  */
  *(UINT64 *)(tramp + 16) = (uintptr_t)start;

  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);

  /* Also flush the cache for code mapping.  */
#ifdef _WIN32
  /* Not using dlmalloc.c for Windows ARM64 builds, so calling
     ffi_data_to_code_pointer() isn't necessary.  */
  unsigned char *tramp_code = tramp;
  #else
  unsigned char *tramp_code = ffi_data_to_code_pointer (tramp);
  #endif
  ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE);
out:
#endif

  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;

  return FFI_OK;
}
     867  
     868  #ifdef FFI_GO_CLOSURES
     869  extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
     870  extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;
     871  
     872  ffi_status
     873  ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
     874                       void (*fun)(ffi_cif*,void*,void**,void*))
     875  {
     876    void (*start)(void);
     877  
     878    if (cif->abi != FFI_SYSV && cif->abi != FFI_WIN64)
     879      return FFI_BAD_ABI;
     880  
     881    if (cif->flags & AARCH64_FLAG_ARG_V)
     882      start = ffi_go_closure_SYSV_V;
     883    else
     884      start = ffi_go_closure_SYSV;
     885  
     886    closure->tramp = start;
     887    closure->cif = cif;
     888    closure->fun = fun;
     889  
     890    return FFI_OK;
     891  }
     892  #endif /* FFI_GO_CLOSURES */
     893  
     894  /* Primary handler to setup and invoke a function within a closure.
     895  
     896     A closure when invoked enters via the assembler wrapper
     897     ffi_closure_SYSV(). The wrapper allocates a call context on the
     898     stack, saves the interesting registers (from the perspective of
     899     the calling convention) into the context then passes control to
     900     ffi_closure_SYSV_inner() passing the saved context and a pointer to
     901     the stack at the point ffi_closure_SYSV() was invoked.
     902  
     903     On the return path the assembler wrapper will reload call context
     904     registers.
     905  
     906     ffi_closure_SYSV_inner() marshalls the call context into ffi value
     907     descriptors, invokes the wrapped function, then marshalls the return
     908     value back into the call context.  */
     909  
int FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_cif *cif,
			void (*fun)(ffi_cif*,void*,void**,void*),
			void *user_data,
			struct call_context *context,
			void *stack, void *rvalue, void *struct_rvalue)
{
  /* One pointer per argument, each aimed at the saved register slot or
     stack location that holds that argument's value.  */
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  int i, h, nargs, flags, isvariadic = 0;
  struct arg_state state;

  arg_init (&state);

  flags = cif->flags;
  if (flags & AARCH64_FLAG_VARARG)
  {
    /* Remember that the call is variadic for classification below, but
       clear the bit so the flags value handed back to the assembler
       wrapper describes only the return convention.  */
    isvariadic = 1;
    flags &= ~AARCH64_FLAG_VARARG;
  }

  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      int t = ty->type;
      size_t n, s = ty->size;

      switch (t)
	{
	case FFI_TYPE_VOID:
	  /* A void argument is invalid; only a return type may be void.  */
	  FFI_ASSERT (0);
	  break;

	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	  /* Integral/pointer arguments live in the saved X registers or
	     on the stack.  */
	  avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  /* Nonzero h marks a homogeneous floating-point aggregate (or a
	     scalar FP/complex type treated as one); its low bits encode
	     the layout so that 4 - (h & 3) registers are consumed.  */
	  h = is_vfp_type (ty);
	  if (h)
	    {
	      n = 4 - (h & 3);
              /* Variadic WIN64 calls pass HFAs in the general (X)
                 registers rather than the V registers.  */
              if (cif->abi == FFI_WIN64 && isvariadic)
                {
                  if (state.ngrn + n <= N_X_ARG_REG)
                    {
                      void *reg = &context->x[state.ngrn];
                      state.ngrn += (unsigned int)n;
    
                      /* Eeek! We need a pointer to the structure, however the
                       homogeneous float elements are being passed in individual
                       registers, therefore for float and double the structure
                       is not represented as a contiguous sequence of bytes in
                       our saved register context.  We don't need the original
                       contents of the register storage, so we reformat the
                       structure into the same memory.  */
                      avalue[i] = compress_hfa_type(reg, reg, h);
                    }
                  else
                    {
                      /* Registers exhausted: the argument is on the stack
                         and no further register allocation may occur.  */
                      state.ngrn = N_X_ARG_REG;
                      state.nsrn = N_V_ARG_REG;
                      avalue[i] = allocate_to_stack(&state, stack,
                             ty->alignment, s);
                    }
                }
              else
                {
                  if (state.nsrn + n <= N_V_ARG_REG)
                    {
                      void *reg = &context->v[state.nsrn];
                      state.nsrn += (unsigned int)n;
                      /* Same reformat-in-place trick as above: pack the
                         HFA elements contiguously into the saved slots.  */
                      avalue[i] = compress_hfa_type(reg, reg, h);
                    }
                  else
                    {
                      state.nsrn = N_V_ARG_REG;
                      avalue[i] = allocate_to_stack(&state, stack,
                                                   ty->alignment, s);
                    }
                }
            }
          else if (s > 16)
            {
              /* Replace Composite type of size greater than 16 with a
                  pointer.  */
              avalue[i] = *(void **)
              allocate_int_to_reg_or_stack (context, &state, stack,
                                         sizeof (void *));
            }
          else
            {
              /* Small composite (<= 16 bytes) that is not an HFA: it
                 occupies n X registers, 8 bytes apiece.  */
              n = (s + 7) / 8;
              if (state.ngrn + n <= N_X_ARG_REG)
                {
                  avalue[i] = &context->x[state.ngrn];
                  state.ngrn += (unsigned int)n;
                }
              else
                {
                  state.ngrn = N_X_ARG_REG;
                  avalue[i] = allocate_to_stack(&state, stack,
                                           ty->alignment, s);
                }
            }
          break;

        default:
          abort();
      }

#if defined (__APPLE__)
      /* On Apple platforms every argument past the fixed ones is
	 variadic and goes on the stack: exhaust both register files.  */
      if (i + 1 == cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;
	  state.allocating_variadic = 1;
	}
#endif
    }

  /* In-memory return: write the result into the caller-supplied buffer
     rather than the register-sized rvalue slot.  */
  if (flags & AARCH64_RET_IN_MEM)
    rvalue = struct_rvalue;

  fun (cif, rvalue, avalue, user_data);

  /* The returned flags tell the assembler wrapper how to load the
     return value back into registers.  */
  return flags;
}
    1050  
    1051  #if defined(FFI_EXEC_STATIC_TRAMP)
    1052  void *
    1053  ffi_tramp_arch (size_t *tramp_size, size_t *map_size)
    1054  {
    1055    extern void *trampoline_code_table;
    1056  
    1057    *tramp_size = AARCH64_TRAMP_SIZE;
    1058    *map_size = AARCH64_TRAMP_MAP_SIZE;
    1059    return &trampoline_code_table;
    1060  }
    1061  #endif
    1062  
    1063  #endif /* FFI_CLOSURES */
    1064  
    1065  #endif /* (__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)*/