(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
arm/
neon-thumb2-move.c
       1  /* { dg-do compile } */
       2  /* { dg-require-effective-target arm_neon_ok } */
       3  /* { dg-require-effective-target arm_thumb2_ok } */
       4  /* { dg-options "-O2 -mthumb" } */
       5  /* { dg-add-options arm_neon } */
       6  /* { dg-prune-output "switch .* conflicts with" } */
       7  
       8  #include <arm_neon.h>
       9  #include <stddef.h>
      10  
      11  void *  /* Returns the start address that was passed in as DST. */
      12  memset (DST, C, LENGTH)  /* Store (unsigned char) C into LENGTH bytes starting at DST. */
      13       void *DST;  /* Start of the region to fill; advanced as the fill progresses. */
      14       int C;  /* Fill value; only its conversion to unsigned char is stored. */
      15       size_t LENGTH;  /* Number of bytes to fill; reused below as a remaining-byte count. */
      16  {
      17    void* DST0 = DST;  /* Keep the start address: DST itself is modified below. */
      18    unsigned char C_BYTE = C;
      19    /* Fills shorter than 4 bytes use a plain byte loop and return early. */
      20    /* The condition is hinted as the expected case via __builtin_expect. */
      21    if (__builtin_expect(LENGTH < 4, 1)) {
      22      size_t i = 0;
      23      while (i < LENGTH) {
      24        ((char*)DST)[i] = C_BYTE;
      25        i++;
      26      }
      27      return DST;  /* DST has not been modified on this path. */
      28    }
      29    /* From here on LENGTH >= 4. */
      30    const char* DST_end = (char*)DST + LENGTH;  /* One past the last byte to fill. */
      31    /* Store single bytes until DST is 4-byte aligned.  At most 3 bytes are */
      32    /* needed, which fits in LENGTH >= 4.  DST++ on a void * is a GNU C extension. */
      33    while ((uintptr_t)DST % 4 != 0) {
      34      *(char*) (DST++) = C_BYTE;
      35    }
      36    /* Replicate the fill byte into each of the four bytes of a 32-bit word. */
      37  
      38    uint32_t C_SHORTWORD = (uint32_t)(unsigned char)(C_BYTE) * 0x01010101;
      39    /* With at least 16 bytes left, take the vector path; the condition is */
      40    /* hinted as unlikely via __builtin_expect. */
      41    if (__builtin_expect(DST_end - (char*)DST >= 16, 0)) {
      42      while ((uintptr_t)DST % 16 != 0) {  /* 32-bit stores up to a 16-byte boundary: at most 12 bytes. */
      43        *((uint32_t*)((char*)(DST) + (0))) = C_SHORTWORD;
      44        DST += 4;
      45      }
      46      /* DST is now 16-byte aligned.  Build a vector with C_BYTE in all 16 lanes. */
      47  
      48      uint8x16_t C_WORD = vdupq_n_u8(C_BYTE);
      49      /* Vector fill: i is a byte offset from DST and LENGTH is reused as the */
      50      /* number of bytes still to fill.  The three loops below store 256, then */
      51      /* 64, then 16 bytes per iteration, so fewer than 16 bytes are left after. */
      52  
      53  
      54      size_t i = 0;
      55      LENGTH = DST_end - (char*)DST;
      56      while (i + 16 * 16 <= LENGTH) {  /* 16 vector stores = 256 bytes per iteration. */
      57        *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
      58        *((uint8x16_t*)((char*)(DST) + (i + 16 * 1))) = C_WORD;
      59        *((uint8x16_t*)((char*)(DST) + (i + 16 * 2))) = C_WORD;
      60        *((uint8x16_t*)((char*)(DST) + (i + 16 * 3))) = C_WORD;
      61        *((uint8x16_t*)((char*)(DST) + (i + 16 * 4))) = C_WORD;
      62        *((uint8x16_t*)((char*)(DST) + (i + 16 * 5))) = C_WORD;
      63        *((uint8x16_t*)((char*)(DST) + (i + 16 * 6))) = C_WORD;
      64        *((uint8x16_t*)((char*)(DST) + (i + 16 * 7))) = C_WORD;
      65        *((uint8x16_t*)((char*)(DST) + (i + 16 * 8))) = C_WORD;
      66        *((uint8x16_t*)((char*)(DST) + (i + 16 * 9))) = C_WORD;
      67        *((uint8x16_t*)((char*)(DST) + (i + 16 * 10))) = C_WORD;
      68        *((uint8x16_t*)((char*)(DST) + (i + 16 * 11))) = C_WORD;
      69        *((uint8x16_t*)((char*)(DST) + (i + 16 * 12))) = C_WORD;
      70        *((uint8x16_t*)((char*)(DST) + (i + 16 * 13))) = C_WORD;
      71        *((uint8x16_t*)((char*)(DST) + (i + 16 * 14))) = C_WORD;
      72        *((uint8x16_t*)((char*)(DST) + (i + 16 * 15))) = C_WORD;
      73        i += 16 * 16;
      74      }
      75      while (i + 16 * 4 <= LENGTH) {  /* 4 vector stores = 64 bytes per iteration. */
      76        *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
      77        *((uint8x16_t*)((char*)(DST) + (i + 16 * 1))) = C_WORD;
      78        *((uint8x16_t*)((char*)(DST) + (i + 16 * 2))) = C_WORD;
      79        *((uint8x16_t*)((char*)(DST) + (i + 16 * 3))) = C_WORD;
      80        i += 16 * 4;
      81      }
      82      while (i + 16 <= LENGTH) {  /* One vector store = 16 bytes per iteration. */
      83        *((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
      84        i += 16;
      85      }
      86      DST += i;  /* Step past everything the vector loops wrote. */
      87    }
      88    /* Fewer than 16 bytes remain on either path: finish with 32-bit stores... */
      89    while (4 <= DST_end - (char*)DST) {
      90      *((uint32_t*)((char*)(DST) + (0))) = C_SHORTWORD;
      91      DST += 4;
      92    }
      93    /* ...and then single bytes for whatever is left (fewer than 4 bytes). */
      94  
      95    while ((char*)DST < DST_end) {
      96      *((char*)DST) = C_BYTE;
      97      DST++;
      98    }
      99    /* Return the start address saved on entry, not the advanced DST. */
     100    return DST0;
     101  }