(root)/
glibc-2.38/
sysdeps/
alpha/
memchr.c
       1  /* Copyright (C) 2010-2023 Free Software Foundation, Inc.
       2     This file is part of the GNU C Library.
       3  
       4     The GNU C Library is free software; you can redistribute it and/or
       5     modify it under the terms of the GNU Lesser General Public
       6     License as published by the Free Software Foundation; either
       7     version 2.1 of the License, or (at your option) any later version.
       8  
       9     The GNU C Library is distributed in the hope that it will be useful,
      10     but WITHOUT ANY WARRANTY; without even the implied warranty of
      11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12     Lesser General Public License for more details.
      13  
      14     You should have received a copy of the GNU Lesser General Public
      15     License along with the GNU C Library.  If not, see
      16     <https://www.gnu.org/licenses/>.  */
      17  
      18  #include <string.h>
      19  
      20  typedef unsigned long word;
      21  
      22  static inline word
      23  ldq_u(const void *s)
      24  {
      25    return *(const word *)((word)s & -8);
      26  }
      27  
      28  #define unlikely(X)	__builtin_expect ((X), 0)
      29  #define prefetch(X)	__builtin_prefetch ((void *)(X), 0)
      30  
      31  #define cmpbeq0(X)	__builtin_alpha_cmpbge(0, (X))
      32  #define find(X, Y)	cmpbeq0 ((X) ^ (Y))
      33  
      34  /* Search no more than N bytes of S for C.  */
      35  
      36  void *
      37  __memchr (const void *s, int xc, size_t n)
      38  {
      39    const word *s_align;
      40    word t, current, found, mask, offset;
      41  
      42    if (unlikely (n == 0))
      43      return 0;
      44  
      45    current = ldq_u (s);
      46  
      47    /* Replicate low byte of XC into all bytes of C.  */
      48    t = xc & 0xff;			/* 0000000c */
      49    t = (t << 8) | t;			/* 000000cc */
      50    t = (t << 16) | t;			/* 0000cccc */
      51    const word c = (t << 32) | t;		/* cccccccc */
      52  
      53    /* Align the source, and decrement the count by the number
      54       of bytes searched in the first word.  */
      55    s_align = (const word *)((word)s & -8);
      56    {
      57      size_t inc = n + ((word)s & 7);
      58      n = inc | -(inc < n);
      59    }
      60  
      61    /* Deal with misalignment in the first word for the comparison.  */
      62    mask = (1ul << ((word)s & 7)) - 1;
      63  
      64    /* If the entire string fits within one word, we may need masking
      65       at both the front and the back of the string.  */
      66    if (unlikely (n <= 8))
      67      {
      68        mask |= -1ul << n;
      69        goto last_quad;
      70      }
      71  
      72    found = find (current, c) & ~mask;
      73    if (unlikely (found))
      74      goto found_it;
      75  
      76    s_align++;
      77    n -= 8;
      78  
      79    /* If the block is sufficiently large, align to cacheline and prefetch.  */
      80    if (unlikely (n >= 256))
      81      {
      82        /* Prefetch 3 cache lines beyond the one we're working on.  */
      83        prefetch (s_align + 8);
      84        prefetch (s_align + 16);
      85        prefetch (s_align + 24);
      86  
      87        while ((word)s_align & 63)
      88  	{
      89  	  current = *s_align;
      90  	  found = find (current, c);
      91  	  if (found)
      92  	    goto found_it;
      93  	  s_align++;
      94  	  n -= 8;
      95  	}
      96  
      97  	/* Within each cacheline, advance the load for the next word
      98  	   before the test for the previous word is complete.  This
      99  	   allows us to hide the 3 cycle L1 cache load latency.  We
     100  	   only perform this advance load within a cacheline to prevent
     101  	   reading across page boundary.  */
     102  #define CACHELINE_LOOP				\
     103  	do {					\
     104  	  word i, next = s_align[0];		\
     105  	  for (i = 0; i < 7; ++i)		\
     106  	    {					\
     107  	      current = next;			\
     108  	      next = s_align[1];		\
     109  	      found = find (current, c);	\
     110  	      if (unlikely (found))		\
     111  		goto found_it;			\
     112  	      s_align++;			\
     113  	    }					\
     114  	  current = next;			\
     115  	  found = find (current, c);		\
     116  	  if (unlikely (found))			\
     117  	    goto found_it;			\
     118  	  s_align++;				\
     119  	  n -= 64;				\
     120  	} while (0)
     121  
     122        /* While there's still lots more data to potentially be read,
     123  	 continue issuing prefetches for the 4th cacheline out.  */
     124        while (n >= 256)
     125  	{
     126  	  prefetch (s_align + 24);
     127  	  CACHELINE_LOOP;
     128  	}
     129  
     130        /* Up to 3 cache lines remaining.  Continue issuing advanced
     131  	 loads, but stop prefetching.  */
     132        while (n >= 64)
     133  	CACHELINE_LOOP;
     134  
     135        /* We may have exhausted the buffer.  */
     136        if (n == 0)
     137  	return NULL;
     138      }
     139  
     140    /* Quadword aligned loop.  */
     141    current = *s_align;
     142    while (n > 8)
     143      {
     144        found = find (current, c);
     145        if (unlikely (found))
     146  	goto found_it;
     147        current = *++s_align;
     148        n -= 8;
     149      }
     150  
     151    /* The last word may need masking at the tail of the compare.  */
     152    mask = -1ul << n;
     153   last_quad:
     154    found = find (current, c) & ~mask;
     155    if (found == 0)
     156      return NULL;
     157  
     158   found_it:
     159  #ifdef __alpha_cix__
     160    offset = __builtin_alpha_cttz (found);
     161  #else
     162    /* Extract LSB.  */
     163    found &= -found;
     164  
     165    /* Binary search for the LSB.  */
     166    offset  = (found & 0x0f ? 0 : 4);
     167    offset += (found & 0x33 ? 0 : 2);
     168    offset += (found & 0x55 ? 0 : 1);
     169  #endif
     170  
     171    return (void *)((word)s_align + offset);
     172  }
     173  
     174  #ifdef weak_alias
     175  weak_alias (__memchr, memchr)
     176  #endif
     177  libc_hidden_builtin_def (memchr)