(root)/
glibc-2.38/
sysdeps/
i386/
i586/
memcopy.h
       1  /* memcopy.h -- definitions for memory copy functions.  Pentium version.
       2     Copyright (C) 1994-2023 Free Software Foundation, Inc.
       3     This file is part of the GNU C Library.
       4  
       5     The GNU C Library is free software; you can redistribute it and/or
       6     modify it under the terms of the GNU Lesser General Public
       7     License as published by the Free Software Foundation; either
       8     version 2.1 of the License, or (at your option) any later version.
       9  
      10     The GNU C Library is distributed in the hope that it will be useful,
      11     but WITHOUT ANY WARRANTY; without even the implied warranty of
      12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13     Lesser General Public License for more details.
      14  
      15     You should have received a copy of the GNU Lesser General Public
      16     License along with the GNU C Library; if not, see
      17     <https://www.gnu.org/licenses/>.  */
      18  
      19  /* Get the i386 definitions.  We will override some of them below.  */
      20  #include <sysdeps/i386/memcopy.h>
      21  
      22  /* Written like this, the Pentium pipeline can execute the loop at a
      23     sustained rate of 2 instructions/clock, or asymptotically 480
      24     Mbytes/second at 60 MHz.  */
      25  
      #undef	WORD_COPY_FWD
      /* WORD_COPY_FWD (DST_BP, SRC_BP, NBYTES_LEFT, NBYTES)

         Copy forward from the address held in SRC_BP to the address held in
         DST_BP, 32 bytes per loop iteration, for as long as at least 32 of
         the NBYTES bytes remain.  On exit DST_BP and SRC_BP have been
         advanced past the bytes that were copied and NBYTES_LEFT holds the
         number of bytes that were not copied.  DST_BP, SRC_BP and
         NBYTES_LEFT are written, so they must be lvalues.

         The count is tested with js/jns, i.e. as a signed 32-bit value: if
         NBYTES - 32 is negative nothing is copied and NBYTES_LEFT is set to
         NBYTES.

         %eax and %edx are used as scratch registers.  The "memory" clobber
         is required because the asm reads and writes memory that is not
         described by any operand; without it the compiler may keep source
         or destination values cached in registers across the statement.
         "cc" records that the condition codes are modified.  The
         __asm__/__volatile__ spellings are used so the macro also expands
         correctly when the plain `asm' keyword is disabled (-std=c11).  */
      #define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		\
        do									\
          {									\
            __asm__ __volatile__ (						\
      	"subl	$32,%2\n"	/* at least 32 bytes to copy? */	\
      	"js		2f\n"	/* no: skip the loop */			\
      	"movl	0(%0),%%edx\n"	/* alloc dest line */			\
      	"1:\n"								\
      	"movl	28(%0),%%eax\n"	/* alloc dest line */			\
      	"subl	$32,%2\n"	/* decr loop count */			\
      	"movl	0(%1),%%eax\n"	/* U pipe */				\
      	"movl	4(%1),%%edx\n"	/* V pipe */				\
      	"movl	%%eax,0(%0)\n"	/* U pipe */				\
      	"movl	%%edx,4(%0)\n"	/* V pipe */				\
      	"movl	8(%1),%%eax\n"						\
      	"movl	12(%1),%%edx\n"						\
      	"movl	%%eax,8(%0)\n"						\
      	"movl	%%edx,12(%0)\n"						\
      	"movl	16(%1),%%eax\n"						\
      	"movl	20(%1),%%edx\n"						\
      	"movl	%%eax,16(%0)\n"						\
      	"movl	%%edx,20(%0)\n"						\
      	"movl	24(%1),%%eax\n"						\
      	"movl	28(%1),%%edx\n"						\
      	"movl	%%eax,24(%0)\n"						\
      	"movl	%%edx,28(%0)\n"						\
      	"leal	32(%1),%1\n"	/* update src ptr */			\
      	"leal	32(%0),%0\n"	/* update dst ptr */			\
      	/* movl and leal leave the flags alone, so this still	\
      	   tests the result of the subl at the top of the loop.  */	\
      	"jns	1b\n"							\
      	"2: addl	$32,%2"	/* undo the last subtraction */		\
      	: "=r" (dst_bp), "=r" (src_bp), "=r" (nbytes_left)		\
      	: "0" (dst_bp), "1" (src_bp), "2" (nbytes)			\
      	: "ax", "dx", "cc", "memory");					\
          } while (0)
      60  
      #undef	WORD_COPY_BWD
      /* WORD_COPY_BWD (DST_EP, SRC_EP, NBYTES_LEFT, NBYTES)

         Copy backward, 32 bytes per loop iteration, for as long as at least
         32 of the NBYTES bytes remain.  SRC_EP and DST_EP hold the addresses
         just past the end of the source and destination regions: every
         access uses a negative offset from them.  On exit both have been
         moved down by the number of bytes copied and NBYTES_LEFT holds the
         number of bytes that were not copied.  DST_EP, SRC_EP and
         NBYTES_LEFT are written, so they must be lvalues.

         The count is tested with js/jns, i.e. as a signed 32-bit value: if
         NBYTES - 32 is negative nothing is copied and NBYTES_LEFT is set to
         NBYTES.

         %eax and %edx are used as scratch registers.  The "memory" clobber
         is required because the asm reads and writes memory that is not
         described by any operand; without it the compiler may keep source
         or destination values cached in registers across the statement.
         "cc" records that the condition codes are modified.  The
         __asm__/__volatile__ spellings are used so the macro also expands
         correctly when the plain `asm' keyword is disabled (-std=c11).  */
      #define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes)		\
        do									\
          {									\
            __asm__ __volatile__ (						\
      	"subl	$32,%2\n"	/* at least 32 bytes to copy? */	\
      	"js		2f\n"	/* no: skip the loop */			\
      	"movl	-4(%0),%%edx\n"	/* touch dest before storing */		\
      	"1:\n"								\
      	"movl	-32(%0),%%eax\n" /* touch dest before storing */	\
      	"subl	$32,%2\n"	/* decr loop count */			\
      	"movl	-4(%1),%%eax\n"						\
      	"movl	-8(%1),%%edx\n"						\
      	"movl	%%eax,-4(%0)\n"						\
      	"movl	%%edx,-8(%0)\n"						\
      	"movl	-12(%1),%%eax\n"					\
      	"movl	-16(%1),%%edx\n"					\
      	"movl	%%eax,-12(%0)\n"					\
      	"movl	%%edx,-16(%0)\n"					\
      	"movl	-20(%1),%%eax\n"					\
      	"movl	-24(%1),%%edx\n"					\
      	"movl	%%eax,-20(%0)\n"					\
      	"movl	%%edx,-24(%0)\n"					\
      	"movl	-28(%1),%%eax\n"					\
      	"movl	-32(%1),%%edx\n"					\
      	"movl	%%eax,-28(%0)\n"					\
      	"movl	%%edx,-32(%0)\n"					\
      	"leal	-32(%1),%1\n"	/* update src ptr */			\
      	"leal	-32(%0),%0\n"	/* update dst ptr */			\
      	/* movl and leal leave the flags alone, so this still	\
      	   tests the result of the subl at the top of the loop.  */	\
      	"jns	1b\n"							\
      	"2: addl	$32,%2"	/* undo the last subtraction */		\
      	: "=r" (dst_ep), "=r" (src_ep), "=r" (nbytes_left)		\
      	: "0" (dst_ep), "1" (src_ep), "2" (nbytes)			\
      	: "ax", "dx", "cc", "memory");					\
          } while (0)