(root)/
gcc-13.2.0/
zlib/
contrib/
inflate86/
inffas86.c
       1  /* inffas86.c is a hand-tuned assembler version of
       2   *
       3   * inffast.c -- fast decoding
       4   * Copyright (C) 1995-2003 Mark Adler
       5   * For conditions of distribution and use, see copyright notice in zlib.h
       6   *
       7   * Copyright (C) 2003 Chris Anderson <christop@charm.net>
       8   * Please use the copyright conditions above.
       9   *
      10   * Dec-29-2003 -- I added AMD64 inflate asm support.  This version is also
      11   * slightly quicker on x86 systems because, instead of using rep movsb to copy
      12   * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
      13   * bytes.  I've tested the AMD64 code on Fedora Core 1 plus the x86_64 updates
      14   * from http://fedora.linux.duke.edu/fc1_x86_64,
      15   * running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
      16   * 1GB of RAM.  The 64-bit version is about 4% faster than the 32-bit version
      17   * when decompressing mozilla-source-1.3.tar.gz.
      18   *
      19   * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
      20   * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
      21   * the moment.  I have successfully compiled and tested this code with gcc2.96,
      22   * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
      23   * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
      24   * enabled.  I will attempt to merge the MMX code into this version.  Newer
      25   * versions of this and inffast.S can be found at
      26   * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
      27   */
      28  
      29  #include "zutil.h"
      30  #include "inftrees.h"
      31  #include "inflate.h"
      32  #include "inffast.h"
      33  
      34  /* Mark Adler's comments from inffast.c: */
      35  
      36  /*
      37     Decode literal, length, and distance codes and write out the resulting
      38     literal and match bytes until either not enough input or output is
      39     available, an end-of-block is encountered, or a data error is encountered.
      40     When large enough input and output buffers are supplied to inflate(), for
      41     example, a 16K input buffer and a 64K output buffer, more than 95% of the
      42     inflate execution time is spent in this routine.
      43  
      44     Entry assumptions:
      45  
      46          state->mode == LEN
      47          strm->avail_in >= 6
      48          strm->avail_out >= 258
      49          start >= strm->avail_out
      50          state->bits < 8
      51  
      52     On return, state->mode is one of:
      53  
      54          LEN -- ran out of enough output space or enough available input
      55          TYPE -- reached end of block code, inflate() to interpret next block
      56          BAD -- error in block data
      57  
      58     Notes:
      59  
      60      - The maximum input bits used by a length/distance pair is 15 bits for the
      61        length code, 5 bits for the length extra, 15 bits for the distance code,
      62        and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
      63        Therefore if strm->avail_in >= 6, then there is enough input to avoid
      64        checking for available input while decoding.
      65  
      66      - The maximum bytes that a single length/distance pair can output is 258
      67        bytes, which is the maximum length that can be coded.  inflate_fast()
      68        requires strm->avail_out >= 258 for each loop to avoid checking for
      69        output space.
      70   */
      71  void inflate_fast(strm, start)
      72  z_streamp strm;
      73  unsigned start;         /* inflate()'s starting value for strm->avail_out */
      74  {
      75      struct inflate_state FAR *state;
      76      struct inffast_ar {
      77  /* 64   32                               x86  x86_64 */
      78  /* ar offset                              register */
      79  /*  0    0 */ void *esp;                /* esp save */
      80  /*  8    4 */ void *ebp;                /* ebp save */
      81  /* 16    8 */ unsigned char FAR *in;    /* esi rsi  local strm->next_in */
      82  /* 24   12 */ unsigned char FAR *last;  /*     r9   while in < last */
      83  /* 32   16 */ unsigned char FAR *out;   /* edi rdi  local strm->next_out */
      84  /* 40   20 */ unsigned char FAR *beg;   /*          inflate()'s init next_out */
      85  /* 48   24 */ unsigned char FAR *end;   /*     r10  while out < end */
      86  /* 56   28 */ unsigned char FAR *window;/*          size of window, wsize!=0 */
      87  /* 64   32 */ code const FAR *lcode;    /* ebp rbp  local strm->lencode */
      88  /* 72   36 */ code const FAR *dcode;    /*     r11  local strm->distcode */
      89  /* 80   40 */ unsigned long hold;       /* edx rdx  local strm->hold */
      90  /* 88   44 */ unsigned bits;            /* ebx rbx  local strm->bits */
      91  /* 92   48 */ unsigned wsize;           /*          window size */
      92  /* 96   52 */ unsigned write;           /*          window write index */
      93  /*100   56 */ unsigned lmask;           /*     r12  mask for lcode */
      94  /*104   60 */ unsigned dmask;           /*     r13  mask for dcode */
      95  /*108   64 */ unsigned len;             /*     r14  match length */
      96  /*112   68 */ unsigned dist;            /*     r15  match distance */
      97  /*116   72 */ unsigned status;          /*          set when state chng*/
      98      } ar;
      99  
     100  #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
     101  #define PAD_AVAIL_IN 6
     102  #define PAD_AVAIL_OUT 258
     103  #else
     104  #define PAD_AVAIL_IN 5
     105  #define PAD_AVAIL_OUT 257
     106  #endif
     107  
     108      /* copy state to local variables */
     109      state = (struct inflate_state FAR *)strm->state;
     110      ar.in = strm->next_in;
     111      ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
     112      ar.out = strm->next_out;
     113      ar.beg = ar.out - (start - strm->avail_out);
     114      ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
     115      ar.wsize = state->wsize;
     116      ar.write = state->wnext;
     117      ar.window = state->window;
     118      ar.hold = state->hold;
     119      ar.bits = state->bits;
     120      ar.lcode = state->lencode;
     121      ar.dcode = state->distcode;
     122      ar.lmask = (1U << state->lenbits) - 1;
     123      ar.dmask = (1U << state->distbits) - 1;
     124  
     125      /* decode literals and length/distances until end-of-block or not enough
     126         input data or output space */
     127  
     128      /* align in on 1/2 hold size boundary */
     129      while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
     130          ar.hold += (unsigned long)*ar.in++ << ar.bits;
     131          ar.bits += 8;
     132      }
     133  
     134  #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
     135      __asm__ __volatile__ (
     136  "        leaq    %0, %%rax\n"
     137  "        movq    %%rbp, 8(%%rax)\n"       /* save regs rbp and rsp */
     138  "        movq    %%rsp, (%%rax)\n"
     139  "        movq    %%rax, %%rsp\n"          /* make rsp point to &ar */
     140  "        movq    16(%%rsp), %%rsi\n"      /* rsi  = in */
     141  "        movq    32(%%rsp), %%rdi\n"      /* rdi  = out */
     142  "        movq    24(%%rsp), %%r9\n"       /* r9   = last */
     143  "        movq    48(%%rsp), %%r10\n"      /* r10  = end */
     144  "        movq    64(%%rsp), %%rbp\n"      /* rbp  = lcode */
     145  "        movq    72(%%rsp), %%r11\n"      /* r11  = dcode */
     146  "        movq    80(%%rsp), %%rdx\n"      /* rdx  = hold */
     147  "        movl    88(%%rsp), %%ebx\n"      /* ebx  = bits */
     148  "        movl    100(%%rsp), %%r12d\n"    /* r12d = lmask */
     149  "        movl    104(%%rsp), %%r13d\n"    /* r13d = dmask */
     150                                            /* r14d = len */
     151                                            /* r15d = dist */
     152  "        cld\n"
     153  "        cmpq    %%rdi, %%r10\n"
     154  "        je      .L_one_time\n"           /* if only one decode left */
     155  "        cmpq    %%rsi, %%r9\n"
     156  "        je      .L_one_time\n"
     157  "        jmp     .L_do_loop\n"
     158  
     159  ".L_one_time:\n"
     160  "        movq    %%r12, %%r8\n"           /* r8 = lmask */
     161  "        cmpb    $32, %%bl\n"
     162  "        ja      .L_get_length_code_one_time\n"
     163  
     164  "        lodsl\n"                         /* eax = *(uint *)in++ */
     165  "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
     166  "        addb    $32, %%bl\n"             /* bits += 32 */
     167  "        shlq    %%cl, %%rax\n"
     168  "        orq     %%rax, %%rdx\n"          /* hold |= *((uint *)in)++ << bits */
     169  "        jmp     .L_get_length_code_one_time\n"
     170  
     171  ".align 32,0x90\n"
     172  ".L_while_test:\n"
     173  "        cmpq    %%rdi, %%r10\n"
     174  "        jbe     .L_break_loop\n"
     175  "        cmpq    %%rsi, %%r9\n"
     176  "        jbe     .L_break_loop\n"
     177  
     178  ".L_do_loop:\n"
     179  "        movq    %%r12, %%r8\n"           /* r8 = lmask */
     180  "        cmpb    $32, %%bl\n"
     181  "        ja      .L_get_length_code\n"    /* if (32 < bits) */
     182  
     183  "        lodsl\n"                         /* eax = *(uint *)in++ */
     184  "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
     185  "        addb    $32, %%bl\n"             /* bits += 32 */
     186  "        shlq    %%cl, %%rax\n"
     187  "        orq     %%rax, %%rdx\n"          /* hold |= *((uint *)in)++ << bits */
     188  
     189  ".L_get_length_code:\n"
     190  "        andq    %%rdx, %%r8\n"            /* r8 &= hold */
     191  "        movl    (%%rbp,%%r8,4), %%eax\n"  /* eax = lcode[hold & lmask] */
     192  
     193  "        movb    %%ah, %%cl\n"            /* cl = this.bits */
     194  "        subb    %%ah, %%bl\n"            /* bits -= this.bits */
     195  "        shrq    %%cl, %%rdx\n"           /* hold >>= this.bits */
     196  
     197  "        testb   %%al, %%al\n"
     198  "        jnz     .L_test_for_length_base\n" /* if (op != 0) 45.7% */
     199  
     200  "        movq    %%r12, %%r8\n"            /* r8 = lmask */
     201  "        shrl    $16, %%eax\n"            /* output this.val char */
     202  "        stosb\n"
     203  
     204  ".L_get_length_code_one_time:\n"
     205  "        andq    %%rdx, %%r8\n"            /* r8 &= hold */
     206  "        movl    (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
     207  
     208  ".L_dolen:\n"
     209  "        movb    %%ah, %%cl\n"            /* cl = this.bits */
     210  "        subb    %%ah, %%bl\n"            /* bits -= this.bits */
     211  "        shrq    %%cl, %%rdx\n"           /* hold >>= this.bits */
     212  
     213  "        testb   %%al, %%al\n"
     214  "        jnz     .L_test_for_length_base\n" /* if (op != 0) 45.7% */
     215  
     216  "        shrl    $16, %%eax\n"            /* output this.val char */
     217  "        stosb\n"
     218  "        jmp     .L_while_test\n"
     219  
     220  ".align 32,0x90\n"
     221  ".L_test_for_length_base:\n"
     222  "        movl    %%eax, %%r14d\n"         /* len = this */
     223  "        shrl    $16, %%r14d\n"           /* len = this.val */
     224  "        movb    %%al, %%cl\n"
     225  
     226  "        testb   $16, %%al\n"
     227  "        jz      .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
     228  "        andb    $15, %%cl\n"             /* op &= 15 */
     229  "        jz      .L_decode_distance\n"    /* if (!op) */
     230  
     231  ".L_add_bits_to_len:\n"
     232  "        subb    %%cl, %%bl\n"
     233  "        xorl    %%eax, %%eax\n"
     234  "        incl    %%eax\n"
     235  "        shll    %%cl, %%eax\n"
     236  "        decl    %%eax\n"
     237  "        andl    %%edx, %%eax\n"          /* eax &= hold */
     238  "        shrq    %%cl, %%rdx\n"
     239  "        addl    %%eax, %%r14d\n"         /* len += hold & mask[op] */
     240  
     241  ".L_decode_distance:\n"
     242  "        movq    %%r13, %%r8\n"           /* r8 = dmask */
     243  "        cmpb    $32, %%bl\n"
     244  "        ja      .L_get_distance_code\n"  /* if (32 < bits) */
     245  
     246  "        lodsl\n"                         /* eax = *(uint *)in++ */
     247  "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
     248  "        addb    $32, %%bl\n"             /* bits += 32 */
     249  "        shlq    %%cl, %%rax\n"
     250  "        orq     %%rax, %%rdx\n"          /* hold |= *((uint *)in)++ << bits */
     251  
     252  ".L_get_distance_code:\n"
     253  "        andq    %%rdx, %%r8\n"           /* r8 &= hold */
     254  "        movl    (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
     255  
     256  ".L_dodist:\n"
     257  "        movl    %%eax, %%r15d\n"         /* dist = this */
     258  "        shrl    $16, %%r15d\n"           /* dist = this.val */
     259  "        movb    %%ah, %%cl\n"
     260  "        subb    %%ah, %%bl\n"            /* bits -= this.bits */
     261  "        shrq    %%cl, %%rdx\n"           /* hold >>= this.bits */
     262  "        movb    %%al, %%cl\n"            /* cl = this.op */
     263  
     264  "        testb   $16, %%al\n"             /* if ((op & 16) == 0) */
     265  "        jz      .L_test_for_second_level_dist\n"
     266  "        andb    $15, %%cl\n"             /* op &= 15 */
     267  "        jz      .L_check_dist_one\n"
     268  
     269  ".L_add_bits_to_dist:\n"
     270  "        subb    %%cl, %%bl\n"
     271  "        xorl    %%eax, %%eax\n"
     272  "        incl    %%eax\n"
     273  "        shll    %%cl, %%eax\n"
     274  "        decl    %%eax\n"                 /* (1 << op) - 1 */
     275  "        andl    %%edx, %%eax\n"          /* eax &= hold */
     276  "        shrq    %%cl, %%rdx\n"
     277  "        addl    %%eax, %%r15d\n"         /* dist += hold & ((1 << op) - 1) */
     278  
     279  ".L_check_window:\n"
     280  "        movq    %%rsi, %%r8\n"           /* save in so from can use it's reg */
     281  "        movq    %%rdi, %%rax\n"
     282  "        subq    40(%%rsp), %%rax\n"      /* nbytes = out - beg */
     283  
     284  "        cmpl    %%r15d, %%eax\n"
     285  "        jb      .L_clip_window\n"        /* if (dist > nbytes) 4.2% */
     286  
     287  "        movl    %%r14d, %%ecx\n"         /* ecx = len */
     288  "        movq    %%rdi, %%rsi\n"
     289  "        subq    %%r15, %%rsi\n"          /* from = out - dist */
     290  
     291  "        sarl    %%ecx\n"
     292  "        jnc     .L_copy_two\n"           /* if len % 2 == 0 */
     293  
     294  "        rep     movsw\n"
     295  "        movb    (%%rsi), %%al\n"
     296  "        movb    %%al, (%%rdi)\n"
     297  "        incq    %%rdi\n"
     298  
     299  "        movq    %%r8, %%rsi\n"           /* move in back to %rsi, toss from */
     300  "        jmp     .L_while_test\n"
     301  
     302  ".L_copy_two:\n"
     303  "        rep     movsw\n"
     304  "        movq    %%r8, %%rsi\n"           /* move in back to %rsi, toss from */
     305  "        jmp     .L_while_test\n"
     306  
     307  ".align 32,0x90\n"
     308  ".L_check_dist_one:\n"
     309  "        cmpl    $1, %%r15d\n"            /* if dist 1, is a memset */
     310  "        jne     .L_check_window\n"
     311  "        cmpq    %%rdi, 40(%%rsp)\n"      /* if out == beg, outside window */
     312  "        je      .L_check_window\n"
     313  
     314  "        movl    %%r14d, %%ecx\n"         /* ecx = len */
     315  "        movb    -1(%%rdi), %%al\n"
     316  "        movb    %%al, %%ah\n"
     317  
     318  "        sarl    %%ecx\n"
     319  "        jnc     .L_set_two\n"
     320  "        movb    %%al, (%%rdi)\n"
     321  "        incq    %%rdi\n"
     322  
     323  ".L_set_two:\n"
     324  "        rep     stosw\n"
     325  "        jmp     .L_while_test\n"
     326  
     327  ".align 32,0x90\n"
     328  ".L_test_for_second_level_length:\n"
     329  "        testb   $64, %%al\n"
     330  "        jnz     .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
     331  
     332  "        xorl    %%eax, %%eax\n"
     333  "        incl    %%eax\n"
     334  "        shll    %%cl, %%eax\n"
     335  "        decl    %%eax\n"
     336  "        andl    %%edx, %%eax\n"         /* eax &= hold */
     337  "        addl    %%r14d, %%eax\n"        /* eax += len */
     338  "        movl    (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
     339  "        jmp     .L_dolen\n"
     340  
     341  ".align 32,0x90\n"
     342  ".L_test_for_second_level_dist:\n"
     343  "        testb   $64, %%al\n"
     344  "        jnz     .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
     345  
     346  "        xorl    %%eax, %%eax\n"
     347  "        incl    %%eax\n"
     348  "        shll    %%cl, %%eax\n"
     349  "        decl    %%eax\n"
     350  "        andl    %%edx, %%eax\n"         /* eax &= hold */
     351  "        addl    %%r15d, %%eax\n"        /* eax += dist */
     352  "        movl    (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
     353  "        jmp     .L_dodist\n"
     354  
     355  ".align 32,0x90\n"
     356  ".L_clip_window:\n"
     357  "        movl    %%eax, %%ecx\n"         /* ecx = nbytes */
     358  "        movl    92(%%rsp), %%eax\n"     /* eax = wsize, prepare for dist cmp */
     359  "        negl    %%ecx\n"                /* nbytes = -nbytes */
     360  
     361  "        cmpl    %%r15d, %%eax\n"
     362  "        jb      .L_invalid_distance_too_far\n" /* if (dist > wsize) */
     363  
     364  "        addl    %%r15d, %%ecx\n"         /* nbytes = dist - nbytes */
     365  "        cmpl    $0, 96(%%rsp)\n"
     366  "        jne     .L_wrap_around_window\n" /* if (write != 0) */
     367  
     368  "        movq    56(%%rsp), %%rsi\n"     /* from  = window */
     369  "        subl    %%ecx, %%eax\n"         /* eax  -= nbytes */
     370  "        addq    %%rax, %%rsi\n"         /* from += wsize - nbytes */
     371  
     372  "        movl    %%r14d, %%eax\n"        /* eax = len */
     373  "        cmpl    %%ecx, %%r14d\n"
     374  "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
     375  
     376  "        subl    %%ecx, %%eax\n"         /* eax -= nbytes */
     377  "        rep     movsb\n"
     378  "        movq    %%rdi, %%rsi\n"
     379  "        subq    %%r15, %%rsi\n"         /* from = &out[ -dist ] */
     380  "        jmp     .L_do_copy\n"
     381  
     382  ".align 32,0x90\n"
     383  ".L_wrap_around_window:\n"
     384  "        movl    96(%%rsp), %%eax\n"     /* eax = write */
     385  "        cmpl    %%eax, %%ecx\n"
     386  "        jbe     .L_contiguous_in_window\n" /* if (write >= nbytes) */
     387  
     388  "        movl    92(%%rsp), %%esi\n"     /* from  = wsize */
     389  "        addq    56(%%rsp), %%rsi\n"     /* from += window */
     390  "        addq    %%rax, %%rsi\n"         /* from += write */
     391  "        subq    %%rcx, %%rsi\n"         /* from -= nbytes */
     392  "        subl    %%eax, %%ecx\n"         /* nbytes -= write */
     393  
     394  "        movl    %%r14d, %%eax\n"        /* eax = len */
     395  "        cmpl    %%ecx, %%eax\n"
     396  "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
     397  
     398  "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
     399  "        rep     movsb\n"
     400  "        movq    56(%%rsp), %%rsi\n"     /* from = window */
     401  "        movl    96(%%rsp), %%ecx\n"     /* nbytes = write */
     402  "        cmpl    %%ecx, %%eax\n"
     403  "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
     404  
     405  "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
     406  "        rep     movsb\n"
     407  "        movq    %%rdi, %%rsi\n"
     408  "        subq    %%r15, %%rsi\n"         /* from = out - dist */
     409  "        jmp     .L_do_copy\n"
     410  
     411  ".align 32,0x90\n"
     412  ".L_contiguous_in_window:\n"
     413  "        movq    56(%%rsp), %%rsi\n"     /* rsi = window */
     414  "        addq    %%rax, %%rsi\n"
     415  "        subq    %%rcx, %%rsi\n"         /* from += write - nbytes */
     416  
     417  "        movl    %%r14d, %%eax\n"        /* eax = len */
     418  "        cmpl    %%ecx, %%eax\n"
     419  "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
     420  
     421  "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
     422  "        rep     movsb\n"
     423  "        movq    %%rdi, %%rsi\n"
     424  "        subq    %%r15, %%rsi\n"         /* from = out - dist */
     425  "        jmp     .L_do_copy\n"           /* if (nbytes >= len) */
     426  
     427  ".align 32,0x90\n"
     428  ".L_do_copy:\n"
     429  "        movl    %%eax, %%ecx\n"         /* ecx = len */
     430  "        rep     movsb\n"
     431  
     432  "        movq    %%r8, %%rsi\n"          /* move in back to %esi, toss from */
     433  "        jmp     .L_while_test\n"
     434  
     435  ".L_test_for_end_of_block:\n"
     436  "        testb   $32, %%al\n"
     437  "        jz      .L_invalid_literal_length_code\n"
     438  "        movl    $1, 116(%%rsp)\n"
     439  "        jmp     .L_break_loop_with_status\n"
     440  
     441  ".L_invalid_literal_length_code:\n"
     442  "        movl    $2, 116(%%rsp)\n"
     443  "        jmp     .L_break_loop_with_status\n"
     444  
     445  ".L_invalid_distance_code:\n"
     446  "        movl    $3, 116(%%rsp)\n"
     447  "        jmp     .L_break_loop_with_status\n"
     448  
     449  ".L_invalid_distance_too_far:\n"
     450  "        movl    $4, 116(%%rsp)\n"
     451  "        jmp     .L_break_loop_with_status\n"
     452  
     453  ".L_break_loop:\n"
     454  "        movl    $0, 116(%%rsp)\n"
     455  
     456  ".L_break_loop_with_status:\n"
     457  /* put in, out, bits, and hold back into ar and pop esp */
     458  "        movq    %%rsi, 16(%%rsp)\n"     /* in */
     459  "        movq    %%rdi, 32(%%rsp)\n"     /* out */
     460  "        movl    %%ebx, 88(%%rsp)\n"     /* bits */
     461  "        movq    %%rdx, 80(%%rsp)\n"     /* hold */
     462  "        movq    (%%rsp), %%rax\n"       /* restore rbp and rsp */
     463  "        movq    8(%%rsp), %%rbp\n"
     464  "        movq    %%rax, %%rsp\n"
     465            :
     466            : "m" (ar)
     467            : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
     468              "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
     469      );
     470  #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
     471      __asm__ __volatile__ (
     472  "        leal    %0, %%eax\n"
     473  "        movl    %%esp, (%%eax)\n"        /* save esp, ebp */
     474  "        movl    %%ebp, 4(%%eax)\n"
     475  "        movl    %%eax, %%esp\n"
     476  "        movl    8(%%esp), %%esi\n"       /* esi = in */
     477  "        movl    16(%%esp), %%edi\n"      /* edi = out */
     478  "        movl    40(%%esp), %%edx\n"      /* edx = hold */
     479  "        movl    44(%%esp), %%ebx\n"      /* ebx = bits */
     480  "        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
     481  
     482  "        cld\n"
     483  "        jmp     .L_do_loop\n"
     484  
     485  ".align 32,0x90\n"
     486  ".L_while_test:\n"
     487  "        cmpl    %%edi, 24(%%esp)\n"      /* out < end */
     488  "        jbe     .L_break_loop\n"
     489  "        cmpl    %%esi, 12(%%esp)\n"      /* in < last */
     490  "        jbe     .L_break_loop\n"
     491  
     492  ".L_do_loop:\n"
     493  "        cmpb    $15, %%bl\n"
     494  "        ja      .L_get_length_code\n"    /* if (15 < bits) */
     495  
     496  "        xorl    %%eax, %%eax\n"
     497  "        lodsw\n"                         /* al = *(ushort *)in++ */
     498  "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
     499  "        addb    $16, %%bl\n"             /* bits += 16 */
     500  "        shll    %%cl, %%eax\n"
     501  "        orl     %%eax, %%edx\n"        /* hold |= *((ushort *)in)++ << bits */
     502  
     503  ".L_get_length_code:\n"
     504  "        movl    56(%%esp), %%eax\n"      /* eax = lmask */
     505  "        andl    %%edx, %%eax\n"          /* eax &= hold */
     506  "        movl    (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
     507  
     508  ".L_dolen:\n"
     509  "        movb    %%ah, %%cl\n"            /* cl = this.bits */
     510  "        subb    %%ah, %%bl\n"            /* bits -= this.bits */
     511  "        shrl    %%cl, %%edx\n"           /* hold >>= this.bits */
     512  
     513  "        testb   %%al, %%al\n"
     514  "        jnz     .L_test_for_length_base\n" /* if (op != 0) 45.7% */
     515  
     516  "        shrl    $16, %%eax\n"            /* output this.val char */
     517  "        stosb\n"
     518  "        jmp     .L_while_test\n"
     519  
     520  ".align 32,0x90\n"
     521  ".L_test_for_length_base:\n"
     522  "        movl    %%eax, %%ecx\n"          /* len = this */
     523  "        shrl    $16, %%ecx\n"            /* len = this.val */
     524  "        movl    %%ecx, 64(%%esp)\n"      /* save len */
     525  "        movb    %%al, %%cl\n"
     526  
     527  "        testb   $16, %%al\n"
     528  "        jz      .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
     529  "        andb    $15, %%cl\n"             /* op &= 15 */
     530  "        jz      .L_decode_distance\n"    /* if (!op) */
     531  "        cmpb    %%cl, %%bl\n"
     532  "        jae     .L_add_bits_to_len\n"    /* if (op <= bits) */
     533  
     534  "        movb    %%cl, %%ch\n"            /* stash op in ch, freeing cl */
     535  "        xorl    %%eax, %%eax\n"
     536  "        lodsw\n"                         /* al = *(ushort *)in++ */
     537  "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
     538  "        addb    $16, %%bl\n"             /* bits += 16 */
     539  "        shll    %%cl, %%eax\n"
     540  "        orl     %%eax, %%edx\n"         /* hold |= *((ushort *)in)++ << bits */
     541  "        movb    %%ch, %%cl\n"            /* move op back to ecx */
     542  
     543  ".L_add_bits_to_len:\n"
     544  "        subb    %%cl, %%bl\n"
     545  "        xorl    %%eax, %%eax\n"
     546  "        incl    %%eax\n"
     547  "        shll    %%cl, %%eax\n"
     548  "        decl    %%eax\n"
     549  "        andl    %%edx, %%eax\n"          /* eax &= hold */
     550  "        shrl    %%cl, %%edx\n"
     551  "        addl    %%eax, 64(%%esp)\n"      /* len += hold & mask[op] */
     552  
     553  ".L_decode_distance:\n"
     554  "        cmpb    $15, %%bl\n"
     555  "        ja      .L_get_distance_code\n"  /* if (15 < bits) */
     556  
     557  "        xorl    %%eax, %%eax\n"
     558  "        lodsw\n"                         /* al = *(ushort *)in++ */
     559  "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
     560  "        addb    $16, %%bl\n"             /* bits += 16 */
     561  "        shll    %%cl, %%eax\n"
     562  "        orl     %%eax, %%edx\n"         /* hold |= *((ushort *)in)++ << bits */
     563  
     564  ".L_get_distance_code:\n"
     565  "        movl    60(%%esp), %%eax\n"      /* eax = dmask */
     566  "        movl    36(%%esp), %%ecx\n"      /* ecx = dcode */
     567  "        andl    %%edx, %%eax\n"          /* eax &= hold */
     568  "        movl    (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
     569  
     570  ".L_dodist:\n"
     571  "        movl    %%eax, %%ebp\n"          /* dist = this */
     572  "        shrl    $16, %%ebp\n"            /* dist = this.val */
     573  "        movb    %%ah, %%cl\n"
     574  "        subb    %%ah, %%bl\n"            /* bits -= this.bits */
     575  "        shrl    %%cl, %%edx\n"           /* hold >>= this.bits */
     576  "        movb    %%al, %%cl\n"            /* cl = this.op */
     577  
     578  "        testb   $16, %%al\n"             /* if ((op & 16) == 0) */
     579  "        jz      .L_test_for_second_level_dist\n"
     580  "        andb    $15, %%cl\n"             /* op &= 15 */
     581  "        jz      .L_check_dist_one\n"
     582  "        cmpb    %%cl, %%bl\n"
     583  "        jae     .L_add_bits_to_dist\n"   /* if (op <= bits) 97.6% */
     584  
     585  "        movb    %%cl, %%ch\n"            /* stash op in ch, freeing cl */
     586  "        xorl    %%eax, %%eax\n"
     587  "        lodsw\n"                         /* al = *(ushort *)in++ */
     588  "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
     589  "        addb    $16, %%bl\n"             /* bits += 16 */
     590  "        shll    %%cl, %%eax\n"
     591  "        orl     %%eax, %%edx\n"        /* hold |= *((ushort *)in)++ << bits */
     592  "        movb    %%ch, %%cl\n"            /* move op back to ecx */
     593  
     594  ".L_add_bits_to_dist:\n"
     595  "        subb    %%cl, %%bl\n"
     596  "        xorl    %%eax, %%eax\n"
     597  "        incl    %%eax\n"
     598  "        shll    %%cl, %%eax\n"
     599  "        decl    %%eax\n"                 /* (1 << op) - 1 */
     600  "        andl    %%edx, %%eax\n"          /* eax &= hold */
     601  "        shrl    %%cl, %%edx\n"
     602  "        addl    %%eax, %%ebp\n"          /* dist += hold & ((1 << op) - 1) */
     603  
     604  ".L_check_window:\n"
     605  "        movl    %%esi, 8(%%esp)\n"       /* save in so from can use it's reg */
     606  "        movl    %%edi, %%eax\n"
     607  "        subl    20(%%esp), %%eax\n"      /* nbytes = out - beg */
     608  
     609  "        cmpl    %%ebp, %%eax\n"
     610  "        jb      .L_clip_window\n"        /* if (dist > nbytes) 4.2% */
     611  
     612  "        movl    64(%%esp), %%ecx\n"      /* ecx = len */
     613  "        movl    %%edi, %%esi\n"
     614  "        subl    %%ebp, %%esi\n"          /* from = out - dist */
     615  
     616  "        sarl    %%ecx\n"
     617  "        jnc     .L_copy_two\n"           /* if len % 2 == 0 */
     618  
     619  "        rep     movsw\n"
     620  "        movb    (%%esi), %%al\n"
     621  "        movb    %%al, (%%edi)\n"
     622  "        incl    %%edi\n"
     623  
     624  "        movl    8(%%esp), %%esi\n"       /* move in back to %esi, toss from */
     625  "        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
     626  "        jmp     .L_while_test\n"
     627  
     628  ".L_copy_two:\n"
     629  "        rep     movsw\n"
     630  "        movl    8(%%esp), %%esi\n"       /* move in back to %esi, toss from */
     631  "        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
     632  "        jmp     .L_while_test\n"
     633  
     634  ".align 32,0x90\n"
     635  ".L_check_dist_one:\n"
     636  "        cmpl    $1, %%ebp\n"            /* if dist 1, is a memset */
     637  "        jne     .L_check_window\n"
     638  "        cmpl    %%edi, 20(%%esp)\n"
     639  "        je      .L_check_window\n"      /* out == beg, if outside window */
     640  
     641  "        movl    64(%%esp), %%ecx\n"      /* ecx = len */
     642  "        movb    -1(%%edi), %%al\n"
     643  "        movb    %%al, %%ah\n"
     644  
     645  "        sarl    %%ecx\n"
     646  "        jnc     .L_set_two\n"
     647  "        movb    %%al, (%%edi)\n"
     648  "        incl    %%edi\n"
     649  
     650  ".L_set_two:\n"
     651  "        rep     stosw\n"
     652  "        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
     653  "        jmp     .L_while_test\n"
     654  
     655  ".align 32,0x90\n"
     656  ".L_test_for_second_level_length:\n"
     657  "        testb   $64, %%al\n"
     658  "        jnz     .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
     659  
     660  "        xorl    %%eax, %%eax\n"
     661  "        incl    %%eax\n"
     662  "        shll    %%cl, %%eax\n"
     663  "        decl    %%eax\n"
     664  "        andl    %%edx, %%eax\n"         /* eax &= hold */
     665  "        addl    64(%%esp), %%eax\n"     /* eax += len */
     666  "        movl    (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
     667  "        jmp     .L_dolen\n"
     668  
     669  ".align 32,0x90\n"
     670  ".L_test_for_second_level_dist:\n"
     671  "        testb   $64, %%al\n"
     672  "        jnz     .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
     673  
     674  "        xorl    %%eax, %%eax\n"
     675  "        incl    %%eax\n"
     676  "        shll    %%cl, %%eax\n"
     677  "        decl    %%eax\n"
     678  "        andl    %%edx, %%eax\n"         /* eax &= hold */
     679  "        addl    %%ebp, %%eax\n"         /* eax += dist */
     680  "        movl    36(%%esp), %%ecx\n"     /* ecx = dcode */
     681  "        movl    (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
     682  "        jmp     .L_dodist\n"
     683  
     684  ".align 32,0x90\n"
     685  ".L_clip_window:\n"
     686  "        movl    %%eax, %%ecx\n"
     687  "        movl    48(%%esp), %%eax\n"     /* eax = wsize */
     688  "        negl    %%ecx\n"                /* nbytes = -nbytes */
     689  "        movl    28(%%esp), %%esi\n"     /* from = window */
     690  
     691  "        cmpl    %%ebp, %%eax\n"
     692  "        jb      .L_invalid_distance_too_far\n" /* if (dist > wsize) */
     693  
     694  "        addl    %%ebp, %%ecx\n"         /* nbytes = dist - nbytes */
     695  "        cmpl    $0, 52(%%esp)\n"
     696  "        jne     .L_wrap_around_window\n" /* if (write != 0) */
     697  
     698  "        subl    %%ecx, %%eax\n"
     699  "        addl    %%eax, %%esi\n"         /* from += wsize - nbytes */
     700  
     701  "        movl    64(%%esp), %%eax\n"     /* eax = len */
     702  "        cmpl    %%ecx, %%eax\n"
     703  "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
     704  
     705  "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
     706  "        rep     movsb\n"
     707  "        movl    %%edi, %%esi\n"
     708  "        subl    %%ebp, %%esi\n"         /* from = out - dist */
     709  "        jmp     .L_do_copy\n"
     710  
     711  ".align 32,0x90\n"
     712  ".L_wrap_around_window:\n"
     713  "        movl    52(%%esp), %%eax\n"     /* eax = write */
     714  "        cmpl    %%eax, %%ecx\n"
     715  "        jbe     .L_contiguous_in_window\n" /* if (write >= nbytes) */
     716  
     717  "        addl    48(%%esp), %%esi\n"     /* from += wsize */
     718  "        addl    %%eax, %%esi\n"         /* from += write */
     719  "        subl    %%ecx, %%esi\n"         /* from -= nbytes */
     720  "        subl    %%eax, %%ecx\n"         /* nbytes -= write */
     721  
     722  "        movl    64(%%esp), %%eax\n"     /* eax = len */
     723  "        cmpl    %%ecx, %%eax\n"
     724  "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
     725  
     726  "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
     727  "        rep     movsb\n"
     728  "        movl    28(%%esp), %%esi\n"     /* from = window */
     729  "        movl    52(%%esp), %%ecx\n"     /* nbytes = write */
     730  "        cmpl    %%ecx, %%eax\n"
     731  "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
     732  
     733  "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
     734  "        rep     movsb\n"
     735  "        movl    %%edi, %%esi\n"
     736  "        subl    %%ebp, %%esi\n"         /* from = out - dist */
     737  "        jmp     .L_do_copy\n"
     738  
     739  ".align 32,0x90\n"
     740  ".L_contiguous_in_window:\n"
     741  "        addl    %%eax, %%esi\n"
     742  "        subl    %%ecx, %%esi\n"         /* from += write - nbytes */
     743  
     744  "        movl    64(%%esp), %%eax\n"     /* eax = len */
     745  "        cmpl    %%ecx, %%eax\n"
     746  "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
     747  
     748  "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
     749  "        rep     movsb\n"                /* copy nbytes (ecx) bytes from the window */
     750  "        movl    %%edi, %%esi\n"
     751  "        subl    %%ebp, %%esi\n"         /* from = out - dist */
     752  "        jmp     .L_do_copy\n"           /* copy the remaining len (eax) bytes */
     753  
     754  ".align 32,0x90\n"
     755  ".L_do_copy:\n"
     756  "        movl    %%eax, %%ecx\n"
     757  "        rep     movsb\n"
     758  
     759  "        movl    8(%%esp), %%esi\n"      /* move in back to %esi, toss from */
     760  "        movl    32(%%esp), %%ebp\n"     /* ebp = lcode */
     761  "        jmp     .L_while_test\n"
     762  
     763  ".L_test_for_end_of_block:\n"
     764  "        testb   $32, %%al\n"
     765  "        jz      .L_invalid_literal_length_code\n"
     766  "        movl    $1, 72(%%esp)\n"
     767  "        jmp     .L_break_loop_with_status\n"
     768  
     769  ".L_invalid_literal_length_code:\n"
     770  "        movl    $2, 72(%%esp)\n"
     771  "        jmp     .L_break_loop_with_status\n"
     772  
     773  ".L_invalid_distance_code:\n"
     774  "        movl    $3, 72(%%esp)\n"
     775  "        jmp     .L_break_loop_with_status\n"
     776  
     777  ".L_invalid_distance_too_far:\n"
     778  "        movl    8(%%esp), %%esi\n"
     779  "        movl    $4, 72(%%esp)\n"
     780  "        jmp     .L_break_loop_with_status\n"
     781  
     782  ".L_break_loop:\n"
     783  "        movl    $0, 72(%%esp)\n"
     784  
     785  ".L_break_loop_with_status:\n"
     786  /* put in, out, bits, and hold back into ar and pop esp */
     787  "        movl    %%esi, 8(%%esp)\n"      /* save in */
     788  "        movl    %%edi, 16(%%esp)\n"     /* save out */
     789  "        movl    %%ebx, 44(%%esp)\n"     /* save bits */
     790  "        movl    %%edx, 40(%%esp)\n"     /* save hold */
     791  "        movl    4(%%esp), %%ebp\n"      /* restore esp, ebp */
     792  "        movl    (%%esp), %%esp\n"
     793            :
     794            : "m" (ar)
     795            : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
     796      );
     797  #elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
     798      __asm {
     799  	lea	eax, ar
     800  	mov	[eax], esp         /* save esp, ebp */
     801  	mov	[eax+4], ebp
     802  	mov	esp, eax
     803  	mov	esi, [esp+8]       /* esi = in */
     804  	mov	edi, [esp+16]      /* edi = out */
     805  	mov	edx, [esp+40]      /* edx = hold */
     806  	mov	ebx, [esp+44]      /* ebx = bits */
     807  	mov	ebp, [esp+32]      /* ebp = lcode */
     808  
     809  	cld
     810  	jmp	L_do_loop
     811  
     812  ALIGN 4
     813  L_while_test:
     814  	cmp	[esp+24], edi
     815  	jbe	L_break_loop
     816  	cmp	[esp+12], esi
     817  	jbe	L_break_loop
     818  
     819  L_do_loop:
     820  	cmp	bl, 15
     821  	ja	L_get_length_code    /* if (15 < bits), no refill needed */
     822  
     823  	xor	eax, eax
     824  	lodsw                         /* ax = *((ushort *)in)++ */
     825  	mov	cl, bl            /* cl = bits, needed as the shift count */
     826  	add	bl, 16             /* bits += 16 */
     827  	shl	eax, cl
     828  	or	edx, eax        /* hold |= *((ushort *)in)++ << bits */
     829  
     830  L_get_length_code:
     831  	mov	eax, [esp+56]      /* eax = lmask */
     832  	and	eax, edx          /* eax &= hold */
     833  	mov	eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
     834  
     835  L_dolen:
     836  	mov	cl, ah            /* cl = this.bits */
     837  	sub	bl, ah            /* bits -= this.bits */
     838  	shr	edx, cl           /* hold >>= this.bits */
     839  
     840  	test	al, al
     841  	jnz	L_test_for_length_base /* if (op != 0) 45.7% */
     842  
     843  	shr	eax, 16            /* output this.val char */
     844  	stosb
     845  	jmp	L_while_test
     846  
     847  ALIGN 4
     848  L_test_for_length_base:
     849  	mov	ecx, eax          /* len = this */
     850  	shr	ecx, 16            /* len = this.val */
     851  	mov	[esp+64], ecx      /* save len */
     852  	mov	cl, al
     853  
     854  	test	al, 16
     855  	jz	L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
     856  	and	cl, 15             /* op &= 15 */
     857  	jz	L_decode_distance    /* if (!op) */
     858  	cmp	bl, cl
     859  	jae	L_add_bits_to_len    /* if (op <= bits) */
     860  
     861  	mov	ch, cl            /* stash op in ch, freeing cl */
     862  	xor	eax, eax
     863  	lodsw                         /* al = *(ushort *)in++ */
     864  	mov	cl, bl            /* cl = bits, needs it for shifting */
     865  	add	bl, 16             /* bits += 16 */
     866  	shl	eax, cl
     867  	or	edx, eax         /* hold |= *((ushort *)in)++ << bits */
     868  	mov	cl, ch            /* move op back to ecx */
     869  
     870  L_add_bits_to_len:
     871  	sub	bl, cl
     872  	xor	eax, eax
     873  	inc	eax
     874  	shl	eax, cl
     875  	dec	eax
     876  	and	eax, edx          /* eax &= hold */
     877  	shr	edx, cl
     878  	add	[esp+64], eax      /* len += hold & mask[op] */
     879  
     880  L_decode_distance:
     881  	cmp	bl, 15
     882  	ja	L_get_distance_code  /* if (15 < bits) */
     883  
     884  	xor	eax, eax
     885  	lodsw                         /* al = *(ushort *)in++ */
     886  	mov	cl, bl            /* cl = bits, needs it for shifting */
     887  	add	bl, 16             /* bits += 16 */
     888  	shl	eax, cl
     889  	or	edx, eax         /* hold |= *((ushort *)in)++ << bits */
     890  
     891  L_get_distance_code:
     892  	mov	eax, [esp+60]      /* eax = dmask */
     893  	mov	ecx, [esp+36]      /* ecx = dcode */
     894  	and	eax, edx          /* eax &= hold */
     895  	mov	eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
     896  
     897  L_dodist:
     898  	mov	ebp, eax          /* dist = this */
     899  	shr	ebp, 16            /* dist = this.val */
     900  	mov	cl, ah
     901  	sub	bl, ah            /* bits -= this.bits */
     902  	shr	edx, cl           /* hold >>= this.bits */
     903  	mov	cl, al            /* cl = this.op */
     904  
     905  	test	al, 16             /* if ((op & 16) == 0) */
     906  	jz	L_test_for_second_level_dist
     907  	and	cl, 15             /* op &= 15 */
     908  	jz	L_check_dist_one
     909  	cmp	bl, cl
     910  	jae	L_add_bits_to_dist   /* if (op <= bits) 97.6% */
     911  
     912  	mov	ch, cl            /* stash op in ch, freeing cl */
     913  	xor	eax, eax
     914  	lodsw                         /* al = *(ushort *)in++ */
     915  	mov	cl, bl            /* cl = bits, needs it for shifting */
     916  	add	bl, 16             /* bits += 16 */
     917  	shl	eax, cl
     918  	or	edx, eax        /* hold |= *((ushort *)in)++ << bits */
     919  	mov	cl, ch            /* move op back to ecx */
     920  
     921  L_add_bits_to_dist:
     922  	sub	bl, cl
     923  	xor	eax, eax
     924  	inc	eax
     925  	shl	eax, cl
     926  	dec	eax                 /* (1 << op) - 1 */
     927  	and	eax, edx          /* eax &= hold */
     928  	shr	edx, cl
     929  	add	ebp, eax          /* dist += hold & ((1 << op) - 1) */
     930  
     931  L_check_window:
     932  	mov	[esp+8], esi       /* save in so from can use its register (esi) */
     933  	mov	eax, edi
     934  	sub	eax, [esp+20]      /* nbytes = out - beg */
     935  
     936  	cmp	eax, ebp
     937  	jb	L_clip_window        /* if (dist > nbytes) 4.2% */
     938  
     939  	mov	ecx, [esp+64]      /* ecx = len */
     940  	mov	esi, edi
     941  	sub	esi, ebp          /* from = out - dist */
     942  
     943  	sar	ecx, 1            /* ecx = len / 2, carry = len & 1 */
     944  	jnc	L_copy_two        /* if len % 2 == 0 */
     945  
     946  	rep     movsw             /* odd len: copy len / 2 words ... */
     947  	mov	al, [esi]         /* ... then the one remaining byte */
     948  	mov	[edi], al
     949  	inc	edi
     950  
     951  	mov	esi, [esp+8]      /* move in back to %esi, toss from */
     952  	mov	ebp, [esp+32]     /* ebp = lcode */
     953  	jmp	L_while_test
     954  
     955  L_copy_two:
     956  	rep     movsw
     957  	mov	esi, [esp+8]      /* move in back to %esi, toss from */
     958  	mov	ebp, [esp+32]     /* ebp = lcode */
     959  	jmp	L_while_test
     960  
     961  ALIGN 4
     962  L_check_dist_one:
     963  	cmp	ebp, 1            /* if dist 1, is a memset */
     964  	jne	L_check_window
     965  	cmp	[esp+20], edi
     966  	je	L_check_window    /* out == beg, if outside window */
     967  
     968  	mov	ecx, [esp+64]     /* ecx = len */
     969  	mov	al, [edi-1]
     970  	mov	ah, al
     971  
     972  	sar	ecx, 1
     973  	jnc	L_set_two
     974  	mov	[edi], al         /* memset out with from[-1] */
     975  	inc	edi
     976  
     977  L_set_two:
     978  	rep     stosw
     979  	mov	ebp, [esp+32]     /* ebp = lcode */
     980  	jmp	L_while_test
     981  
     982  ALIGN 4
     983  L_test_for_second_level_length:
     984  	test	al, 64
     985  	jnz	L_test_for_end_of_block /* if ((op & 64) != 0) */
     986  
     987  	xor	eax, eax
     988  	inc	eax
     989  	shl	eax, cl
     990  	dec	eax
     991  	and	eax, edx         /* eax &= hold */
     992  	add	eax, [esp+64]     /* eax += len */
     993  	mov	eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
     994  	jmp	L_dolen
     995  
     996  ALIGN 4
     997  L_test_for_second_level_dist:
     998  	test	al, 64
     999  	jnz	L_invalid_distance_code /* if ((op & 64) != 0) */
    1000  
    1001  	xor	eax, eax
    1002  	inc	eax
    1003  	shl	eax, cl
    1004  	dec	eax
    1005  	and	eax, edx         /* eax &= hold */
    1006  	add	eax, ebp         /* eax += dist */
    1007  	mov	ecx, [esp+36]     /* ecx = dcode */
    1008  	mov	eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
    1009  	jmp	L_dodist
    1010  
    1011  ALIGN 4
    1012  L_clip_window:
    1013  	mov	ecx, eax
    1014  	mov	eax, [esp+48]     /* eax = wsize */
    1015  	neg	ecx                /* nbytes = -nbytes */
    1016  	mov	esi, [esp+28]     /* from = window */
    1017  
    1018  	cmp	eax, ebp
    1019  	jb	L_invalid_distance_too_far /* if (dist > wsize) */
    1020  
    1021  	add	ecx, ebp         /* nbytes = dist - nbytes */
    1022  	cmp	dword ptr [esp+52], 0
    1023  	jne	L_wrap_around_window /* if (write != 0) */
    1024  
    1025  	sub	eax, ecx
    1026  	add	esi, eax         /* from += wsize - nbytes */
    1027  
    1028  	mov	eax, [esp+64]    /* eax = len */
    1029  	cmp	eax, ecx
    1030  	jbe	L_do_copy          /* if (nbytes >= len) */
    1031  
    1032  	sub	eax, ecx         /* len -= nbytes */
    1033  	rep     movsb
    1034  	mov	esi, edi
    1035  	sub	esi, ebp         /* from = out - dist */
    1036  	jmp	L_do_copy
    1037  
    1038  ALIGN 4
    1039  L_wrap_around_window:
    1040  	mov	eax, [esp+52]    /* eax = write */
    1041  	cmp	ecx, eax
    1042  	jbe	L_contiguous_in_window /* if (write >= nbytes) */
    1043  
    1044  	add	esi, [esp+48]    /* from += wsize */
    1045  	add	esi, eax         /* from += write */
    1046  	sub	esi, ecx         /* from -= nbytes */
    1047  	sub	ecx, eax         /* nbytes -= write */
    1048  
    1049  	mov	eax, [esp+64]    /* eax = len */
    1050  	cmp	eax, ecx
    1051  	jbe	L_do_copy          /* if (nbytes >= len) */
    1052  
    1053  	sub	eax, ecx         /* len -= nbytes */
    1054  	rep     movsb
    1055  	mov	esi, [esp+28]     /* from = window */
    1056  	mov	ecx, [esp+52]     /* nbytes = write */
    1057  	cmp	eax, ecx
    1058  	jbe	L_do_copy          /* if (nbytes >= len) */
    1059  
    1060  	sub	eax, ecx         /* len -= nbytes */
    1061  	rep     movsb
    1062  	mov	esi, edi
    1063  	sub	esi, ebp         /* from = out - dist */
    1064  	jmp	L_do_copy
    1065  
    1066  ALIGN 4
    1067  L_contiguous_in_window:
    1068  	add	esi, eax
    1069  	sub	esi, ecx         /* from += write - nbytes */
    1070  
    1071  	mov	eax, [esp+64]    /* eax = len */
    1072  	cmp	eax, ecx
    1073  	jbe	L_do_copy          /* if (nbytes >= len) */
    1074  
    1075  	sub	eax, ecx         /* len -= nbytes */
    1076  	rep     movsb
    1077  	mov	esi, edi
    1078  	sub	esi, ebp         /* from = out - dist */
    1079  	jmp	L_do_copy
    1080  
    1081  ALIGN 4
    1082  L_do_copy:
    1083  	mov	ecx, eax
    1084  	rep     movsb
    1085  
    1086  	mov	esi, [esp+8]      /* move in back to %esi, toss from */
    1087  	mov	ebp, [esp+32]     /* ebp = lcode */
    1088  	jmp	L_while_test
    1089  
    1090  L_test_for_end_of_block:
    1091  	test	al, 32
    1092  	jz	L_invalid_literal_length_code
    1093  	mov	dword ptr [esp+72], 1
    1094  	jmp	L_break_loop_with_status
    1095  
    1096  L_invalid_literal_length_code:
    1097  	mov	dword ptr [esp+72], 2
    1098  	jmp	L_break_loop_with_status
    1099  
    1100  L_invalid_distance_code:
    1101  	mov	dword ptr [esp+72], 3
    1102  	jmp	L_break_loop_with_status
    1103  
    1104  L_invalid_distance_too_far:
    1105  	mov	esi, [esp+8]      /* restore in (saved at L_check_window); [esp+4] is the saved ebp */
    1106  	mov	dword ptr [esp+72], 4 /* status 4: distance too far back */
    1107  	jmp	L_break_loop_with_status
    1108  
    1109  L_break_loop:
    1110  	mov	dword ptr [esp+72], 0
    1111  
    1112  L_break_loop_with_status:
    1113  /* put in, out, bits, and hold back into ar and pop esp */
    1114  	mov	[esp+8], esi     /* save in */
    1115  	mov	[esp+16], edi    /* save out */
    1116  	mov	[esp+44], ebx    /* save bits */
    1117  	mov	[esp+40], edx    /* save hold */
    1118  	mov	ebp, [esp+4]     /* restore esp, ebp */
    1119  	mov	esp, [esp]
    1120      }
    1121  #else
    1122  #error "x86 architecture not defined"
    1123  #endif
    1124  
    1125      if (ar.status > 1) {                /* 2..4: error codes stored by the asm exit paths */
    1126          if (ar.status == 2)
    1127              strm->msg = "invalid literal/length code";
    1128          else if (ar.status == 3)
    1129              strm->msg = "invalid distance code";
    1130          else
    1131              strm->msg = "invalid distance too far back";
    1132          state->mode = BAD;
    1133      }
    1134      else if ( ar.status == 1 ) {        /* 1: stored by the end-of-block exit path */
    1135          state->mode = TYPE;
    1136      }
    1137  
    1138      /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
    1139      ar.len = ar.bits >> 3;              /* whole bytes still sitting in the bit buffer */
    1140      ar.in -= ar.len;                    /* hand those bytes back to the input */
    1141      ar.bits -= ar.len << 3;             /* leaves bits % 8 */
    1142      ar.hold &= (1U << ar.bits) - 1;     /* keep only the low ar.bits bits of hold */
    1143  
    1144      /* update state and return */
    1145      strm->next_in = ar.in;
    1146      strm->next_out = ar.out;
    1147      strm->avail_in = (unsigned)(ar.in < ar.last ?
    1148                                  PAD_AVAIL_IN + (ar.last - ar.in) :
    1149                                  PAD_AVAIL_IN - (ar.in - ar.last));
    1150      strm->avail_out = (unsigned)(ar.out < ar.end ?
    1151                                   PAD_AVAIL_OUT + (ar.end - ar.out) :
    1152                                   PAD_AVAIL_OUT - (ar.out - ar.end));
    1153      state->hold = ar.hold;
    1154      state->bits = ar.bits;
    1155      return;
    1156  }
    1157