(root)/
Python-3.12.0/
Modules/
_decimal/
libmpdec/
transpose.c
       1  /*
       2   * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
       3   *
       4   * Redistribution and use in source and binary forms, with or without
       5   * modification, are permitted provided that the following conditions
       6   * are met:
       7   *
       8   * 1. Redistributions of source code must retain the above copyright
       9   *    notice, this list of conditions and the following disclaimer.
      10   *
      11   * 2. Redistributions in binary form must reproduce the above copyright
      12   *    notice, this list of conditions and the following disclaimer in the
      13   *    documentation and/or other materials provided with the distribution.
      14   *
      15   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
      16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      17   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      18   * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
      19   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      20   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      21   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      22   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      23   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      24   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      25   * SUCH DAMAGE.
      26   */
      27  
      28  
      29  #include "mpdecimal.h"
      30  
      31  #include <assert.h>
      32  #include <limits.h>
      33  #include <stdio.h>
      34  #include <stdlib.h>
      35  #include <string.h>
      36  
      37  #include "bits.h"
      38  #include "constants.h"
      39  #include "transpose.h"
      40  #include "typearith.h"
      41  
      42  
      43  #define BUFSIZE 4096
      44  #define SIDE 128
      45  
      46  
      47  /* Bignum: The transpose functions are used for very large transforms
      48     in sixstep.c and fourstep.c. */
      49  
      50  
      51  /* Definition of the matrix transpose */
      52  void
      53  std_trans(mpd_uint_t dest[], mpd_uint_t src[], mpd_size_t rows, mpd_size_t cols)
      54  {
      55      mpd_size_t idest, isrc;
      56      mpd_size_t r, c;
      57  
      58      for (r = 0; r < rows; r++) {
      59          isrc = r * cols;
      60          idest = r;
      61          for (c = 0; c < cols; c++) {
      62              dest[idest] = src[isrc];
      63              isrc += 1;
      64              idest += rows;
      65          }
      66      }
      67  }
      68  
      69  /*
      70   * Swap half-rows of 2^n * (2*2^n) matrix.
      71   * FORWARD_CYCLE: even/odd permutation of the halfrows.
      72   * BACKWARD_CYCLE: reverse the even/odd permutation.
      73   */
      74  static int
      75  swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir)
      76  {
      77      mpd_uint_t buf1[BUFSIZE];
      78      mpd_uint_t buf2[BUFSIZE];
      79      mpd_uint_t *readbuf, *writebuf, *hp;
      80      mpd_size_t *done, dbits;
      81      mpd_size_t b = BUFSIZE, stride;
      82      mpd_size_t hn, hmax; /* halfrow number */
      83      mpd_size_t m, r=0;
      84      mpd_size_t offset;
      85      mpd_size_t next;
      86  
      87  
      88      assert(cols == mul_size_t(2, rows));
      89  
      90      if (dir == FORWARD_CYCLE) {
      91          r = rows;
      92      }
      93      else if (dir == BACKWARD_CYCLE) {
      94          r = 2;
      95      }
      96      else {
      97          abort(); /* GCOV_NOT_REACHED */
      98      }
      99  
     100      m = cols - 1;
     101      hmax = rows; /* cycles start at odd halfrows */
     102      dbits = 8 * sizeof *done;
     103      if ((done = mpd_calloc(hmax/(sizeof *done) + 1, sizeof *done)) == NULL) {
     104          return 0;
     105      }
     106  
     107      for (hn = 1; hn <= hmax; hn += 2) {
     108  
     109          if (done[hn/dbits] & mpd_bits[hn%dbits]) {
     110              continue;
     111          }
     112  
     113          readbuf = buf1; writebuf = buf2;
     114  
     115          for (offset = 0; offset < cols/2; offset += b) {
     116  
     117              stride = (offset + b < cols/2) ? b : cols/2-offset;
     118  
     119              hp = matrix + hn*cols/2;
     120              memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
     121              pointerswap(&readbuf, &writebuf);
     122  
     123              next = mulmod_size_t(hn, r, m);
     124              hp = matrix + next*cols/2;
     125  
     126              while (next != hn) {
     127  
     128                  memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
     129                  memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
     130                  pointerswap(&readbuf, &writebuf);
     131  
     132                  done[next/dbits] |= mpd_bits[next%dbits];
     133  
     134                  next = mulmod_size_t(next, r, m);
     135                      hp = matrix + next*cols/2;
     136  
     137              }
     138  
     139              memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
     140  
     141              done[hn/dbits] |= mpd_bits[hn%dbits];
     142          }
     143      }
     144  
     145      mpd_free(done);
     146      return 1;
     147  }
     148  
     149  /* In-place transpose of a square matrix */
     150  static inline void
     151  squaretrans(mpd_uint_t *buf, mpd_size_t cols)
     152  {
     153      mpd_uint_t tmp;
     154      mpd_size_t idest, isrc;
     155      mpd_size_t r, c;
     156  
     157      for (r = 0; r < cols; r++) {
     158          c = r+1;
     159          isrc = r*cols + c;
     160          idest = c*cols + r;
     161          for (c = r+1; c < cols; c++) {
     162              tmp = buf[isrc];
     163              buf[isrc] = buf[idest];
     164              buf[idest] = tmp;
     165              isrc += 1;
     166              idest += cols;
     167          }
     168      }
     169  }
     170  
     171  /*
     172   * Transpose 2^n * 2^n matrix. For cache efficiency, the matrix is split into
     173   * square blocks with side length 'SIDE'. First, the blocks are transposed,
     174   * then a square transposition is done on each individual block.
     175   */
     176  static void
     177  squaretrans_pow2(mpd_uint_t *matrix, mpd_size_t size)
     178  {
     179      mpd_uint_t buf1[SIDE*SIDE];
     180      mpd_uint_t buf2[SIDE*SIDE];
     181      mpd_uint_t *to, *from;
     182      mpd_size_t b = size;
     183      mpd_size_t r, c;
     184      mpd_size_t i;
     185  
     186      while (b > SIDE) b >>= 1;
     187  
     188      for (r = 0; r < size; r += b) {
     189  
     190          for (c = r; c < size; c += b) {
     191  
     192              from = matrix + r*size + c;
     193              to = buf1;
     194              for (i = 0; i < b; i++) {
     195                  memcpy(to, from, b*(sizeof *to));
     196                  from += size;
     197                  to += b;
     198              }
     199              squaretrans(buf1, b);
     200  
     201              if (r == c) {
     202                  to = matrix + r*size + c;
     203                  from = buf1;
     204                  for (i = 0; i < b; i++) {
     205                      memcpy(to, from, b*(sizeof *to));
     206                      from += b;
     207                      to += size;
     208                  }
     209                  continue;
     210              }
     211              else {
     212                  from = matrix + c*size + r;
     213                  to = buf2;
     214                  for (i = 0; i < b; i++) {
     215                      memcpy(to, from, b*(sizeof *to));
     216                      from += size;
     217                      to += b;
     218                  }
     219                  squaretrans(buf2, b);
     220  
     221                  to = matrix + c*size + r;
     222                  from = buf1;
     223                  for (i = 0; i < b; i++) {
     224                      memcpy(to, from, b*(sizeof *to));
     225                      from += b;
     226                      to += size;
     227                  }
     228  
     229                  to = matrix + r*size + c;
     230                  from = buf2;
     231                  for (i = 0; i < b; i++) {
     232                      memcpy(to, from, b*(sizeof *to));
     233                      from += b;
     234                      to += size;
     235                  }
     236              }
     237          }
     238      }
     239  
     240  }
     241  
     242  /*
     243   * In-place transposition of a 2^n x 2^n or a 2^n x (2*2^n)
     244   * or a (2*2^n) x 2^n matrix.
     245   */
     246  int
     247  transpose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols)
     248  {
     249      mpd_size_t size = mul_size_t(rows, cols);
     250  
     251      assert(ispower2(rows));
     252      assert(ispower2(cols));
     253  
     254      if (cols == rows) {
     255          squaretrans_pow2(matrix, rows);
     256      }
     257      else if (cols == mul_size_t(2, rows)) {
     258          if (!swap_halfrows_pow2(matrix, rows, cols, FORWARD_CYCLE)) {
     259              return 0;
     260          }
     261          squaretrans_pow2(matrix, rows);
     262          squaretrans_pow2(matrix+(size/2), rows);
     263      }
     264      else if (rows == mul_size_t(2, cols)) {
     265          squaretrans_pow2(matrix, cols);
     266          squaretrans_pow2(matrix+(size/2), cols);
     267          if (!swap_halfrows_pow2(matrix, cols, rows, BACKWARD_CYCLE)) {
     268              return 0;
     269          }
     270      }
     271      else {
     272          abort(); /* GCOV_NOT_REACHED */
     273      }
     274  
     275      return 1;
     276  }