(root)/
gettext-0.22.4/
gnulib-local/
lib/
libxml/
timsort.h
       1  /* libxml2 - Library for parsing XML documents
       2   * Copyright (C) 2006-2021 Free Software Foundation, Inc.
       3   *
       4   * This file is not part of the GNU gettext program, but is used with
       5   * GNU gettext.
       6   *
       7   * The original copyright notice is as follows:
       8   */
       9  
      10  /*
      11   * Copyright (C) 1998-2012 Daniel Veillard.  All Rights Reserved.
      12   *
      13   * Permission is hereby granted, free of charge, to any person obtaining a copy
      14   * of this software and associated documentation files (the "Software"), to deal
      15   * in the Software without restriction, including without limitation the rights
      16   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      17   * copies of the Software, and to permit persons to whom the Software is fur-
      18   * nished to do so, subject to the following conditions:
      19   *
      20   * The above copyright notice and this permission notice shall be included in
      21   * all copies or substantial portions of the Software.
      22   *
      23   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      24   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
      25   * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
      26   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      27   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      28   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
      29   * THE SOFTWARE.
      30   */
      31  
      32  /*
      33   * The MIT License (MIT)
      34   *
      35   * Copyright (c) 2010-2017 Christopher Swenson.
      36   * Copyright (c) 2012 Vojtech Fried.
      37   * Copyright (c) 2012 Google Inc. All Rights Reserved.
      38   *
      39   * Permission is hereby granted, free of charge, to any person obtaining a
      40   * copy of this software and associated documentation files (the "Software"),
      41   * to deal in the Software without restriction, including without limitation
      42   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      43   * and/or sell copies of the Software, and to permit persons to whom the
      44   * Software is furnished to do so, subject to the following conditions:
      45   *
      46   * The above copyright notice and this permission notice shall be included in
      47   * all copies or substantial portions of the Software.
      48   *
      49   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      50   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      51   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      52   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      53   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      54   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      55   * DEALINGS IN THE SOFTWARE.
      56   */
      57  
      58  /*
      59   * Taken from https://github.com/swenson/sort
      60   * Revision: 05fd77bfec049ce8b7c408c4d3dd2d51ee061a15
      61   * Removed all code unrelated to Timsort and made minor adjustments for
      62   * cross-platform compatibility.
      63   */
      64  
      65  #include <stdlib.h>
      66  #include <stdio.h>
      67  #include <string.h>
      68  #ifdef HAVE_STDINT_H
      69  #include <stdint.h>
      70  #elif defined(_WIN32)
      71  typedef unsigned __int64 uint64_t;
      72  #endif
      73  
      74  #ifndef SORT_NAME
      75  #error "Must declare SORT_NAME"
      76  #endif
      77  
      78  #ifndef SORT_TYPE
      79  #error "Must declare SORT_TYPE"
      80  #endif
      81  
      82  #ifndef SORT_CMP
      83  #define SORT_CMP(x, y)  ((x) < (y) ? -1 : ((x) == (y) ? 0 : 1))
      84  #endif
      85  
      86  #ifndef TIM_SORT_STACK_SIZE
      87  #define TIM_SORT_STACK_SIZE 128
      88  #endif
      89  
      90  #define SORT_SWAP(x,y) {SORT_TYPE __SORT_SWAP_t = (x); (x) = (y); (y) = __SORT_SWAP_t;}
      91  
      92  
      93  /* Common, type-agnosting functions and constants that we don't want to declare twice. */
      94  #ifndef SORT_COMMON_H
      95  #define SORT_COMMON_H
      96  
      97  #ifndef MAX
      98  #define MAX(x,y) (((x) > (y) ? (x) : (y)))
      99  #endif
     100  
     101  #ifndef MIN
     102  #define MIN(x,y) (((x) < (y) ? (x) : (y)))
     103  #endif
     104  
     105  static int compute_minrun(const uint64_t);
     106  
     107  #ifndef CLZ
     108  #if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
     109  #define CLZ __builtin_clzll
     110  #else
     111  
     112  static int clzll(uint64_t);
     113  
     114  /* adapted from Hacker's Delight */
     115  static int clzll(uint64_t x) {
     116    int n;
     117  
     118    if (x == 0) {
     119      return 64;
     120    }
     121  
     122    n = 0;
     123  
     124    if (x <= 0x00000000FFFFFFFFL) {
     125      n = n + 32;
     126      x = x << 32;
     127    }
     128  
     129    if (x <= 0x0000FFFFFFFFFFFFL) {
     130      n = n + 16;
     131      x = x << 16;
     132    }
     133  
     134    if (x <= 0x00FFFFFFFFFFFFFFL) {
     135      n = n + 8;
     136      x = x << 8;
     137    }
     138  
     139    if (x <= 0x0FFFFFFFFFFFFFFFL) {
     140      n = n + 4;
     141      x = x << 4;
     142    }
     143  
     144    if (x <= 0x3FFFFFFFFFFFFFFFL) {
     145      n = n + 2;
     146      x = x << 2;
     147    }
     148  
     149    if (x <= 0x7FFFFFFFFFFFFFFFL) {
     150      n = n + 1;
     151    }
     152  
     153    return n;
     154  }
     155  
     156  #define CLZ clzll
     157  #endif
     158  #endif
     159  
     160  static inline int compute_minrun(const uint64_t size) {
     161    const int top_bit = 64 - CLZ(size);
     162    const int shift = MAX(top_bit, 6) - 6;
     163    const int minrun = size >> shift;
     164    const uint64_t mask = (1ULL << shift) - 1;
     165  
     166    if (mask & size) {
     167      return minrun + 1;
     168    }
     169  
     170    return minrun;
     171  }
     172  
     173  #endif /* SORT_COMMON_H */
     174  
     175  #define SORT_CONCAT(x, y) x ## _ ## y
     176  #define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y)
     177  #define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x)
     178  
     179  #define BINARY_INSERTION_FIND          SORT_MAKE_STR(binary_insertion_find)
     180  #define BINARY_INSERTION_SORT_START    SORT_MAKE_STR(binary_insertion_sort_start)
     181  #define BINARY_INSERTION_SORT          SORT_MAKE_STR(binary_insertion_sort)
     182  #define REVERSE_ELEMENTS               SORT_MAKE_STR(reverse_elements)
     183  #define COUNT_RUN                      SORT_MAKE_STR(count_run)
     184  #define CHECK_INVARIANT                SORT_MAKE_STR(check_invariant)
     185  #define TIM_SORT                       SORT_MAKE_STR(tim_sort)
     186  #define TIM_SORT_RESIZE                SORT_MAKE_STR(tim_sort_resize)
     187  #define TIM_SORT_MERGE                 SORT_MAKE_STR(tim_sort_merge)
     188  #define TIM_SORT_COLLAPSE              SORT_MAKE_STR(tim_sort_collapse)
     189  
     190  #ifndef MAX
     191  #define MAX(x,y) (((x) > (y) ? (x) : (y)))
     192  #endif
     193  #ifndef MIN
     194  #define MIN(x,y) (((x) < (y) ? (x) : (y)))
     195  #endif
     196  
     197  typedef struct {
     198    size_t start;
     199    size_t length;
     200  } TIM_SORT_RUN_T;
     201  
     202  
     203  void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size);
     204  void TIM_SORT(SORT_TYPE *dst, const size_t size);
     205  
     206  
     207  /* Function used to do a binary search for binary insertion sort */
     208  static inline size_t BINARY_INSERTION_FIND(SORT_TYPE *dst, const SORT_TYPE x,
     209      const size_t size) {
     210    size_t l, c, r;
     211    SORT_TYPE cx;
     212    l = 0;
     213    r = size - 1;
     214    c = r >> 1;
     215  
     216    /* check for out of bounds at the beginning. */
     217    if (SORT_CMP(x, dst[0]) < 0) {
     218      return 0;
     219    } else if (SORT_CMP(x, dst[r]) > 0) {
     220      return r;
     221    }
     222  
     223    cx = dst[c];
     224  
     225    while (1) {
     226      const int val = SORT_CMP(x, cx);
     227  
     228      if (val < 0) {
     229        if (c - l <= 1) {
     230          return c;
     231        }
     232  
     233        r = c;
     234      } else { /* allow = for stability. The binary search favors the right. */
     235        if (r - c <= 1) {
     236          return c + 1;
     237        }
     238  
     239        l = c;
     240      }
     241  
     242      c = l + ((r - l) >> 1);
     243      cx = dst[c];
     244    }
     245  }
     246  
     247  /* Binary insertion sort, but knowing that the first "start" entries are sorted.  Used in timsort. */
     248  static void BINARY_INSERTION_SORT_START(SORT_TYPE *dst, const size_t start, const size_t size) {
     249    size_t i;
     250  
     251    for (i = start; i < size; i++) {
     252      size_t j;
     253      SORT_TYPE x;
     254      size_t location;
     255  
     256      /* If this entry is already correct, just move along */
     257      if (SORT_CMP(dst[i - 1], dst[i]) <= 0) {
     258        continue;
     259      }
     260  
     261      /* Else we need to find the right place, shift everything over, and squeeze in */
     262      x = dst[i];
     263      location = BINARY_INSERTION_FIND(dst, x, i);
     264  
     265      for (j = i - 1; j >= location; j--) {
     266        dst[j + 1] = dst[j];
     267  
     268        if (j == 0) { /* check edge case because j is unsigned */
     269          break;
     270        }
     271      }
     272  
     273      dst[location] = x;
     274    }
     275  }
     276  
     277  /* Binary insertion sort */
     278  void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size) {
     279    /* don't bother sorting an array of size <= 1 */
     280    if (size <= 1) {
     281      return;
     282    }
     283  
     284    BINARY_INSERTION_SORT_START(dst, 1, size);
     285  }
     286  
     287  /* timsort implementation, based on timsort.txt */
     288  
     289  static inline void REVERSE_ELEMENTS(SORT_TYPE *dst, size_t start, size_t end) {
     290    while (1) {
     291      if (start >= end) {
     292        return;
     293      }
     294  
     295      SORT_SWAP(dst[start], dst[end]);
     296      start++;
     297      end--;
     298    }
     299  }
     300  
     301  static size_t COUNT_RUN(SORT_TYPE *dst, const size_t start, const size_t size) {
     302    size_t curr;
     303  
     304    if (size - start == 1) {
     305      return 1;
     306    }
     307  
     308    if (start >= size - 2) {
     309      if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0) {
     310        SORT_SWAP(dst[size - 2], dst[size - 1]);
     311      }
     312  
     313      return 2;
     314    }
     315  
     316    curr = start + 2;
     317  
     318    if (SORT_CMP(dst[start], dst[start + 1]) <= 0) {
     319      /* increasing run */
     320      while (1) {
     321        if (curr == size - 1) {
     322          break;
     323        }
     324  
     325        if (SORT_CMP(dst[curr - 1], dst[curr]) > 0) {
     326          break;
     327        }
     328  
     329        curr++;
     330      }
     331  
     332      return curr - start;
     333    } else {
     334      /* decreasing run */
     335      while (1) {
     336        if (curr == size - 1) {
     337          break;
     338        }
     339  
     340        if (SORT_CMP(dst[curr - 1], dst[curr]) <= 0) {
     341          break;
     342        }
     343  
     344        curr++;
     345      }
     346  
     347      /* reverse in-place */
     348      REVERSE_ELEMENTS(dst, start, curr - 1);
     349      return curr - start;
     350    }
     351  }
     352  
     353  static int CHECK_INVARIANT(TIM_SORT_RUN_T *stack, const int stack_curr) {
     354    size_t A, B, C;
     355  
     356    if (stack_curr < 2) {
     357      return 1;
     358    }
     359  
     360    if (stack_curr == 2) {
     361      const size_t A1 = stack[stack_curr - 2].length;
     362      const size_t B1 = stack[stack_curr - 1].length;
     363  
     364      if (A1 <= B1) {
     365        return 0;
     366      }
     367  
     368      return 1;
     369    }
     370  
     371    A = stack[stack_curr - 3].length;
     372    B = stack[stack_curr - 2].length;
     373    C = stack[stack_curr - 1].length;
     374  
     375    if ((A <= B + C) || (B <= C)) {
     376      return 0;
     377    }
     378  
     379    return 1;
     380  }
     381  
     382  typedef struct {
     383    size_t alloc;
     384    SORT_TYPE *storage;
     385  } TEMP_STORAGE_T;
     386  
     387  static void TIM_SORT_RESIZE(TEMP_STORAGE_T *store, const size_t new_size) {
     388    if (store->alloc < new_size) {
     389      SORT_TYPE *tempstore = (SORT_TYPE *)realloc(store->storage, new_size * sizeof(SORT_TYPE));
     390  
     391      if (tempstore == NULL) {
     392        fprintf(stderr, "Error allocating temporary storage for tim sort: need %lu bytes",
     393                (unsigned long)(sizeof(SORT_TYPE) * new_size));
     394        exit(1);
     395      }
     396  
     397      store->storage = tempstore;
     398      store->alloc = new_size;
     399    }
     400  }
     401  
     402  static void TIM_SORT_MERGE(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr,
     403                             TEMP_STORAGE_T *store) {
     404    const size_t A = stack[stack_curr - 2].length;
     405    const size_t B = stack[stack_curr - 1].length;
     406    const size_t curr = stack[stack_curr - 2].start;
     407    SORT_TYPE *storage;
     408    size_t i, j, k;
     409    TIM_SORT_RESIZE(store, MIN(A, B));
     410    storage = store->storage;
     411  
     412    /* left merge */
     413    if (A < B) {
     414      memcpy(storage, &dst[curr], A * sizeof(SORT_TYPE));
     415      i = 0;
     416      j = curr + A;
     417  
     418      for (k = curr; k < curr + A + B; k++) {
     419        if ((i < A) && (j < curr + A + B)) {
     420          if (SORT_CMP(storage[i], dst[j]) <= 0) {
     421            dst[k] = storage[i++];
     422          } else {
     423            dst[k] = dst[j++];
     424          }
     425        } else if (i < A) {
     426          dst[k] = storage[i++];
     427        } else {
     428          break;
     429        }
     430      }
     431    } else {
     432      /* right merge */
     433      memcpy(storage, &dst[curr + A], B * sizeof(SORT_TYPE));
     434      i = B;
     435      j = curr + A;
     436      k = curr + A + B;
     437  
     438      while (k-- > curr) {
     439        if ((i > 0) && (j > curr)) {
     440          if (SORT_CMP(dst[j - 1], storage[i - 1]) > 0) {
     441            dst[k] = dst[--j];
     442          } else {
     443            dst[k] = storage[--i];
     444          }
     445        } else if (i > 0) {
     446          dst[k] = storage[--i];
     447        } else {
     448          break;
     449        }
     450      }
     451    }
     452  }
     453  
     454  static int TIM_SORT_COLLAPSE(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr,
     455                               TEMP_STORAGE_T *store, const size_t size) {
     456    while (1) {
     457      size_t A, B, C, D;
     458      int ABC, BCD, CD;
     459  
     460      /* if the stack only has one thing on it, we are done with the collapse */
     461      if (stack_curr <= 1) {
     462        break;
     463      }
     464  
     465      /* if this is the last merge, just do it */
     466      if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
     467        TIM_SORT_MERGE(dst, stack, stack_curr, store);
     468        stack[0].length += stack[1].length;
     469        stack_curr--;
     470        break;
     471      }
     472      /* check if the invariant is off for a stack of 2 elements */
     473      else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
     474        TIM_SORT_MERGE(dst, stack, stack_curr, store);
     475        stack[0].length += stack[1].length;
     476        stack_curr--;
     477        break;
     478      } else if (stack_curr == 2) {
     479        break;
     480      }
     481  
     482      B = stack[stack_curr - 3].length;
     483      C = stack[stack_curr - 2].length;
     484      D = stack[stack_curr - 1].length;
     485  
     486      if (stack_curr >= 4) {
     487        A = stack[stack_curr - 4].length;
     488        ABC = (A <= B + C);
     489      } else {
     490        ABC = 0;
     491      }
     492  
     493      BCD = (B <= C + D) || ABC;
     494      CD = (C <= D);
     495  
     496      /* Both invariants are good */
     497      if (!BCD && !CD) {
     498        break;
     499      }
     500  
     501      /* left merge */
     502      if (BCD && !CD) {
     503        TIM_SORT_MERGE(dst, stack, stack_curr - 1, store);
     504        stack[stack_curr - 3].length += stack[stack_curr - 2].length;
     505        stack[stack_curr - 2] = stack[stack_curr - 1];
     506        stack_curr--;
     507      } else {
     508        /* right merge */
     509        TIM_SORT_MERGE(dst, stack, stack_curr, store);
     510        stack[stack_curr - 2].length += stack[stack_curr - 1].length;
     511        stack_curr--;
     512      }
     513    }
     514  
     515    return stack_curr;
     516  }
     517  
     518  static inline int PUSH_NEXT(SORT_TYPE *dst,
     519                              const size_t size,
     520                              TEMP_STORAGE_T *store,
     521                              const size_t minrun,
     522                              TIM_SORT_RUN_T *run_stack,
     523                              size_t *stack_curr,
     524                              size_t *curr) {
     525    size_t len = COUNT_RUN(dst, *curr, size);
     526    size_t run = minrun;
     527  
     528    if (run > size - *curr) {
     529      run = size - *curr;
     530    }
     531  
     532    if (run > len) {
     533      BINARY_INSERTION_SORT_START(&dst[*curr], len, run);
     534      len = run;
     535    }
     536  
     537    run_stack[*stack_curr].start = *curr;
     538    run_stack[*stack_curr].length = len;
     539    (*stack_curr)++;
     540    *curr += len;
     541  
     542    if (*curr == size) {
     543      /* finish up */
     544      while (*stack_curr > 1) {
     545        TIM_SORT_MERGE(dst, run_stack, *stack_curr, store);
     546        run_stack[*stack_curr - 2].length += run_stack[*stack_curr - 1].length;
     547        (*stack_curr)--;
     548      }
     549  
     550      if (store->storage != NULL) {
     551        free(store->storage);
     552        store->storage = NULL;
     553      }
     554  
     555      return 0;
     556    }
     557  
     558    return 1;
     559  }
     560  
     561  void TIM_SORT(SORT_TYPE *dst, const size_t size) {
     562    size_t minrun;
     563    TEMP_STORAGE_T _store, *store;
     564    TIM_SORT_RUN_T run_stack[TIM_SORT_STACK_SIZE];
     565    size_t stack_curr = 0;
     566    size_t curr = 0;
     567  
     568    /* don't bother sorting an array of size 1 */
     569    if (size <= 1) {
     570      return;
     571    }
     572  
     573    if (size < 64) {
     574      BINARY_INSERTION_SORT(dst, size);
     575      return;
     576    }
     577  
     578    /* compute the minimum run length */
     579    minrun = compute_minrun(size);
     580    /* temporary storage for merges */
     581    store = &_store;
     582    store->alloc = 0;
     583    store->storage = NULL;
     584  
     585    if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
     586      return;
     587    }
     588  
     589    if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
     590      return;
     591    }
     592  
     593    if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
     594      return;
     595    }
     596  
     597    while (1) {
     598      if (!CHECK_INVARIANT(run_stack, stack_curr)) {
     599        stack_curr = TIM_SORT_COLLAPSE(dst, run_stack, stack_curr, store, size);
     600        continue;
     601      }
     602  
     603      if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
     604        return;
     605      }
     606    }
     607  }
     608  
     609  #undef SORT_CONCAT
     610  #undef SORT_MAKE_STR1
     611  #undef SORT_MAKE_STR
     612  #undef SORT_NAME
     613  #undef SORT_TYPE
     614  #undef SORT_CMP
     615  #undef TEMP_STORAGE_T
     616  #undef TIM_SORT_RUN_T
     617  #undef PUSH_NEXT
     618  #undef SORT_SWAP
     619  #undef SORT_CONCAT
     620  #undef SORT_MAKE_STR1
     621  #undef SORT_MAKE_STR
     622  #undef BINARY_INSERTION_FIND
     623  #undef BINARY_INSERTION_SORT_START
     624  #undef BINARY_INSERTION_SORT
     625  #undef REVERSE_ELEMENTS
     626  #undef COUNT_RUN
     627  #undef TIM_SORT
     628  #undef TIM_SORT_RESIZE
     629  #undef TIM_SORT_COLLAPSE
     630  #undef TIM_SORT_RUN_T
     631  #undef TEMP_STORAGE_T