(root)/
libxml2-2.12.3/
timsort.h
       1  /*
       2   * Taken from https://github.com/swenson/sort
       3   * Revision: 05fd77bfec049ce8b7c408c4d3dd2d51ee061a15
       4   * Removed all code unrelated to Timsort and made minor adjustments for
       5   * cross-platform compatibility.
       6   */
       7  
       8  /*
       9   * The MIT License (MIT)
      10   *
      11   * Copyright (c) 2010-2017 Christopher Swenson.
      12   * Copyright (c) 2012 Vojtech Fried.
      13   * Copyright (c) 2012 Google Inc. All Rights Reserved.
      14   *
      15   * Permission is hereby granted, free of charge, to any person obtaining a
      16   * copy of this software and associated documentation files (the "Software"),
      17   * to deal in the Software without restriction, including without limitation
      18   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      19   * and/or sell copies of the Software, and to permit persons to whom the
      20   * Software is furnished to do so, subject to the following conditions:
      21   *
      22   * The above copyright notice and this permission notice shall be included in
      23   * all copies or substantial portions of the Software.
      24   *
      25   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      26   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      27   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      28   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      29   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      30   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      31   * DEALINGS IN THE SOFTWARE.
      32   */
      33  
      34  #include <stdlib.h>
      35  #include <stdio.h>
      36  #include <string.h>
      37  #ifdef HAVE_STDINT_H
      38  #include <stdint.h>
      39  #elif defined(_WIN32)
      40  typedef unsigned __int64 uint64_t;
      41  #endif
      42  
      43  #ifndef SORT_NAME
      44  #error "Must declare SORT_NAME"
      45  #endif
      46  
      47  #ifndef SORT_TYPE
      48  #error "Must declare SORT_TYPE"
      49  #endif
      50  
      51  #ifndef SORT_CMP
      52  #define SORT_CMP(x, y)  ((x) < (y) ? -1 : ((x) == (y) ? 0 : 1))
      53  #endif
      54  
      55  #ifndef TIM_SORT_STACK_SIZE
      56  #define TIM_SORT_STACK_SIZE 128
      57  #endif
      58  
      59  #define SORT_SWAP(x,y) {SORT_TYPE __SORT_SWAP_t = (x); (x) = (y); (y) = __SORT_SWAP_t;}
      60  
      61  
      62  /* Common, type-agnostic functions and constants that we don't want to declare twice. */
      63  #ifndef SORT_COMMON_H
      64  #define SORT_COMMON_H
      65  
      66  #ifndef MAX
      67  #define MAX(x,y) (((x) > (y) ? (x) : (y)))
      68  #endif
      69  
      70  #ifndef MIN
      71  #define MIN(x,y) (((x) < (y) ? (x) : (y)))
      72  #endif
      73  
      74  static int compute_minrun(const uint64_t);
      75  
      76  #ifndef CLZ
      77  #if defined(__GNUC__) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3))
      78  #define CLZ __builtin_clzll
      79  #else
      80  
      81  static int clzll(uint64_t);
      82  
      83  /* adapted from Hacker's Delight */
      84  static int clzll(uint64_t x) {
      85    int n;
      86  
      87    if (x == 0) {
      88      return 64;
      89    }
      90  
      91    n = 0;
      92  
      93    if (x <= 0x00000000FFFFFFFFL) {
      94      n = n + 32;
      95      x = x << 32;
      96    }
      97  
      98    if (x <= 0x0000FFFFFFFFFFFFL) {
      99      n = n + 16;
     100      x = x << 16;
     101    }
     102  
     103    if (x <= 0x00FFFFFFFFFFFFFFL) {
     104      n = n + 8;
     105      x = x << 8;
     106    }
     107  
     108    if (x <= 0x0FFFFFFFFFFFFFFFL) {
     109      n = n + 4;
     110      x = x << 4;
     111    }
     112  
     113    if (x <= 0x3FFFFFFFFFFFFFFFL) {
     114      n = n + 2;
     115      x = x << 2;
     116    }
     117  
     118    if (x <= 0x7FFFFFFFFFFFFFFFL) {
     119      n = n + 1;
     120    }
     121  
     122    return n;
     123  }
     124  
     125  #define CLZ clzll
     126  #endif
     127  #endif
     128  
     129  static __inline int compute_minrun(const uint64_t size) {
     130    const int top_bit = 64 - CLZ(size);
     131    const int shift = MAX(top_bit, 6) - 6;
     132    const int minrun = size >> shift;
     133    const uint64_t mask = (1ULL << shift) - 1;
     134  
     135    if (mask & size) {
     136      return minrun + 1;
     137    }
     138  
     139    return minrun;
     140  }
     141  
     142  #endif /* SORT_COMMON_H */
     143  
     144  #define SORT_CONCAT(x, y) x ## _ ## y
     145  #define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y)
     146  #define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x)
     147  
     148  #define BINARY_INSERTION_FIND          SORT_MAKE_STR(binary_insertion_find)
     149  #define BINARY_INSERTION_SORT_START    SORT_MAKE_STR(binary_insertion_sort_start)
     150  #define BINARY_INSERTION_SORT          SORT_MAKE_STR(binary_insertion_sort)
     151  #define REVERSE_ELEMENTS               SORT_MAKE_STR(reverse_elements)
     152  #define COUNT_RUN                      SORT_MAKE_STR(count_run)
     153  #define CHECK_INVARIANT                SORT_MAKE_STR(check_invariant)
     154  #define TIM_SORT                       SORT_MAKE_STR(tim_sort)
     155  #define TIM_SORT_RESIZE                SORT_MAKE_STR(tim_sort_resize)
     156  #define TIM_SORT_MERGE                 SORT_MAKE_STR(tim_sort_merge)
     157  #define TIM_SORT_COLLAPSE              SORT_MAKE_STR(tim_sort_collapse)
     158  
     159  #ifndef MAX
     160  #define MAX(x,y) (((x) > (y) ? (x) : (y)))
     161  #endif
     162  #ifndef MIN
     163  #define MIN(x,y) (((x) < (y) ? (x) : (y)))
     164  #endif
     165  
     166  typedef struct {
     167    size_t start;
     168    size_t length;
     169  } TIM_SORT_RUN_T;
     170  
     171  
     172  void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size);
     173  void TIM_SORT(SORT_TYPE *dst, const size_t size);
     174  
     175  
     176  /* Function used to do a binary search for binary insertion sort */
     177  static __inline size_t BINARY_INSERTION_FIND(SORT_TYPE *dst, const SORT_TYPE x,
     178      const size_t size) {
     179    size_t l, c, r;
     180    SORT_TYPE cx;
     181    l = 0;
     182    r = size - 1;
     183    c = r >> 1;
     184  
     185    /* check for out of bounds at the beginning. */
     186    if (SORT_CMP(x, dst[0]) < 0) {
     187      return 0;
     188    } else if (SORT_CMP(x, dst[r]) > 0) {
     189      return r;
     190    }
     191  
     192    cx = dst[c];
     193  
     194    while (1) {
     195      const int val = SORT_CMP(x, cx);
     196  
     197      if (val < 0) {
     198        if (c - l <= 1) {
     199          return c;
     200        }
     201  
     202        r = c;
     203      } else { /* allow = for stability. The binary search favors the right. */
     204        if (r - c <= 1) {
     205          return c + 1;
     206        }
     207  
     208        l = c;
     209      }
     210  
     211      c = l + ((r - l) >> 1);
     212      cx = dst[c];
     213    }
     214  }
     215  
     216  /* Binary insertion sort, but knowing that the first "start" entries are sorted.  Used in timsort. */
     217  static void BINARY_INSERTION_SORT_START(SORT_TYPE *dst, const size_t start, const size_t size) {
     218    size_t i;
     219  
     220    for (i = start; i < size; i++) {
     221      size_t j;
     222      SORT_TYPE x;
     223      size_t location;
     224  
     225      /* If this entry is already correct, just move along */
     226      if (SORT_CMP(dst[i - 1], dst[i]) <= 0) {
     227        continue;
     228      }
     229  
     230      /* Else we need to find the right place, shift everything over, and squeeze in */
     231      x = dst[i];
     232      location = BINARY_INSERTION_FIND(dst, x, i);
     233  
     234      for (j = i - 1; j >= location; j--) {
     235        dst[j + 1] = dst[j];
     236  
     237        if (j == 0) { /* check edge case because j is unsigned */
     238          break;
     239        }
     240      }
     241  
     242      dst[location] = x;
     243    }
     244  }
     245  
     246  /* Binary insertion sort */
     247  void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size) {
     248    /* don't bother sorting an array of size <= 1 */
     249    if (size <= 1) {
     250      return;
     251    }
     252  
     253    BINARY_INSERTION_SORT_START(dst, 1, size);
     254  }
     255  
     256  /* timsort implementation, based on timsort.txt */
     257  
     258  static __inline void REVERSE_ELEMENTS(SORT_TYPE *dst, size_t start, size_t end) {
     259    while (1) {
     260      if (start >= end) {
     261        return;
     262      }
     263  
     264      SORT_SWAP(dst[start], dst[end]);
     265      start++;
     266      end--;
     267    }
     268  }
     269  
     270  static size_t COUNT_RUN(SORT_TYPE *dst, const size_t start, const size_t size) {
     271    size_t curr;
     272  
     273    if (size - start == 1) {
     274      return 1;
     275    }
     276  
     277    if (start >= size - 2) {
     278      if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0) {
     279        SORT_SWAP(dst[size - 2], dst[size - 1]);
     280      }
     281  
     282      return 2;
     283    }
     284  
     285    curr = start + 2;
     286  
     287    if (SORT_CMP(dst[start], dst[start + 1]) <= 0) {
     288      /* increasing run */
     289      while (1) {
     290        if (curr == size - 1) {
     291          break;
     292        }
     293  
     294        if (SORT_CMP(dst[curr - 1], dst[curr]) > 0) {
     295          break;
     296        }
     297  
     298        curr++;
     299      }
     300  
     301      return curr - start;
     302    } else {
     303      /* decreasing run */
     304      while (1) {
     305        if (curr == size - 1) {
     306          break;
     307        }
     308  
     309        if (SORT_CMP(dst[curr - 1], dst[curr]) <= 0) {
     310          break;
     311        }
     312  
     313        curr++;
     314      }
     315  
     316      /* reverse in-place */
     317      REVERSE_ELEMENTS(dst, start, curr - 1);
     318      return curr - start;
     319    }
     320  }
     321  
     322  static int CHECK_INVARIANT(TIM_SORT_RUN_T *stack, const int stack_curr) {
     323    size_t A, B, C;
     324  
     325    if (stack_curr < 2) {
     326      return 1;
     327    }
     328  
     329    if (stack_curr == 2) {
     330      const size_t A1 = stack[stack_curr - 2].length;
     331      const size_t B1 = stack[stack_curr - 1].length;
     332  
     333      if (A1 <= B1) {
     334        return 0;
     335      }
     336  
     337      return 1;
     338    }
     339  
     340    A = stack[stack_curr - 3].length;
     341    B = stack[stack_curr - 2].length;
     342    C = stack[stack_curr - 1].length;
     343  
     344    if ((A <= B + C) || (B <= C)) {
     345      return 0;
     346    }
     347  
     348    return 1;
     349  }
     350  
     351  typedef struct {
     352    size_t alloc;
     353    SORT_TYPE *storage;
     354  } TEMP_STORAGE_T;
     355  
     356  static void TIM_SORT_RESIZE(TEMP_STORAGE_T *store, const size_t new_size) {
     357    if (store->alloc < new_size) {
     358      SORT_TYPE *tempstore = (SORT_TYPE *)realloc(store->storage, new_size * sizeof(SORT_TYPE));
     359  
     360      if (tempstore == NULL) {
     361        fprintf(stderr, "Error allocating temporary storage for tim sort: need %lu bytes",
     362                (unsigned long)(sizeof(SORT_TYPE) * new_size));
     363        exit(1);
     364      }
     365  
     366      store->storage = tempstore;
     367      store->alloc = new_size;
     368    }
     369  }
     370  
     371  static void TIM_SORT_MERGE(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr,
     372                             TEMP_STORAGE_T *store) {
     373    const size_t A = stack[stack_curr - 2].length;
     374    const size_t B = stack[stack_curr - 1].length;
     375    const size_t curr = stack[stack_curr - 2].start;
     376    SORT_TYPE *storage;
     377    size_t i, j, k;
     378    TIM_SORT_RESIZE(store, MIN(A, B));
     379    storage = store->storage;
     380  
     381    /* left merge */
     382    if (A < B) {
     383      memcpy(storage, &dst[curr], A * sizeof(SORT_TYPE));
     384      i = 0;
     385      j = curr + A;
     386  
     387      for (k = curr; k < curr + A + B; k++) {
     388        if ((i < A) && (j < curr + A + B)) {
     389          if (SORT_CMP(storage[i], dst[j]) <= 0) {
     390            dst[k] = storage[i++];
     391          } else {
     392            dst[k] = dst[j++];
     393          }
     394        } else if (i < A) {
     395          dst[k] = storage[i++];
     396        } else {
     397          break;
     398        }
     399      }
     400    } else {
     401      /* right merge */
     402      memcpy(storage, &dst[curr + A], B * sizeof(SORT_TYPE));
     403      i = B;
     404      j = curr + A;
     405      k = curr + A + B;
     406  
     407      while (k > curr) {
     408        k--;
     409        if ((i > 0) && (j > curr)) {
     410          if (SORT_CMP(dst[j - 1], storage[i - 1]) > 0) {
     411            dst[k] = dst[--j];
     412          } else {
     413            dst[k] = storage[--i];
     414          }
     415        } else if (i > 0) {
     416          dst[k] = storage[--i];
     417        } else {
     418          break;
     419        }
     420      }
     421    }
     422  }
     423  
     424  static int TIM_SORT_COLLAPSE(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr,
     425                               TEMP_STORAGE_T *store, const size_t size) {
     426    while (1) {
     427      size_t A, B, C, D;
     428      int ABC, BCD, CD;
     429  
     430      /* if the stack only has one thing on it, we are done with the collapse */
     431      if (stack_curr <= 1) {
     432        break;
     433      }
     434  
     435      /* if this is the last merge, just do it */
     436      if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
     437        TIM_SORT_MERGE(dst, stack, stack_curr, store);
     438        stack[0].length += stack[1].length;
     439        stack_curr--;
     440        break;
     441      }
     442      /* check if the invariant is off for a stack of 2 elements */
     443      else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
     444        TIM_SORT_MERGE(dst, stack, stack_curr, store);
     445        stack[0].length += stack[1].length;
     446        stack_curr--;
     447        break;
     448      } else if (stack_curr == 2) {
     449        break;
     450      }
     451  
     452      B = stack[stack_curr - 3].length;
     453      C = stack[stack_curr - 2].length;
     454      D = stack[stack_curr - 1].length;
     455  
     456      if (stack_curr >= 4) {
     457        A = stack[stack_curr - 4].length;
     458        ABC = (A <= B + C);
     459      } else {
     460        ABC = 0;
     461      }
     462  
     463      BCD = (B <= C + D) || ABC;
     464      CD = (C <= D);
     465  
     466      /* Both invariants are good */
     467      if (!BCD && !CD) {
     468        break;
     469      }
     470  
     471      /* left merge */
     472      if (BCD && !CD) {
     473        TIM_SORT_MERGE(dst, stack, stack_curr - 1, store);
     474        stack[stack_curr - 3].length += stack[stack_curr - 2].length;
     475        stack[stack_curr - 2] = stack[stack_curr - 1];
     476        stack_curr--;
     477      } else {
     478        /* right merge */
     479        TIM_SORT_MERGE(dst, stack, stack_curr, store);
     480        stack[stack_curr - 2].length += stack[stack_curr - 1].length;
     481        stack_curr--;
     482      }
     483    }
     484  
     485    return stack_curr;
     486  }
     487  
     488  static __inline int PUSH_NEXT(SORT_TYPE *dst,
     489                                const size_t size,
     490                                TEMP_STORAGE_T *store,
     491                                const size_t minrun,
     492                                TIM_SORT_RUN_T *run_stack,
     493                                size_t *stack_curr,
     494                                size_t *curr) {
     495    size_t len = COUNT_RUN(dst, *curr, size);
     496    size_t run = minrun;
     497  
     498    if (run > size - *curr) {
     499      run = size - *curr;
     500    }
     501  
     502    if (run > len) {
     503      BINARY_INSERTION_SORT_START(&dst[*curr], len, run);
     504      len = run;
     505    }
     506  
     507    run_stack[*stack_curr].start = *curr;
     508    run_stack[*stack_curr].length = len;
     509    (*stack_curr)++;
     510    *curr += len;
     511  
     512    if (*curr == size) {
     513      /* finish up */
     514      while (*stack_curr > 1) {
     515        TIM_SORT_MERGE(dst, run_stack, *stack_curr, store);
     516        run_stack[*stack_curr - 2].length += run_stack[*stack_curr - 1].length;
     517        (*stack_curr)--;
     518      }
     519  
     520      if (store->storage != NULL) {
     521        free(store->storage);
     522        store->storage = NULL;
     523      }
     524  
     525      return 0;
     526    }
     527  
     528    return 1;
     529  }
     530  
     531  void TIM_SORT(SORT_TYPE *dst, const size_t size) {
     532    size_t minrun;
     533    TEMP_STORAGE_T _store, *store;
     534    TIM_SORT_RUN_T run_stack[TIM_SORT_STACK_SIZE];
     535    size_t stack_curr = 0;
     536    size_t curr = 0;
     537  
     538    /* don't bother sorting an array of size 1 */
     539    if (size <= 1) {
     540      return;
     541    }
     542  
     543    if (size < 64) {
     544      BINARY_INSERTION_SORT(dst, size);
     545      return;
     546    }
     547  
     548    /* compute the minimum run length */
     549    minrun = compute_minrun(size);
     550    /* temporary storage for merges */
     551    store = &_store;
     552    store->alloc = 0;
     553    store->storage = NULL;
     554  
     555    if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
     556      return;
     557    }
     558  
     559    if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
     560      return;
     561    }
     562  
     563    if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
     564      return;
     565    }
     566  
     567    while (1) {
     568      if (!CHECK_INVARIANT(run_stack, stack_curr)) {
     569        stack_curr = TIM_SORT_COLLAPSE(dst, run_stack, stack_curr, store, size);
     570        continue;
     571      }
     572  
     573      if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
     574        return;
     575      }
     576    }
     577  }
     578  
     579  #undef SORT_CONCAT
     580  #undef SORT_MAKE_STR1
     581  #undef SORT_MAKE_STR
     582  #undef SORT_NAME
     583  #undef SORT_TYPE
     584  #undef SORT_CMP
     585  #undef TEMP_STORAGE_T
     586  #undef TIM_SORT_RUN_T
     587  #undef PUSH_NEXT
     588  #undef SORT_SWAP
     589  #undef SORT_CONCAT
     590  #undef SORT_MAKE_STR1
     591  #undef SORT_MAKE_STR
     592  #undef BINARY_INSERTION_FIND
     593  #undef BINARY_INSERTION_SORT_START
     594  #undef BINARY_INSERTION_SORT
     595  #undef REVERSE_ELEMENTS
     596  #undef COUNT_RUN
     597  #undef TIM_SORT
     598  #undef TIM_SORT_RESIZE
     599  #undef TIM_SORT_COLLAPSE
     600  #undef TIM_SORT_RUN_T
     601  #undef TEMP_STORAGE_T