(root)/
binutils-2.41/
libiberty/
rust-demangle.c
       1  /* Demangler for the Rust programming language
       2     Copyright (C) 2016-2023 Free Software Foundation, Inc.
       3     Written by David Tolnay (dtolnay@gmail.com).
       4     Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
       5  
       6  This file is part of the libiberty library.
       7  Libiberty is free software; you can redistribute it and/or
       8  modify it under the terms of the GNU Library General Public
       9  License as published by the Free Software Foundation; either
      10  version 2 of the License, or (at your option) any later version.
      11  
      12  In addition to the permissions in the GNU Library General Public
      13  License, the Free Software Foundation gives you unlimited permission
      14  to link the compiled version of this file into combinations with other
      15  programs, and to distribute those combinations without any restriction
      16  coming from the use of this file.  (The Library Public License
      17  restrictions do apply in other respects; for example, they cover
      18  modification of the file, and distribution when not linked into a
      19  combined executable.)
      20  
      21  Libiberty is distributed in the hope that it will be useful,
      22  but WITHOUT ANY WARRANTY; without even the implied warranty of
      23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      24  Library General Public License for more details.
      25  
      26  You should have received a copy of the GNU Library General Public
      27  License along with libiberty; see the file COPYING.LIB.
      28  If not, see <http://www.gnu.org/licenses/>.  */
      29  
      30  
      31  #ifdef HAVE_CONFIG_H
      32  #include "config.h"
      33  #endif
      34  
      35  #include "safe-ctype.h"
      36  
      37  #include <inttypes.h>
      38  #include <sys/types.h>
      39  #include <string.h>
      40  #include <stdio.h>
      41  #include <stdlib.h>
      42  
      43  #ifdef HAVE_STRING_H
      44  #include <string.h>
      45  #else
      46  extern size_t strlen(const char *s);
      47  extern int strncmp(const char *s1, const char *s2, size_t n);
      48  extern void *memset(void *s, int c, size_t n);
      49  #endif
      50  
      51  #include <demangle.h>
      52  #include "libiberty.h"
      53  
      54  struct rust_demangler
      55  {
      56    const char *sym;
      57    size_t sym_len;
      58  
      59    void *callback_opaque;
      60    demangle_callbackref callback;
      61  
      62    /* Position of the next character to read from the symbol. */
      63    size_t next;
      64  
      65    /* Non-zero if any error occurred. */
      66    int errored;
      67  
      68    /* Non-zero if nothing should be printed. */
      69    int skipping_printing;
      70  
      71    /* Non-zero if printing should be verbose (e.g. include hashes). */
      72    int verbose;
      73  
      74    /* Rust mangling version, with legacy mangling being -1. */
      75    int version;
      76  
      77    /* Recursion depth.  */
      78    unsigned int recursion;
      79    /* Maximum number of times demangle_path may be called recursively.  */
      80  #define RUST_MAX_RECURSION_COUNT  1024
      81  #define RUST_NO_RECURSION_LIMIT   ((unsigned int) -1)
      82  
      83    uint64_t bound_lifetime_depth;
      84  };
      85  
      86  /* Parsing functions. */
      87  
      88  static char
      89  peek (const struct rust_demangler *rdm)
      90  {
      91    if (rdm->next < rdm->sym_len)
      92      return rdm->sym[rdm->next];
      93    return 0;
      94  }
      95  
      96  static int
      97  eat (struct rust_demangler *rdm, char c)
      98  {
      99    if (peek (rdm) == c)
     100      {
     101        rdm->next++;
     102        return 1;
     103      }
     104    else
     105      return 0;
     106  }
     107  
     108  static char
     109  next (struct rust_demangler *rdm)
     110  {
     111    char c = peek (rdm);
     112    if (!c)
     113      rdm->errored = 1;
     114    else
     115      rdm->next++;
     116    return c;
     117  }
     118  
     119  static uint64_t
     120  parse_integer_62 (struct rust_demangler *rdm)
     121  {
     122    char c;
     123    uint64_t x;
     124  
     125    if (eat (rdm, '_'))
     126      return 0;
     127  
     128    x = 0;
     129    while (!eat (rdm, '_') && !rdm->errored)
     130      {
     131        c = next (rdm);
     132        x *= 62;
     133        if (ISDIGIT (c))
     134          x += c - '0';
     135        else if (ISLOWER (c))
     136          x += 10 + (c - 'a');
     137        else if (ISUPPER (c))
     138          x += 10 + 26 + (c - 'A');
     139        else
     140          {
     141            rdm->errored = 1;
     142            return 0;
     143          }
     144      }
     145    return x + 1;
     146  }
     147  
     148  static uint64_t
     149  parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
     150  {
     151    if (!eat (rdm, tag))
     152      return 0;
     153    return 1 + parse_integer_62 (rdm);
     154  }
     155  
     156  static uint64_t
     157  parse_disambiguator (struct rust_demangler *rdm)
     158  {
     159    return parse_opt_integer_62 (rdm, 's');
     160  }
     161  
     162  static size_t
     163  parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
     164  {
     165    char c;
     166    size_t hex_len;
     167  
     168    hex_len = 0;
     169    *value = 0;
     170  
     171    while (!eat (rdm, '_'))
     172      {
     173        *value <<= 4;
     174  
     175        c = next (rdm);
     176        if (ISDIGIT (c))
     177          *value |= c - '0';
     178        else if (c >= 'a' && c <= 'f')
     179          *value |= 10 + (c - 'a');
     180        else
     181          {
     182            rdm->errored = 1;
     183            return 0;
     184          }
     185        hex_len++;
     186      }
     187  
     188    return hex_len;
     189  }
     190  
     191  struct rust_mangled_ident
     192  {
     193    /* ASCII part of the identifier. */
     194    const char *ascii;
     195    size_t ascii_len;
     196  
     197    /* Punycode insertion codes for Unicode codepoints, if any. */
     198    const char *punycode;
     199    size_t punycode_len;
     200  };
     201  
     202  static struct rust_mangled_ident
     203  parse_ident (struct rust_demangler *rdm)
     204  {
     205    char c;
     206    size_t start, len;
     207    int is_punycode = 0;
     208    struct rust_mangled_ident ident;
     209  
     210    ident.ascii = NULL;
     211    ident.ascii_len = 0;
     212    ident.punycode = NULL;
     213    ident.punycode_len = 0;
     214  
     215    if (rdm->version != -1)
     216      is_punycode = eat (rdm, 'u');
     217  
     218    c = next (rdm);
     219    if (!ISDIGIT (c))
     220      {
     221        rdm->errored = 1;
     222        return ident;
     223      }
     224    len = c - '0';
     225  
     226    if (c != '0')
     227      while (ISDIGIT (peek (rdm)))
     228        len = len * 10 + (next (rdm) - '0');
     229  
     230    /* Skip past the optional `_` separator (v0). */
     231    if (rdm->version != -1)
     232      eat (rdm, '_');
     233  
     234    start = rdm->next;
     235    rdm->next += len;
     236    /* Check for overflows. */
     237    if ((start > rdm->next) || (rdm->next > rdm->sym_len))
     238      {
     239        rdm->errored = 1;
     240        return ident;
     241      }
     242  
     243    ident.ascii = rdm->sym + start;
     244    ident.ascii_len = len;
     245  
     246    if (is_punycode)
     247      {
     248        ident.punycode_len = 0;
     249        while (ident.ascii_len > 0)
     250          {
     251            ident.ascii_len--;
     252  
     253            /* The last '_' is a separator between ascii & punycode. */
     254            if (ident.ascii[ident.ascii_len] == '_')
     255              break;
     256  
     257            ident.punycode_len++;
     258          }
     259        if (!ident.punycode_len)
     260          {
     261            rdm->errored = 1;
     262            return ident;
     263          }
     264        ident.punycode = ident.ascii + (len - ident.punycode_len);
     265      }
     266  
     267    if (ident.ascii_len == 0)
     268      ident.ascii = NULL;
     269  
     270    return ident;
     271  }
     272  
     273  /* Printing functions. */
     274  
     275  static void
     276  print_str (struct rust_demangler *rdm, const char *data, size_t len)
     277  {
     278    if (!rdm->errored && !rdm->skipping_printing)
     279      rdm->callback (data, len, rdm->callback_opaque);
     280  }
     281  
     282  #define PRINT(s) print_str (rdm, s, strlen (s))
     283  
     284  static void
     285  print_uint64 (struct rust_demangler *rdm, uint64_t x)
     286  {
     287    char s[21];
     288    snprintf (s, 21, "%" PRIu64, x);
     289    PRINT (s);
     290  }
     291  
     292  static void
     293  print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
     294  {
     295    char s[17];
     296    snprintf (s, 17, "%" PRIx64, x);
     297    PRINT (s);
     298  }
     299  
     300  /* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
     301  static int
     302  decode_lower_hex_nibble (char nibble)
     303  {
     304    if ('0' <= nibble && nibble <= '9')
     305      return nibble - '0';
     306    if ('a' <= nibble && nibble <= 'f')
     307      return 0xa + (nibble - 'a');
     308    return -1;
     309  }
     310  
     311  /* Return the unescaped character for a "$...$" escape, or 0 if invalid. */
     312  static char
     313  decode_legacy_escape (const char *e, size_t len, size_t *out_len)
     314  {
     315    char c = 0;
     316    size_t escape_len = 0;
     317    int lo_nibble = -1, hi_nibble = -1;
     318  
     319    if (len < 3 || e[0] != '$')
     320      return 0;
     321  
     322    e++;
     323    len--;
     324  
     325    if (e[0] == 'C')
     326      {
     327        escape_len = 1;
     328  
     329        c = ',';
     330      }
     331    else if (len > 2)
     332      {
     333        escape_len = 2;
     334  
     335        if (e[0] == 'S' && e[1] == 'P')
     336          c = '@';
     337        else if (e[0] == 'B' && e[1] == 'P')
     338          c = '*';
     339        else if (e[0] == 'R' && e[1] == 'F')
     340          c = '&';
     341        else if (e[0] == 'L' && e[1] == 'T')
     342          c = '<';
     343        else if (e[0] == 'G' && e[1] == 'T')
     344          c = '>';
     345        else if (e[0] == 'L' && e[1] == 'P')
     346          c = '(';
     347        else if (e[0] == 'R' && e[1] == 'P')
     348          c = ')';
     349        else if (e[0] == 'u' && len > 3)
     350          {
     351            escape_len = 3;
     352  
     353            hi_nibble = decode_lower_hex_nibble (e[1]);
     354            if (hi_nibble < 0)
     355              return 0;
     356            lo_nibble = decode_lower_hex_nibble (e[2]);
     357            if (lo_nibble < 0)
     358              return 0;
     359  
     360            /* Only allow non-control ASCII characters. */
     361            if (hi_nibble > 7)
     362              return 0;
     363            c = (hi_nibble << 4) | lo_nibble;
     364            if (c < 0x20)
     365              return 0;
     366          }
     367      }
     368  
     369    if (!c || len <= escape_len || e[escape_len] != '$')
     370      return 0;
     371  
     372    *out_len = 2 + escape_len;
     373    return c;
     374  }
     375  
     376  static void
     377  print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
     378  {
     379    char unescaped;
     380    uint8_t *out, *p, d;
     381    size_t len, cap, punycode_pos, j;
     382    /* Punycode parameters and state. */
     383    uint32_t c;
     384    size_t base, t_min, t_max, skew, damp, bias, i;
     385    size_t delta, w, k, t;
     386  
     387    if (rdm->errored || rdm->skipping_printing)
     388      return;
     389  
     390    if (rdm->version == -1)
     391      {
     392        /* Ignore leading underscores preceding escape sequences.
     393           The mangler inserts an underscore to make sure the
     394           identifier begins with a XID_Start character. */
     395        if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
     396            && ident.ascii[1] == '$')
     397          {
     398            ident.ascii++;
     399            ident.ascii_len--;
     400          }
     401  
     402        while (ident.ascii_len > 0)
     403          {
     404            /* Handle legacy escape sequences ("$...$", ".." or "."). */
     405            if (ident.ascii[0] == '$')
     406              {
     407                unescaped
     408                    = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
     409                if (unescaped)
     410                  print_str (rdm, &unescaped, 1);
     411                else
     412                  {
     413                    /* Unexpected escape sequence, print the rest verbatim. */
     414                    print_str (rdm, ident.ascii, ident.ascii_len);
     415                    return;
     416                  }
     417              }
     418            else if (ident.ascii[0] == '.')
     419              {
     420                if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
     421                  {
     422                    /* ".." becomes "::" */
     423                    PRINT ("::");
     424                    len = 2;
     425                  }
     426                else
     427                  {
     428                    PRINT (".");
     429                    len = 1;
     430                  }
     431              }
     432            else
     433              {
     434                /* Print everything before the next escape sequence, at once. */
     435                for (len = 0; len < ident.ascii_len; len++)
     436                  if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
     437                    break;
     438  
     439                print_str (rdm, ident.ascii, len);
     440              }
     441  
     442            ident.ascii += len;
     443            ident.ascii_len -= len;
     444          }
     445  
     446        return;
     447      }
     448  
     449    if (!ident.punycode)
     450      {
     451        print_str (rdm, ident.ascii, ident.ascii_len);
     452        return;
     453      }
     454  
     455    len = 0;
     456    cap = 4;
     457    while (cap < ident.ascii_len)
     458      {
     459        cap *= 2;
     460        /* Check for overflows. */
     461        if ((cap * 4) / 4 != cap)
     462          {
     463            rdm->errored = 1;
     464            return;
     465          }
     466      }
     467  
     468    /* Store the output codepoints as groups of 4 UTF-8 bytes. */
     469    out = (uint8_t *)malloc (cap * 4);
     470    if (!out)
     471      {
     472        rdm->errored = 1;
     473        return;
     474      }
     475  
     476    /* Populate initial output from ASCII fragment. */
     477    for (len = 0; len < ident.ascii_len; len++)
     478      {
     479        p = out + 4 * len;
     480        p[0] = 0;
     481        p[1] = 0;
     482        p[2] = 0;
     483        p[3] = ident.ascii[len];
     484      }
     485  
     486    /* Punycode parameters and initial state. */
     487    base = 36;
     488    t_min = 1;
     489    t_max = 26;
     490    skew = 38;
     491    damp = 700;
     492    bias = 72;
     493    i = 0;
     494    c = 0x80;
     495  
     496    punycode_pos = 0;
     497    while (punycode_pos < ident.punycode_len)
     498      {
     499        /* Read one delta value. */
     500        delta = 0;
     501        w = 1;
     502        k = 0;
     503        do
     504          {
     505            k += base;
     506            t = k < bias ? 0 : (k - bias);
     507            if (t < t_min)
     508              t = t_min;
     509            if (t > t_max)
     510              t = t_max;
     511  
     512            if (punycode_pos >= ident.punycode_len)
     513              goto cleanup;
     514            d = ident.punycode[punycode_pos++];
     515  
     516            if (ISLOWER (d))
     517              d = d - 'a';
     518            else if (ISDIGIT (d))
     519              d = 26 + (d - '0');
     520            else
     521              {
     522                rdm->errored = 1;
     523                goto cleanup;
     524              }
     525  
     526            delta += d * w;
     527            w *= base - t;
     528          }
     529        while (d >= t);
     530  
     531        /* Compute the new insert position and character. */
     532        len++;
     533        i += delta;
     534        c += i / len;
     535        i %= len;
     536  
     537        /* Ensure enough space is available. */
     538        if (cap < len)
     539          {
     540            cap *= 2;
     541            /* Check for overflows. */
     542            if ((cap * 4) / 4 != cap || cap < len)
     543              {
     544                rdm->errored = 1;
     545                goto cleanup;
     546              }
     547          }
     548        p = (uint8_t *)realloc (out, cap * 4);
     549        if (!p)
     550          {
     551            rdm->errored = 1;
     552            goto cleanup;
     553          }
     554        out = p;
     555  
     556        /* Move the characters after the insert position. */
     557        p = out + i * 4;
     558        memmove (p + 4, p, (len - i - 1) * 4);
     559  
     560        /* Insert the new character, as UTF-8 bytes. */
     561        p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
     562        p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
     563        p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
     564        p[3] = 0x80 | (c & 0x3f);
     565  
     566        /* If there are no more deltas, decoding is complete. */
     567        if (punycode_pos == ident.punycode_len)
     568          break;
     569  
     570        i++;
     571  
     572        /* Perform bias adaptation. */
     573        delta /= damp;
     574        damp = 2;
     575  
     576        delta += delta / len;
     577        k = 0;
     578        while (delta > ((base - t_min) * t_max) / 2)
     579          {
     580            delta /= base - t_min;
     581            k += base;
     582          }
     583        bias = k + ((base - t_min + 1) * delta) / (delta + skew);
     584      }
     585  
     586    /* Remove all the 0 bytes to leave behind an UTF-8 string. */
     587    for (i = 0, j = 0; i < len * 4; i++)
     588      if (out[i] != 0)
     589        out[j++] = out[i];
     590  
     591    print_str (rdm, (const char *)out, j);
     592  
     593  cleanup:
     594    free (out);
     595  }
     596  
     597  /* Print the lifetime according to the previously decoded index.
     598     An index of `0` always refers to `'_`, but starting with `1`,
     599     indices refer to late-bound lifetimes introduced by a binder. */
     600  static void
     601  print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
     602  {
     603    char c;
     604    uint64_t depth;
     605  
     606    PRINT ("'");
     607    if (lt == 0)
     608      {
     609        PRINT ("_");
     610        return;
     611      }
     612  
     613    depth = rdm->bound_lifetime_depth - lt;
     614    /* Try to print lifetimes alphabetically first. */
     615    if (depth < 26)
     616      {
     617        c = 'a' + depth;
     618        print_str (rdm, &c, 1);
     619      }
     620    else
     621      {
     622        /* Use `'_123` after running out of letters. */
     623        PRINT ("_");
     624        print_uint64 (rdm, depth);
     625      }
     626  }
     627  
     628  /* Demangling functions. */
     629  
     630  static void demangle_binder (struct rust_demangler *rdm);
     631  static void demangle_path (struct rust_demangler *rdm, int in_value);
     632  static void demangle_generic_arg (struct rust_demangler *rdm);
     633  static void demangle_type (struct rust_demangler *rdm);
     634  static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
     635  static void demangle_dyn_trait (struct rust_demangler *rdm);
     636  static void demangle_const (struct rust_demangler *rdm);
     637  static void demangle_const_uint (struct rust_demangler *rdm);
     638  static void demangle_const_int (struct rust_demangler *rdm);
     639  static void demangle_const_bool (struct rust_demangler *rdm);
     640  static void demangle_const_char (struct rust_demangler *rdm);
     641  
     642  /* Optionally enter a binder ('G') for late-bound lifetimes,
     643     printing e.g. `for<'a, 'b> `, and make those lifetimes visible
     644     to the caller (via depth level, which the caller should reset). */
     645  static void
     646  demangle_binder (struct rust_demangler *rdm)
     647  {
     648    uint64_t i, bound_lifetimes;
     649  
     650    if (rdm->errored)
     651      return;
     652  
     653    bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
     654    if (bound_lifetimes > 0)
     655      {
     656        PRINT ("for<");
     657        for (i = 0; i < bound_lifetimes; i++)
     658          {
     659            if (i > 0)
     660              PRINT (", ");
     661            rdm->bound_lifetime_depth++;
     662            print_lifetime_from_index (rdm, 1);
     663          }
     664        PRINT ("> ");
     665      }
     666  }
     667  
     668  static void
     669  demangle_path (struct rust_demangler *rdm, int in_value)
     670  {
     671    char tag, ns;
     672    int was_skipping_printing;
     673    size_t i, backref, old_next;
     674    uint64_t dis;
     675    struct rust_mangled_ident name;
     676  
     677    if (rdm->errored)
     678      return;
     679  
     680    if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
     681      {
     682        ++ rdm->recursion;
     683        if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
     684  	/* FIXME: There ought to be a way to report
     685  	   that the recursion limit has been reached.  */
     686  	goto fail_return;
     687      }
     688  
     689    switch (tag = next (rdm))
     690      {
     691      case 'C':
     692        dis = parse_disambiguator (rdm);
     693        name = parse_ident (rdm);
     694  
     695        print_ident (rdm, name);
     696        if (rdm->verbose)
     697          {
     698            PRINT ("[");
     699            print_uint64_hex (rdm, dis);
     700            PRINT ("]");
     701          }
     702        break;
     703      case 'N':
     704        ns = next (rdm);
     705        if (!ISLOWER (ns) && !ISUPPER (ns))
     706  	goto fail_return;
     707  
     708        demangle_path (rdm, in_value);
     709  
     710        dis = parse_disambiguator (rdm);
     711        name = parse_ident (rdm);
     712  
     713        if (ISUPPER (ns))
     714          {
     715            /* Special namespaces, like closures and shims. */
     716            PRINT ("::{");
     717            switch (ns)
     718              {
     719              case 'C':
     720                PRINT ("closure");
     721                break;
     722              case 'S':
     723                PRINT ("shim");
     724                break;
     725              default:
     726                print_str (rdm, &ns, 1);
     727              }
     728            if (name.ascii || name.punycode)
     729              {
     730                PRINT (":");
     731                print_ident (rdm, name);
     732              }
     733            PRINT ("#");
     734            print_uint64 (rdm, dis);
     735            PRINT ("}");
     736          }
     737        else
     738          {
     739            /* Implementation-specific/unspecified namespaces. */
     740  
     741            if (name.ascii || name.punycode)
     742              {
     743                PRINT ("::");
     744                print_ident (rdm, name);
     745              }
     746          }
     747        break;
     748      case 'M':
     749      case 'X':
     750        /* Ignore the `impl`'s own path.*/
     751        parse_disambiguator (rdm);
     752        was_skipping_printing = rdm->skipping_printing;
     753        rdm->skipping_printing = 1;
     754        demangle_path (rdm, in_value);
     755        rdm->skipping_printing = was_skipping_printing;
     756        /* fallthrough */
     757      case 'Y':
     758        PRINT ("<");
     759        demangle_type (rdm);
     760        if (tag != 'M')
     761          {
     762            PRINT (" as ");
     763            demangle_path (rdm, 0);
     764          }
     765        PRINT (">");
     766        break;
     767      case 'I':
     768        demangle_path (rdm, in_value);
     769        if (in_value)
     770          PRINT ("::");
     771        PRINT ("<");
     772        for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
     773          {
     774            if (i > 0)
     775              PRINT (", ");
     776            demangle_generic_arg (rdm);
     777          }
     778        PRINT (">");
     779        break;
     780      case 'B':
     781        backref = parse_integer_62 (rdm);
     782        if (!rdm->skipping_printing)
     783          {
     784            old_next = rdm->next;
     785            rdm->next = backref;
     786            demangle_path (rdm, in_value);
     787            rdm->next = old_next;
     788          }
     789        break;
     790      default:
     791        goto fail_return;
     792      }
     793    goto pass_return;
     794  
     795   fail_return:
     796    rdm->errored = 1;
     797   pass_return:
     798    if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
     799      -- rdm->recursion;
     800  }
     801  
     802  static void
     803  demangle_generic_arg (struct rust_demangler *rdm)
     804  {
     805    uint64_t lt;
     806    if (eat (rdm, 'L'))
     807      {
     808        lt = parse_integer_62 (rdm);
     809        print_lifetime_from_index (rdm, lt);
     810      }
     811    else if (eat (rdm, 'K'))
     812      demangle_const (rdm);
     813    else
     814      demangle_type (rdm);
     815  }
     816  
     817  static const char *
     818  basic_type (char tag)
     819  {
     820    switch (tag)
     821      {
     822      case 'b':
     823        return "bool";
     824      case 'c':
     825        return "char";
     826      case 'e':
     827        return "str";
     828      case 'u':
     829        return "()";
     830      case 'a':
     831        return "i8";
     832      case 's':
     833        return "i16";
     834      case 'l':
     835        return "i32";
     836      case 'x':
     837        return "i64";
     838      case 'n':
     839        return "i128";
     840      case 'i':
     841        return "isize";
     842      case 'h':
     843        return "u8";
     844      case 't':
     845        return "u16";
     846      case 'm':
     847        return "u32";
     848      case 'y':
     849        return "u64";
     850      case 'o':
     851        return "u128";
     852      case 'j':
     853        return "usize";
     854      case 'f':
     855        return "f32";
     856      case 'd':
     857        return "f64";
     858      case 'z':
     859        return "!";
     860      case 'p':
     861        return "_";
     862      case 'v':
     863        return "...";
     864  
     865      default:
     866        return NULL;
     867      }
     868  }
     869  
     870  static void
     871  demangle_type (struct rust_demangler *rdm)
     872  {
     873    char tag;
     874    size_t i, old_next, backref;
     875    uint64_t lt, old_bound_lifetime_depth;
     876    const char *basic;
     877    struct rust_mangled_ident abi;
     878  
     879    if (rdm->errored)
     880      return;
     881  
     882    tag = next (rdm);
     883  
     884    basic = basic_type (tag);
     885    if (basic)
     886      {
     887        PRINT (basic);
     888        return;
     889      }
     890  
     891     if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
     892      {
     893        ++ rdm->recursion;
     894        if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
     895  	/* FIXME: There ought to be a way to report
     896  	   that the recursion limit has been reached.  */
     897  	{
     898  	  rdm->errored = 1;
     899  	  -- rdm->recursion;
     900  	  return;
     901  	}
     902      }
     903  
     904    switch (tag)
     905      {
     906      case 'R':
     907      case 'Q':
     908        PRINT ("&");
     909        if (eat (rdm, 'L'))
     910          {
     911            lt = parse_integer_62 (rdm);
     912            if (lt)
     913              {
     914                print_lifetime_from_index (rdm, lt);
     915                PRINT (" ");
     916              }
     917          }
     918        if (tag != 'R')
     919          PRINT ("mut ");
     920        demangle_type (rdm);
     921        break;
     922      case 'P':
     923      case 'O':
     924        PRINT ("*");
     925        if (tag != 'P')
     926          PRINT ("mut ");
     927        else
     928          PRINT ("const ");
     929        demangle_type (rdm);
     930        break;
     931      case 'A':
     932      case 'S':
     933        PRINT ("[");
     934        demangle_type (rdm);
     935        if (tag == 'A')
     936          {
     937            PRINT ("; ");
     938            demangle_const (rdm);
     939          }
     940        PRINT ("]");
     941        break;
     942      case 'T':
     943        PRINT ("(");
     944        for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
     945          {
     946            if (i > 0)
     947              PRINT (", ");
     948            demangle_type (rdm);
     949          }
     950        if (i == 1)
     951          PRINT (",");
     952        PRINT (")");
     953        break;
     954      case 'F':
     955        old_bound_lifetime_depth = rdm->bound_lifetime_depth;
     956        demangle_binder (rdm);
     957  
     958        if (eat (rdm, 'U'))
     959          PRINT ("unsafe ");
     960  
     961        if (eat (rdm, 'K'))
     962          {
     963            if (eat (rdm, 'C'))
     964              {
     965                abi.ascii = "C";
     966                abi.ascii_len = 1;
     967              }
     968            else
     969              {
     970                abi = parse_ident (rdm);
     971                if (!abi.ascii || abi.punycode)
     972                  {
     973                    rdm->errored = 1;
     974                    goto restore;
     975                  }
     976              }
     977  
     978            PRINT ("extern \"");
     979  
     980            /* If the ABI had any `-`, they were replaced with `_`,
     981               so the parts between `_` have to be re-joined with `-`. */
     982            for (i = 0; i < abi.ascii_len; i++)
     983              {
     984                if (abi.ascii[i] == '_')
     985                  {
     986                    print_str (rdm, abi.ascii, i);
     987                    PRINT ("-");
     988                    abi.ascii += i + 1;
     989                    abi.ascii_len -= i + 1;
     990                    i = 0;
     991                  }
     992              }
     993            print_str (rdm, abi.ascii, abi.ascii_len);
     994  
     995            PRINT ("\" ");
     996          }
     997  
     998        PRINT ("fn(");
     999        for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
    1000          {
    1001            if (i > 0)
    1002              PRINT (", ");
    1003            demangle_type (rdm);
    1004          }
    1005        PRINT (")");
    1006  
    1007        if (eat (rdm, 'u'))
    1008          {
    1009            /* Skip printing the return type if it's 'u', i.e. `()`. */
    1010          }
    1011        else
    1012          {
    1013            PRINT (" -> ");
    1014            demangle_type (rdm);
    1015          }
    1016  
    1017      /* Restore `bound_lifetime_depth` to outside the binder. */
    1018      restore:
    1019        rdm->bound_lifetime_depth = old_bound_lifetime_depth;
    1020        break;
    1021      case 'D':
    1022        PRINT ("dyn ");
    1023  
    1024        old_bound_lifetime_depth = rdm->bound_lifetime_depth;
    1025        demangle_binder (rdm);
    1026  
    1027        for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
    1028          {
    1029            if (i > 0)
    1030              PRINT (" + ");
    1031            demangle_dyn_trait (rdm);
    1032          }
    1033  
    1034        /* Restore `bound_lifetime_depth` to outside the binder. */
    1035        rdm->bound_lifetime_depth = old_bound_lifetime_depth;
    1036  
    1037        if (!eat (rdm, 'L'))
    1038          {
    1039            rdm->errored = 1;
    1040            return;
    1041          }
    1042        lt = parse_integer_62 (rdm);
    1043        if (lt)
    1044          {
    1045            PRINT (" + ");
    1046            print_lifetime_from_index (rdm, lt);
    1047          }
    1048        break;
    1049      case 'B':
    1050        backref = parse_integer_62 (rdm);
    1051        if (!rdm->skipping_printing)
    1052          {
    1053            old_next = rdm->next;
    1054            rdm->next = backref;
    1055            demangle_type (rdm);
    1056            rdm->next = old_next;
    1057          }
    1058        break;
    1059      default:
    1060        /* Go back to the tag, so `demangle_path` also sees it. */
    1061        rdm->next--;
    1062        demangle_path (rdm, 0);
    1063      }
    1064  
    1065    if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
    1066      -- rdm->recursion;
    1067  }
    1068  
    1069  /* A trait in a trait object may have some "existential projections"
    1070     (i.e. associated type bindings) after it, which should be printed
    1071     in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
    1072     To this end, this method will keep the `<...>` of an 'I' path
    1073     open, by omitting the `>`, and return `Ok(true)` in that case. */
    1074  static int
    1075  demangle_path_maybe_open_generics (struct rust_demangler *rdm)
    1076  {
    1077    int open;
    1078    size_t i, old_next, backref;
    1079  
    1080    open = 0;
    1081  
    1082    if (rdm->errored)
    1083      return open;
    1084  
    1085    if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
    1086      {
    1087        ++ rdm->recursion;
    1088        if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
    1089  	{
    1090  	  /* FIXME: There ought to be a way to report
    1091  	     that the recursion limit has been reached.  */
    1092  	  rdm->errored = 1;
    1093  	  goto end_of_func;
    1094  	}
    1095      }
    1096  
    1097    if (eat (rdm, 'B'))
    1098      {
    1099        backref = parse_integer_62 (rdm);
    1100        if (!rdm->skipping_printing)
    1101          {
    1102            old_next = rdm->next;
    1103            rdm->next = backref;
    1104            open = demangle_path_maybe_open_generics (rdm);
    1105            rdm->next = old_next;
    1106          }
    1107      }
    1108    else if (eat (rdm, 'I'))
    1109      {
    1110        demangle_path (rdm, 0);
    1111        PRINT ("<");
    1112        open = 1;
    1113        for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
    1114          {
    1115            if (i > 0)
    1116              PRINT (", ");
    1117            demangle_generic_arg (rdm);
    1118          }
    1119      }
    1120    else
    1121      demangle_path (rdm, 0);
    1122  
    1123   end_of_func:
    1124    if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
    1125      -- rdm->recursion;
    1126  
    1127    return open;
    1128  }
    1129  
    1130  static void
    1131  demangle_dyn_trait (struct rust_demangler *rdm)
    1132  {
    1133    int open;
    1134    struct rust_mangled_ident name;
    1135  
    1136    if (rdm->errored)
    1137      return;
    1138  
    1139    open = demangle_path_maybe_open_generics (rdm);
    1140  
    1141    while (eat (rdm, 'p'))
    1142      {
    1143        if (!open)
    1144          PRINT ("<");
    1145        else
    1146          PRINT (", ");
    1147        open = 1;
    1148  
    1149        name = parse_ident (rdm);
    1150        print_ident (rdm, name);
    1151        PRINT (" = ");
    1152        demangle_type (rdm);
    1153      }
    1154  
    1155    if (open)
    1156      PRINT (">");
    1157  }
    1158  
    1159  static void
    1160  demangle_const (struct rust_demangler *rdm)
    1161  {
    1162    char ty_tag;
    1163    size_t old_next, backref;
    1164  
    1165    if (rdm->errored)
    1166      return;
    1167  
    1168    if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
    1169      {
    1170        ++ rdm->recursion;
    1171        if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
    1172  	/* FIXME: There ought to be a way to report
    1173  	   that the recursion limit has been reached.  */
    1174  	goto fail_return;
    1175      }
    1176  
    1177    if (eat (rdm, 'B'))
    1178      {
    1179        backref = parse_integer_62 (rdm);
    1180        if (!rdm->skipping_printing)
    1181          {
    1182            old_next = rdm->next;
    1183            rdm->next = backref;
    1184            demangle_const (rdm);
    1185            rdm->next = old_next;
    1186          }
    1187        goto pass_return;
    1188      }
    1189  
    1190    ty_tag = next (rdm);
    1191    switch (ty_tag)
    1192      {
    1193      /* Placeholder. */
    1194      case 'p':
    1195        PRINT ("_");
    1196        goto pass_return;
    1197  
    1198      /* Unsigned integer types. */
    1199      case 'h':
    1200      case 't':
    1201      case 'm':
    1202      case 'y':
    1203      case 'o':
    1204      case 'j':
    1205        demangle_const_uint (rdm);
    1206        break;
    1207  
    1208      /* Signed integer types. */
    1209      case 'a':
    1210      case 's':
    1211      case 'l':
    1212      case 'x':
    1213      case 'n':
    1214      case 'i':
    1215        demangle_const_int (rdm);
    1216        break;
    1217  
    1218      /* Boolean. */
    1219      case 'b':
    1220        demangle_const_bool (rdm);
    1221        break;
    1222  
    1223      /* Character. */
    1224      case 'c':
    1225        demangle_const_char (rdm);
    1226        break;
    1227  
    1228      default:
    1229        goto fail_return;
    1230      }
    1231  
    1232    if (!rdm->errored && rdm->verbose)
    1233      {
    1234        PRINT (": ");
    1235        PRINT (basic_type (ty_tag));
    1236      }
    1237    goto pass_return;
    1238  
    1239   fail_return:
    1240    rdm->errored = 1;
    1241   pass_return:
    1242    if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
    1243      -- rdm->recursion;
    1244  }
    1245  
    1246  static void
    1247  demangle_const_uint (struct rust_demangler *rdm)
    1248  {
    1249    size_t hex_len;
    1250    uint64_t value;
    1251  
    1252    if (rdm->errored)
    1253      return;
    1254  
    1255    hex_len = parse_hex_nibbles (rdm, &value);
    1256  
    1257    if (hex_len > 16)
    1258      {
    1259        /* Print anything that doesn't fit in `uint64_t` verbatim. */
    1260        PRINT ("0x");
    1261        print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
    1262      }
    1263    else if (hex_len > 0)
    1264      print_uint64 (rdm, value);
    1265    else
    1266      rdm->errored = 1;
    1267  }
    1268  
    1269  static void
    1270  demangle_const_int (struct rust_demangler *rdm)
    1271  {
    1272    if (eat (rdm, 'n'))
    1273      PRINT ("-");
    1274    demangle_const_uint (rdm);
    1275  }
    1276  
    1277  static void
    1278  demangle_const_bool (struct rust_demangler *rdm)
    1279  {
    1280    uint64_t value;
    1281  
    1282    if (parse_hex_nibbles (rdm, &value) != 1)
    1283      {
    1284        rdm->errored = 1;
    1285        return;
    1286      }
    1287  
    1288    if (value == 0)
    1289      PRINT ("false");
    1290    else if (value == 1)
    1291      PRINT ("true");
    1292    else
    1293      rdm->errored = 1;
    1294  }
    1295  
    1296  static void
    1297  demangle_const_char (struct rust_demangler *rdm)
    1298  {
    1299    size_t hex_len;
    1300    uint64_t value;
    1301  
    1302    hex_len = parse_hex_nibbles (rdm, &value);
    1303  
    1304    if (hex_len == 0 || hex_len > 8)
    1305      {
    1306        rdm->errored = 1;
    1307        return;
    1308      }
    1309  
    1310    /* Match Rust's character "debug" output as best as we can. */
    1311    PRINT ("'");
    1312    if (value == '\t')
    1313      PRINT ("\\t");
    1314    else if (value == '\r')
    1315      PRINT ("\\r");
    1316    else if (value == '\n')
    1317      PRINT ("\\n");
    1318    else if (value > ' ' && value < '~')
    1319      {
    1320        /* Rust also considers many non-ASCII codepoints to be printable, but
    1321  	 that logic is not easily ported to C. */
    1322        char c = value;
    1323        print_str (rdm, &c, 1);
    1324      }
    1325    else
    1326      {
    1327        PRINT ("\\u{");
    1328        print_uint64_hex (rdm, value);
    1329        PRINT ("}");
    1330      }
    1331    PRINT ("'");
    1332  }
    1333  
    1334  /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
    1335     The hex digits must contain at least 5 distinct digits. */
    1336  static int
    1337  is_legacy_prefixed_hash (struct rust_mangled_ident ident)
    1338  {
    1339    uint16_t seen;
    1340    int nibble;
    1341    size_t i, count;
    1342  
    1343    if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
    1344      return 0;
    1345  
    1346    seen = 0;
    1347    for (i = 0; i < 16; i++)
    1348      {
    1349        nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
    1350        if (nibble < 0)
    1351          return 0;
    1352        seen |= (uint16_t)1 << nibble;
    1353      }
    1354  
    1355    /* Count how many distinct digits were seen. */
    1356    count = 0;
    1357    while (seen)
    1358      {
    1359        if (seen & 1)
    1360          count++;
    1361        seen >>= 1;
    1362      }
    1363  
    1364    return count >= 5;
    1365  }
    1366  
    1367  int
    1368  rust_demangle_callback (const char *mangled, int options,
    1369                          demangle_callbackref callback, void *opaque)
    1370  {
    1371    const char *p;
    1372    struct rust_demangler rdm;
    1373    struct rust_mangled_ident ident;
    1374  
    1375    rdm.sym = mangled;
    1376    rdm.sym_len = 0;
    1377  
    1378    rdm.callback_opaque = opaque;
    1379    rdm.callback = callback;
    1380  
    1381    rdm.next = 0;
    1382    rdm.errored = 0;
    1383    rdm.skipping_printing = 0;
    1384    rdm.verbose = (options & DMGL_VERBOSE) != 0;
    1385    rdm.version = 0;
    1386    rdm.recursion = (options & DMGL_NO_RECURSE_LIMIT) ? RUST_NO_RECURSION_LIMIT : 0;
    1387    rdm.bound_lifetime_depth = 0;
    1388  
    1389    /* Rust symbols always start with _R (v0) or _ZN (legacy). */
    1390    if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
    1391      rdm.sym += 2;
    1392    else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
    1393      {
    1394        rdm.sym += 3;
    1395        rdm.version = -1;
    1396      }
    1397    else
    1398      return 0;
    1399  
    1400    /* Paths (v0) always start with uppercase characters. */
    1401    if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
    1402      return 0;
    1403  
    1404    /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
    1405    for (p = rdm.sym; *p; p++)
    1406      {
    1407        /* Rust v0 symbols can have '.' suffixes, ignore those.  */
    1408        if (rdm.version == 0 && *p == '.')
    1409          break;
    1410  
    1411        rdm.sym_len++;
    1412  
    1413        if (*p == '_' || ISALNUM (*p))
    1414          continue;
    1415  
    1416        /* Legacy Rust symbols can also contain [.:$] characters.
    1417           Or @ in the .suffix (which will be skipped, see below). */
    1418        if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'
    1419                                  || *p == '@'))
    1420          continue;
    1421  
    1422        return 0;
    1423      }
    1424  
    1425    /* Legacy Rust symbols need to be handled separately. */
    1426    if (rdm.version == -1)
    1427      {
    1428        /* Legacy Rust symbols always end with E.  But can be followed by a
    1429           .suffix (which we want to ignore).  */
    1430        int dot_suffix = 1;
    1431        while (rdm.sym_len > 0 &&
    1432               !(dot_suffix && rdm.sym[rdm.sym_len - 1] == 'E'))
    1433          {
    1434            dot_suffix = rdm.sym[rdm.sym_len - 1] == '.';
    1435            rdm.sym_len--;
    1436          }
    1437  
    1438        if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
    1439          return 0;
    1440        rdm.sym_len--;
    1441  
    1442        /* Legacy Rust symbols also always end with a path segment
    1443           that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
    1444           This early check, before any parse_ident calls, should
    1445           quickly filter out most C++ symbols unrelated to Rust. */
    1446        if (!(rdm.sym_len > 19
    1447              && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
    1448          return 0;
    1449  
    1450        do
    1451          {
    1452            ident = parse_ident (&rdm);
    1453            if (rdm.errored || !ident.ascii)
    1454              return 0;
    1455          }
    1456        while (rdm.next < rdm.sym_len);
    1457  
    1458        /* The last path segment should be the hash. */
    1459        if (!is_legacy_prefixed_hash (ident))
    1460          return 0;
    1461  
    1462        /* Reset the state for a second pass, to print the symbol. */
    1463        rdm.next = 0;
    1464        if (!rdm.verbose && rdm.sym_len > 19)
    1465          {
    1466            /* Hide the last segment, containing the hash, if not verbose. */
    1467            rdm.sym_len -= 19;
    1468          }
    1469  
    1470        do
    1471          {
    1472            if (rdm.next > 0)
    1473              print_str (&rdm, "::", 2);
    1474  
    1475            ident = parse_ident (&rdm);
    1476            print_ident (&rdm, ident);
    1477          }
    1478        while (rdm.next < rdm.sym_len);
    1479      }
    1480    else
    1481      {
    1482        demangle_path (&rdm, 1);
    1483  
    1484        /* Skip instantiating crate. */
    1485        if (!rdm.errored && rdm.next < rdm.sym_len)
    1486          {
    1487            rdm.skipping_printing = 1;
    1488            demangle_path (&rdm, 0);
    1489          }
    1490  
    1491        /* It's an error to not reach the end. */
    1492        rdm.errored |= rdm.next != rdm.sym_len;
    1493      }
    1494  
    1495    return !rdm.errored;
    1496  }
    1497  
    1498  /* Growable string buffers. */
    1499  struct str_buf
    1500  {
    1501    char *ptr;
    1502    size_t len;
    1503    size_t cap;
    1504    int errored;
    1505  };
    1506  
    1507  static void
    1508  str_buf_reserve (struct str_buf *buf, size_t extra)
    1509  {
    1510    size_t available, min_new_cap, new_cap;
    1511    char *new_ptr;
    1512  
    1513    /* Allocation failed before. */
    1514    if (buf->errored)
    1515      return;
    1516  
    1517    available = buf->cap - buf->len;
    1518  
    1519    if (extra <= available)
    1520      return;
    1521  
    1522    min_new_cap = buf->cap + (extra - available);
    1523  
    1524    /* Check for overflows. */
    1525    if (min_new_cap < buf->cap)
    1526      {
    1527        buf->errored = 1;
    1528        return;
    1529      }
    1530  
    1531    new_cap = buf->cap;
    1532  
    1533    if (new_cap == 0)
    1534      new_cap = 4;
    1535  
    1536    /* Double capacity until sufficiently large. */
    1537    while (new_cap < min_new_cap)
    1538      {
    1539        new_cap *= 2;
    1540  
    1541        /* Check for overflows. */
    1542        if (new_cap < buf->cap)
    1543          {
    1544            buf->errored = 1;
    1545            return;
    1546          }
    1547      }
    1548  
    1549    new_ptr = (char *)realloc (buf->ptr, new_cap);
    1550    if (new_ptr == NULL)
    1551      {
    1552        free (buf->ptr);
    1553        buf->ptr = NULL;
    1554        buf->len = 0;
    1555        buf->cap = 0;
    1556        buf->errored = 1;
    1557      }
    1558    else
    1559      {
    1560        buf->ptr = new_ptr;
    1561        buf->cap = new_cap;
    1562      }
    1563  }
    1564  
    1565  static void
    1566  str_buf_append (struct str_buf *buf, const char *data, size_t len)
    1567  {
    1568    str_buf_reserve (buf, len);
    1569    if (buf->errored)
    1570      return;
    1571  
    1572    memcpy (buf->ptr + buf->len, data, len);
    1573    buf->len += len;
    1574  }
    1575  
    1576  static void
    1577  str_buf_demangle_callback (const char *data, size_t len, void *opaque)
    1578  {
    1579    str_buf_append ((struct str_buf *)opaque, data, len);
    1580  }
    1581  
    1582  char *
    1583  rust_demangle (const char *mangled, int options)
    1584  {
    1585    struct str_buf out;
    1586    int success;
    1587  
    1588    out.ptr = NULL;
    1589    out.len = 0;
    1590    out.cap = 0;
    1591    out.errored = 0;
    1592  
    1593    success = rust_demangle_callback (mangled, options,
    1594                                      str_buf_demangle_callback, &out);
    1595  
    1596    if (!success)
    1597      {
    1598        free (out.ptr);
    1599        return NULL;
    1600      }
    1601  
    1602    str_buf_append (&out, "\0", 1);
    1603    return out.ptr;
    1604  }