1  /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */
       2  
       3  /* GLIB - Library of useful routines for C programming
       4   * Copyright (C) 2008 Red Hat, Inc.
       5   *
       6   * SPDX-License-Identifier: LGPL-2.1-or-later
       7   *
       8   * This library is free software; you can redistribute it and/or
       9   * modify it under the terms of the GNU Lesser General Public
      10   * License as published by the Free Software Foundation; either
      11   * version 2.1 of the License, or (at your option) any later version.
      12   *
      13   * This library is distributed in the hope that it will be useful,
      14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16   * Lesser General Public License for more details.
      17   *
      18   * You should have received a copy of the GNU Lesser General
      19   * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
      20   */
      21  
      22  #include "config.h"
      23  #include "glibconfig.h"
      24  
      25  #include <string.h>
      26  
      27  #ifdef G_OS_UNIX
      28  #include <unistd.h>
      29  #endif
      30  
      31  #include "ghostutils.h"
      32  
      33  #include "garray.h"
      34  #include "gmem.h"
      35  #include "gstring.h"
      36  #include "gstrfuncs.h"
      37  #include "glibintl.h"
      38  
      39  #ifdef G_PLATFORM_WIN32
      40  #include <windows.h>
      41  #endif
      42  
      43  
      44  #define IDNA_ACE_PREFIX     "xn--"
      45  #define IDNA_ACE_PREFIX_LEN 4
      46  
      47  /* Punycode constants, from RFC 3492. */
      48  
      49  #define PUNYCODE_BASE          36
      50  #define PUNYCODE_TMIN           1
      51  #define PUNYCODE_TMAX          26
      52  #define PUNYCODE_SKEW          38
      53  #define PUNYCODE_DAMP         700
      54  #define PUNYCODE_INITIAL_BIAS  72
      55  #define PUNYCODE_INITIAL_N   0x80
      56  
      57  #define PUNYCODE_IS_BASIC(cp) ((guint)(cp) < 0x80)
      58  
      59  /* Encode/decode a single base-36 digit */
      60  static inline gchar
      61  encode_digit (guint dig)
      62  {
      63    if (dig < 26)
      64      return dig + 'a';
      65    else
      66      return dig - 26 + '0';
      67  }
      68  
      69  static inline guint
      70  decode_digit (gchar dig)
      71  {
      72    if (dig >= 'A' && dig <= 'Z')
      73      return dig - 'A';
      74    else if (dig >= 'a' && dig <= 'z')
      75      return dig - 'a';
      76    else if (dig >= '0' && dig <= '9')
      77      return dig - '0' + 26;
      78    else
      79      return G_MAXUINT;
      80  }
      81  
      82  /* Punycode bias adaptation algorithm, RFC 3492 section 6.1 */
      83  static guint
      84  adapt (guint    delta,
      85         guint    numpoints,
      86         gboolean firsttime)
      87  {
      88    guint k;
      89  
      90    delta = firsttime ? delta / PUNYCODE_DAMP : delta / 2;
      91    delta += delta / numpoints;
      92  
      93    k = 0;
      94    while (delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2)
      95      {
      96        delta /= PUNYCODE_BASE - PUNYCODE_TMIN;
      97        k += PUNYCODE_BASE;
      98      }
      99  
     100    return k + ((PUNYCODE_BASE - PUNYCODE_TMIN + 1) * delta /
     101  	      (delta + PUNYCODE_SKEW));
     102  }
     103  
     104  /* Punycode encoder, RFC 3492 section 6.3. The algorithm is
     105   * sufficiently bizarre that it's not really worth trying to explain
     106   * here.
     107   */
     108  static gboolean
     109  punycode_encode (const gchar *input_utf8,
     110                   gsize        input_utf8_length,
     111  		 GString     *output)
     112  {
     113    guint delta, handled_chars, num_basic_chars, bias, j, q, k, t, digit;
     114    gunichar n, m, *input;
     115    glong written_chars;
     116    gsize input_length;
     117    gboolean success = FALSE;
     118  
     119    /* Convert from UTF-8 to Unicode code points */
     120    input = g_utf8_to_ucs4 (input_utf8, input_utf8_length, NULL,
     121  			  &written_chars, NULL);
     122    if (!input)
     123      return FALSE;
     124  
     125    input_length = (gsize) (written_chars > 0 ? written_chars : 0);
     126  
     127    /* Copy basic chars */
     128    for (j = num_basic_chars = 0; j < input_length; j++)
     129      {
     130        if (PUNYCODE_IS_BASIC (input[j]))
     131  	{
     132  	  g_string_append_c (output, g_ascii_tolower (input[j]));
     133  	  num_basic_chars++;
     134  	}
     135      }
     136    if (num_basic_chars)
     137      g_string_append_c (output, '-');
     138  
     139    handled_chars = num_basic_chars;
     140  
     141    /* Encode non-basic chars */
     142    delta = 0;
     143    bias = PUNYCODE_INITIAL_BIAS;
     144    n = PUNYCODE_INITIAL_N;
     145    while (handled_chars < input_length)
     146      {
     147        /* let m = the minimum {non-basic} code point >= n in the input */
     148        for (m = G_MAXUINT, j = 0; j < input_length; j++)
     149  	{
     150  	  if (input[j] >= n && input[j] < m)
     151  	    m = input[j];
     152  	}
     153  
     154        if (m - n > (G_MAXUINT - delta) / (handled_chars + 1))
     155  	goto fail;
     156        delta += (m - n) * (handled_chars + 1);
     157        n = m;
     158  
     159        for (j = 0; j < input_length; j++)
     160  	{
     161  	  if (input[j] < n)
     162  	    {
     163  	      if (++delta == 0)
     164  		goto fail;
     165  	    }
     166  	  else if (input[j] == n)
     167  	    {
     168  	      q = delta;
     169  	      for (k = PUNYCODE_BASE; ; k += PUNYCODE_BASE)
     170  		{
     171  		  if (k <= bias)
     172  		    t = PUNYCODE_TMIN;
     173  		  else if (k >= bias + PUNYCODE_TMAX)
     174  		    t = PUNYCODE_TMAX;
     175  		  else
     176  		    t = k - bias;
     177  		  if (q < t)
     178  		    break;
     179  		  digit = t + (q - t) % (PUNYCODE_BASE - t);
     180  		  g_string_append_c (output, encode_digit (digit));
     181  		  q = (q - t) / (PUNYCODE_BASE - t);
     182  		}
     183  
     184  	      g_string_append_c (output, encode_digit (q));
     185  	      bias = adapt (delta, handled_chars + 1, handled_chars == num_basic_chars);
     186  	      delta = 0;
     187  	      handled_chars++;
     188  	    }
     189  	}
     190  
     191        delta++;
     192        n++;
     193      }
     194  
     195    success = TRUE;
     196  
     197   fail:
     198    g_free (input);
     199    return success;
     200  }
     201  
     202  /* From RFC 3454, Table B.1 */
     203  #define idna_is_junk(ch) ((ch) == 0x00AD || (ch) == 0x1806 || (ch) == 0x200B || (ch) == 0x2060 || (ch) == 0xFEFF || (ch) == 0x034F || (ch) == 0x180B || (ch) == 0x180C || (ch) == 0x180D || (ch) == 0x200C || (ch) == 0x200D || ((ch) >= 0xFE00 && (ch) <= 0xFE0F))
     204  
     205  /* Scan @str for "junk" and return a cleaned-up string if any junk
     206   * is found. Else return %NULL.
     207   */
     208  static gchar *
     209  remove_junk (const gchar *str,
     210               gint         len)
     211  {
     212    GString *cleaned = NULL;
     213    const gchar *p;
     214    gunichar ch;
     215  
     216    for (p = str; len == -1 ? *p : p < str + len; p = g_utf8_next_char (p))
     217      {
     218        ch = g_utf8_get_char (p);
     219        if (idna_is_junk (ch))
     220  	{
     221  	  if (!cleaned)
     222  	    {
     223  	      cleaned = g_string_new (NULL);
     224  	      g_string_append_len (cleaned, str, p - str);
     225  	    }
     226  	}
     227        else if (cleaned)
     228  	g_string_append_unichar (cleaned, ch);
     229      }
     230  
     231    if (cleaned)
     232      return g_string_free (cleaned, FALSE);
     233    else
     234      return NULL;
     235  }
     236  
     237  static inline gboolean
     238  contains_uppercase_letters (const gchar *str,
     239                              gint         len)
     240  {
     241    const gchar *p;
     242  
     243    for (p = str; len == -1 ? *p : p < str + len; p = g_utf8_next_char (p))
     244      {
     245        if (g_unichar_isupper (g_utf8_get_char (p)))
     246  	return TRUE;
     247      }
     248    return FALSE;
     249  }
     250  
     251  static inline gboolean
     252  contains_non_ascii (const gchar *str,
     253                      gint         len)
     254  {
     255    const gchar *p;
     256  
     257    for (p = str; len == -1 ? *p : p < str + len; p++)
     258      {
     259        if ((guchar)*p > 0x80)
     260  	return TRUE;
     261      }
     262    return FALSE;
     263  }
     264  
     265  /* RFC 3454, Appendix C. ish. */
     266  static inline gboolean
     267  idna_is_prohibited (gunichar ch)
     268  {
     269    switch (g_unichar_type (ch))
     270      {
     271      case G_UNICODE_CONTROL:
     272      case G_UNICODE_FORMAT:
     273      case G_UNICODE_UNASSIGNED:
     274      case G_UNICODE_PRIVATE_USE:
     275      case G_UNICODE_SURROGATE:
     276      case G_UNICODE_LINE_SEPARATOR:
     277      case G_UNICODE_PARAGRAPH_SEPARATOR:
     278      case G_UNICODE_SPACE_SEPARATOR:
     279        return TRUE;
     280  
     281      case G_UNICODE_OTHER_SYMBOL:
     282        if (ch == 0xFFFC || ch == 0xFFFD ||
     283  	  (ch >= 0x2FF0 && ch <= 0x2FFB))
     284  	return TRUE;
     285        return FALSE;
     286  
     287      case G_UNICODE_NON_SPACING_MARK:
     288        if (ch == 0x0340 || ch == 0x0341)
     289  	return TRUE;
     290        return FALSE;
     291  
     292      default:
     293        return FALSE;
     294      }
     295  }
     296  
     297  /* RFC 3491 IDN cleanup algorithm. */
     298  static gchar *
     299  nameprep (const gchar *hostname,
     300            gint         len,
     301            gboolean    *is_unicode)
     302  {
     303    gchar *name, *tmp = NULL, *p;
     304  
     305    /* It would be nice if we could do this without repeatedly
     306     * allocating strings and converting back and forth between
     307     * gunichars and UTF-8... The code does at least avoid doing most of
     308     * the sub-operations when they would just be equivalent to a
     309     * g_strdup().
     310     */
     311  
     312    /* Remove presentation-only characters */
     313    name = remove_junk (hostname, len);
     314    if (name)
     315      {
     316        tmp = name;
     317        len = -1;
     318      }
     319    else
     320      name = (gchar *)hostname;
     321  
     322    /* Convert to lowercase */
     323    if (contains_uppercase_letters (name, len))
     324      {
     325        name = g_utf8_strdown (name, len);
     326        g_free (tmp);
     327        tmp = name;
     328        len = -1;
     329      }
     330  
     331    /* If there are no UTF8 characters, we're done. */
     332    if (!contains_non_ascii (name, len))
     333      {
     334        *is_unicode = FALSE;
     335        if (name == (gchar *)hostname)
     336          return len == -1 ? g_strdup (hostname) : g_strndup (hostname, len);
     337        else
     338          return name;
     339      }
     340  
     341    *is_unicode = TRUE;
     342  
     343    /* Normalize */
     344    name = g_utf8_normalize (name, len, G_NORMALIZE_NFKC);
     345    g_free (tmp);
     346    tmp = name;
     347  
     348    if (!name)
     349      return NULL;
     350  
     351    /* KC normalization may have created more capital letters (eg,
     352     * angstrom -> capital A with ring). So we have to lowercasify a
     353     * second time. (This is more-or-less how the nameprep algorithm
     354     * does it. If tolower(nfkc(tolower(X))) is guaranteed to be the
     355     * same as tolower(nfkc(X)), then we could skip the first tolower,
     356     * but I'm not sure it is.)
     357     */
     358    if (contains_uppercase_letters (name, -1))
     359      {
     360        name = g_utf8_strdown (name, -1);
     361        g_free (tmp);
     362        tmp = name;
     363      }
     364  
     365    /* Check for prohibited characters */
     366    for (p = name; *p; p = g_utf8_next_char (p))
     367      {
     368        if (idna_is_prohibited (g_utf8_get_char (p)))
     369  	{
     370  	  name = NULL;
     371            g_free (tmp);
     372  	  goto done;
     373  	}
     374      }
     375  
     376    /* FIXME: We're supposed to verify certain constraints on bidi
     377     * characters, but glib does not appear to have that information.
     378     */
     379  
     380   done:
     381    return name;
     382  }
     383  
     384  /* RFC 3490, section 3.1 says '.', 0x3002, 0xFF0E, and 0xFF61 count as
     385   * label-separating dots. @str must be '\0'-terminated.
     386   */
     387  #define idna_is_dot(str) ( \
     388    ((guchar)(str)[0] == '.') ||                                                 \
     389    ((guchar)(str)[0] == 0xE3 && (guchar)(str)[1] == 0x80 && (guchar)(str)[2] == 0x82) || \
     390    ((guchar)(str)[0] == 0xEF && (guchar)(str)[1] == 0xBC && (guchar)(str)[2] == 0x8E) || \
     391    ((guchar)(str)[0] == 0xEF && (guchar)(str)[1] == 0xBD && (guchar)(str)[2] == 0xA1) )
     392  
     393  static const gchar *
     394  idna_end_of_label (const gchar *str)
     395  {
     396    for (; *str; str = g_utf8_next_char (str))
     397      {
     398        if (idna_is_dot (str))
     399          return str;
     400      }
     401    return str;
     402  }
     403  
     404  static gsize
     405  get_hostname_max_length_bytes (void)
     406  {
     407  #if defined(G_OS_WIN32)
     408    wchar_t tmp[MAX_COMPUTERNAME_LENGTH];
     409    return sizeof (tmp) / sizeof (tmp[0]);
     410  #elif defined(_SC_HOST_NAME_MAX)
     411    glong max = sysconf (_SC_HOST_NAME_MAX);
     412    if (max > 0)
     413      return (gsize) max;
     414  
     415  #ifdef HOST_NAME_MAX
     416    return HOST_NAME_MAX;
     417  #else
     418    return _POSIX_HOST_NAME_MAX;
     419  #endif /* HOST_NAME_MAX */
     420  #else
     421    /* Fallback to some reasonable value
     422     * See https://stackoverflow.com/questions/8724954/what-is-the-maximum-number-of-characters-for-a-host-name-in-unix/28918017#28918017 */
     423    return 255;
     424  #endif
     425  }
     426  
     427  /* Returns %TRUE if `strlen (str) > comparison_length`, but without actually
     428   * running `strlen(str)`, as that would take a very long time for long
     429   * (untrusted) input strings. */
     430  static gboolean
     431  strlen_greater_than (const gchar *str,
     432                       gsize        comparison_length)
     433  {
     434    gsize i;
     435  
     436    for (i = 0; str[i] != '\0'; i++)
     437      if (i > comparison_length)
     438        return TRUE;
     439  
     440    return FALSE;
     441  }
     442  
     443  /**
     444   * g_hostname_to_ascii:
     445   * @hostname: a valid UTF-8 or ASCII hostname
     446   *
     447   * Converts @hostname to its canonical ASCII form; an ASCII-only
     448   * string containing no uppercase letters and not ending with a
     449   * trailing dot.
     450   *
     451   * Returns: (nullable) (transfer full): an ASCII hostname, which must be freed,
     452   *    or %NULL if @hostname is in some way invalid.
     453   *
     454   * Since: 2.22
     455   **/
     456  gchar *
     457  g_hostname_to_ascii (const gchar *hostname)
     458  {
     459    gchar *name, *label, *p;
     460    GString *out;
     461    gssize llen, oldlen;
     462    gboolean unicode;
     463    gsize hostname_max_length_bytes = get_hostname_max_length_bytes ();
     464  
     465    /* Do an initial check on the hostname length, as overlong hostnames take a
     466     * long time in the IDN cleanup algorithm in nameprep(). The ultimate
     467     * restriction is that the IDN-decoded (i.e. pure ASCII) hostname cannot be
     468     * longer than 255 bytes. That’s the least restrictive limit on hostname
     469     * length of all the ways hostnames can be interpreted. Typically, the
     470     * hostname will be an FQDN, which is limited to 253 bytes long. POSIX
     471     * hostnames are limited to `get_hostname_max_length_bytes()` (typically 255
     472     * bytes).
     473     *
     474     * See https://stackoverflow.com/a/28918017/2931197
     475     *
     476     * It’s possible for a hostname to be %-encoded, in which case its decoded
     477     * length will be as much as 3× shorter.
     478     *
     479     * It’s also possible for a hostname to use overlong UTF-8 encodings, in which
     480     * case its decoded length will be as much as 4× shorter.
     481     *
     482     * Note: This check is not intended as an absolute guarantee that a hostname
     483     * is the right length and will be accepted by other systems. It’s intended to
     484     * stop wildly-invalid hostnames from taking forever in nameprep().
     485     */
     486    if (hostname_max_length_bytes <= G_MAXSIZE / 4 &&
     487        strlen_greater_than (hostname, 4 * MAX (255, hostname_max_length_bytes)))
     488      return NULL;
     489  
     490    label = name = nameprep (hostname, -1, &unicode);
     491    if (!name || !unicode)
     492      return name;
     493  
     494    out = g_string_new (NULL);
     495  
     496    do
     497      {
     498        unicode = FALSE;
     499        for (p = label; *p && !idna_is_dot (p); p++)
     500  	{
     501  	  if ((guchar)*p > 0x80)
     502  	    unicode = TRUE;
     503  	}
     504  
     505        oldlen = out->len;
     506        llen = p - label;
     507        if (unicode)
     508  	{
     509            if (!strncmp (label, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN))
     510              goto fail;
     511  
     512  	  g_string_append (out, IDNA_ACE_PREFIX);
     513  	  if (!punycode_encode (label, llen, out))
     514  	    goto fail;
     515  	}
     516        else
     517          g_string_append_len (out, label, llen);
     518  
     519        if (out->len - oldlen > 63)
     520  	goto fail;
     521  
     522        label += llen;
     523        if (*label)
     524          label = g_utf8_next_char (label);
     525        if (*label)
     526          g_string_append_c (out, '.');
     527      }
     528    while (*label);
     529  
     530    g_free (name);
     531    return g_string_free (out, FALSE);
     532  
     533   fail:
     534    g_free (name);
     535    g_string_free (out, TRUE);
     536    return NULL;
     537  }
     538  
     539  /**
     540   * g_hostname_is_non_ascii:
     541   * @hostname: a hostname
     542   *
     543   * Tests if @hostname contains Unicode characters. If this returns
     544   * %TRUE, you need to encode the hostname with g_hostname_to_ascii()
     545   * before using it in non-IDN-aware contexts.
     546   *
     547   * Note that a hostname might contain a mix of encoded and unencoded
     548   * segments, and so it is possible for g_hostname_is_non_ascii() and
     549   * g_hostname_is_ascii_encoded() to both return %TRUE for a name.
     550   *
     551   * Returns: %TRUE if @hostname contains any non-ASCII characters
     552   *
     553   * Since: 2.22
     554   **/
     555  gboolean
     556  g_hostname_is_non_ascii (const gchar *hostname)
     557  {
     558    return contains_non_ascii (hostname, -1);
     559  }
     560  
     561  /* Punycode decoder, RFC 3492 section 6.2. As with punycode_encode(),
     562   * read the RFC if you want to understand what this is actually doing.
     563   */
     564  static gboolean
     565  punycode_decode (const gchar *input,
     566                   gsize        input_length,
     567                   GString     *output)
     568  {
     569    GArray *output_chars;
     570    gunichar n;
     571    guint i, bias;
     572    guint oldi, w, k, digit, t;
     573    const gchar *split;
     574  
     575    n = PUNYCODE_INITIAL_N;
     576    i = 0;
     577    bias = PUNYCODE_INITIAL_BIAS;
     578  
     579    split = input + input_length - 1;
     580    while (split > input && *split != '-')
     581      split--;
     582    if (split > input)
     583      {
     584        output_chars = g_array_sized_new (FALSE, FALSE, sizeof (gunichar),
     585  					split - input);
     586        input_length -= (split - input) + 1;
     587        while (input < split)
     588  	{
     589  	  gunichar ch = (gunichar)*input++;
     590  	  if (!PUNYCODE_IS_BASIC (ch))
     591  	    goto fail;
     592  	  g_array_append_val (output_chars, ch);
     593  	}
     594        input++;
     595      }
     596    else
     597      output_chars = g_array_new (FALSE, FALSE, sizeof (gunichar));
     598  
     599    while (input_length)
     600      {
     601        oldi = i;
     602        w = 1;
     603        for (k = PUNYCODE_BASE; ; k += PUNYCODE_BASE)
     604  	{
     605  	  if (!input_length--)
     606  	    goto fail;
     607  	  digit = decode_digit (*input++);
     608  	  if (digit >= PUNYCODE_BASE)
     609  	    goto fail;
     610  	  if (digit > (G_MAXUINT - i) / w)
     611  	    goto fail;
     612  	  i += digit * w;
     613  	  if (k <= bias)
     614  	    t = PUNYCODE_TMIN;
     615  	  else if (k >= bias + PUNYCODE_TMAX)
     616  	    t = PUNYCODE_TMAX;
     617  	  else
     618  	    t = k - bias;
     619  	  if (digit < t)
     620  	    break;
     621  	  if (w > G_MAXUINT / (PUNYCODE_BASE - t))
     622  	    goto fail;
     623  	  w *= (PUNYCODE_BASE - t);
     624  	}
     625  
     626        bias = adapt (i - oldi, output_chars->len + 1, oldi == 0);
     627  
     628        if (i / (output_chars->len + 1) > G_MAXUINT - n)
     629  	goto fail;
     630        n += i / (output_chars->len + 1);
     631        i %= (output_chars->len + 1);
     632  
     633        g_array_insert_val (output_chars, i++, n);
     634      }
     635  
     636    for (i = 0; i < output_chars->len; i++)
     637      g_string_append_unichar (output, g_array_index (output_chars, gunichar, i));
     638    g_array_free (output_chars, TRUE);
     639    return TRUE;
     640  
     641   fail:
     642    g_array_free (output_chars, TRUE);
     643    return FALSE;
     644  }
     645  
     646  /**
     647   * g_hostname_to_unicode:
     648   * @hostname: a valid UTF-8 or ASCII hostname
     649   *
     650   * Converts @hostname to its canonical presentation form; a UTF-8
     651   * string in Unicode normalization form C, containing no uppercase
     652   * letters, no forbidden characters, and no ASCII-encoded segments,
     653   * and not ending with a trailing dot.
     654   *
     655   * Of course if @hostname is not an internationalized hostname, then
     656   * the canonical presentation form will be entirely ASCII.
     657   *
     658   * Returns: (nullable) (transfer full): a UTF-8 hostname, which must be freed,
     659   *    or %NULL if @hostname is in some way invalid.
     660   *
     661   * Since: 2.22
     662   **/
     663  gchar *
     664  g_hostname_to_unicode (const gchar *hostname)
     665  {
     666    GString *out;
     667    gssize llen;
     668    gsize hostname_max_length_bytes = get_hostname_max_length_bytes ();
     669  
     670    /* See the comment at the top of g_hostname_to_ascii(). */
     671    if (hostname_max_length_bytes <= G_MAXSIZE / 4 &&
     672        strlen_greater_than (hostname, 4 * MAX (255, hostname_max_length_bytes)))
     673      return NULL;
     674  
     675    out = g_string_new (NULL);
     676  
     677    do
     678      {
     679        llen = idna_end_of_label (hostname) - hostname;
     680        if (!g_ascii_strncasecmp (hostname, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN))
     681  	{
     682  	  hostname += IDNA_ACE_PREFIX_LEN;
     683  	  llen -= IDNA_ACE_PREFIX_LEN;
     684  	  if (!punycode_decode (hostname, llen, out))
     685  	    {
     686  	      g_string_free (out, TRUE);
     687  	      return NULL;
     688  	    }
     689  	}
     690        else
     691          {
     692            gboolean unicode;
     693            gchar *canonicalized = nameprep (hostname, llen, &unicode);
     694  
     695            if (!canonicalized)
     696              {
     697                g_string_free (out, TRUE);
     698                return NULL;
     699              }
     700            g_string_append (out, canonicalized);
     701            g_free (canonicalized);
     702          }
     703  
     704        hostname += llen;
     705        if (*hostname)
     706          hostname = g_utf8_next_char (hostname);
     707        if (*hostname)
     708          g_string_append_c (out, '.');
     709      }
     710    while (*hostname);
     711  
     712    return g_string_free (out, FALSE);
     713  }
     714  
     715  /**
     716   * g_hostname_is_ascii_encoded:
     717   * @hostname: a hostname
     718   *
     719   * Tests if @hostname contains segments with an ASCII-compatible
     720   * encoding of an Internationalized Domain Name. If this returns
     721   * %TRUE, you should decode the hostname with g_hostname_to_unicode()
     722   * before displaying it to the user.
     723   *
     724   * Note that a hostname might contain a mix of encoded and unencoded
     725   * segments, and so it is possible for g_hostname_is_non_ascii() and
     726   * g_hostname_is_ascii_encoded() to both return %TRUE for a name.
     727   *
     728   * Returns: %TRUE if @hostname contains any ASCII-encoded
     729   * segments.
     730   *
     731   * Since: 2.22
     732   **/
     733  gboolean
     734  g_hostname_is_ascii_encoded (const gchar *hostname)
     735  {
     736    while (1)
     737      {
     738        if (!g_ascii_strncasecmp (hostname, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN))
     739  	return TRUE;
     740        hostname = idna_end_of_label (hostname);
     741        if (*hostname)
     742          hostname = g_utf8_next_char (hostname);
     743        if (!*hostname)
     744  	return FALSE;
     745      }
     746  }
     747  
     748  /**
     749   * g_hostname_is_ip_address:
     750   * @hostname: a hostname (or IP address in string form)
     751   *
     752   * Tests if @hostname is the string form of an IPv4 or IPv6 address.
     753   * (Eg, "192.168.0.1".)
     754   *
     755   * Since 2.66, IPv6 addresses with a zone-id are accepted (RFC6874).
     756   *
     757   * Returns: %TRUE if @hostname is an IP address
     758   *
     759   * Since: 2.22
     760   **/
     761  gboolean
     762  g_hostname_is_ip_address (const gchar *hostname)
     763  {
     764    gchar *p, *end;
     765    gint nsegments, octet;
     766  
     767    /* On Linux we could implement this using inet_pton, but the Windows
     768     * equivalent of that requires linking against winsock, so we just
     769     * figure this out ourselves. Tested by tests/hostutils.c.
     770     */
     771  
     772    p = (char *)hostname;
     773  
     774    if (strchr (p, ':'))
     775      {
     776        gboolean skipped;
     777  
     778        /* If it contains a ':', it's an IPv6 address (assuming it's an
     779         * IP address at all). This consists of eight ':'-separated
     780         * segments, each containing a 1-4 digit hex number, except that
     781         * optionally: (a) the last two segments can be replaced by an
     782         * IPv4 address, and (b) a single span of 1 to 8 "0000" segments
     783         * can be replaced with just "::".
     784         */
     785  
     786        nsegments = 0;
     787        skipped = FALSE;
     788        while (*p && *p != '%' && nsegments < 8)
     789          {
     790            /* Each segment after the first must be preceded by a ':'.
     791             * (We also handle half of the "string starts with ::" case
     792             * here.)
     793             */
     794            if (p != (char *)hostname || (p[0] == ':' && p[1] == ':'))
     795              {
     796                if (*p != ':')
     797                  return FALSE;
     798                p++;
     799              }
     800  
     801            /* If there's another ':', it means we're skipping some segments */
     802            if (*p == ':' && !skipped)
     803              {
     804                skipped = TRUE;
     805                nsegments++;
     806  
     807                /* Handle the "string ends with ::" case */
     808                if (!p[1])
     809                  p++;
     810  
     811                continue;
     812              }
     813  
     814            /* Read the segment, make sure it's valid. */
     815            for (end = p; g_ascii_isxdigit (*end); end++)
     816              ;
     817            if (end == p || end > p + 4)
     818              return FALSE;
     819  
     820            if (*end == '.')
     821              {
     822                if ((nsegments == 6 && !skipped) || (nsegments <= 6 && skipped))
     823                  goto parse_ipv4;
     824                else
     825                  return FALSE;
     826              }
     827  
     828            nsegments++;
     829            p = end;
     830          }
     831  
     832        return (!*p || (p[0] == '%' && p[1])) && (nsegments == 8 || skipped);
     833      }
     834  
     835   parse_ipv4:
     836  
     837    /* Parse IPv4: N.N.N.N, where each N <= 255 and doesn't have leading 0s. */
     838    for (nsegments = 0; nsegments < 4; nsegments++)
     839      {
     840        if (nsegments != 0)
     841          {
     842            if (*p != '.')
     843              return FALSE;
     844            p++;
     845          }
     846  
     847        /* Check the segment; a little tricker than the IPv6 case since
     848         * we can't allow extra leading 0s, and we can't assume that all
     849         * strings of valid length are within range.
     850         */
     851        octet = 0;
     852        if (*p == '0')
     853          end = p + 1;
     854        else
     855          {
     856            for (end = p; g_ascii_isdigit (*end); end++)
     857              {
     858                octet = 10 * octet + (*end - '0');
     859  
     860                if (octet > 255)
     861                  break;
     862              }
     863          }
     864        if (end == p || end > p + 3 || octet > 255)
     865          return FALSE;
     866  
     867        p = end;
     868      }
     869  
     870    /* If there's nothing left to parse, then it's ok. */
     871    return !*p;
     872  }