(root)/
gettext-0.22.4/
gettext-tools/
gnulib-lib/
libxml/
uri.c
       1  /* libxml2 - Library for parsing XML documents
       2   * Copyright (C) 2006-2019 Free Software Foundation, Inc.
       3   *
       4   * This file is not part of the GNU gettext program, but is used with
       5   * GNU gettext.
       6   *
       7   * The original copyright notice is as follows:
       8   */
       9  
      10  /*
      11   * Copyright (C) 1998-2012 Daniel Veillard.  All Rights Reserved.
      12   *
      13   * Permission is hereby granted, free of charge, to any person obtaining a copy
      14   * of this software and associated documentation files (the "Software"), to deal
      15   * in the Software without restriction, including without limitation the rights
      16   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      17   * copies of the Software, and to permit persons to whom the Software is fur-
      18   * nished to do so, subject to the following conditions:
      19   *
      20   * The above copyright notice and this permission notice shall be included in
      21   * all copies or substantial portions of the Software.
      22   *
      23   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      24   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
      25   * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
      26   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      27   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      28   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
      29   * THE SOFTWARE.
      30   *
      31   * daniel@veillard.com
      32   */
      33  
      34  /**
      35   * uri.c: set of generic URI related routines
      36   *
      37   * Reference: RFCs 3986, 2732 and 2373
      38   */
      39  
      40  #define IN_LIBXML
      41  #include "libxml.h"
      42  
      43  #include <string.h>
      44  
      45  #include <libxml/xmlmemory.h>
      46  #include <libxml/uri.h>
      47  #include <libxml/globals.h>
      48  #include <libxml/xmlerror.h>
      49  
      50  /**
      51   * MAX_URI_LENGTH:
      52   *
      53   * The definition of the URI regexp in the above RFC has no size limit
      54   * In practice they are usually relativey short except for the
      55   * data URI scheme as defined in RFC 2397. Even for data URI the usual
      56   * maximum size before hitting random practical limits is around 64 KB
      57   * and 4KB is usually a maximum admitted limit for proper operations.
      58   * The value below is more a security limit than anything else and
      59   * really should never be hit by 'normal' operations
      60   * Set to 1 MByte in 2012, this is only enforced on output
      61   */
      62  #define MAX_URI_LENGTH 1024 * 1024
      63  
      64  static void
      65  xmlURIErrMemory(const char *extra)
      66  {
      67      if (extra)
      68          __xmlRaiseError(NULL, NULL, NULL,
      69                          NULL, NULL, XML_FROM_URI,
      70                          XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
      71                          extra, NULL, NULL, 0, 0,
      72                          "Memory allocation failed : %s\n", extra);
      73      else
      74          __xmlRaiseError(NULL, NULL, NULL,
      75                          NULL, NULL, XML_FROM_URI,
      76                          XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
      77                          NULL, NULL, NULL, 0, 0,
      78                          "Memory allocation failed\n");
      79  }
      80  
      81  static void xmlCleanURI(xmlURIPtr uri);
      82  
      83  /*
      84   * Old rule from 2396 used in legacy handling code
      85   * alpha    = lowalpha | upalpha
      86   */
      87  #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
      88  
      89  
      90  /*
      91   * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
      92   *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
      93   *            "u" | "v" | "w" | "x" | "y" | "z"
      94   */
      95  
      96  #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
      97  
      98  /*
      99   * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
     100   *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
     101   *           "U" | "V" | "W" | "X" | "Y" | "Z"
     102   */
     103  #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
     104  
     105  #ifdef IS_DIGIT
     106  #undef IS_DIGIT
     107  #endif
     108  /*
     109   * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
     110   */
     111  #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
     112  
     113  /*
     114   * alphanum = alpha | digit
     115   */
     116  
     117  #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
     118  
     119  /*
     120   * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
     121   */
     122  
     123  #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
     124      ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
     125      ((x) == '(') || ((x) == ')'))
     126  
     127  /*
     128   * unwise = "{" | "}" | "|" | "\" | "^" | "`"
     129   */
     130  
     131  #define IS_UNWISE(p)                                                    \
     132        (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
     133         ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
     134         ((*(p) == ']')) || ((*(p) == '`')))
     135  /*
     136   * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
     137   *            "[" | "]"
     138   */
     139  
     140  #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
     141          ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
     142          ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
     143          ((x) == ']'))
     144  
     145  /*
     146   * unreserved = alphanum | mark
     147   */
     148  
     149  #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
     150  
     151  /*
     152   * Skip to next pointer char, handle escaped sequences
     153   */
     154  
     155  #define NEXT(p) ((*p == '%')? p += 3 : p++)
     156  
     157  /*
     158   * Productions from the spec.
     159   *
     160   *    authority     = server | reg_name
     161   *    reg_name      = 1*( unreserved | escaped | "$" | "," |
     162   *                        ";" | ":" | "@" | "&" | "=" | "+" )
     163   *
     164   * path          = [ abs_path | opaque_part ]
     165   */
     166  
     167  #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
     168  
     169  /************************************************************************
     170   *									*
     171   *                         RFC 3986 parser				*
     172   *									*
     173   ************************************************************************/
     174  
     175  #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
     176  #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
     177                        ((*(p) >= 'A') && (*(p) <= 'Z')))
     178  #define ISA_HEXDIG(p)							\
     179         (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
     180          ((*(p) >= 'A') && (*(p) <= 'F')))
     181  
     182  /*
     183   *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
     184   *                     / "*" / "+" / "," / ";" / "="
     185   */
     186  #define ISA_SUB_DELIM(p)						\
     187        (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
     188         ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
     189         ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
     190         ((*(p) == '=')) || ((*(p) == '\'')))
     191  
     192  /*
     193   *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
     194   */
     195  #define ISA_GEN_DELIM(p)						\
     196        (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
     197         ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
     198         ((*(p) == '@')))
     199  
     200  /*
     201   *    reserved      = gen-delims / sub-delims
     202   */
     203  #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
     204  
     205  /*
     206   *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
     207   */
     208  #define ISA_UNRESERVED(p)						\
     209        ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
     210         ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
     211  
     212  /*
     213   *    pct-encoded   = "%" HEXDIG HEXDIG
     214   */
     215  #define ISA_PCT_ENCODED(p)						\
     216       ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
     217  
     218  /*
     219   *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
     220   */
     221  #define ISA_PCHAR(p)							\
     222       (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
     223        ((*(p) == ':')) || ((*(p) == '@')))
     224  
     225  /**
     226   * xmlParse3986Scheme:
     227   * @uri:  pointer to an URI structure
     228   * @str:  pointer to the string to analyze
     229   *
     230   * Parse an URI scheme
     231   *
     232   * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
     233   *
     234   * Returns 0 or the error code
     235   */
     236  static int
     237  xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
     238      const char *cur;
     239  
     240      if (str == NULL)
     241  	return(-1);
     242  
     243      cur = *str;
     244      if (!ISA_ALPHA(cur))
     245  	return(2);
     246      cur++;
     247      while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
     248             (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
     249      if (uri != NULL) {
     250  	if (uri->scheme != NULL) xmlFree(uri->scheme);
     251  	uri->scheme = STRNDUP(*str, cur - *str);
     252      }
     253      *str = cur;
     254      return(0);
     255  }
     256  
     257  /**
     258   * xmlParse3986Fragment:
     259   * @uri:  pointer to an URI structure
     260   * @str:  pointer to the string to analyze
     261   *
     262   * Parse the query part of an URI
     263   *
     264   * fragment      = *( pchar / "/" / "?" )
     265   * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
     266   *       in the fragment identifier but this is used very broadly for
     267   *       xpointer scheme selection, so we are allowing it here to not break
     268   *       for example all the DocBook processing chains.
     269   *
     270   * Returns 0 or the error code
     271   */
     272  static int
     273  xmlParse3986Fragment(xmlURIPtr uri, const char **str)
     274  {
     275      const char *cur;
     276  
     277      if (str == NULL)
     278          return (-1);
     279  
     280      cur = *str;
     281  
     282      while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
     283             (*cur == '[') || (*cur == ']') ||
     284             ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
     285          NEXT(cur);
     286      if (uri != NULL) {
     287          if (uri->fragment != NULL)
     288              xmlFree(uri->fragment);
     289  	if (uri->cleanup & 2)
     290  	    uri->fragment = STRNDUP(*str, cur - *str);
     291  	else
     292  	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
     293      }
     294      *str = cur;
     295      return (0);
     296  }
     297  
     298  /**
     299   * xmlParse3986Query:
     300   * @uri:  pointer to an URI structure
     301   * @str:  pointer to the string to analyze
     302   *
     303   * Parse the query part of an URI
     304   *
     305   * query = *uric
     306   *
     307   * Returns 0 or the error code
     308   */
     309  static int
     310  xmlParse3986Query(xmlURIPtr uri, const char **str)
     311  {
     312      const char *cur;
     313  
     314      if (str == NULL)
     315          return (-1);
     316  
     317      cur = *str;
     318  
     319      while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
     320             ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
     321          NEXT(cur);
     322      if (uri != NULL) {
     323          if (uri->query != NULL)
     324              xmlFree(uri->query);
     325  	if (uri->cleanup & 2)
     326  	    uri->query = STRNDUP(*str, cur - *str);
     327  	else
     328  	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
     329  
     330  	/* Save the raw bytes of the query as well.
     331  	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
     332  	 */
     333  	if (uri->query_raw != NULL)
     334  	    xmlFree (uri->query_raw);
     335  	uri->query_raw = STRNDUP (*str, cur - *str);
     336      }
     337      *str = cur;
     338      return (0);
     339  }
     340  
     341  /**
     342   * xmlParse3986Port:
     343   * @uri:  pointer to an URI structure
     344   * @str:  the string to analyze
     345   *
     346   * Parse a port part and fills in the appropriate fields
     347   * of the @uri structure
     348   *
     349   * port          = *DIGIT
     350   *
     351   * Returns 0 or the error code
     352   */
     353  static int
     354  xmlParse3986Port(xmlURIPtr uri, const char **str)
     355  {
     356      const char *cur = *str;
     357      unsigned port = 0; /* unsigned for defined overflow behavior */
     358  
     359      if (ISA_DIGIT(cur)) {
     360  	while (ISA_DIGIT(cur)) {
     361  	    port = port * 10 + (*cur - '0');
     362  
     363  	    cur++;
     364  	}
     365  	if (uri != NULL)
     366  	    uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
     367  	*str = cur;
     368  	return(0);
     369      }
     370      return(1);
     371  }
     372  
     373  /**
     374   * xmlParse3986Userinfo:
     375   * @uri:  pointer to an URI structure
     376   * @str:  the string to analyze
     377   *
     378   * Parse an user informations part and fills in the appropriate fields
     379   * of the @uri structure
     380   *
     381   * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
     382   *
     383   * Returns 0 or the error code
     384   */
     385  static int
     386  xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
     387  {
     388      const char *cur;
     389  
     390      cur = *str;
     391      while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
     392             ISA_SUB_DELIM(cur) || (*cur == ':'))
     393  	NEXT(cur);
     394      if (*cur == '@') {
     395  	if (uri != NULL) {
     396  	    if (uri->user != NULL) xmlFree(uri->user);
     397  	    if (uri->cleanup & 2)
     398  		uri->user = STRNDUP(*str, cur - *str);
     399  	    else
     400  		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
     401  	}
     402  	*str = cur;
     403  	return(0);
     404      }
     405      return(1);
     406  }
     407  
     408  /**
     409   * xmlParse3986DecOctet:
     410   * @str:  the string to analyze
     411   *
     412   *    dec-octet     = DIGIT                 ; 0-9
     413   *                  / %x31-39 DIGIT         ; 10-99
     414   *                  / "1" 2DIGIT            ; 100-199
     415   *                  / "2" %x30-34 DIGIT     ; 200-249
     416   *                  / "25" %x30-35          ; 250-255
     417   *
     418   * Skip a dec-octet.
     419   *
     420   * Returns 0 if found and skipped, 1 otherwise
     421   */
     422  static int
     423  xmlParse3986DecOctet(const char **str) {
     424      const char *cur = *str;
     425  
     426      if (!(ISA_DIGIT(cur)))
     427          return(1);
     428      if (!ISA_DIGIT(cur+1))
     429  	cur++;
     430      else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
     431  	cur += 2;
     432      else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
     433  	cur += 3;
     434      else if ((*cur == '2') && (*(cur + 1) >= '0') &&
     435  	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
     436  	cur += 3;
     437      else if ((*cur == '2') && (*(cur + 1) == '5') &&
     438  	     (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
     439  	cur += 3;
     440      else
     441          return(1);
     442      *str = cur;
     443      return(0);
     444  }
     445  /**
     446   * xmlParse3986Host:
     447   * @uri:  pointer to an URI structure
     448   * @str:  the string to analyze
     449   *
     450   * Parse an host part and fills in the appropriate fields
     451   * of the @uri structure
     452   *
     453   * host          = IP-literal / IPv4address / reg-name
     454   * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
     455   * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
     456   * reg-name      = *( unreserved / pct-encoded / sub-delims )
     457   *
     458   * Returns 0 or the error code
     459   */
     460  static int
     461  xmlParse3986Host(xmlURIPtr uri, const char **str)
     462  {
     463      const char *cur = *str;
     464      const char *host;
     465  
     466      host = cur;
     467      /*
     468       * IPv6 and future adressing scheme are enclosed between brackets
     469       */
     470      if (*cur == '[') {
     471          cur++;
     472  	while ((*cur != ']') && (*cur != 0))
     473  	    cur++;
     474  	if (*cur != ']')
     475  	    return(1);
     476  	cur++;
     477  	goto found;
     478      }
     479      /*
     480       * try to parse an IPv4
     481       */
     482      if (ISA_DIGIT(cur)) {
     483          if (xmlParse3986DecOctet(&cur) != 0)
     484  	    goto not_ipv4;
     485  	if (*cur != '.')
     486  	    goto not_ipv4;
     487  	cur++;
     488          if (xmlParse3986DecOctet(&cur) != 0)
     489  	    goto not_ipv4;
     490  	if (*cur != '.')
     491  	    goto not_ipv4;
     492          if (xmlParse3986DecOctet(&cur) != 0)
     493  	    goto not_ipv4;
     494  	if (*cur != '.')
     495  	    goto not_ipv4;
     496          if (xmlParse3986DecOctet(&cur) != 0)
     497  	    goto not_ipv4;
     498  	goto found;
     499  not_ipv4:
     500          cur = *str;
     501      }
     502      /*
     503       * then this should be a hostname which can be empty
     504       */
     505      while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
     506          NEXT(cur);
     507  found:
     508      if (uri != NULL) {
     509  	if (uri->authority != NULL) xmlFree(uri->authority);
     510  	uri->authority = NULL;
     511  	if (uri->server != NULL) xmlFree(uri->server);
     512  	if (cur != host) {
     513  	    if (uri->cleanup & 2)
     514  		uri->server = STRNDUP(host, cur - host);
     515  	    else
     516  		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
     517  	} else
     518  	    uri->server = NULL;
     519      }
     520      *str = cur;
     521      return(0);
     522  }
     523  
     524  /**
     525   * xmlParse3986Authority:
     526   * @uri:  pointer to an URI structure
     527   * @str:  the string to analyze
     528   *
     529   * Parse an authority part and fills in the appropriate fields
     530   * of the @uri structure
     531   *
     532   * authority     = [ userinfo "@" ] host [ ":" port ]
     533   *
     534   * Returns 0 or the error code
     535   */
     536  static int
     537  xmlParse3986Authority(xmlURIPtr uri, const char **str)
     538  {
     539      const char *cur;
     540      int ret;
     541  
     542      cur = *str;
     543      /*
     544       * try to parse an userinfo and check for the trailing @
     545       */
     546      ret = xmlParse3986Userinfo(uri, &cur);
     547      if ((ret != 0) || (*cur != '@'))
     548          cur = *str;
     549      else
     550          cur++;
     551      ret = xmlParse3986Host(uri, &cur);
     552      if (ret != 0) return(ret);
     553      if (*cur == ':') {
     554          cur++;
     555          ret = xmlParse3986Port(uri, &cur);
     556  	if (ret != 0) return(ret);
     557      }
     558      *str = cur;
     559      return(0);
     560  }
     561  
     562  /**
     563   * xmlParse3986Segment:
     564   * @str:  the string to analyze
     565   * @forbid: an optional forbidden character
     566   * @empty: allow an empty segment
     567   *
     568   * Parse a segment and fills in the appropriate fields
     569   * of the @uri structure
     570   *
     571   * segment       = *pchar
     572   * segment-nz    = 1*pchar
     573   * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
     574   *               ; non-zero-length segment without any colon ":"
     575   *
     576   * Returns 0 or the error code
     577   */
     578  static int
     579  xmlParse3986Segment(const char **str, char forbid, int empty)
     580  {
     581      const char *cur;
     582  
     583      cur = *str;
     584      if (!ISA_PCHAR(cur)) {
     585          if (empty)
     586  	    return(0);
     587  	return(1);
     588      }
     589      while (ISA_PCHAR(cur) && (*cur != forbid))
     590          NEXT(cur);
     591      *str = cur;
     592      return (0);
     593  }
     594  
     595  /**
     596   * xmlParse3986PathAbEmpty:
     597   * @uri:  pointer to an URI structure
     598   * @str:  the string to analyze
     599   *
     600   * Parse an path absolute or empty and fills in the appropriate fields
     601   * of the @uri structure
     602   *
     603   * path-abempty  = *( "/" segment )
     604   *
     605   * Returns 0 or the error code
     606   */
     607  static int
     608  xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
     609  {
     610      const char *cur;
     611      int ret;
     612  
     613      cur = *str;
     614  
     615      while (*cur == '/') {
     616          cur++;
     617  	ret = xmlParse3986Segment(&cur, 0, 1);
     618  	if (ret != 0) return(ret);
     619      }
     620      if (uri != NULL) {
     621  	if (uri->path != NULL) xmlFree(uri->path);
     622          if (*str != cur) {
     623              if (uri->cleanup & 2)
     624                  uri->path = STRNDUP(*str, cur - *str);
     625              else
     626                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
     627          } else {
     628              uri->path = NULL;
     629          }
     630      }
     631      *str = cur;
     632      return (0);
     633  }
     634  
     635  /**
     636   * xmlParse3986PathAbsolute:
     637   * @uri:  pointer to an URI structure
     638   * @str:  the string to analyze
     639   *
     640   * Parse an path absolute and fills in the appropriate fields
     641   * of the @uri structure
     642   *
     643   * path-absolute = "/" [ segment-nz *( "/" segment ) ]
     644   *
     645   * Returns 0 or the error code
     646   */
     647  static int
     648  xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
     649  {
     650      const char *cur;
     651      int ret;
     652  
     653      cur = *str;
     654  
     655      if (*cur != '/')
     656          return(1);
     657      cur++;
     658      ret = xmlParse3986Segment(&cur, 0, 0);
     659      if (ret == 0) {
     660  	while (*cur == '/') {
     661  	    cur++;
     662  	    ret = xmlParse3986Segment(&cur, 0, 1);
     663  	    if (ret != 0) return(ret);
     664  	}
     665      }
     666      if (uri != NULL) {
     667  	if (uri->path != NULL) xmlFree(uri->path);
     668          if (cur != *str) {
     669              if (uri->cleanup & 2)
     670                  uri->path = STRNDUP(*str, cur - *str);
     671              else
     672                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
     673          } else {
     674              uri->path = NULL;
     675          }
     676      }
     677      *str = cur;
     678      return (0);
     679  }
     680  
     681  /**
     682   * xmlParse3986PathRootless:
     683   * @uri:  pointer to an URI structure
     684   * @str:  the string to analyze
     685   *
     686   * Parse an path without root and fills in the appropriate fields
     687   * of the @uri structure
     688   *
     689   * path-rootless = segment-nz *( "/" segment )
     690   *
     691   * Returns 0 or the error code
     692   */
     693  static int
     694  xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
     695  {
     696      const char *cur;
     697      int ret;
     698  
     699      cur = *str;
     700  
     701      ret = xmlParse3986Segment(&cur, 0, 0);
     702      if (ret != 0) return(ret);
     703      while (*cur == '/') {
     704          cur++;
     705  	ret = xmlParse3986Segment(&cur, 0, 1);
     706  	if (ret != 0) return(ret);
     707      }
     708      if (uri != NULL) {
     709  	if (uri->path != NULL) xmlFree(uri->path);
     710          if (cur != *str) {
     711              if (uri->cleanup & 2)
     712                  uri->path = STRNDUP(*str, cur - *str);
     713              else
     714                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
     715          } else {
     716              uri->path = NULL;
     717          }
     718      }
     719      *str = cur;
     720      return (0);
     721  }
     722  
     723  /**
     724   * xmlParse3986PathNoScheme:
     725   * @uri:  pointer to an URI structure
     726   * @str:  the string to analyze
     727   *
     728   * Parse an path which is not a scheme and fills in the appropriate fields
     729   * of the @uri structure
     730   *
     731   * path-noscheme = segment-nz-nc *( "/" segment )
     732   *
     733   * Returns 0 or the error code
     734   */
     735  static int
     736  xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
     737  {
     738      const char *cur;
     739      int ret;
     740  
     741      cur = *str;
     742  
     743      ret = xmlParse3986Segment(&cur, ':', 0);
     744      if (ret != 0) return(ret);
     745      while (*cur == '/') {
     746          cur++;
     747  	ret = xmlParse3986Segment(&cur, 0, 1);
     748  	if (ret != 0) return(ret);
     749      }
     750      if (uri != NULL) {
     751  	if (uri->path != NULL) xmlFree(uri->path);
     752          if (cur != *str) {
     753              if (uri->cleanup & 2)
     754                  uri->path = STRNDUP(*str, cur - *str);
     755              else
     756                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
     757          } else {
     758              uri->path = NULL;
     759          }
     760      }
     761      *str = cur;
     762      return (0);
     763  }
     764  
     765  /**
     766   * xmlParse3986HierPart:
     767   * @uri:  pointer to an URI structure
     768   * @str:  the string to analyze
     769   *
     770   * Parse an hierarchical part and fills in the appropriate fields
     771   * of the @uri structure
     772   *
     773   * hier-part     = "//" authority path-abempty
     774   *                / path-absolute
     775   *                / path-rootless
     776   *                / path-empty
     777   *
     778   * Returns 0 or the error code
     779   */
     780  static int
     781  xmlParse3986HierPart(xmlURIPtr uri, const char **str)
     782  {
     783      const char *cur;
     784      int ret;
     785  
     786      cur = *str;
     787  
     788      if ((*cur == '/') && (*(cur + 1) == '/')) {
     789          cur += 2;
     790  	ret = xmlParse3986Authority(uri, &cur);
     791  	if (ret != 0) return(ret);
     792  	if (uri->server == NULL)
     793  	    uri->port = -1;
     794  	ret = xmlParse3986PathAbEmpty(uri, &cur);
     795  	if (ret != 0) return(ret);
     796  	*str = cur;
     797  	return(0);
     798      } else if (*cur == '/') {
     799          ret = xmlParse3986PathAbsolute(uri, &cur);
     800  	if (ret != 0) return(ret);
     801      } else if (ISA_PCHAR(cur)) {
     802          ret = xmlParse3986PathRootless(uri, &cur);
     803  	if (ret != 0) return(ret);
     804      } else {
     805  	/* path-empty is effectively empty */
     806  	if (uri != NULL) {
     807  	    if (uri->path != NULL) xmlFree(uri->path);
     808  	    uri->path = NULL;
     809  	}
     810      }
     811      *str = cur;
     812      return (0);
     813  }
     814  
     815  /**
     816   * xmlParse3986RelativeRef:
     817   * @uri:  pointer to an URI structure
     818   * @str:  the string to analyze
     819   *
     820   * Parse an URI string and fills in the appropriate fields
     821   * of the @uri structure
     822   *
     823   * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
     824   * relative-part = "//" authority path-abempty
     825   *               / path-absolute
     826   *               / path-noscheme
     827   *               / path-empty
     828   *
     829   * Returns 0 or the error code
     830   */
     831  static int
     832  xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
     833      int ret;
     834  
     835      if ((*str == '/') && (*(str + 1) == '/')) {
     836          str += 2;
     837  	ret = xmlParse3986Authority(uri, &str);
     838  	if (ret != 0) return(ret);
     839  	ret = xmlParse3986PathAbEmpty(uri, &str);
     840  	if (ret != 0) return(ret);
     841      } else if (*str == '/') {
     842  	ret = xmlParse3986PathAbsolute(uri, &str);
     843  	if (ret != 0) return(ret);
     844      } else if (ISA_PCHAR(str)) {
     845          ret = xmlParse3986PathNoScheme(uri, &str);
     846  	if (ret != 0) return(ret);
     847      } else {
     848  	/* path-empty is effectively empty */
     849  	if (uri != NULL) {
     850  	    if (uri->path != NULL) xmlFree(uri->path);
     851  	    uri->path = NULL;
     852  	}
     853      }
     854  
     855      if (*str == '?') {
     856  	str++;
     857  	ret = xmlParse3986Query(uri, &str);
     858  	if (ret != 0) return(ret);
     859      }
     860      if (*str == '#') {
     861  	str++;
     862  	ret = xmlParse3986Fragment(uri, &str);
     863  	if (ret != 0) return(ret);
     864      }
     865      if (*str != 0) {
     866  	xmlCleanURI(uri);
     867  	return(1);
     868      }
     869      return(0);
     870  }
     871  
     872  
     873  /**
     874   * xmlParse3986URI:
     875   * @uri:  pointer to an URI structure
     876   * @str:  the string to analyze
     877   *
     878   * Parse an URI string and fills in the appropriate fields
     879   * of the @uri structure
     880   *
     881   * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
     882   *
     883   * Returns 0 or the error code
     884   */
     885  static int
     886  xmlParse3986URI(xmlURIPtr uri, const char *str) {
     887      int ret;
     888  
     889      ret = xmlParse3986Scheme(uri, &str);
     890      if (ret != 0) return(ret);
     891      if (*str != ':') {
     892  	return(1);
     893      }
     894      str++;
     895      ret = xmlParse3986HierPart(uri, &str);
     896      if (ret != 0) return(ret);
     897      if (*str == '?') {
     898  	str++;
     899  	ret = xmlParse3986Query(uri, &str);
     900  	if (ret != 0) return(ret);
     901      }
     902      if (*str == '#') {
     903  	str++;
     904  	ret = xmlParse3986Fragment(uri, &str);
     905  	if (ret != 0) return(ret);
     906      }
     907      if (*str != 0) {
     908  	xmlCleanURI(uri);
     909  	return(1);
     910      }
     911      return(0);
     912  }
     913  
     914  /**
     915   * xmlParse3986URIReference:
     916   * @uri:  pointer to an URI structure
     917   * @str:  the string to analyze
     918   *
     919   * Parse an URI reference string and fills in the appropriate fields
     920   * of the @uri structure
     921   *
     922   * URI-reference = URI / relative-ref
     923   *
     924   * Returns 0 or the error code
     925   */
     926  static int
     927  xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
     928      int ret;
     929  
     930      if (str == NULL)
     931  	return(-1);
     932      xmlCleanURI(uri);
     933  
     934      /*
     935       * Try first to parse absolute refs, then fallback to relative if
     936       * it fails.
     937       */
     938      ret = xmlParse3986URI(uri, str);
     939      if (ret != 0) {
     940  	xmlCleanURI(uri);
     941          ret = xmlParse3986RelativeRef(uri, str);
     942  	if (ret != 0) {
     943  	    xmlCleanURI(uri);
     944  	    return(ret);
     945  	}
     946      }
     947      return(0);
     948  }
     949  
     950  /**
     951   * xmlParseURI:
     952   * @str:  the URI string to analyze
     953   *
     954   * Parse an URI based on RFC 3986
     955   *
     956   * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
     957   *
     958   * Returns a newly built xmlURIPtr or NULL in case of error
     959   */
     960  xmlURIPtr
     961  xmlParseURI(const char *str) {
     962      xmlURIPtr uri;
     963      int ret;
     964  
     965      if (str == NULL)
     966  	return(NULL);
     967      uri = xmlCreateURI();
     968      if (uri != NULL) {
     969  	ret = xmlParse3986URIReference(uri, str);
     970          if (ret) {
     971  	    xmlFreeURI(uri);
     972  	    return(NULL);
     973  	}
     974      }
     975      return(uri);
     976  }
     977  
     978  /**
     979   * xmlParseURIReference:
     980   * @uri:  pointer to an URI structure
     981   * @str:  the string to analyze
     982   *
     983   * Parse an URI reference string based on RFC 3986 and fills in the
     984   * appropriate fields of the @uri structure
     985   *
     986   * URI-reference = URI / relative-ref
     987   *
     988   * Returns 0 or the error code
     989   */
     990  int
     991  xmlParseURIReference(xmlURIPtr uri, const char *str) {
     992      return(xmlParse3986URIReference(uri, str));
     993  }
     994  
     995  /**
     996   * xmlParseURIRaw:
     997   * @str:  the URI string to analyze
     998   * @raw:  if 1 unescaping of URI pieces are disabled
     999   *
    1000   * Parse an URI but allows to keep intact the original fragments.
    1001   *
    1002   * URI-reference = URI / relative-ref
    1003   *
    1004   * Returns a newly built xmlURIPtr or NULL in case of error
    1005   */
    1006  xmlURIPtr
    1007  xmlParseURIRaw(const char *str, int raw) {
    1008      xmlURIPtr uri;
    1009      int ret;
    1010  
    1011      if (str == NULL)
    1012  	return(NULL);
    1013      uri = xmlCreateURI();
    1014      if (uri != NULL) {
    1015          if (raw) {
    1016  	    uri->cleanup |= 2;
    1017  	}
    1018  	ret = xmlParseURIReference(uri, str);
    1019          if (ret) {
    1020  	    xmlFreeURI(uri);
    1021  	    return(NULL);
    1022  	}
    1023      }
    1024      return(uri);
    1025  }
    1026  
    1027  /************************************************************************
    1028   *									*
    1029   *			Generic URI structure functions			*
    1030   *									*
    1031   ************************************************************************/
    1032  
    1033  /**
    1034   * xmlCreateURI:
    1035   *
    1036   * Simply creates an empty xmlURI
    1037   *
    1038   * Returns the new structure or NULL in case of error
    1039   */
    1040  xmlURIPtr
    1041  xmlCreateURI(void) {
    1042      xmlURIPtr ret;
    1043  
    1044      ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
    1045      if (ret == NULL) {
    1046          xmlURIErrMemory("creating URI structure\n");
    1047  	return(NULL);
    1048      }
    1049      memset(ret, 0, sizeof(xmlURI));
    1050      return(ret);
    1051  }
    1052  
    1053  /**
    1054   * xmlSaveUriRealloc:
    1055   *
    1056   * Function to handle properly a reallocation when saving an URI
    1057   * Also imposes some limit on the length of an URI string output
    1058   */
    1059  static xmlChar *
    1060  xmlSaveUriRealloc(xmlChar *ret, int *max) {
    1061      xmlChar *temp;
    1062      int tmp;
    1063  
    1064      if (*max > MAX_URI_LENGTH) {
    1065          xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
    1066          return(NULL);
    1067      }
    1068      tmp = *max * 2;
    1069      temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
    1070      if (temp == NULL) {
    1071          xmlURIErrMemory("saving URI\n");
    1072          return(NULL);
    1073      }
    1074      *max = tmp;
    1075      return(temp);
    1076  }
    1077  
    1078  /**
    1079   * xmlSaveUri:
    1080   * @uri:  pointer to an xmlURI
    1081   *
    1082   * Save the URI as an escaped string
    1083   *
    1084   * Returns a new string (to be deallocated by caller)
    1085   */
    1086  xmlChar *
    1087  xmlSaveUri(xmlURIPtr uri) {
    1088      xmlChar *ret = NULL;
    1089      xmlChar *temp;
    1090      const char *p;
    1091      int len;
    1092      int max;
    1093  
    1094      if (uri == NULL) return(NULL);
    1095  
    1096  
    1097      max = 80;
    1098      ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
    1099      if (ret == NULL) {
    1100          xmlURIErrMemory("saving URI\n");
    1101  	return(NULL);
    1102      }
    1103      len = 0;
    1104  
    1105      if (uri->scheme != NULL) {
    1106  	p = uri->scheme;
    1107  	while (*p != 0) {
    1108  	    if (len >= max) {
    1109                  temp = xmlSaveUriRealloc(ret, &max);
    1110                  if (temp == NULL) goto mem_error;
    1111  		ret = temp;
    1112  	    }
    1113  	    ret[len++] = *p++;
    1114  	}
    1115  	if (len >= max) {
    1116              temp = xmlSaveUriRealloc(ret, &max);
    1117              if (temp == NULL) goto mem_error;
    1118              ret = temp;
    1119  	}
    1120  	ret[len++] = ':';
    1121      }
    1122      if (uri->opaque != NULL) {
    1123  	p = uri->opaque;
    1124  	while (*p != 0) {
    1125  	    if (len + 3 >= max) {
    1126                  temp = xmlSaveUriRealloc(ret, &max);
    1127                  if (temp == NULL) goto mem_error;
    1128                  ret = temp;
    1129  	    }
    1130  	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
    1131  		ret[len++] = *p++;
    1132  	    else {
    1133  		int val = *(unsigned char *)p++;
    1134  		int hi = val / 0x10, lo = val % 0x10;
    1135  		ret[len++] = '%';
    1136  		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1137  		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1138  	    }
    1139  	}
    1140      } else {
    1141  	if ((uri->server != NULL) || (uri->port == -1)) {
    1142  	    if (len + 3 >= max) {
    1143                  temp = xmlSaveUriRealloc(ret, &max);
    1144                  if (temp == NULL) goto mem_error;
    1145                  ret = temp;
    1146  	    }
    1147  	    ret[len++] = '/';
    1148  	    ret[len++] = '/';
    1149  	    if (uri->user != NULL) {
    1150  		p = uri->user;
    1151  		while (*p != 0) {
    1152  		    if (len + 3 >= max) {
    1153                          temp = xmlSaveUriRealloc(ret, &max);
    1154                          if (temp == NULL) goto mem_error;
    1155                          ret = temp;
    1156  		    }
    1157  		    if ((IS_UNRESERVED(*(p))) ||
    1158  			((*(p) == ';')) || ((*(p) == ':')) ||
    1159  			((*(p) == '&')) || ((*(p) == '=')) ||
    1160  			((*(p) == '+')) || ((*(p) == '$')) ||
    1161  			((*(p) == ',')))
    1162  			ret[len++] = *p++;
    1163  		    else {
    1164  			int val = *(unsigned char *)p++;
    1165  			int hi = val / 0x10, lo = val % 0x10;
    1166  			ret[len++] = '%';
    1167  			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1168  			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1169  		    }
    1170  		}
    1171  		if (len + 3 >= max) {
    1172                      temp = xmlSaveUriRealloc(ret, &max);
    1173                      if (temp == NULL) goto mem_error;
    1174                      ret = temp;
    1175  		}
    1176  		ret[len++] = '@';
    1177  	    }
    1178  	    if (uri->server != NULL) {
    1179  		p = uri->server;
    1180  		while (*p != 0) {
    1181  		    if (len >= max) {
    1182  			temp = xmlSaveUriRealloc(ret, &max);
    1183  			if (temp == NULL) goto mem_error;
    1184  			ret = temp;
    1185  		    }
    1186  		    ret[len++] = *p++;
    1187  		}
    1188  		if (uri->port > 0) {
    1189  		    if (len + 10 >= max) {
    1190  			temp = xmlSaveUriRealloc(ret, &max);
    1191  			if (temp == NULL) goto mem_error;
    1192  			ret = temp;
    1193  		    }
    1194  		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
    1195  		}
    1196  	    }
    1197  	} else if (uri->authority != NULL) {
    1198  	    if (len + 3 >= max) {
    1199                  temp = xmlSaveUriRealloc(ret, &max);
    1200                  if (temp == NULL) goto mem_error;
    1201                  ret = temp;
    1202  	    }
    1203  	    ret[len++] = '/';
    1204  	    ret[len++] = '/';
    1205  	    p = uri->authority;
    1206  	    while (*p != 0) {
    1207  		if (len + 3 >= max) {
    1208                      temp = xmlSaveUriRealloc(ret, &max);
    1209                      if (temp == NULL) goto mem_error;
    1210                      ret = temp;
    1211  		}
    1212  		if ((IS_UNRESERVED(*(p))) ||
    1213                      ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
    1214                      ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
    1215                      ((*(p) == '=')) || ((*(p) == '+')))
    1216  		    ret[len++] = *p++;
    1217  		else {
    1218  		    int val = *(unsigned char *)p++;
    1219  		    int hi = val / 0x10, lo = val % 0x10;
    1220  		    ret[len++] = '%';
    1221  		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1222  		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1223  		}
    1224  	    }
    1225  	} else if (uri->scheme != NULL) {
    1226  	    if (len + 3 >= max) {
    1227                  temp = xmlSaveUriRealloc(ret, &max);
    1228                  if (temp == NULL) goto mem_error;
    1229                  ret = temp;
    1230  	    }
    1231  	}
    1232  	if (uri->path != NULL) {
    1233  	    p = uri->path;
    1234  	    /*
    1235  	     * the colon in file:///d: should not be escaped or
    1236  	     * Windows accesses fail later.
    1237  	     */
    1238  	    if ((uri->scheme != NULL) &&
    1239  		(p[0] == '/') &&
    1240  		(((p[1] >= 'a') && (p[1] <= 'z')) ||
    1241  		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
    1242  		(p[2] == ':') &&
    1243  	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
    1244  		if (len + 3 >= max) {
    1245                      temp = xmlSaveUriRealloc(ret, &max);
    1246                      if (temp == NULL) goto mem_error;
    1247                      ret = temp;
    1248  		}
    1249  		ret[len++] = *p++;
    1250  		ret[len++] = *p++;
    1251  		ret[len++] = *p++;
    1252  	    }
    1253  	    while (*p != 0) {
    1254  		if (len + 3 >= max) {
    1255                      temp = xmlSaveUriRealloc(ret, &max);
    1256                      if (temp == NULL) goto mem_error;
    1257                      ret = temp;
    1258  		}
    1259  		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
    1260                      ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
    1261  	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
    1262  	            ((*(p) == ',')))
    1263  		    ret[len++] = *p++;
    1264  		else {
    1265  		    int val = *(unsigned char *)p++;
    1266  		    int hi = val / 0x10, lo = val % 0x10;
    1267  		    ret[len++] = '%';
    1268  		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1269  		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1270  		}
    1271  	    }
    1272  	}
    1273  	if (uri->query_raw != NULL) {
    1274  	    if (len + 1 >= max) {
    1275                  temp = xmlSaveUriRealloc(ret, &max);
    1276                  if (temp == NULL) goto mem_error;
    1277                  ret = temp;
    1278  	    }
    1279  	    ret[len++] = '?';
    1280  	    p = uri->query_raw;
    1281  	    while (*p != 0) {
    1282  		if (len + 1 >= max) {
    1283                      temp = xmlSaveUriRealloc(ret, &max);
    1284                      if (temp == NULL) goto mem_error;
    1285                      ret = temp;
    1286  		}
    1287  		ret[len++] = *p++;
    1288  	    }
    1289  	} else if (uri->query != NULL) {
    1290  	    if (len + 3 >= max) {
    1291                  temp = xmlSaveUriRealloc(ret, &max);
    1292                  if (temp == NULL) goto mem_error;
    1293                  ret = temp;
    1294  	    }
    1295  	    ret[len++] = '?';
    1296  	    p = uri->query;
    1297  	    while (*p != 0) {
    1298  		if (len + 3 >= max) {
    1299                      temp = xmlSaveUriRealloc(ret, &max);
    1300                      if (temp == NULL) goto mem_error;
    1301                      ret = temp;
    1302  		}
    1303  		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
    1304  		    ret[len++] = *p++;
    1305  		else {
    1306  		    int val = *(unsigned char *)p++;
    1307  		    int hi = val / 0x10, lo = val % 0x10;
    1308  		    ret[len++] = '%';
    1309  		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1310  		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1311  		}
    1312  	    }
    1313  	}
    1314      }
    1315      if (uri->fragment != NULL) {
    1316  	if (len + 3 >= max) {
    1317              temp = xmlSaveUriRealloc(ret, &max);
    1318              if (temp == NULL) goto mem_error;
    1319              ret = temp;
    1320  	}
    1321  	ret[len++] = '#';
    1322  	p = uri->fragment;
    1323  	while (*p != 0) {
    1324  	    if (len + 3 >= max) {
    1325                  temp = xmlSaveUriRealloc(ret, &max);
    1326                  if (temp == NULL) goto mem_error;
    1327                  ret = temp;
    1328  	    }
    1329  	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
    1330  		ret[len++] = *p++;
    1331  	    else {
    1332  		int val = *(unsigned char *)p++;
    1333  		int hi = val / 0x10, lo = val % 0x10;
    1334  		ret[len++] = '%';
    1335  		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1336  		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1337  	    }
    1338  	}
    1339      }
    1340      if (len >= max) {
    1341          temp = xmlSaveUriRealloc(ret, &max);
    1342          if (temp == NULL) goto mem_error;
    1343          ret = temp;
    1344      }
    1345      ret[len] = 0;
    1346      return(ret);
    1347  
    1348  mem_error:
    1349      xmlFree(ret);
    1350      return(NULL);
    1351  }
    1352  
    1353  /**
    1354   * xmlPrintURI:
    1355   * @stream:  a FILE* for the output
    1356   * @uri:  pointer to an xmlURI
    1357   *
    1358   * Prints the URI in the stream @stream.
    1359   */
    1360  void
    1361  xmlPrintURI(FILE *stream, xmlURIPtr uri) {
    1362      xmlChar *out;
    1363  
    1364      out = xmlSaveUri(uri);
    1365      if (out != NULL) {
    1366  	fprintf(stream, "%s", (char *) out);
    1367  	xmlFree(out);
    1368      }
    1369  }
    1370  
    1371  /**
    1372   * xmlCleanURI:
    1373   * @uri:  pointer to an xmlURI
    1374   *
    1375   * Make sure the xmlURI struct is free of content
    1376   */
    1377  static void
    1378  xmlCleanURI(xmlURIPtr uri) {
    1379      if (uri == NULL) return;
    1380  
    1381      if (uri->scheme != NULL) xmlFree(uri->scheme);
    1382      uri->scheme = NULL;
    1383      if (uri->server != NULL) xmlFree(uri->server);
    1384      uri->server = NULL;
    1385      if (uri->user != NULL) xmlFree(uri->user);
    1386      uri->user = NULL;
    1387      if (uri->path != NULL) xmlFree(uri->path);
    1388      uri->path = NULL;
    1389      if (uri->fragment != NULL) xmlFree(uri->fragment);
    1390      uri->fragment = NULL;
    1391      if (uri->opaque != NULL) xmlFree(uri->opaque);
    1392      uri->opaque = NULL;
    1393      if (uri->authority != NULL) xmlFree(uri->authority);
    1394      uri->authority = NULL;
    1395      if (uri->query != NULL) xmlFree(uri->query);
    1396      uri->query = NULL;
    1397      if (uri->query_raw != NULL) xmlFree(uri->query_raw);
    1398      uri->query_raw = NULL;
    1399  }
    1400  
    1401  /**
    1402   * xmlFreeURI:
    1403   * @uri:  pointer to an xmlURI
    1404   *
    1405   * Free up the xmlURI struct
    1406   */
    1407  void
    1408  xmlFreeURI(xmlURIPtr uri) {
    1409      if (uri == NULL) return;
    1410  
    1411      if (uri->scheme != NULL) xmlFree(uri->scheme);
    1412      if (uri->server != NULL) xmlFree(uri->server);
    1413      if (uri->user != NULL) xmlFree(uri->user);
    1414      if (uri->path != NULL) xmlFree(uri->path);
    1415      if (uri->fragment != NULL) xmlFree(uri->fragment);
    1416      if (uri->opaque != NULL) xmlFree(uri->opaque);
    1417      if (uri->authority != NULL) xmlFree(uri->authority);
    1418      if (uri->query != NULL) xmlFree(uri->query);
    1419      if (uri->query_raw != NULL) xmlFree(uri->query_raw);
    1420      xmlFree(uri);
    1421  }
    1422  
    1423  /************************************************************************
    1424   *									*
    1425   *			Helper functions				*
    1426   *									*
    1427   ************************************************************************/
    1428  
    1429  /**
    1430   * xmlNormalizeURIPath:
    1431   * @path:  pointer to the path string
    1432   *
    1433   * Applies the 5 normalization steps to a path string--that is, RFC 2396
    1434   * Section 5.2, steps 6.c through 6.g.
    1435   *
    1436   * Normalization occurs directly on the string, no new allocation is done
    1437   *
    1438   * Returns 0 or an error code
    1439   */
    1440  int
    1441  xmlNormalizeURIPath(char *path) {
    1442      char *cur, *out;
    1443  
    1444      if (path == NULL)
    1445  	return(-1);
    1446  
    1447      /* Skip all initial "/" chars.  We want to get to the beginning of the
    1448       * first non-empty segment.
    1449       */
    1450      cur = path;
    1451      while (cur[0] == '/')
    1452        ++cur;
    1453      if (cur[0] == '\0')
    1454        return(0);
    1455  
    1456      /* Keep everything we've seen so far.  */
    1457      out = cur;
    1458  
    1459      /*
    1460       * Analyze each segment in sequence for cases (c) and (d).
    1461       */
    1462      while (cur[0] != '\0') {
    1463  	/*
    1464  	 * c) All occurrences of "./", where "." is a complete path segment,
    1465  	 *    are removed from the buffer string.
    1466  	 */
    1467  	if ((cur[0] == '.') && (cur[1] == '/')) {
    1468  	    cur += 2;
    1469  	    /* '//' normalization should be done at this point too */
    1470  	    while (cur[0] == '/')
    1471  		cur++;
    1472  	    continue;
    1473  	}
    1474  
    1475  	/*
    1476  	 * d) If the buffer string ends with "." as a complete path segment,
    1477  	 *    that "." is removed.
    1478  	 */
    1479  	if ((cur[0] == '.') && (cur[1] == '\0'))
    1480  	    break;
    1481  
    1482  	/* Otherwise keep the segment.  */
    1483  	while (cur[0] != '/') {
    1484              if (cur[0] == '\0')
    1485                goto done_cd;
    1486  	    (out++)[0] = (cur++)[0];
    1487  	}
    1488  	/* nomalize // */
    1489  	while ((cur[0] == '/') && (cur[1] == '/'))
    1490  	    cur++;
    1491  
    1492          (out++)[0] = (cur++)[0];
    1493      }
    1494   done_cd:
    1495      out[0] = '\0';
    1496  
    1497      /* Reset to the beginning of the first segment for the next sequence.  */
    1498      cur = path;
    1499      while (cur[0] == '/')
    1500        ++cur;
    1501      if (cur[0] == '\0')
    1502  	return(0);
    1503  
    1504      /*
    1505       * Analyze each segment in sequence for cases (e) and (f).
    1506       *
    1507       * e) All occurrences of "<segment>/../", where <segment> is a
    1508       *    complete path segment not equal to "..", are removed from the
    1509       *    buffer string.  Removal of these path segments is performed
    1510       *    iteratively, removing the leftmost matching pattern on each
    1511       *    iteration, until no matching pattern remains.
    1512       *
    1513       * f) If the buffer string ends with "<segment>/..", where <segment>
    1514       *    is a complete path segment not equal to "..", that
    1515       *    "<segment>/.." is removed.
    1516       *
    1517       * To satisfy the "iterative" clause in (e), we need to collapse the
    1518       * string every time we find something that needs to be removed.  Thus,
    1519       * we don't need to keep two pointers into the string: we only need a
    1520       * "current position" pointer.
    1521       */
    1522      while (1) {
    1523          char *segp, *tmp;
    1524  
    1525          /* At the beginning of each iteration of this loop, "cur" points to
    1526           * the first character of the segment we want to examine.
    1527           */
    1528  
    1529          /* Find the end of the current segment.  */
    1530          segp = cur;
    1531          while ((segp[0] != '/') && (segp[0] != '\0'))
    1532            ++segp;
    1533  
    1534          /* If this is the last segment, we're done (we need at least two
    1535           * segments to meet the criteria for the (e) and (f) cases).
    1536           */
    1537          if (segp[0] == '\0')
    1538            break;
    1539  
    1540          /* If the first segment is "..", or if the next segment _isn't_ "..",
    1541           * keep this segment and try the next one.
    1542           */
    1543          ++segp;
    1544          if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
    1545              || ((segp[0] != '.') || (segp[1] != '.')
    1546                  || ((segp[2] != '/') && (segp[2] != '\0')))) {
    1547            cur = segp;
    1548            continue;
    1549          }
    1550  
    1551          /* If we get here, remove this segment and the next one and back up
    1552           * to the previous segment (if there is one), to implement the
    1553           * "iteratively" clause.  It's pretty much impossible to back up
    1554           * while maintaining two pointers into the buffer, so just compact
    1555           * the whole buffer now.
    1556           */
    1557  
    1558          /* If this is the end of the buffer, we're done.  */
    1559          if (segp[2] == '\0') {
    1560            cur[0] = '\0';
    1561            break;
    1562          }
    1563          /* Valgrind complained, strcpy(cur, segp + 3); */
    1564          /* string will overlap, do not use strcpy */
    1565          tmp = cur;
    1566          segp += 3;
    1567          while ((*tmp++ = *segp++) != 0)
    1568            ;
    1569  
    1570          /* If there are no previous segments, then keep going from here.  */
    1571          segp = cur;
    1572          while ((segp > path) && ((--segp)[0] == '/'))
    1573            ;
    1574          if (segp == path)
    1575            continue;
    1576  
    1577          /* "segp" is pointing to the end of a previous segment; find it's
    1578           * start.  We need to back up to the previous segment and start
    1579           * over with that to handle things like "foo/bar/../..".  If we
    1580           * don't do this, then on the first pass we'll remove the "bar/..",
    1581           * but be pointing at the second ".." so we won't realize we can also
    1582           * remove the "foo/..".
    1583           */
    1584          cur = segp;
    1585          while ((cur > path) && (cur[-1] != '/'))
    1586            --cur;
    1587      }
    1588      out[0] = '\0';
    1589  
    1590      /*
    1591       * g) If the resulting buffer string still begins with one or more
    1592       *    complete path segments of "..", then the reference is
    1593       *    considered to be in error. Implementations may handle this
    1594       *    error by retaining these components in the resolved path (i.e.,
    1595       *    treating them as part of the final URI), by removing them from
    1596       *    the resolved path (i.e., discarding relative levels above the
    1597       *    root), or by avoiding traversal of the reference.
    1598       *
    1599       * We discard them from the final path.
    1600       */
    1601      if (path[0] == '/') {
    1602        cur = path;
    1603        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
    1604               && ((cur[3] == '/') || (cur[3] == '\0')))
    1605  	cur += 3;
    1606  
    1607        if (cur != path) {
    1608  	out = path;
    1609  	while (cur[0] != '\0')
    1610            (out++)[0] = (cur++)[0];
    1611  	out[0] = 0;
    1612        }
    1613      }
    1614  
    1615      return(0);
    1616  }
    1617  
    1618  static int is_hex(char c) {
    1619      if (((c >= '0') && (c <= '9')) ||
    1620          ((c >= 'a') && (c <= 'f')) ||
    1621          ((c >= 'A') && (c <= 'F')))
    1622  	return(1);
    1623      return(0);
    1624  }
    1625  
    1626  /**
    1627   * xmlURIUnescapeString:
    1628   * @str:  the string to unescape
    1629   * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
    1630   * @target:  optional destination buffer
    1631   *
    1632   * Unescaping routine, but does not check that the string is an URI. The
    1633   * output is a direct unsigned char translation of %XX values (no encoding)
    1634   * Note that the length of the result can only be smaller or same size as
    1635   * the input string.
    1636   *
    1637   * Returns a copy of the string, but unescaped, will return NULL only in case
    1638   * of error
    1639   */
    1640  char *
    1641  xmlURIUnescapeString(const char *str, int len, char *target) {
    1642      char *ret, *out;
    1643      const char *in;
    1644  
    1645      if (str == NULL)
    1646  	return(NULL);
    1647      if (len <= 0) len = strlen(str);
    1648      if (len < 0) return(NULL);
    1649  
    1650      if (target == NULL) {
    1651  	ret = (char *) xmlMallocAtomic(len + 1);
    1652  	if (ret == NULL) {
    1653              xmlURIErrMemory("unescaping URI value\n");
    1654  	    return(NULL);
    1655  	}
    1656      } else
    1657  	ret = target;
    1658      in = str;
    1659      out = ret;
    1660      while(len > 0) {
    1661  	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
    1662  	    in++;
    1663  	    if ((*in >= '0') && (*in <= '9'))
    1664  	        *out = (*in - '0');
    1665  	    else if ((*in >= 'a') && (*in <= 'f'))
    1666  	        *out = (*in - 'a') + 10;
    1667  	    else if ((*in >= 'A') && (*in <= 'F'))
    1668  	        *out = (*in - 'A') + 10;
    1669  	    in++;
    1670  	    if ((*in >= '0') && (*in <= '9'))
    1671  	        *out = *out * 16 + (*in - '0');
    1672  	    else if ((*in >= 'a') && (*in <= 'f'))
    1673  	        *out = *out * 16 + (*in - 'a') + 10;
    1674  	    else if ((*in >= 'A') && (*in <= 'F'))
    1675  	        *out = *out * 16 + (*in - 'A') + 10;
    1676  	    in++;
    1677  	    len -= 3;
    1678  	    out++;
    1679  	} else {
    1680  	    *out++ = *in++;
    1681  	    len--;
    1682  	}
    1683      }
    1684      *out = 0;
    1685      return(ret);
    1686  }
    1687  
    1688  /**
    1689   * xmlURIEscapeStr:
    1690   * @str:  string to escape
    1691   * @list: exception list string of chars not to escape
    1692   *
    1693   * This routine escapes a string to hex, ignoring reserved characters (a-z)
    1694   * and the characters in the exception list.
    1695   *
    1696   * Returns a new escaped string or NULL in case of error.
    1697   */
    1698  xmlChar *
    1699  xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
    1700      xmlChar *ret, ch;
    1701      xmlChar *temp;
    1702      const xmlChar *in;
    1703      int len, out;
    1704  
    1705      if (str == NULL)
    1706  	return(NULL);
    1707      if (str[0] == 0)
    1708  	return(xmlStrdup(str));
    1709      len = xmlStrlen(str);
    1710      if (!(len > 0)) return(NULL);
    1711  
    1712      len += 20;
    1713      ret = (xmlChar *) xmlMallocAtomic(len);
    1714      if (ret == NULL) {
    1715          xmlURIErrMemory("escaping URI value\n");
    1716  	return(NULL);
    1717      }
    1718      in = (const xmlChar *) str;
    1719      out = 0;
    1720      while(*in != 0) {
    1721  	if (len - out <= 3) {
    1722              temp = xmlSaveUriRealloc(ret, &len);
    1723  	    if (temp == NULL) {
    1724                  xmlURIErrMemory("escaping URI value\n");
    1725  		xmlFree(ret);
    1726  		return(NULL);
    1727  	    }
    1728  	    ret = temp;
    1729  	}
    1730  
    1731  	ch = *in;
    1732  
    1733  	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
    1734  	    unsigned char val;
    1735  	    ret[out++] = '%';
    1736  	    val = ch >> 4;
    1737  	    if (val <= 9)
    1738  		ret[out++] = '0' + val;
    1739  	    else
    1740  		ret[out++] = 'A' + val - 0xA;
    1741  	    val = ch & 0xF;
    1742  	    if (val <= 9)
    1743  		ret[out++] = '0' + val;
    1744  	    else
    1745  		ret[out++] = 'A' + val - 0xA;
    1746  	    in++;
    1747  	} else {
    1748  	    ret[out++] = *in++;
    1749  	}
    1750  
    1751      }
    1752      ret[out] = 0;
    1753      return(ret);
    1754  }
    1755  
    1756  /**
    1757   * xmlURIEscape:
    1758   * @str:  the string of the URI to escape
    1759   *
    1760   * Escaping routine, does not do validity checks !
    1761   * It will try to escape the chars needing this, but this is heuristic
    1762   * based it's impossible to be sure.
    1763   *
    1764   * Returns an copy of the string, but escaped
    1765   *
    1766   * 25 May 2001
    1767   * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
    1768   * according to RFC2396.
    1769   *   - Carl Douglas
    1770   */
    1771  xmlChar *
    1772  xmlURIEscape(const xmlChar * str)
    1773  {
    1774      xmlChar *ret, *segment = NULL;
    1775      xmlURIPtr uri;
    1776      int ret2;
    1777  
    1778  #define NULLCHK(p) if(!p) { \
    1779           xmlURIErrMemory("escaping URI value\n"); \
    1780           xmlFreeURI(uri); \
    1781           return NULL; } \
    1782  
    1783      if (str == NULL)
    1784          return (NULL);
    1785  
    1786      uri = xmlCreateURI();
    1787      if (uri != NULL) {
    1788  	/*
    1789  	 * Allow escaping errors in the unescaped form
    1790  	 */
    1791          uri->cleanup = 1;
    1792          ret2 = xmlParseURIReference(uri, (const char *)str);
    1793          if (ret2) {
    1794              xmlFreeURI(uri);
    1795              return (NULL);
    1796          }
    1797      }
    1798  
    1799      if (!uri)
    1800          return NULL;
    1801  
    1802      ret = NULL;
    1803  
    1804      if (uri->scheme) {
    1805          segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
    1806          NULLCHK(segment)
    1807          ret = xmlStrcat(ret, segment);
    1808          ret = xmlStrcat(ret, BAD_CAST ":");
    1809          xmlFree(segment);
    1810      }
    1811  
    1812      if (uri->authority) {
    1813          segment =
    1814              xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
    1815          NULLCHK(segment)
    1816          ret = xmlStrcat(ret, BAD_CAST "//");
    1817          ret = xmlStrcat(ret, segment);
    1818          xmlFree(segment);
    1819      }
    1820  
    1821      if (uri->user) {
    1822          segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
    1823          NULLCHK(segment)
    1824  		ret = xmlStrcat(ret,BAD_CAST "//");
    1825          ret = xmlStrcat(ret, segment);
    1826          ret = xmlStrcat(ret, BAD_CAST "@");
    1827          xmlFree(segment);
    1828      }
    1829  
    1830      if (uri->server) {
    1831          segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
    1832          NULLCHK(segment)
    1833  		if (uri->user == NULL)
    1834  		ret = xmlStrcat(ret, BAD_CAST "//");
    1835          ret = xmlStrcat(ret, segment);
    1836          xmlFree(segment);
    1837      }
    1838  
    1839      if (uri->port) {
    1840          xmlChar port[10];
    1841  
    1842          snprintf((char *) port, 10, "%d", uri->port);
    1843          ret = xmlStrcat(ret, BAD_CAST ":");
    1844          ret = xmlStrcat(ret, port);
    1845      }
    1846  
    1847      if (uri->path) {
    1848          segment =
    1849              xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
    1850          NULLCHK(segment)
    1851          ret = xmlStrcat(ret, segment);
    1852          xmlFree(segment);
    1853      }
    1854  
    1855      if (uri->query_raw) {
    1856          ret = xmlStrcat(ret, BAD_CAST "?");
    1857          ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
    1858      }
    1859      else if (uri->query) {
    1860          segment =
    1861              xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
    1862          NULLCHK(segment)
    1863          ret = xmlStrcat(ret, BAD_CAST "?");
    1864          ret = xmlStrcat(ret, segment);
    1865          xmlFree(segment);
    1866      }
    1867  
    1868      if (uri->opaque) {
    1869          segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
    1870          NULLCHK(segment)
    1871          ret = xmlStrcat(ret, segment);
    1872          xmlFree(segment);
    1873      }
    1874  
    1875      if (uri->fragment) {
    1876          segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
    1877          NULLCHK(segment)
    1878          ret = xmlStrcat(ret, BAD_CAST "#");
    1879          ret = xmlStrcat(ret, segment);
    1880          xmlFree(segment);
    1881      }
    1882  
    1883      xmlFreeURI(uri);
    1884  #undef NULLCHK
    1885  
    1886      return (ret);
    1887  }
    1888  
    1889  /************************************************************************
    1890   *									*
    1891   *			Public functions				*
    1892   *									*
    1893   ************************************************************************/
    1894  
    1895  /**
    1896   * xmlBuildURI:
    1897   * @URI:  the URI instance found in the document
    1898   * @base:  the base value
    1899   *
    1900   * Computes he final URI of the reference done by checking that
    1901   * the given URI is valid, and building the final URI using the
    1902   * base URI. This is processed according to section 5.2 of the
    1903   * RFC 2396
    1904   *
    1905   * 5.2. Resolving Relative References to Absolute Form
    1906   *
    1907   * Returns a new URI string (to be freed by the caller) or NULL in case
    1908   *         of error.
    1909   */
    1910  xmlChar *
    1911  xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
    1912      xmlChar *val = NULL;
    1913      int ret, len, indx, cur, out;
    1914      xmlURIPtr ref = NULL;
    1915      xmlURIPtr bas = NULL;
    1916      xmlURIPtr res = NULL;
    1917  
    1918      /*
    1919       * 1) The URI reference is parsed into the potential four components and
    1920       *    fragment identifier, as described in Section 4.3.
    1921       *
    1922       *    NOTE that a completely empty URI is treated by modern browsers
    1923       *    as a reference to "." rather than as a synonym for the current
    1924       *    URI.  Should we do that here?
    1925       */
    1926      if (URI == NULL)
    1927  	ret = -1;
    1928      else {
    1929  	if (*URI) {
    1930  	    ref = xmlCreateURI();
    1931  	    if (ref == NULL)
    1932  		goto done;
    1933  	    ret = xmlParseURIReference(ref, (const char *) URI);
    1934  	}
    1935  	else
    1936  	    ret = 0;
    1937      }
    1938      if (ret != 0)
    1939  	goto done;
    1940      if ((ref != NULL) && (ref->scheme != NULL)) {
    1941  	/*
    1942  	 * The URI is absolute don't modify.
    1943  	 */
    1944  	val = xmlStrdup(URI);
    1945  	goto done;
    1946      }
    1947      if (base == NULL)
    1948  	ret = -1;
    1949      else {
    1950  	bas = xmlCreateURI();
    1951  	if (bas == NULL)
    1952  	    goto done;
    1953  	ret = xmlParseURIReference(bas, (const char *) base);
    1954      }
    1955      if (ret != 0) {
    1956  	if (ref)
    1957  	    val = xmlSaveUri(ref);
    1958  	goto done;
    1959      }
    1960      if (ref == NULL) {
    1961  	/*
    1962  	 * the base fragment must be ignored
    1963  	 */
    1964  	if (bas->fragment != NULL) {
    1965  	    xmlFree(bas->fragment);
    1966  	    bas->fragment = NULL;
    1967  	}
    1968  	val = xmlSaveUri(bas);
    1969  	goto done;
    1970      }
    1971  
    1972      /*
    1973       * 2) If the path component is empty and the scheme, authority, and
    1974       *    query components are undefined, then it is a reference to the
    1975       *    current document and we are done.  Otherwise, the reference URI's
    1976       *    query and fragment components are defined as found (or not found)
    1977       *    within the URI reference and not inherited from the base URI.
    1978       *
    1979       *    NOTE that in modern browsers, the parsing differs from the above
    1980       *    in the following aspect:  the query component is allowed to be
    1981       *    defined while still treating this as a reference to the current
    1982       *    document.
    1983       */
    1984      res = xmlCreateURI();
    1985      if (res == NULL)
    1986  	goto done;
    1987      if ((ref->scheme == NULL) && (ref->path == NULL) &&
    1988  	((ref->authority == NULL) && (ref->server == NULL))) {
    1989  	if (bas->scheme != NULL)
    1990  	    res->scheme = xmlMemStrdup(bas->scheme);
    1991  	if (bas->authority != NULL)
    1992  	    res->authority = xmlMemStrdup(bas->authority);
    1993  	else if ((bas->server != NULL) || (bas->port == -1)) {
    1994  	    if (bas->server != NULL)
    1995  		res->server = xmlMemStrdup(bas->server);
    1996  	    if (bas->user != NULL)
    1997  		res->user = xmlMemStrdup(bas->user);
    1998  	    res->port = bas->port;
    1999  	}
    2000  	if (bas->path != NULL)
    2001  	    res->path = xmlMemStrdup(bas->path);
    2002  	if (ref->query_raw != NULL)
    2003  	    res->query_raw = xmlMemStrdup (ref->query_raw);
    2004  	else if (ref->query != NULL)
    2005  	    res->query = xmlMemStrdup(ref->query);
    2006  	else if (bas->query_raw != NULL)
    2007  	    res->query_raw = xmlMemStrdup(bas->query_raw);
    2008  	else if (bas->query != NULL)
    2009  	    res->query = xmlMemStrdup(bas->query);
    2010  	if (ref->fragment != NULL)
    2011  	    res->fragment = xmlMemStrdup(ref->fragment);
    2012  	goto step_7;
    2013      }
    2014  
    2015      /*
    2016       * 3) If the scheme component is defined, indicating that the reference
    2017       *    starts with a scheme name, then the reference is interpreted as an
    2018       *    absolute URI and we are done.  Otherwise, the reference URI's
    2019       *    scheme is inherited from the base URI's scheme component.
    2020       */
    2021      if (ref->scheme != NULL) {
    2022  	val = xmlSaveUri(ref);
    2023  	goto done;
    2024      }
    2025      if (bas->scheme != NULL)
    2026  	res->scheme = xmlMemStrdup(bas->scheme);
    2027  
    2028      if (ref->query_raw != NULL)
    2029  	res->query_raw = xmlMemStrdup(ref->query_raw);
    2030      else if (ref->query != NULL)
    2031  	res->query = xmlMemStrdup(ref->query);
    2032      if (ref->fragment != NULL)
    2033  	res->fragment = xmlMemStrdup(ref->fragment);
    2034  
    2035      /*
    2036       * 4) If the authority component is defined, then the reference is a
    2037       *    network-path and we skip to step 7.  Otherwise, the reference
    2038       *    URI's authority is inherited from the base URI's authority
    2039       *    component, which will also be undefined if the URI scheme does not
    2040       *    use an authority component.
    2041       */
    2042      if ((ref->authority != NULL) || (ref->server != NULL)) {
    2043  	if (ref->authority != NULL)
    2044  	    res->authority = xmlMemStrdup(ref->authority);
    2045  	else {
    2046  	    res->server = xmlMemStrdup(ref->server);
    2047  	    if (ref->user != NULL)
    2048  		res->user = xmlMemStrdup(ref->user);
    2049              res->port = ref->port;
    2050  	}
    2051  	if (ref->path != NULL)
    2052  	    res->path = xmlMemStrdup(ref->path);
    2053  	goto step_7;
    2054      }
    2055      if (bas->authority != NULL)
    2056  	res->authority = xmlMemStrdup(bas->authority);
    2057      else if ((bas->server != NULL) || (bas->port == -1)) {
    2058  	if (bas->server != NULL)
    2059  	    res->server = xmlMemStrdup(bas->server);
    2060  	if (bas->user != NULL)
    2061  	    res->user = xmlMemStrdup(bas->user);
    2062  	res->port = bas->port;
    2063      }
    2064  
    2065      /*
    2066       * 5) If the path component begins with a slash character ("/"), then
    2067       *    the reference is an absolute-path and we skip to step 7.
    2068       */
    2069      if ((ref->path != NULL) && (ref->path[0] == '/')) {
    2070  	res->path = xmlMemStrdup(ref->path);
    2071  	goto step_7;
    2072      }
    2073  
    2074  
    2075      /*
    2076       * 6) If this step is reached, then we are resolving a relative-path
    2077       *    reference.  The relative path needs to be merged with the base
    2078       *    URI's path.  Although there are many ways to do this, we will
    2079       *    describe a simple method using a separate string buffer.
    2080       *
    2081       * Allocate a buffer large enough for the result string.
    2082       */
    2083      len = 2; /* extra / and 0 */
    2084      if (ref->path != NULL)
    2085  	len += strlen(ref->path);
    2086      if (bas->path != NULL)
    2087  	len += strlen(bas->path);
    2088      res->path = (char *) xmlMallocAtomic(len);
    2089      if (res->path == NULL) {
    2090          xmlURIErrMemory("resolving URI against base\n");
    2091  	goto done;
    2092      }
    2093      res->path[0] = 0;
    2094  
    2095      /*
    2096       * a) All but the last segment of the base URI's path component is
    2097       *    copied to the buffer.  In other words, any characters after the
    2098       *    last (right-most) slash character, if any, are excluded.
    2099       */
    2100      cur = 0;
    2101      out = 0;
    2102      if (bas->path != NULL) {
    2103  	while (bas->path[cur] != 0) {
    2104  	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
    2105  		cur++;
    2106  	    if (bas->path[cur] == 0)
    2107  		break;
    2108  
    2109  	    cur++;
    2110  	    while (out < cur) {
    2111  		res->path[out] = bas->path[out];
    2112  		out++;
    2113  	    }
    2114  	}
    2115      }
    2116      res->path[out] = 0;
    2117  
    2118      /*
    2119       * b) The reference's path component is appended to the buffer
    2120       *    string.
    2121       */
    2122      if (ref->path != NULL && ref->path[0] != 0) {
    2123  	indx = 0;
    2124  	/*
    2125  	 * Ensure the path includes a '/'
    2126  	 */
    2127  	if ((out == 0) && (bas->server != NULL))
    2128  	    res->path[out++] = '/';
    2129  	while (ref->path[indx] != 0) {
    2130  	    res->path[out++] = ref->path[indx++];
    2131  	}
    2132      }
    2133      res->path[out] = 0;
    2134  
    2135      /*
    2136       * Steps c) to h) are really path normalization steps
    2137       */
    2138      xmlNormalizeURIPath(res->path);
    2139  
    2140  step_7:
    2141  
    2142      /*
    2143       * 7) The resulting URI components, including any inherited from the
    2144       *    base URI, are recombined to give the absolute form of the URI
    2145       *    reference.
    2146       */
    2147      val = xmlSaveUri(res);
    2148  
    2149  done:
    2150      if (ref != NULL)
    2151  	xmlFreeURI(ref);
    2152      if (bas != NULL)
    2153  	xmlFreeURI(bas);
    2154      if (res != NULL)
    2155  	xmlFreeURI(res);
    2156      return(val);
    2157  }
    2158  
    2159  /**
    2160   * xmlBuildRelativeURI:
    2161   * @URI:  the URI reference under consideration
    2162   * @base:  the base value
    2163   *
    2164   * Expresses the URI of the reference in terms relative to the
    2165   * base.  Some examples of this operation include:
    2166   *     base = "http://site1.com/docs/book1.html"
    2167   *        URI input                        URI returned
    2168   *     docs/pic1.gif                    pic1.gif
    2169   *     docs/img/pic1.gif                img/pic1.gif
    2170   *     img/pic1.gif                     ../img/pic1.gif
    2171   *     http://site1.com/docs/pic1.gif   pic1.gif
    2172   *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
    2173   *
    2174   *     base = "docs/book1.html"
    2175   *        URI input                        URI returned
    2176   *     docs/pic1.gif                    pic1.gif
    2177   *     docs/img/pic1.gif                img/pic1.gif
    2178   *     img/pic1.gif                     ../img/pic1.gif
    2179   *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
    2180   *
    2181   *
    2182   * Note: if the URI reference is really wierd or complicated, it may be
    2183   *       worthwhile to first convert it into a "nice" one by calling
    2184   *       xmlBuildURI (using 'base') before calling this routine,
    2185   *       since this routine (for reasonable efficiency) assumes URI has
    2186   *       already been through some validation.
    2187   *
    2188   * Returns a new URI string (to be freed by the caller) or NULL in case
    2189   * error.
    2190   */
    2191  xmlChar *
    2192  xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
    2193  {
    2194      xmlChar *val = NULL;
    2195      int ret;
    2196      int ix;
    2197      int nbslash = 0;
    2198      int len;
    2199      xmlURIPtr ref = NULL;
    2200      xmlURIPtr bas = NULL;
    2201      xmlChar *bptr, *uptr, *vptr;
    2202      int remove_path = 0;
    2203  
    2204      if ((URI == NULL) || (*URI == 0))
    2205  	return NULL;
    2206  
    2207      /*
    2208       * First parse URI into a standard form
    2209       */
    2210      ref = xmlCreateURI ();
    2211      if (ref == NULL)
    2212  	return NULL;
    2213      /* If URI not already in "relative" form */
    2214      if (URI[0] != '.') {
    2215  	ret = xmlParseURIReference (ref, (const char *) URI);
    2216  	if (ret != 0)
    2217  	    goto done;		/* Error in URI, return NULL */
    2218      } else
    2219  	ref->path = (char *)xmlStrdup(URI);
    2220  
    2221      /*
    2222       * Next parse base into the same standard form
    2223       */
    2224      if ((base == NULL) || (*base == 0)) {
    2225  	val = xmlStrdup (URI);
    2226  	goto done;
    2227      }
    2228      bas = xmlCreateURI ();
    2229      if (bas == NULL)
    2230  	goto done;
    2231      if (base[0] != '.') {
    2232  	ret = xmlParseURIReference (bas, (const char *) base);
    2233  	if (ret != 0)
    2234  	    goto done;		/* Error in base, return NULL */
    2235      } else
    2236  	bas->path = (char *)xmlStrdup(base);
    2237  
    2238      /*
    2239       * If the scheme / server on the URI differs from the base,
    2240       * just return the URI
    2241       */
    2242      if ((ref->scheme != NULL) &&
    2243  	((bas->scheme == NULL) ||
    2244  	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
    2245  	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
    2246  	val = xmlStrdup (URI);
    2247  	goto done;
    2248      }
    2249      if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
    2250  	val = xmlStrdup(BAD_CAST "");
    2251  	goto done;
    2252      }
    2253      if (bas->path == NULL) {
    2254  	val = xmlStrdup((xmlChar *)ref->path);
    2255  	goto done;
    2256      }
    2257      if (ref->path == NULL) {
    2258          ref->path = (char *) "/";
    2259  	remove_path = 1;
    2260      }
    2261  
    2262      /*
    2263       * At this point (at last!) we can compare the two paths
    2264       *
    2265       * First we take care of the special case where either of the
    2266       * two path components may be missing (bug 316224)
    2267       */
    2268      bptr = (xmlChar *)bas->path;
    2269      {
    2270          xmlChar *rptr = (xmlChar *) ref->path;
    2271          int pos = 0;
    2272  
    2273          /*
    2274           * Next we compare the two strings and find where they first differ
    2275           */
    2276  	if ((*rptr == '.') && (rptr[1] == '/'))
    2277              rptr += 2;
    2278  	if ((*bptr == '.') && (bptr[1] == '/'))
    2279              bptr += 2;
    2280  	else if ((*bptr == '/') && (*rptr != '/'))
    2281  	    bptr++;
    2282  	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
    2283  	    pos++;
    2284  
    2285  	if (bptr[pos] == rptr[pos]) {
    2286  	    val = xmlStrdup(BAD_CAST "");
    2287  	    goto done;		/* (I can't imagine why anyone would do this) */
    2288  	}
    2289  
    2290  	/*
    2291  	 * In URI, "back up" to the last '/' encountered.  This will be the
    2292  	 * beginning of the "unique" suffix of URI
    2293  	 */
    2294  	ix = pos;
    2295  	for (; ix > 0; ix--) {
    2296  	    if (rptr[ix - 1] == '/')
    2297  		break;
    2298  	}
    2299  	uptr = (xmlChar *)&rptr[ix];
    2300  
    2301  	/*
    2302  	 * In base, count the number of '/' from the differing point
    2303  	 */
    2304  	for (; bptr[ix] != 0; ix++) {
    2305  	    if (bptr[ix] == '/')
    2306  		nbslash++;
    2307  	}
    2308  
    2309  	/*
    2310  	 * e.g: URI="foo/" base="foo/bar" -> "./"
    2311  	 */
    2312  	if (nbslash == 0 && !uptr[0]) {
    2313  	    val = xmlStrdup(BAD_CAST "./");
    2314  	    goto done;
    2315  	}
    2316  
    2317  	len = xmlStrlen (uptr) + 1;
    2318      }
    2319  
    2320      if (nbslash == 0) {
    2321  	if (uptr != NULL)
    2322  	    /* exception characters from xmlSaveUri */
    2323  	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
    2324  	goto done;
    2325      }
    2326  
    2327      /*
    2328       * Allocate just enough space for the returned string -
    2329       * length of the remainder of the URI, plus enough space
    2330       * for the "../" groups, plus one for the terminator
    2331       */
    2332      val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
    2333      if (val == NULL) {
    2334          xmlURIErrMemory("building relative URI\n");
    2335  	goto done;
    2336      }
    2337      vptr = val;
    2338      /*
    2339       * Put in as many "../" as needed
    2340       */
    2341      for (; nbslash>0; nbslash--) {
    2342  	*vptr++ = '.';
    2343  	*vptr++ = '.';
    2344  	*vptr++ = '/';
    2345      }
    2346      /*
    2347       * Finish up with the end of the URI
    2348       */
    2349      if (uptr != NULL) {
    2350          if ((vptr > val) && (len > 0) &&
    2351  	    (uptr[0] == '/') && (vptr[-1] == '/')) {
    2352  	    memcpy (vptr, uptr + 1, len - 1);
    2353  	    vptr[len - 2] = 0;
    2354  	} else {
    2355  	    memcpy (vptr, uptr, len);
    2356  	    vptr[len - 1] = 0;
    2357  	}
    2358      } else {
    2359  	vptr[len - 1] = 0;
    2360      }
    2361  
    2362      /* escape the freshly-built path */
    2363      vptr = val;
    2364  	/* exception characters from xmlSaveUri */
    2365      val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
    2366      xmlFree(vptr);
    2367  
    2368  done:
    2369      /*
    2370       * Free the working variables
    2371       */
    2372      if (remove_path != 0)
    2373          ref->path = NULL;
    2374      if (ref != NULL)
    2375  	xmlFreeURI (ref);
    2376      if (bas != NULL)
    2377  	xmlFreeURI (bas);
    2378  
    2379      return val;
    2380  }
    2381  
    2382  /**
    2383   * xmlCanonicPath:
    2384   * @path:  the resource locator in a filesystem notation
    2385   *
    2386   * Constructs a canonic path from the specified path.
    2387   *
    2388   * Returns a new canonic path, or a duplicate of the path parameter if the
    2389   * construction fails. The caller is responsible for freeing the memory occupied
    2390   * by the returned string. If there is insufficient memory available, or the
    2391   * argument is NULL, the function returns NULL.
    2392   */
    2393  #define IS_WINDOWS_PATH(p)					\
    2394  	((p != NULL) &&						\
    2395  	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
    2396  	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
    2397  	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
    2398  xmlChar *
    2399  xmlCanonicPath(const xmlChar *path)
    2400  {
    2401  /*
    2402   * For Windows implementations, additional work needs to be done to
    2403   * replace backslashes in pathnames with "forward slashes"
    2404   */
    2405  #if defined(_WIN32) && !defined(__CYGWIN__)
    2406      int len = 0;
    2407      char *p = NULL;
    2408  #endif
    2409      xmlURIPtr uri;
    2410      xmlChar *ret;
    2411      const xmlChar *absuri;
    2412  
    2413      if (path == NULL)
    2414  	return(NULL);
    2415  
    2416  #if defined(_WIN32)
    2417      /*
    2418       * We must not change the backslashes to slashes if the the path
    2419       * starts with \\?\
    2420       * Those paths can be up to 32k characters long.
    2421       * Was added specifically for OpenOffice, those paths can't be converted
    2422       * to URIs anyway.
    2423       */
    2424      if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
    2425          (path[3] == '\\') )
    2426  	return xmlStrdup((const xmlChar *) path);
    2427  #endif
    2428  
    2429  	/* sanitize filename starting with // so it can be used as URI */
    2430      if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
    2431          path++;
    2432  
    2433      if ((uri = xmlParseURI((const char *) path)) != NULL) {
    2434  	xmlFreeURI(uri);
    2435  	return xmlStrdup(path);
    2436      }
    2437  
    2438      /* Check if this is an "absolute uri" */
    2439      absuri = xmlStrstr(path, BAD_CAST "://");
    2440      if (absuri != NULL) {
    2441          int l, j;
    2442  	unsigned char c;
    2443  	xmlChar *escURI;
    2444  
    2445          /*
    2446  	 * this looks like an URI where some parts have not been
    2447  	 * escaped leading to a parsing problem.  Check that the first
    2448  	 * part matches a protocol.
    2449  	 */
    2450  	l = absuri - path;
    2451  	/* Bypass if first part (part before the '://') is > 20 chars */
    2452  	if ((l <= 0) || (l > 20))
    2453  	    goto path_processing;
    2454  	/* Bypass if any non-alpha characters are present in first part */
    2455  	for (j = 0;j < l;j++) {
    2456  	    c = path[j];
    2457  	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
    2458  	        goto path_processing;
    2459  	}
    2460  
    2461  	/* Escape all except the characters specified in the supplied path */
    2462          escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
    2463  	if (escURI != NULL) {
    2464  	    /* Try parsing the escaped path */
    2465  	    uri = xmlParseURI((const char *) escURI);
    2466  	    /* If successful, return the escaped string */
    2467  	    if (uri != NULL) {
    2468  	        xmlFreeURI(uri);
    2469  		return escURI;
    2470  	    }
    2471              xmlFree(escURI);
    2472  	}
    2473      }
    2474  
    2475  path_processing:
    2476  /* For Windows implementations, replace backslashes with 'forward slashes' */
    2477  #if defined(_WIN32) && !defined(__CYGWIN__)
    2478      /*
    2479       * Create a URI structure
    2480       */
    2481      uri = xmlCreateURI();
    2482      if (uri == NULL) {		/* Guard against 'out of memory' */
    2483          return(NULL);
    2484      }
    2485  
    2486      len = xmlStrlen(path);
    2487      if ((len > 2) && IS_WINDOWS_PATH(path)) {
    2488          /* make the scheme 'file' */
    2489  	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
    2490  	/* allocate space for leading '/' + path + string terminator */
    2491  	uri->path = xmlMallocAtomic(len + 2);
    2492  	if (uri->path == NULL) {
    2493  	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
    2494  	    return(NULL);
    2495  	}
    2496  	/* Put in leading '/' plus path */
    2497  	uri->path[0] = '/';
    2498  	p = uri->path + 1;
    2499  	strncpy(p, (char *) path, len + 1);
    2500      } else {
    2501  	uri->path = (char *) xmlStrdup(path);
    2502  	if (uri->path == NULL) {
    2503  	    xmlFreeURI(uri);
    2504  	    return(NULL);
    2505  	}
    2506  	p = uri->path;
    2507      }
    2508      /* Now change all occurences of '\' to '/' */
    2509      while (*p != '\0') {
    2510  	if (*p == '\\')
    2511  	    *p = '/';
    2512  	p++;
    2513      }
    2514  
    2515      if (uri->scheme == NULL) {
    2516  	ret = xmlStrdup((const xmlChar *) uri->path);
    2517      } else {
    2518  	ret = xmlSaveUri(uri);
    2519      }
    2520  
    2521      xmlFreeURI(uri);
    2522  #else
    2523      ret = xmlStrdup((const xmlChar *) path);
    2524  #endif
    2525      return(ret);
    2526  }
    2527  
    2528  /**
    2529   * xmlPathToURI:
    2530   * @path:  the resource locator in a filesystem notation
    2531   *
    2532   * Constructs an URI expressing the existing path
    2533   *
    2534   * Returns a new URI, or a duplicate of the path parameter if the
    2535   * construction fails. The caller is responsible for freeing the memory
    2536   * occupied by the returned string. If there is insufficient memory available,
    2537   * or the argument is NULL, the function returns NULL.
    2538   */
    2539  xmlChar *
    2540  xmlPathToURI(const xmlChar *path)
    2541  {
    2542      xmlURIPtr uri;
    2543      xmlURI temp;
    2544      xmlChar *ret, *cal;
    2545  
    2546      if (path == NULL)
    2547          return(NULL);
    2548  
    2549      if ((uri = xmlParseURI((const char *) path)) != NULL) {
    2550  	xmlFreeURI(uri);
    2551  	return xmlStrdup(path);
    2552      }
    2553      cal = xmlCanonicPath(path);
    2554      if (cal == NULL)
    2555          return(NULL);
    2556  #if defined(_WIN32) && !defined(__CYGWIN__)
    2557      /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
    2558         If 'cal' is a valid URI allready then we are done here, as continuing would make
    2559         it invalid. */
    2560      if ((uri = xmlParseURI((const char *) cal)) != NULL) {
    2561  	xmlFreeURI(uri);
    2562  	return cal;
    2563      }
    2564      /* 'cal' can contain a relative path with backslashes. If that is processed
    2565         by xmlSaveURI, they will be escaped and the external entity loader machinery
    2566         will fail. So convert them to slashes. Misuse 'ret' for walking. */
    2567      ret = cal;
    2568      while (*ret != '\0') {
    2569  	if (*ret == '\\')
    2570  	    *ret = '/';
    2571  	ret++;
    2572      }
    2573  #endif
    2574      memset(&temp, 0, sizeof(temp));
    2575      temp.path = (char *) cal;
    2576      ret = xmlSaveUri(&temp);
    2577      xmlFree(cal);
    2578      return(ret);
    2579  }
    2580  #define bottom_uri
    2581  #include "elfgcchack.h"