(root)/
libxml2-2.12.3/
uri.c
       1  /**
       2   * uri.c: set of generic URI related routines
       3   *
       4   * Reference: RFCs 3986, 2732 and 2373
       5   *
       6   * See Copyright for the status of this software.
       7   *
       8   * daniel@veillard.com
       9   */
      10  
      11  #define IN_LIBXML
      12  #include "libxml.h"
      13  
      14  #include <limits.h>
      15  #include <string.h>
      16  
      17  #include <libxml/xmlmemory.h>
      18  #include <libxml/uri.h>
      19  #include <libxml/xmlerror.h>
      20  
      21  #include "private/error.h"
      22  
      23  /**
      24   * MAX_URI_LENGTH:
      25   *
      26   * The definition of the URI regexp in the above RFC has no size limit
      27   * In practice they are usually relatively short except for the
      28   * data URI scheme as defined in RFC 2397. Even for data URI the usual
      29   * maximum size before hitting random practical limits is around 64 KB
      30   * and 4KB is usually a maximum admitted limit for proper operations.
      31   * The value below is more a security limit than anything else and
      32   * really should never be hit by 'normal' operations
      33   * Set to 1 MByte in 2012, this is only enforced on output
      34   */
      35  #define MAX_URI_LENGTH 1024 * 1024
      36  
      37  #define PORT_EMPTY           0
      38  #define PORT_EMPTY_SERVER   -1
      39  
      40  static void
      41  xmlURIErrMemory(const char *extra)
      42  {
      43      if (extra)
      44          __xmlRaiseError(NULL, NULL, NULL,
      45                          NULL, NULL, XML_FROM_URI,
      46                          XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
      47                          extra, NULL, NULL, 0, 0,
      48                          "Memory allocation failed : %s\n", extra);
      49      else
      50          __xmlRaiseError(NULL, NULL, NULL,
      51                          NULL, NULL, XML_FROM_URI,
      52                          XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
      53                          NULL, NULL, NULL, 0, 0,
      54                          "Memory allocation failed\n");
      55  }
      56  
/*
 * Forward declaration: the parsing routines below call xmlCleanURI()
 * before its definition appears in this file.
 */
static void xmlCleanURI(xmlURIPtr uri);
      58  
/*
 * Old rule from RFC 2396 used in legacy handling code
 * alpha    = lowalpha | upalpha
 *
 * Like the other value-based tests below, the argument is the character
 * itself and may be evaluated more than once.
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* Drop any IS_DIGIT already defined so the local definition is used. */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the value-based tests above, this macro takes a pointer to the
 * character to examine, not the character itself.
 */

#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))
/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
     126  
     127  /*
     128   * Skip to next pointer char, handle escaped sequences
     129   */
     130  
     131  #define NEXT(p) ((*p == '%')? p += 3 : p++)
     132  
     133  /*
     134   * Productions from the spec.
     135   *
     136   *    authority     = server | reg_name
     137   *    reg_name      = 1*( unreserved | escaped | "$" | "," |
     138   *                        ";" | ":" | "@" | "&" | "=" | "+" )
     139   *
     140   * path          = [ abs_path | opaque_part ]
     141   */
     142  
     143  #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
     144  
     145  /************************************************************************
     146   *									*
     147   *                         RFC 3986 parser				*
     148   *									*
     149   ************************************************************************/
     150  
     151  #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
     152  #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
     153                        ((*(p) >= 'A') && (*(p) <= 'Z')))
     154  #define ISA_HEXDIG(p)							\
     155         (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
     156          ((*(p) >= 'A') && (*(p) <= 'F')))
     157  
     158  /*
     159   *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
     160   *                     / "*" / "+" / "," / ";" / "="
     161   */
     162  #define ISA_SUB_DELIM(p)						\
     163        (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
     164         ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
     165         ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
     166         ((*(p) == '=')) || ((*(p) == '\'')))
     167  
     168  /*
     169   *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
     170   */
     171  #define ISA_GEN_DELIM(p)						\
     172        (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
     173         ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
     174         ((*(p) == '@')))
     175  
     176  /*
     177   *    reserved      = gen-delims / sub-delims
     178   */
     179  #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
     180  
     181  /*
     182   *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
     183   */
     184  #define ISA_UNRESERVED(p)						\
     185        ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
     186         ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
     187  
     188  /*
     189   *    pct-encoded   = "%" HEXDIG HEXDIG
     190   */
     191  #define ISA_PCT_ENCODED(p)						\
     192       ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
     193  
     194  /*
     195   *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
     196   */
     197  #define ISA_PCHAR(p)							\
     198       (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
     199        ((*(p) == ':')) || ((*(p) == '@')))
     200  
     201  /**
     202   * xmlParse3986Scheme:
     203   * @uri:  pointer to an URI structure
     204   * @str:  pointer to the string to analyze
     205   *
     206   * Parse an URI scheme
     207   *
     208   * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
     209   *
     210   * Returns 0 or the error code
     211   */
     212  static int
     213  xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
     214      const char *cur;
     215  
     216      if (str == NULL)
     217  	return(-1);
     218  
     219      cur = *str;
     220      if (!ISA_ALPHA(cur))
     221  	return(2);
     222      cur++;
     223      while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
     224             (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
     225      if (uri != NULL) {
     226  	if (uri->scheme != NULL) xmlFree(uri->scheme);
     227  	uri->scheme = STRNDUP(*str, cur - *str);
     228      }
     229      *str = cur;
     230      return(0);
     231  }
     232  
     233  /**
     234   * xmlParse3986Fragment:
     235   * @uri:  pointer to an URI structure
     236   * @str:  pointer to the string to analyze
     237   *
     238   * Parse the query part of an URI
     239   *
     240   * fragment      = *( pchar / "/" / "?" )
     241   * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
     242   *       in the fragment identifier but this is used very broadly for
     243   *       xpointer scheme selection, so we are allowing it here to not break
     244   *       for example all the DocBook processing chains.
     245   *
     246   * Returns 0 or the error code
     247   */
     248  static int
     249  xmlParse3986Fragment(xmlURIPtr uri, const char **str)
     250  {
     251      const char *cur;
     252  
     253      if (str == NULL)
     254          return (-1);
     255  
     256      cur = *str;
     257  
     258      while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
     259             (*cur == '[') || (*cur == ']') ||
     260             ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
     261          NEXT(cur);
     262      if (uri != NULL) {
     263          if (uri->fragment != NULL)
     264              xmlFree(uri->fragment);
     265  	if (uri->cleanup & 2)
     266  	    uri->fragment = STRNDUP(*str, cur - *str);
     267  	else
     268  	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
     269      }
     270      *str = cur;
     271      return (0);
     272  }
     273  
     274  /**
     275   * xmlParse3986Query:
     276   * @uri:  pointer to an URI structure
     277   * @str:  pointer to the string to analyze
     278   *
     279   * Parse the query part of an URI
     280   *
     281   * query = *uric
     282   *
     283   * Returns 0 or the error code
     284   */
     285  static int
     286  xmlParse3986Query(xmlURIPtr uri, const char **str)
     287  {
     288      const char *cur;
     289  
     290      if (str == NULL)
     291          return (-1);
     292  
     293      cur = *str;
     294  
     295      while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
     296             ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
     297          NEXT(cur);
     298      if (uri != NULL) {
     299          if (uri->query != NULL)
     300              xmlFree(uri->query);
     301  	if (uri->cleanup & 2)
     302  	    uri->query = STRNDUP(*str, cur - *str);
     303  	else
     304  	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
     305  
     306  	/* Save the raw bytes of the query as well.
     307  	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
     308  	 */
     309  	if (uri->query_raw != NULL)
     310  	    xmlFree (uri->query_raw);
     311  	uri->query_raw = STRNDUP (*str, cur - *str);
     312      }
     313      *str = cur;
     314      return (0);
     315  }
     316  
     317  /**
     318   * xmlParse3986Port:
     319   * @uri:  pointer to an URI structure
     320   * @str:  the string to analyze
     321   *
     322   * Parse a port part and fills in the appropriate fields
     323   * of the @uri structure
     324   *
     325   * port          = *DIGIT
     326   *
     327   * Returns 0 or the error code
     328   */
     329  static int
     330  xmlParse3986Port(xmlURIPtr uri, const char **str)
     331  {
     332      const char *cur = *str;
     333      int port = 0;
     334  
     335      if (ISA_DIGIT(cur)) {
     336  	while (ISA_DIGIT(cur)) {
     337              int digit = *cur - '0';
     338  
     339              if (port > INT_MAX / 10)
     340                  return(1);
     341              port *= 10;
     342              if (port > INT_MAX - digit)
     343                  return(1);
     344  	    port += digit;
     345  
     346  	    cur++;
     347  	}
     348  	if (uri != NULL)
     349  	    uri->port = port;
     350  	*str = cur;
     351  	return(0);
     352      }
     353      return(1);
     354  }
     355  
     356  /**
     357   * xmlParse3986Userinfo:
     358   * @uri:  pointer to an URI structure
     359   * @str:  the string to analyze
     360   *
     361   * Parse an user information part and fills in the appropriate fields
     362   * of the @uri structure
     363   *
     364   * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
     365   *
     366   * Returns 0 or the error code
     367   */
     368  static int
     369  xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
     370  {
     371      const char *cur;
     372  
     373      cur = *str;
     374      while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
     375             ISA_SUB_DELIM(cur) || (*cur == ':'))
     376  	NEXT(cur);
     377      if (*cur == '@') {
     378  	if (uri != NULL) {
     379  	    if (uri->user != NULL) xmlFree(uri->user);
     380  	    if (uri->cleanup & 2)
     381  		uri->user = STRNDUP(*str, cur - *str);
     382  	    else
     383  		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
     384  	}
     385  	*str = cur;
     386  	return(0);
     387      }
     388      return(1);
     389  }
     390  
     391  /**
     392   * xmlParse3986DecOctet:
     393   * @str:  the string to analyze
     394   *
     395   *    dec-octet     = DIGIT                 ; 0-9
     396   *                  / %x31-39 DIGIT         ; 10-99
     397   *                  / "1" 2DIGIT            ; 100-199
     398   *                  / "2" %x30-34 DIGIT     ; 200-249
     399   *                  / "25" %x30-35          ; 250-255
     400   *
     401   * Skip a dec-octet.
     402   *
     403   * Returns 0 if found and skipped, 1 otherwise
     404   */
     405  static int
     406  xmlParse3986DecOctet(const char **str) {
     407      const char *cur = *str;
     408  
     409      if (!(ISA_DIGIT(cur)))
     410          return(1);
     411      if (!ISA_DIGIT(cur+1))
     412  	cur++;
     413      else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
     414  	cur += 2;
     415      else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
     416  	cur += 3;
     417      else if ((*cur == '2') && (*(cur + 1) >= '0') &&
     418  	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
     419  	cur += 3;
     420      else if ((*cur == '2') && (*(cur + 1) == '5') &&
     421  	     (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
     422  	cur += 3;
     423      else
     424          return(1);
     425      *str = cur;
     426      return(0);
     427  }
     428  /**
     429   * xmlParse3986Host:
     430   * @uri:  pointer to an URI structure
     431   * @str:  the string to analyze
     432   *
     433   * Parse an host part and fills in the appropriate fields
     434   * of the @uri structure
     435   *
     436   * host          = IP-literal / IPv4address / reg-name
     437   * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
     438   * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
     439   * reg-name      = *( unreserved / pct-encoded / sub-delims )
     440   *
     441   * Returns 0 or the error code
     442   */
     443  static int
     444  xmlParse3986Host(xmlURIPtr uri, const char **str)
     445  {
     446      const char *cur = *str;
     447      const char *host;
     448  
     449      host = cur;
     450      /*
     451       * IPv6 and future addressing scheme are enclosed between brackets
     452       */
     453      if (*cur == '[') {
     454          cur++;
     455  	while ((*cur != ']') && (*cur != 0))
     456  	    cur++;
     457  	if (*cur != ']')
     458  	    return(1);
     459  	cur++;
     460  	goto found;
     461      }
     462      /*
     463       * try to parse an IPv4
     464       */
     465      if (ISA_DIGIT(cur)) {
     466          if (xmlParse3986DecOctet(&cur) != 0)
     467  	    goto not_ipv4;
     468  	if (*cur != '.')
     469  	    goto not_ipv4;
     470  	cur++;
     471          if (xmlParse3986DecOctet(&cur) != 0)
     472  	    goto not_ipv4;
     473  	if (*cur != '.')
     474  	    goto not_ipv4;
     475          if (xmlParse3986DecOctet(&cur) != 0)
     476  	    goto not_ipv4;
     477  	if (*cur != '.')
     478  	    goto not_ipv4;
     479          if (xmlParse3986DecOctet(&cur) != 0)
     480  	    goto not_ipv4;
     481  	goto found;
     482  not_ipv4:
     483          cur = *str;
     484      }
     485      /*
     486       * then this should be a hostname which can be empty
     487       */
     488      while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
     489          NEXT(cur);
     490  found:
     491      if (uri != NULL) {
     492  	if (uri->authority != NULL) xmlFree(uri->authority);
     493  	uri->authority = NULL;
     494  	if (uri->server != NULL) xmlFree(uri->server);
     495  	if (cur != host) {
     496  	    if (uri->cleanup & 2)
     497  		uri->server = STRNDUP(host, cur - host);
     498  	    else
     499  		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
     500  	} else
     501  	    uri->server = NULL;
     502      }
     503      *str = cur;
     504      return(0);
     505  }
     506  
     507  /**
     508   * xmlParse3986Authority:
     509   * @uri:  pointer to an URI structure
     510   * @str:  the string to analyze
     511   *
     512   * Parse an authority part and fills in the appropriate fields
     513   * of the @uri structure
     514   *
     515   * authority     = [ userinfo "@" ] host [ ":" port ]
     516   *
     517   * Returns 0 or the error code
     518   */
     519  static int
     520  xmlParse3986Authority(xmlURIPtr uri, const char **str)
     521  {
     522      const char *cur;
     523      int ret;
     524  
     525      cur = *str;
     526      /*
     527       * try to parse an userinfo and check for the trailing @
     528       */
     529      ret = xmlParse3986Userinfo(uri, &cur);
     530      if ((ret != 0) || (*cur != '@'))
     531          cur = *str;
     532      else
     533          cur++;
     534      ret = xmlParse3986Host(uri, &cur);
     535      if (ret != 0) return(ret);
     536      if (*cur == ':') {
     537          cur++;
     538          ret = xmlParse3986Port(uri, &cur);
     539  	if (ret != 0) return(ret);
     540      }
     541      *str = cur;
     542      return(0);
     543  }
     544  
     545  /**
     546   * xmlParse3986Segment:
     547   * @str:  the string to analyze
     548   * @forbid: an optional forbidden character
     549   * @empty: allow an empty segment
     550   *
     551   * Parse a segment and fills in the appropriate fields
     552   * of the @uri structure
     553   *
     554   * segment       = *pchar
     555   * segment-nz    = 1*pchar
     556   * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
     557   *               ; non-zero-length segment without any colon ":"
     558   *
     559   * Returns 0 or the error code
     560   */
     561  static int
     562  xmlParse3986Segment(const char **str, char forbid, int empty)
     563  {
     564      const char *cur;
     565  
     566      cur = *str;
     567      if (!ISA_PCHAR(cur)) {
     568          if (empty)
     569  	    return(0);
     570  	return(1);
     571      }
     572      while (ISA_PCHAR(cur) && (*cur != forbid))
     573          NEXT(cur);
     574      *str = cur;
     575      return (0);
     576  }
     577  
     578  /**
     579   * xmlParse3986PathAbEmpty:
     580   * @uri:  pointer to an URI structure
     581   * @str:  the string to analyze
     582   *
     583   * Parse an path absolute or empty and fills in the appropriate fields
     584   * of the @uri structure
     585   *
     586   * path-abempty  = *( "/" segment )
     587   *
     588   * Returns 0 or the error code
     589   */
     590  static int
     591  xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
     592  {
     593      const char *cur;
     594      int ret;
     595  
     596      cur = *str;
     597  
     598      while (*cur == '/') {
     599          cur++;
     600  	ret = xmlParse3986Segment(&cur, 0, 1);
     601  	if (ret != 0) return(ret);
     602      }
     603      if (uri != NULL) {
     604  	if (uri->path != NULL) xmlFree(uri->path);
     605          if (*str != cur) {
     606              if (uri->cleanup & 2)
     607                  uri->path = STRNDUP(*str, cur - *str);
     608              else
     609                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
     610          } else {
     611              uri->path = NULL;
     612          }
     613      }
     614      *str = cur;
     615      return (0);
     616  }
     617  
     618  /**
     619   * xmlParse3986PathAbsolute:
     620   * @uri:  pointer to an URI structure
     621   * @str:  the string to analyze
     622   *
     623   * Parse an path absolute and fills in the appropriate fields
     624   * of the @uri structure
     625   *
     626   * path-absolute = "/" [ segment-nz *( "/" segment ) ]
     627   *
     628   * Returns 0 or the error code
     629   */
     630  static int
     631  xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
     632  {
     633      const char *cur;
     634      int ret;
     635  
     636      cur = *str;
     637  
     638      if (*cur != '/')
     639          return(1);
     640      cur++;
     641      ret = xmlParse3986Segment(&cur, 0, 0);
     642      if (ret == 0) {
     643  	while (*cur == '/') {
     644  	    cur++;
     645  	    ret = xmlParse3986Segment(&cur, 0, 1);
     646  	    if (ret != 0) return(ret);
     647  	}
     648      }
     649      if (uri != NULL) {
     650  	if (uri->path != NULL) xmlFree(uri->path);
     651          if (cur != *str) {
     652              if (uri->cleanup & 2)
     653                  uri->path = STRNDUP(*str, cur - *str);
     654              else
     655                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
     656          } else {
     657              uri->path = NULL;
     658          }
     659      }
     660      *str = cur;
     661      return (0);
     662  }
     663  
     664  /**
     665   * xmlParse3986PathRootless:
     666   * @uri:  pointer to an URI structure
     667   * @str:  the string to analyze
     668   *
     669   * Parse an path without root and fills in the appropriate fields
     670   * of the @uri structure
     671   *
     672   * path-rootless = segment-nz *( "/" segment )
     673   *
     674   * Returns 0 or the error code
     675   */
     676  static int
     677  xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
     678  {
     679      const char *cur;
     680      int ret;
     681  
     682      cur = *str;
     683  
     684      ret = xmlParse3986Segment(&cur, 0, 0);
     685      if (ret != 0) return(ret);
     686      while (*cur == '/') {
     687          cur++;
     688  	ret = xmlParse3986Segment(&cur, 0, 1);
     689  	if (ret != 0) return(ret);
     690      }
     691      if (uri != NULL) {
     692  	if (uri->path != NULL) xmlFree(uri->path);
     693          if (cur != *str) {
     694              if (uri->cleanup & 2)
     695                  uri->path = STRNDUP(*str, cur - *str);
     696              else
     697                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
     698          } else {
     699              uri->path = NULL;
     700          }
     701      }
     702      *str = cur;
     703      return (0);
     704  }
     705  
     706  /**
     707   * xmlParse3986PathNoScheme:
     708   * @uri:  pointer to an URI structure
     709   * @str:  the string to analyze
     710   *
     711   * Parse an path which is not a scheme and fills in the appropriate fields
     712   * of the @uri structure
     713   *
     714   * path-noscheme = segment-nz-nc *( "/" segment )
     715   *
     716   * Returns 0 or the error code
     717   */
     718  static int
     719  xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
     720  {
     721      const char *cur;
     722      int ret;
     723  
     724      cur = *str;
     725  
     726      ret = xmlParse3986Segment(&cur, ':', 0);
     727      if (ret != 0) return(ret);
     728      while (*cur == '/') {
     729          cur++;
     730  	ret = xmlParse3986Segment(&cur, 0, 1);
     731  	if (ret != 0) return(ret);
     732      }
     733      if (uri != NULL) {
     734  	if (uri->path != NULL) xmlFree(uri->path);
     735          if (cur != *str) {
     736              if (uri->cleanup & 2)
     737                  uri->path = STRNDUP(*str, cur - *str);
     738              else
     739                  uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
     740          } else {
     741              uri->path = NULL;
     742          }
     743      }
     744      *str = cur;
     745      return (0);
     746  }
     747  
     748  /**
     749   * xmlParse3986HierPart:
     750   * @uri:  pointer to an URI structure
     751   * @str:  the string to analyze
     752   *
     753   * Parse an hierarchical part and fills in the appropriate fields
     754   * of the @uri structure
     755   *
     756   * hier-part     = "//" authority path-abempty
     757   *                / path-absolute
     758   *                / path-rootless
     759   *                / path-empty
     760   *
     761   * Returns 0 or the error code
     762   */
     763  static int
     764  xmlParse3986HierPart(xmlURIPtr uri, const char **str)
     765  {
     766      const char *cur;
     767      int ret;
     768  
     769      cur = *str;
     770  
     771      if ((*cur == '/') && (*(cur + 1) == '/')) {
     772          cur += 2;
     773  	ret = xmlParse3986Authority(uri, &cur);
     774  	if (ret != 0) return(ret);
     775          /*
     776           * An empty server is marked with a special URI value.
     777           */
     778  	if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
     779  	    uri->port = PORT_EMPTY_SERVER;
     780  	ret = xmlParse3986PathAbEmpty(uri, &cur);
     781  	if (ret != 0) return(ret);
     782  	*str = cur;
     783  	return(0);
     784      } else if (*cur == '/') {
     785          ret = xmlParse3986PathAbsolute(uri, &cur);
     786  	if (ret != 0) return(ret);
     787      } else if (ISA_PCHAR(cur)) {
     788          ret = xmlParse3986PathRootless(uri, &cur);
     789  	if (ret != 0) return(ret);
     790      } else {
     791  	/* path-empty is effectively empty */
     792  	if (uri != NULL) {
     793  	    if (uri->path != NULL) xmlFree(uri->path);
     794  	    uri->path = NULL;
     795  	}
     796      }
     797      *str = cur;
     798      return (0);
     799  }
     800  
     801  /**
     802   * xmlParse3986RelativeRef:
     803   * @uri:  pointer to an URI structure
     804   * @str:  the string to analyze
     805   *
     806   * Parse an URI string and fills in the appropriate fields
     807   * of the @uri structure
     808   *
     809   * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
     810   * relative-part = "//" authority path-abempty
     811   *               / path-absolute
     812   *               / path-noscheme
     813   *               / path-empty
     814   *
     815   * Returns 0 or the error code
     816   */
     817  static int
     818  xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
     819      int ret;
     820  
     821      if ((*str == '/') && (*(str + 1) == '/')) {
     822          str += 2;
     823  	ret = xmlParse3986Authority(uri, &str);
     824  	if (ret != 0) return(ret);
     825  	ret = xmlParse3986PathAbEmpty(uri, &str);
     826  	if (ret != 0) return(ret);
     827      } else if (*str == '/') {
     828  	ret = xmlParse3986PathAbsolute(uri, &str);
     829  	if (ret != 0) return(ret);
     830      } else if (ISA_PCHAR(str)) {
     831          ret = xmlParse3986PathNoScheme(uri, &str);
     832  	if (ret != 0) return(ret);
     833      } else {
     834  	/* path-empty is effectively empty */
     835  	if (uri != NULL) {
     836  	    if (uri->path != NULL) xmlFree(uri->path);
     837  	    uri->path = NULL;
     838  	}
     839      }
     840  
     841      if (*str == '?') {
     842  	str++;
     843  	ret = xmlParse3986Query(uri, &str);
     844  	if (ret != 0) return(ret);
     845      }
     846      if (*str == '#') {
     847  	str++;
     848  	ret = xmlParse3986Fragment(uri, &str);
     849  	if (ret != 0) return(ret);
     850      }
     851      if (*str != 0) {
     852  	xmlCleanURI(uri);
     853  	return(1);
     854      }
     855      return(0);
     856  }
     857  
     858  
     859  /**
     860   * xmlParse3986URI:
     861   * @uri:  pointer to an URI structure
     862   * @str:  the string to analyze
     863   *
     864   * Parse an URI string and fills in the appropriate fields
     865   * of the @uri structure
     866   *
     867   * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
     868   *
     869   * Returns 0 or the error code
     870   */
     871  static int
     872  xmlParse3986URI(xmlURIPtr uri, const char *str) {
     873      int ret;
     874  
     875      ret = xmlParse3986Scheme(uri, &str);
     876      if (ret != 0) return(ret);
     877      if (*str != ':') {
     878  	return(1);
     879      }
     880      str++;
     881      ret = xmlParse3986HierPart(uri, &str);
     882      if (ret != 0) return(ret);
     883      if (*str == '?') {
     884  	str++;
     885  	ret = xmlParse3986Query(uri, &str);
     886  	if (ret != 0) return(ret);
     887      }
     888      if (*str == '#') {
     889  	str++;
     890  	ret = xmlParse3986Fragment(uri, &str);
     891  	if (ret != 0) return(ret);
     892      }
     893      if (*str != 0) {
     894  	xmlCleanURI(uri);
     895  	return(1);
     896      }
     897      return(0);
     898  }
     899  
     900  /**
     901   * xmlParse3986URIReference:
     902   * @uri:  pointer to an URI structure
     903   * @str:  the string to analyze
     904   *
     905   * Parse an URI reference string and fills in the appropriate fields
     906   * of the @uri structure
     907   *
     908   * URI-reference = URI / relative-ref
     909   *
     910   * Returns 0 or the error code
     911   */
     912  static int
     913  xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
     914      int ret;
     915  
     916      if (str == NULL)
     917  	return(-1);
     918      xmlCleanURI(uri);
     919  
     920      /*
     921       * Try first to parse absolute refs, then fallback to relative if
     922       * it fails.
     923       */
     924      ret = xmlParse3986URI(uri, str);
     925      if (ret != 0) {
     926  	xmlCleanURI(uri);
     927          ret = xmlParse3986RelativeRef(uri, str);
     928  	if (ret != 0) {
     929  	    xmlCleanURI(uri);
     930  	    return(ret);
     931  	}
     932      }
     933      return(0);
     934  }
     935  
     936  /**
     937   * xmlParseURI:
     938   * @str:  the URI string to analyze
     939   *
     940   * Parse an URI based on RFC 3986
     941   *
     942   * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
     943   *
     944   * Returns a newly built xmlURIPtr or NULL in case of error
     945   */
     946  xmlURIPtr
     947  xmlParseURI(const char *str) {
     948      xmlURIPtr uri;
     949      int ret;
     950  
     951      if (str == NULL)
     952  	return(NULL);
     953      uri = xmlCreateURI();
     954      if (uri != NULL) {
     955  	ret = xmlParse3986URIReference(uri, str);
     956          if (ret) {
     957  	    xmlFreeURI(uri);
     958  	    return(NULL);
     959  	}
     960      }
     961      return(uri);
     962  }
     963  
     964  /**
     965   * xmlParseURIReference:
     966   * @uri:  pointer to an URI structure
     967   * @str:  the string to analyze
     968   *
     969   * Parse an URI reference string based on RFC 3986 and fills in the
     970   * appropriate fields of the @uri structure
     971   *
     972   * URI-reference = URI / relative-ref
     973   *
     974   * Returns 0 or the error code
     975   */
     976  int
     977  xmlParseURIReference(xmlURIPtr uri, const char *str) {
     978      return(xmlParse3986URIReference(uri, str));
     979  }
     980  
     981  /**
     982   * xmlParseURIRaw:
     983   * @str:  the URI string to analyze
     984   * @raw:  if 1 unescaping of URI pieces are disabled
     985   *
     986   * Parse an URI but allows to keep intact the original fragments.
     987   *
     988   * URI-reference = URI / relative-ref
     989   *
     990   * Returns a newly built xmlURIPtr or NULL in case of error
     991   */
     992  xmlURIPtr
     993  xmlParseURIRaw(const char *str, int raw) {
     994      xmlURIPtr uri;
     995      int ret;
     996  
     997      if (str == NULL)
     998  	return(NULL);
     999      uri = xmlCreateURI();
    1000      if (uri != NULL) {
    1001          if (raw) {
    1002  	    uri->cleanup |= 2;
    1003  	}
    1004  	ret = xmlParseURIReference(uri, str);
    1005          if (ret) {
    1006  	    xmlFreeURI(uri);
    1007  	    return(NULL);
    1008  	}
    1009      }
    1010      return(uri);
    1011  }
    1012  
    1013  /************************************************************************
    1014   *									*
    1015   *			Generic URI structure functions			*
    1016   *									*
    1017   ************************************************************************/
    1018  
    1019  /**
    1020   * xmlCreateURI:
    1021   *
    1022   * Simply creates an empty xmlURI
    1023   *
    1024   * Returns the new structure or NULL in case of error
    1025   */
    1026  xmlURIPtr
    1027  xmlCreateURI(void) {
    1028      xmlURIPtr ret;
    1029  
    1030      ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
    1031      if (ret == NULL) {
    1032          xmlURIErrMemory("creating URI structure\n");
    1033  	return(NULL);
    1034      }
    1035      memset(ret, 0, sizeof(xmlURI));
    1036      ret->port = PORT_EMPTY;
    1037      return(ret);
    1038  }
    1039  
    1040  /**
    1041   * xmlSaveUriRealloc:
    1042   *
    1043   * Function to handle properly a reallocation when saving an URI
    1044   * Also imposes some limit on the length of an URI string output
    1045   */
    1046  static xmlChar *
    1047  xmlSaveUriRealloc(xmlChar *ret, int *max) {
    1048      xmlChar *temp;
    1049      int tmp;
    1050  
    1051      if (*max > MAX_URI_LENGTH) {
    1052          xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
    1053          return(NULL);
    1054      }
    1055      tmp = *max * 2;
    1056      temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
    1057      if (temp == NULL) {
    1058          xmlURIErrMemory("saving URI\n");
    1059          return(NULL);
    1060      }
    1061      *max = tmp;
    1062      return(temp);
    1063  }
    1064  
    1065  /**
    1066   * xmlSaveUri:
    1067   * @uri:  pointer to an xmlURI
    1068   *
    1069   * Save the URI as an escaped string
    1070   *
    1071   * Returns a new string (to be deallocated by caller)
    1072   */
    1073  xmlChar *
    1074  xmlSaveUri(xmlURIPtr uri) {
    1075      xmlChar *ret = NULL;
    1076      xmlChar *temp;
    1077      const char *p;
    1078      int len;
    1079      int max;
    1080  
    1081      if (uri == NULL) return(NULL);
    1082  
    1083  
    1084      max = 80;
    1085      ret = (xmlChar *) xmlMallocAtomic(max + 1);
    1086      if (ret == NULL) {
    1087          xmlURIErrMemory("saving URI\n");
    1088  	return(NULL);
    1089      }
    1090      len = 0;
    1091  
    1092      if (uri->scheme != NULL) {
    1093  	p = uri->scheme;
    1094  	while (*p != 0) {
    1095  	    if (len >= max) {
    1096                  temp = xmlSaveUriRealloc(ret, &max);
    1097                  if (temp == NULL) goto mem_error;
    1098  		ret = temp;
    1099  	    }
    1100  	    ret[len++] = *p++;
    1101  	}
    1102  	if (len >= max) {
    1103              temp = xmlSaveUriRealloc(ret, &max);
    1104              if (temp == NULL) goto mem_error;
    1105              ret = temp;
    1106  	}
    1107  	ret[len++] = ':';
    1108      }
    1109      if (uri->opaque != NULL) {
    1110  	p = uri->opaque;
    1111  	while (*p != 0) {
    1112  	    if (len + 3 >= max) {
    1113                  temp = xmlSaveUriRealloc(ret, &max);
    1114                  if (temp == NULL) goto mem_error;
    1115                  ret = temp;
    1116  	    }
    1117  	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
    1118  		ret[len++] = *p++;
    1119  	    else {
    1120  		int val = *(unsigned char *)p++;
    1121  		int hi = val / 0x10, lo = val % 0x10;
    1122  		ret[len++] = '%';
    1123  		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1124  		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1125  	    }
    1126  	}
    1127      } else {
    1128  	if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
    1129  	    if (len + 3 >= max) {
    1130                  temp = xmlSaveUriRealloc(ret, &max);
    1131                  if (temp == NULL) goto mem_error;
    1132                  ret = temp;
    1133  	    }
    1134  	    ret[len++] = '/';
    1135  	    ret[len++] = '/';
    1136  	    if (uri->user != NULL) {
    1137  		p = uri->user;
    1138  		while (*p != 0) {
    1139  		    if (len + 3 >= max) {
    1140                          temp = xmlSaveUriRealloc(ret, &max);
    1141                          if (temp == NULL) goto mem_error;
    1142                          ret = temp;
    1143  		    }
    1144  		    if ((IS_UNRESERVED(*(p))) ||
    1145  			((*(p) == ';')) || ((*(p) == ':')) ||
    1146  			((*(p) == '&')) || ((*(p) == '=')) ||
    1147  			((*(p) == '+')) || ((*(p) == '$')) ||
    1148  			((*(p) == ',')))
    1149  			ret[len++] = *p++;
    1150  		    else {
    1151  			int val = *(unsigned char *)p++;
    1152  			int hi = val / 0x10, lo = val % 0x10;
    1153  			ret[len++] = '%';
    1154  			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1155  			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1156  		    }
    1157  		}
    1158  		if (len + 3 >= max) {
    1159                      temp = xmlSaveUriRealloc(ret, &max);
    1160                      if (temp == NULL) goto mem_error;
    1161                      ret = temp;
    1162  		}
    1163  		ret[len++] = '@';
    1164  	    }
    1165  	    if (uri->server != NULL) {
    1166  		p = uri->server;
    1167  		while (*p != 0) {
    1168  		    if (len >= max) {
    1169  			temp = xmlSaveUriRealloc(ret, &max);
    1170  			if (temp == NULL) goto mem_error;
    1171  			ret = temp;
    1172  		    }
    1173                      /* TODO: escaping? */
    1174  		    ret[len++] = (xmlChar) *p++;
    1175  		}
    1176  	    }
    1177              if (uri->port > 0) {
    1178                  if (len + 10 >= max) {
    1179                      temp = xmlSaveUriRealloc(ret, &max);
    1180                      if (temp == NULL) goto mem_error;
    1181                      ret = temp;
    1182                  }
    1183                  len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
    1184              }
    1185  	} else if (uri->authority != NULL) {
    1186  	    if (len + 3 >= max) {
    1187                  temp = xmlSaveUriRealloc(ret, &max);
    1188                  if (temp == NULL) goto mem_error;
    1189                  ret = temp;
    1190  	    }
    1191  	    ret[len++] = '/';
    1192  	    ret[len++] = '/';
    1193  	    p = uri->authority;
    1194  	    while (*p != 0) {
    1195  		if (len + 3 >= max) {
    1196                      temp = xmlSaveUriRealloc(ret, &max);
    1197                      if (temp == NULL) goto mem_error;
    1198                      ret = temp;
    1199  		}
    1200  		if ((IS_UNRESERVED(*(p))) ||
    1201                      ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
    1202                      ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
    1203                      ((*(p) == '=')) || ((*(p) == '+')))
    1204  		    ret[len++] = *p++;
    1205  		else {
    1206  		    int val = *(unsigned char *)p++;
    1207  		    int hi = val / 0x10, lo = val % 0x10;
    1208  		    ret[len++] = '%';
    1209  		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1210  		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1211  		}
    1212  	    }
    1213  	} else if (uri->scheme != NULL) {
    1214  	    if (len + 3 >= max) {
    1215                  temp = xmlSaveUriRealloc(ret, &max);
    1216                  if (temp == NULL) goto mem_error;
    1217                  ret = temp;
    1218  	    }
    1219  	}
    1220  	if (uri->path != NULL) {
    1221  	    p = uri->path;
    1222  	    /*
    1223  	     * the colon in file:///d: should not be escaped or
    1224  	     * Windows accesses fail later.
    1225  	     */
    1226  	    if ((uri->scheme != NULL) &&
    1227  		(p[0] == '/') &&
    1228  		(((p[1] >= 'a') && (p[1] <= 'z')) ||
    1229  		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
    1230  		(p[2] == ':') &&
    1231  	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
    1232  		if (len + 3 >= max) {
    1233                      temp = xmlSaveUriRealloc(ret, &max);
    1234                      if (temp == NULL) goto mem_error;
    1235                      ret = temp;
    1236  		}
    1237  		ret[len++] = *p++;
    1238  		ret[len++] = *p++;
    1239  		ret[len++] = *p++;
    1240  	    }
    1241  	    while (*p != 0) {
    1242  		if (len + 3 >= max) {
    1243                      temp = xmlSaveUriRealloc(ret, &max);
    1244                      if (temp == NULL) goto mem_error;
    1245                      ret = temp;
    1246  		}
    1247  		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
    1248                      ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
    1249  	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
    1250  	            ((*(p) == ',')))
    1251  		    ret[len++] = *p++;
    1252  		else {
    1253  		    int val = *(unsigned char *)p++;
    1254  		    int hi = val / 0x10, lo = val % 0x10;
    1255  		    ret[len++] = '%';
    1256  		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1257  		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1258  		}
    1259  	    }
    1260  	}
    1261  	if (uri->query_raw != NULL) {
    1262  	    if (len + 1 >= max) {
    1263                  temp = xmlSaveUriRealloc(ret, &max);
    1264                  if (temp == NULL) goto mem_error;
    1265                  ret = temp;
    1266  	    }
    1267  	    ret[len++] = '?';
    1268  	    p = uri->query_raw;
    1269  	    while (*p != 0) {
    1270  		if (len + 1 >= max) {
    1271                      temp = xmlSaveUriRealloc(ret, &max);
    1272                      if (temp == NULL) goto mem_error;
    1273                      ret = temp;
    1274  		}
    1275  		ret[len++] = *p++;
    1276  	    }
    1277  	} else if (uri->query != NULL) {
    1278  	    if (len + 3 >= max) {
    1279                  temp = xmlSaveUriRealloc(ret, &max);
    1280                  if (temp == NULL) goto mem_error;
    1281                  ret = temp;
    1282  	    }
    1283  	    ret[len++] = '?';
    1284  	    p = uri->query;
    1285  	    while (*p != 0) {
    1286  		if (len + 3 >= max) {
    1287                      temp = xmlSaveUriRealloc(ret, &max);
    1288                      if (temp == NULL) goto mem_error;
    1289                      ret = temp;
    1290  		}
    1291  		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
    1292  		    ret[len++] = *p++;
    1293  		else {
    1294  		    int val = *(unsigned char *)p++;
    1295  		    int hi = val / 0x10, lo = val % 0x10;
    1296  		    ret[len++] = '%';
    1297  		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1298  		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1299  		}
    1300  	    }
    1301  	}
    1302      }
    1303      if (uri->fragment != NULL) {
    1304  	if (len + 3 >= max) {
    1305              temp = xmlSaveUriRealloc(ret, &max);
    1306              if (temp == NULL) goto mem_error;
    1307              ret = temp;
    1308  	}
    1309  	ret[len++] = '#';
    1310  	p = uri->fragment;
    1311  	while (*p != 0) {
    1312  	    if (len + 3 >= max) {
    1313                  temp = xmlSaveUriRealloc(ret, &max);
    1314                  if (temp == NULL) goto mem_error;
    1315                  ret = temp;
    1316  	    }
    1317  	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
    1318  		ret[len++] = *p++;
    1319  	    else {
    1320  		int val = *(unsigned char *)p++;
    1321  		int hi = val / 0x10, lo = val % 0x10;
    1322  		ret[len++] = '%';
    1323  		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
    1324  		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
    1325  	    }
    1326  	}
    1327      }
    1328      if (len >= max) {
    1329          temp = xmlSaveUriRealloc(ret, &max);
    1330          if (temp == NULL) goto mem_error;
    1331          ret = temp;
    1332      }
    1333      ret[len] = 0;
    1334      return(ret);
    1335  
    1336  mem_error:
    1337      xmlFree(ret);
    1338      return(NULL);
    1339  }
    1340  
    1341  /**
    1342   * xmlPrintURI:
    1343   * @stream:  a FILE* for the output
    1344   * @uri:  pointer to an xmlURI
    1345   *
    1346   * Prints the URI in the stream @stream.
    1347   */
    1348  void
    1349  xmlPrintURI(FILE *stream, xmlURIPtr uri) {
    1350      xmlChar *out;
    1351  
    1352      out = xmlSaveUri(uri);
    1353      if (out != NULL) {
    1354  	fprintf(stream, "%s", (char *) out);
    1355  	xmlFree(out);
    1356      }
    1357  }
    1358  
    1359  /**
    1360   * xmlCleanURI:
    1361   * @uri:  pointer to an xmlURI
    1362   *
    1363   * Make sure the xmlURI struct is free of content
    1364   */
    1365  static void
    1366  xmlCleanURI(xmlURIPtr uri) {
    1367      if (uri == NULL) return;
    1368  
    1369      if (uri->scheme != NULL) xmlFree(uri->scheme);
    1370      uri->scheme = NULL;
    1371      if (uri->server != NULL) xmlFree(uri->server);
    1372      uri->server = NULL;
    1373      if (uri->user != NULL) xmlFree(uri->user);
    1374      uri->user = NULL;
    1375      if (uri->path != NULL) xmlFree(uri->path);
    1376      uri->path = NULL;
    1377      if (uri->fragment != NULL) xmlFree(uri->fragment);
    1378      uri->fragment = NULL;
    1379      if (uri->opaque != NULL) xmlFree(uri->opaque);
    1380      uri->opaque = NULL;
    1381      if (uri->authority != NULL) xmlFree(uri->authority);
    1382      uri->authority = NULL;
    1383      if (uri->query != NULL) xmlFree(uri->query);
    1384      uri->query = NULL;
    1385      if (uri->query_raw != NULL) xmlFree(uri->query_raw);
    1386      uri->query_raw = NULL;
    1387  }
    1388  
    1389  /**
    1390   * xmlFreeURI:
    1391   * @uri:  pointer to an xmlURI
    1392   *
    1393   * Free up the xmlURI struct
    1394   */
    1395  void
    1396  xmlFreeURI(xmlURIPtr uri) {
    1397      if (uri == NULL) return;
    1398  
    1399      if (uri->scheme != NULL) xmlFree(uri->scheme);
    1400      if (uri->server != NULL) xmlFree(uri->server);
    1401      if (uri->user != NULL) xmlFree(uri->user);
    1402      if (uri->path != NULL) xmlFree(uri->path);
    1403      if (uri->fragment != NULL) xmlFree(uri->fragment);
    1404      if (uri->opaque != NULL) xmlFree(uri->opaque);
    1405      if (uri->authority != NULL) xmlFree(uri->authority);
    1406      if (uri->query != NULL) xmlFree(uri->query);
    1407      if (uri->query_raw != NULL) xmlFree(uri->query_raw);
    1408      xmlFree(uri);
    1409  }
    1410  
    1411  /************************************************************************
    1412   *									*
    1413   *			Helper functions				*
    1414   *									*
    1415   ************************************************************************/
    1416  
    1417  /**
    1418   * xmlNormalizeURIPath:
    1419   * @path:  pointer to the path string
    1420   *
    1421   * Applies the 5 normalization steps to a path string--that is, RFC 2396
    1422   * Section 5.2, steps 6.c through 6.g.
    1423   *
    1424   * Normalization occurs directly on the string, no new allocation is done
    1425   *
    1426   * Returns 0 or an error code
    1427   */
    1428  int
    1429  xmlNormalizeURIPath(char *path) {
    1430      char *cur, *out;
    1431  
    1432      if (path == NULL)
    1433  	return(-1);
    1434  
    1435      /* Skip all initial "/" chars.  We want to get to the beginning of the
    1436       * first non-empty segment.
    1437       */
    1438      cur = path;
    1439      while (cur[0] == '/')
    1440        ++cur;
    1441      if (cur[0] == '\0')
    1442        return(0);
    1443  
    1444      /* Keep everything we've seen so far.  */
    1445      out = cur;
    1446  
    1447      /*
    1448       * Analyze each segment in sequence for cases (c) and (d).
    1449       */
    1450      while (cur[0] != '\0') {
    1451  	/*
    1452  	 * c) All occurrences of "./", where "." is a complete path segment,
    1453  	 *    are removed from the buffer string.
    1454  	 */
    1455  	if ((cur[0] == '.') && (cur[1] == '/')) {
    1456  	    cur += 2;
    1457  	    /* '//' normalization should be done at this point too */
    1458  	    while (cur[0] == '/')
    1459  		cur++;
    1460  	    continue;
    1461  	}
    1462  
    1463  	/*
    1464  	 * d) If the buffer string ends with "." as a complete path segment,
    1465  	 *    that "." is removed.
    1466  	 */
    1467  	if ((cur[0] == '.') && (cur[1] == '\0'))
    1468  	    break;
    1469  
    1470  	/* Otherwise keep the segment.  */
    1471  	while (cur[0] != '/') {
    1472              if (cur[0] == '\0')
    1473                goto done_cd;
    1474  	    (out++)[0] = (cur++)[0];
    1475  	}
    1476  	/* normalize // */
    1477  	while ((cur[0] == '/') && (cur[1] == '/'))
    1478  	    cur++;
    1479  
    1480          (out++)[0] = (cur++)[0];
    1481      }
    1482   done_cd:
    1483      out[0] = '\0';
    1484  
    1485      /* Reset to the beginning of the first segment for the next sequence.  */
    1486      cur = path;
    1487      while (cur[0] == '/')
    1488        ++cur;
    1489      if (cur[0] == '\0')
    1490  	return(0);
    1491  
    1492      /*
    1493       * Analyze each segment in sequence for cases (e) and (f).
    1494       *
    1495       * e) All occurrences of "<segment>/../", where <segment> is a
    1496       *    complete path segment not equal to "..", are removed from the
    1497       *    buffer string.  Removal of these path segments is performed
    1498       *    iteratively, removing the leftmost matching pattern on each
    1499       *    iteration, until no matching pattern remains.
    1500       *
    1501       * f) If the buffer string ends with "<segment>/..", where <segment>
    1502       *    is a complete path segment not equal to "..", that
    1503       *    "<segment>/.." is removed.
    1504       *
    1505       * To satisfy the "iterative" clause in (e), we need to collapse the
    1506       * string every time we find something that needs to be removed.  Thus,
    1507       * we don't need to keep two pointers into the string: we only need a
    1508       * "current position" pointer.
    1509       */
    1510      while (1) {
    1511          char *segp, *tmp;
    1512  
    1513          /* At the beginning of each iteration of this loop, "cur" points to
    1514           * the first character of the segment we want to examine.
    1515           */
    1516  
    1517          /* Find the end of the current segment.  */
    1518          segp = cur;
    1519          while ((segp[0] != '/') && (segp[0] != '\0'))
    1520            ++segp;
    1521  
    1522          /* If this is the last segment, we're done (we need at least two
    1523           * segments to meet the criteria for the (e) and (f) cases).
    1524           */
    1525          if (segp[0] == '\0')
    1526            break;
    1527  
    1528          /* If the first segment is "..", or if the next segment _isn't_ "..",
    1529           * keep this segment and try the next one.
    1530           */
    1531          ++segp;
    1532          if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
    1533              || ((segp[0] != '.') || (segp[1] != '.')
    1534                  || ((segp[2] != '/') && (segp[2] != '\0')))) {
    1535            cur = segp;
    1536            continue;
    1537          }
    1538  
    1539          /* If we get here, remove this segment and the next one and back up
    1540           * to the previous segment (if there is one), to implement the
    1541           * "iteratively" clause.  It's pretty much impossible to back up
    1542           * while maintaining two pointers into the buffer, so just compact
    1543           * the whole buffer now.
    1544           */
    1545  
    1546          /* If this is the end of the buffer, we're done.  */
    1547          if (segp[2] == '\0') {
    1548            cur[0] = '\0';
    1549            break;
    1550          }
    1551          /* Valgrind complained, strcpy(cur, segp + 3); */
    1552          /* string will overlap, do not use strcpy */
    1553          tmp = cur;
    1554          segp += 3;
    1555          while ((*tmp++ = *segp++) != 0)
    1556            ;
    1557  
    1558          /* If there are no previous segments, then keep going from here.  */
    1559          segp = cur;
    1560          while ((segp > path) && ((--segp)[0] == '/'))
    1561            ;
    1562          if (segp == path)
    1563            continue;
    1564  
    1565          /* "segp" is pointing to the end of a previous segment; find it's
    1566           * start.  We need to back up to the previous segment and start
    1567           * over with that to handle things like "foo/bar/../..".  If we
    1568           * don't do this, then on the first pass we'll remove the "bar/..",
    1569           * but be pointing at the second ".." so we won't realize we can also
    1570           * remove the "foo/..".
    1571           */
    1572          cur = segp;
    1573          while ((cur > path) && (cur[-1] != '/'))
    1574            --cur;
    1575      }
    1576      out[0] = '\0';
    1577  
    1578      /*
    1579       * g) If the resulting buffer string still begins with one or more
    1580       *    complete path segments of "..", then the reference is
    1581       *    considered to be in error. Implementations may handle this
    1582       *    error by retaining these components in the resolved path (i.e.,
    1583       *    treating them as part of the final URI), by removing them from
    1584       *    the resolved path (i.e., discarding relative levels above the
    1585       *    root), or by avoiding traversal of the reference.
    1586       *
    1587       * We discard them from the final path.
    1588       */
    1589      if (path[0] == '/') {
    1590        cur = path;
    1591        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
    1592               && ((cur[3] == '/') || (cur[3] == '\0')))
    1593  	cur += 3;
    1594  
    1595        if (cur != path) {
    1596  	out = path;
    1597  	while (cur[0] != '\0')
    1598            (out++)[0] = (cur++)[0];
    1599  	out[0] = 0;
    1600        }
    1601      }
    1602  
    1603      return(0);
    1604  }
    1605  
    1606  static int is_hex(char c) {
    1607      if (((c >= '0') && (c <= '9')) ||
    1608          ((c >= 'a') && (c <= 'f')) ||
    1609          ((c >= 'A') && (c <= 'F')))
    1610  	return(1);
    1611      return(0);
    1612  }
    1613  
    1614  /**
    1615   * xmlURIUnescapeString:
    1616   * @str:  the string to unescape
    1617   * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
    1618   * @target:  optional destination buffer
    1619   *
    1620   * Unescaping routine, but does not check that the string is an URI. The
    1621   * output is a direct unsigned char translation of %XX values (no encoding)
    1622   * Note that the length of the result can only be smaller or same size as
    1623   * the input string.
    1624   *
    1625   * Returns a copy of the string, but unescaped, will return NULL only in case
    1626   * of error
    1627   */
    1628  char *
    1629  xmlURIUnescapeString(const char *str, int len, char *target) {
    1630      char *ret, *out;
    1631      const char *in;
    1632  
    1633      if (str == NULL)
    1634  	return(NULL);
    1635      if (len <= 0) len = strlen(str);
    1636      if (len < 0) return(NULL);
    1637  
    1638      if (target == NULL) {
    1639  	ret = (char *) xmlMallocAtomic(len + 1);
    1640  	if (ret == NULL) {
    1641              xmlURIErrMemory("unescaping URI value\n");
    1642  	    return(NULL);
    1643  	}
    1644      } else
    1645  	ret = target;
    1646      in = str;
    1647      out = ret;
    1648      while(len > 0) {
    1649  	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
    1650              int c = 0;
    1651  	    in++;
    1652  	    if ((*in >= '0') && (*in <= '9'))
    1653  	        c = (*in - '0');
    1654  	    else if ((*in >= 'a') && (*in <= 'f'))
    1655  	        c = (*in - 'a') + 10;
    1656  	    else if ((*in >= 'A') && (*in <= 'F'))
    1657  	        c = (*in - 'A') + 10;
    1658  	    in++;
    1659  	    if ((*in >= '0') && (*in <= '9'))
    1660  	        c = c * 16 + (*in - '0');
    1661  	    else if ((*in >= 'a') && (*in <= 'f'))
    1662  	        c = c * 16 + (*in - 'a') + 10;
    1663  	    else if ((*in >= 'A') && (*in <= 'F'))
    1664  	        c = c * 16 + (*in - 'A') + 10;
    1665  	    in++;
    1666  	    len -= 3;
    1667              /* Explicit sign change */
    1668  	    *out++ = (char) c;
    1669  	} else {
    1670  	    *out++ = *in++;
    1671  	    len--;
    1672  	}
    1673      }
    1674      *out = 0;
    1675      return(ret);
    1676  }
    1677  
    1678  /**
    1679   * xmlURIEscapeStr:
    1680   * @str:  string to escape
    1681   * @list: exception list string of chars not to escape
    1682   *
    1683   * This routine escapes a string to hex, ignoring reserved characters
    1684   * (a-z, A-Z, 0-9, "@-_.!~*'()") and the characters in the exception list.
    1685   *
    1686   * Returns a new escaped string or NULL in case of error.
    1687   */
    1688  xmlChar *
    1689  xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
    1690      xmlChar *ret, ch;
    1691      xmlChar *temp;
    1692      const xmlChar *in;
    1693      int len, out;
    1694  
    1695      if (str == NULL)
    1696  	return(NULL);
    1697      if (str[0] == 0)
    1698  	return(xmlStrdup(str));
    1699      len = xmlStrlen(str);
    1700      if (!(len > 0)) return(NULL);
    1701  
    1702      len += 20;
    1703      ret = (xmlChar *) xmlMallocAtomic(len);
    1704      if (ret == NULL) {
    1705          xmlURIErrMemory("escaping URI value\n");
    1706  	return(NULL);
    1707      }
    1708      in = (const xmlChar *) str;
    1709      out = 0;
    1710      while(*in != 0) {
    1711  	if (len - out <= 3) {
    1712              temp = xmlSaveUriRealloc(ret, &len);
    1713  	    if (temp == NULL) {
    1714                  xmlURIErrMemory("escaping URI value\n");
    1715  		xmlFree(ret);
    1716  		return(NULL);
    1717  	    }
    1718  	    ret = temp;
    1719  	}
    1720  
    1721  	ch = *in;
    1722  
    1723  	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
    1724  	    unsigned char val;
    1725  	    ret[out++] = '%';
    1726  	    val = ch >> 4;
    1727  	    if (val <= 9)
    1728  		ret[out++] = '0' + val;
    1729  	    else
    1730  		ret[out++] = 'A' + val - 0xA;
    1731  	    val = ch & 0xF;
    1732  	    if (val <= 9)
    1733  		ret[out++] = '0' + val;
    1734  	    else
    1735  		ret[out++] = 'A' + val - 0xA;
    1736  	    in++;
    1737  	} else {
    1738  	    ret[out++] = *in++;
    1739  	}
    1740  
    1741      }
    1742      ret[out] = 0;
    1743      return(ret);
    1744  }
    1745  
    1746  /**
    1747   * xmlURIEscape:
    1748   * @str:  the string of the URI to escape
    1749   *
    1750   * Escaping routine, does not do validity checks !
    1751   * It will try to escape the chars needing this, but this is heuristic
    1752   * based it's impossible to be sure.
    1753   *
    1754   * Returns an copy of the string, but escaped
    1755   *
    1756   * 25 May 2001
    1757   * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
    1758   * according to RFC2396.
    1759   *   - Carl Douglas
    1760   */
    1761  xmlChar *
    1762  xmlURIEscape(const xmlChar * str)
    1763  {
    1764      xmlChar *ret, *segment = NULL;
    1765      xmlURIPtr uri;
    1766      int ret2;
    1767  
    1768      if (str == NULL)
    1769          return (NULL);
    1770  
    1771      uri = xmlCreateURI();
    1772      if (uri != NULL) {
    1773  	/*
    1774  	 * Allow escaping errors in the unescaped form
    1775  	 */
    1776          uri->cleanup = 1;
    1777          ret2 = xmlParseURIReference(uri, (const char *)str);
    1778          if (ret2) {
    1779              xmlFreeURI(uri);
    1780              return (NULL);
    1781          }
    1782      }
    1783  
    1784      if (!uri)
    1785          return NULL;
    1786  
    1787      ret = NULL;
    1788  
    1789  #define NULLCHK(p) if(!p) { \
    1790           xmlURIErrMemory("escaping URI value\n"); \
    1791           xmlFreeURI(uri); \
    1792           xmlFree(ret); \
    1793           return NULL; } \
    1794  
    1795      if (uri->scheme) {
    1796          segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
    1797          NULLCHK(segment)
    1798          ret = xmlStrcat(ret, segment);
    1799          ret = xmlStrcat(ret, BAD_CAST ":");
    1800          xmlFree(segment);
    1801      }
    1802  
    1803      if (uri->authority) {
    1804          segment =
    1805              xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
    1806          NULLCHK(segment)
    1807          ret = xmlStrcat(ret, BAD_CAST "//");
    1808          ret = xmlStrcat(ret, segment);
    1809          xmlFree(segment);
    1810      }
    1811  
    1812      if (uri->user) {
    1813          segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
    1814          NULLCHK(segment)
    1815          ret = xmlStrcat(ret,BAD_CAST "//");
    1816          ret = xmlStrcat(ret, segment);
    1817          ret = xmlStrcat(ret, BAD_CAST "@");
    1818          xmlFree(segment);
    1819      }
    1820  
    1821      if (uri->server) {
    1822          segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
    1823          NULLCHK(segment)
    1824          if (uri->user == NULL)
    1825              ret = xmlStrcat(ret, BAD_CAST "//");
    1826          ret = xmlStrcat(ret, segment);
    1827          xmlFree(segment);
    1828      }
    1829  
    1830      if (uri->port > 0) {
    1831          xmlChar port[11];
    1832  
    1833          snprintf((char *) port, 11, "%d", uri->port);
    1834          ret = xmlStrcat(ret, BAD_CAST ":");
    1835          ret = xmlStrcat(ret, port);
    1836      }
    1837  
    1838      if (uri->path) {
    1839          segment =
    1840              xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
    1841          NULLCHK(segment)
    1842          ret = xmlStrcat(ret, segment);
    1843          xmlFree(segment);
    1844      }
    1845  
    1846      if (uri->query_raw) {
    1847          ret = xmlStrcat(ret, BAD_CAST "?");
    1848          ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
    1849      }
    1850      else if (uri->query) {
    1851          segment =
    1852              xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
    1853          NULLCHK(segment)
    1854          ret = xmlStrcat(ret, BAD_CAST "?");
    1855          ret = xmlStrcat(ret, segment);
    1856          xmlFree(segment);
    1857      }
    1858  
    1859      if (uri->opaque) {
    1860          segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
    1861          NULLCHK(segment)
    1862          ret = xmlStrcat(ret, segment);
    1863          xmlFree(segment);
    1864      }
    1865  
    1866      if (uri->fragment) {
    1867          segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
    1868          NULLCHK(segment)
    1869          ret = xmlStrcat(ret, BAD_CAST "#");
    1870          ret = xmlStrcat(ret, segment);
    1871          xmlFree(segment);
    1872      }
    1873  
    1874      xmlFreeURI(uri);
    1875  #undef NULLCHK
    1876  
    1877      return (ret);
    1878  }
    1879  
    1880  /************************************************************************
    1881   *									*
    1882   *			Public functions				*
    1883   *									*
    1884   ************************************************************************/
    1885  
    1886  /**
    1887   * xmlBuildURI:
    1888   * @URI:  the URI instance found in the document
    1889   * @base:  the base value
    1890   *
    1891   * Computes he final URI of the reference done by checking that
    1892   * the given URI is valid, and building the final URI using the
    1893   * base URI. This is processed according to section 5.2 of the
    1894   * RFC 2396
    1895   *
    1896   * 5.2. Resolving Relative References to Absolute Form
    1897   *
    1898   * Returns a new URI string (to be freed by the caller) or NULL in case
    1899   *         of error.
    1900   */
    1901  xmlChar *
    1902  xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
    1903      xmlChar *val = NULL;
    1904      int ret, len, indx, cur, out;
    1905      xmlURIPtr ref = NULL;
    1906      xmlURIPtr bas = NULL;
    1907      xmlURIPtr res = NULL;
    1908  
    1909      /*
    1910       * 1) The URI reference is parsed into the potential four components and
    1911       *    fragment identifier, as described in Section 4.3.
    1912       *
    1913       *    NOTE that a completely empty URI is treated by modern browsers
    1914       *    as a reference to "." rather than as a synonym for the current
    1915       *    URI.  Should we do that here?
    1916       */
    1917      if (URI == NULL)
    1918  	ret = -1;
    1919      else {
    1920  	if (*URI) {
    1921  	    ref = xmlCreateURI();
    1922  	    if (ref == NULL)
    1923  		goto done;
    1924  	    ret = xmlParseURIReference(ref, (const char *) URI);
    1925  	}
    1926  	else
    1927  	    ret = 0;
    1928      }
    1929      if (ret != 0)
    1930  	goto done;
    1931      if ((ref != NULL) && (ref->scheme != NULL)) {
    1932  	/*
    1933  	 * The URI is absolute don't modify.
    1934  	 */
    1935  	val = xmlStrdup(URI);
    1936  	goto done;
    1937      }
    1938      if (base == NULL)
    1939  	ret = -1;
    1940      else {
    1941  	bas = xmlCreateURI();
    1942  	if (bas == NULL)
    1943  	    goto done;
    1944  	ret = xmlParseURIReference(bas, (const char *) base);
    1945      }
    1946      if (ret != 0) {
    1947  	if (ref)
    1948  	    val = xmlSaveUri(ref);
    1949  	goto done;
    1950      }
    1951      if (ref == NULL) {
    1952  	/*
    1953  	 * the base fragment must be ignored
    1954  	 */
    1955  	if (bas->fragment != NULL) {
    1956  	    xmlFree(bas->fragment);
    1957  	    bas->fragment = NULL;
    1958  	}
    1959  	val = xmlSaveUri(bas);
    1960  	goto done;
    1961      }
    1962  
    1963      /*
    1964       * 2) If the path component is empty and the scheme, authority, and
    1965       *    query components are undefined, then it is a reference to the
    1966       *    current document and we are done.  Otherwise, the reference URI's
    1967       *    query and fragment components are defined as found (or not found)
    1968       *    within the URI reference and not inherited from the base URI.
    1969       *
    1970       *    NOTE that in modern browsers, the parsing differs from the above
    1971       *    in the following aspect:  the query component is allowed to be
    1972       *    defined while still treating this as a reference to the current
    1973       *    document.
    1974       */
    1975      res = xmlCreateURI();
    1976      if (res == NULL)
    1977  	goto done;
    1978      if ((ref->scheme == NULL) && (ref->path == NULL) &&
    1979  	((ref->authority == NULL) && (ref->server == NULL) &&
    1980           (ref->port == PORT_EMPTY))) {
    1981  	if (bas->scheme != NULL)
    1982  	    res->scheme = xmlMemStrdup(bas->scheme);
    1983  	if (bas->authority != NULL)
    1984  	    res->authority = xmlMemStrdup(bas->authority);
    1985  	else {
    1986  	    if (bas->server != NULL)
    1987  		res->server = xmlMemStrdup(bas->server);
    1988  	    if (bas->user != NULL)
    1989  		res->user = xmlMemStrdup(bas->user);
    1990  	    res->port = bas->port;
    1991  	}
    1992  	if (bas->path != NULL)
    1993  	    res->path = xmlMemStrdup(bas->path);
    1994  	if (ref->query_raw != NULL)
    1995  	    res->query_raw = xmlMemStrdup (ref->query_raw);
    1996  	else if (ref->query != NULL)
    1997  	    res->query = xmlMemStrdup(ref->query);
    1998  	else if (bas->query_raw != NULL)
    1999  	    res->query_raw = xmlMemStrdup(bas->query_raw);
    2000  	else if (bas->query != NULL)
    2001  	    res->query = xmlMemStrdup(bas->query);
    2002  	if (ref->fragment != NULL)
    2003  	    res->fragment = xmlMemStrdup(ref->fragment);
    2004  	goto step_7;
    2005      }
    2006  
    2007      /*
    2008       * 3) If the scheme component is defined, indicating that the reference
    2009       *    starts with a scheme name, then the reference is interpreted as an
    2010       *    absolute URI and we are done.  Otherwise, the reference URI's
    2011       *    scheme is inherited from the base URI's scheme component.
    2012       */
    2013      if (ref->scheme != NULL) {
    2014  	val = xmlSaveUri(ref);
    2015  	goto done;
    2016      }
    2017      if (bas->scheme != NULL)
    2018  	res->scheme = xmlMemStrdup(bas->scheme);
    2019  
    2020      if (ref->query_raw != NULL)
    2021  	res->query_raw = xmlMemStrdup(ref->query_raw);
    2022      else if (ref->query != NULL)
    2023  	res->query = xmlMemStrdup(ref->query);
    2024      if (ref->fragment != NULL)
    2025  	res->fragment = xmlMemStrdup(ref->fragment);
    2026  
    2027      /*
    2028       * 4) If the authority component is defined, then the reference is a
    2029       *    network-path and we skip to step 7.  Otherwise, the reference
    2030       *    URI's authority is inherited from the base URI's authority
    2031       *    component, which will also be undefined if the URI scheme does not
    2032       *    use an authority component.
    2033       */
    2034      if ((ref->authority != NULL) || (ref->server != NULL) ||
    2035           (ref->port != PORT_EMPTY)) {
    2036  	if (ref->authority != NULL)
    2037  	    res->authority = xmlMemStrdup(ref->authority);
    2038  	else {
    2039              if (ref->server != NULL)
    2040                  res->server = xmlMemStrdup(ref->server);
    2041  	    if (ref->user != NULL)
    2042  		res->user = xmlMemStrdup(ref->user);
    2043              res->port = ref->port;
    2044  	}
    2045  	if (ref->path != NULL)
    2046  	    res->path = xmlMemStrdup(ref->path);
    2047  	goto step_7;
    2048      }
    2049      if (bas->authority != NULL)
    2050  	res->authority = xmlMemStrdup(bas->authority);
    2051      else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
    2052  	if (bas->server != NULL)
    2053  	    res->server = xmlMemStrdup(bas->server);
    2054  	if (bas->user != NULL)
    2055  	    res->user = xmlMemStrdup(bas->user);
    2056  	res->port = bas->port;
    2057      }
    2058  
    2059      /*
    2060       * 5) If the path component begins with a slash character ("/"), then
    2061       *    the reference is an absolute-path and we skip to step 7.
    2062       */
    2063      if ((ref->path != NULL) && (ref->path[0] == '/')) {
    2064  	res->path = xmlMemStrdup(ref->path);
    2065  	goto step_7;
    2066      }
    2067  
    2068  
    2069      /*
    2070       * 6) If this step is reached, then we are resolving a relative-path
    2071       *    reference.  The relative path needs to be merged with the base
    2072       *    URI's path.  Although there are many ways to do this, we will
    2073       *    describe a simple method using a separate string buffer.
    2074       *
    2075       * Allocate a buffer large enough for the result string.
    2076       */
    2077      len = 2; /* extra / and 0 */
    2078      if (ref->path != NULL)
    2079  	len += strlen(ref->path);
    2080      if (bas->path != NULL)
    2081  	len += strlen(bas->path);
    2082      res->path = (char *) xmlMallocAtomic(len);
    2083      if (res->path == NULL) {
    2084          xmlURIErrMemory("resolving URI against base\n");
    2085  	goto done;
    2086      }
    2087      res->path[0] = 0;
    2088  
    2089      /*
    2090       * a) All but the last segment of the base URI's path component is
    2091       *    copied to the buffer.  In other words, any characters after the
    2092       *    last (right-most) slash character, if any, are excluded.
    2093       */
    2094      cur = 0;
    2095      out = 0;
    2096      if (bas->path != NULL) {
    2097  	while (bas->path[cur] != 0) {
    2098  	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
    2099  		cur++;
    2100  	    if (bas->path[cur] == 0)
    2101  		break;
    2102  
    2103  	    cur++;
    2104  	    while (out < cur) {
    2105  		res->path[out] = bas->path[out];
    2106  		out++;
    2107  	    }
    2108  	}
    2109      }
    2110      res->path[out] = 0;
    2111  
    2112      /*
    2113       * b) The reference's path component is appended to the buffer
    2114       *    string.
    2115       */
    2116      if (ref->path != NULL && ref->path[0] != 0) {
    2117  	indx = 0;
    2118  	/*
    2119  	 * Ensure the path includes a '/'
    2120  	 */
    2121  	if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
    2122  	    res->path[out++] = '/';
    2123  	while (ref->path[indx] != 0) {
    2124  	    res->path[out++] = ref->path[indx++];
    2125  	}
    2126      }
    2127      res->path[out] = 0;
    2128  
    2129      /*
    2130       * Steps c) to h) are really path normalization steps
    2131       */
    2132      xmlNormalizeURIPath(res->path);
    2133  
    2134  step_7:
    2135  
    2136      /*
    2137       * 7) The resulting URI components, including any inherited from the
    2138       *    base URI, are recombined to give the absolute form of the URI
    2139       *    reference.
    2140       */
    2141      val = xmlSaveUri(res);
    2142  
    2143  done:
    2144      if (ref != NULL)
    2145  	xmlFreeURI(ref);
    2146      if (bas != NULL)
    2147  	xmlFreeURI(bas);
    2148      if (res != NULL)
    2149  	xmlFreeURI(res);
    2150      return(val);
    2151  }
    2152  
    2153  /**
    2154   * xmlBuildRelativeURI:
    2155   * @URI:  the URI reference under consideration
    2156   * @base:  the base value
    2157   *
    2158   * Expresses the URI of the reference in terms relative to the
    2159   * base.  Some examples of this operation include:
    2160   *     base = "http://site1.com/docs/book1.html"
    2161   *        URI input                        URI returned
    2162   *     docs/pic1.gif                    pic1.gif
    2163   *     docs/img/pic1.gif                img/pic1.gif
    2164   *     img/pic1.gif                     ../img/pic1.gif
    2165   *     http://site1.com/docs/pic1.gif   pic1.gif
    2166   *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
    2167   *
    2168   *     base = "docs/book1.html"
    2169   *        URI input                        URI returned
    2170   *     docs/pic1.gif                    pic1.gif
    2171   *     docs/img/pic1.gif                img/pic1.gif
    2172   *     img/pic1.gif                     ../img/pic1.gif
    2173   *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
    2174   *
    2175   *
    2176   * Note: if the URI reference is really weird or complicated, it may be
    2177   *       worthwhile to first convert it into a "nice" one by calling
    2178   *       xmlBuildURI (using 'base') before calling this routine,
    2179   *       since this routine (for reasonable efficiency) assumes URI has
    2180   *       already been through some validation.
    2181   *
    2182   * Returns a new URI string (to be freed by the caller) or NULL in case
    2183   * error.
    2184   */
    2185  xmlChar *
    2186  xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
    2187  {
    2188      xmlChar *val = NULL;
    2189      int ret;
    2190      int ix;
    2191      int nbslash = 0;
    2192      int len;
    2193      xmlURIPtr ref = NULL;
    2194      xmlURIPtr bas = NULL;
    2195      xmlChar *bptr, *uptr, *vptr;
    2196      int remove_path = 0;
    2197  
    2198      if ((URI == NULL) || (*URI == 0))
    2199  	return NULL;
    2200  
    2201      /*
    2202       * First parse URI into a standard form
    2203       */
    2204      ref = xmlCreateURI ();
    2205      if (ref == NULL)
    2206  	return NULL;
    2207      /* If URI not already in "relative" form */
    2208      if (URI[0] != '.') {
    2209  	ret = xmlParseURIReference (ref, (const char *) URI);
    2210  	if (ret != 0)
    2211  	    goto done;		/* Error in URI, return NULL */
    2212      } else
    2213  	ref->path = (char *)xmlStrdup(URI);
    2214  
    2215      /*
    2216       * Next parse base into the same standard form
    2217       */
    2218      if ((base == NULL) || (*base == 0)) {
    2219  	val = xmlStrdup (URI);
    2220  	goto done;
    2221      }
    2222      bas = xmlCreateURI ();
    2223      if (bas == NULL)
    2224  	goto done;
    2225      if (base[0] != '.') {
    2226  	ret = xmlParseURIReference (bas, (const char *) base);
    2227  	if (ret != 0)
    2228  	    goto done;		/* Error in base, return NULL */
    2229      } else
    2230  	bas->path = (char *)xmlStrdup(base);
    2231  
    2232      /*
    2233       * If the scheme / server on the URI differs from the base,
    2234       * just return the URI
    2235       */
    2236      if ((ref->scheme != NULL) &&
    2237  	((bas->scheme == NULL) ||
    2238  	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
    2239  	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
    2240           (bas->port != ref->port))) {
    2241  	val = xmlStrdup (URI);
    2242  	goto done;
    2243      }
    2244      if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
    2245  	val = xmlStrdup(BAD_CAST "");
    2246  	goto done;
    2247      }
    2248      if (bas->path == NULL) {
    2249  	val = xmlStrdup((xmlChar *)ref->path);
    2250  	goto done;
    2251      }
    2252      if (ref->path == NULL) {
    2253          ref->path = (char *) "/";
    2254  	remove_path = 1;
    2255      }
    2256  
    2257      /*
    2258       * At this point (at last!) we can compare the two paths
    2259       *
    2260       * First we take care of the special case where either of the
    2261       * two path components may be missing (bug 316224)
    2262       */
    2263      bptr = (xmlChar *)bas->path;
    2264      {
    2265          xmlChar *rptr = (xmlChar *) ref->path;
    2266          int pos = 0;
    2267  
    2268          /*
    2269           * Next we compare the two strings and find where they first differ
    2270           */
    2271  	if ((*rptr == '.') && (rptr[1] == '/'))
    2272              rptr += 2;
    2273  	if ((*bptr == '.') && (bptr[1] == '/'))
    2274              bptr += 2;
    2275  	else if ((*bptr == '/') && (*rptr != '/'))
    2276  	    bptr++;
    2277  	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
    2278  	    pos++;
    2279  
    2280  	if (bptr[pos] == rptr[pos]) {
    2281  	    val = xmlStrdup(BAD_CAST "");
    2282  	    goto done;		/* (I can't imagine why anyone would do this) */
    2283  	}
    2284  
    2285  	/*
    2286  	 * In URI, "back up" to the last '/' encountered.  This will be the
    2287  	 * beginning of the "unique" suffix of URI
    2288  	 */
    2289  	ix = pos;
    2290  	for (; ix > 0; ix--) {
    2291  	    if (rptr[ix - 1] == '/')
    2292  		break;
    2293  	}
    2294  	uptr = (xmlChar *)&rptr[ix];
    2295  
    2296  	/*
    2297  	 * In base, count the number of '/' from the differing point
    2298  	 */
    2299  	for (; bptr[ix] != 0; ix++) {
    2300  	    if (bptr[ix] == '/')
    2301  		nbslash++;
    2302  	}
    2303  
    2304  	/*
    2305  	 * e.g: URI="foo/" base="foo/bar" -> "./"
    2306  	 */
    2307  	if (nbslash == 0 && !uptr[0]) {
    2308  	    val = xmlStrdup(BAD_CAST "./");
    2309  	    goto done;
    2310  	}
    2311  
    2312  	len = xmlStrlen (uptr) + 1;
    2313      }
    2314  
    2315      if (nbslash == 0) {
    2316  	if (uptr != NULL)
    2317  	    /* exception characters from xmlSaveUri */
    2318  	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
    2319  	goto done;
    2320      }
    2321  
    2322      /*
    2323       * Allocate just enough space for the returned string -
    2324       * length of the remainder of the URI, plus enough space
    2325       * for the "../" groups, plus one for the terminator
    2326       */
    2327      val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
    2328      if (val == NULL) {
    2329          xmlURIErrMemory("building relative URI\n");
    2330  	goto done;
    2331      }
    2332      vptr = val;
    2333      /*
    2334       * Put in as many "../" as needed
    2335       */
    2336      for (; nbslash>0; nbslash--) {
    2337  	*vptr++ = '.';
    2338  	*vptr++ = '.';
    2339  	*vptr++ = '/';
    2340      }
    2341      /*
    2342       * Finish up with the end of the URI
    2343       */
    2344      if (uptr != NULL) {
    2345          if ((vptr > val) && (len > 0) &&
    2346  	    (uptr[0] == '/') && (vptr[-1] == '/')) {
    2347  	    memcpy (vptr, uptr + 1, len - 1);
    2348  	    vptr[len - 2] = 0;
    2349  	} else {
    2350  	    memcpy (vptr, uptr, len);
    2351  	    vptr[len - 1] = 0;
    2352  	}
    2353      } else {
    2354  	vptr[len - 1] = 0;
    2355      }
    2356  
    2357      /* escape the freshly-built path */
    2358      vptr = val;
    2359  	/* exception characters from xmlSaveUri */
    2360      val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
    2361      xmlFree(vptr);
    2362  
    2363  done:
    2364      /*
    2365       * Free the working variables
    2366       */
    2367      if (remove_path != 0)
    2368          ref->path = NULL;
    2369      if (ref != NULL)
    2370  	xmlFreeURI (ref);
    2371      if (bas != NULL)
    2372  	xmlFreeURI (bas);
    2373  
    2374      return val;
    2375  }
    2376  
    2377  /**
    2378   * xmlCanonicPath:
    2379   * @path:  the resource locator in a filesystem notation
    2380   *
    2381   * Constructs a canonic path from the specified path.
    2382   *
    2383   * Returns a new canonic path, or a duplicate of the path parameter if the
    2384   * construction fails. The caller is responsible for freeing the memory occupied
    2385   * by the returned string. If there is insufficient memory available, or the
    2386   * argument is NULL, the function returns NULL.
    2387   */
    2388  #define IS_WINDOWS_PATH(p)					\
    2389  	((p != NULL) &&						\
    2390  	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
    2391  	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
    2392  	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
    2393  xmlChar *
    2394  xmlCanonicPath(const xmlChar *path)
    2395  {
    2396  /*
    2397   * For Windows implementations, additional work needs to be done to
    2398   * replace backslashes in pathnames with "forward slashes"
    2399   */
    2400  #if defined(_WIN32)
    2401      int len = 0;
    2402      char *p = NULL;
    2403  #endif
    2404      xmlURIPtr uri;
    2405      xmlChar *ret;
    2406      const xmlChar *absuri;
    2407  
    2408      if (path == NULL)
    2409  	return(NULL);
    2410  
    2411  #if defined(_WIN32)
    2412      /*
    2413       * We must not change the backslashes to slashes if the the path
    2414       * starts with \\?\
    2415       * Those paths can be up to 32k characters long.
    2416       * Was added specifically for OpenOffice, those paths can't be converted
    2417       * to URIs anyway.
    2418       */
    2419      if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
    2420          (path[3] == '\\') )
    2421  	return xmlStrdup((const xmlChar *) path);
    2422  #endif
    2423  
    2424  	/* sanitize filename starting with // so it can be used as URI */
    2425      if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
    2426          path++;
    2427  
    2428      if ((uri = xmlParseURI((const char *) path)) != NULL) {
    2429  	xmlFreeURI(uri);
    2430  	return xmlStrdup(path);
    2431      }
    2432  
    2433      /* Check if this is an "absolute uri" */
    2434      absuri = xmlStrstr(path, BAD_CAST "://");
    2435      if (absuri != NULL) {
    2436          int l, j;
    2437  	unsigned char c;
    2438  	xmlChar *escURI;
    2439  
    2440          /*
    2441  	 * this looks like an URI where some parts have not been
    2442  	 * escaped leading to a parsing problem.  Check that the first
    2443  	 * part matches a protocol.
    2444  	 */
    2445  	l = absuri - path;
    2446  	/* Bypass if first part (part before the '://') is > 20 chars */
    2447  	if ((l <= 0) || (l > 20))
    2448  	    goto path_processing;
    2449  	/* Bypass if any non-alpha characters are present in first part */
    2450  	for (j = 0;j < l;j++) {
    2451  	    c = path[j];
    2452  	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
    2453  	        goto path_processing;
    2454  	}
    2455  
    2456  	/* Escape all except the characters specified in the supplied path */
    2457          escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
    2458  	if (escURI != NULL) {
    2459  	    /* Try parsing the escaped path */
    2460  	    uri = xmlParseURI((const char *) escURI);
    2461  	    /* If successful, return the escaped string */
    2462  	    if (uri != NULL) {
    2463  	        xmlFreeURI(uri);
    2464  		return escURI;
    2465  	    }
    2466              xmlFree(escURI);
    2467  	}
    2468      }
    2469  
    2470  path_processing:
    2471  /* For Windows implementations, replace backslashes with 'forward slashes' */
    2472  #if defined(_WIN32)
    2473      /*
    2474       * Create a URI structure
    2475       */
    2476      uri = xmlCreateURI();
    2477      if (uri == NULL) {		/* Guard against 'out of memory' */
    2478          return(NULL);
    2479      }
    2480  
    2481      len = xmlStrlen(path);
    2482      if ((len > 2) && IS_WINDOWS_PATH(path)) {
    2483          /* make the scheme 'file' */
    2484  	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
    2485  	/* allocate space for leading '/' + path + string terminator */
    2486  	uri->path = xmlMallocAtomic(len + 2);
    2487  	if (uri->path == NULL) {
    2488  	    xmlFreeURI(uri);	/* Guard against 'out of memory' */
    2489  	    return(NULL);
    2490  	}
    2491  	/* Put in leading '/' plus path */
    2492  	uri->path[0] = '/';
    2493  	p = uri->path + 1;
    2494  	strncpy(p, (char *) path, len + 1);
    2495      } else {
    2496  	uri->path = (char *) xmlStrdup(path);
    2497  	if (uri->path == NULL) {
    2498  	    xmlFreeURI(uri);
    2499  	    return(NULL);
    2500  	}
    2501  	p = uri->path;
    2502      }
    2503      /* Now change all occurrences of '\' to '/' */
    2504      while (*p != '\0') {
    2505  	if (*p == '\\')
    2506  	    *p = '/';
    2507  	p++;
    2508      }
    2509  
    2510      if (uri->scheme == NULL) {
    2511  	ret = xmlStrdup((const xmlChar *) uri->path);
    2512      } else {
    2513  	ret = xmlSaveUri(uri);
    2514      }
    2515  
    2516      xmlFreeURI(uri);
    2517  #else
    2518      ret = xmlStrdup((const xmlChar *) path);
    2519  #endif
    2520      return(ret);
    2521  }
    2522  
    2523  /**
    2524   * xmlPathToURI:
    2525   * @path:  the resource locator in a filesystem notation
    2526   *
    2527   * Constructs an URI expressing the existing path
    2528   *
    2529   * Returns a new URI, or a duplicate of the path parameter if the
    2530   * construction fails. The caller is responsible for freeing the memory
    2531   * occupied by the returned string. If there is insufficient memory available,
    2532   * or the argument is NULL, the function returns NULL.
    2533   */
    2534  xmlChar *
    2535  xmlPathToURI(const xmlChar *path)
    2536  {
    2537      xmlURIPtr uri;
    2538      xmlURI temp;
    2539      xmlChar *ret, *cal;
    2540  
    2541      if (path == NULL)
    2542          return(NULL);
    2543  
    2544      if ((uri = xmlParseURI((const char *) path)) != NULL) {
    2545  	xmlFreeURI(uri);
    2546  	return xmlStrdup(path);
    2547      }
    2548      cal = xmlCanonicPath(path);
    2549      if (cal == NULL)
    2550          return(NULL);
    2551  #if defined(_WIN32)
    2552      /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
    2553         If 'cal' is a valid URI already then we are done here, as continuing would make
    2554         it invalid. */
    2555      if ((uri = xmlParseURI((const char *) cal)) != NULL) {
    2556  	xmlFreeURI(uri);
    2557  	return cal;
    2558      }
    2559      /* 'cal' can contain a relative path with backslashes. If that is processed
    2560         by xmlSaveURI, they will be escaped and the external entity loader machinery
    2561         will fail. So convert them to slashes. Misuse 'ret' for walking. */
    2562      ret = cal;
    2563      while (*ret != '\0') {
    2564  	if (*ret == '\\')
    2565  	    *ret = '/';
    2566  	ret++;
    2567      }
    2568  #endif
    2569      memset(&temp, 0, sizeof(temp));
    2570      temp.path = (char *) cal;
    2571      ret = xmlSaveUri(&temp);
    2572      xmlFree(cal);
    2573      return(ret);
    2574  }