(root)/
gettext-0.22.4/
libtextstyle/
lib/
libxml/
xmlstring.c
       1  /* libxml2 - Library for parsing XML documents
       2   * Copyright (C) 2006-2019 Free Software Foundation, Inc.
       3   *
       4   * This file is not part of the GNU gettext program, but is used with
       5   * GNU gettext.
       6   *
       7   * The original copyright notice is as follows:
       8   */
       9  
      10  /*
      11   * Copyright (C) 1998-2012 Daniel Veillard.  All Rights Reserved.
      12   *
      13   * Permission is hereby granted, free of charge, to any person obtaining a copy
      14   * of this software and associated documentation files (the "Software"), to deal
      15   * in the Software without restriction, including without limitation the rights
      16   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      17   * copies of the Software, and to permit persons to whom the Software is fur-
      18   * nished to do so, subject to the following conditions:
      19   *
      20   * The above copyright notice and this permission notice shall be included in
      21   * all copies or substantial portions of the Software.
      22   *
      23   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      24   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
      25   * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
      26   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      27   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      28   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
      29   * THE SOFTWARE.
      30   *
      31   * UTF8 string routines from:
      32   * William Brack <wbrack@mmm.com.hk>
      33   *
      34   * daniel@veillard.com
      35   */
      36  
      37  /*
      38   * string.c : an XML string utilities module
      39   *
      40   * This module provides various utility functions for manipulating
      41   * the xmlChar* type. All functions named xmlStr* have been moved here
      42   * from the parser.c file (their original home).
      43   */
      44  
      45  #define IN_LIBXML
      46  #include "libxml.h"
      47  
      48  #include <stdlib.h>
      49  #include <string.h>
      50  #include <libxml/xmlmemory.h>
      51  #include <libxml/parserInternals.h>
      52  #include <libxml/xmlstring.h>
      53  
      54  /************************************************************************
      55   *                                                                      *
      56   *                Commodity functions to handle xmlChars                *
      57   *                                                                      *
      58   ************************************************************************/
      59  
      60  /**
      61   * xmlStrndup:
      62   * @cur:  the input xmlChar *
      63   * @len:  the len of @cur
      64   *
      65   * a strndup for array of xmlChar's
      66   *
      67   * Returns a new xmlChar * or NULL
      68   */
      69  xmlChar *
      70  xmlStrndup(const xmlChar *cur, int len) {
      71      xmlChar *ret;
      72  
      73      if ((cur == NULL) || (len < 0)) return(NULL);
      74      ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
      75      if (ret == NULL) {
      76          xmlErrMemory(NULL, NULL);
      77          return(NULL);
      78      }
      79      memcpy(ret, cur, len * sizeof(xmlChar));
      80      ret[len] = 0;
      81      return(ret);
      82  }
      83  
      84  /**
      85   * xmlStrdup:
      86   * @cur:  the input xmlChar *
      87   *
      88   * a strdup for array of xmlChar's. Since they are supposed to be
      89   * encoded in UTF-8 or an encoding with 8bit based chars, we assume
      90   * a termination mark of '0'.
      91   *
      92   * Returns a new xmlChar * or NULL
      93   */
      94  xmlChar *
      95  xmlStrdup(const xmlChar *cur) {
      96      const xmlChar *p = cur;
      97  
      98      if (cur == NULL) return(NULL);
      99      while (*p != 0) p++; /* non input consuming */
     100      return(xmlStrndup(cur, p - cur));
     101  }
     102  
     103  /**
     104   * xmlCharStrndup:
     105   * @cur:  the input char *
     106   * @len:  the len of @cur
     107   *
     108   * a strndup for char's to xmlChar's
     109   *
     110   * Returns a new xmlChar * or NULL
     111   */
     112  
     113  xmlChar *
     114  xmlCharStrndup(const char *cur, int len) {
     115      int i;
     116      xmlChar *ret;
     117  
     118      if ((cur == NULL) || (len < 0)) return(NULL);
     119      ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
     120      if (ret == NULL) {
     121          xmlErrMemory(NULL, NULL);
     122          return(NULL);
     123      }
     124      for (i = 0;i < len;i++) {
     125          ret[i] = (xmlChar) cur[i];
     126          if (ret[i] == 0) return(ret);
     127      }
     128      ret[len] = 0;
     129      return(ret);
     130  }
     131  
     132  /**
     133   * xmlCharStrdup:
     134   * @cur:  the input char *
     135   *
     136   * a strdup for char's to xmlChar's
     137   *
     138   * Returns a new xmlChar * or NULL
     139   */
     140  
     141  xmlChar *
     142  xmlCharStrdup(const char *cur) {
     143      const char *p = cur;
     144  
     145      if (cur == NULL) return(NULL);
     146      while (*p != '\0') p++; /* non input consuming */
     147      return(xmlCharStrndup(cur, p - cur));
     148  }
     149  
     150  /**
     151   * xmlStrcmp:
     152   * @str1:  the first xmlChar *
     153   * @str2:  the second xmlChar *
     154   *
     155   * a strcmp for xmlChar's
     156   *
     157   * Returns the integer result of the comparison
     158   */
     159  
     160  int
     161  xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
     162      register int tmp;
     163  
     164      if (str1 == str2) return(0);
     165      if (str1 == NULL) return(-1);
     166      if (str2 == NULL) return(1);
     167      do {
     168          tmp = *str1++ - *str2;
     169          if (tmp != 0) return(tmp);
     170      } while (*str2++ != 0);
     171      return 0;
     172  }
     173  
     174  /**
     175   * xmlStrEqual:
     176   * @str1:  the first xmlChar *
     177   * @str2:  the second xmlChar *
     178   *
     179   * Check if both strings are equal of have same content.
     180   * Should be a bit more readable and faster than xmlStrcmp()
     181   *
     182   * Returns 1 if they are equal, 0 if they are different
     183   */
     184  
     185  int
     186  xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
     187      if (str1 == str2) return(1);
     188      if (str1 == NULL) return(0);
     189      if (str2 == NULL) return(0);
     190      do {
     191          if (*str1++ != *str2) return(0);
     192      } while (*str2++);
     193      return(1);
     194  }
     195  
     196  /**
     197   * xmlStrQEqual:
     198   * @pref:  the prefix of the QName
     199   * @name:  the localname of the QName
     200   * @str:  the second xmlChar *
     201   *
     202   * Check if a QName is Equal to a given string
     203   *
     204   * Returns 1 if they are equal, 0 if they are different
     205   */
     206  
     207  int
     208  xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
     209      if (pref == NULL) return(xmlStrEqual(name, str));
     210      if (name == NULL) return(0);
     211      if (str == NULL) return(0);
     212  
     213      do {
     214          if (*pref++ != *str) return(0);
     215      } while ((*str++) && (*pref));
     216      if (*str++ != ':') return(0);
     217      do {
     218          if (*name++ != *str) return(0);
     219      } while (*str++);
     220      return(1);
     221  }
     222  
     223  /**
     224   * xmlStrncmp:
     225   * @str1:  the first xmlChar *
     226   * @str2:  the second xmlChar *
     227   * @len:  the max comparison length
     228   *
     229   * a strncmp for xmlChar's
     230   *
     231   * Returns the integer result of the comparison
     232   */
     233  
     234  int
     235  xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
     236      register int tmp;
     237  
     238      if (len <= 0) return(0);
     239      if (str1 == str2) return(0);
     240      if (str1 == NULL) return(-1);
     241      if (str2 == NULL) return(1);
     242  #ifdef __GNUC__
     243      tmp = strncmp((const char *)str1, (const char *)str2, len);
     244      return tmp;
     245  #else
     246      do {
     247          tmp = *str1++ - *str2;
     248          if (tmp != 0 || --len == 0) return(tmp);
     249      } while (*str2++ != 0);
     250      return 0;
     251  #endif
     252  }
     253  
     254  static const xmlChar casemap[256] = {
     255      0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
     256      0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
     257      0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
     258      0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
     259      0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
     260      0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
     261      0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
     262      0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
     263      0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
     264      0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
     265      0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
     266      0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
     267      0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
     268      0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
     269      0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
     270      0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
     271      0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
     272      0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
     273      0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
     274      0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
     275      0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
     276      0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
     277      0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
     278      0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
     279      0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
     280      0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
     281      0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
     282      0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
     283      0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
     284      0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
     285      0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
     286      0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
     287  };
     288  
     289  /**
     290   * xmlStrcasecmp:
     291   * @str1:  the first xmlChar *
     292   * @str2:  the second xmlChar *
     293   *
     294   * a strcasecmp for xmlChar's
     295   *
     296   * Returns the integer result of the comparison
     297   */
     298  
     299  int
     300  xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
     301      register int tmp;
     302  
     303      if (str1 == str2) return(0);
     304      if (str1 == NULL) return(-1);
     305      if (str2 == NULL) return(1);
     306      do {
     307          tmp = casemap[*str1++] - casemap[*str2];
     308          if (tmp != 0) return(tmp);
     309      } while (*str2++ != 0);
     310      return 0;
     311  }
     312  
     313  /**
     314   * xmlStrncasecmp:
     315   * @str1:  the first xmlChar *
     316   * @str2:  the second xmlChar *
     317   * @len:  the max comparison length
     318   *
     319   * a strncasecmp for xmlChar's
     320   *
     321   * Returns the integer result of the comparison
     322   */
     323  
     324  int
     325  xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
     326      register int tmp;
     327  
     328      if (len <= 0) return(0);
     329      if (str1 == str2) return(0);
     330      if (str1 == NULL) return(-1);
     331      if (str2 == NULL) return(1);
     332      do {
     333          tmp = casemap[*str1++] - casemap[*str2];
     334          if (tmp != 0 || --len == 0) return(tmp);
     335      } while (*str2++ != 0);
     336      return 0;
     337  }
     338  
     339  /**
     340   * xmlStrchr:
     341   * @str:  the xmlChar * array
     342   * @val:  the xmlChar to search
     343   *
     344   * a strchr for xmlChar's
     345   *
     346   * Returns the xmlChar * for the first occurrence or NULL.
     347   */
     348  
     349  const xmlChar *
     350  xmlStrchr(const xmlChar *str, xmlChar val) {
     351      if (str == NULL) return(NULL);
     352      while (*str != 0) { /* non input consuming */
     353          if (*str == val) return((xmlChar *) str);
     354          str++;
     355      }
     356      return(NULL);
     357  }
     358  
     359  /**
     360   * xmlStrstr:
     361   * @str:  the xmlChar * array (haystack)
     362   * @val:  the xmlChar to search (needle)
     363   *
     364   * a strstr for xmlChar's
     365   *
     366   * Returns the xmlChar * for the first occurrence or NULL.
     367   */
     368  
     369  const xmlChar *
     370  xmlStrstr(const xmlChar *str, const xmlChar *val) {
     371      int n;
     372  
     373      if (str == NULL) return(NULL);
     374      if (val == NULL) return(NULL);
     375      n = xmlStrlen(val);
     376  
     377      if (n == 0) return(str);
     378      while (*str != 0) { /* non input consuming */
     379          if (*str == *val) {
     380              if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
     381          }
     382          str++;
     383      }
     384      return(NULL);
     385  }
     386  
     387  /**
     388   * xmlStrcasestr:
     389   * @str:  the xmlChar * array (haystack)
     390   * @val:  the xmlChar to search (needle)
     391   *
     392   * a case-ignoring strstr for xmlChar's
     393   *
     394   * Returns the xmlChar * for the first occurrence or NULL.
     395   */
     396  
     397  const xmlChar *
     398  xmlStrcasestr(const xmlChar *str, const xmlChar *val) {
     399      int n;
     400  
     401      if (str == NULL) return(NULL);
     402      if (val == NULL) return(NULL);
     403      n = xmlStrlen(val);
     404  
     405      if (n == 0) return(str);
     406      while (*str != 0) { /* non input consuming */
     407          if (casemap[*str] == casemap[*val])
     408              if (!xmlStrncasecmp(str, val, n)) return(str);
     409          str++;
     410      }
     411      return(NULL);
     412  }
     413  
     414  /**
     415   * xmlStrsub:
     416   * @str:  the xmlChar * array (haystack)
     417   * @start:  the index of the first char (zero based)
     418   * @len:  the length of the substring
     419   *
     420   * Extract a substring of a given string
     421   *
     422   * Returns the xmlChar * for the first occurrence or NULL.
     423   */
     424  
     425  xmlChar *
     426  xmlStrsub(const xmlChar *str, int start, int len) {
     427      int i;
     428  
     429      if (str == NULL) return(NULL);
     430      if (start < 0) return(NULL);
     431      if (len < 0) return(NULL);
     432  
     433      for (i = 0;i < start;i++) {
     434          if (*str == 0) return(NULL);
     435          str++;
     436      }
     437      if (*str == 0) return(NULL);
     438      return(xmlStrndup(str, len));
     439  }
     440  
     441  /**
     442   * xmlStrlen:
     443   * @str:  the xmlChar * array
     444   *
     445   * length of a xmlChar's string
     446   *
     447   * Returns the number of xmlChar contained in the ARRAY.
     448   */
     449  
     450  int
     451  xmlStrlen(const xmlChar *str) {
     452      int len = 0;
     453  
     454      if (str == NULL) return(0);
     455      while (*str != 0) { /* non input consuming */
     456          str++;
     457          len++;
     458      }
     459      return(len);
     460  }
     461  
     462  /**
     463   * xmlStrncat:
     464   * @cur:  the original xmlChar * array
     465   * @add:  the xmlChar * array added
     466   * @len:  the length of @add
     467   *
     468   * a strncat for array of xmlChar's, it will extend @cur with the len
     469   * first bytes of @add. Note that if @len < 0 then this is an API error
     470   * and NULL will be returned.
     471   *
     472   * Returns a new xmlChar *, the original @cur is reallocated and should
     473   * not be freed.
     474   */
     475  
     476  xmlChar *
     477  xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
     478      int size;
     479      xmlChar *ret;
     480  
     481      if ((add == NULL) || (len == 0))
     482          return(cur);
     483      if (len < 0)
     484  	return(NULL);
     485      if (cur == NULL)
     486          return(xmlStrndup(add, len));
     487  
     488      size = xmlStrlen(cur);
     489      if (size < 0)
     490          return(NULL);
     491      ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
     492      if (ret == NULL) {
     493          xmlErrMemory(NULL, NULL);
     494          return(cur);
     495      }
     496      memcpy(&ret[size], add, len * sizeof(xmlChar));
     497      ret[size + len] = 0;
     498      return(ret);
     499  }
     500  
     501  /**
     502   * xmlStrncatNew:
     503   * @str1:  first xmlChar string
     504   * @str2:  second xmlChar string
     505   * @len:  the len of @str2 or < 0
     506   *
     507   * same as xmlStrncat, but creates a new string.  The original
     508   * two strings are not freed. If @len is < 0 then the length
     509   * will be calculated automatically.
     510   *
     511   * Returns a new xmlChar * or NULL
     512   */
     513  xmlChar *
     514  xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
     515      int size;
     516      xmlChar *ret;
     517  
     518      if (len < 0) {
     519          len = xmlStrlen(str2);
     520          if (len < 0)
     521              return(NULL);
     522      }
     523      if ((str2 == NULL) || (len == 0))
     524          return(xmlStrdup(str1));
     525      if (str1 == NULL)
     526          return(xmlStrndup(str2, len));
     527  
     528      size = xmlStrlen(str1);
     529      if (size < 0)
     530          return(NULL);
     531      ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar));
     532      if (ret == NULL) {
     533          xmlErrMemory(NULL, NULL);
     534          return(xmlStrndup(str1, size));
     535      }
     536      memcpy(ret, str1, size * sizeof(xmlChar));
     537      memcpy(&ret[size], str2, len * sizeof(xmlChar));
     538      ret[size + len] = 0;
     539      return(ret);
     540  }
     541  
     542  /**
     543   * xmlStrcat:
     544   * @cur:  the original xmlChar * array
     545   * @add:  the xmlChar * array added
     546   *
     547   * a strcat for array of xmlChar's. Since they are supposed to be
     548   * encoded in UTF-8 or an encoding with 8bit based chars, we assume
     549   * a termination mark of '0'.
     550   *
     551   * Returns a new xmlChar * containing the concatenated string. The original
     552   * @cur is reallocated and should not be freed.
     553   */
     554  xmlChar *
     555  xmlStrcat(xmlChar *cur, const xmlChar *add) {
     556      const xmlChar *p = add;
     557  
     558      if (add == NULL) return(cur);
     559      if (cur == NULL)
     560          return(xmlStrdup(add));
     561  
     562      while (*p != 0) p++; /* non input consuming */
     563      return(xmlStrncat(cur, add, p - add));
     564  }
     565  
     566  /**
     567   * xmlStrPrintf:
     568   * @buf:   the result buffer.
     569   * @len:   the result buffer length.
     570   * @msg:   the message with printf formatting.
     571   * @...:   extra parameters for the message.
     572   *
     573   * Formats @msg and places result into @buf.
     574   *
     575   * Returns the number of characters written to @buf or -1 if an error occurs.
     576   */
     577  int XMLCDECL
     578  xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) {
     579      va_list args;
     580      int ret;
     581  
     582      if((buf == NULL) || (msg == NULL)) {
     583          return(-1);
     584      }
     585  
     586      va_start(args, msg);
     587      ret = vsnprintf((char *) buf, len, (const char *) msg, args);
     588      va_end(args);
     589      buf[len - 1] = 0; /* be safe ! */
     590  
     591      return(ret);
     592  }
     593  
     594  /**
     595   * xmlStrVPrintf:
     596   * @buf:   the result buffer.
     597   * @len:   the result buffer length.
     598   * @msg:   the message with printf formatting.
     599   * @ap:    extra parameters for the message.
     600   *
     601   * Formats @msg and places result into @buf.
     602   *
     603   * Returns the number of characters written to @buf or -1 if an error occurs.
     604   */
     605  int
     606  xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) {
     607      int ret;
     608  
     609      if((buf == NULL) || (msg == NULL)) {
     610          return(-1);
     611      }
     612  
     613      ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
     614      buf[len - 1] = 0; /* be safe ! */
     615  
     616      return(ret);
     617  }
     618  
     619  /************************************************************************
     620   *                                                                      *
     621   *              Generic UTF8 handling routines                          *
     622   *                                                                      *
     623   * From rfc2044: encoding of the Unicode values on UTF-8:               *
     624   *                                                                      *
     625   * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *
     626   * 0000 0000-0000 007F   0xxxxxxx                                       *
     627   * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              *
     628   * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     *
     629   *                                                                      *
     630   * I hope we won't use values > 0xFFFF anytime soon !                   *
     631   *                                                                      *
     632   ************************************************************************/
     633  
     634  
     635  /**
     636   * xmlUTF8Size:
     637   * @utf: pointer to the UTF8 character
     638   *
     639   * calculates the internal size of a UTF8 character
     640   *
     641   * returns the numbers of bytes in the character, -1 on format error
     642   */
     643  int
     644  xmlUTF8Size(const xmlChar *utf) {
     645      xmlChar mask;
     646      int len;
     647  
     648      if (utf == NULL)
     649          return -1;
     650      if (*utf < 0x80)
     651          return 1;
     652      /* check valid UTF8 character */
     653      if (!(*utf & 0x40))
     654          return -1;
     655      /* determine number of bytes in char */
     656      len = 2;
     657      for (mask=0x20; mask != 0; mask>>=1) {
     658          if (!(*utf & mask))
     659              return len;
     660          len++;
     661      }
     662      return -1;
     663  }
     664  
     665  /**
     666   * xmlUTF8Charcmp:
     667   * @utf1: pointer to first UTF8 char
     668   * @utf2: pointer to second UTF8 char
     669   *
     670   * compares the two UCS4 values
     671   *
     672   * returns result of the compare as with xmlStrncmp
     673   */
     674  int
     675  xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
     676  
     677      if (utf1 == NULL ) {
     678          if (utf2 == NULL)
     679              return 0;
     680          return -1;
     681      }
     682      return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
     683  }
     684  
     685  /**
     686   * xmlUTF8Strlen:
     687   * @utf:  a sequence of UTF-8 encoded bytes
     688   *
     689   * compute the length of an UTF8 string, it doesn't do a full UTF8
     690   * checking of the content of the string.
     691   *
     692   * Returns the number of characters in the string or -1 in case of error
     693   */
     694  int
     695  xmlUTF8Strlen(const xmlChar *utf) {
     696      int ret = 0;
     697  
     698      if (utf == NULL)
     699          return(-1);
     700  
     701      while (*utf != 0) {
     702          if (utf[0] & 0x80) {
     703              if ((utf[1] & 0xc0) != 0x80)
     704                  return(-1);
     705              if ((utf[0] & 0xe0) == 0xe0) {
     706                  if ((utf[2] & 0xc0) != 0x80)
     707                      return(-1);
     708                  if ((utf[0] & 0xf0) == 0xf0) {
     709                      if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
     710                          return(-1);
     711                      utf += 4;
     712                  } else {
     713                      utf += 3;
     714                  }
     715              } else {
     716                  utf += 2;
     717              }
     718          } else {
     719              utf++;
     720          }
     721          ret++;
     722      }
     723      return(ret);
     724  }
     725  
     726  /**
     727   * xmlGetUTF8Char:
     728   * @utf:  a sequence of UTF-8 encoded bytes
     729   * @len:  a pointer to the minimum number of bytes present in
     730   *        the sequence.  This is used to assure the next character
     731   *        is completely contained within the sequence.
     732   *
     733   * Read the first UTF8 character from @utf
     734   *
     735   * Returns the char value or -1 in case of error, and sets *len to
     736   *        the actual number of bytes consumed (0 in case of error)
     737   */
     738  int
     739  xmlGetUTF8Char(const unsigned char *utf, int *len) {
     740      unsigned int c;
     741  
     742      if (utf == NULL)
     743          goto error;
     744      if (len == NULL)
     745          goto error;
     746      if (*len < 1)
     747          goto error;
     748  
     749      c = utf[0];
     750      if (c & 0x80) {
     751          if (*len < 2)
     752              goto error;
     753          if ((utf[1] & 0xc0) != 0x80)
     754              goto error;
     755          if ((c & 0xe0) == 0xe0) {
     756              if (*len < 3)
     757                  goto error;
     758              if ((utf[2] & 0xc0) != 0x80)
     759                  goto error;
     760              if ((c & 0xf0) == 0xf0) {
     761                  if (*len < 4)
     762                      goto error;
     763                  if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
     764                      goto error;
     765                  *len = 4;
     766                  /* 4-byte code */
     767                  c = (utf[0] & 0x7) << 18;
     768                  c |= (utf[1] & 0x3f) << 12;
     769                  c |= (utf[2] & 0x3f) << 6;
     770                  c |= utf[3] & 0x3f;
     771              } else {
     772                /* 3-byte code */
     773                  *len = 3;
     774                  c = (utf[0] & 0xf) << 12;
     775                  c |= (utf[1] & 0x3f) << 6;
     776                  c |= utf[2] & 0x3f;
     777              }
     778          } else {
     779            /* 2-byte code */
     780              *len = 2;
     781              c = (utf[0] & 0x1f) << 6;
     782              c |= utf[1] & 0x3f;
     783          }
     784      } else {
     785          /* 1-byte code */
     786          *len = 1;
     787      }
     788      return(c);
     789  
     790  error:
     791      if (len != NULL)
     792  	*len = 0;
     793      return(-1);
     794  }
     795  
     796  /**
     797   * xmlCheckUTF8:
     798   * @utf: Pointer to putative UTF-8 encoded string.
     799   *
     800   * Checks @utf for being valid UTF-8. @utf is assumed to be
     801   * null-terminated. This function is not super-strict, as it will
     802   * allow longer UTF-8 sequences than necessary. Note that Java is
     803   * capable of producing these sequences if provoked. Also note, this
     804   * routine checks for the 4-byte maximum size, but does not check for
     805   * 0x10ffff maximum value.
     806   *
     807   * Return value: true if @utf is valid.
     808   **/
     809  int
     810  xmlCheckUTF8(const unsigned char *utf)
     811  {
     812      int ix;
     813      unsigned char c;
     814  
     815      if (utf == NULL)
     816          return(0);
     817      /*
     818       * utf is a string of 1, 2, 3 or 4 bytes.  The valid strings
     819       * are as follows (in "bit format"):
     820       *    0xxxxxxx                                      valid 1-byte
     821       *    110xxxxx 10xxxxxx                             valid 2-byte
     822       *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
     823       *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
     824       */
     825      for (ix = 0; (c = utf[ix]);) {      /* string is 0-terminated */
     826          if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
     827              ix++;
     828  	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
     829  	    if ((utf[ix+1] & 0xc0 ) != 0x80)
     830  	        return 0;
     831  	    ix += 2;
     832  	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
     833  	    if (((utf[ix+1] & 0xc0) != 0x80) ||
     834  	        ((utf[ix+2] & 0xc0) != 0x80))
     835  		    return 0;
     836  	    ix += 3;
     837  	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
     838  	    if (((utf[ix+1] & 0xc0) != 0x80) ||
     839  	        ((utf[ix+2] & 0xc0) != 0x80) ||
     840  		((utf[ix+3] & 0xc0) != 0x80))
     841  		    return 0;
     842  	    ix += 4;
     843  	} else				/* unknown encoding */
     844  	    return 0;
     845        }
     846        return(1);
     847  }
     848  
     849  /**
     850   * xmlUTF8Strsize:
     851   * @utf:  a sequence of UTF-8 encoded bytes
     852   * @len:  the number of characters in the array
     853   *
     854   * storage size of an UTF8 string
     855   * the behaviour is not guaranteed if the input string is not UTF-8
     856   *
     857   * Returns the storage size of
     858   * the first 'len' characters of ARRAY
     859   */
     860  
     861  int
     862  xmlUTF8Strsize(const xmlChar *utf, int len) {
     863      const xmlChar   *ptr=utf;
     864      xmlChar         ch;
     865  
     866      if (utf == NULL)
     867          return(0);
     868  
     869      if (len <= 0)
     870          return(0);
     871  
     872      while ( len-- > 0) {
     873          if ( !*ptr )
     874              break;
     875          if ( (ch = *ptr++) & 0x80)
     876              while ((ch<<=1) & 0x80 ) {
     877  		if (*ptr == 0) break;
     878                  ptr++;
     879  	    }
     880      }
     881      return (ptr - utf);
     882  }
     883  
     884  
     885  /**
     886   * xmlUTF8Strndup:
     887   * @utf:  the input UTF8 *
     888   * @len:  the len of @utf (in chars)
     889   *
     890   * a strndup for array of UTF8's
     891   *
     892   * Returns a new UTF8 * or NULL
     893   */
     894  xmlChar *
     895  xmlUTF8Strndup(const xmlChar *utf, int len) {
     896      xmlChar *ret;
     897      int i;
     898  
     899      if ((utf == NULL) || (len < 0)) return(NULL);
     900      i = xmlUTF8Strsize(utf, len);
     901      ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
     902      if (ret == NULL) {
     903          xmlGenericError(xmlGenericErrorContext,
     904                  "malloc of %ld byte failed\n",
     905                  (len + 1) * (long)sizeof(xmlChar));
     906          return(NULL);
     907      }
     908      memcpy(ret, utf, i * sizeof(xmlChar));
     909      ret[i] = 0;
     910      return(ret);
     911  }
     912  
     913  /**
     914   * xmlUTF8Strpos:
     915   * @utf:  the input UTF8 *
     916   * @pos:  the position of the desired UTF8 char (in chars)
     917   *
     918   * a function to provide the equivalent of fetching a
     919   * character from a string array
     920   *
     921   * Returns a pointer to the UTF8 character or NULL
     922   */
     923  const xmlChar *
     924  xmlUTF8Strpos(const xmlChar *utf, int pos) {
     925      xmlChar ch;
     926  
     927      if (utf == NULL) return(NULL);
     928      if (pos < 0)
     929          return(NULL);
     930      while (pos--) {
     931          if ((ch=*utf++) == 0) return(NULL);
     932          if ( ch & 0x80 ) {
     933              /* if not simple ascii, verify proper format */
     934              if ( (ch & 0xc0) != 0xc0 )
     935                  return(NULL);
     936              /* then skip over remaining bytes for this char */
     937              while ( (ch <<= 1) & 0x80 )
     938                  if ( (*utf++ & 0xc0) != 0x80 )
     939                      return(NULL);
     940          }
     941      }
     942      return((xmlChar *)utf);
     943  }
     944  
     945  /**
     946   * xmlUTF8Strloc:
     947   * @utf:  the input UTF8 *
     948   * @utfchar:  the UTF8 character to be found
     949   *
     950   * a function to provide the relative location of a UTF8 char
     951   *
     952   * Returns the relative character position of the desired char
     953   * or -1 if not found
     954   */
     955  int
     956  xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
     957      int i, size;
     958      xmlChar ch;
     959  
     960      if (utf==NULL || utfchar==NULL) return -1;
     961      size = xmlUTF8Strsize(utfchar, 1);
     962          for(i=0; (ch=*utf) != 0; i++) {
     963              if (xmlStrncmp(utf, utfchar, size)==0)
     964                  return(i);
     965              utf++;
     966              if ( ch & 0x80 ) {
     967                  /* if not simple ascii, verify proper format */
     968                  if ( (ch & 0xc0) != 0xc0 )
     969                      return(-1);
     970                  /* then skip over remaining bytes for this char */
     971                  while ( (ch <<= 1) & 0x80 )
     972                      if ( (*utf++ & 0xc0) != 0x80 )
     973                          return(-1);
     974              }
     975          }
     976  
     977      return(-1);
     978  }
     979  /**
     980   * xmlUTF8Strsub:
     981   * @utf:  a sequence of UTF-8 encoded bytes
     982   * @start: relative pos of first char
     983   * @len:   total number to copy
     984   *
     985   * Create a substring from a given UTF-8 string
     986   * Note:  positions are given in units of UTF-8 chars
     987   *
     988   * Returns a pointer to a newly created string
     989   * or NULL if any problem
     990   */
     991  
     992  xmlChar *
     993  xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
     994      int            i;
     995      xmlChar ch;
     996  
     997      if (utf == NULL) return(NULL);
     998      if (start < 0) return(NULL);
     999      if (len < 0) return(NULL);
    1000  
    1001      /*
    1002       * Skip over any leading chars
    1003       */
    1004      for (i = 0;i < start;i++) {
    1005          if ((ch=*utf++) == 0) return(NULL);
    1006          if ( ch & 0x80 ) {
    1007              /* if not simple ascii, verify proper format */
    1008              if ( (ch & 0xc0) != 0xc0 )
    1009                  return(NULL);
    1010              /* then skip over remaining bytes for this char */
    1011              while ( (ch <<= 1) & 0x80 )
    1012                  if ( (*utf++ & 0xc0) != 0x80 )
    1013                      return(NULL);
    1014          }
    1015      }
    1016  
    1017      return(xmlUTF8Strndup(utf, len));
    1018  }
    1019  
    1020  /**
    1021   * xmlEscapeFormatString:
    1022   * @msg:  a pointer to the string in which to escape '%' characters.
    1023   * Must be a heap-allocated buffer created by libxml2 that may be
    1024   * returned, or that may be freed and replaced.
    1025   *
    1026   * Replaces the string pointed to by 'msg' with an escaped string.
    1027   * Returns the same string with all '%' characters escaped.
    1028   */
    1029  xmlChar *
    1030  xmlEscapeFormatString(xmlChar **msg)
    1031  {
    1032      xmlChar *msgPtr = NULL;
    1033      xmlChar *result = NULL;
    1034      xmlChar *resultPtr = NULL;
    1035      size_t count = 0;
    1036      size_t msgLen = 0;
    1037      size_t resultLen = 0;
    1038  
    1039      if (!msg || !*msg)
    1040          return(NULL);
    1041  
    1042      for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) {
    1043          ++msgLen;
    1044          if (*msgPtr == '%')
    1045              ++count;
    1046      }
    1047  
    1048      if (count == 0)
    1049          return(*msg);
    1050  
    1051      resultLen = msgLen + count + 1;
    1052      result = (xmlChar *) xmlMallocAtomic(resultLen * sizeof(xmlChar));
    1053      if (result == NULL) {
    1054          /* Clear *msg to prevent format string vulnerabilities in
    1055             out-of-memory situations. */
    1056          xmlFree(*msg);
    1057          *msg = NULL;
    1058          xmlErrMemory(NULL, NULL);
    1059          return(NULL);
    1060      }
    1061  
    1062      for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) {
    1063          *resultPtr = *msgPtr;
    1064          if (*msgPtr == '%')
    1065              *(++resultPtr) = '%';
    1066      }
    1067      result[resultLen - 1] = '\0';
    1068  
    1069      xmlFree(*msg);
    1070      *msg = result;
    1071  
    1072      return *msg;
    1073  }
    1074  
    1075  #define bottom_xmlstring
    1076  #include "elfgcchack.h"