(root)/
libxml2-2.12.3/
testchar.c
       1  /**
       2   * Test the UTF-8 decoding routines
       3   *
       4   * author: Daniel Veillard
       5   * copy: see Copyright for the status of this software.
       6   */
       7  
       8  #define XML_DEPRECATED
       9  
      10  #include <stdio.h>
      11  #include <string.h>
      12  #include <libxml/tree.h>
      13  #include <libxml/parser.h>
      14  #include <libxml/parserInternals.h>
      15  
      16  int lastError;
      17  
      18  static void errorHandler(void *unused, const xmlError *err) {
      19      if ((unused == NULL) && (err != NULL) && (lastError == 0)) {
      20          lastError = err->code;
      21      }
      22  }
      23  
      24  char document1[100] = "<doc>XXXX</doc>";
      25  char document2[100] = "<doc foo='XXXX'/>";
      26  
      27  static int testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
      28                    int len,  char *data, int forbid1, int forbid2) {
      29      int i;
      30      xmlDocPtr res;
      31  
      32      for (i = 0;i <= 0xFF;i++) {
      33  	lastError = 0;
      34  	xmlCtxtReset(ctxt);
      35  
      36          data[0] = (char) i;
      37  
      38  	res = xmlReadMemory(document, len, "test", NULL, 0);
      39  
      40  	if ((i == forbid1) || (i == forbid2)) {
      41  	    if ((lastError == 0) || (res != NULL)) {
      42  	        fprintf(stderr,
      43  		    "Failed to detect invalid char for Byte 0x%02X: %c\n",
      44  		        i, i);
      45  		return(1);
      46  	    }
      47  	}
      48  
      49  	else if ((i == '<') || (i == '&')) {
      50  	    if ((lastError == 0) || (res != NULL)) {
      51  	        fprintf(stderr,
      52  		    "Failed to detect illegal char %c for Byte 0x%02X\n", i, i);
      53  		return(1);
      54  	    }
      55  	}
      56  	else if (((i < 0x20) || (i >= 0x80)) &&
      57  	    (i != 0x9) && (i != 0xA) && (i != 0xD)) {
      58  	    if ((lastError != XML_ERR_INVALID_CHAR) && (res != NULL)) {
      59  	        fprintf(stderr,
      60  		    "Failed to detect invalid char for Byte 0x%02X\n", i);
      61  		return(1);
      62  	    }
      63  	}
      64  	else if (res == NULL) {
      65  	    fprintf(stderr,
      66  		"Failed to parse valid char for Byte 0x%02X : %c\n", i, i);
      67  		return(1);
      68  	}
      69  	if (res != NULL)
      70  	    xmlFreeDoc(res);
      71      }
      72      return(0);
      73  }
      74  
      75  static int testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
      76                    int len,  char *data) {
      77      int i, j;
      78      xmlDocPtr res;
      79  
      80      for (i = 0x80;i <= 0xFF;i++) {
      81      for (j = 0;j <= 0xFF;j++) {
      82  	lastError = 0;
      83  	xmlCtxtReset(ctxt);
      84  
      85          data[0] = (char) i;
      86          data[1] = (char) j;
      87  
      88  	res = xmlReadMemory(document, len, "test", NULL, 0);
      89  
      90  	/* if first bit of first char is set, then second bit must too */
      91  	if ((i & 0x80) && ((i & 0x40) == 0)) {
      92  	    if ((lastError == 0) || (res != NULL)) {
      93  		fprintf(stderr,
      94  		"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
      95  			i, j);
      96  		return(1);
      97  	    }
      98  	}
      99  
     100  	/*
     101  	 * if first bit of first char is set, then second char first
     102  	 * bits must be 10
     103  	 */
     104  	else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
     105  	    if ((lastError == 0) || (res != NULL)) {
     106  		fprintf(stderr,
     107  	    "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
     108  			i, j);
     109  		return(1);
     110  	    }
     111  	}
     112  
     113  	/*
     114  	 * if using a 2 byte encoding then the value must be greater
     115  	 * than 0x80, i.e. one of bits 5 to 1 of i must be set
     116  	 */
     117  	else if ((i & 0x80) && ((i & 0x1E) == 0)) {
     118  	    if ((lastError == 0) || (res != NULL)) {
     119  		fprintf(stderr,
     120  	    "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
     121  			i, j);
     122  		return(1);
     123  	    }
     124  	}
     125  
     126  	/*
     127  	 * if third bit of first char is set, then the sequence would need
     128  	 * at least 3 bytes, but we give only 2 !
     129  	 */
     130  	else if ((i & 0xE0) == 0xE0) {
     131  	    if ((lastError == 0) || (res != NULL)) {
     132  		fprintf(stderr,
     133  	    "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
     134  			i, j);
     135  		return(1);
     136  	    }
     137  	}
     138  
     139  	/*
     140  	 * We should see no error in remaining cases
     141  	 */
     142  	else if ((lastError != 0) || (res == NULL)) {
     143  	    fprintf(stderr,
     144  		"Failed to parse document for Bytes 0x%02X 0x%02X\n", i, j);
     145  	    return(1);
     146  	}
     147  	if (res != NULL)
     148  	    xmlFreeDoc(res);
     149      }
     150      }
     151      return(0);
     152  }
     153  
     154  /**
     155   * testDocumentRanges:
     156   *
     157   * Test the correct UTF8 character parsing in context of XML documents
     158   * Those are in-context injection tests checking the parser behaviour on
     159   * edge case values at different point in content, beginning and end of
     160   * CDATA in text or in attribute values.
     161   */
     162  
     163  static int testDocumentRanges(void) {
     164      xmlParserCtxtPtr ctxt;
     165      char *data;
     166      int test_ret = 0;
     167  
     168      /*
     169       * Set up a parsing context using the first document as
     170       * the current input source.
     171       */
     172      ctxt = xmlNewParserCtxt();
     173      if (ctxt == NULL) {
     174          fprintf(stderr, "Failed to allocate parser context\n");
     175  	return(1);
     176      }
     177  
     178      printf("testing 1 byte char in document: 1");
     179      fflush(stdout);
     180      data = &document1[5];
     181      data[0] = ' ';
     182      data[1] = ' ';
     183      data[2] = ' ';
     184      data[3] = ' ';
     185      /* test 1 byte injection at beginning of area */
     186      test_ret += testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
     187                             data, -1, -1);
     188      printf(" 2");
     189      fflush(stdout);
     190      data[0] = ' ';
     191      data[1] = ' ';
     192      data[2] = ' ';
     193      data[3] = ' ';
     194      /* test 1 byte injection at end of area */
     195      test_ret += testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
     196                             data + 3, -1, -1);
     197  
     198      printf(" 3");
     199      fflush(stdout);
     200      data = &document2[10];
     201      data[0] = ' ';
     202      data[1] = ' ';
     203      data[2] = ' ';
     204      data[3] = ' ';
     205      /* test 1 byte injection at beginning of area */
     206      test_ret += testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
     207                             data, '\'', -1);
     208      printf(" 4");
     209      fflush(stdout);
     210      data[0] = ' ';
     211      data[1] = ' ';
     212      data[2] = ' ';
     213      data[3] = ' ';
     214      /* test 1 byte injection at end of area */
     215      test_ret += testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
     216                             data + 3, '\'', -1);
     217      printf(" done\n");
     218  
     219      printf("testing 2 byte char in document: 1");
     220      fflush(stdout);
     221      data = &document1[5];
     222      data[0] = ' ';
     223      data[1] = ' ';
     224      data[2] = ' ';
     225      data[3] = ' ';
     226      /* test 2 byte injection at beginning of area */
     227      test_ret += testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
     228                             data);
     229      printf(" 2");
     230      fflush(stdout);
     231      data[0] = ' ';
     232      data[1] = ' ';
     233      data[2] = ' ';
     234      data[3] = ' ';
     235      /* test 2 byte injection at end of area */
     236      test_ret += testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
     237                             data + 2);
     238  
     239      printf(" 3");
     240      fflush(stdout);
     241      data = &document2[10];
     242      data[0] = ' ';
     243      data[1] = ' ';
     244      data[2] = ' ';
     245      data[3] = ' ';
     246      /* test 2 byte injection at beginning of area */
     247      test_ret += testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
     248                             data);
     249      printf(" 4");
     250      fflush(stdout);
     251      data[0] = ' ';
     252      data[1] = ' ';
     253      data[2] = ' ';
     254      data[3] = ' ';
     255      /* test 2 byte injection at end of area */
     256      test_ret += testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
     257                             data + 2);
     258      printf(" done\n");
     259  
     260      xmlFreeParserCtxt(ctxt);
     261      return(test_ret);
     262  }
     263  
     264  static int
     265  testCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
     266      const xmlChar *oldcur;
     267      int c, err, len2;
     268  
     269      lastError = 0;
     270      c = xmlCurrentChar(ctxt, len);
     271      ctxt->input->flags = 0;
     272      err = lastError;
     273  
     274      oldcur = ctxt->input->cur;
     275      lastError = 0;
     276      xmlNextChar(ctxt);
     277      ctxt->input->flags = 0;
     278      len2 = ctxt->input->cur - oldcur;
     279      ctxt->input->cur = oldcur;
     280  
     281      if ((*ctxt->input->cur != 0) && (err != lastError)) {
     282          fprintf(stderr, "xmlCurrentChar and xmlNextChar report different "
     283                  "errors: %d %d\n", err, lastError);
     284          return(-1);
     285      }
     286  
     287      if ((err == 0) && (*len != len2)) {
     288          fprintf(stderr, "xmlCurrentChar and xmlNextChar report different "
     289                  "lengths: %d %d\n", *len, len2);
     290          return(-1);
     291      }
     292  
     293      lastError = err;
     294  
     295      return(c);
     296  }
     297  
     298  static int testCharRangeByte1(xmlParserCtxtPtr ctxt) {
     299      int i = 0;
     300      int len, c;
     301      char *data = (char *) ctxt->input->cur;
     302  
     303      data[1] = 0;
     304      data[2] = 0;
     305      data[3] = 0;
     306      for (i = 0;i <= 0xFF;i++) {
     307          data[0] = (char) i;
     308          ctxt->nbErrors = 0;
     309  
     310          c = testCurrentChar(ctxt, &len);
     311          if (c < 0)
     312              continue;
     313  	if ((i == 0) || (i >= 0x80)) {
     314  	    /* we must see an error there */
     315  	    if (lastError != XML_ERR_INVALID_CHAR) {
     316  	        fprintf(stderr,
     317  		    "Failed to detect invalid char for Byte 0x%02X\n", i);
     318  		return(1);
     319  	    }
     320  	} else if (i == 0xD) {
     321  	    if ((c != 0xA) || (len != 1)) {
     322  		fprintf(stderr, "Failed to convert char for Byte 0x%02X\n", i);
     323  		return(1);
     324  	    }
     325  	} else if ((c != i) || (len != 1)) {
     326  	    fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", i);
     327  	    return(1);
     328  	}
     329      }
     330      return(0);
     331  }
     332  
     333  static int testCharRangeByte2(xmlParserCtxtPtr ctxt) {
     334      int i, j;
     335      int len, c;
     336      char *data = (char *) ctxt->input->cur;
     337  
     338      data[2] = 0;
     339      data[3] = 0;
     340      for (i = 0x80;i <= 0xFF;i++) {
     341  	for (j = 0;j <= 0xFF;j++) {
     342  	    data[0] = (char) i;
     343  	    data[1] = (char) j;
     344              ctxt->nbErrors = 0;
     345  
     346              c = testCurrentChar(ctxt, &len);
     347              if (c < 0)
     348                  continue;
     349  
     350  	    /* if first bit of first char is set, then second bit must too */
     351  	    if ((i & 0x80) && ((i & 0x40) == 0)) {
     352  		if (lastError != XML_ERR_INVALID_CHAR) {
     353  		    fprintf(stderr,
     354  		    "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
     355  		            i, j);
     356  		    return(1);
     357  		}
     358  	    }
     359  
     360  	    /*
     361  	     * if first bit of first char is set, then second char first
     362  	     * bits must be 10
     363  	     */
     364  	    else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
     365  		if (lastError != XML_ERR_INVALID_CHAR) {
     366  		    fprintf(stderr,
     367  		"Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
     368  		            i, j, c);
     369  		    return(1);
     370  		}
     371  	    }
     372  
     373  	    /*
     374  	     * if using a 2 byte encoding then the value must be greater
     375  	     * than 0x80, i.e. one of bits 5 to 1 of i must be set
     376  	     */
     377  	    else if ((i & 0x80) && ((i & 0x1E) == 0)) {
     378  		if (lastError != XML_ERR_INVALID_CHAR) {
     379  		    fprintf(stderr,
     380  		"Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
     381  		            i, j, c);
     382  		    return(1);
     383  		}
     384  	    }
     385  
     386  	    /*
     387  	     * if third bit of first char is set, then the sequence would need
     388  	     * at least 3 bytes, but we give only 2 !
     389  	     */
     390  	    else if ((i & 0xE0) == 0xE0) {
     391  		if (lastError != XML_ERR_INVALID_CHAR) {
     392  		    fprintf(stderr,
     393  		"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
     394  		            i, j);
     395  		    return(1);
     396  		}
     397  	    }
     398  
     399              /*
     400  	     * We should see no error in remaining cases
     401  	     */
     402  	    else if ((lastError != 0) || (len != 2)) {
     403  		fprintf(stderr,
     404  		    "Failed to parse char for Bytes 0x%02X 0x%02X\n", i, j);
     405  		return(1);
     406  	    }
     407  
     408              /*
     409  	     * Finally check the value is right
     410  	     */
     411  	    else if (c != (j & 0x3F) + ((i & 0x1F) << 6)) {
     412  		fprintf(stderr,
     413  	"Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
     414  	                i, j, ((j & 0x3F) + ((i & 0x1F) << 6)), c);
     415  		return(1);
     416  	    }
     417          }
     418      }
     419      return(0);
     420  }
     421  
     422  static int testCharRangeByte3(xmlParserCtxtPtr ctxt) {
     423      int i, j, k, K;
     424      int len, c;
     425      unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
     426      char *data = (char *) ctxt->input->cur;
     427      int value;
     428  
     429      data[3] = 0;
     430      for (i = 0xE0;i <= 0xFF;i++) {
     431      for (j = 0;j <= 0xFF;j++) {
     432      for (k = 0;k < 6;k++) {
     433  	data[0] = (char) i;
     434  	data[1] = (char) j;
     435  	K = lows[k];
     436  	data[2] = (char) K;
     437  	value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
     438          ctxt->nbErrors = 0;
     439  
     440          c = testCurrentChar(ctxt, &len);
     441          if (c < 0)
     442              continue;
     443  
     444  	/*
     445  	 * if fourth bit of first char is set, then the sequence would need
     446  	 * at least 4 bytes, but we give only 3 !
     447  	 */
     448  	if ((i & 0xF0) == 0xF0) {
     449  	    if (lastError != XML_ERR_INVALID_CHAR) {
     450  		fprintf(stderr,
     451  	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
     452  			i, j, K, data[3]);
     453  		return(1);
     454  	    }
     455  	}
     456  
     457          /*
     458  	 * The second and the third bytes must start with 10
     459  	 */
     460  	else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80)) {
     461  	    if (lastError != XML_ERR_INVALID_CHAR) {
     462  		fprintf(stderr,
     463  	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
     464  			i, j, K);
     465  		return(1);
     466  	    }
     467  	}
     468  
     469  	/*
     470  	 * if using a 3 byte encoding then the value must be greater
     471  	 * than 0x800, i.e. one of bits 4 to 0 of i must be set or
     472  	 * the 6th byte of data[1] must be set
     473  	 */
     474  	else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
     475  	    if (lastError != XML_ERR_INVALID_CHAR) {
     476  		fprintf(stderr,
     477  	    "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
     478  			i, j, K);
     479  		return(1);
     480  	    }
     481  	}
     482  
     483          /*
     484  	 * There are values that are not allowed in UTF-8
     485  	 */
     486  	else if ((value > 0xD7FF) && (value <0xE000)) {
     487  	    if (lastError != XML_ERR_INVALID_CHAR) {
     488  		fprintf(stderr,
     489  	"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
     490  			value, i, j, K);
     491  		return(1);
     492  	    }
     493  	}
     494  
     495  	/*
     496  	 * We should see no error in remaining cases
     497  	 */
     498  	else if ((lastError != 0) || (len != 3)) {
     499  	    fprintf(stderr,
     500  		"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
     501  		    i, j, K);
     502  	    return(1);
     503  	}
     504  
     505  	/*
     506  	 * Finally check the value is right
     507  	 */
     508  	else if (c != value) {
     509  	    fprintf(stderr,
     510      "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
     511  		i, j, data[2], value, c);
     512  	    return(1);
     513  	}
     514      }
     515      }
     516      }
     517      return(0);
     518  }
     519  
     520  static int testCharRangeByte4(xmlParserCtxtPtr ctxt) {
     521      int i, j, k, K, l, L;
     522      int len, c;
     523      unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
     524      char *data = (char *) ctxt->input->cur;
     525      int value;
     526  
     527      data[4] = 0;
     528      for (i = 0xF0;i <= 0xFF;i++) {
     529      for (j = 0;j <= 0xFF;j++) {
     530      for (k = 0;k < 6;k++) {
     531      for (l = 0;l < 6;l++) {
     532  	data[0] = (char) i;
     533  	data[1] = (char) j;
     534  	K = lows[k];
     535  	data[2] = (char) K;
     536  	L = lows[l];
     537  	data[3] = (char) L;
     538  	value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) +
     539  	        ((i & 0x7) << 18);
     540          ctxt->nbErrors = 0;
     541  
     542          c = testCurrentChar(ctxt, &len);
     543          if (c < 0)
     544              continue;
     545  
     546  	/*
     547  	 * if fifth bit of first char is set, then the sequence would need
     548  	 * at least 5 bytes, but we give only 4 !
     549  	 */
     550  	if ((i & 0xF8) == 0xF8) {
     551  	    if (lastError != XML_ERR_INVALID_CHAR) {
     552  		fprintf(stderr,
     553    "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
     554  			i, j, K, data[3]);
     555  		return(1);
     556  	    }
     557  	}
     558  
     559          /*
     560  	 * The second, third and fourth bytes must start with 10
     561  	 */
     562  	else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
     563  	         ((L & 0xC0) != 0x80)) {
     564  	    if (lastError != XML_ERR_INVALID_CHAR) {
     565  		fprintf(stderr,
     566  	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
     567  			i, j, K, L);
     568  		return(1);
     569  	    }
     570  	}
     571  
     572  	/*
     573  	 * if using a 3 byte encoding then the value must be greater
     574  	 * than 0x10000, i.e. one of bits 3 to 0 of i must be set or
     575  	 * the 6 or 5th byte of j must be set
     576  	 */
     577  	else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
     578  	    if (lastError != XML_ERR_INVALID_CHAR) {
     579  		fprintf(stderr,
     580  	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
     581  			i, j, K, L);
     582  		return(1);
     583  	    }
     584  	}
     585  
     586          /*
     587  	 * There are values in that are not allowed in UTF-8
     588  	 */
     589  	else if (((value > 0xD7FF) && (value < 0xE000)) ||
     590  		 (value > 0x10FFFF)) {
     591  	    if (lastError != XML_ERR_INVALID_CHAR) {
     592  		fprintf(stderr,
     593  "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
     594  			value, i, j, K, L);
     595  		return(1);
     596  	    }
     597  	}
     598  
     599  	/*
     600  	 * We should see no error in remaining cases
     601  	 */
     602  	else if ((lastError != 0) || (len != 4)) {
     603  	    fprintf(stderr,
     604  		"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
     605  		    i, j, K);
     606  	    return(1);
     607  	}
     608  
     609  	/*
     610  	 * Finally check the value is right
     611  	 */
     612  	else if (c != value) {
     613  	    fprintf(stderr,
     614      "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
     615  		i, j, data[2], value, c);
     616  	    return(1);
     617  	}
     618      }
     619      }
     620      }
     621      }
     622      return(0);
     623  }
     624  
     625  /**
     626   * testCharRanges:
     627   *
     628   * Test the correct UTF8 character parsing in isolation i.e.
     629   * not when parsing a full document, this is less expensive and we can
     630   * cover the full range of UTF-8 chars accepted by XML-1.0
     631   */
     632  
     633  static int testCharRanges(void) {
     634      char data[5];
     635      xmlParserCtxtPtr ctxt;
     636      xmlParserInputBufferPtr buf;
     637      xmlParserInputPtr input;
     638      int test_ret = 0;
     639  
     640      memset(data, 0, 5);
     641  
     642      /*
     643       * Set up a parsing context using the above data buffer as
     644       * the current input source.
     645       */
     646      ctxt = xmlNewParserCtxt();
     647      if (ctxt == NULL) {
     648          fprintf(stderr, "Failed to allocate parser context\n");
     649  	return(1);
     650      }
     651      buf = xmlParserInputBufferCreateMem(data, sizeof(data),
     652                                          XML_CHAR_ENCODING_NONE);
     653      if (buf == NULL) {
     654          fprintf(stderr, "Failed to allocate input buffer\n");
     655  	test_ret = 1;
     656  	goto error;
     657      }
     658      input = xmlNewInputStream(ctxt);
     659      if (input == NULL) {
     660          xmlFreeParserInputBuffer(buf);
     661  	test_ret = 1;
     662  	goto error;
     663      }
     664      input->filename = NULL;
     665      input->buf = buf;
     666      input->cur =
     667      input->base = xmlBufContent(input->buf->buffer);
     668      input->end = input->base + 4;
     669      inputPush(ctxt, input);
     670  
     671      printf("testing char range: 1");
     672      fflush(stdout);
     673      test_ret += testCharRangeByte1(ctxt);
     674      printf(" 2");
     675      fflush(stdout);
     676      test_ret += testCharRangeByte2(ctxt);
     677      printf(" 3");
     678      fflush(stdout);
     679      test_ret += testCharRangeByte3(ctxt);
     680      printf(" 4");
     681      fflush(stdout);
     682      test_ret += testCharRangeByte4(ctxt);
     683      printf(" done\n");
     684      fflush(stdout);
     685  
     686  error:
     687      xmlFreeParserCtxt(ctxt);
     688      return(test_ret);
     689  }
     690  
     691  static int
     692  testUserEncoding(void) {
     693      /*
     694       * Create a document encoded as UTF-16LE with an ISO-8859-1 encoding
     695       * declaration, then parse it with xmlReadMemory and the encoding
     696       * argument set to UTF-16LE.
     697       */
     698      xmlDocPtr doc = NULL;
     699      const char *start = "<?xml version='1.0' encoding='ISO-8859-1'?><d>";
     700      const char *end = "</d>";
     701      char *buf = NULL;
     702      xmlChar *text;
     703      int startSize = strlen(start);
     704      int textSize = 100000; /* Make sure to exceed internal buffer sizes. */
     705      int endSize = strlen(end);
     706      int totalSize = startSize + textSize + endSize;
     707      int k = 0;
     708      int i;
     709      int ret = 1;
     710  
     711      buf = xmlMalloc(2 * totalSize);
     712      for (i = 0; start[i] != 0; i++) {
     713          buf[k++] = start[i];
     714          buf[k++] = 0;
     715      }
     716      for (i = 0; i < textSize; i++) {
     717          buf[k++] = 'x';
     718          buf[k++] = 0;
     719      }
     720      for (i = 0; end[i] != 0; i++) {
     721          buf[k++] = end[i];
     722          buf[k++] = 0;
     723      }
     724  
     725      doc = xmlReadMemory(buf, 2 * totalSize, NULL, "UTF-16LE", 0);
     726      if (doc == NULL) {
     727          fprintf(stderr, "failed to parse document\n");
     728          goto error;
     729      }
     730  
     731      text = doc->children->children->content;
     732      for (i = 0; i < textSize; i++) {
     733          if (text[i] != 'x') {
     734              fprintf(stderr, "text node has wrong content at offset %d\n", k);
     735              goto error;
     736          }
     737      }
     738  
     739      ret = 0;
     740  
     741  error:
     742      xmlFreeDoc(doc);
     743      xmlFree(buf);
     744  
     745      return ret;
     746  }
     747  
     748  #if defined(LIBXML_PUSH_ENABLED) && defined(LIBXML_OUTPUT_ENABLED)
     749  
     750  static char *
     751  convert(xmlCharEncodingHandlerPtr handler, const char *utf8, int size,
     752          int *outSize) {
     753      char *ret;
     754      int inlen;
     755      int res;
     756  
     757      inlen = size;
     758      *outSize = size * 2;
     759      ret = xmlMalloc(*outSize);
     760      if (ret == NULL)
     761          return(NULL);
     762      res = handler->output(BAD_CAST ret, outSize, BAD_CAST utf8, &inlen);
     763      if ((res < 0) || (inlen != size)) {
     764          xmlFree(ret);
     765          return(NULL);
     766      }
     767  
     768      return(ret);
     769  }
     770  
     771  static int
     772  testUserEncodingPush(void) {
     773      xmlCharEncodingHandlerPtr handler;
     774      xmlParserCtxtPtr ctxt;
     775      xmlDocPtr doc;
     776      char buf[] =
     777          "\xEF\xBB\xBF"
     778          "<?xml version='1.0' encoding='ISO-8859-1'?>\n"
     779          "<d>text</d>\n";
     780      char *utf16;
     781      int utf16Size;
     782      int ret = 1;
     783  
     784      handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_UTF16LE);
     785      utf16 = convert(handler, buf, sizeof(buf) - 1, &utf16Size);
     786      ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
     787      xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF16LE);
     788      xmlParseChunk(ctxt, utf16, utf16Size, 0);
     789      xmlParseChunk(ctxt, NULL, 0, 1);
     790      doc = ctxt->myDoc;
     791  
     792      if ((doc != NULL) &&
     793          (doc->children != NULL) &&
     794          (doc->children->children != NULL) &&
     795          (xmlStrcmp(doc->children->children->content, BAD_CAST "text") == 0))
     796          ret = 0;
     797  
     798      xmlFreeDoc(doc);
     799      xmlFreeParserCtxt(ctxt);
     800      xmlFree(utf16);
     801  
     802      return(ret);
     803  }
     804  
     805  static int
     806  testUTF8Chunks(void) {
     807      xmlParserCtxtPtr ctxt;
     808      xmlChar *out;
     809      int outSize;
     810      char *buf;
     811      int i;
     812      int ret = 0;
     813  
     814      ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
     815  
     816      xmlParseChunk(ctxt, "<d>", 3, 0);
     817      xmlParseChunk(ctxt, "\xF0", 1, 0);
     818      xmlParseChunk(ctxt, "\x9F", 1, 0);
     819      xmlParseChunk(ctxt, "\x98", 1, 0);
     820      xmlParseChunk(ctxt, "\x8A", 1, 0);
     821      xmlParseChunk(ctxt, "</d>", 4, 1);
     822  
     823      xmlDocDumpMemory(ctxt->myDoc, &out, &outSize);
     824      if (strcmp((char *) out,
     825                 "<?xml version=\"1.0\"?>\n<d>&#x1F60A;</d>\n") != 0) {
     826          fprintf(stderr, "failed UTF-8 chunk test 1\n");
     827          ret += 1;
     828      }
     829  
     830      xmlFree(out);
     831      xmlFreeDoc(ctxt->myDoc);
     832      xmlFreeParserCtxt(ctxt);
     833  
     834      ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
     835  
     836      xmlParseChunk(ctxt, "<d>", 3, 0);
     837  
     838      /*
     839       * Create a chunk longer than XML_PARSER_BIG_BUFFER_SIZE (300) ending
     840       * with an incomplete UTF-8 sequence.
     841       */
     842      buf = xmlMalloc(1000 * 2 + 1);
     843      for (i = 0; i < 2000; i += 2)
     844          memcpy(buf + i, "\xCE\xB1", 2);
     845      buf[i] = '\xCE';
     846      xmlParseChunk(ctxt, buf, 2001, 0);
     847      xmlFree(buf);
     848  
     849      xmlParseChunk(ctxt, "\xB1</d>", 4, 0);
     850      xmlParseChunk(ctxt, NULL, 0, 0);
     851  
     852      xmlDocDumpMemory(ctxt->myDoc, &out, &outSize);
     853      if (strncmp((char *) out, "<?xml version=\"1.0\"?>\n<d>", 25) != 0) {
     854          fprintf(stderr, "failed UTF-8 chunk test 2-1\n");
     855          ret += 1;
     856          goto error;
     857      }
     858      for (i = 25; i < 25 + 1001 * 7; i += 7) {
     859          if (memcmp(out + i, "&#x3B1;", 7) != 0) {
     860              fprintf(stderr, "failed UTF-8 chunk test 2-2 %d\n", i);
     861              ret += 1;
     862              goto error;
     863          }
     864      }
     865      if (strcmp((char *) out + i, "</d>\n") != 0) {
     866          fprintf(stderr, "failed UTF-8 chunk test 2-3\n");
     867          ret += 1;
     868          goto error;
     869      }
     870  
     871  error:
     872      xmlFree(out);
     873      xmlFreeDoc(ctxt->myDoc);
     874      xmlFreeParserCtxt(ctxt);
     875  
     876      return(ret);
     877      return(0);
     878  }
     879  
     880  #endif
     881  
     882  int main(void) {
     883  
     884      int ret = 0;
     885  
     886      /*
     887       * this initialize the library and check potential ABI mismatches
     888       * between the version it was compiled for and the actual shared
     889       * library used.
     890       */
     891      LIBXML_TEST_VERSION
     892  
     893      /*
     894       * Catch errors separately
     895       */
     896  
     897      xmlSetStructuredErrorFunc(NULL, errorHandler);
     898  
     899      /*
     900       * Run the tests
     901       */
     902      ret += testCharRanges();
     903      ret += testDocumentRanges();
     904      ret += testUserEncoding();
     905  #if defined(LIBXML_PUSH_ENABLED) && defined(LIBXML_OUTPUT_ENABLED)
     906      ret += testUserEncodingPush();
     907      ret += testUTF8Chunks();
     908  #endif
     909  
     910      /*
     911       * Cleanup function for the XML library.
     912       */
     913      xmlCleanupParser();
     914      return(ret ? 1 : 0);
     915  }