(root)/
flex-2.6.4/
src/
ccl.c
       1  /* ccl - routines for character classes */
       2  
       3  /*  Copyright (c) 1990 The Regents of the University of California. */
       4  /*  All rights reserved. */
       5  
       6  /*  This code is derived from software contributed to Berkeley by */
       7  /*  Vern Paxson. */
       8  
       9  /*  The United States Government has rights in this work pursuant */
      10  /*  to contract no. DE-AC03-76SF00098 between the United States */
      11   /*  Department of Energy and the University of California. */
      12  
      13  /*  This file is part of flex. */
      14  
      15  /*  Redistribution and use in source and binary forms, with or without */
      16  /*  modification, are permitted provided that the following conditions */
      17  /*  are met: */
      18  
      19  /*  1. Redistributions of source code must retain the above copyright */
      20  /*     notice, this list of conditions and the following disclaimer. */
      21  /*  2. Redistributions in binary form must reproduce the above copyright */
      22  /*     notice, this list of conditions and the following disclaimer in the */
      23  /*     documentation and/or other materials provided with the distribution. */
      24  
      25  /*  Neither the name of the University nor the names of its contributors */
      26  /*  may be used to endorse or promote products derived from this software */
      27  /*  without specific prior written permission. */
      28  
      29  /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
      30  /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
      31  /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
      32  /*  PURPOSE. */
      33  
      34  #include "flexdef.h"
      35  
      36  /* return true if the chr is in the ccl. Takes negation into account. */
      37  static bool
      38  ccl_contains (const int cclp, const int ch)
      39  {
      40  	int     ind, len, i;
      41  
      42  	len = ccllen[cclp];
      43  	ind = cclmap[cclp];
      44  
      45  	for (i = 0; i < len; ++i)
      46  		if (ccltbl[ind + i] == ch)
      47  			return !cclng[cclp];
      48  
      49      return cclng[cclp];
      50  }
      51  
      52  
      53  /* ccladd - add a single character to a ccl */
      54  
      55  void ccladd (int cclp, int ch)
      56  {
      57  	int     ind, len, newpos, i;
      58  
      59  	check_char (ch);
      60  
      61  	len = ccllen[cclp];
      62  	ind = cclmap[cclp];
      63  
      64  	/* check to see if the character is already in the ccl */
      65  
      66  	for (i = 0; i < len; ++i)
      67  		if (ccltbl[ind + i] == ch)
      68  			return;
      69  
      70  	/* mark newlines */
      71  	if (ch == nlch)
      72  		ccl_has_nl[cclp] = true;
      73  
      74  	newpos = ind + len;
      75  
      76  	if (newpos >= current_max_ccl_tbl_size) {
      77  		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
      78  
      79  		++num_reallocs;
      80  
      81  		ccltbl = reallocate_Character_array (ccltbl,
      82  						     current_max_ccl_tbl_size);
      83  	}
      84  
      85  	ccllen[cclp] = len + 1;
      86  	ccltbl[newpos] = (unsigned char) ch;
      87  }
      88  
      89  /* dump_cclp - same thing as list_character_set, but for cclps.  */
      90  
      91  static void    dump_cclp (FILE* file, int cclp)
      92  {
      93  	int i;
      94  
      95  	putc ('[', file);
      96  
      97  	for (i = 0; i < csize; ++i) {
      98  		if (ccl_contains(cclp, i)){
      99  			int start_char = i;
     100  
     101  			putc (' ', file);
     102  
     103  			fputs (readable_form (i), file);
     104  
     105  			while (++i < csize && ccl_contains(cclp,i)) ;
     106  
     107  			if (i - 1 > start_char)
     108  				/* this was a run */
     109  				fprintf (file, "-%s",
     110  					 readable_form (i - 1));
     111  
     112  			putc (' ', file);
     113  		}
     114  	}
     115  
     116  	putc (']', file);
     117  }
     118  
     119  
     120  
     121  /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
     122  int
     123  ccl_set_diff (int a, int b)
     124  {
     125      int  d, ch;
     126  
     127      /* create new class  */
     128      d = cclinit();
     129  
     130      /* In order to handle negation, we spin through all possible chars,
     131       * addding each char in a that is not in b.
     132       * (This could be O(n^2), but n is small and bounded.)
     133       */
     134  	for ( ch = 0; ch < csize; ++ch )
     135          if (ccl_contains (a, ch) && !ccl_contains(b, ch))
     136              ccladd (d, ch);
     137  
     138      /* debug */
     139      if (0){
     140          fprintf(stderr, "ccl_set_diff (");
     141              fprintf(stderr, "\n    ");
     142              dump_cclp (stderr, a);
     143              fprintf(stderr, "\n    ");
     144              dump_cclp (stderr, b);
     145              fprintf(stderr, "\n    ");
     146              dump_cclp (stderr, d);
     147          fprintf(stderr, "\n)\n");
     148      }
     149      return d;
     150  }
     151  
     152  /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
     153  int
     154  ccl_set_union (int a, int b)
     155  {
     156      int  d, i;
     157  
     158      /* create new class  */
     159      d = cclinit();
     160  
     161      /* Add all of a */
     162      for (i = 0; i < ccllen[a]; ++i)
     163  		ccladd (d, ccltbl[cclmap[a] + i]);
     164  
     165      /* Add all of b */
     166      for (i = 0; i < ccllen[b]; ++i)
     167  		ccladd (d, ccltbl[cclmap[b] + i]);
     168  
     169      /* debug */
     170      if (0){
     171          fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
     172              fprintf(stderr, "\n    ");
     173              dump_cclp (stderr, a);
     174              fprintf(stderr, "\n    ");
     175              dump_cclp (stderr, b);
     176              fprintf(stderr, "\n    ");
     177              dump_cclp (stderr, d);
     178          fprintf(stderr, "\n)\n");
     179      }
     180      return d;
     181  }
     182  
     183  
     184  /* cclinit - return an empty ccl */
     185  
     186  int     cclinit (void)
     187  {
     188  	if (++lastccl >= current_maxccls) {
     189  		current_maxccls += MAX_CCLS_INCREMENT;
     190  
     191  		++num_reallocs;
     192  
     193  		cclmap =
     194  			reallocate_integer_array (cclmap, current_maxccls);
     195  		ccllen =
     196  			reallocate_integer_array (ccllen, current_maxccls);
     197  		cclng = reallocate_integer_array (cclng, current_maxccls);
     198  		ccl_has_nl =
     199  			reallocate_bool_array (ccl_has_nl,
     200  					       current_maxccls);
     201  	}
     202  
     203  	if (lastccl == 1)
     204  		/* we're making the first ccl */
     205  		cclmap[lastccl] = 0;
     206  
     207  	else
     208  		/* The new pointer is just past the end of the last ccl.
     209  		 * Since the cclmap points to the \first/ character of a
     210  		 * ccl, adding the length of the ccl to the cclmap pointer
     211  		 * will produce a cursor to the first free space.
     212  		 */
     213  		cclmap[lastccl] =
     214  			cclmap[lastccl - 1] + ccllen[lastccl - 1];
     215  
     216  	ccllen[lastccl] = 0;
     217  	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
     218  	ccl_has_nl[lastccl] = false;
     219  
     220  	return lastccl;
     221  }
     222  
     223  
     224  /* cclnegate - negate the given ccl */
     225  
     226  void    cclnegate (int cclp)
     227  {
     228  	cclng[cclp] = 1;
     229  	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
     230  }
     231  
     232  
     233  /* list_character_set - list the members of a set of characters in CCL form
     234   *
     235   * Writes to the given file a character-class representation of those
     236   * characters present in the given CCL.  A character is present if it
     237   * has a non-zero value in the cset array.
     238   */
     239  
     240  void    list_character_set (FILE *file, int cset[])
     241  {
     242  	int i;
     243  
     244  	putc ('[', file);
     245  
     246  	for (i = 0; i < csize; ++i) {
     247  		if (cset[i]) {
     248  			int start_char = i;
     249  
     250  			putc (' ', file);
     251  
     252  			fputs (readable_form (i), file);
     253  
     254  			while (++i < csize && cset[i]) ;
     255  
     256  			if (i - 1 > start_char)
     257  				/* this was a run */
     258  				fprintf (file, "-%s",
     259  					 readable_form (i - 1));
     260  
     261  			putc (' ', file);
     262  		}
     263  	}
     264  
     265  	putc (']', file);
     266  }
     267  
     268  /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
     269   * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
     270   * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
     271   * be in the range. If not, then this range is ambiguous, and the function
     272   * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
     273   * [a-z] will be labeled ambiguous because it does not include [A-Z].
     274   *
     275   * @param c1 the lower end of the range
     276   * @param c2 the upper end of the range
     277   * @return true if [c1-c2] is not ambiguous for a caseless scanner.
     278   */
     279  bool range_covers_case (int c1, int c2)
     280  {
     281  	int     i, o;
     282  
     283  	for (i = c1; i <= c2; i++) {
     284  		if (has_case (i)) {
     285  			o = reverse_case (i);
     286  			if (o < c1 || c2 < o)
     287  				return false;
     288  		}
     289  	}
     290  	return true;
     291  }
     292  
     293  /** Reverse the case of a character, if possible.
     294   * @return c if case-reversal does not apply.
     295   */
     296  int reverse_case (int c)
     297  {
     298  	return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
     299  }
     300  
     301  /** Return true if c is uppercase or lowercase. */
     302  bool has_case (int c)
     303  {
     304  	return (isupper (c) || islower (c)) ? true : false;
     305  }