(root)/
gawk-5.2.2/
str_array.c
       1  /*
       2   * str_array.c - routines for associative arrays of string indices.
       3   */
       4  
       5  /*
       6   * Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2018, 2019,
       7   * 2021, 2022,
       8   * the Free Software Foundation, Inc.
       9   *
      10   * This file is part of GAWK, the GNU implementation of the
      11   * AWK Programming Language.
      12   *
      13   * GAWK is free software; you can redistribute it and/or modify
      14   * it under the terms of the GNU General Public License as published by
      15   * the Free Software Foundation; either version 3 of the License, or
      16   * (at your option) any later version.
      17   *
      18   * GAWK is distributed in the hope that it will be useful,
      19   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      20   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      21   * GNU General Public License for more details.
      22   *
      23   * You should have received a copy of the GNU General Public License
      24   * along with this program; if not, write to the Free Software
      25   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
      26   */
      27  
      28  #include "awk.h"
      29  
      30  /*
      31   * Tree walks (``for (iggy in foo)'') and array deletions use expensive
      32   * linear searching.  So what we do is start out with small arrays and
      33   * grow them as needed, so that our arrays are hopefully small enough,
      34   * most of the time, that they're pretty full and we're not looking at
      35   * wasted space.
      36   *
      37   * The decision is made to grow the array if the average chain length is
      38   * ``too big''. This is defined as the total number of entries in the table
      39   * divided by the size of the array being greater than some constant.
      40   *
      41   * 11/2002: We make the constant a variable, so that it can be tweaked
      42   * via environment variable.
      43   * 11/2002: Modern machines are bigger, cut this down from 10.
      44   */
      45  
      46  static size_t STR_CHAIN_MAX = 2;
      47  
      48  extern FILE *output_fp;
      49  extern void indent(int indent_level);
      50  
      51  static NODE **str_array_init(NODE *symbol, NODE *subs);
      52  static NODE **str_lookup(NODE *symbol, NODE *subs);
      53  static NODE **str_exists(NODE *symbol, NODE *subs);
      54  static NODE **str_clear(NODE *symbol, NODE *subs);
      55  static NODE **str_remove(NODE *symbol, NODE *subs);
      56  static NODE **str_list(NODE *symbol, NODE *subs);
      57  static NODE **str_copy(NODE *symbol, NODE *newsymb);
      58  static NODE **str_dump(NODE *symbol, NODE *ndump);
      59  
      60  const array_funcs_t str_array_func = {
      61  	"str",
      62  	str_array_init,
      63  	(afunc_t) 0,
      64  	str_lookup,
      65  	str_exists,
      66  	str_clear,
      67  	str_remove,
      68  	str_list,
      69  	str_copy,
      70  	str_dump,
      71  	(afunc_t) 0,
      72  };
      73  
      74  static NODE **env_remove(NODE *symbol, NODE *subs);
      75  static NODE **env_store(NODE *symbol, NODE *subs);
      76  static NODE **env_clear(NODE *symbol, NODE *subs);
      77  
      78  /* special case for ENVIRON */
      79  static const array_funcs_t env_array_func = {
      80  	"env",
      81  	str_array_init,
      82  	(afunc_t) 0,
      83  	str_lookup,
      84  	str_exists,
      85  	env_clear,
      86  	env_remove,
      87  	str_list,
      88  	str_copy,
      89  	str_dump,
      90  	env_store,
      91  };
      92  
      93  static inline NODE **str_find(NODE *symbol, NODE *s1, size_t code1, unsigned long hash1);
      94  static void grow_table(NODE *symbol);
      95  
      96  static unsigned long gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code);
      97  static unsigned long scramble(unsigned long x);
      98  static unsigned long fnv1a_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code);
      99  static unsigned long awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code);
     100  
     101  unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t *code) = awk_hash;
     102  
     103  
     104  /* str_array_init --- array initialization routine */
     105  
     106  static NODE **
     107  str_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
     108  {
     109  	if (symbol == NULL) {		/* first time */
     110  		long newval;
     111  		const char *val;
     112  
     113  		/* check relevant environment variables */
     114  		if ((newval = getenv_long("STR_CHAIN_MAX")) > 0)
     115  			STR_CHAIN_MAX = newval;
     116  
     117  		if ((val = getenv("AWK_HASH")) != NULL) {
     118  			if (strcmp(val, "gst") == 0)
     119  				hash = gst_hash_string;
     120  			else if (strcmp(val, "fnv1a") == 0)
     121  				hash = fnv1a_hash_string;
     122  		}
     123  	} else
     124  		null_array(symbol);
     125  
     126  	return & success_node;
     127  }
     128  
     129  
     130  /*
     131   * str_lookup:
     132   * Find SYMBOL[SUBS] in the assoc array.  Install it with value "" if it
     133   * isn't there. Returns a pointer ala get_lhs to where its value is stored.
     134   *
     135   * SYMBOL is the address of the node (or other pointer) being dereferenced.
     136   * SUBS is a number or string used as the subscript.
     137   */
     138  
     139  static NODE **
     140  str_lookup(NODE *symbol, NODE *subs)
     141  {
     142  	unsigned long hash1;
     143  	NODE **lhs;
     144  	BUCKET *b;
     145  	size_t code1;
     146  
     147  	subs = force_string(subs);
     148  
     149  	if (symbol->buckets == NULL)
     150  		grow_table(symbol);
     151  	hash1 = hash(subs->stptr, subs->stlen,
     152  			(unsigned long) symbol->array_size, & code1);
     153  	if ((lhs = str_find(symbol, subs, code1, hash1)) != NULL)
     154  		return lhs;
     155  
     156  	/* It's not there, install it. */
     157  	/* first see if we would need to grow the array, before installing */
     158  
     159  	symbol->table_size++;
     160  	if ((symbol->flags & ARRAYMAXED) == 0
     161  			&& (symbol->table_size / symbol->array_size) > STR_CHAIN_MAX) {
     162  		grow_table(symbol);
     163  		/* have to recompute hash value for new size */
     164  		hash1 = code1 % (unsigned long) symbol->array_size;
     165  	}
     166  
     167  
     168  	/*
     169  	 * Repeat after me: "Array indices are always strings."
     170  	 * "Array indices are always strings."
     171  	 * "Array indices are always strings."
     172  	 * "Array indices are always strings."
     173  	 * ....
     174  	 */
     175  	// Special cases:
     176  	// 1. The string was generated using CONVFMT.
     177  	// 2. The string was from an unassigned variable.
     178  	// 3. The string was from a straight number, perniciously, from MPFR
     179  	// 4. The string was from an unassigned field.
     180  	if (   subs->stfmt != STFMT_UNUSED
     181  	    || subs == Nnull_string
     182  	    || (subs->flags & STRING) == 0
     183  	    || (subs->flags & NULL_FIELD) != 0) {
     184  		NODE *tmp;
     185  
     186  		/*
     187  		 * Need to freeze this string value --- it must never
     188  		 * change, no matter what happens to the value
     189  		 * that created it or to CONVFMT, etc.; So, get
     190  		 * a private copy.
     191  		 */
     192  
     193  		tmp = make_string(subs->stptr, subs->stlen);
     194  
     195  		/*
     196  		* Set the numeric value for the index if it's  available. Useful
     197  		* for numeric sorting by index.  Do this only if the numeric
     198  		* value is available, instead of all the time, since doing it
     199  		* all the time is a big performance hit for something that may
     200  		* never be used.
     201  		*/
     202  
     203  		if ((subs->flags & (MPFN|MPZN|NUMCUR)) == NUMCUR) {
     204  			tmp->numbr = subs->numbr;
     205  			tmp->flags |= NUMCUR;
     206  		}
     207  		subs = tmp;
     208  	} else {
     209  		/* string value already "frozen" */
     210  
     211  		subs = dupnode(subs);
     212  	}
     213  
     214  	getbucket(b);
     215  	b->ahnext = symbol->buckets[hash1];
     216  	symbol->buckets[hash1] = b;
     217  	b->ahname = subs;
     218  	b->ahname_str = subs->stptr;
     219  	b->ahname_len = subs->stlen;
     220  	b->ahvalue = new_array_element();
     221  	b->ahcode = code1;
     222  	return & (b->ahvalue);
     223  }
     224  
     225  /* str_exists --- test whether the array element symbol[subs] exists or not,
     226   * 		return pointer to value if it does.
     227   */
     228  
     229  static NODE **
     230  str_exists(NODE *symbol, NODE *subs)
     231  {
     232  	unsigned long hash1;
     233  	size_t code1;
     234  
     235  	if (symbol->table_size == 0)
     236  		return NULL;
     237  
     238  	subs = force_string(subs);
     239  	hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size, & code1);
     240  	return str_find(symbol, subs, code1, hash1);
     241  }
     242  
     243  /* str_clear --- flush all the values in symbol[] */
     244  
     245  static NODE **
     246  str_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
     247  {
     248  	unsigned long i;
     249  	BUCKET *b, *next;
     250  	NODE *r;
     251  
     252  	for (i = 0; i < symbol->array_size; i++) {
     253  		for (b = symbol->buckets[i]; b != NULL; b = next) {
     254  			next = b->ahnext;
     255  			r = b->ahvalue;
     256  			if (r->type == Node_var_array) {
     257  				assoc_clear(r);	/* recursively clear all sub-arrays */
     258  				efree(r->vname);
     259  				freenode(r);
     260  			} else
     261  				unref(r);
     262  			unref(b->ahname);
     263  			freebucket(b);
     264  		}
     265  		symbol->buckets[i] = NULL;
     266  	}
     267  
     268  	if (symbol->buckets != NULL)
     269  		efree(symbol->buckets);
     270  	symbol->ainit(symbol, NULL);	/* re-initialize symbol */
     271  	return NULL;
     272  }
     273  
     274  
     275  /* str_remove --- If SUBS is already in the table, remove it. */
     276  
     277  static NODE **
     278  str_remove(NODE *symbol, NODE *subs)
     279  {
     280  	unsigned long hash1;
     281  	BUCKET *b, *prev;
     282  	NODE *s2;
     283  	size_t s1_len;
     284  
     285  	if (symbol->table_size == 0)
     286  		return NULL;
     287  
     288  	s2 = force_string(subs);
     289  	hash1 = hash(s2->stptr, s2->stlen, (unsigned long) symbol->array_size, NULL);
     290  
     291  	for (b = symbol->buckets[hash1], prev = NULL; b != NULL;
     292  				prev = b, b = b->ahnext) {
     293  
     294  		/* Array indexes are strings; compare as such, always! */
     295  		s1_len = b->ahname_len;
     296  
     297  		if (s1_len != s2->stlen)
     298  			continue;
     299  		if (s1_len == 0		/* "" is a valid index */
     300  			    || memcmp(b->ahname_str, s2->stptr, s1_len) == 0) {
     301  			/* item found */
     302  
     303  			unref(b->ahname);
     304  			if (prev != NULL)
     305  				prev->ahnext = b->ahnext;
     306  			else
     307  				symbol->buckets[hash1] = b->ahnext;
     308  
     309  			/* delete bucket */
     310  			freebucket(b);
     311  
     312  			/* one less element in array */
     313  			if (--symbol->table_size == 0) {
     314  				if (symbol->buckets != NULL)
     315  					efree(symbol->buckets);
     316  				symbol->ainit(symbol, NULL);	/* re-initialize symbol */
     317  			}
     318  
     319  			return & success_node;	/* return success */
     320  		}
     321  	}
     322  
     323  	return NULL;
     324  }
     325  
     326  
     327  /* str_copy --- duplicate input array "symbol" */
     328  
     329  static NODE **
     330  str_copy(NODE *symbol, NODE *newsymb)
     331  {
     332  	BUCKET **old, **new, **pnew;
     333  	BUCKET *chain, *newchain;
     334  	unsigned long cursize, i;
     335  
     336  	assert(symbol->table_size > 0);
     337  
     338  	/* find the current hash size */
     339  	cursize = symbol->array_size;
     340  
     341  	/* allocate new table */
     342  	ezalloc(new, BUCKET **, cursize * sizeof(BUCKET *), "str_copy");
     343  
     344  	old = symbol->buckets;
     345  
     346  	for (i = 0; i < cursize; i++) {
     347  		for (chain = old[i], pnew = & new[i]; chain != NULL;
     348  				chain = chain->ahnext
     349  		) {
     350  			NODE *oldval, *newsubs;
     351  
     352  			getbucket(newchain);
     353  
     354  			/*
     355  			 * copy the corresponding name and
     356  			 * value from the original input list
     357  			 */
     358  
     359  			newsubs = newchain->ahname = dupnode(chain->ahname);
     360  			newchain->ahname_str = newsubs->stptr;
     361  			newchain->ahname_len = newsubs->stlen;
     362  
     363  			oldval = chain->ahvalue;
     364  			if (oldval->type == Node_val)
     365  				newchain->ahvalue = dupnode(oldval);
     366  			else {
     367  				NODE *r;
     368  
     369  				r = make_array();
     370  				r->vname = estrdup(oldval->vname, strlen(oldval->vname));
     371  				r->parent_array = newsymb;
     372  				newchain->ahvalue = assoc_copy(oldval, r);
     373  			}
     374  			newchain->ahcode = chain->ahcode;
     375  
     376  			*pnew = newchain;
     377  			newchain->ahnext = NULL;
     378  			pnew = & newchain->ahnext;
     379  		}
     380  	}
     381  
     382  	newsymb->table_size = symbol->table_size;
     383  	newsymb->buckets = new;
     384  	newsymb->array_size = cursize;
     385  	newsymb->flags = symbol->flags;
     386  	return NULL;
     387  }
     388  
     389  
     390  /* str_list --- return a list of array items */
     391  
     392  static NODE**
     393  str_list(NODE *symbol, NODE *t)
     394  {
     395  	NODE **list;
     396  	NODE *subs, *val;
     397  	BUCKET *b;
     398  	unsigned long num_elems, list_size, i, k = 0;
     399  	int elem_size = 1;
     400  	assoc_kind_t assoc_kind;
     401  
     402  	if (symbol->table_size == 0)
     403  		return NULL;
     404  
     405  	assoc_kind = (assoc_kind_t) t->flags;
     406  	if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
     407  		elem_size = 2;
     408  
     409  	/* allocate space for array */
     410  	num_elems = symbol->table_size;
     411  	if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
     412  		num_elems = 1;
     413  	list_size =  elem_size * num_elems;
     414  
     415  	emalloc(list, NODE **, list_size * sizeof(NODE *), "str_list");
     416  
     417  	/* populate it */
     418  
     419  	for (i = 0; i < symbol->array_size; i++) {
     420  		for (b = symbol->buckets[i]; b != NULL;	b = b->ahnext) {
     421  			/* index */
     422  			subs = b->ahname;
     423  			if ((assoc_kind & AINUM) != 0)
     424  				(void) force_number(subs);
     425  			list[k++] = dupnode(subs);
     426  
     427  			/* value */
     428  			if ((assoc_kind & AVALUE) != 0) {
     429  				val = b->ahvalue;
     430  				if (val->type == Node_val) {
     431  					if ((assoc_kind & AVNUM) != 0)
     432  						(void) force_number(val);
     433  					else if ((assoc_kind & AVSTR) != 0)
     434  						val = force_string(val);
     435  				}
     436  				list[k++] = val;
     437  			}
     438  			if (k >= list_size)
     439  				return list;
     440  		}
     441  	}
     442  	return list;
     443  }
     444  
     445  
     446  /* str_kilobytes --- calculate memory consumption of the assoc array */
     447  
     448  AWKNUM
     449  str_kilobytes(NODE *symbol)
     450  {
     451  	unsigned long bucket_cnt;
     452  	AWKNUM kb;
     453  
     454  	bucket_cnt = symbol->table_size;
     455  
     456  	/* This does not include extra memory for indices with stfmt != STFMT_UNUSED */
     457  	kb = (((AWKNUM) bucket_cnt) * sizeof (BUCKET) +
     458  		((AWKNUM) symbol->array_size) * sizeof (BUCKET *)) / 1024.0;
     459  	return kb;
     460  }
     461  
     462  
     463  /* str_dump --- dump array info */
     464  
     465  static NODE **
     466  str_dump(NODE *symbol, NODE *ndump)
     467  {
     468  #define HCNT	31
     469  
     470  	int indent_level;
     471  	unsigned long i, bucket_cnt;
     472  	BUCKET *b;
     473  	static size_t hash_dist[HCNT + 1];
     474  
     475  	indent_level = ndump->alevel;
     476  
     477  	if ((symbol->flags & XARRAY) == 0)
     478  		fprintf(output_fp, "%s `%s'\n",
     479  				(symbol->parent_array == NULL) ? "array" : "sub-array",
     480  				array_vname(symbol));
     481  	indent_level++;
     482  	indent(indent_level);
     483  	fprintf(output_fp, "array_func: str_array_func\n");
     484  	if (symbol->flags != 0) {
     485  		indent(indent_level);
     486  		fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
     487  	}
     488  	indent(indent_level);
     489  	fprintf(output_fp, "STR_CHAIN_MAX: %lu\n", (unsigned long) STR_CHAIN_MAX);
     490  	indent(indent_level);
     491  	fprintf(output_fp, "array_size: %lu\n", (unsigned long) symbol->array_size);
     492  	indent(indent_level);
     493  	fprintf(output_fp, "table_size: %lu\n", (unsigned long) symbol->table_size);
     494  	indent(indent_level);
     495  	fprintf(output_fp, "Avg # of items per chain: %.2g\n",
     496  				((AWKNUM) symbol->table_size) / symbol->array_size);
     497  
     498  	indent(indent_level);
     499  	fprintf(output_fp, "memory: %.2g kB\n", str_kilobytes(symbol));
     500  
     501  	/* hash value distribution */
     502  
     503  	memset(hash_dist, '\0', (HCNT + 1) * sizeof(size_t));
     504  	for (i = 0; i < symbol->array_size; i++) {
     505  		bucket_cnt = 0;
     506  		for (b = symbol->buckets[i]; b != NULL;	b = b->ahnext)
     507  			bucket_cnt++;
     508  		if (bucket_cnt >= HCNT)
     509  			bucket_cnt = HCNT;
     510  		hash_dist[bucket_cnt]++;
     511  	}
     512  
     513  	indent(indent_level);
     514  	fprintf(output_fp, "Hash distribution:\n");
     515  	indent_level++;
     516  	for (i = 0; i <= HCNT; i++) {
     517  		if (hash_dist[i] > 0) {
     518  			indent(indent_level);
     519  			if (i == HCNT)
     520  				fprintf(output_fp, "[>=%lu]:%lu\n",
     521  					(unsigned long) HCNT, (unsigned long) hash_dist[i]);
     522  			else
     523  				fprintf(output_fp, "[%lu]:%lu\n",
     524  					(unsigned long) i, (unsigned long) hash_dist[i]);
     525  		}
     526  	}
     527  	indent_level--;
     528  
     529  	/* dump elements */
     530  
     531  	if (ndump->adepth >= 0) {
     532  		const char *aname;
     533  
     534  		fprintf(output_fp, "\n");
     535  		aname = make_aname(symbol);
     536  		for (i = 0; i < symbol->array_size; i++) {
     537  			for (b = symbol->buckets[i]; b != NULL;	b = b->ahnext)
     538  				assoc_info(b->ahname, b->ahvalue, ndump, aname);
     539  		}
     540  	}
     541  
     542  	return NULL;
     543  
     544  #undef HCNT
     545  }
     546  
     547  
     548  /* awk_hash --- calculate the hash function of the string in subs */
     549  
     550  static unsigned long
     551  awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code)
     552  {
     553  	unsigned long h = 0;
     554  	unsigned long htmp;
     555  
     556  	/*
     557  	 * Ozan Yigit's original sdbm hash, copied from Margo Seltzers
     558  	 * db package.
     559  	 *
     560  	 * This is INCREDIBLY ugly, but fast.  We break the string up into
     561  	 * 8 byte units.  On the first time through the loop we get the
     562  	 * "leftover bytes" (strlen % 8).  On every other iteration, we
     563  	 * perform 8 HASHC's so we handle all 8 bytes.  Essentially, this
     564  	 * saves us 7 cmp & branch instructions.  If this routine is
     565  	 * heavily used enough, it's worth the ugly coding.
     566  	 */
     567  
     568  	/*
     569  	 * Even more speed:
     570  	 * #define HASHC   h = *s++ + 65599 * h
     571  	 * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
     572  	 *
     573  	 * 4/2011: Force the results to 32 bits, to get the same
     574  	 * result on both 32- and 64-bit systems. This may be a
     575  	 * bad idea.
     576  	 */
     577  #define HASHC   htmp = (h << 6);  \
     578  		h = *s++ + htmp + (htmp << 10) - h ; \
     579  		htmp &= 0xFFFFFFFF; \
     580  		h &= 0xFFFFFFFF
     581  
     582  	h = 0;
     583  
     584  	/* "Duff's Device" */
     585  	if (len > 0) {
     586  		size_t loop = (len + 8 - 1) >> 3;
     587  
     588  		switch (len & (8 - 1)) {
     589  		case 0:
     590  			do {	/* All fall throughs */
     591  				HASHC;
     592  		case 7:		HASHC;
     593  		case 6:		HASHC;
     594  		case 5:		HASHC;
     595  		case 4:		HASHC;
     596  		case 3:		HASHC;
     597  		case 2:		HASHC;
     598  		case 1:		HASHC;
     599  			} while (--loop);
     600  		}
     601  	}
     602  
     603  	if (code != NULL)
     604  		*code = h;
     605  
     606  	if (h >= hsize)
     607  		h %= hsize;
     608  	return h;
     609  }
     610  
     611  
     612  /* str_find --- locate symbol[subs] */
     613  
     614  static inline NODE **
     615  str_find(NODE *symbol, NODE *s1, size_t code1, unsigned long hash1)
     616  {
     617  	BUCKET *b;
     618  	size_t s2_len;
     619  
     620  	for (b = symbol->buckets[hash1]; b != NULL; b = b->ahnext) {
     621  		/*
     622  		 * This used to use cmp_nodes() here.  That's wrong.
     623  		 * Array indexes are strings; compare as such, always!
     624  	 	 */
     625  		s2_len = b->ahname_len;
     626  
     627  		if (code1 == b->ahcode
     628  			&& s1->stlen == s2_len
     629  			&& (s2_len == 0		/* "" is a valid index */
     630  				|| memcmp(s1->stptr, b->ahname_str, s2_len) == 0)
     631  		)
     632  			return & (b->ahvalue);
     633  	}
     634  	return NULL;
     635  }
     636  
     637  
     638  /* grow_table --- grow a hash table */
     639  
     640  static void
     641  grow_table(NODE *symbol)
     642  {
     643  	BUCKET **old, **new;
     644  	BUCKET *chain, *next;
     645  	int i, j;
     646  	unsigned long oldsize, newsize, k;
     647  	unsigned long hash1;
     648  
     649  	/*
     650  	 * This is an array of primes. We grow the table by an order of
     651  	 * magnitude each time (not just doubling) so that growing is a
     652  	 * rare operation. We expect, on average, that it won't happen
     653  	 * more than twice.  The final size is also chosen to be small
     654  	 * enough so that MS-DOG mallocs can handle it. When things are
     655  	 * very large (> 8K), we just double more or less, instead of
     656  	 * just jumping from 8K to 64K.
     657  	 */
     658  
     659  	static const unsigned long sizes[] = {
     660  		13, 127, 1021, 8191, 16381, 32749, 65497,
     661  		131101, 262147, 524309, 1048583, 2097169,
     662  		4194319, 8388617, 16777259, 33554467,
     663  		67108879, 134217757, 268435459, 536870923,
     664  		1073741827
     665  	};
     666  
     667  	/* find next biggest hash size */
     668  	newsize = oldsize = symbol->array_size;
     669  
     670  	for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
     671  		if (oldsize < sizes[i]) {
     672  			newsize = sizes[i];
     673  			break;
     674  		}
     675  	}
     676  	if (newsize == oldsize) {	/* table already at max (!) */
     677  		symbol->flags |= ARRAYMAXED;
     678  		return;
     679  	}
     680  
     681  	/* allocate new table */
     682  	ezalloc(new, BUCKET **, newsize * sizeof(BUCKET *), "grow_table");
     683  
     684  	old = symbol->buckets;
     685  	symbol->buckets = new;
     686  	symbol->array_size = newsize;
     687  
     688  	/* brand new hash table, set things up and return */
     689  	if (old == NULL) {
     690  		symbol->table_size = 0;
     691  		return;
     692  	}
     693  
     694  	/* old hash table there, move stuff to new, free old */
     695  
     696  	/*
     697  	 * note that symbol->table_size does not change if an old array,
     698  	 * and is explicitly set to 0 if a new one.
     699  	 */
     700  
     701  	for (k = 0; k < oldsize; k++) {
     702  		for (chain = old[k]; chain != NULL; chain = next) {
     703  			next = chain->ahnext;
     704  			hash1 = chain->ahcode % newsize;
     705  
     706  			/* remove from old list, add to new */
     707  			chain->ahnext = new[hash1];
     708  			new[hash1] = chain;
     709  		}
     710  	}
     711  	efree(old);
     712  }
     713  
     714  
     715  
     716  /*
     717  From bonzini@gnu.org  Mon Oct 28 16:05:26 2002
     718  Date: Mon, 28 Oct 2002 13:33:03 +0100
     719  From: Paolo Bonzini <bonzini@gnu.org>
     720  To: arnold@skeeve.com
     721  Subject: Hash function
     722  Message-ID: <20021028123303.GA6832@biancaneve>
     723  
     724  Here is the hash function I'm using in GNU Smalltalk.  The scrambling is
     725  needed if you use powers of two as the table sizes.  If you use primes it
     726  is not needed.
     727  
     728  To use double-hashing with power-of-two size, you should use the
     729  _gst_hash_string(str, len) as the primary hash and
     730  scramble(_gst_hash_string (str, len)) | 1 as the secondary hash.
     731  
     732  Paolo
     733  
     734  */
     735  /*
     736   * ADR: Slightly modified to work w/in the context of gawk.
     737   */
     738  
     739  static unsigned long
     740  gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code)
     741  {
     742  	unsigned long hashVal = 1497032417;    /* arbitrary value */
     743  	unsigned long ret;
     744  
     745  	while (len--) {
     746  		hashVal += *str++;
     747  		hashVal += (hashVal << 10);
     748  		hashVal ^= (hashVal >> 6);
     749  	}
     750  
     751  	ret = scramble(hashVal);
     752  
     753  	if (code != NULL)
     754  		*code = ret;
     755  
     756  	if (ret >= hsize)
     757  		ret %= hsize;
     758  
     759  	return ret;
     760  }
     761  
     762  static unsigned long
     763  scramble(unsigned long x)
     764  {
     765  	if (sizeof(long) == 4) {
     766  		int y = ~x;
     767  
     768  		x += (y << 10) | (y >> 22);
     769  		x += (x << 6)  | (x >> 26);
     770  		x -= (x << 16) | (x >> 16);
     771  	} else {
     772  		x ^= (~x) >> 31;
     773  		x += (x << 21) | (x >> 11);
     774  		x += (x << 5) | (x >> 27);
     775  		x += (x << 27) | (x >> 5);
     776  		x += (x << 31);
     777  	}
     778  
     779  	return x;
     780  }
     781  
     782  /* fnv1a_hash_string --- fnv1a hash function */
     783  
     784  /*
     785   * FNV-1a hash function
     786   * http://www.isthe.com/chongo/tech/comp/fnv/index.html
     787   */
     788  
     789  static unsigned long
     790  fnv1a_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code)
     791  {
     792  	/* FNV-1a */
     793  	register unsigned ret = 2166136261U;
     794  
     795  	while (len > 0) {
     796  		ret ^= (unsigned char) (*str++);
     797  		ret *= 16777619U;
     798  		len-- ;
     799  	}
     800  
     801  	if (code != NULL)
     802  		*code = ret;
     803  
     804  	if (ret >= hsize)
     805  		ret %= hsize;
     806  
     807  	return ret;
     808  }
     809  
     810  /* env_remove --- for ENVIRON, remove value from real environment */
     811  
     812  static NODE **
     813  env_remove(NODE *symbol, NODE *subs)
     814  {
     815  	NODE **val = str_remove(symbol, subs);
     816  	char save;
     817  
     818  	if (val != NULL) {
     819  		str_terminate(subs, save);
     820  		(void) unsetenv(subs->stptr);
     821  		str_restore(subs, save);
     822  	}
     823  
     824  	return val;
     825  }
     826  
     827  /* env_clear --- clear out the environment when ENVIRON is deleted */
     828  
     829  static NODE **
     830  env_clear(NODE *symbol, NODE *subs)
     831  {
     832  	extern char **environ;
     833  	NODE **val = str_clear(symbol, subs);
     834  
     835  	environ = NULL;	/* ZAP! */
     836  
     837  	/* str_clear zaps the vtable, reset it */
     838  	symbol->array_funcs = & env_array_func;
     839  
     840  	return val;
     841  }
     842  
     843  /* env_store --- post assign function for ENVIRON, put new value into env */
     844  
     845  static NODE **
     846  env_store(NODE *symbol, NODE *subs)
     847  {
     848  	NODE **val = str_exists(symbol, subs);
     849  	const char *newval;
     850  
     851  	assert(val != NULL);
     852  
     853  	newval = (*val)->stptr;
     854  	if (newval == NULL)
     855  		newval = "";
     856  
     857  	(void) setenv(subs->stptr, newval, 1);
     858  
     859  	return val;
     860  }
     861  
     862  /* init_env_array --- set up the pointers for ENVIRON. A bit hacky. */
     863  
     864  void
     865  init_env_array(NODE *env_node)
     866  {
     867  	/* If POSIX simply don't reset the vtable and things work as before */
     868  	if (do_posix)
     869  		return;
     870  
     871  	env_node->array_funcs = & env_array_func;
     872  }