(root)/
gawk-5.2.2/
int_array.c
       1  /*
       2   * int_array.c - routines for arrays of integer indices.
       3   */
       4  
       5  /*
       6   * Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2019, 2020, 2022,
       7   * the Free Software Foundation, Inc.
       8   *
       9   * This file is part of GAWK, the GNU implementation of the
      10   * AWK Programming Language.
      11   *
      12   * GAWK is free software; you can redistribute it and/or modify
      13   * it under the terms of the GNU General Public License as published by
      14   * the Free Software Foundation; either version 3 of the License, or
      15   * (at your option) any later version.
      16   *
      17   * GAWK is distributed in the hope that it will be useful,
      18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
      19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      20   * GNU General Public License for more details.
      21   *
      22   * You should have received a copy of the GNU General Public License
      23   * along with this program; if not, write to the Free Software
      24   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
      25   */
      26  
      27  #include "awk.h"
      28  
      29  extern FILE *output_fp;
      30  extern void indent(int indent_level);
      31  extern NODE **is_integer(NODE *symbol, NODE *subs);
      32  
      33  static size_t INT_CHAIN_MAX = 2;
      34  
      35  static NODE **int_array_init(NODE *symbol, NODE *subs);
      36  static NODE **int_lookup(NODE *symbol, NODE *subs);
      37  static NODE **int_exists(NODE *symbol, NODE *subs);
      38  static NODE **int_clear(NODE *symbol, NODE *subs);
      39  static NODE **int_remove(NODE *symbol, NODE *subs);
      40  static NODE **int_list(NODE *symbol, NODE *t);
      41  static NODE **int_copy(NODE *symbol, NODE *newsymb);
      42  static NODE **int_dump(NODE *symbol, NODE *ndump);
      43  
      44  static uint32_t int_hash(uint32_t k, uint32_t hsize);
      45  static inline NODE **int_find(NODE *symbol, long k, uint32_t hash1);
      46  static NODE **int_insert(NODE *symbol, long k, uint32_t hash1);
      47  static void grow_int_table(NODE *symbol);
      48  
      49  const array_funcs_t int_array_func = {
      50  	"int",
      51  	int_array_init,
      52  	is_integer,
      53  	int_lookup,
      54  	int_exists,
      55  	int_clear,
      56  	int_remove,
      57  	int_list,
      58  	int_copy,
      59  	int_dump,
      60  	(afunc_t) 0,
      61  };
      62  
      63  
      64  /* int_array_init --- array initialization routine */
      65  
      66  static NODE **
      67  int_array_init(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
      68  {
      69  	if (symbol == NULL) {	/* first time */
      70  		long newval;
      71  
      72  		/* check relevant environment variables */
      73  		if ((newval = getenv_long("INT_CHAIN_MAX")) > 0)
      74  			INT_CHAIN_MAX = newval;
      75  	} else
      76  		null_array(symbol);
      77  
      78  	return & success_node;
      79  }
      80  
      81  /*
      82   * standard_integer_string -- check whether the string matches what
      83   * sprintf("%ld", <value>) would produce. This is accomplished by accepting
      84   * only strings that look like /^0$/ or /^-?[1-9][0-9]*$/. This should be
      85   * faster than comparing vs. the results of actually calling sprintf.
      86   */
      87  
      88  static bool
      89  standard_integer_string(const char *s, size_t len)
      90  {
      91  	const char *end;
      92  
      93  	if (len == 0)
      94  		return false;
      95  	if (*s == '0' && len == 1)
      96  		return true;
      97  	end = s + len;
      98  	/* ignore leading minus sign */
      99  	if (*s == '-' && ++s == end)
     100  		return false;
     101  	/* check first char is [1-9] */
     102  	if (*s < '1' || *s > '9')
     103  		return false;
     104  	while (++s < end) {
     105  		if (*s < '0' || *s > '9')
     106  			return false;
     107  	}
     108  	return true;
     109  }
     110  
     111  /* is_integer --- check if subscript is an integer */
     112  
     113  NODE **
     114  is_integer(NODE *symbol, NODE *subs)
     115  {
     116  #ifndef CHECK_INTEGER_USING_FORCE_NUMBER
     117  	long l;
     118  #endif
     119  	AWKNUM d;
     120  
     121  	if ((subs->flags & NUMINT) != 0)
     122  		/* quick exit */
     123  		return & success_node;
     124  
     125  	if (subs == Nnull_string || do_mpfr)
     126  		return NULL;
     127  
     128  #ifdef CHECK_INTEGER_USING_FORCE_NUMBER
     129  	/*
     130  	 * This approach is much simpler, because we remove all of the strtol
     131  	 * logic below. But this may be slower in some usage cases.
     132  	 */
     133  	if ((subs->flags & NUMCUR) == 0) {
     134  		str2number(subs);
     135  
     136  		/* check again in case force_number set NUMINT */
     137  		if ((subs->flags & NUMINT) != 0)
     138  			return & success_node;
     139  	}
     140  #else /* CHECK_INTEGER_USING_FORCE_NUMBER */
     141  	if ((subs->flags & NUMCUR) != 0) {
     142  #endif /* CHECK_INTEGER_USING_FORCE_NUMBER */
     143  		d = subs->numbr;
     144  		if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d) {
     145  			/*
     146  			 * The numeric value is an integer, but we must
     147  			 * protect against strings that cannot be generated
     148  			 * from sprintf("%ld", <subscript>). This can happen
     149  			 * with strnum or string values. We could skip this
     150  			 * check for pure NUMBER values, but unfortunately the
     151  			 * code does not currently distinguish between NUMBER
     152  			 * and strnum values.
     153  			 */
     154  			if (   (subs->flags & STRCUR) == 0
     155  			    || standard_integer_string(subs->stptr, subs->stlen)) {
     156  				subs->flags |= NUMINT;
     157  				return & success_node;
     158  			}
     159  		}
     160  		return NULL;
     161  #ifndef CHECK_INTEGER_USING_FORCE_NUMBER
     162  	}
     163  
     164  	/* a[3]=1; print "3" in a    -- true
     165  	 * a[3]=1; print "+3" in a   -- false
     166  	 * a[3]=1; print "03" in a   -- false
     167  	 * a[-3]=1; print "-3" in a  -- true
     168  	 */
     169  
     170  	/* must be a STRING */
     171  	char *cp = subs->stptr, *cpend, *ptr;
     172  	char save;
     173  	size_t len = subs->stlen;
     174  
     175  	if (len == 0 || (! isdigit((unsigned char) *cp) && *cp != '-'))
     176  		return NULL;
     177  
     178  	if (len > 1 &&
     179  		((*cp == '0')		/* "00", "011" .. */
     180  			|| (*cp == '-' && *(cp + 1) == '0')	/* "-0", "-011" .. */
     181  		)
     182  	)
     183  		return NULL;
     184  	if (len == 1 && *cp != '-') {	/* single digit */
     185  		subs->numbr = (long) (*cp - '0');
     186  		if ((subs->flags & USER_INPUT) != 0) {
     187  			/* leave USER_INPUT set */
     188  			subs->flags &= ~STRING;
     189  			subs->flags |= NUMBER;
     190  		}
     191  		subs->flags |= (NUMCUR|NUMINT);
     192  		return & success_node;
     193  	}
     194  
     195  	cpend = cp + len;
     196  	save = *cpend;
     197  	*cpend = '\0';
     198  
     199  	errno = 0;
     200  	l = strtol(cp, & ptr, 10);
     201  	*cpend = save;
     202  	if (errno != 0 || ptr != cpend)
     203  		return NULL;
     204  
     205  	subs->numbr = l;
     206  	if ((subs->flags & USER_INPUT) != 0) {
     207  		/* leave USER_INPUT set */
     208  		subs->flags &= ~STRING;
     209  		subs->flags |= NUMBER;
     210  	}
     211  	subs->flags |= NUMCUR;
     212  	if (l <= INT32_MAX && l >= INT32_MIN) {
     213  		subs->flags |= NUMINT;
     214  		return & success_node;
     215  	}
     216  
     217  	return NULL;
     218  #endif /* CHECK_INTEGER_USING_FORCE_NUMBER */
     219  }
     220  
     221  
     222  /*
     223   * int_lookup --- Find SYMBOL[SUBS] in the assoc array.  Install it with value ""
     224   * if it isn't there. Returns a pointer ala get_lhs to where its value is stored.
     225   */
     226  
     227  static NODE **
     228  int_lookup(NODE *symbol, NODE *subs)
     229  {
     230  	uint32_t hash1;
     231  	long k;
     232  	unsigned long size;
     233  	NODE **lhs;
     234  	NODE *xn;
     235  
     236  	/*
     237  	 * N.B: symbol->table_size is the total # of non-integers (symbol->xarray)
     238  	 *	and integer elements. Also, symbol->xarray must have at least one
     239  	 *	item in it, and cannot exist if there are no integer elements.
     240  	 * 	In that case, symbol->xarray is promoted to 'symbol' (See int_remove).
     241  	 */
     242  
     243  
     244  	if (! is_integer(symbol, subs)) {
     245  		xn = symbol->xarray;
     246  		if (xn == NULL) {
     247  			xn = symbol->xarray = make_array();
     248  			xn->vname = symbol->vname;	/* shallow copy */
     249  			xn->flags |= XARRAY;
     250  		} else if ((lhs = xn->aexists(xn, subs)) != NULL)
     251  			return lhs;
     252  		symbol->table_size++;
     253  		return assoc_lookup(xn, subs);
     254  	}
     255  
     256  	k = subs->numbr;
     257  	if (symbol->buckets == NULL)
     258  		grow_int_table(symbol);
     259  
     260   	hash1 = int_hash(k, symbol->array_size);
     261  	if ((lhs = int_find(symbol, k, hash1)) != NULL)
     262  		return lhs;
     263  
     264  	/* It's not there, install it */
     265  
     266  	symbol->table_size++;
     267  
     268  	/* first see if we would need to grow the array, before installing */
     269  	size = symbol->table_size;
     270  	if ((xn = symbol->xarray) != NULL)
     271  		size -= xn->table_size;
     272  
     273  	if ((symbol->flags & ARRAYMAXED) == 0
     274  		    && (size / symbol->array_size) > INT_CHAIN_MAX) {
     275  		grow_int_table(symbol);
     276  		/* have to recompute hash value for new size */
     277  		hash1 = int_hash(k, symbol->array_size);
     278  	}
     279  
     280  	return int_insert(symbol, k, hash1);
     281  }
     282  
     283  
     284  /*
     285   * int_exists --- test whether the array element symbol[subs] exists or not,
     286   *	return pointer to value if it does.
     287   */
     288  
     289  static NODE **
     290  int_exists(NODE *symbol, NODE *subs)
     291  {
     292  	long k;
     293  	uint32_t hash1;
     294  
     295  	if (! is_integer(symbol, subs)) {
     296  		NODE *xn = symbol->xarray;
     297  		if (xn == NULL)
     298  			return NULL;
     299  		return xn->aexists(xn, subs);
     300  	}
     301  	if (symbol->buckets == NULL)
     302  		return NULL;
     303  
     304  	k = subs->numbr;
     305  	hash1 = int_hash(k, symbol->array_size);
     306  	return int_find(symbol, k, hash1);
     307  }
     308  
     309  /* int_clear --- flush all the values in symbol[] */
     310  
     311  static NODE **
     312  int_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
     313  {
     314  	unsigned long i;
     315  	int j;
     316  	BUCKET *b, *next;
     317  	NODE *r;
     318  
     319  	if (symbol->xarray != NULL) {
     320  		NODE *xn = symbol->xarray;
     321  		assoc_clear(xn);
     322  		freenode(xn);
     323  		symbol->xarray = NULL;
     324  	}
     325  
     326  	for (i = 0; i < symbol->array_size; i++) {
     327  		for (b = symbol->buckets[i]; b != NULL;	b = next) {
     328  			next = b->ainext;
     329  			for (j = 0; j < b->aicount; j++) {
     330  				r = b->aivalue[j];
     331  				if (r->type == Node_var_array) {
     332  					assoc_clear(r);	/* recursively clear all sub-arrays */
     333  					efree(r->vname);
     334  					freenode(r);
     335  				} else
     336  					unref(r);
     337  			}
     338  			freebucket(b);
     339  		}
     340  		symbol->buckets[i] = NULL;
     341  	}
     342  	if (symbol->buckets != NULL)
     343  		efree(symbol->buckets);
     344  	symbol->ainit(symbol, NULL);	/* re-initialize symbol */
     345  	return NULL;
     346  }
     347  
     348  
     349  /* int_remove --- If SUBS is already in the table, remove it. */
     350  
     351  static NODE **
     352  int_remove(NODE *symbol, NODE *subs)
     353  {
     354  	uint32_t hash1;
     355  	BUCKET *b, *prev = NULL;
     356  	long k;
     357  	int i;
     358  	NODE *xn = symbol->xarray;
     359  
     360  	if (symbol->table_size == 0 || symbol->buckets == NULL)
     361  		return NULL;
     362  
     363  	if (! is_integer(symbol, subs)) {
     364  		if (xn == NULL || xn->aremove(xn, subs) == NULL)
     365  			return NULL;
     366  		if (xn->table_size == 0) {
     367  			freenode(xn);
     368  			symbol->xarray = NULL;
     369  		}
     370  		symbol->table_size--;
     371  		assert(symbol->table_size > 0);
     372  		return & success_node;
     373  	}
     374  
     375  	k = subs->numbr;
     376  	hash1 = int_hash(k, symbol->array_size);
     377  
     378  	for (b = symbol->buckets[hash1]; b != NULL; prev = b, b = b->ainext) {
     379  		for (i = 0; i < b->aicount; i++) {
     380  			if (k != b->ainum[i])
     381  				continue;
     382  
     383  			/* item found */
     384  			if (i == 0 && b->aicount == 2) {
     385  				/* removing the 1st item; move 2nd item from position 1 to 0 */
     386  
     387  				b->ainum[0] = b->ainum[1];
     388  				b->aivalue[0] = b->aivalue[1];
     389  			} /* else
     390  				removing the only item or the 2nd item */
     391  
     392  			goto removed;
     393  		}
     394  	}
     395  
     396  	if (b == NULL)	/* item not in array */
     397  		return NULL;
     398  
     399  removed:
     400  	b->aicount--;
     401  
     402  	if (b->aicount == 0) {
     403  		/* detach bucket */
     404  		if (prev != NULL)
     405  			prev->ainext = b->ainext;
     406  		else
     407  			symbol->buckets[hash1] = b->ainext;
     408  
     409  		/* delete bucket */
     410  		freebucket(b);
     411  	} else if (b != symbol->buckets[hash1]) {
     412  		BUCKET *head = symbol->buckets[hash1];
     413  
     414  		assert(b->aicount == 1);
     415  		/* move the last element from head to bucket to make it full. */
     416  		i = --head->aicount;	/* head has one less element */
     417  		b->ainum[1] = head->ainum[i];
     418  		b->aivalue[1] = head->aivalue[i];
     419  		b->aicount++;	/* bucket has one more element */
     420  		if (i == 0) {
     421  			/* head is now empty; delete head */
     422  			symbol->buckets[hash1] = head->ainext;
     423  			freebucket(head);
     424  		}
     425  	} /* else
     426  		do nothing */
     427  
     428  	symbol->table_size--;
     429  	if (xn == NULL && symbol->table_size == 0) {
     430  		efree(symbol->buckets);
     431  		symbol->ainit(symbol, NULL);	/* re-initialize array 'symbol' */
     432  	} else if (xn != NULL && symbol->table_size == xn->table_size) {
     433  		/* promote xn (str_array) to symbol */
     434  		xn->flags &= ~XARRAY;
     435  		xn->parent_array = symbol->parent_array;
     436  		efree(symbol->buckets);
     437  		*symbol = *xn;
     438  		freenode(xn);
     439  	}
     440  
     441  	return & success_node;	/* return success */
     442  }
     443  
     444  
     445  /* int_copy --- duplicate input array "symbol" */
     446  
     447  static NODE **
     448  int_copy(NODE *symbol, NODE *newsymb)
     449  {
     450  	BUCKET **old, **new, **pnew;
     451  	BUCKET *chain, *newchain;
     452  	int j;
     453  	unsigned long i, cursize;
     454  
     455  	assert(symbol->buckets != NULL);
     456  
     457  	/* find the current hash size */
     458  	cursize = symbol->array_size;
     459  
     460  	/* allocate new table */
     461  	ezalloc(new, BUCKET **, cursize * sizeof(BUCKET *), "int_copy");
     462  
     463  	old = symbol->buckets;
     464  
     465  	for (i = 0; i < cursize; i++) {
     466  		for (chain = old[i], pnew = & new[i]; chain != NULL;
     467  				chain = chain->ainext
     468  		) {
     469  			getbucket(newchain);
     470  			newchain->aicount = chain->aicount;
     471  			newchain->ainext = NULL;
     472  			for (j = 0; j < chain->aicount; j++) {
     473  				NODE *oldval;
     474  
     475  				/*
     476  				 * copy the corresponding key and
     477  				 * value from the original input list
     478  				 */
     479  				newchain->ainum[j] = chain->ainum[j];
     480  
     481  				oldval = chain->aivalue[j];
     482  				if (oldval->type == Node_val)
     483  					newchain->aivalue[j] = dupnode(oldval);
     484  				else {
     485  					NODE *r;
     486  					r = make_array();
     487  					r->vname = estrdup(oldval->vname, strlen(oldval->vname));
     488  					r->parent_array = newsymb;
     489  					newchain->aivalue[j] = assoc_copy(oldval, r);
     490  				}
     491  			}
     492  
     493  			*pnew = newchain;
     494  			newchain->ainext = NULL;
     495  			pnew = & newchain->ainext;
     496  		}
     497  	}
     498  
     499  	if (symbol->xarray != NULL) {
     500  		NODE *xn, *n;
     501  		xn = symbol->xarray;
     502  		n = make_array();
     503  		n->vname = newsymb->vname;	/* shallow copy */
     504  		(void) xn->acopy(xn, n);
     505  		newsymb->xarray = n;
     506  	} else
     507  		newsymb->xarray = NULL;
     508  
     509  	newsymb->table_size = symbol->table_size;
     510  	newsymb->buckets = new;
     511  	newsymb->array_size = cursize;
     512  	newsymb->flags = symbol->flags;
     513  
     514  	return NULL;
     515  }
     516  
     517  
     518  /* int_list --- return a list of array items */
     519  
     520  static NODE**
     521  int_list(NODE *symbol, NODE *t)
     522  {
     523  	NODE **list = NULL;
     524  	unsigned long num_elems, list_size, i, k = 0;
     525  	BUCKET *b;
     526  	NODE *r, *subs, *xn;
     527  	int j, elem_size = 1;
     528  	long num;
     529  	static char buf[100];
     530  	assoc_kind_t assoc_kind;
     531  
     532  	if (symbol->table_size == 0)
     533  		return NULL;
     534  
     535  	assoc_kind = (assoc_kind_t) t->flags;
     536  	num_elems = symbol->table_size;
     537  	if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
     538  		num_elems = 1;
     539  
     540  	if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
     541  		elem_size = 2;
     542  	list_size = elem_size * num_elems;
     543  
     544  	if (symbol->xarray != NULL) {
     545  		xn = symbol->xarray;
     546  		list = xn->alist(xn, t);
     547  		assert(list != NULL);
     548  		if (num_elems == 1 || num_elems == xn->table_size)
     549  			return list;
     550  		erealloc(list, NODE **, list_size * sizeof(NODE *), "int_list");
     551  		k = elem_size * xn->table_size;
     552  	} else
     553  		emalloc(list, NODE **, list_size * sizeof(NODE *), "int_list");
     554  
     555  	/* populate it */
     556  
     557  	for (i = 0; i < symbol->array_size; i++) {
     558  		for (b = symbol->buckets[i]; b != NULL;	b = b->ainext) {
     559  			for (j = 0; j < b->aicount; j++) {
     560  				/* index */
     561  				num = b->ainum[j];
     562  				if ((assoc_kind & AISTR) != 0) {
     563  					sprintf(buf, "%ld", num);
     564  					subs = make_string(buf, strlen(buf));
     565  					subs->numbr = num;
     566  					subs->flags |= (NUMCUR|NUMINT);
     567  				} else {
     568  					subs = make_number((AWKNUM) num);
     569  					subs->flags |= (INTIND|NUMINT);
     570  				}
     571  				list[k++] = subs;
     572  
     573  				/* value */
     574  				if ((assoc_kind & AVALUE) != 0) {
     575  					r = b->aivalue[j];
     576  					if (r->type == Node_val) {
     577  						if ((assoc_kind & AVNUM) != 0)
     578  							(void) force_number(r);
     579  						else if ((assoc_kind & AVSTR) != 0)
     580  							r = force_string(r);
     581  					}
     582  					list[k++] = r;
     583  				}
     584  
     585  				if (k >= list_size)
     586  					return list;
     587  			}
     588  		}
     589  	}
     590  	return list;
     591  }
     592  
     593  
     594  /* int_kilobytes --- calculate memory consumption of the assoc array */
     595  
     596  AWKNUM
     597  int_kilobytes(NODE *symbol)
     598  {
     599  	unsigned long i, bucket_cnt = 0;
     600  	BUCKET *b;
     601  	AWKNUM kb;
     602  	extern AWKNUM str_kilobytes(NODE *symbol);
     603  
     604  	for (i = 0; i < symbol->array_size; i++) {
     605  		for (b = symbol->buckets[i]; b != NULL; b = b->ainext)
     606  			bucket_cnt++;
     607  	}
     608  	kb = (((AWKNUM) bucket_cnt) * sizeof (BUCKET) +
     609  			((AWKNUM) symbol->array_size) * sizeof (BUCKET *)) / 1024.0;
     610  
     611  	if (symbol->xarray != NULL)
     612  		kb += str_kilobytes(symbol->xarray);
     613  
     614  	return kb;
     615  }
     616  
     617  
     618  /* int_dump --- dump array info */
     619  
     620  static NODE **
     621  int_dump(NODE *symbol, NODE *ndump)
     622  {
     623  #define HCNT	31
     624  
     625  	int indent_level;
     626  	BUCKET *b;
     627  	NODE *xn = NULL;
     628  	unsigned long str_size = 0, int_size = 0;
     629  	unsigned long i;
     630  	size_t j, bucket_cnt;
     631  	static size_t hash_dist[HCNT + 1];
     632  
     633  	indent_level = ndump->alevel;
     634  
     635  	if (symbol->xarray != NULL) {
     636  		xn = symbol->xarray;
     637  		str_size = xn->table_size;
     638  	}
     639  	int_size = symbol->table_size - str_size;
     640  
     641  	if ((symbol->flags & XARRAY) == 0)
     642  		fprintf(output_fp, "%s `%s'\n",
     643  				(symbol->parent_array == NULL) ? "array" : "sub-array",
     644  				array_vname(symbol));
     645  
     646  	indent_level++;
     647  	indent(indent_level);
     648  	fprintf(output_fp, "array_func: int_array_func\n");
     649  	if (symbol->flags != 0) {
     650  		indent(indent_level);
     651  		fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
     652  	}
     653  	indent(indent_level);
     654  	fprintf(output_fp, "INT_CHAIN_MAX: %lu\n", (unsigned long) INT_CHAIN_MAX);
     655  	indent(indent_level);
     656  	fprintf(output_fp, "array_size: %lu (int)\n", (unsigned long) symbol->array_size);
     657  	indent(indent_level);
     658  	fprintf(output_fp, "table_size: %lu (total), %lu (int), %lu (str)\n",
     659  			(unsigned long) symbol->table_size, int_size, str_size);
     660  	indent(indent_level);
     661  	fprintf(output_fp, "Avg # of items per chain (int): %.2g\n",
     662  			((AWKNUM) int_size) / symbol->array_size);
     663  
     664  	indent(indent_level);
     665  	fprintf(output_fp, "memory: %.2g kB (total)\n", int_kilobytes(symbol));
     666  
     667  	/* hash value distribution */
     668  
     669  	memset(hash_dist, '\0', (HCNT + 1) * sizeof(size_t));
     670  	for (i = 0; i < symbol->array_size; i++) {
     671  		bucket_cnt = 0;
     672  		for (b = symbol->buckets[i]; b != NULL;	b = b->ainext)
     673  			bucket_cnt += b->aicount;
     674  		if (bucket_cnt >= HCNT)
     675  			bucket_cnt = HCNT;
     676  		hash_dist[bucket_cnt]++;
     677  	}
     678  
     679  	indent(indent_level);
     680  	fprintf(output_fp, "Hash distribution:\n");
     681  	indent_level++;
     682  	for (j = 0; j <= HCNT; j++) {
     683  		if (hash_dist[j] > 0) {
     684  			indent(indent_level);
     685  			if (j == HCNT)
     686  				fprintf(output_fp, "[>=%lu]:%lu\n",
     687  					(unsigned long) HCNT, (unsigned long) hash_dist[j]);
     688  			else
     689  				fprintf(output_fp, "[%lu]:%lu\n",
     690  					(unsigned long) j, (unsigned long) hash_dist[j]);
     691  		}
     692  	}
     693  	indent_level--;
     694  
     695  	/* dump elements */
     696  
     697  	if (ndump->adepth >= 0) {
     698  		NODE *subs;
     699  		const char *aname;
     700  
     701  		fprintf(output_fp, "\n");
     702  
     703  		aname = make_aname(symbol);
     704  		subs = make_number((AWKNUM) 0);
     705  		subs->flags |= (INTIND|NUMINT);
     706  
     707  		for (i = 0; i < symbol->array_size; i++) {
     708  			for (b = symbol->buckets[i]; b != NULL; b = b->ainext) {
     709  				for (j = 0; j < b->aicount; j++) {
     710  					subs->numbr = b->ainum[j];
     711  					assoc_info(subs, b->aivalue[j], ndump, aname);
     712  				}
     713  			}
     714  		}
     715  		unref(subs);
     716  	}
     717  
     718  	if (xn != NULL)	{
     719  		fprintf(output_fp, "\n");
     720  		xn->adump(xn, ndump);
     721  	}
     722  
     723  	return NULL;
     724  
     725  #undef HCNT
     726  }
     727  
     728  
     729  /* int_hash --- calculate the hash function of the integer subs */
     730  
     731  static uint32_t
     732  int_hash(uint32_t k, uint32_t hsize)
     733  {
     734  
     735  /*
     736   * Code snippet copied from:
     737   *	Hash functions (http://www.azillionmonkeys.com/qed/hash.html).
     738   *	Copyright 2004-2008 by Paul Hsieh. Licenced under LGPL 2.1.
     739   */
     740  
     741  	/* This is the final mixing function used by Paul Hsieh in SuperFastHash. */
     742  
     743  	k ^= k << 3;
     744  	k += k >> 5;
     745  	k ^= k << 4;
     746  	k += k >> 17;
     747  	k ^= k << 25;
     748  	k += k >> 6;
     749  
     750  	if (k >= hsize)
     751  		k %= hsize;
     752  	return k;
     753  }
     754  
     755  /* int_find --- locate symbol[subs] */
     756  
     757  static inline NODE **
     758  int_find(NODE *symbol, long k, uint32_t hash1)
     759  {
     760  	BUCKET *b;
     761  	int i;
     762  
     763  	assert(symbol->buckets != NULL);
     764  	for (b = symbol->buckets[hash1]; b != NULL; b = b->ainext) {
     765  		for (i = 0; i < b->aicount; i++) {
     766  			if (b->ainum[i] == k)
     767  				return (b->aivalue + i);
     768  		}
     769  	}
     770  	return NULL;
     771  }
     772  
     773  
     774  /* int_insert --- install subs in the assoc array */
     775  
     776  static NODE **
     777  int_insert(NODE *symbol, long k, uint32_t hash1)
     778  {
     779  	BUCKET *b;
     780  	int i;
     781  
     782  	b = symbol->buckets[hash1];
     783  
     784  	/* Only the first bucket in the chain can be partially full, but is never empty. */
     785  
     786  	if (b == NULL || (i = b->aicount) == 2) {
     787  		getbucket(b);
     788  		b->aicount = 0;
     789  		b->ainext = symbol->buckets[hash1];
     790  		symbol->buckets[hash1] = b;
     791  		i = 0;
     792  	}
     793  
     794  	b->ainum[i] = k;
     795  	b->aivalue[i] = new_array_element();
     796  	b->aicount++;
     797  	return & b->aivalue[i];
     798  }
     799  
     800  
     801  /* grow_int_table --- grow the hash table */
     802  
     803  static void
     804  grow_int_table(NODE *symbol)
     805  {
     806  	BUCKET **old, **new;
     807  	BUCKET *chain, *next;
     808  	int i, j;
     809  	unsigned long oldsize, newsize, k;
     810  
     811  	/*
     812  	 * This is an array of primes. We grow the table by an order of
     813  	 * magnitude each time (not just doubling) so that growing is a
     814  	 * rare operation. We expect, on average, that it won't happen
     815  	 * more than twice.  The final size is also chosen to be small
     816  	 * enough so that MS-DOG mallocs can handle it. When things are
     817  	 * very large (> 8K), we just double more or less, instead of
     818  	 * just jumping from 8K to 64K.
     819  	 */
     820  
     821  	static const unsigned long sizes[] = {
     822  		13, 127, 1021, 8191, 16381, 32749, 65497,
     823  		131101, 262147, 524309, 1048583, 2097169,
     824  		4194319, 8388617, 16777259, 33554467,
     825  		67108879, 134217757, 268435459, 536870923,
     826  		1073741827
     827  	};
     828  
     829  	/* find next biggest hash size */
     830  	newsize = oldsize = symbol->array_size;
     831  
     832  	for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
     833  		if (oldsize < sizes[i]) {
     834  			newsize = sizes[i];
     835  			break;
     836  		}
     837  	}
     838  	if (newsize == oldsize) {	/* table already at max (!) */
     839  		symbol->flags |= ARRAYMAXED;
     840  		return;
     841  	}
     842  
     843  	/* allocate new table */
     844  	ezalloc(new, BUCKET **, newsize * sizeof(BUCKET *), "grow_int_table");
     845  
     846  	old = symbol->buckets;
     847  	symbol->buckets = new;
     848  	symbol->array_size = newsize;
     849  
     850  	/* brand new hash table */
     851  	if (old == NULL)
     852  		return;		/* DO NOT initialize symbol->table_size */
     853  
     854  	/* old hash table there, move stuff to new, free old */
     855  	/* note that symbol->table_size does not change if an old array. */
     856  
     857  	for (k = 0; k < oldsize; k++) {
     858  		long num;
     859  		for (chain = old[k]; chain != NULL; chain = next) {
     860  			for (i = 0; i < chain->aicount; i++) {
     861  				num = chain->ainum[i];
     862  				*int_insert(symbol, num, int_hash(num, newsize)) = chain->aivalue[i];
     863  			}
     864  			next = chain->ainext;
     865  			freebucket(chain);
     866  		}
     867  	}
     868  	efree(old);
     869  }