1 /*
2 * array.c - routines for awk arrays.
3 */
4
5 /*
6 * Copyright (C) 1986, 1988, 1989, 1991-2014, 2016, 2018-2023,
7 * the Free Software Foundation, Inc.
8 *
9 * This file is part of GAWK, the GNU implementation of the
10 * AWK Programming Language.
11 *
12 * GAWK is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 3 of the License, or
15 * (at your option) any later version.
16 *
17 * GAWK is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 */
26
27 #include "awk.h"
28
29 extern FILE *output_fp;
30 extern NODE **fmt_list; /* declared in eval.c */
31
32 NODE *success_node;
33
34 static size_t SUBSEPlen;
35 static char *SUBSEP;
36 static char indent_char[] = " ";
37
38 static int sort_up_value_type(const void *p1, const void *p2);
39 static NODE **null_lookup(NODE *symbol, NODE *subs);
40 static NODE **null_dump(NODE *symbol, NODE *subs);
41 static const array_funcs_t null_array_func = {
42 "null",
43 (afunc_t) 0,
44 (afunc_t) 0,
45 null_lookup,
46 null_afunc,
47 null_afunc,
48 null_afunc,
49 null_afunc,
50 null_afunc,
51 null_dump,
52 (afunc_t) 0,
53 };
54
55 #define MAX_ATYPE 10
56
57 static const array_funcs_t *array_types[MAX_ATYPE];
58 static int num_array_types = 0;
59
60 /* register_array_func --- add routines to handle arrays */
61
62 static int
63 register_array_func(const array_funcs_t *afunc)
64 {
65 if (afunc && num_array_types < MAX_ATYPE) {
66 if (afunc != & str_array_func && afunc->type_of == NULL)
67 return false;
68 array_types[num_array_types++] = afunc;
69 if (afunc->init) /* execute init routine if any */
70 (void) (*afunc->init)(NULL, NULL);
71 return true;
72 }
73 return false;
74 }
75
76
77 /* array_init --- register all builtin array types */
78
79 void
80 array_init()
81 {
82 (void) register_array_func(& str_array_func); /* the default */
83 if (! do_mpfr) {
84 (void) register_array_func(& int_array_func);
85 (void) register_array_func(& cint_array_func);
86 }
87 }
88
89
90 /* make_array --- create an array node */
91
92 NODE *
93 make_array()
94 {
95 NODE *array;
96 getnode(array);
97 memset(array, '\0', sizeof(NODE));
98 array->type = Node_var_array;
99 array->array_funcs = & null_array_func;
100 /* vname, flags, and parent_array not set here */
101
102 return array;
103 }
104
105
106 /* null_array --- force symbol to be an empty typeless array */
107
108 void
109 null_array(NODE *symbol)
110 {
111 symbol->type = Node_var_array;
112 symbol->array_funcs = & null_array_func;
113 symbol->buckets = NULL;
114 symbol->table_size = 0;
115 symbol->array_size = 0;
116 symbol->array_capacity = 0;
117 symbol->flags = 0;
118
119 assert(symbol->xarray == NULL);
120
121 /* vname, parent_array not (re)initialized */
122 }
123
124
125 /* null_lookup --- assign type to an empty array. */
126
127 static NODE **
128 null_lookup(NODE *symbol, NODE *subs)
129 {
130 int i;
131 const array_funcs_t *afunc = NULL;
132
133 assert(symbol->table_size == 0);
134
135 /*
136 * Check which array type wants to accept this sub; traverse
137 * array type list in reverse order.
138 */
139 for (i = num_array_types - 1; i >= 1; i--) {
140 afunc = array_types[i];
141 if (afunc->type_of(symbol, subs) != NULL)
142 break;
143 }
144 if (i == 0 || afunc == NULL)
145 afunc = array_types[0]; /* default is str_array_func */
146 symbol->array_funcs = afunc;
147
148 /* We have the right type of array; install the subscript */
149 return symbol->alookup(symbol, subs);
150 }
151
152
153 /* null_afunc --- default function for array interface */
154
155 NODE **
156 null_afunc(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
157 {
158 return NULL;
159 }
160
161 /* null_dump --- dump function for an empty array */
162
163 static NODE **
164 null_dump(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
165 {
166 fprintf(output_fp, "array `%s' is empty\n", array_vname(symbol));
167 return NULL;
168 }
169
170
171 /* assoc_copy --- duplicate input array "symbol" */
172
173 NODE *
174 assoc_copy(NODE *symbol, NODE *newsymb)
175 {
176 assert(newsymb->vname != NULL);
177
178 assoc_clear(newsymb);
179 (void) symbol->acopy(symbol, newsymb);
180 newsymb->array_funcs = symbol->array_funcs;
181 newsymb->flags = symbol->flags;
182 return newsymb;
183 }
184
185
186 /* assoc_dump --- dump array */
187
188 void
189 assoc_dump(NODE *symbol, NODE *ndump)
190 {
191 if (symbol->adump)
192 (void) symbol->adump(symbol, ndump);
193 }
194
195
196 /* make_aname --- construct a 'vname' for a (sub)array */
197
198 const char *
199 make_aname(const NODE *symbol)
200 {
201 static char *aname = NULL;
202 static size_t alen;
203 static size_t max_alen;
204 #define SLEN 256
205
206 if (symbol->parent_array != NULL) {
207 size_t slen;
208
209 (void) make_aname(symbol->parent_array);
210 slen = strlen(symbol->vname); /* subscript in parent array */
211 if (alen + slen + 4 > max_alen) { /* sizeof("[\"\"]") = 4 */
212 max_alen = alen + slen + 4 + SLEN;
213 erealloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
214 }
215 alen += sprintf(aname + alen, "[\"%s\"]", symbol->vname);
216 } else {
217 alen = strlen(symbol->vname);
218 if (aname == NULL) {
219 max_alen = alen + SLEN;
220 emalloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
221 } else if (alen > max_alen) {
222 max_alen = alen + SLEN;
223 erealloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
224 }
225 memcpy(aname, symbol->vname, alen + 1);
226 }
227 return aname;
228 }
229 #undef SLEN
230
231
232 /*
233 * array_vname --- print the name of the array
234 *
235 * Returns a pointer to a statically maintained dynamically allocated string.
236 * It's appropriate for printing the name once; if the caller wants
237 * to save it, they have to make a copy.
238 */
239
240 const char *
241 array_vname(const NODE *symbol)
242 {
243 static char *message = NULL;
244 static size_t msglen = 0;
245 char *s;
246 size_t len;
247 int n;
248 const NODE *save_symbol = symbol;
249 const char *from = _("from %s");
250 const char *aname;
251
252 if (symbol->type != Node_array_ref
253 || symbol->orig_array->type != Node_var_array
254 ) {
255 if (symbol->type != Node_var_array || symbol->parent_array == NULL)
256 return symbol->vname;
257 return make_aname(symbol);
258 }
259
260 /* First, we have to compute the length of the string: */
261
262 len = 2; /* " (" */
263 n = 0;
264 while (symbol->type == Node_array_ref) {
265 len += strlen(symbol->vname);
266 n++;
267 symbol = symbol->prev_array;
268 }
269
270 /* Get the (sub)array name */
271 if (symbol->parent_array == NULL)
272 aname = symbol->vname;
273 else
274 aname = make_aname(symbol);
275 len += strlen(aname);
276 /*
277 * Each node contributes by strlen(from) minus the length
278 * of "%s" in the translation (which is at least 2)
279 * plus 2 for ", " or ")\0"; this adds up to strlen(from).
280 */
281 len += n * strlen(from);
282
283 /* (Re)allocate memory: */
284 if (message == NULL) {
285 emalloc(message, char *, len, "array_vname");
286 msglen = len;
287 } else if (len > msglen) {
288 erealloc(message, char *, len, "array_vname");
289 msglen = len;
290 } /* else
291 current buffer can hold new name */
292
293 /* We're ready to print: */
294 symbol = save_symbol;
295 s = message;
296 /*
297 * Ancient systems have sprintf() returning char *, not int.
298 * If you have one of those, use sprintf(..); s += strlen(s) instead.
299 */
300
301 s += sprintf(s, "%s (", symbol->vname);
302 for (;;) {
303 symbol = symbol->prev_array;
304 if (symbol->type != Node_array_ref)
305 break;
306 s += sprintf(s, from, symbol->vname);
307 s += sprintf(s, ", ");
308 }
309 s += sprintf(s, from, aname);
310 strcpy(s, ")");
311
312 return message;
313 }
314
315
316 /*
317 * force_array --- proceed to the actual Node_var_array,
318 * change Node_var_new or Node_elem_new to an array.
319 * If canfatal and type isn't good, die fatally,
320 * otherwise return the final actual value.
321 */
322
323 NODE *
324 force_array(NODE *symbol, bool canfatal)
325 {
326 NODE *save_symbol = symbol;
327 bool isparam = false;
328
329 if (symbol->type == Node_param_list) {
330 save_symbol = symbol = GET_PARAM(symbol->param_cnt);
331 isparam = true;
332 if (symbol->type == Node_array_ref)
333 symbol = symbol->orig_array;
334 }
335
336 switch (symbol->type) {
337 case Node_elem_new:
338 efree(symbol->stptr);
339 symbol->stptr = NULL;
340 symbol->stlen = 0;
341 /* fall through */
342 case Node_var_new:
343 symbol->xarray = NULL; /* make sure union is as it should be */
344 null_array(symbol);
345 symbol->parent_array = NULL; /* main array has no parent */
346 /* fall through */
347 case Node_var_array:
348 break;
349
350 case Node_array_ref:
351 default:
352 /* notably Node_var but catches also e.g. a[1] = "x"; a[1][1] = "y" */
353 if (canfatal) {
354 if (symbol->type == Node_val)
355 fatal(_("attempt to use a scalar value as array"));
356 if (isparam)
357 fatal(_("attempt to use scalar parameter `%s' as an array"),
358 save_symbol->vname);
359 else
360 fatal(_("attempt to use scalar `%s' as an array"),
361 save_symbol->vname);
362 } else
363 break;
364 }
365
366 return symbol;
367 }
368
369
370 /* set_SUBSEP --- update SUBSEP related variables when SUBSEP assigned to */
371
372 void
373 set_SUBSEP()
374 {
375 SUBSEP_node->var_value = force_string(SUBSEP_node->var_value);
376 SUBSEP = SUBSEP_node->var_value->stptr;
377 SUBSEPlen = SUBSEP_node->var_value->stlen;
378 }
379
380
381 /* concat_exp --- concatenate expression list into a single string */
382
383 NODE *
384 concat_exp(int nargs, bool do_subsep)
385 {
386 /* do_subsep is false for Op_concat */
387 NODE *r;
388 char *str;
389 char *s;
390 size_t len;
391 size_t subseplen = 0;
392 int i;
393 extern NODE **args_array;
394
395 if (nargs == 1)
396 return POP_STRING();
397
398 if (do_subsep)
399 subseplen = SUBSEPlen;
400
401 len = 0;
402 for (i = 1; i <= nargs; i++) {
403 r = TOP();
404 if (r->type == Node_var_array) {
405 while (--i > 0)
406 DEREF(args_array[i]); /* avoid memory leak */
407 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(r));
408 }
409 r = POP_STRING();
410 args_array[i] = r;
411 len += r->stlen;
412 }
413 len += (nargs - 1) * subseplen;
414
415 emalloc(str, char *, len + 1, "concat_exp");
416
417 r = args_array[nargs];
418 memcpy(str, r->stptr, r->stlen);
419 s = str + r->stlen;
420 DEREF(r);
421 for (i = nargs - 1; i > 0; i--) {
422 if (subseplen == 1)
423 *s++ = *SUBSEP;
424 else if (subseplen > 0) {
425 memcpy(s, SUBSEP, subseplen);
426 s += subseplen;
427 }
428 r = args_array[i];
429 memcpy(s, r->stptr, r->stlen);
430 s += r->stlen;
431 DEREF(r);
432 }
433
434 return make_str_node(str, len, ALREADY_MALLOCED);
435 }
436
437
438 /*
439 * adjust_fcall_stack: remove subarray(s) of symbol[] from
440 * function call stack.
441 */
442
443 static void
444 adjust_fcall_stack(NODE *symbol, int nsubs)
445 {
446 NODE *func, *r, *n;
447 NODE **sp;
448 int pcount;
449
450 /*
451 * Solve the nasty problem of disappearing subarray arguments:
452 *
453 * function f(c, d) { delete c; .. use non-existent array d .. }
454 * BEGIN { a[0][0] = 1; f(a, a[0]); .. }
455 *
456 * The fix is to convert 'd' to a local empty array; This has
457 * to be done before clearing the parent array to avoid referring to
458 * already free-ed memory.
459 *
460 * Similar situations exist for builtins accepting more than
461 * one array argument: split, patsplit, asort and asorti. For example:
462 *
463 * BEGIN { a[0][0] = 1; split("abc", a, "", a[0]) }
464 *
465 * These cases do not involve the function call stack, and are
466 * handled individually in their respective routines.
467 */
468
469 func = frame_ptr->func_node;
470 if (func == NULL) /* in main */
471 return;
472 pcount = func->param_cnt;
473 sp = frame_ptr->stack;
474
475 for (; pcount > 0; pcount--) {
476 r = *sp++;
477 if (r->type != Node_array_ref
478 || r->orig_array->type != Node_var_array)
479 continue;
480 n = r->orig_array;
481
482 /* Case 1 */
483 if (n == symbol
484 && symbol->parent_array != NULL
485 && nsubs > 0
486 ) {
487 /*
488 * 'symbol' is a subarray, and 'r' is the same subarray:
489 *
490 * function f(c, d) { delete c[0]; .. }
491 * BEGIN { a[0][0] = 1; f(a, a[0]); .. }
492 *
493 * But excludes cases like (nsubs = 0):
494 *
495 * function f(c, d) { delete c; ..}
496 * BEGIN { a[0][0] = 1; f(a[0], a[0]); ...}
497 */
498
499 null_array(r);
500 r->parent_array = NULL;
501 continue;
502 }
503
504 /* Case 2 */
505 for (n = n->parent_array; n != NULL; n = n->parent_array) {
506 assert(n->type == Node_var_array);
507 if (n == symbol) {
508 /*
509 * 'r' is a subarray of 'symbol':
510 *
511 * function f(c, d) { delete c; .. use d as array .. }
512 * BEGIN { a[0][0] = 1; f(a, a[0]); .. }
513 * OR
514 * BEGIN { a[0][0][0][0] = 1; f(a[0], a[0][0][0]); .. }
515 *
516 */
517 null_array(r);
518 r->parent_array = NULL;
519 break;
520 }
521 }
522 }
523 }
524
525
526 /* do_delete --- perform `delete array[s]' */
527
528 /*
529 * `symbol' is array
530 * `nsubs' is no of subscripts
531 */
532
533 void
534 do_delete(NODE *symbol, int nsubs)
535 {
536 NODE *val, *subs;
537 int i;
538
539 assert(symbol->type == Node_var_array);
540 subs = val = NULL; /* silence the compiler */
541
542 /*
543 * The force_string() call is needed to make sure that
544 * the string subscript is reasonable. For example, with it:
545 *
546 * $ ./gawk --posix 'BEGIN { CONVFMT="%ld"; delete a[1.233]}'
547 * gawk: cmd. line:1: fatal: `%l' is not permitted in POSIX awk formats
548 *
549 * Without it, the code does not fail.
550 */
551
552 #define free_subs(n) do { \
553 NODE *s = PEEK(n - 1); \
554 if (s->type == Node_val) { \
555 (void) force_string(s); /* may have side effects. */ \
556 DEREF(s); \
557 } \
558 } while (--n > 0)
559
560 if (nsubs == 0) {
561 /* delete array */
562
563 adjust_fcall_stack(symbol, 0); /* fix function call stack; See above. */
564 assoc_clear(symbol);
565 return;
566 }
567
568 /* NB: subscripts are in reverse order on stack */
569
570 for (i = nsubs; i > 0; i--) {
571 subs = PEEK(i - 1);
572 if (subs->type != Node_val) {
573 free_subs(i);
574 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(subs));
575 }
576
577 val = in_array(symbol, subs);
578 if (val == NULL) {
579 if (do_lint) {
580 subs = force_string(subs);
581 lintwarn(_("delete: index `%.*s' not in array `%s'"),
582 (int) subs->stlen, subs->stptr, array_vname(symbol));
583 }
584 /* avoid memory leak, free all subs */
585 free_subs(i);
586 return;
587 }
588
589 if (i > 1) {
590 if (val->type != Node_var_array) {
591 /* e.g.: a[1] = 1; delete a[1][1] */
592
593 free_subs(i);
594 subs = force_string(subs);
595 fatal(_("attempt to use scalar `%s[\"%.*s\"]' as an array"),
596 array_vname(symbol),
597 (int) subs->stlen,
598 subs->stptr);
599 }
600 symbol = val;
601 DEREF(subs);
602 }
603 }
604
605 if (val->type == Node_var_array) {
606 adjust_fcall_stack(val, nsubs); /* fix function call stack; See above. */
607 assoc_clear(val);
608 /* cleared a sub-array, free Node_var_array */
609 efree(val->vname);
610 freenode(val);
611 } else
612 unref(val);
613
614 (void) assoc_remove(symbol, subs);
615 DEREF(subs);
616 if (assoc_empty(symbol))
617 /* last element was removed, so reset array type to null */
618 null_array(symbol);
619
620 #undef free_subs
621 }
622
623
624 /* do_delete_loop --- simulate ``for (iggy in foo) delete foo[iggy]'' */
625
626 /*
627 * The primary hassle here is that `iggy' needs to have some arbitrary
628 * array index put in it before we can clear the array, we can't
629 * just replace the loop with `delete foo'.
630 */
631
632 void
633 do_delete_loop(NODE *symbol, NODE **lhs)
634 {
635 NODE **list;
636 NODE akind;
637
638 akind.flags = AINDEX|ADELETE; /* need a single index */
639 list = symbol->alist(symbol, & akind);
640
641 if (assoc_empty(symbol))
642 return;
643
644 unref(*lhs);
645 *lhs = list[0];
646 efree(list);
647
648 /* blast the array in one shot */
649 adjust_fcall_stack(symbol, 0);
650 assoc_clear(symbol);
651 }
652
653
654 /* value_info --- print scalar node info */
655
656 static void
657 value_info(NODE *n)
658 {
659
660 #define PREC_NUM -1
661
662 if (n == Nnull_string || n == Null_field) {
663 fprintf(output_fp, "<(null)>");
664 return;
665 }
666
667 if ((n->flags & (STRING|STRCUR)) != 0) {
668 fprintf(output_fp, "<");
669 fprintf(output_fp, "\"%.*s\"", (int) n->stlen, n->stptr);
670 if ((n->flags & (NUMBER|NUMCUR)) != 0) {
671 #ifdef HAVE_MPFR
672 if (is_mpg_float(n))
673 fprintf(output_fp, ":%s",
674 mpg_fmt("%.*R*g", PREC_NUM, ROUND_MODE, n->mpg_numbr));
675 else if (is_mpg_integer(n))
676 fprintf(output_fp, ":%s", mpg_fmt("%Zd", n->mpg_i));
677 else
678 #endif
679 fprintf(output_fp, ":%.*g", PREC_NUM, n->numbr);
680 }
681 fprintf(output_fp, ">");
682 } else {
683 #ifdef HAVE_MPFR
684 if (is_mpg_float(n))
685 fprintf(output_fp, "<%s>",
686 mpg_fmt("%.*R*g", PREC_NUM, ROUND_MODE, n->mpg_numbr));
687 else if (is_mpg_integer(n))
688 fprintf(output_fp, "<%s>", mpg_fmt("%Zd", n->mpg_i));
689 else
690 #endif
691 fprintf(output_fp, "<%.*g>", PREC_NUM, n->numbr);
692 }
693
694 fprintf(output_fp, ":%s", flags2str(n->flags));
695
696 if ((n->flags & MALLOC) != 0)
697 fprintf(output_fp, ":%ld", n->valref);
698 else
699 fprintf(output_fp, ":");
700
701 if ((n->flags & (STRING|STRCUR)) == STRCUR) {
702 size_t len;
703
704 fprintf(output_fp, "][");
705 fprintf(output_fp, "stfmt=%d, ", n->stfmt);
706 /*
707 * If not STFMT_UNUSED, could be CONVFMT or OFMT if last
708 * used in a print statement. If immutable, could be that it
709 * was originally set as a string, or it's a number that has
710 * an integer value.
711 */
712 len = fmt_list[n->stfmt]->stlen;
713 fmt_list[n->stfmt]->stptr[len] = '\0';
714 fprintf(output_fp, "FMT=\"%s\"",
715 n->stfmt == STFMT_UNUSED ? "<unused>"
716 : fmt_list[n->stfmt]->stptr);
717 #ifdef HAVE_MPFR
718 fprintf(output_fp, ", ROUNDMODE=\"%c\"", n->strndmode);
719 #endif
720 }
721
722 #undef PREC_NUM
723 }
724
725
726 void
727 indent(int indent_level)
728 {
729 int i;
730 for (i = 0; i < indent_level; i++)
731 fprintf(output_fp, "%s", indent_char);
732 }
733
734 /* assoc_info --- print index, value info */
735
736 void
737 assoc_info(NODE *subs, NODE *val, NODE *ndump, const char *aname)
738 {
739 int indent_level = ndump->alevel;
740
741 indent_level++;
742 indent(indent_level);
743 fprintf(output_fp, "I: [%s:", aname);
744 if ((subs->flags & (MPFN|MPZN|INTIND)) == INTIND)
745 fprintf(output_fp, "<%ld>", (long) subs->numbr);
746 else
747 value_info(subs);
748 fprintf(output_fp, "]\n");
749
750 indent(indent_level);
751 switch (val->type) {
752 case Node_val:
753 fprintf(output_fp, "V: [scalar: ");
754 value_info(val);
755 break;
756 case Node_var:
757 fprintf(output_fp, "V: [scalar: ");
758 value_info(val->var_value);
759 break;
760 case Node_var_array:
761 fprintf(output_fp, "V: [");
762 ndump->alevel++;
763 ndump->adepth--;
764 assoc_dump(val, ndump);
765 ndump->adepth++;
766 ndump->alevel--;
767 indent(indent_level);
768 break;
769 case Node_func:
770 fprintf(output_fp, "V: [user_defined_function");
771 break;
772 case Node_ext_func:
773 fprintf(output_fp, "V: [external_function");
774 break;
775 case Node_builtin_func:
776 fprintf(output_fp, "V: [builtin_function");
777 break;
778 default:
779 cant_happen("unexpected node type %s", nodetype2str(val->type));
780 break;
781 }
782 fprintf(output_fp, "]\n");
783 }
784
785
786 /* do_adump --- dump an array: interface to assoc_dump */
787
788 NODE *
789 do_adump(int nargs)
790 {
791 NODE *symbol, *tmp;
792 static NODE ndump;
793 long depth = 0;
794
795 /*
796 * depth < 0, no index and value info.
797 * = 0, main array index and value info; does not descend into sub-arrays.
798 * > 0, descends into 'depth' sub-arrays, and prints index and value info.
799 */
800
801 if (nargs == 2) {
802 tmp = POP_NUMBER();
803 depth = get_number_si(tmp);
804 DEREF(tmp);
805 }
806 symbol = POP_PARAM();
807 if (symbol->type != Node_var_array)
808 fatal(_("%s: first argument is not an array"), "adump");
809
810 ndump.type = Node_dump_array;
811 ndump.adepth = depth;
812 ndump.alevel = 0;
813 assoc_dump(symbol, & ndump);
814 return make_number((AWKNUM) 0);
815 }
816
817
818 /* asort_actual --- do the actual work to sort the input array */
819
820 static NODE *
821 asort_actual(int nargs, sort_context_t ctxt)
822 {
823 NODE *array, *dest = NULL, *result;
824 NODE *r, *subs, *s;
825 NODE **list = NULL, **ptr;
826 unsigned long num_elems, i;
827 const char *sort_str;
828 char save;
829 const char *name = (ctxt == ASORT ? "asort" : "asorti"); // D.R.Y.
830
831 if (nargs == 3) /* 3rd optional arg */
832 s = POP_STRING();
833 else
834 s = dupnode(Nnull_string); /* "" => default sorting */
835
836 s = force_string(s);
837 sort_str = s->stptr;
838 save = s->stptr[s->stlen];
839 s->stptr[s->stlen] = '\0';
840 if (s->stlen == 0) { /* default sorting */
841 if (ctxt == ASORT)
842 sort_str = "@val_type_asc";
843 else
844 sort_str = "@ind_str_asc";
845 }
846
847 if (nargs >= 2) { /* 2nd optional arg */
848 dest = POP_PARAM();
849 if (dest->type != Node_var_array) {
850 fatal(_("%s: second argument is not an array"), name);
851 }
852 check_symtab_functab(dest, name,
853 _("%s: cannot use %s as second argument"));
854 }
855
856 array = POP_PARAM();
857 if (array->type != Node_var_array) {
858 fatal(_("%s: first argument is not an array"), name);
859 }
860 else if (array == symbol_table && dest == NULL)
861 fatal(_("%s: first argument cannot be SYMTAB without a second argument"), name);
862 else if (array == func_table && dest == NULL)
863 fatal(_("%s: first argument cannot be FUNCTAB without a second argument"), name);
864
865 if (dest != NULL) {
866 static bool warned = false;
867
868 if (nargs == 2 && array == dest && ! warned) {
869 warned = true;
870 lintwarn(_("asort/asorti: using the same array as source and destination without "
871 "a third argument is silly."));
872 }
873 for (r = dest->parent_array; r != NULL; r = r->parent_array) {
874 if (r == array)
875 fatal(_("%s: cannot use a subarray of first argument for second argument"),
876 name);
877 }
878 for (r = array->parent_array; r != NULL; r = r->parent_array) {
879 if (r == dest)
880 fatal(_("%s: cannot use a subarray of second argument for first argument"),
881 name);
882 }
883 }
884
885 /* sorting happens inside assoc_list */
886 list = assoc_list(array, sort_str, ctxt);
887 s->stptr[s->stlen] = save;
888 DEREF(s);
889
890 num_elems = assoc_length(array);
891 if (num_elems == 0 || list == NULL) {
892 /* source array is empty */
893 if (dest != NULL && dest != array)
894 assoc_clear(dest);
895 if (list != NULL)
896 efree(list);
897 return make_number((AWKNUM) 0);
898 }
899
900 /*
901 * Must not assoc_clear() the source array before constructing
902 * the output array. assoc_list() does not duplicate array values
903 * which are needed for asort().
904 */
905
906 if (dest != NULL && dest != array) {
907 assoc_clear(dest);
908 result = dest;
909 } else {
910 /* use 'result' as a temporary destination array */
911 result = make_array();
912 result->vname = array->vname;
913 result->parent_array = array->parent_array;
914 }
915
916 if (ctxt == ASORTI) {
917 /* We want the indices of the source array. */
918
919 for (i = 1, ptr = list; i <= num_elems; i++, ptr += 2) {
920 subs = make_number(i);
921 assoc_set(result, subs, *ptr);
922 }
923 } else {
924 /* We want the values of the source array. */
925
926 for (i = 1, ptr = list; i <= num_elems; i++) {
927 subs = make_number(i);
928
929 /* free index node */
930 r = *ptr++;
931 unref(r);
932
933 /* value node */
934 r = *ptr++;
935
936 NODE *value = NULL;
937
938 switch (r->type) {
939 case Node_val:
940 value = dupnode(r);
941 break;
942 case Node_var:
943 /* SYMTAB ... */
944 value = dupnode(r->var_value);
945 break;
946 case Node_var_new:
947 case Node_elem_new:
948 value = dupnode(Nnull_string);
949 break;
950 case Node_builtin_func:
951 case Node_func:
952 case Node_ext_func:
953 /* FUNCTAB ... */
954 value = make_string(r->vname, strlen(r->vname));
955 break;
956 case Node_var_array:
957 {
958 NODE *arr;
959
960 arr = make_array();
961 subs = force_string(subs);
962 arr->vname = subs->stptr;
963 arr->vname[subs->stlen] = '\0';
964 subs->stptr = NULL;
965 subs->flags &= ~STRCUR;
966 arr->parent_array = array; /* actual parent, not the temporary one. */
967
968 value = assoc_copy(r, arr);
969 break;
970 }
971 default:
972 cant_happen("asort_actual: got unexpected type %s", nodetype2str(r->type));
973 }
974 assoc_set(result, subs, value);
975 }
976 }
977
978 efree(list);
979
980 if (result != dest) {
981 /* dest == NULL or dest == array */
982 assoc_clear(array);
983 *array = *result; /* copy result into array */
984 freenode(result);
985 } /* else
986 result == dest
987 dest != NULL and dest != array */
988
989 return make_number((AWKNUM) num_elems);
990 }
991
992 /* do_asort --- sort array by value */
993
994 NODE *
995 do_asort(int nargs)
996 {
997 return asort_actual(nargs, ASORT);
998 }
999
1000 /* do_asorti --- sort array by index */
1001
1002 NODE *
1003 do_asorti(int nargs)
1004 {
1005 return asort_actual(nargs, ASORTI);
1006 }
1007
1008
1009 /*
1010 * cmp_strings --- compare two strings; logic similar to cmp_nodes() in eval.c
1011 * except the extra case-sensitive comparison when the case-insensitive
1012 * result is a match.
1013 */
1014
1015 static int
1016 cmp_strings(const NODE *n1, const NODE *n2)
1017 {
1018 char *s1, *s2;
1019 size_t len1, len2;
1020 int ret;
1021
1022 s1 = n1->stptr;
1023 len1 = n1->stlen;
1024 s2 = n2->stptr;
1025 len2 = n2->stlen;
1026
1027 if (len1 == 0)
1028 return len2 == 0 ? 0 : -1;
1029 if (len2 == 0)
1030 return 1;
1031
1032 /* len1 > 0 && len2 > 0 */
1033 // make const to ensure it doesn't change if we
1034 // need to call memcmp(), below
1035 const size_t lmin = len1 < len2 ? len1 : len2;
1036
1037 if (IGNORECASE) {
1038 const unsigned char *cp1 = (const unsigned char *) s1;
1039 const unsigned char *cp2 = (const unsigned char *) s2;
1040
1041 if (gawk_mb_cur_max > 1) {
1042 ret = strncasecmpmbs((const unsigned char *) cp1,
1043 (const unsigned char *) cp2, lmin);
1044 } else {
1045 size_t count = lmin;
1046
1047 for (ret = 0; count-- > 0 && ret == 0; cp1++, cp2++)
1048 ret = casetable[*cp1] - casetable[*cp2];
1049 }
1050 if (ret != 0)
1051 return ret;
1052 /*
1053 * If case insensitive result is "they're the same",
1054 * use case sensitive comparison to force distinct order.
1055 */
1056 }
1057
1058 ret = memcmp(s1, s2, lmin);
1059 if (ret != 0 || len1 == len2)
1060 return ret;
1061 return (len1 < len2) ? -1 : 1;
1062 }
1063
1064 /* sort_up_index_string --- qsort comparison function; ascending index strings. */
1065
1066 static int
1067 sort_up_index_string(const void *p1, const void *p2)
1068 {
1069 const NODE *t1, *t2;
1070
1071 /* Array indices are strings */
1072 t1 = *((const NODE *const *) p1);
1073 t2 = *((const NODE *const *) p2);
1074 return cmp_strings(t1, t2);
1075 }
1076
1077
1078 /* sort_down_index_str --- qsort comparison function; descending index strings. */
1079
1080 static int
1081 sort_down_index_string(const void *p1, const void *p2)
1082 {
1083 /*
1084 * Negation versus transposed arguments: when all keys are
1085 * distinct, as with array indices here, either method will
1086 * transform an ascending sort into a descending one. But if
1087 * there are equal keys--such as when IGNORECASE is honored--
1088 * that get disambiguated into a determisitc order, negation
1089 * will reverse those but transposed arguments would retain
1090 * their relative order within the rest of the reversed sort.
1091 */
1092 return -sort_up_index_string(p1, p2);
1093 }
1094
1095
1096 /* sort_up_index_number --- qsort comparison function; ascending index numbers. */
1097
1098 static int
1099 sort_up_index_number(const void *p1, const void *p2)
1100 {
1101 const NODE *t1, *t2;
1102 int ret;
1103
1104 t1 = *((const NODE *const *) p1);
1105 t2 = *((const NODE *const *) p2);
1106
1107 ret = cmp_numbers(t1, t2);
1108 if (ret != 0)
1109 return ret;
1110
1111 /* break a tie with the index string itself */
1112 t1 = force_string((NODE *) t1);
1113 t2 = force_string((NODE *) t2);
1114 return cmp_strings(t1, t2);
1115 }
1116
1117 /* sort_down_index_number --- qsort comparison function; descending index numbers */
1118
1119 static int
1120 sort_down_index_number(const void *p1, const void *p2)
1121 {
1122 return -sort_up_index_number(p1, p2);
1123 }
1124
1125
1126 /* sort_up_value_string --- qsort comparison function; ascending value string */
1127
1128 static int
1129 sort_up_value_string(const void *p1, const void *p2)
1130 {
1131 const NODE *t1, *t2;
1132 int ret;
1133
1134 t1 = *((const NODE *const *) p1 + 1);
1135 t2 = *((const NODE *const *) p2 + 1);
1136
1137 if (t1->type != Node_val || t2->type != Node_val)
1138 return sort_up_value_type(p1, p2);
1139
1140 /* t1 and t2 both have string values */
1141 ret = cmp_strings(t1, t2);
1142 if (ret != 0)
1143 return ret;
1144 return sort_up_index_string(p1, p2);
1145 }
1146
1147
1148 /* sort_down_value_string --- qsort comparison function; descending value string */
1149
1150 static int
1151 sort_down_value_string(const void *p1, const void *p2)
1152 {
1153 return -sort_up_value_string(p1, p2);
1154 }
1155
1156
1157 /* sort_up_value_number --- qsort comparison function; ascending value number */
1158
1159 static int
1160 sort_up_value_number(const void *p1, const void *p2)
1161 {
1162 NODE *t1, *t2;
1163 int ret;
1164
1165 t1 = *((NODE *const *) p1 + 1);
1166 t2 = *((NODE *const *) p2 + 1);
1167
1168 if (t1->type != Node_val || t2->type != Node_val)
1169 return sort_up_value_type(p1, p2);
1170
1171 ret = cmp_numbers(t1, t2);
1172 if (ret != 0)
1173 return ret;
1174
1175 /*
1176 * Use string value to guarantee same sort order on all
1177 * versions of qsort().
1178 */
1179 ret = cmp_strings(force_string(t1), force_string(t2));
1180 if (ret != 0)
1181 return ret;
1182 return sort_up_index_string(p1, p2);
1183 }
1184
1185
1186 /* sort_down_value_number --- qsort comparison function; descending value number */
1187
1188 static int
1189 sort_down_value_number(const void *p1, const void *p2)
1190 {
1191 return -sort_up_value_number(p1, p2);
1192 }
1193
1194
1195 /* do_sort_up_value_type --- backend comparison on ascending value type */
1196
1197 static int
1198 do_sort_up_value_type(const void *p1, const void *p2)
1199 {
1200 NODE *n1, *n2;
1201
1202 static const NODETYPE element_types[] = {
1203 Node_builtin_func,
1204 Node_func,
1205 Node_ext_func,
1206 Node_var_new,
1207 Node_elem_new,
1208 Node_var,
1209 Node_var_array,
1210 Node_val,
1211 Node_illegal
1212 };
1213
1214 /* we want to compare the element values */
1215 n1 = *((NODE *const *) p1 + 1);
1216 n2 = *((NODE *const *) p2 + 1);
1217
1218 if (n1->type == Node_var && n2->type == Node_var) {
1219 /* compare the values of the variables */
1220 n1 = n1->var_value;
1221 n2 = n2->var_value;
1222 }
1223
1224 /* 1. Arrays vs. everything else, everything else is less than array */
1225 if (n1->type == Node_var_array) {
1226 /* return 0 if n2 is a sub-array too, else return 1 */
1227 return (n2->type != Node_var_array);
1228 }
1229 if (n2->type == Node_var_array) {
1230 return -1; /* n1 (non-array) < n2 (sub-array) */
1231 }
1232
1233 /* 2. Non scalars */
1234 if (n1->type != Node_val || n2->type != Node_val) {
1235 int n1_pos, n2_pos, i;
1236
1237 n1_pos = n2_pos = -1;
1238 for (i = 0; element_types[i] != Node_illegal; i++) {
1239 if (n1->type == element_types[i])
1240 n1_pos = i;
1241
1242 if (n2->type == element_types[i])
1243 n2_pos = i;
1244 }
1245
1246 assert(n1_pos != -1 && n2_pos != -1);
1247 return (n1_pos - n2_pos);
1248 }
1249
1250 /* two scalars */
1251 (void) fixtype(n1);
1252 (void) fixtype(n2);
1253
1254 /* 3a. Numbers first */
1255 if ((n1->flags & NUMBER) != 0 && (n2->flags & NUMBER) != 0) {
1256 return cmp_numbers(n1, n2);
1257 }
1258
1259 /* 3b. All numbers are less than all strings. This is aribitrary. */
1260 if ((n1->flags & NUMBER) != 0 && (n2->flags & STRING) != 0) {
1261 return -1;
1262 } else if ((n1->flags & STRING) != 0 && (n2->flags & NUMBER) != 0) {
1263 return 1;
1264 }
1265
1266 /* 4. Two strings */
1267 return cmp_strings(n1, n2);
1268 }
1269
1270 /* sort_up_value_type --- qsort comparison function; ascending value type */
1271
1272 static int
1273 sort_up_value_type(const void *p1, const void *p2)
1274 {
1275 int rc = do_sort_up_value_type(p1, p2);
1276
1277 /* use a tie-breaker if do_sort_up_value_type has no opinion */
1278 return rc ? rc : sort_up_index_string(p1, p2);
1279 }
1280
1281 /* sort_down_value_type --- qsort comparison function; descending value type */
1282
1283 static int
1284 sort_down_value_type(const void *p1, const void *p2)
1285 {
1286 return -sort_up_value_type(p1, p2);
1287 }
1288
1289 /* sort_user_func --- user defined qsort comparison function */
1290
1291 static int
1292 sort_user_func(const void *p1, const void *p2)
1293 {
1294 NODE *idx1, *idx2, *val1, *val2, *r;
1295 int ret;
1296 INSTRUCTION *code;
1297
1298 idx1 = *((NODE *const *) p1);
1299 idx2 = *((NODE *const *) p2);
1300 val1 = *((NODE *const *) p1 + 1);
1301 val2 = *((NODE *const *) p2 + 1);
1302
1303 code = TOP()->code_ptr; /* comparison function call instructions */
1304
1305 /* setup 4 arguments to comp_func() */
1306 UPREF(idx1);
1307 PUSH(idx1);
1308 if (val1->type == Node_val)
1309 UPREF(val1);
1310 PUSH(val1);
1311
1312 UPREF(idx2);
1313 PUSH(idx2);
1314 if (val2->type == Node_val)
1315 UPREF(val2);
1316 PUSH(val2);
1317
1318 /* execute the comparison function */
1319 (void) (*interpret)(code);
1320
1321 /* return value of the comparison function */
1322 r = POP_NUMBER();
1323 #ifdef HAVE_MPFR
1324 /*
1325 * mpfr_sgn(mpz_sgn): Returns a positive value if op > 0,
1326 * zero if op = 0, and a negative value if op < 0.
1327 */
1328 if (is_mpg_float(r))
1329 ret = mpfr_sgn(r->mpg_numbr);
1330 else if (is_mpg_integer(r))
1331 ret = mpz_sgn(r->mpg_i);
1332 else
1333 #endif
1334 ret = (r->numbr < 0.0) ? -1 : (r->numbr > 0.0);
1335 DEREF(r);
1336 return ret;
1337 }
1338
1339
1340 /* assoc_list -- construct, and optionally sort, a list of array elements */
1341
1342 NODE **
1343 assoc_list(NODE *symbol, const char *sort_str, sort_context_t sort_ctxt)
1344 {
1345 typedef int (*qsort_compfunc)(const void *, const void *);
1346
1347 static const struct qsort_funcs {
1348 const char *name;
1349 qsort_compfunc comp_func;
1350 assoc_kind_t kind;
1351 } sort_funcs[] = {
1352 { "@ind_str_asc", sort_up_index_string, AINDEX|AISTR|AASC },
1353 { "@ind_num_asc", sort_up_index_number, AINDEX|AINUM|AASC },
1354 { "@val_str_asc", sort_up_value_string, AVALUE|AVSTR|AASC },
1355 { "@val_num_asc", sort_up_value_number, AVALUE|AVNUM|AASC },
1356 { "@ind_str_desc", sort_down_index_string, AINDEX|AISTR|ADESC },
1357 { "@ind_num_desc", sort_down_index_number, AINDEX|AINUM|ADESC },
1358 { "@val_str_desc", sort_down_value_string, AVALUE|AVSTR|ADESC },
1359 { "@val_num_desc", sort_down_value_number, AVALUE|AVNUM|ADESC },
1360 { "@val_type_asc", sort_up_value_type, AVALUE|AASC },
1361 { "@val_type_desc", sort_down_value_type, AVALUE|ADESC },
1362 { "@unsorted", 0, AINDEX },
1363 };
1364
1365 /*
1366 * N.B.: AASC and ADESC are hints to the specific array types.
1367 * See cint_list() in cint_array.c.
1368 */
1369
1370 NODE **list;
1371 NODE akind;
1372 unsigned long num_elems, j;
1373 int elem_size, qi;
1374 qsort_compfunc cmp_func = 0;
1375 INSTRUCTION *code = NULL;
1376 extern int currule;
1377 int save_rule = 0;
1378 assoc_kind_t assoc_kind = ANONE;
1379
1380 elem_size = 1;
1381
1382 for (qi = 0, j = sizeof(sort_funcs)/sizeof(sort_funcs[0]); qi < j; qi++) {
1383 if (strcmp(sort_funcs[qi].name, sort_str) == 0)
1384 break;
1385 }
1386
1387 if (qi < j) {
1388 cmp_func = sort_funcs[qi].comp_func;
1389 assoc_kind = sort_funcs[qi].kind;
1390
1391 if (symbol->array_funcs != & cint_array_func)
1392 assoc_kind &= ~(AASC|ADESC);
1393
1394 if (sort_ctxt != SORTED_IN || (assoc_kind & AVALUE) != 0) {
1395 /* need index and value pair in the list */
1396
1397 assoc_kind |= (AINDEX|AVALUE);
1398 elem_size = 2;
1399 }
1400
1401 } else { /* unrecognized */
1402 NODE *f;
1403 const char *sp;
1404
1405 for (sp = sort_str; *sp != '\0' && ! isspace((unsigned char) *sp); sp++)
1406 continue;
1407
1408 /* empty string or string with space(s) not valid as function name */
1409 if (sp == sort_str || *sp != '\0')
1410 fatal(_("`%s' is invalid as a function name"), sort_str);
1411
1412 f = lookup(sort_str);
1413 if (f == NULL || f->type != Node_func)
1414 fatal(_("sort comparison function `%s' is not defined"), sort_str);
1415
1416 cmp_func = sort_user_func;
1417
1418 /* need index and value pair in the list */
1419 assoc_kind |= (AVALUE|AINDEX);
1420 elem_size = 2;
1421
1422 /* make function call instructions */
1423 code = bcalloc(Op_func_call, 2, 0);
1424 code->func_body = f;
1425 code->func_name = NULL; /* not needed, func_body already assigned */
1426 (code + 1)->expr_count = 4; /* function takes 4 arguments */
1427 code->nexti = bcalloc(Op_stop, 1, 0);
1428
1429 /*
1430 * make non-redirected getline, exit, `next' and `nextfile' fatal in
1431 * callback function by setting currule in interpret()
1432 * to undefined (0).
1433 */
1434
1435 save_rule = currule; /* save current rule */
1436 currule = 0;
1437
1438 PUSH_CODE(code);
1439 }
1440
1441 akind.flags = (unsigned int) assoc_kind; /* kludge */
1442 list = symbol->alist(symbol, & akind);
1443 assoc_kind = (assoc_kind_t) akind.flags; /* symbol->alist can modify it */
1444
1445 /* check for empty list or unsorted, or list already sorted */
1446 if (list != NULL && cmp_func != NULL && (assoc_kind & (AASC|ADESC)) == 0) {
1447 num_elems = assoc_length(symbol);
1448
1449 qsort(list, num_elems, elem_size * sizeof(NODE *), cmp_func); /* shazzam! */
1450
1451 if (sort_ctxt == SORTED_IN && (assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) {
1452 /* relocate all index nodes to the first half of the list. */
1453 for (j = 1; j < num_elems; j++)
1454 list[j] = list[2 * j];
1455
1456 /* give back extra memory */
1457
1458 erealloc(list, NODE **, num_elems * sizeof(NODE *), "assoc_list");
1459 }
1460 }
1461
1462 if (cmp_func == sort_user_func) {
1463 code = POP_CODE();
1464 currule = save_rule; /* restore current rule */
1465 bcfree(code->nexti); /* Op_stop */
1466 bcfree(code); /* Op_func_call */
1467 }
1468
1469 return list;
1470 }
1471
1472 /* new_array_element --- return a new empty element node */
1473
1474 NODE *
1475 new_array_element(void)
1476 {
1477 NODE *n = make_number(0.0);
1478 char *sp;
1479
1480 emalloc(sp, char *, 2, "new_array_element");
1481 sp[0] = sp[1] = '\0';
1482
1483 n->stptr = sp;
1484 n->stlen = 0;
1485 n->stfmt = STFMT_UNUSED;
1486
1487 n->flags |= (MALLOC|STRING|STRCUR);
1488
1489 n->type = Node_elem_new;
1490 n->valref = 1;
1491
1492 return n;
1493 }