1 /* Support routines for GNU DIFF.
2
3 Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2013,
4 2015-2023 Free Software Foundation, Inc.
5
6 This file is part of GNU DIFF.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21 #include "diff.h"
22
23 #include <argmatch.h>
24 #include <die.h>
25 #include <dirname.h>
26 #include <error.h>
27 #include <flexmember.h>
28 #include <system-quote.h>
29 #include <xalloc.h>
30
31 #include <stdarg.h>
32 #include <signal.h>
33
/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
   present.  When it is missing, SA_NOCLDSTOP is defined to 0 so that
   it can be tested with both '#if' and plain 'if', and sigprocmask
   (plus siginterrupt, unless HAVE_SIGINTERRUPT is set) is replaced by
   a stub expression that evaluates to 0, i.e. "success".  */
#ifndef SA_NOCLDSTOP
# define SA_NOCLDSTOP 0
# define sigprocmask(How, Set, Oset) 0
# if ! HAVE_SIGINTERRUPT
# define siginterrupt(sig, flag) 0
# endif
#endif

/* Allow SA_RESTART to be stored in sa_flags even where the platform
   does not define it.  */
#ifndef SA_RESTART
# define SA_RESTART 0
#endif

/* Name of the subsidiary program that begin_output runs when
   paginating; taken from the PR_PROGRAM macro.  */
char const pr_program[] = PR_PROGRAM;
49
/* Queue up one-line messages to be printed at the end,
   when -l is specified.  Each message is recorded with a 'struct msg'.
   The structure is allocated by 'message' with room for the argument
   strings in the trailing flexible array, and is freed by
   'print_message_queue'.  */

struct msg
{
  /* Next message in the queue, or null for the last one.  */
  struct msg *next;

  /* Msgid of printf-style format.  It is stored as passed to
     'message' (not copied) and translated only when printed.  */
  char const *msgid;

  /* Number of bytes in ARGS. */
  size_t argbytes;

  /* Arg strings, each '\0' terminated, concatenated. */
  char args[FLEXIBLE_ARRAY_MEMBER];
};
66
/* Head of the chain of queued messages; null when the queue is
   empty.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the address of the link that
   the next queued message must be stored into.  It points at
   'msg_chain' itself while the queue is empty.  */

static struct msg **msg_chain_end = &msg_chain;
74
/* Report a failed system call without exiting.  NAME, normally the
   file name involved, is printed together with the text for the
   current value of errno.  */

void
perror_with_name (char const *name)
{
  int const err = errno;
  error (0, err, "%s", name);
}
83
84 /* Use when a system call returns non-zero status and that is fatal. */
85
86 void
87 pfatal_with_name (char const *name)
88 {
89 int e = errno;
90 print_message_queue ();
91 die (EXIT_TROUBLE, e, "%s", name);
92 }
93
94 /* Print an error message containing MSGID, then exit. */
95
96 void
97 fatal (char const *msgid)
98 {
99 print_message_queue ();
100 die (EXIT_TROUBLE, 0, "%s", _(msgid));
101 }
102
103 /* Like printf, except if -l in effect then save the message and print later.
104 Also, all arguments must be char * or char const *.
105 This is used for things like "Only in ...". */
106
107 void
108 message (char const *format_msgid, ...)
109 {
110 va_list ap;
111 va_start (ap, format_msgid);
112
113 if (paginate)
114 {
115 size_t argbytes = 0;
116
117 for (char const *m = format_msgid; *m; m++)
118 if (*m == '%')
119 {
120 if (m[1] == '%')
121 m++;
122 else
123 argbytes += strlen (va_arg (ap, char const *)) + 1;
124 }
125 va_end (ap);
126
127 struct msg *new = xmalloc (FLEXSIZEOF (struct msg, args, argbytes));
128 new->msgid = format_msgid;
129 new->argbytes = argbytes;
130
131 va_start (ap, format_msgid);
132 char *p = new->args;
133 for (char const *m = format_msgid; *m; m++)
134 if (*m == '%')
135 {
136 if (m[1] == '%')
137 m++;
138 else
139 p = stpcpy (p, va_arg (ap, char const *)) + 1;
140 }
141
142 *msg_chain_end = new;
143 new->next = 0;
144 msg_chain_end = &new->next;
145 }
146 else
147 {
148 if (sdiff_merge_assist)
149 putchar (' ');
150 vprintf (_(format_msgid), ap);
151 }
152
153 va_end (ap);
154 }
155
156 /* Output all the messages that were saved up by calls to 'message'. */
157
158 void
159 print_message_queue (void)
160 {
161 for (struct msg *m = msg_chain; m; )
162 {
163 /* Change this if diff ever has messages with more than 4 args. */
164 char const *p = m->args;
165 char const *plim = p + m->argbytes;
166 /* Unroll the loop to work around GCC 12 bug with
167 -Wanalyzer-use-of-uninitialized-value. */
168 char const *arg0 = p; p += p < plim ? strlen (p) + 1 : 0;
169 char const *arg1 = p; p += p < plim ? strlen (p) + 1 : 0;
170 char const *arg2 = p; p += p < plim ? strlen (p) + 1 : 0;
171 char const *arg3 = p; p += p < plim ? strlen (p) + 1 : 0;
172 printf (_(m->msgid), arg0, arg1, arg2, arg3);
173 if (p < plim)
174 abort ();
175 struct msg *next = m->next;
176 free (m);
177 m = next;
178 }
179 }
180
181 /* Signal handling, needed for restoring default colors. */
182
183 static void
184 xsigaddset (sigset_t *set, int sig)
185 {
186 if (sigaddset (set, sig) != 0)
187 pfatal_with_name ("sigaddset");
188 }
189
190 static bool
191 xsigismember (sigset_t const *set, int sig)
192 {
193 int mem = sigismember (set, sig);
194 if (mem < 0)
195 pfatal_with_name ("sigismember");
196 assume (mem <= 1);
197 return mem;
198 }
199
/* Type of a signal handler function.  */
typedef void (*signal_handler) (int);

/* Install FUNC as the handler for SIG with 'signal' and return the
   previous handler.  A SIG_ERR result is reported via
   pfatal_with_name.  */
static signal_handler
xsignal (int sig, signal_handler func)
{
  signal_handler previous = signal (sig, func);

  if (previous == SIG_ERR)
    pfatal_with_name ("signal");

  return previous;
}
209
210 static void
211 xsigprocmask (int how, sigset_t const *restrict set, sigset_t *restrict oset)
212 {
213 if (sigprocmask (how, set, oset) != 0)
214 pfatal_with_name ("sigprocmask");
215 }
216
/* If true, some signals are caught. This is separate from
   'caught_signals' because POSIX doesn't require an all-zero sigset_t
   to be valid.  Set by install_signal_handlers and tested by
   cleanup_signal_handlers.  */
static bool some_signals_caught;

/* The set of signals that are caught.  Filled in by
   install_signal_handlers; process_signals blocks this set while it
   re-raises a pending signal.  */
static sigset_t caught_signals;

/* If nonzero, the value of the pending fatal signal.  Written by
   sighandler and read by process_signals.  */
static sig_atomic_t volatile interrupt_signal;

/* A count of the number of pending stop signals that have been received.
   Incremented by stophandler and decremented by process_signals.  */
static sig_atomic_t volatile stop_signal_count;
230
231 /* An ordinary signal was received; arrange for the program to exit. */
232
233 static void
234 sighandler (int sig)
235 {
236 if (! SA_NOCLDSTOP)
237 signal (sig, SIG_IGN);
238 if (! interrupt_signal)
239 interrupt_signal = sig;
240 }
241
242 /* A SIGTSTP was received; arrange for the program to suspend itself. */
243
244 static void
245 stophandler (int sig)
246 {
247 if (! SA_NOCLDSTOP)
248 signal (sig, stophandler);
249 if (! interrupt_signal)
250 stop_signal_count++;
251 }
/* Process any pending signals. If signals are caught, this function
   should be called periodically. Ideally there should never be an
   unbounded amount of time when signals are not being processed.
   Signal handling can restore the default colors, so callers must
   immediately change colors after invoking this function.

   Each iteration handles one pending event: a stop request is turned
   into SIGSTOP, otherwise the recorded fatal signal is re-raised with
   its default action restored.  */

static void
process_signals (void)
{
  /* Bitwise '|' rather than '||': both flags are read
     unconditionally.  */
  while (interrupt_signal | stop_signal_count)
    {
      /* Restore the default colors and push out pending output before
         the program stops or exits.  */
      set_color_context (RESET_CONTEXT);
      fflush (stdout);

      /* Block the caught signals so the handlers cannot change the
         pending state while it is examined below.  */
      sigset_t oldset;
      xsigprocmask (SIG_BLOCK, &caught_signals, &oldset);

      /* Reload stop_signal_count and (if needed) interrupt_signal, in
         case a new signal was handled before sigprocmask took effect. */
      int stops = stop_signal_count, sig;

      /* SIGTSTP is special, since the application can receive that signal
         more than once. In this case, don't set the signal handler to the
         default. Instead, just raise the uncatchable SIGSTOP. */
      if (stops)
        {
          stop_signal_count = stops - 1;
          sig = SIGSTOP;
        }
      else
        {
          sig = interrupt_signal;
          xsignal (sig, SIG_DFL);
        }

      /* Exit or suspend the program. */
      if (raise (sig) != 0)
        pfatal_with_name ("raise");
      /* Restore the signal mask that was in effect on entry to this
         iteration.  */
      xsigprocmask (SIG_SETMASK, &oldset, nullptr);

      /* If execution reaches here, then the program has been
         continued (after being suspended). */
    }
}
296
/* The signals that can be caught, the number of such signals,
   and which of them are actually caught.  Entries guarded by #ifdef
   are present only where the platform defines the signal; SIGHUP,
   SIGINT, SIGPIPE and SIGTERM are listed unconditionally.  */
static int const sig[] =
  {
#ifdef SIGTSTP
    /* This one is handled specially; see is_tstp_index.  It must stay
       first, because is_tstp_index identifies it by index 0.  */
    SIGTSTP,
#endif

    /* The usual suspects. */
#ifdef SIGALRM
    SIGALRM,
#endif
    SIGHUP, SIGINT, SIGPIPE,
#ifdef SIGQUIT
    SIGQUIT,
#endif
    SIGTERM,
#ifdef SIGPOLL
    SIGPOLL,
#endif
#ifdef SIGPROF
    SIGPROF,
#endif
#ifdef SIGVTALRM
    SIGVTALRM,
#endif
#ifdef SIGXCPU
    SIGXCPU,
#endif
#ifdef SIGXFSZ
    SIGXFSZ,
#endif
  };
/* Number of entries in 'sig'.  */
enum { nsigs = sizeof (sig) / sizeof *(sig) };
332
/* Return true if J is the index of SIGTSTP in the signal table.
   When the platform defines SIGTSTP it occupies slot 0; otherwise no
   index qualifies.  */
static bool
is_tstp_index (int j)
{
  bool const have_sigtstp =
#ifdef SIGTSTP
    true;
#else
    false;
#endif

  return have_sigtstp && j == 0;
}
343
/* Arrange to catch every signal in 'sig' that is not currently
   ignored: SIGTSTP gets stophandler, all others get sighandler.
   The caught signals are recorded in 'caught_signals', and
   'some_signals_caught' is set if there is at least one.  Failures of
   the underlying calls are reported via pfatal_with_name.  */
static void
install_signal_handlers (void)
{
  if (sigemptyset (&caught_signals) != 0)
    pfatal_with_name ("sigemptyset");

#if SA_NOCLDSTOP
  /* First pass: collect the signals whose current disposition is not
     SIG_IGN.  A signal whose disposition cannot be queried is skipped.  */
  for (int j = 0; j < nsigs; j++)
    {
      struct sigaction actj;
      if (sigaction (sig[j], nullptr, &actj) == 0 && actj.sa_handler != SIG_IGN)
        xsigaddset (&caught_signals, sig[j]);
    }

  /* Second pass: install the handlers.  All caught signals are
     masked while a handler runs.  */
  struct sigaction act;
  act.sa_mask = caught_signals;
  act.sa_flags = SA_RESTART;

  for (int j = 0; j < nsigs; j++)
    if (xsigismember (&caught_signals, sig[j]))
      {
        act.sa_handler = is_tstp_index (j) ? stophandler : sighandler;
        if (sigaction (sig[j], &act, nullptr) != 0)
          pfatal_with_name ("sigaction");
        some_signals_caught = true;
      }
#else
  for (int j = 0; j < nsigs; j++)
    {
      /* Without sigaction, the only way to learn the current
         disposition is to replace it; SIG_IGN is installed
         temporarily for that purpose.  */
      signal_handler h = signal (sig[j], SIG_IGN);
      if (h != SIG_IGN && h != SIG_ERR)
        {
          xsigaddset (&caught_signals, sig[j]);
          xsignal (sig[j], is_tstp_index (j) ? stophandler : sighandler);
          some_signals_caught = true;
          if (siginterrupt (sig[j], 0) != 0)
            pfatal_with_name ("siginterrupt");
        }
    }
#endif
}
385
386 /* Clean up signal handlers just before exiting the program. Do this
387 by resetting signal actions back to default, and then processing
388 any signals that arrived before resetting. */
389 void
390 cleanup_signal_handlers (void)
391 {
392 if (some_signals_caught)
393 {
394 for (int j = 0; j < nsigs; j++)
395 if (xsigismember (&caught_signals, sig[j]))
396 xsignal (sig[j], SIG_DFL);
397 process_signals ();
398 }
399 }
400
/* The file names and the recursion flag recorded by setup_output,
   for use by the next call to begin_output.  */
static char const *current_name0;
static char const *current_name1;
static bool currently_recursive;

/* Whether colored output is in effect.  Computed by
   check_color_output and cleared by parse_diff_color when the
   palette cannot be parsed.  */
static bool colors_enabled;

/* Head of the list of "*ext=sequence" palette entries built by
   parse_diff_color; the most recently parsed entry comes first.  */
static struct color_ext_type *color_ext_list = nullptr;
407
/* A counted byte string.  get_funky_string produces strings that are
   not null-terminated and may contain embedded nulls, so the length
   is kept explicitly.  */
struct bin_str
  {
    size_t len; /* Number of bytes */
    const char *string; /* Pointer to the same */
  };
413
/* One "*ext=sequence" entry of the palette, kept as a node of the
   singly linked list headed by color_ext_list.  */
struct color_ext_type
  {
    struct bin_str ext; /* The extension we're looking for */
    struct bin_str seq; /* The sequence to output when we do */
    struct color_ext_type *next; /* Next in list */
  };
420
/* Parse a string as part of the --palette argument; this may involve
   decoding all kinds of escape characters.  If EQUALS_END is set an
   unescaped equal sign ends the string, otherwise only a : or \0
   does.  Set *OUTPUT_COUNT to the number of bytes output.  Return
   true if successful, false on a malformed escape (a backslash at the
   end of the input, or a caret followed by a character outside
   '@'..'~' and other than '?').

   The resulting string is *not* null-terminated, but may contain
   embedded nulls.  It is never longer than the input that was
   consumed, so a buffer as large as the input always suffices.

   Recognized escapes: backslash followed by octal digits, by x/X and
   hex digits, by one of a b e f n r t v ? _, or by any other
   character (taken literally); and caret notation, where ^X yields
   X & 037 and ^? yields DEL (127).

   Note that both DEST and SRC are char **; on return they point to
   the first free byte after the array and the character that ended
   the input string, respectively.  */

static bool
get_funky_string (char **dest, const char **src, bool equals_end,
                  size_t *output_count)
{
  char num;                     /* For numerical codes */
  size_t count;                 /* Something to count with */
  enum {
    ST_GND, ST_BACKSLASH, ST_OCTAL, ST_HEX, ST_CARET, ST_END, ST_ERROR
  } state;
  const char *p;
  char *q;

  p = *src;                     /* We don't want to double-indirect */
  q = *dest;                    /* the whole darn time.  */

  count = 0;                    /* No characters counted in yet.  */
  num = 0;

  state = ST_GND;               /* Start in ground state.  */
  while (state < ST_END)
    {
      switch (state)
        {
        case ST_GND:            /* Ground state (no escapes) */
          switch (*p)
            {
            case ':':
            case '\0':
              state = ST_END;   /* End of string */
              break;
            case '\\':
              state = ST_BACKSLASH; /* Backslash escape sequence */
              ++p;
              break;
            case '^':
              state = ST_CARET; /* Caret escape */
              ++p;
              break;
            case '=':
              if (equals_end)
                state = ST_END; /* End; leave P on the '=' */
              else
                {
                  /* An ordinary character in this context.  */
                  *(q++) = *(p++);
                  ++count;
                }
              break;
            default:
              *(q++) = *(p++);
              ++count;
              break;
            }
          break;

        case ST_BACKSLASH:      /* Backslash escaped character */
          switch (*p)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
              state = ST_OCTAL; /* Octal sequence */
              num = *p - '0';
              break;
            case 'x':
            case 'X':
              state = ST_HEX;   /* Hex sequence */
              num = 0;
              break;
            case 'a':           /* Bell */
              num = '\a';
              break;
            case 'b':           /* Backspace */
              num = '\b';
              break;
            case 'e':           /* Escape */
              num = 27;
              break;
            case 'f':           /* Form feed */
              num = '\f';
              break;
            case 'n':           /* Newline */
              num = '\n';
              break;
            case 'r':           /* Carriage return */
              num = '\r';
              break;
            case 't':           /* Tab */
              num = '\t';
              break;
            case 'v':           /* Vtab */
              num = '\v';
              break;
            case '?':           /* Delete */
              num = 127;
              break;
            case '_':           /* Space */
              num = ' ';
              break;
            case '\0':          /* End of string */
              state = ST_ERROR; /* Error! */
              break;
            default:            /* Escaped character like \ ^ : = */
              num = *p;
              break;
            }
          /* The state is unchanged only for single-character escapes,
             whose value can be emitted right away.  */
          if (state == ST_BACKSLASH)
            {
              *(q++) = num;
              ++count;
              state = ST_GND;
            }
          ++p;
          break;

        case ST_OCTAL:          /* Octal sequence */
          if (*p < '0' || *p > '7')
            {
              /* First non-octal character: emit the value and let the
                 ground state look at this character again.  */
              *(q++) = num;
              ++count;
              state = ST_GND;
            }
          else
            num = (num << 3) + (*(p++) - '0');
          break;

        case ST_HEX:            /* Hex sequence */
          switch (*p)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
              num = (num << 4) + (*(p++) - '0');
              break;
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
              num = (num << 4) + (*(p++) - 'a') + 10;
              break;
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
              num = (num << 4) + (*(p++) - 'A') + 10;
              break;
            default:
              /* First non-hex character: emit the value and let the
                 ground state look at this character again.  */
              *(q++) = num;
              ++count;
              state = ST_GND;
              break;
            }
          break;

        case ST_CARET:          /* Caret escape */
          state = ST_GND;       /* Should be the next state... */
          if (*p >= '@' && *p <= '~')
            {
              *(q++) = *(p++) & 037;
              ++count;
            }
          else if (*p == '?')
            {
              *(q++) = 127;
              ++count;
              /* Consume the '?'; otherwise the ground state would
                 copy it to the output as a literal character.  */
              ++p;
            }
          else
            state = ST_ERROR;
          break;

        default:
          abort ();
        }
    }

  *dest = q;
  *src = p;
  *output_count = count;

  return state != ST_ERROR;
}
628
/* States of the palette parser in parse_diff_color.  */
enum parse_state
  {
    PS_START = 1,  /* Expecting the first label character, '*' or ':' */
    PS_2,          /* Expecting the second label character */
    PS_3,          /* Expecting '=' after a two-character label */
    PS_4,          /* Expecting '=' after a "*ext" entry */
    PS_DONE,       /* Reached the end of the palette string */
    PS_FAIL        /* The palette string is malformed */
  };
638
/* Expand to the two initializers of a 'struct bin_str' for the string
   literal S: its length without the terminating null, then S itself.
   The expansion is deliberately not parenthesized, since it supplies
   two comma-separated values.  */
#define LEN_STR_PAIR(s) sizeof (s) - 1, s

/* The byte sequences used for each color indicator, holding the
   built-in defaults until parse_diff_color overrides them from the
   palette.  The order must match indicator_name.  */
static struct bin_str color_indicator[] =
  {
    { LEN_STR_PAIR ("\033[") }, /* lc: Left of color sequence */
    { LEN_STR_PAIR ("m") }, /* rc: Right of color sequence */
    { 0, nullptr }, /* ec: End color (replaces lc+rs+rc) */
    { LEN_STR_PAIR ("0") }, /* rs: Reset to ordinary colors */
    { LEN_STR_PAIR ("1") }, /* hd: Header */
    { LEN_STR_PAIR ("32") }, /* ad: Add line */
    { LEN_STR_PAIR ("31") }, /* de: Delete line */
    { LEN_STR_PAIR ("36") }, /* ln: Line number */
  };

/* The two-letter palette labels, in the same order as
   color_indicator, terminated by a null pointer.  */
static const char *const indicator_name[] =
  {
    "lc", "rc", "ec", "rs", "hd", "ad", "de", "ln", nullptr
  };
ARGMATCH_VERIFY (indicator_name, color_indicator);
658
/* The palette string to be parsed by parse_diff_color, or null if
   none has been set.  */
static char const *color_palette;

/* Record PALETTE as the palette string.  The pointer is stored, not
   copied, so the string must remain valid until it is parsed.  */
void
set_color_palette (char const *palette)
{
  color_palette = palette;
}
666
667 static void
668 parse_diff_color (void)
669 {
670 char *color_buf;
671 const char *p; /* Pointer to character being parsed */
672 char *buf; /* color_buf buffer pointer */
673 int ind_no; /* Indicator number */
674 char label[] = "??"; /* Indicator label */
675 struct color_ext_type *ext; /* Extension we are working on */
676
677 p = color_palette;
678 if (p == nullptr || *p == '\0')
679 return;
680
681 ext = nullptr;
682
683 /* This is an overly conservative estimate, but any possible
684 --palette string will *not* generate a color_buf longer than
685 itself, so it is a safe way of allocating a buffer in
686 advance. */
687 buf = color_buf = xstrdup (p);
688
689 enum parse_state state = PS_START;
690 while (true)
691 {
692 switch (state)
693 {
694 case PS_START: /* First label character */
695 switch (*p)
696 {
697 case ':':
698 ++p;
699 break;
700
701 case '*':
702 /* Allocate new extension block and add to head of
703 linked list (this way a later definition will
704 override an earlier one, which can be useful for
705 having terminal-specific defs override global). */
706
707 ext = xmalloc (sizeof *ext);
708 ext->next = color_ext_list;
709 color_ext_list = ext;
710
711 ++p;
712 ext->ext.string = buf;
713
714 state = (get_funky_string (&buf, &p, true, &ext->ext.len)
715 ? PS_4 : PS_FAIL);
716 break;
717
718 case '\0':
719 state = PS_DONE; /* Done! */
720 goto done;
721
722 default: /* Assume it is file type label */
723 label[0] = *(p++);
724 state = PS_2;
725 break;
726 }
727 break;
728
729 case PS_2: /* Second label character */
730 if (*p)
731 {
732 label[1] = *(p++);
733 state = PS_3;
734 }
735 else
736 state = PS_FAIL; /* Error */
737 break;
738
739 case PS_3: /* Equal sign after indicator label */
740 state = PS_FAIL; /* Assume failure... */
741 if (*(p++) == '=')/* It *should* be... */
742 {
743 for (ind_no = 0; indicator_name[ind_no] != nullptr; ++ind_no)
744 {
745 if (STREQ (label, indicator_name[ind_no]))
746 {
747 color_indicator[ind_no].string = buf;
748 state = (get_funky_string (&buf, &p, false,
749 &color_indicator[ind_no].len)
750 ? PS_START : PS_FAIL);
751 break;
752 }
753 }
754 if (state == PS_FAIL)
755 error (0, 0, _("unrecognized prefix: %s"), label);
756 }
757 break;
758
759 case PS_4: /* Equal sign after *.ext */
760 if (*(p++) == '=')
761 {
762 ext->seq.string = buf;
763 state = (get_funky_string (&buf, &p, false, &ext->seq.len)
764 ? PS_START : PS_FAIL);
765 }
766 else
767 state = PS_FAIL;
768 break;
769
770 case PS_FAIL:
771 goto done;
772
773 default:
774 abort ();
775 }
776 }
777 done:
778
779 if (state == PS_FAIL)
780 {
781 struct color_ext_type *e;
782 struct color_ext_type *e2;
783
784 error (0, 0,
785 _("unparsable value for --palette"));
786 free (color_buf);
787 for (e = color_ext_list; e != nullptr; /* empty */)
788 {
789 e2 = e;
790 e = e->next;
791 free (e2);
792 }
793 colors_enabled = false;
794 }
795 }
796
797 static void
798 check_color_output (bool is_pipe)
799 {
800 bool output_is_tty;
801
802 if (! outfile || colors_style == NEVER)
803 return;
804
805 output_is_tty = presume_output_tty || (!is_pipe && isatty (fileno (outfile)));
806
807 colors_enabled = (colors_style == ALWAYS
808 || (colors_style == AUTO && output_is_tty));
809
810 if (colors_enabled)
811 parse_diff_color ();
812
813 if (output_is_tty)
814 install_signal_handlers ();
815 }
816
817 /* Call before outputting the results of comparing files NAME0 and NAME1
818 to set up OUTFILE, the stdio stream for the output to go to.
819
820 Usually, OUTFILE is just stdout. But when -l was specified
821 we fork off a 'pr' and make OUTFILE a pipe to it.
822 'pr' then outputs to our stdout. */
823
824 void
825 setup_output (char const *name0, char const *name1, bool recursive)
826 {
827 current_name0 = name0;
828 current_name1 = name1;
829 currently_recursive = recursive;
830 outfile = 0;
831 }
832
#if HAVE_WORKING_FORK
/* Process ID of the subsidiary 'pr' process forked by begin_output;
   finish_output waits for it.  */
static pid_t pr_pid;
#endif
836
/* Classify character C for C-style quoting.  Return the letter to
   put after a backslash if C has a simple escape (for example 'n'
   for a newline, and '"' or '\\' for themselves); return 1 if C
   compares less than 32 and has no simple escape, so that the caller
   must use an octal escape; return 0 if C needs no escaping.  */
static char c_escape_char (char c)
{
  /* RAW[i] is escaped as a backslash followed by LETTER[i].  */
  static char const raw[] = "\a\b\t\n\v\f\r\"\\";
  static char const letter[] = "abtnvfr\"\\";

  /* The scan stops at the terminating null of RAW, so C == '\0'
     never matches and is classified by the comparison below.  */
  for (size_t i = 0; raw[i] != '\0'; i++)
    if (raw[i] == c)
      return letter[i];

  return c < 32;
}
853
/* Return STR quoted as a C string literal if it needs quoting, i.e.
   if it contains a space or any character for which c_escape_char
   returns nonzero.  The quoted form is enclosed in double quotes and
   is newly allocated; the caller frees it.  If no quoting is needed,
   STR itself is returned (with its const qualifier cast away), so the
   caller must compare the result against STR before freeing.  */
static char *
c_escape (char const *str)
{
  size_t extra = 0;             /* Bytes needed beyond strlen (STR) */
  bool needs_quotes = false;
  char const *in = str;

  /* First pass: measure.  An octal escape turns one byte into four,
     a simple escape turns one byte into two.  */
  for (; *in; in++)
    {
      if (*in == ' ')
        needs_quotes = true;
      else
        {
          char kind = c_escape_char (*in);
          if (kind == 1)
            extra += 3;
          else if (kind != 0)
            extra++;
        }
    }

  if (! needs_quotes && extra == 0)
    return (char *) str;

  /* Room for the text, the escapes, two quotes and the final null.  */
  size_t len = in - str;
  char *result = xmalloc (len + extra + 3);
  char *out = result;

  /* Second pass: emit.  */
  *out++ = '"';
  for (in = str; *in; in++)
    {
      char ch = *in;
      char kind = c_escape_char (ch);

      if (kind == 0)
        *out++ = ch;
      else if (kind == 1)
        {
          *out++ = '\\';
          *out++ = ((ch >> 6) & 03) + '0';
          *out++ = ((ch >> 3) & 07) + '0';
          *out++ = ((ch >> 0) & 07) + '0';
        }
      else
        {
          *out++ = '\\';
          *out++ = kind;
        }
    }
  *out++ = '"';
  *out = 0;

  return result;
}
919
/* Open OUTFILE for the comparison recorded by setup_output, unless
   it is already open, and print whatever header the output style
   needs.

   When paginating, OUTFILE becomes a pipe to a subsidiary
   'pr_program' that is given the "diff OPTIONS NAME0 NAME1" line as
   its -h header.  Otherwise OUTFILE is stdout, and that line is
   printed only for recursive comparisons.  File names that need it
   are quoted with c_escape.  Failures are reported via
   pfatal_with_name.  */
void
begin_output (void)
{
  char *names[2];
  char *name;

  /* Output was already started for this comparison.  */
  if (outfile != 0)
    return;

  /* c_escape returns its argument unchanged when no quoting is
     needed; see the matching test before the calls to free below.  */
  names[0] = c_escape (current_name0);
  names[1] = c_escape (current_name1);

  /* Construct the header of this piece of diff. */
  /* POSIX 1003.1-2001 specifies this format. But there are some bugs in
     the standard: it says that we must print only the last component
     of the pathnames, and it requires two spaces after "diff" if
     there are no options. These requirements are silly and do not
     match historical practice. */
  /* 'sizeof "diff"' counts the terminating null; each '+ 1' is one
     separating space.  */
  name = xmalloc (sizeof "diff" + strlen (switch_string)
                  + 1 + strlen (names[0]) + 1 + strlen (names[1]));
  char *p = stpcpy (name, "diff");
  p = stpcpy (p, switch_string);
  *p++ = ' ';
  p = stpcpy (p, names[0]);
  *p++ = ' ';
  strcpy (p, names[1]);

  if (paginate)
    {
      char const *argv[4];

      /* Flush our own pending output first, so that it precedes
         whatever the subsidiary program writes.  */
      if (fflush (stdout) != 0)
        pfatal_with_name (_("write failed"));

      argv[0] = pr_program;
      argv[1] = "-h";
      argv[2] = name;
      argv[3] = 0;

      /* Make OUTFILE a pipe to a subsidiary 'pr'. */
      {
#if HAVE_WORKING_FORK
        int pipes[2];

        if (pipe (pipes) != 0)
          pfatal_with_name ("pipe");

        pr_pid = fork ();
        if (pr_pid < 0)
          pfatal_with_name ("fork");

        if (pr_pid == 0)
          {
            /* Child: make the read end of the pipe its standard
               input, then become 'pr'.  */
            close (pipes[1]);
            if (pipes[0] != STDIN_FILENO)
              {
                if (dup2 (pipes[0], STDIN_FILENO) < 0)
                  pfatal_with_name ("dup2");
                close (pipes[0]);
              }

            execv (pr_program, (char **) argv);
            /* execv returned, so it failed.  finish_output decodes
               these two exit statuses.  */
            _exit (errno == ENOENT ? 127 : 126);
          }
        else
          {
            /* Parent: write to the pipe through a stdio stream.  */
            close (pipes[0]);
            outfile = fdopen (pipes[1], "w");
            if (!outfile)
              pfatal_with_name ("fdopen");
            check_color_output (true);
          }
#else
        char *command = system_quote_argv (SCI_SYSTEM, (char **) argv);
        errno = 0;
        outfile = popen (command, "w");
        if (!outfile)
          pfatal_with_name (command);
        check_color_output (true);
        free (command);
#endif
      }
    }
  else
    {

      /* If -l was not specified, output the diff straight to 'stdout'. */

      outfile = stdout;
      check_color_output (false);

      /* If handling multiple files (because scanning a directory),
         print which files the following output is about. */
      if (currently_recursive)
        printf ("%s\n", name);
    }

  free (name);

  /* A special header is needed at the beginning of context output. */
  switch (output_style)
    {
    case OUTPUT_CONTEXT:
      print_context_header (files, (char const *const *)names, false);
      break;

    case OUTPUT_UNIFIED:
      print_context_header (files, (char const *const *)names, true);
      break;

    default:
      break;
    }

  /* Free only the names that c_escape actually allocated.  */
  if (names[0] != current_name0)
    free (names[0]);
  if (names[1] != current_name1)
    free (names[1]);
}
1039
1040 /* Call after the end of output of diffs for one file.
1041 Close OUTFILE and get rid of the 'pr' subfork. */
1042
1043 void
1044 finish_output (void)
1045 {
1046 if (outfile != 0 && outfile != stdout)
1047 {
1048 int status;
1049 int wstatus;
1050 int werrno = 0;
1051 if (ferror (outfile))
1052 fatal ("write failed");
1053 #if ! HAVE_WORKING_FORK
1054 wstatus = pclose (outfile);
1055 if (wstatus == -1)
1056 werrno = errno;
1057 #else
1058 if (fclose (outfile) != 0)
1059 pfatal_with_name (_("write failed"));
1060 if (waitpid (pr_pid, &wstatus, 0) < 0)
1061 pfatal_with_name ("waitpid");
1062 #endif
1063 status = (! werrno && WIFEXITED (wstatus)
1064 ? WEXITSTATUS (wstatus)
1065 : INT_MAX);
1066 if (status)
1067 die (EXIT_TROUBLE, werrno,
1068 _(status == 126
1069 ? "subsidiary program '%s' could not be invoked"
1070 : status == 127
1071 ? "subsidiary program '%s' not found"
1072 : status == INT_MAX
1073 ? "subsidiary program '%s' failed"
1074 : "subsidiary program '%s' failed (exit status %d)"),
1075 pr_program, status);
1076 }
1077
1078 outfile = 0;
1079 }
1080
/* Compare two lines (typically one from each input file)
   according to the command line options.
   For efficiency, this is invoked only when the lines do not match exactly
   but an option like -i might cause us to ignore the difference.
   Return true if the lines differ, false if they are equivalent.

   S1 and S2 must each be terminated by a newline: the scan stops only
   at a difference or at a matching '\n', not at a null byte.  The
   settings consulted are ignore_white_space, ignore_case and
   tabsize.  */

bool
lines_differ (char const *s1, char const *s2)
{
  register char const *t1 = s1;
  register char const *t2 = s2;
  /* Output column of the current character, advanced in step with
     the scan so that tabs can be expanded.  */
  size_t column = 0;

  while (1)
    {
      register unsigned char c1 = *t1++;
      register unsigned char c2 = *t2++;

      /* Test for exact char equality first, since it's a common case. */
      if (c1 != c2)
        {
          switch (ignore_white_space)
            {
            case IGNORE_ALL_SPACE:
              /* For -w, just skip past any white space. */
              while (isspace (c1) && c1 != '\n') c1 = *t1++;
              while (isspace (c2) && c2 != '\n') c2 = *t2++;
              break;

            case IGNORE_SPACE_CHANGE:
              /* For -b, advance past any sequence of white space in
                 line 1 and consider it just one space, or nothing at
                 all if it is at the end of the line. */
              if (isspace (c1))
                {
                  while (c1 != '\n')
                    {
                      c1 = *t1++;
                      if (! isspace (c1))
                        {
                          /* Leave T1 on the first non-space character
                             and let the whole run count as one
                             space.  */
                          --t1;
                          c1 = ' ';
                          break;
                        }
                    }
                }

              /* Likewise for line 2. */
              if (isspace (c2))
                {
                  while (c2 != '\n')
                    {
                      c2 = *t2++;
                      if (! isspace (c2))
                        {
                          --t2;
                          c2 = ' ';
                          break;
                        }
                    }
                }

              if (c1 != c2)
                {
                  /* If we went too far when doing the simple test
                     for equality, go back to the first non-white-space
                     character in both sides and try again. */
                  if (c2 == ' ' && c1 != '\n'
                      && s1 + 1 < t1
                      && isspace ((unsigned char) t1[-2]))
                    {
                      --t1;
                      continue;
                    }
                  if (c1 == ' ' && c2 != '\n'
                      && s2 + 1 < t2
                      && isspace ((unsigned char) t2[-2]))
                    {
                      --t2;
                      continue;
                    }
                }

              break;

            case IGNORE_TRAILING_SPACE:
            case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
              if (isspace (c1) && isspace (c2))
                {
                  unsigned char c;
                  /* Look ahead, without consuming anything, to see
                     whether only white space remains on each line.  */
                  if (c1 != '\n')
                    {
                      char const *p = t1;
                      while ((c = *p) != '\n' && isspace (c))
                        ++p;
                      if (c != '\n')
                        break;
                    }
                  if (c2 != '\n')
                    {
                      char const *p = t2;
                      while ((c = *p) != '\n' && isspace (c))
                        ++p;
                      if (c != '\n')
                        break;
                    }
                  /* Both lines have nothing but whitespace left. */
                  return false;
                }
              if (ignore_white_space == IGNORE_TRAILING_SPACE)
                break;
              FALLTHROUGH;
            case IGNORE_TAB_EXPANSION:
              if ((c1 == ' ' && c2 == '\t')
                  || (c1 == '\t' && c2 == ' '))
                {
                  /* Advance over the run of spaces and tabs on each
                     side, tracking the column each run ends at; the
                     runs are equivalent only if they end in the same
                     column.  */
                  size_t column2 = column;
                  for (;; c1 = *t1++)
                    {
                      if (c1 == ' ')
                        column++;
                      else if (c1 == '\t')
                        column += tabsize - column % tabsize;
                      else
                        break;
                    }
                  for (;; c2 = *t2++)
                    {
                      if (c2 == ' ')
                        column2++;
                      else if (c2 == '\t')
                        column2 += tabsize - column2 % tabsize;
                      else
                        break;
                    }
                  if (column != column2)
                    return true;
                }
              break;

            case IGNORE_NO_WHITE_SPACE:
              break;
            }

          /* Lowercase all letters if -i is specified. */

          if (ignore_case)
            {
              c1 = tolower (c1);
              c2 = tolower (c2);
            }

          /* Still different after applying the options: leave the
             loop and report a difference.  */
          if (c1 != c2)
            break;
        }
      /* Both lines ended together without a difference.  */
      if (c1 == '\n')
        return false;

      column += c1 == '\t' ? tabsize - column % tabsize : 1;
    }

  return true;
}
1244
/* Hunk-finding function for print_script: return the last link of
   the group of changes that starts at START.  This implementation
   returns START itself, so every change forms a hunk of its own.  */

struct change * ATTRIBUTE_CONST
find_change (struct change *start)
{
  return start;
}
1253
/* Like find_change: return START itself, so every change forms a
   hunk of its own.
   NOTE(review): presumably meant for scripts in reverse order, going
   by the name; confirm against the callers.  */
struct change * ATTRIBUTE_CONST
find_reverse_change (struct change *start)
{
  return start;
}
1259
1260 /* Divide SCRIPT into pieces by calling HUNKFUN and
1261 print each piece with PRINTFUN.
1262 Both functions take one arg, an edit script.
1263
1264 HUNKFUN is called with the tail of the script
1265 and returns the last link that belongs together with the start
1266 of the tail.
1267
1268 PRINTFUN takes a subscript which belongs together (with a null
1269 link at the end) and prints it. */
1270
1271 void
1272 print_script (struct change *script,
1273 struct change * (*hunkfun) (struct change *),
1274 void (*printfun) (struct change *))
1275 {
1276 struct change *next = script;
1277
1278 while (next)
1279 {
1280 struct change *this, *end;
1281
1282 /* Find a set of changes that belong together. */
1283 this = next;
1284 end = (*hunkfun) (next);
1285
1286 /* Disconnect them from the rest of the changes,
1287 making them a hunk, and remember the rest for next iteration. */
1288 next = end->link;
1289 end->link = 0;
1290 #ifdef DEBUG
1291 debug_script (this);
1292 #endif
1293
1294 /* Print this hunk. */
1295 (*printfun) (this);
1296
1297 /* Reconnect the script so it will all be freed properly. */
1298 end->link = next;
1299 }
1300 }
1301
/* Print the text of a single line LINE,
   flagging it with the characters in LINE_FLAG (which say whether
   the line is inserted, deleted, changed, etc.). LINE_FLAG must not
   end in a blank, unless it is a single blank.

   LINE[0] points to the start of the text and LINE[1] just past its
   end.  This is print_1_line_nl with SKIP_NL false, so the line's
   final newline is printed as well.  */

void
print_1_line (char const *line_flag, char const *const *line)
{
  print_1_line_nl (line_flag, line, false);
}
1312
1313 /* Print the text of a single line LINE,
1314 flagging it with the characters in LINE_FLAG (which say whether
1315 the line is inserted, deleted, changed, etc.). LINE_FLAG must not
1316 end in a blank, unless it is a single blank. If SKIP_NL is set, then
1317 the final '\n' is not printed. */
1318
1319 void
1320 print_1_line_nl (char const *line_flag, char const *const *line, bool skip_nl)
1321 {
1322 char const *base = line[0], *limit = line[1]; /* Help the compiler. */
1323 FILE *out = outfile; /* Help the compiler some more. */
1324 char const *flag_format = 0;
1325
1326 /* If -T was specified, use a Tab between the line-flag and the text.
1327 Otherwise use a Space (as Unix diff does).
1328 Print neither space nor tab if line-flags are empty.
1329 But omit trailing blanks if requested. */
1330
1331 if (line_flag && *line_flag)
1332 {
1333 char const *flag_format_1 = flag_format = initial_tab ? "%s\t" : "%s ";
1334 char const *line_flag_1 = line_flag;
1335
1336 if (suppress_blank_empty && **line == '\n')
1337 {
1338 flag_format_1 = "%s";
1339
1340 /* This hack to omit trailing blanks takes advantage of the
1341 fact that the only way that LINE_FLAG can end in a blank
1342 is when LINE_FLAG consists of a single blank. */
1343 line_flag_1 += *line_flag_1 == ' ';
1344 }
1345
1346 fprintf (out, flag_format_1, line_flag_1);
1347 }
1348
1349 output_1_line (base, limit - (skip_nl && limit[-1] == '\n'), flag_format, line_flag);
1350
1351 if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
1352 {
1353 set_color_context (RESET_CONTEXT);
1354 fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
1355 }
1356 }
1357
1358 /* Output a line from BASE up to LIMIT.
1359 With -t, expand white space characters to spaces, and if FLAG_FORMAT
1360 is nonzero, output it with argument LINE_FLAG after every
1361 internal carriage return, so that tab stops continue to line up. */
1362
1363 void
1364 output_1_line (char const *base, char const *limit, char const *flag_format,
1365 char const *line_flag)
1366 {
1367 const size_t MAX_CHUNK = 1024;
1368 if (!expand_tabs)
1369 {
1370 size_t left = limit - base;
1371 while (left)
1372 {
1373 size_t to_write = MIN (left, MAX_CHUNK);
1374 size_t written = fwrite (base, sizeof (char), to_write, outfile);
1375 if (written < to_write)
1376 return;
1377 base += written;
1378 left -= written;
1379 process_signals ();
1380 }
1381 }
1382 else
1383 {
1384 register FILE *out = outfile;
1385 register unsigned char c;
1386 register char const *t = base;
1387 register size_t column = 0;
1388 size_t tab_size = tabsize;
1389 size_t counter_proc_signals = 0;
1390
1391 while (t < limit)
1392 {
1393 counter_proc_signals++;
1394 if (counter_proc_signals == MAX_CHUNK)
1395 {
1396 process_signals ();
1397 counter_proc_signals = 0;
1398 }
1399
1400 switch ((c = *t++))
1401 {
1402 case '\t':
1403 {
1404 size_t spaces = tab_size - column % tab_size;
1405 column += spaces;
1406 do
1407 putc (' ', out);
1408 while (--spaces);
1409 }
1410 break;
1411
1412 case '\r':
1413 putc (c, out);
1414 if (flag_format && t < limit && *t != '\n')
1415 fprintf (out, flag_format, line_flag);
1416 column = 0;
1417 break;
1418
1419 case '\b':
1420 if (column == 0)
1421 continue;
1422 column--;
1423 putc (c, out);
1424 break;
1425
1426 default:
1427 column += isprint (c) != 0;
1428 putc (c, out);
1429 break;
1430 }
1431 }
1432 }
1433 }
1434
/* Indices into the color_indicator table, whose entries
   set_color_context writes to the output.  The enumerators keep
   their implicit values, 0 through 7 in this order.  */

enum indicator_no
  {
    C_LEFT,     /* Written before each context's own entry.  */
    C_RIGHT,    /* Written after each context's own entry.  */
    C_END,
    C_RESET,    /* Entry for RESET_CONTEXT.  */
    C_HEADER,   /* Entry for HEADER_CONTEXT.  */
    C_ADD,      /* Entry for ADD_CONTEXT.  */
    C_DELETE,   /* Entry for DELETE_CONTEXT.  */
    C_LINE      /* Entry for LINE_NUMBER_CONTEXT.  */
  };
1439
1440 static void
1441 put_indicator (const struct bin_str *ind)
1442 {
1443 fwrite (ind->string, ind->len, 1, outfile);
1444 }
1445
1446 static enum color_context last_context = RESET_CONTEXT;
1447
1448 void
1449 set_color_context (enum color_context color_context)
1450 {
1451 if (color_context != RESET_CONTEXT)
1452 process_signals ();
1453 if (colors_enabled && last_context != color_context)
1454 {
1455 put_indicator (&color_indicator[C_LEFT]);
1456 switch (color_context)
1457 {
1458 case HEADER_CONTEXT:
1459 put_indicator (&color_indicator[C_HEADER]);
1460 break;
1461
1462 case LINE_NUMBER_CONTEXT:
1463 put_indicator (&color_indicator[C_LINE]);
1464 break;
1465
1466 case ADD_CONTEXT:
1467 put_indicator (&color_indicator[C_ADD]);
1468 break;
1469
1470 case DELETE_CONTEXT:
1471 put_indicator (&color_indicator[C_DELETE]);
1472 break;
1473
1474 case RESET_CONTEXT:
1475 put_indicator (&color_indicator[C_RESET]);
1476 break;
1477
1478 default:
1479 abort ();
1480 }
1481 put_indicator (&color_indicator[C_RIGHT]);
1482 last_context = color_context;
1483 }
1484 }
1485
1486
/* Four-entry table of change letters: a null byte, then 'd', 'a' and
   'c'.  NOTE(review): presumably indexed by 'enum changes' values
   (no change, deletion, insertion, change) -- confirm against the
   enum's declaration.  */
char const change_letter[] = { '\0', 'd', 'a', 'c' };
1488
1489 /* Translate an internal line number (an index into diff's table of lines)
1490 into an actual line number in the input file.
1491 The internal line number is I. FILE points to the data on the file.
1492
1493 Internal line numbers count from 0 starting after the prefix.
1494 Actual line numbers count from 1 within the entire file. */
1495
1496 lin ATTRIBUTE_PURE
1497 translate_line_number (struct file_data const *file, lin i)
1498 {
1499 return i + file->prefix_lines + 1;
1500 }
1501
1502 /* Translate a line number range. */
1503
1504 void
1505 translate_range (struct file_data const *file,
1506 lin a, lin b,
1507 lin *aptr, lin *bptr)
1508 {
1509 *aptr = translate_line_number (file, a - 1) + 1;
1510 *bptr = translate_line_number (file, b + 1) - 1;
1511 }
1512
1513 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
1514 If the two numbers are identical, print just one number.
1515
1516 Args A and B are internal line numbers.
1517 We print the translated (real) line numbers. */
1518
1519 void
1520 print_number_range (char sepchar, struct file_data *file, lin a, lin b)
1521 {
1522 lin trans_a, trans_b;
1523 translate_range (file, a, b, &trans_a, &trans_b);
1524
1525 /* Note: we can have B < A in the case of a range of no lines.
1526 In this case, we should print the line number before the range,
1527 which is B. */
1528 if (trans_b > trans_a)
1529 fprintf (outfile, "%"pI"d%c%"pI"d", trans_a, sepchar, trans_b);
1530 else
1531 fprintf (outfile, "%"pI"d", trans_b);
1532 }
1533
1534 /* Look at a hunk of edit script and report the range of lines in each file
1535 that it applies to. HUNK is the start of the hunk, which is a chain
1536 of 'struct change'. The first and last line numbers of file 0 are stored in
1537 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
1538 Note that these are internal line numbers that count from 0.
1539
1540 If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
1541
1542 Return UNCHANGED if only ignorable lines are inserted or deleted,
1543 OLD if lines of file 0 are deleted,
1544 NEW if lines of file 1 are inserted,
1545 and CHANGED if both kinds of changes are found. */
1546
1547 enum changes
1548 analyze_hunk (struct change *hunk,
1549 lin *first0, lin *last0,
1550 lin *first1, lin *last1)
1551 {
1552 struct change *next;
1553 lin l0, l1;
1554 lin show_from, show_to;
1555 lin i;
1556 bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
1557 size_t trivial_length = ignore_blank_lines - 1;
1558 /* If 0, ignore zero-length lines;
1559 if SIZE_MAX, do not ignore lines just because of their length. */
1560
1561 bool skip_white_space =
1562 ignore_blank_lines && IGNORE_TRAILING_SPACE <= ignore_white_space;
1563 bool skip_leading_white_space =
1564 skip_white_space && IGNORE_SPACE_CHANGE <= ignore_white_space;
1565
1566 char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */
1567 char const * const *linbuf1 = files[1].linbuf;
1568
1569 show_from = show_to = 0;
1570
1571 *first0 = hunk->line0;
1572 *first1 = hunk->line1;
1573
1574 next = hunk;
1575 do
1576 {
1577 l0 = next->line0 + next->deleted - 1;
1578 l1 = next->line1 + next->inserted - 1;
1579 show_from += next->deleted;
1580 show_to += next->inserted;
1581
1582 for (i = next->line0; i <= l0 && trivial; i++)
1583 {
1584 char const *line = linbuf0[i];
1585 char const *lastbyte = linbuf0[i + 1] - 1;
1586 char const *newline = lastbyte + (*lastbyte != '\n');
1587 size_t len = newline - line;
1588 char const *p = line;
1589 if (skip_white_space)
1590 for (; *p != '\n'; p++)
1591 if (! isspace ((unsigned char) *p))
1592 {
1593 if (! skip_leading_white_space)
1594 p = line;
1595 break;
1596 }
1597 if (newline - p != trivial_length
1598 && (! ignore_regexp.fastmap
1599 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
1600 trivial = 0;
1601 }
1602
1603 for (i = next->line1; i <= l1 && trivial; i++)
1604 {
1605 char const *line = linbuf1[i];
1606 char const *lastbyte = linbuf1[i + 1] - 1;
1607 char const *newline = lastbyte + (*lastbyte != '\n');
1608 size_t len = newline - line;
1609 char const *p = line;
1610 if (skip_white_space)
1611 for (; *p != '\n'; p++)
1612 if (! isspace ((unsigned char) *p))
1613 {
1614 if (! skip_leading_white_space)
1615 p = line;
1616 break;
1617 }
1618 if (newline - p != trivial_length
1619 && (! ignore_regexp.fastmap
1620 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
1621 trivial = 0;
1622 }
1623 }
1624 while ((next = next->link) != 0);
1625
1626 *last0 = l0;
1627 *last1 = l1;
1628
1629 /* If all inserted or deleted lines are ignorable,
1630 tell the caller to ignore this hunk. */
1631
1632 if (trivial)
1633 return UNCHANGED;
1634
1635 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
1636 }
1637
#ifdef DEBUG
/* Write the edit script starting at SP to stderr, one change per
   line, giving each change's line0, line1, deleted and inserted
   fields.  stdout is flushed before anything is written and stderr
   afterwards.  */
void
debug_script (struct change *sp)
{
  struct change *cur = sp;

  fflush (stdout);

  while (cur)
    {
      fprintf (stderr, "%3"pI"d %3"pI"d delete %"pI"d insert %"pI"d\n",
               cur->line0, cur->line1, cur->deleted, cur->inserted);
      cur = cur->link;
    }

  fflush (stderr);
}
#endif