1 /* Shared definitions for GNU DIFF
2
3 Copyright (C) 1988-1989, 1991-1995, 1998, 2001-2002, 2004, 2009-2013,
4 2015-2023 Free Software Foundation, Inc.
5
6 This file is part of GNU DIFF.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21 #include "system.h"
22 #include <regex.h>
23 #include <stdio.h>
24 #include <unlocked-io.h>
25
26 _GL_INLINE_HEADER_BEGIN
27
/* Linkage helpers used by the declarations in this header.

   Without GDIFF_MAIN, XTERN expands to 'extern' and DIFF_INLINE to
   _GL_INLINE.  With GDIFF_MAIN defined, XTERN expands to nothing, so
   the XTERN declarations below are not marked 'extern', and
   DIFF_INLINE becomes _GL_EXTERN_INLINE.  */
#ifndef GDIFF_MAIN
# define DIFF_INLINE _GL_INLINE
# define XTERN extern
#else
# define DIFF_INLINE _GL_EXTERN_INLINE
# define XTERN
#endif
35
/* The kind of changes a hunk contains.  The values are spelled out
   because group_format and line_format are sized from CHANGED + 1
   and NEW + 1 respectively.  */
enum changes
{
  UNCHANGED = 0,	/* No changes: lines common to both files.  */
  OLD = 1,		/* Deletes only: lines taken from just the first file.  */
  NEW = 2,		/* Inserts only: lines taken from just the second file.  */
  CHANGED = 3		/* Both deletes and inserts: old and new lines.  */
};
51
/* Policy for when colors are used in the output.  */
enum colors_style
{
  NEVER,	/* Never output colors.  */
  AUTO,		/* Output colors if the output is a terminal.  */
  ALWAYS	/* Always output colors.  */
};
64
/* Variables for command line options */

/* The format in which differences are written.  */
enum output_style
{
  OUTPUT_UNSPECIFIED,	/* No output style specified.  */
  OUTPUT_NORMAL,	/* Default output style.  */
  OUTPUT_CONTEXT,	/* Lines of context before and after (-c).  */
  OUTPUT_UNIFIED,	/* Unified context diff format (-u).  */
  OUTPUT_ED,		/* Commands suitable for 'ed' (-e).  */
  OUTPUT_FORWARD_ED,	/* Forward ed script (-f).  */
  OUTPUT_RCS,		/* Like -f, but with a count of changed lines
			   in each "command" (-n).  */
  OUTPUT_IFDEF,		/* Merged #ifdef'd file (-D).  */
  OUTPUT_SDIFF		/* sdiff style (-y).  */
};
96
97 /* True for output styles that are robust,
98 i.e. can handle a file that ends in a non-newline. */
99 DIFF_INLINE bool robust_output_style (enum output_style s)
100 {
101 return s != OUTPUT_ED && s != OUTPUT_FORWARD_ED;
102 }
103
104 XTERN enum output_style output_style;
105
106 /* Define the current color context used to print a line. */
107 XTERN enum colors_style colors_style;
108
109 /* Nonzero if output cannot be generated for identical files. */
110 XTERN bool no_diff_means_no_output;
111
112 /* Number of lines of context to show in each set of diffs.
113 This is zero when context is not to be shown. */
114 XTERN lin context;
115
116 /* Consider all files as text files (-a).
117 Don't interpret codes over 0177 as implying a "binary file". */
118 XTERN bool text;
119
120 /* Number of lines to keep in identical prefix and suffix. */
121 XTERN lin horizon_lines;
122
/* The significance of white space during comparisons.

   IGNORE_TAB_EXPANSION and IGNORE_TRAILING_SPACE are independent
   options, so they are given distinct bit values and their
   combination is defined as the bitwise OR of the two.  Writing the
   values out keeps that relationship true even if an enumerator is
   later added or moved.  The resulting numeric values are the same
   as plain positional numbering (0 through 5).  */
enum DIFF_white_space
{
  /* All white space is significant (the default).  */
  IGNORE_NO_WHITE_SPACE = 0,

  /* Ignore changes due to tab expansion (-E).  */
  IGNORE_TAB_EXPANSION = 1,

  /* Ignore changes in trailing horizontal white space (-Z).  */
  IGNORE_TRAILING_SPACE = 2,

  /* Both of the above at once: the two values ORed together.  */
  IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE
    = IGNORE_TAB_EXPANSION | IGNORE_TRAILING_SPACE,

  /* Ignore changes in horizontal white space (-b).  */
  IGNORE_SPACE_CHANGE,

  /* Ignore all horizontal white space (-w).  */
  IGNORE_ALL_SPACE
};
146 XTERN enum DIFF_white_space ignore_white_space;
147
148 /* Ignore changes that affect only blank lines (-B). */
149 XTERN bool ignore_blank_lines;
150
151 /* Files can be compared byte-by-byte, as if they were binary.
152 This depends on various options. */
153 XTERN bool files_can_be_treated_as_binary;
154
155 /* Ignore differences in case of letters (-i). */
156 XTERN bool ignore_case;
157
158 /* Ignore differences in case of letters in file names. */
159 XTERN bool ignore_file_name_case;
160
161 /* Act on symbolic links themselves rather than on their target
162 (--no-dereference). */
163 XTERN bool no_dereference_symlinks;
164
165 /* Local timezone for 'c' output headers, if needed. */
166 #if HAVE_TM_GMTOFF
167 # define localtz 0 /* Placeholder since localtz is never needed. */
168 #else
169 XTERN timezone_t localtz;
170 #endif
171
172 /* File labels for '-c' output headers (--label). */
173 XTERN char *file_label[2];
174
175 /* Regexp to identify function-header lines (-F). */
176 XTERN struct re_pattern_buffer function_regexp;
177
178 /* Ignore changes that affect only lines matching this regexp (-I). */
179 XTERN struct re_pattern_buffer ignore_regexp;
180
181 /* Say only whether files differ, not how (-q). */
182 XTERN bool brief;
183
184 /* Expand tabs in the output so the text lines up properly
185 despite the characters added to the front of each line (-t). */
186 XTERN bool expand_tabs;
187
188 /* Number of columns between tab stops. */
189 XTERN size_t tabsize;
190
191 /* Use a tab in the output, rather than a space, before the text of an
192 input line, so as to keep the proper alignment in the input line
193 without changing the characters in it (-T). */
194 XTERN bool initial_tab;
195
196 /* Do not output an initial space or tab before the text of an empty line. */
197 XTERN bool suppress_blank_empty;
198
199 /* Remove trailing carriage returns from input. */
200 XTERN bool strip_trailing_cr;
201
202 /* In directory comparison, specify file to start with (-S).
203 This is used for resuming an aborted comparison.
204 All file names less than this name are ignored. */
205 XTERN char const *starting_file;
206
207 /* Pipe each file's output through pr (-l). */
208 XTERN bool paginate;
209
210 /* Line group formats for unchanged, old, new, and changed groups. */
211 XTERN char const *group_format[CHANGED + 1];
212
213 /* Line formats for unchanged, old, and new lines. */
214 XTERN char const *line_format[NEW + 1];
215
216 /* If using OUTPUT_SDIFF print extra information to help the sdiff filter. */
217 XTERN bool sdiff_merge_assist;
218
219 /* Tell OUTPUT_SDIFF to show only the left version of common lines. */
220 XTERN bool left_column;
221
222 /* Tell OUTPUT_SDIFF to not show common lines. */
223 XTERN bool suppress_common_lines;
224
225 /* The half line width and column 2 offset for OUTPUT_SDIFF. */
226 XTERN size_t sdiff_half_width;
227 XTERN size_t sdiff_column2_offset;
228
229 /* String containing all the command options diff received,
230 with spaces between and at the beginning but none at the end.
231 If there were no options given, this string is empty. */
232 XTERN char *switch_string;
233
234 /* Use heuristics for better speed with large files with a small
235 density of changes. */
236 XTERN bool speed_large_files;
237
238 /* Patterns that match file names to be excluded. */
239 XTERN struct exclude *excluded;
240
241 /* Don't discard lines. This makes things slower (sometimes much
242 slower) but will find a guaranteed minimal set of changes. */
243 XTERN bool minimal;
244
245 /* The strftime format to use for time strings. */
246 XTERN char const *time_format;
247
248 /* The result of comparison is an "edit script": a chain of 'struct change'.
249 Each 'struct change' represents one place where some lines are deleted
250 and some are inserted.
251
252 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
253 DELETED is the number of lines deleted here from file 0.
254 INSERTED is the number of lines inserted here in file 1.
255
256 If DELETED is 0 then LINE0 is the number of the line before
257 which the insertion was done; vice versa for INSERTED and LINE1. */
258
259 struct change
260 {
261 struct change *link; /* Previous or next edit command */
262 lin inserted; /* # lines of file 1 changed here. */
263 lin deleted; /* # lines of file 0 changed here. */
264 lin line0; /* Line number of 1st deleted line. */
265 lin line1; /* Line number of 1st inserted line. */
266 bool ignore; /* Flag used in context.c. */
267 };
268
269 /* Structures that describe the input files. */
270
271 /* Data on one input file being compared. */
272
273 struct file_data {
274 int desc; /* File descriptor */
275 char const *name; /* File name */
276 struct stat stat; /* File status */
277
278 /* Buffer in which text of file is read. */
279 word *buffer;
280
281 /* Allocated size of buffer, in bytes. Always a multiple of
282 sizeof *buffer. */
283 size_t bufsize;
284
285 /* Number of valid bytes now in the buffer. */
286 size_t buffered;
287
288 /* Array of pointers to lines in the file. */
289 char const **linbuf;
290
291 /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines.
292 linebuf[linbuf_base ... buffered_lines - 1] are possibly differing.
293 linebuf[linbuf_base ... valid_lines - 1] contain valid data.
294 linebuf[linbuf_base ... alloc_lines - 1] are allocated. */
295 lin linbuf_base, buffered_lines, valid_lines, alloc_lines;
296
297 /* Pointer to end of prefix of this file to ignore when hashing. */
298 char const *prefix_end;
299
300 /* Count of lines in the prefix.
301 There are this many lines in the file before linbuf[0]. */
302 lin prefix_lines;
303
304 /* Pointer to start of suffix of this file to ignore when hashing. */
305 char const *suffix_begin;
306
307 /* Vector, indexed by line number, containing an equivalence code for
308 each line. It is this vector that is actually compared with that
309 of another file to generate differences. */
310 lin *equivs;
311
312 /* Vector, like the previous one except that
313 the elements for discarded lines have been squeezed out. */
314 lin *undiscarded;
315
316 /* Vector mapping virtual line numbers (not counting discarded lines)
317 to real ones (counting those lines). Both are origin-0. */
318 lin *realindexes;
319
320 /* Total number of nondiscarded lines. */
321 lin nondiscarded_lines;
322
323 /* Vector, indexed by real origin-0 line number,
324 containing 1 for a line that is an insertion or a deletion.
325 The results of comparison are stored here. */
326 char *changed;
327
328 /* 1 if file ends in a line with no final newline. */
329 bool missing_newline;
330
331 /* 1 if at end of file. */
332 bool eof;
333
334 /* 1 more than the maximum equivalence value used for this or its
335 sibling file. */
336 lin equiv_max;
337 };
338
339 /* Data on two input files being compared. */
340
341 struct comparison
342 {
343 struct file_data file[2];
344 struct comparison const *parent; /* parent, if a recursive comparison */
345 };
346
347 /* Describe the two files currently being compared. */
348
349 XTERN struct file_data files[2];
350
351 /* Stdio stream to output diffs to. */
352
353 XTERN FILE *outfile;
354
355 /* Declare various functions. */
356
357 /* analyze.c */
358 extern int diff_2_files (struct comparison *);
359
360 /* context.c */
361 extern void print_context_header (struct file_data[], char const * const *, bool);
362 extern void print_context_script (struct change *, bool);
363
364 /* dir.c */
365 extern int diff_dirs (struct comparison const *,
366 int (*) (struct comparison const *,
367 char const *, char const *));
368 extern char *find_dir_file_pathname (char const *, char const *)
369 ATTRIBUTE_MALLOC ATTRIBUTE_DEALLOC_FREE
370 ATTRIBUTE_RETURNS_NONNULL;
371
372 /* ed.c */
373 extern void print_ed_script (struct change *);
374 extern void pr_forward_ed_script (struct change *);
375
376 /* ifdef.c */
377 extern void print_ifdef_script (struct change *);
378
379 /* io.c */
380 extern void file_block_read (struct file_data *, size_t);
381 extern bool read_files (struct file_data[], bool);
382
383 /* normal.c */
384 extern void print_normal_script (struct change *);
385
386 /* rcs.c */
387 extern void print_rcs_script (struct change *);
388
389 /* side.c */
390 extern void print_sdiff_script (struct change *);
391
392 /* util.c */
393 extern char const change_letter[4];
394 extern char const pr_program[];
395 extern bool lines_differ (char const *, char const *) ATTRIBUTE_PURE;
396 extern lin translate_line_number (struct file_data const *, lin);
397 extern struct change *find_change (struct change *);
398 extern struct change *find_reverse_change (struct change *);
399 extern enum changes analyze_hunk (struct change *, lin *, lin *, lin *, lin *);
400 extern void begin_output (void);
401 extern void cleanup_signal_handlers (void);
402 extern void debug_script (struct change *);
403 extern _Noreturn void fatal (char const *);
404 extern void finish_output (void);
405 extern void message (char const *, ...) ATTRIBUTE_FORMAT ((printf, 1, 2));
406 extern void output_1_line (char const *, char const *, char const *,
407 char const *);
408 extern void perror_with_name (char const *);
409 extern _Noreturn void pfatal_with_name (char const *);
410 extern void print_1_line (char const *, char const * const *);
411 extern void print_1_line_nl (char const *, char const * const *, bool);
412 extern void print_message_queue (void);
413 extern void print_number_range (char, struct file_data *, lin, lin);
414 extern void print_script (struct change *, struct change * (*) (struct change *),
415 void (*) (struct change *));
416 extern void setup_output (char const *, char const *, bool);
417 extern void translate_range (struct file_data const *, lin, lin, lin *, lin *);
418
/* Contexts that can be passed to set_color_context.  */
enum color_context
{
  HEADER_CONTEXT,
  ADD_CONTEXT,
  DELETE_CONTEXT,
  RESET_CONTEXT,
  LINE_NUMBER_CONTEXT
};
427
428 XTERN bool presume_output_tty;
429
430 extern void set_color_context (enum color_context color_context);
431 extern void set_color_palette (char const *palette);
432
433 _GL_INLINE_HEADER_END