1 /* Get the contents of an URL.
2 Copyright (C) 2001-2003, 2005-2010, 2012, 2017-2023 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <getopt.h>
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <locale.h>
30 #include <unistd.h>
31
32 #include "noreturn.h"
33 #include "closeout.h"
34 #include "error.h"
35 #include "error-progname.h"
36 #include "progname.h"
37 #include "relocatable.h"
38 #include "basename-lgpl.h"
39 #include "full-write.h"
40 #include "execute.h"
41 #include "javaexec.h"
42 #include "binary-io.h"
43 #include "propername.h"
44 #include "gettext.h"
45
46 #define _(str) gettext (str)
47
48 #ifndef STDOUT_FILENO
49 # define STDOUT_FILENO 1
50 #endif
51
52
/* Only high-level toolkits, written in languages with exception handling,
   have an URL datatype and operations to fetch an URL's contents.  Such
   toolkits are Java (class java.net.URL), Qt (classes QUrl and QUrlOperator).
   We use the Java toolkit.
   Note that this program doesn't handle redirection pages; programs which
   wish to process HTML redirection tags need to include a HTML parser,
   and only full-fledged browsers like w3m, lynx, links have both
   an URL fetcher (which covers at least the protocols "http", "ftp", "file")
   and a HTML parser.  [Well, this is not true: libxml2 and Java (see
   <http://java.sun.com/products/jfc/tsc/articles/bookmarks/>) also contain
   HTML parsers.]  */
64
65
/* Whether to output something on standard error.
   This is true by default, because the user should know why we are trying to
   establish an internet connection.  Also, users get confused if a program
   produces no output for more than 10 seconds for no apparent reason.
   The flag is cleared by the -q / --quiet / --silent option in main () and
   is consulted by fetch () before each progress message.  */
static bool verbose = true;
71
/* Long options.
   Every long option maps to the short option character that the option loop
   in main () handles, and none of them takes an argument.  "--quiet" and
   "--silent" are synonyms: both map to 'q'.  */
static const struct option long_options[] =
{
  { "help", no_argument, NULL, 'h' },
  { "quiet", no_argument, NULL, 'q' },
  { "silent", no_argument, NULL, 'q' },
  { "version", no_argument, NULL, 'V' },
  { NULL, 0, NULL, 0 }  /* End-of-table marker.  */
};
81
82
/* Forward declaration of local functions.  */
/* Prints a hint (on failure) or the help text (on success), then exits with
   STATUS.  Never returns.  */
_GL_NORETURN_FUNC static void usage (int status);
/* Retrieves the contents of URL; if that does not work, the contents of FILE
   are used instead.  */
static void fetch (const char *url, const char *file);
86
87
88 int
89 main (int argc, char *argv[])
90 {
91 int optchar;
92 bool do_help;
93 bool do_version;
94
95 /* Set program name for messages. */
96 set_program_name (argv[0]);
97 error_print_progname = maybe_print_progname;
98
99 /* Set locale via LC_ALL. */
100 setlocale (LC_ALL, "");
101
102 /* Set the text message domain. */
103 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
104 textdomain (PACKAGE);
105
106 /* Ensure that write errors on stdout are detected. */
107 atexit (close_stdout);
108
109 /* Set default values for variables. */
110 do_help = false;
111 do_version = false;
112
113 /* Parse command line options. */
114 while ((optchar = getopt_long (argc, argv, "hqV", long_options, NULL)) != EOF)
115 switch (optchar)
116 {
117 case '\0': /* Long option. */
118 break;
119 case 'h': /* --help */
120 do_help = true;
121 break;
122 case 'q': /* --quiet / --silent */
123 verbose = false;
124 break;
125 case 'V': /* --version */
126 do_version = true;
127 break;
128 default:
129 usage (EXIT_FAILURE);
130 /* NOTREACHED */
131 }
132
133 /* Version information requested. */
134 if (do_version)
135 {
136 printf ("%s (GNU %s) %s\n", last_component (program_name),
137 PACKAGE, VERSION);
138 /* xgettext: no-wrap */
139 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
140 License GPLv3+: GNU GPL version 3 or later <%s>\n\
141 This is free software: you are free to change and redistribute it.\n\
142 There is NO WARRANTY, to the extent permitted by law.\n\
143 "),
144 "2001-2023", "https://gnu.org/licenses/gpl.html");
145 printf (_("Written by %s.\n"), proper_name ("Bruno Haible"));
146 exit (EXIT_SUCCESS);
147 }
148
149 /* Help is requested. */
150 if (do_help)
151 usage (EXIT_SUCCESS);
152
153 /* Test argument count. */
154 if (optind + 2 != argc)
155 error (EXIT_FAILURE, 0, _("expected two arguments"));
156
157 /* Fetch the contents. */
158 fetch (argv[optind], argv[optind + 1]);
159
160 exit (EXIT_SUCCESS);
161 }
162
163 /* Display usage information and exit. */
164 static void
165 usage (int status)
166 {
167 if (status != EXIT_SUCCESS)
168 fprintf (stderr, _("Try '%s --help' for more information.\n"),
169 program_name);
170 else
171 {
172 printf (_("\
173 Usage: %s [OPTION] URL FILE\n\
174 "), program_name);
175 printf ("\n");
176 /* xgettext: no-wrap */
177 printf (_("\
178 Fetches and outputs the contents of an URL. If the URL cannot be accessed,\n\
179 the locally accessible FILE is used instead.\n\
180 "));
181 printf ("\n");
182 printf (_("\
183 Informative output:\n"));
184 printf (_("\
185 -h, --help display this help and exit\n"));
186 printf (_("\
187 -V, --version output version information and exit\n"));
188 printf (_("\
189 -q, --quiet, --silent suppress progress indicators\n"));
190 printf ("\n");
191 /* TRANSLATORS: The first placeholder is the web address of the Savannah
192 project of this package. The second placeholder is the bug-reporting
193 email address for this package. Please add _another line_ saying
194 "Report translation bugs to <...>\n" with the address for translation
195 bugs (typically your translation team's web or email address). */
196 printf(_("\
197 Report bugs in the bug tracker at <%s>\n\
198 or by email to <%s>.\n"),
199 "https://savannah.gnu.org/projects/gettext",
200 "bug-gettext@gnu.org");
201 }
202
203 exit (status);
204 }
205
206 /* Copy a file's contents to stdout. */
207 static void
208 cat_file (const char *src_filename)
209 {
210 int src_fd;
211 char buf[4096];
212 const int buf_size = sizeof (buf);
213
214 src_fd = open (src_filename, O_RDONLY | O_BINARY);
215 if (src_fd < 0)
216 error (EXIT_FAILURE, errno, _("error while opening \"%s\" for reading"),
217 src_filename);
218
219 for (;;)
220 {
221 ssize_t n_read = read (src_fd, buf, buf_size);
222 if (n_read < 0)
223 {
224 #ifdef EINTR
225 if (errno == EINTR)
226 continue;
227 #endif
228 error (EXIT_FAILURE, errno, _("error reading \"%s\""), src_filename);
229 }
230 if (n_read == 0)
231 break;
232
233 if (full_write (STDOUT_FILENO, buf, n_read) < n_read)
234 error (EXIT_FAILURE, errno, _("error writing stdout"));
235 }
236
237 if (close (src_fd) < 0)
238 error (EXIT_FAILURE, errno, _("error after reading \"%s\""), src_filename);
239 }
240
#if USEJAVA

/* Exit code of the most recent run of the Java program.
   Written by execute_it () and inspected by fetch ().  */
static int java_exitcode;

/* Callback handed to execute_java_class () by fetch ().
   Runs the program PROG_PATH with the argument vector PROG_ARGV through
   execute () and records its exit code in java_exitcode.  PRIVATE_DATA is
   unused.
   Returns false when the exit code is 0 (success) or 2 (timed out), and
   true for every other exit code.  */
static bool
execute_it (const char *progname,
            const char *prog_path, const char * const *prog_argv,
            void *private_data)
{
  int status;

  (void) private_data;

  status = execute (progname, prog_path, prog_argv, NULL,
                    true, true, false, false, true, false, NULL);
  java_exitcode = status;

  switch (status)
    {
    case 0:   /* Success.  */
    case 2:   /* Timed out.  */
      return false;
    default:
      return true;
    }
}

#endif
261
262 /* Fetch the URL. Upon error, use the FILE as fallback. */
263 static void
264 fetch (const char *url, const char *file)
265 {
266 if (verbose)
267 {
268 fprintf (stderr, _("Retrieving %s..."), url);
269 fflush (stderr);
270 }
271
272 #if USEJAVA
273 /* First try: using Java. */
274 {
275 const char *class_name = "gnu.gettext.GetURL";
276 const char *gettextjar;
277 const char *args[2];
278
279 /* Make it possible to override the gettext.jar location. This is
280 necessary for running the testsuite before "make install". */
281 gettextjar = getenv ("GETTEXTJAR");
282 if (gettextjar == NULL || gettextjar[0] == '\0')
283 gettextjar = relocate (GETTEXTJAR);
284
285 /* Prepare arguments. */
286 args[0] = url;
287 args[1] = NULL;
288
289 /* Fetch the URL's contents. */
290 java_exitcode = 127;
291 if (!execute_java_class (class_name, &gettextjar, 1, true, NULL,
292 args,
293 false, true,
294 execute_it, NULL))
295 {
296 if (verbose)
297 {
298 if (java_exitcode == 0)
299 fprintf (stderr, _(" done.\n"));
300 else if (java_exitcode == 2)
301 fprintf (stderr, _(" timed out.\n"));
302 }
303 return;
304 }
305 }
306 #endif
307
308 /* Second try: using "wget -q -O - -T 30 url". */
309 {
310 static bool wget_tested;
311 static bool wget_present;
312
313 if (!wget_tested)
314 {
315 /* Test for presence of wget: "wget --version > /dev/null" */
316 const char *argv[3];
317 int exitstatus;
318
319 argv[0] = "wget";
320 argv[1] = "--version";
321 argv[2] = NULL;
322 exitstatus = execute ("wget", "wget", argv, NULL,
323 false, false, true, true, true, false, NULL);
324 wget_present = (exitstatus == 0);
325 wget_tested = true;
326 }
327
328 if (wget_present)
329 {
330 const char *argv[10];
331 int exitstatus;
332
333 argv[0] = "wget";
334 argv[1] = "--quiet";
335 argv[2] = "--output-document"; argv[3] = "-";
336 argv[4] = "--timeout"; argv[5] = "30";
337 argv[6] = "--user-agent"; argv[7] = "urlget";
338 argv[8] = url;
339 argv[9] = NULL;
340 exitstatus = execute ("wget", "wget", argv, NULL,
341 true, false, false, false, true, false, NULL);
342 if (exitstatus != 127)
343 {
344 if (exitstatus != 0)
345 goto failed;
346 if (verbose)
347 fprintf (stderr, _(" done.\n"));
348 return;
349 }
350 }
351 }
352
353 /* Third try: using "lynx -source url". */
354 {
355 static bool lynx_tested;
356 static bool lynx_present;
357
358 if (!lynx_tested)
359 {
360 /* Test for presence of lynx: "lynx --version > /dev/null" */
361 const char *argv[3];
362 int exitstatus;
363
364 argv[0] = "lynx";
365 argv[1] = "--version";
366 argv[2] = NULL;
367 exitstatus = execute ("lynx", "lynx", argv, NULL,
368 false, false, true, true, true, false, NULL);
369 lynx_present = (exitstatus == 0);
370 lynx_tested = true;
371 }
372
373 if (lynx_present)
374 {
375 const char *argv[5];
376 int exitstatus;
377
378 argv[0] = "lynx";
379 argv[1] = "-useragent=urlget";
380 argv[2] = "-source";
381 argv[3] = url;
382 argv[4] = NULL;
383 exitstatus = execute ("lynx", "lynx", argv, NULL,
384 true, false, false, false, true, false, NULL);
385 if (exitstatus != 127)
386 {
387 if (exitstatus != 0)
388 goto failed;
389 if (verbose)
390 fprintf (stderr, _(" done.\n"));
391 return;
392 }
393 }
394 }
395
396 /* Fourth try: using "curl --silent url". */
397 {
398 static bool curl_tested;
399 static bool curl_present;
400
401 if (!curl_tested)
402 {
403 /* Test for presence of curl: "curl --version > /dev/null" */
404 const char *argv[3];
405 int exitstatus;
406
407 argv[0] = "curl";
408 argv[1] = "--version";
409 argv[2] = NULL;
410 exitstatus = execute ("curl", "curl", argv, NULL,
411 false, false, true, true, true, false, NULL);
412 curl_present = (exitstatus == 0 || exitstatus == 2);
413 curl_tested = true;
414 }
415
416 if (curl_present)
417 {
418 const char *argv[6];
419 int exitstatus;
420
421 argv[0] = "curl";
422 argv[1] = "--silent";
423 argv[2] = "--user-agent"; argv[3] = "urlget";
424 argv[4] = url;
425 argv[5] = NULL;
426 exitstatus = execute ("curl", "curl", argv, NULL,
427 true, false, false, false, true, false, NULL);
428 if (exitstatus != 127)
429 {
430 if (exitstatus != 0)
431 goto failed;
432 if (verbose)
433 fprintf (stderr, _(" done.\n"));
434 return;
435 }
436 }
437 }
438
439 failed:
440 if (verbose)
441 fprintf (stderr, _(" failed.\n"));
442 /* Use the file as fallback. */
443 cat_file (file);
444 }