(root)/
man-db-2.12.0/
src/
decompress.c
       1  /*
       2   * decompress.c: decompression abstraction layer
       3   *
       4   * Copyright (C) 2007, 2008 Colin Watson.
       5   *
       6   * This file is part of man-db.
       7   *
       8   * man-db is free software; you can redistribute it and/or modify it
       9   * under the terms of the GNU General Public License as published by
      10   * the Free Software Foundation; either version 2 of the License, or
      11   * (at your option) any later version.
      12   *
      13   * man-db is distributed in the hope that it will be useful, but
      14   * WITHOUT ANY WARRANTY; without even the implied warranty of
      15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16   * GNU General Public License for more details.
      17   *
      18   * You should have received a copy of the GNU General Public License
      19   * along with man-db; if not, write to the Free Software Foundation,
      20   * Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
      21   */
      22  
      23  #ifdef HAVE_CONFIG_H
      24  #  include "config.h"
      25  #endif /* HAVE_CONFIG_H */
      26  
      27  #include <assert.h>
      28  #include <string.h>
      29  #include <stdbool.h>
      30  #include <stdlib.h>
      31  #include <stdio.h>
      32  #include <sys/types.h>
      33  #include <sys/stat.h>
      34  #include <unistd.h>
      35  #include <fcntl.h>
      36  
      37  #ifdef HAVE_LIBZ
      38  #  include "zlib.h"
      39  #endif /* HAVE_LIBZ */
      40  
      41  #include "pipeline.h"
      42  
      43  #include "attribute.h"
      44  #include "minmax.h"
      45  #include "xalloc.h"
      46  #include "xstrndup.h"
      47  #include "xvasprintf.h"
      48  
      49  #include "manconfig.h"
      50  
      51  #include "compression.h"
      52  #include "sandbox.h"
      53  
      54  #include "decompress.h"
      55  
      56  enum decompress_tag {
      57  	DECOMPRESS_PIPELINE,
      58  	DECOMPRESS_INPROCESS
      59  };
      60  
/* State for a decompressor whose whole output is already held in memory. */
struct decompress_inprocess {
	char *buf;		/* decompressed data; freed by decompress_free */
	size_t len;		/* number of bytes stored in buf */
	size_t offset;		/* read position within buf, 0 <= offset <= len */
	char *line_cache;	/* copy of the line most recently returned by
				 * decompress_readline/decompress_peekline, or
				 * NULL; freed on the next line call
				 */
};
      67  
/* A decompressor: either a pipeline producing the data, or an in-memory
 * buffer that already contains it.  tag says which union member is valid.
 */
struct decompress {
	enum decompress_tag tag;
	union {
		pipeline *p;	/* valid when tag == DECOMPRESS_PIPELINE */
		struct decompress_inprocess inprocess;
				/* valid when tag == DECOMPRESS_INPROCESS */
	} u;
};
      75  
      76  /* Create a new pipeline-based decompressor.  Takes ownership of p. */
      77  static decompress *decompress_new_pipeline (pipeline *p)
      78  {
      79  	decompress *d = XMALLOC (decompress);
      80  
      81  	d->tag = DECOMPRESS_PIPELINE;
      82  	d->u.p = p;
      83  
      84  	return d;
      85  }
      86  
      87  #ifdef HAVE_LIBZ
      88  
      89  /* Create a new in-process decompressor.  Takes ownership of buf. */
      90  static decompress *decompress_new_inprocess (char *buf, size_t len)
      91  {
      92  	decompress *d = XMALLOC (decompress);
      93  
      94  	d->tag = DECOMPRESS_INPROCESS;
      95  	d->u.inprocess.buf = buf;
      96  	d->u.inprocess.len = len;
      97  	d->u.inprocess.offset = 0;
      98  	d->u.inprocess.line_cache = NULL;
      99  
     100  	return d;
     101  }
     102  
     103  static void decompress_zlib (void *data MAYBE_UNUSED)
     104  {
     105  	gzFile zlibfile;
     106  	int fd;
     107  
     108  	fd = dup (STDIN_FILENO);
     109  	if (fd < 0)
     110  		return;
     111  
     112  	zlibfile = gzdopen (fd, "r");
     113  	if (!zlibfile) {
     114  		close (fd);
     115  		return;
     116  	}
     117  
     118  	for (;;) {
     119  		char buffer[4096];
     120  		int r = gzread (zlibfile, buffer, 4096);
     121  		if (r <= 0)
     122  			break;
     123  		if (fwrite (buffer, 1, (size_t) r, stdout) < (size_t) r)
     124  			break;
     125  	}
     126  
     127  	gzclose (zlibfile);
     128  	return;
     129  }
     130  
     131  /* The largest number of uncompressed bytes we're prepared to read into
     132   * memory.  (We actually allow at most one fewer byte than this, for easy
     133   * EOF detection.)
     134   *
     135   * At the time of writing, 11 out of 27959 (0.04%) installed manual pages on
     136   * the author's system were larger than this.
     137   *
     138   * We could lift this restriction if we streamed in-process decompression
     139   * instead, but that's a bit complicated: we'd also need to stream encoding
     140   * conversion, and there's relatively little point until lexgrog can rely on
     141   * preprocessor header lines rather than having to scan the whole file for
     142   * preprocessor indications.  For the time being, one-shot buffering is
     143   * cheap enough and much simpler.
     144   */
     145  #define MAX_INPROCESS 1048576
     146  
     147  static decompress *decompress_try_zlib (const char *filename)
     148  {
     149  	gzFile zlibfile;
     150  	/* We only ever call this from the parent process (and don't
     151  	 * currently use threads), and this lets us skip per-file memory
     152  	 * allocation.
     153  	 */
     154  	static char buffer[MAX_INPROCESS];
     155  	int len = 0;
     156  
     157  	zlibfile = gzopen (filename, "r");
     158  	if (!zlibfile)
     159  		return NULL;
     160  
     161  	while (len < MAX_INPROCESS) {
     162  		/* Read one more byte than we're prepared to return, in
     163  		 * order to detect EOF at the right position.  The "len >=
     164  		 * MAX_INPROCESS" check below catches the boundary case.
     165  		 */
     166  		int r = gzread (zlibfile, buffer + len, MAX_INPROCESS - len);
     167  		if (r < 0) {
     168  			gzclose (zlibfile);
     169  			return NULL;
     170  		} else if (r == 0)
     171  			break;
     172  		else
     173  			len += r;
     174  	}
     175  
     176  	gzclose (zlibfile);
     177  	if (len >= MAX_INPROCESS)
     178  		return NULL;
     179  	/* Copy input data so that we don't have potential data corruption
     180  	 * if more than one in-process decompressor is active at once.  (An
     181  	 * alternative might be to use a lock to prevent that situation.)
     182  	 */
     183  	return decompress_new_inprocess (xmemdup (buffer, (size_t) len),
     184  					 (size_t) len);
     185  }
     186  
     187  #define OPEN_FLAGS_UNUSED
     188  #else /* !HAVE_LIBZ */
     189  #define OPEN_FLAGS_UNUSED MAYBE_UNUSED
     190  #endif /* HAVE_LIBZ */
     191  
     192  extern man_sandbox *sandbox;
     193  
     194  decompress *decompress_open (const char *filename, int flags OPEN_FLAGS_UNUSED)
     195  {
     196  	pipecmd *cmd;
     197  	pipeline *p;
     198  	struct stat st;
     199  #ifdef HAVE_LIBZ
     200  	size_t filename_len;
     201  #endif /* HAVE_LIBZ */
     202  	char *ext;
     203  	struct compression *comp;
     204  
     205  	if (stat (filename, &st) < 0 || S_ISDIR (st.st_mode))
     206  		return NULL;
     207  
     208  #ifdef HAVE_LIBZ
     209  	filename_len = strlen (filename);
     210  	if (filename_len > 3 && STREQ (filename + filename_len - 3, ".gz")) {
     211  		if (flags & DECOMPRESS_ALLOW_INPROCESS) {
     212  			decompress *d = decompress_try_zlib (filename);
     213  			if (d)
     214  				return d;
     215  		}
     216  
     217  		cmd = pipecmd_new_function ("zcat", &decompress_zlib, NULL,
     218  					    NULL);
     219  		pipecmd_pre_exec (cmd, sandbox_load, sandbox_free, sandbox);
     220  		p = pipeline_new_commands (cmd, (void *) 0);
     221  		goto got_pipeline;
     222  	}
     223  #endif /* HAVE_LIBZ */
     224  
     225  	ext = strrchr (filename, '.');
     226  	if (ext) {
     227  		++ext;
     228  
     229  		for (comp = comp_list; comp->ext; ++comp) {
     230  			if (!STREQ (comp->ext, ext))
     231  				continue;
     232  
     233  			cmd = pipecmd_new_argstr (comp->prog);
     234  			pipecmd_pre_exec (cmd, sandbox_load, sandbox_free,
     235  					  sandbox);
     236  			p = pipeline_new_commands (cmd, (void *) 0);
     237  			goto got_pipeline;
     238  		}
     239  	}
     240  
     241  #ifdef HAVE_GZIP
     242  	/* HP-UX */
     243  	ext = strstr (filename, ".Z/");
     244  	if (ext) {
     245  		cmd = pipecmd_new_argstr (PROG_GUNZIP);
     246  		pipecmd_pre_exec (cmd, sandbox_load, sandbox_free, sandbox);
     247  		p = pipeline_new_commands (cmd, (void *) 0);
     248  		goto got_pipeline;
     249  	}
     250  #endif
     251  
     252  	p = pipeline_new ();
     253  
     254  got_pipeline:
     255  	pipeline_want_infile (p, filename);
     256  	pipeline_want_out (p, -1);
     257  	return decompress_new_pipeline (p);
     258  }
     259  
     260  decompress *decompress_fdopen (int fd)
     261  {
     262  	pipeline *p;
     263  #ifdef HAVE_LIBZ
     264  	pipecmd *cmd;
     265  #endif /* HAVE_LIBZ */
     266  
     267  #ifdef HAVE_LIBZ
     268  	cmd = pipecmd_new_function ("zcat", &decompress_zlib, NULL, NULL);
     269  	pipecmd_pre_exec (cmd, sandbox_load, sandbox_free, sandbox);
     270  	p = pipeline_new_commands (cmd, (void *) 0);
     271  #else /* HAVE_LIBZ */
     272  	p = pipeline_new ();
     273  #endif /* HAVE_LIBZ */
     274  
     275  	pipeline_want_in (p, fd);
     276  	pipeline_want_out (p, -1);
     277  	return decompress_new_pipeline (p);
     278  }
     279  
     280  bool ATTRIBUTE_PURE decompress_is_pipeline (decompress *d)
     281  {
     282  	return d->tag == DECOMPRESS_PIPELINE;
     283  }
     284  
     285  pipeline * ATTRIBUTE_PURE decompress_get_pipeline (decompress *d)
     286  {
     287  	assert (d->tag == DECOMPRESS_PIPELINE);
     288  	return d->u.p;
     289  }
     290  
     291  const char * ATTRIBUTE_PURE decompress_inprocess_buf (decompress *d)
     292  {
     293  	assert (d->tag == DECOMPRESS_INPROCESS);
     294  	return d->u.inprocess.buf;
     295  }
     296  
     297  size_t ATTRIBUTE_PURE decompress_inprocess_len (decompress *d)
     298  {
     299  	assert (d->tag == DECOMPRESS_INPROCESS);
     300  	return d->u.inprocess.len;
     301  }
     302  
     303  void decompress_inprocess_replace (decompress *d, char *buf, size_t len)
     304  {
     305  	assert (d->tag == DECOMPRESS_INPROCESS);
     306  
     307  	free (d->u.inprocess.line_cache);
     308  	free (d->u.inprocess.buf);
     309  
     310  	d->u.inprocess.buf = buf;
     311  	d->u.inprocess.len = len;
     312  	d->u.inprocess.offset = 0;
     313  	d->u.inprocess.line_cache = NULL;
     314  }
     315  
     316  void decompress_start (decompress *d)
     317  {
     318  	if (d->tag == DECOMPRESS_PIPELINE)
     319  		pipeline_start (d->u.p);
     320  }
     321  
     322  const char *decompress_read (decompress *d, size_t *len)
     323  {
     324  	if (d->tag == DECOMPRESS_PIPELINE)
     325  		return pipeline_read (d->u.p, len);
     326  	else {
     327  		const char *ret;
     328  		assert (d->tag == DECOMPRESS_INPROCESS);
     329  		*len = MIN (*len, d->u.inprocess.len - d->u.inprocess.offset);
     330  		ret = d->u.inprocess.buf + d->u.inprocess.offset;
     331  		d->u.inprocess.offset += *len;
     332  		return ret;
     333  	}
     334  }
     335  
     336  const char *decompress_peek (decompress *d, size_t *len)
     337  {
     338  	if (d->tag == DECOMPRESS_PIPELINE)
     339  		return pipeline_peek (d->u.p, len);
     340  	else {
     341  		assert (d->tag == DECOMPRESS_INPROCESS);
     342  		*len = MIN (*len, d->u.inprocess.len - d->u.inprocess.offset);
     343  		return d->u.inprocess.buf + d->u.inprocess.offset;
     344  	}
     345  }
     346  
     347  void decompress_peek_skip (decompress *d, size_t len)
     348  {
     349  	if (d->tag == DECOMPRESS_PIPELINE)
     350  		pipeline_peek_skip (d->u.p, len);
     351  	else {
     352  		assert (d->tag == DECOMPRESS_INPROCESS);
     353  		assert (len <= d->u.inprocess.len - d->u.inprocess.offset);
     354  		d->u.inprocess.offset += len;
     355  	}
     356  }
     357  
     358  const char *decompress_readline (decompress *d)
     359  {
     360  	if (d->tag == DECOMPRESS_PIPELINE)
     361  		return pipeline_readline (d->u.p);
     362  	else {
     363  		const char *cur, *end;
     364  		assert (d->tag == DECOMPRESS_INPROCESS);
     365  		/* This isn't on the hot path (only called for a few lines
     366  		 * at the start of the file), so we can afford to
     367  		 * reallocate.
     368  		 */
     369  		if (d->u.inprocess.line_cache) {
     370  			free (d->u.inprocess.line_cache);
     371  			d->u.inprocess.line_cache = NULL;
     372  		}
     373  		cur = d->u.inprocess.buf + d->u.inprocess.offset;
     374  		end = memchr (cur, '\n',
     375  			      d->u.inprocess.len - d->u.inprocess.offset);
     376  		if (end) {
     377  			d->u.inprocess.line_cache = xstrndup
     378  				(cur, end - cur + 1);
     379  			d->u.inprocess.offset += end - cur + 1;
     380  			return d->u.inprocess.line_cache;
     381  		} else
     382  			return NULL;
     383  	}
     384  }
     385  
     386  const char *decompress_peekline (decompress *d)
     387  {
     388  	if (d->tag == DECOMPRESS_PIPELINE)
     389  		return pipeline_peekline (d->u.p);
     390  	else {
     391  		const char *cur, *end;
     392  		assert (d->tag == DECOMPRESS_INPROCESS);
     393  		/* This isn't on the hot path (only called for a few lines
     394  		 * at the start of the file), so we can afford to
     395  		 * reallocate.
     396  		 */
     397  		if (d->u.inprocess.line_cache) {
     398  			free (d->u.inprocess.line_cache);
     399  			d->u.inprocess.line_cache = NULL;
     400  		}
     401  		cur = d->u.inprocess.buf + d->u.inprocess.offset;
     402  		end = memchr (cur, '\n',
     403  			      d->u.inprocess.len - d->u.inprocess.offset);
     404  		if (end) {
     405  			d->u.inprocess.line_cache = xstrndup
     406  				(cur, end - cur + 1);
     407  			return d->u.inprocess.line_cache;
     408  		} else
     409  			return NULL;
     410  	}
     411  }
     412  
     413  int decompress_wait (decompress *d)
     414  {
     415  	if (d->tag == DECOMPRESS_PIPELINE)
     416  		return pipeline_wait (d->u.p);
     417  	else {
     418  		assert (d->tag == DECOMPRESS_INPROCESS);
     419  		return 0;
     420  	}
     421  }
     422  
     423  void decompress_free (decompress *d)
     424  {
     425  	if (!d)
     426  		return;
     427  	if (d->tag == DECOMPRESS_PIPELINE)
     428  		pipeline_free (d->u.p);
     429  	else {
     430  		assert (d->tag == DECOMPRESS_INPROCESS);
     431  		free (d->u.inprocess.line_cache);
     432  		free (d->u.inprocess.buf);
     433  	}
     434  	free (d);
     435  }