%top{
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif /* HAVE_CONFIG_H */
#include "manconfig.h"
/* Flex emits several functions which might reasonably have various
* attributes applied and many unused macros; none of these are our problem.
*/
#if GNUC_PREREQ(8,0)
# pragma GCC diagnostic ignored "-Wsuggest-attribute=malloc"
#endif
#pragma GCC diagnostic ignored "-Wsuggest-attribute=pure"
#pragma GCC diagnostic ignored "-Wunused-macros"
}
%{
/*
* lexgrog.l: extract 'whatis' info from nroff man / formatted cat pages.
*
* Copyright (C) 1994, 1995 Graeme W. Wilford. (Wilf.)
* Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
* 2011, 2012 Colin Watson.
*
* This file is part of man-db.
*
* man-db is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* man-db is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with man-db; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Wed Oct 12 18:46:11 BST 1994 Wilf. (G.Wilford@ee.surrey.ac.uk)
*
* CJW: Detect grap and vgrind. Understand fill requests. Other improvements
* in the syntax accepted.
*/
#include <sys/stat.h>
#include <errno.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "error.h"
#include "xalloc.h"
#include "gettext.h"
#define _(String) gettext (String)
#include "encodings.h"
#include "pipeline.h"
#include "sandbox.h"
#include "security.h"
#include "util.h"
#include "decompress.h"
#include "lexgrog.h"
#include "manconv.h"
#include "manconv_client.h"
#define YY_READ_BUF_SIZE 1024
#define MAX_NAME 8192
/* Indices into the filters[] array, one slot per preprocessor that lexgrog
 * can detect.  The enumerator order is also the order in which detected
 * filter letters are copied into the final filter string.
 */
enum {
TBL_FILTER = 0, /* tbl */
EQN_FILTER, /* eqn */
PIC_FILTER, /* pic */
GRAP_FILTER, /* grap */
REF_FILTER, /* refer */
VGRIND_FILTER, /* vgrind */
MAX_FILTERS /* number of filter slots; not a filter itself */
};
#define ARRAY_SIZE(array) (sizeof (array) / sizeof ((array)[0]))
extern man_sandbox *sandbox;
/* One entry of a lookup table mapping an escape name, as written in the
 * page source, to the text appended to the whatis string in its place.
 */
struct macro {
const char *name; /* lookup key; tables are kept in strcmp order for bsearch */
const char *value; /* replacement text */
};
static const struct macro glyphs[] = {
/* It is vital to keep these in strcmp order (sort -t\" -k2)! They
* will be searched using bsearch.
* Data from groff_char(7), although I have omitted some that are
* particularly unlikely to be used in NAME sections.
*/
{ "'A", "Á" },
{ "'C", "Ć" },
{ "'E", "É" },
{ "'I", "Í" },
{ "'O", "Ó" },
{ "'U", "Ú" },
{ "'Y", "Ý" },
{ "'a", "á" },
{ "'c", "ć" },
{ "'e", "é" },
{ "'i", "í" },
{ "'o", "ó" },
{ "'u", "ú" },
{ "'y", "ý" },
{ ",C", "Ç" },
{ ",c", "ç" },
{ "-D", "Ð" },
{ ".i", "ı" },
{ "/L", "Ł" },
{ "/O", "Ø" },
{ "/l", "ł" },
{ "/o", "ø" },
{ ":A", "Ä" },
{ ":E", "Ë" },
{ ":I", "Ï" },
{ ":O", "Ö" },
{ ":U", "Ü" },
{ ":Y", "Ÿ" },
{ ":a", "ä" },
{ ":e", "ë" },
{ ":i", "ï" },
{ ":o", "ö" },
{ ":u", "ü" },
{ ":y", "ÿ" },
{ "AE", "Æ" },
{ "Bq", "„" },
{ "Fc", "»" },
{ "Fi", "ffi" },
{ "Fl", "ffl" },
{ "Fo", "«" },
{ "IJ", "IJ" },
{ "OE", "Œ" },
{ "Sd", "ð" },
{ "TP", "Þ" },
{ "Tp", "þ" },
{ "^A", "Â" },
{ "^E", "Ê" },
{ "^I", "Î" },
{ "^O", "Ô" },
{ "^U", "Û" },
{ "^a", "â" },
{ "^e", "ê" },
{ "^i", "î" },
{ "^o", "ô" },
{ "^u", "û" },
{ "`A", "À" },
{ "`E", "È" },
{ "`I", "Ì" },
{ "`O", "Ò" },
{ "`U", "Ù" },
{ "`a", "à" },
{ "`e", "è" },
{ "`i", "ì" },
{ "`o", "ò" },
{ "`u", "ù" },
{ "a\"", "˝" },
{ "a-", "¯" },
{ "a.", "˙" },
{ "a^", "^" },
{ "aa", "´" },
{ "ab", "˘" },
{ "ac", "¸" },
{ "ad", "¨" },
{ "ae", "æ" },
{ "ah", "ˇ" },
{ "ao", "˚" },
{ "aq", "'" },
{ "a~", "~" },
{ "bq", "‚" },
{ "cq", "’" },
{ "dq", "\"" },
{ "em", "—" },
{ "en", "–" },
{ "fc", "›" },
{ "ff", "ff" },
{ "fi", "fi" },
{ "fl", "fl" },
{ "fo", "‹" },
{ "ga", "`" },
{ "ha", "^" },
{ "ho", "˛" },
{ "hy", "‐" },
{ "ij", "ij" },
{ "lq", "“" },
{ "oA", "Å" },
{ "oa", "å" },
{ "oe", "œ" },
{ "oq", "‘" },
{ "r!", "¡" },
{ "r?", "¿" },
{ "rq", "”" },
{ "ss", "ß" },
{ "ti", "~" },
{ "vS", "Š" },
{ "vZ", "Ž" },
{ "vs", "š" },
{ "vz", "ž" },
{ "~A", "Ã" },
{ "~N", "Ñ" },
{ "~O", "Õ" },
{ "~a", "ã" },
{ "~n", "ñ" },
{ "~o", "õ" }
};
static const struct macro perldocs[] = {
/* It is vital to keep these in strcmp order (sort -t\" -k2)! They
* will be searched using bsearch.
* Data from Pod/Man.pm.
*/
{ "--", "-" },
{ "Aq", "'" },
{ "C'", "'" },
{ "C+", "C++" },
{ "C`", "`" },
{ "L\"", "\"" },
{ "PI", "π" },
{ "R\"", "\"" }
};
static void add_str_to_whatis (const char *string, size_t length);
static void add_char_to_whatis (unsigned char c);
static void add_separator_to_whatis (void);
static void add_wordn_to_whatis (const char *string, size_t length);
static void add_word_to_whatis (const char *string);
static void add_glyph_to_whatis (const char *string, size_t length);
static void add_perldoc_to_whatis (const char *string, size_t length);
static void mdoc_text (const char *string);
static void newline_found (void);
static char newname[MAX_NAME];
static char *p_name;
static const char *fname;
static char filters[MAX_FILTERS];
static bool fill_mode;
static bool waiting_for_quote;
static decompress *decomp;
/* Replace flex's default input routine so that the scanner pulls its input
 * from the decompressor `decomp'.  decompress_read is asked for a block with
 * size initialised to max_size; when it yields a non-empty block, that block
 * is copied into buf and result is set to the number of bytes copied,
 * otherwise result is set to YY_NULL.
 * NOTE(review): presumably decompress_read updates size to the number of
 * bytes actually returned and never exceeds the requested amount -- confirm.
 * NOTE(review): the NUL stored at buf[size] lies one byte past the copied
 * data, possibly at buf[max_size]; this looks like it relies on flex keeping
 * spare bytes after the requested region -- confirm.
 */
#define YY_INPUT(buf,result,max_size) { \
size_t size = max_size; \
const char *block = decompress_read (decomp, &size); \
if (block && size != 0) { \
memcpy (buf, block, size); \
buf[size] = '\0'; \
result = size; \
} else \
result = YY_NULL; \
}
#define YY_NO_INPUT
%}
%option ecs meta-ecs
%option 8bit batch caseful never-interactive
%option nostdinit
%option warn
%option noyywrap nounput
%x MAN_PRENAME
%x MAN_NAME
%x MAN_DESC
%x MAN_DESC_AT
%x MAN_DESC_BSX
%x MAN_DESC_BX
%x MAN_DESC_BX_RELEASE
%x MAN_DESC_DQ
%x MAN_DESC_FX
%x MAN_DESC_NX
%x MAN_DESC_OX
%x CAT_NAME
%x CAT_FILE
%x MAN_FILE
%x CAT_REST
%x MAN_REST
%x FORCE_EXIT
digit [[:digit:]]
upper [[:upper:]]
alpha [[:alpha:]]
blank [[:blank:]]
blank_eol [[:blank:]\r\n]
word [[:alnum:]][^[:blank:]\r\n]*
eol \r?\n
bol {eol}+
next {eol}*
empty {eol}{blank}*
indent {eol}{blank}+
dbl_quote \"
font_change \\f([[:upper:]1-4]|\({upper}{2})
size_change \\s[+-]?{digit}
style_change ({font_change}{size_change}?|{size_change}{font_change}?)
typeface \.(B[IR]?|I[BR]?|R[BI]|S[BM])
sec_request \.[Ss][HhYySs]
comment ['.]\\{dbl_quote}
/* Please add to this list if you know how. */
/* Note that, since flex only supports UTF-8 by accident, character classes
* including non-ASCII characters must be written out as (a|b|c|d) rather
* than [abcd].
*/
ar_name (اﻹسم|الإسم)
/* ИМЕ also works for mk */
bg_name И(М|м)(Е|е)
cs_name (J[Mm](É|é|\\\('[Ee]|E|e)[Nn][Oo]|N(Á|á)[Zz][Ee][Vv])
da_name N[Aa][Vv][Nn]
de_name B[Ee][Zz][Ee][Ii][Cc][Hh][Nn][Uu][Nn][Gg]
en_name N[Aa][Mm][Ee]
eo_name N[Oo][Mm][Oo]
es_name N[Oo][Mm][Bb][Rr][Ee]
fa_name نام
fi_name N[Ii][Mm][Ii]
fr_name N[Oo][Mm]
hu_name N(É|é|\\\('[Ee]|E|e)[Vv]
id_name N[Aa][Mm][Aa]
/* NOME also works for gl, pt */
it_name N[Oo][Mm][Ee]
ja_name (名|̾)(前|称)
ko_name (이름|명칭)
latin_name N[Oo][Mm][Ee][Nn]
lt_name P[Aa][Vv][Aa][Dd][Ii][Nn][Ii][Mm][Aa][Ss]
nl_name N[Aa][Aa][Mm]
pl_name N[Aa][Zz][Ww][Aa]
ro_name N[Uu][Mm][Ee]
ru_name (И(М|м)(Я|я)|Н(А|а)(З|з)(В|в)(А|а)(Н|н)(И|и)(Е|е)|Н(А|а)(И|и)(М|м)(Е|е)(Н|н)(О|о)(В|в)(А|а)(Н|н)(И|и)(Е|е))
sk_name M[Ee][Nn][Oo]
sr_name (И(М|м)(Е|е)|Н(А|а)(З|з)(И|и)(В|в))
srlatin_name (I[Mm][Ee]|N[Aa][Zz][Ii][Vv])
sv_name N[Aa][Mm][Nn]
ta_name பெய
tr_name (A[Dd]|(İ|i)S(İ|i)M)
uk_name Н(А|а)(З|з)(В|в)(А|а)
vi_name T(Ê|ê)[Nn]
zh_CN_name 名{blank}?(称|字){blank}?.*
zh_TW_name (名{blank}?(稱|字)|命令名){blank}?.*
name ({ar_name}|{bg_name}|{cs_name}|{da_name}|{de_name}|{en_name}|{eo_name}|{es_name}|{fa_name}|{fi_name}|{fr_name}|{hu_name}|{id_name}|{it_name}|{ja_name}|{ko_name}|{latin_name}|{lt_name}|{nl_name}|{pl_name}|{ro_name}|{ru_name}|{sk_name}|{sr_name}|{srlatin_name}|{sv_name}|{ta_name}|{tr_name}|{uk_name}|{vi_name}|{zh_CN_name}|{zh_TW_name})
name_sec {dbl_quote}?{style_change}?{name}{style_change}?({blank}*{dbl_quote})?
/* eptgrv : eqn, pic, tbl, grap, refer, vgrind */
tbl_request \.TS
eqn_request \.EQ
pic_request \.PS
grap_request \.G1
ref1_request \.R1
ref2_request \.\[
vgrind_request \.vS
%%
/* begin NAME section processing */
<MAN_FILE>{sec_request}{blank_eol}+{name_sec}{blank}* BEGIN (MAN_PRENAME);
<CAT_FILE>{empty}{2,}{name}{blank}*{indent} BEGIN (CAT_NAME);
/* general text matching */
<MAN_FILE>{
\.[^Ss\r\n].* |
\..{0,3}{dbl_quote}?.{0,4}{dbl_quote}? |
{comment}.* |
.|{eol}
}
<CAT_FILE>{
.{1,9} |
[ ]* |
{eol}{2,} |
.|{eol}
}
<MAN_REST>{
{bol}{tbl_request} filters[TBL_FILTER] = 't';
{bol}{eqn_request} filters[EQN_FILTER] = 'e';
{bol}{pic_request} filters[PIC_FILTER] = 'p';
{bol}{grap_request} filters[GRAP_FILTER] = 'g';
{bol}{ref1_request} |
{bol}{ref2_request} filters[REF_FILTER] = 'r';
{bol}{vgrind_request} filters[VGRIND_FILTER] = 'v';
}
<MAN_REST><<EOF>> { /* exit */
*p_name = '\0'; /* terminate the string */
yyterminate ();
}
<MAN_REST>.+|{eol}
/* rules to end NAME section processing */
<FORCE_EXIT>.|{eol} { /* forced exit */
*p_name = '\0'; /* terminate the string */
yyterminate ();
}
<MAN_PRENAME>{bol}{sec_request}{blank}* |
<MAN_PRENAME><<EOF>> { /* no NAME at all */
*p_name = '\0';
BEGIN (MAN_REST);
}
/* need to match whole string so that we beat the following roff catch-all,
so use yyless to push back the name */
<MAN_PRENAME>{
{bol}{typeface}{blank}.* |
{bol}\.Tn{blank}.* |
{bol}\.ft{blank}.* |
{bol}\.V[be]{blank}.* |
{bol}\.IX{blank}.* |
{bol}\.Nm{blank}.* {
yyless (0);
BEGIN (MAN_NAME);
}
}
/* Skip over initial roff requests in NAME section. The use of yyless here
is evil. */
<MAN_PRENAME>{bol}['.].*
<MAN_PRENAME>{empty}{eol} yyless (1);
<MAN_PRENAME>.|{eol} {
yyless (0);
BEGIN (MAN_NAME);
}
<MAN_NAME,MAN_DESC>{
{bol}{sec_request}{blank}* | /* Another section */
{bol}\.X{upper}{blank}+ | /* special - hpux */
{bol}\.sp{blank}* | /* vertical spacing */
{bol}\.ig{blank}* | /* block comment */
{bol}\.de[1i]?{blank}* | /* macro definition */
{bol}\.i[ef]{blank}* | /* conditional */
{empty}{bol}.+ |
<<EOF>> { /* terminate the string */
*p_name = '\0';
BEGIN (MAN_REST);
}
}
<CAT_NAME>{
{bol}S[yYeE] |
{eol}{2,}.+ |
{next}__ { /* terminate the string */
*p_name = '\0';
BEGIN (CAT_REST);
yyterminate ();
}
}
/* ROFF request removal */
<MAN_NAME,MAN_DESC>{
/* some include quoting; dealing with this is unpleasant */
{bol}{typeface}{blank}+\" {
newline_found ();
waiting_for_quote = true;
}
{bol}{typeface}{blank}+ | /* type face commands */
{bol}\.Tn{blank}+ | /* mdoc trade name */
{bol}\.ft{blank}.* | /* font change */
{bol}\.V[be]{blank}.* | /* pod2man, verbatim mode */
{bol}\.IX{blank}.* | /* .IX line */
{bol}\.Nm{blank}+ | /* mdoc name */
{bol}\.PD{blank}* | /* paragraph spacing */
{bol}\\& | /* non-breaking space */
{next}{comment}.* { /* per line comments */
newline_found ();
}
}
/* No-op requests */
<MAN_NAME,MAN_DESC>{
{bol}\.{blank}*$ newline_found ();
{bol}\.\.$ newline_found ();
}
/* Toggle fill mode */
<MAN_NAME,MAN_DESC>{
{bol}\.nf.* fill_mode = false;
{bol}\.fi.* fill_mode = true;
}
<CAT_NAME>-{eol}{blank_eol}* /* strip continuations */
/* convert to DASH */
<MAN_NAME>{
{next}{blank}*\\\((mi|hy|em|en){blank}* |
{next}{blank}*\\\[(mi|hy|em|en)\]{blank}* |
{next}{blank_eol}+[-\\]-{blank}* |
{next}{blank_eol}*[-\\]-{blank}+ |
{bol}\.Nd{blank}* {
add_separator_to_whatis ();
BEGIN (MAN_DESC);
}
}
<CAT_NAME>{next}{blank}+-{1,2}{blank_eol}+ add_separator_to_whatis ();
/* escape sequences and special characters */
<MAN_NAME,MAN_DESC>{
  /* simple escapes that map to a single output character */
  {next}\\[\\e]				add_char_to_whatis ('\\');
  {next}\\('|\(aa)			add_char_to_whatis ('\'');
  {next}\\(`|\(ga)			add_char_to_whatis ('`');
  {next}\\(-|\((mi|hy|em|en))		add_char_to_whatis ('-');
  {next}\\\[(mi|hy|em|en)\]		add_char_to_whatis ('-');
  {next}\\\.				add_char_to_whatis ('.');
  {next}((\\[ 0t~])|[ ]|\t)*		add_char_to_whatis (' ');
  {next}\\\((ru|ul)			add_char_to_whatis ('_');
  {next}\\\\t				add_char_to_whatis ('\t');
  {next}\\[|^&!%acdpruz{}\r\n]		/* various useless control chars */
  {next}\\[bhlLvx]{blank}*'[^']+'	/* various inline functions */
  {next}\\\$[1-9]			/* interpolate arg */
  /* roff named glyphs */
  /* {next} may have consumed newlines, so index from the end of the match */
  {next}\\\(..				add_glyph_to_whatis (yytext + yyleng - 2, 2);
  \\\[..\]				add_glyph_to_whatis (yytext + yyleng - 3, 2);
  /* perldoc strings */
  /* same reasoning: the name sits at a fixed distance from the match end */
  {next}\\\*\(..			add_perldoc_to_whatis (yytext + yyleng - 2, 2);
  \\\*\[..\]				add_perldoc_to_whatis (yytext + yyleng - 3, 2);
  {next}\\\*.				add_perldoc_to_whatis (yytext + yyleng - 1, 1);
  {next}\\["#].*			/* comment */
  {next}{font_change}			/* font changes */
  {next}\\k{alpha}			/* mark input place in register */
  {next}\\n(\({alpha})?{alpha}		/* interpolate number register */
  {next}\\o\"[^"]+\"			/* overstrike chars */
  {next}{size_change}			/* size changes */
  {next}\\w{blank}*'[^']+'[^ \t]*	/* width of string */
  {next}\\				/* catch all */
  {next}\(\\\|\){blank}*		/* function() in hpux */
}
/* some people rather ambitiously use non-trivial mdoc macros in NAME
sections; cope with those that have been seen in the wild, and a few
more */
<MAN_DESC>{
{bol}\.At{blank}* BEGIN (MAN_DESC_AT);
{bol}\.Bsx{blank}* BEGIN (MAN_DESC_BSX);
{bol}\.Bx{blank}* BEGIN (MAN_DESC_BX);
{bol}\.Fx{blank}* BEGIN (MAN_DESC_FX);
{bol}\.Nx{blank}* BEGIN (MAN_DESC_NX);
{bol}\.Ox{blank}* BEGIN (MAN_DESC_OX);
{bol}\.Ux{blank}* add_word_to_whatis ("UNIX");
{bol}\.Dq{blank}* {
add_word_to_whatis ("\"");
BEGIN (MAN_DESC_DQ);
}
}
<MAN_DESC_AT>{
  /* argument of the mdoc .At macro: expand the known version keywords */
  32v{blank}*		mdoc_text ("Version 32V AT&T UNIX");
  v1{blank}*		mdoc_text ("Version 1 AT&T UNIX");
  v2{blank}*		mdoc_text ("Version 2 AT&T UNIX");
  v3{blank}*		mdoc_text ("Version 3 AT&T UNIX");
  v4{blank}*		mdoc_text ("Version 4 AT&T UNIX");
  v5{blank}*		mdoc_text ("Version 5 AT&T UNIX");
  v6{blank}*		mdoc_text ("Version 6 AT&T UNIX");
  v7{blank}*		mdoc_text ("Version 7 AT&T UNIX");
  V{blank}*		mdoc_text ("AT&T System V UNIX");
  /* the dot is literal; unescaped it would also match e.g. "V 1" */
  V\.1{blank}*		mdoc_text ("AT&T System V.1 UNIX");
  V\.2{blank}*		mdoc_text ("AT&T System V.2 UNIX");
  V\.3{blank}*		mdoc_text ("AT&T System V.3 UNIX");
  V\.4{blank}*		mdoc_text ("AT&T System V.4 UNIX");
  /* no recognised keyword: push the input back and emit the generic name */
  .|{eol}		{
				yyless (0);
				mdoc_text ("AT&T UNIX");
			}
}
<MAN_DESC_BSX>{
{word} {
add_word_to_whatis ("BSD/OS");
add_wordn_to_whatis (yytext, yyleng);
BEGIN (MAN_DESC);
}
.|{eol} {
yyless (0);
mdoc_text ("BSD/OS");
}
}
<MAN_DESC_BX>{
  /* argument of the mdoc .Bx macro */
  -alpha{blank}*	mdoc_text ("BSD (currently in alpha test)");
  -beta{blank}*		mdoc_text ("BSD (currently in beta test)");
  -devel{blank}*	mdoc_text ("BSD (currently under development)");
  /* a version word: emit it followed by "BSD", then look for a release name */
  {word}{blank}*	{
				add_wordn_to_whatis (yytext, yyleng);
				add_str_to_whatis ("BSD", 3);
				BEGIN (MAN_DESC_BX_RELEASE);
			}
  /* no argument: push the input back and emit plain "BSD" */
  .|{eol}		{
				yyless (0);
				mdoc_text ("BSD");
			}
}
<MAN_DESC_BX_RELEASE>{
[Rr]eno{blank}* {
add_str_to_whatis ("-Reno", 5);
BEGIN (MAN_DESC);
}
[Tt]ahoe{blank}* {
add_str_to_whatis ("-Tahoe", 6);
BEGIN (MAN_DESC);
}
[Ll]ite{blank}* {
add_str_to_whatis ("-Lite", 5);
BEGIN (MAN_DESC);
}
[Ll]ite2{blank}* {
add_str_to_whatis ("-Lite2", 6);
BEGIN (MAN_DESC);
}
.|{eol} {
yyless (0);
BEGIN (MAN_DESC);
}
}
<MAN_DESC_DQ>.* {
add_str_to_whatis (yytext, yyleng);
add_char_to_whatis ('"');
BEGIN (MAN_DESC);
}
<MAN_DESC_FX>{
{word} {
add_word_to_whatis ("FreeBSD");
add_wordn_to_whatis (yytext, yyleng);
BEGIN (MAN_DESC);
}
.|{eol} {
yyless (0);
mdoc_text ("FreeBSD");
}
}
<MAN_DESC_NX>{
{word} {
add_word_to_whatis ("NetBSD");
add_wordn_to_whatis (yytext, yyleng);
BEGIN (MAN_DESC);
}
.|{eol} {
yyless (0);
mdoc_text ("NetBSD");
}
}
<MAN_DESC_OX>{
{word} {
add_word_to_whatis ("OpenBSD");
add_wordn_to_whatis (yytext, yyleng);
BEGIN (MAN_DESC);
}
.|{eol} {
yyless (0);
mdoc_text ("OpenBSD");
}
}
/* collapse spaces, escaped spaces, tabs, newlines to a single space */
<CAT_NAME>{next}((\\[ ])|{blank})* add_char_to_whatis (' ');
/* a ROFF break request, a paragraph request, or an indentation change
usually means we have multiple whatis definitions, provide a separator
for later processing */
<MAN_NAME,MAN_DESC>{
{bol}\.br{blank}* |
{bol}\.LP{blank}* |
{bol}\.PP{blank}* |
{bol}\.P{blank}* |
{bol}\.IP{blank}.* |
{bol}\.HP{blank}.* |
{bol}\.RS{blank}.* |
{bol}\.RE{blank}.* {
add_char_to_whatis ((char) 0x11);
BEGIN (MAN_NAME);
}
}
/* any other roff request we don't recognise terminates definitions */
<MAN_NAME,MAN_DESC>{bol}['.] {
*p_name = '\0';
BEGIN (MAN_REST);
}
/* pass words as a chunk. speed optimization */
<MAN_NAME,MAN_DESC>[[:alnum:]]* add_str_to_whatis (yytext, yyleng);
/* normalise the comma (,) separators */
<CAT_NAME>{blank}*,[ \t\r\n]* |
<MAN_NAME,MAN_DESC>{blank}*,{blank}* add_str_to_whatis (", ", 2);
<CAT_NAME,MAN_NAME,MAN_DESC>{bol}. {
newline_found ();
add_char_to_whatis (yytext[yyleng - 1]);
}
<CAT_NAME,MAN_NAME,MAN_DESC>. add_char_to_whatis (*yytext);
/* default EOF rule */
<<EOF>> return 1;
%%
/* print warning and force scanner to terminate */
static void too_big (void)
{
/* Even though MAX_NAME is a macro expanding to a constant, we
* translate it using ngettext anyway because that will make it
* easier to change the macro later.
*/
error (0, 0,
ngettext ("warning: whatis for %s exceeds %d byte, "
"truncating.",
"warning: whatis for %s exceeds %d bytes, "
"truncating.", MAX_NAME),
fname, MAX_NAME);
BEGIN (FORCE_EXIT);
}
/* Append LENGTH bytes of STRING to the whatis buffer.  If they would not
 * fit (leaving room for the terminator), nothing is copied and too_big ()
 * is called instead.  The buffer is not NUL-terminated here.
 */
static void add_str_to_whatis (const char *string, size_t length)
{
	if (p_name - newname + length >= MAX_NAME) {
		too_big ();
		return;
	}

	(void) strncpy (p_name, string, length);
	p_name += length;
}
/* Append the single byte C to the whatis buffer, or call too_big () if the
 * buffer is full.  While waiting_for_quote is set, the next '"' is dropped
 * rather than stored, and the flag is cleared.
 */
static void add_char_to_whatis (unsigned char c)
{
	/* the capacity check deliberately comes first, as it always has */
	if (p_name - newname >= MAX_NAME - 1) {
		too_big ();
		return;
	}

	if (waiting_for_quote && c == '"') {
		waiting_for_quote = false;
		return;
	}

	*p_name = c;
	p_name++;
}
/* Append the " - " name/description separator to the whatis buffer.  The
 * leading space is left out when the buffer is empty or already ends in a
 * space.
 */
static void add_separator_to_whatis (void)
{
	bool need_space = p_name != newname && p_name[-1] != ' ';

	if (need_space)
		add_char_to_whatis (' ');
	add_str_to_whatis ("- ", 2);
}
/* Append the first LENGTH bytes of STRING to the whatis buffer as a word:
 * a separating space is added first unless the buffer is empty or already
 * ends in one, and trailing spaces in STRING are not copied.
 */
static void add_wordn_to_whatis (const char *string, size_t length)
{
	const char *end = string + length;

	if (p_name != newname && p_name[-1] != ' ')
		add_char_to_whatis (' ');

	/* strip trailing spaces from the word */
	while (end != string && end[-1] == ' ')
		end--;

	if (end != string)
		add_str_to_whatis (string, (size_t) (end - string));
}
/* Convenience wrapper around add_wordn_to_whatis for NUL-terminated
 * strings.
 */
static void add_word_to_whatis (const char *string)
{
	size_t length = strlen (string);

	add_wordn_to_whatis (string, length);
}
/* Search key handed to bsearch by add_macro_to_whatis: a name given as a
 * pointer plus explicit length; compare_macro only reads the first
 * `length' bytes.
 */
struct compare_macro_key {
const char *string; /* start of the name being looked up */
size_t length; /* number of bytes of string that form the name */
};
/* bsearch comparison callback.  LEFT is a struct compare_macro_key and
 * RIGHT is a struct macro table entry.  Returns the strncmp ordering of
 * the key against the entry name over the key's length; if those bytes
 * match but the entry name is longer, the key sorts first (-1).
 */
static int compare_macro (const void *left, const void *right)
{
	const struct compare_macro_key *needle = left;
	const struct macro *entry = right;
	int order = strncmp (needle->string, entry->name, needle->length);

	if (order != 0)
		return order;

	/* The first needle->length bytes matched, so entry->name holds at
	 * least that many bytes plus a terminator and this index is valid.
	 */
	return entry->name[needle->length] ? -1 : 0;
}
static void add_macro_to_whatis (const struct macro *macros, size_t n_macros,
const char *string, size_t length)
{
struct compare_macro_key key;
const struct macro *macro;
key.string = string;
key.length = length;
macro = bsearch (&key, macros, n_macros, sizeof (struct macro),
compare_macro);
if (macro)
add_str_to_whatis (macro->value, strlen (macro->value));
}
static void add_glyph_to_whatis (const char *string, size_t length)
{
add_macro_to_whatis (glyphs, ARRAY_SIZE (glyphs), string, length);
}
static void add_perldoc_to_whatis (const char *string, size_t length)
{
add_macro_to_whatis (perldocs, ARRAY_SIZE (perldocs), string, length);
}
/* Emit the expansion STRING of an mdoc macro as a word and return the
 * scanner to the MAN_DESC state.
 */
static void mdoc_text (const char *string)
{
	add_wordn_to_whatis (string, strlen (string));
	BEGIN (MAN_DESC);
}
static void newline_found (void)
{
/* If we are mid p_name and the last added char was not a space,
* best add one.
*/
if (p_name != newname && *(p_name - 1) != ' ') {
if (fill_mode)
add_char_to_whatis (' ');
else {
add_char_to_whatis ((char) 0x11);
BEGIN (MAN_NAME);
}
}
waiting_for_quote = false;
}
/* Open FILE (standard input when FILE is "-"), set up decompression and
 * recoding to UTF-8, and scan the result for whatis information.
 *
 * file:     path to read, or "-" for standard input.
 * filename: passed through unchanged to find_name_decompressed.
 * p_lg:     p_lg->type selects cat-page or man-page scanning; results are
 *           stored by find_name_decompressed.
 * encoding: source encoding of the page; if NULL and FILE is not "-", it is
 *           derived from the page's language directory.
 *
 * Returns the value of find_name_decompressed, or 0 if the file could not
 * be opened, is a directory, or in-process recoding failed.
 */
int find_name (const char *file, const char *filename, lexgrog *p_lg,
	       const char *encoding)
{
	int ret = 0;
	decompress *d;
	char *page_encoding = NULL;
	bool run_col = p_lg->type == CATPAGE && *PROG_COL != '\0';

	if (strcmp (file, "-") == 0) {
		/* dup can fail (e.g. standard input closed, or no free
		 * descriptors); report it rather than handing -1 on.
		 */
		int fd = dup (STDIN_FILENO);

		if (fd < 0) {
			error (0, errno, _("can't open %s"), file);
			return 0;
		}
		d = decompress_fdopen (fd);
	} else {
		struct stat st;
		int decompress_flags;
		char *lang;

		if (stat (file, &st)) {
			error (0, errno, "%s", file);
			return 0;
		}

		if (S_ISDIR (st.st_mode)) {
			error (0, EISDIR, "%s", file);
			return 0;
		}

		drop_effective_privs ();

		decompress_flags = 0;
		/* If we're looking at a cat page, then we need to run col
		 * over it, which doesn't work conveniently with an
		 * in-process decompressor.
		 */
		if (!run_col)
			decompress_flags |= DECOMPRESS_ALLOW_INPROCESS;
		d = decompress_open (file, decompress_flags);
		if (!d) {
			error (0, errno, _("can't open %s"), file);
			regain_effective_privs ();
			return 0;
		}

		regain_effective_privs ();

		if (!encoding) {
			lang = lang_dir (file);
			page_encoding = get_page_encoding (lang);
			free (lang);
		}
	}

	/* An explicit encoding is used when none was derived above. */
	if (!page_encoding && encoding)
		page_encoding = xstrdup (encoding);

	if (page_encoding) {
		if (decompress_is_pipeline (d))
			add_manconv (decompress_get_pipeline (d),
				     page_encoding, "UTF-8");
		else if (manconv_inprocess (d, page_encoding, "UTF-8") != 0)
			/* manconv should already have written to stderr, so
			 * just return zero (i.e. no result).
			 */
			goto out;
	}

	if (run_col) {
		pipecmd *col_cmd;

		col_cmd = pipecmd_new_args
			(PROG_COL, "-b", "-p", "-x", (void *) 0);
		pipecmd_pre_exec (col_cmd, sandbox_load, sandbox_free,
				  sandbox);
		pipeline_command (decompress_get_pipeline (d), col_cmd);
	}

	decompress_start (d);

	ret = find_name_decompressed (d, filename, p_lg);

out:
	free (page_encoding);
	decompress_free (d);
	return ret;
}
/* Run the scanner over the already-started decompressor D and collect the
 * whatis text and the list of detected preprocessors.
 *
 * d:        input source read through YY_INPUT.
 * filename: name used in the "whatis ... exceeds" warning.
 * p_lg:     p_lg->type selects the initial scanner state (CAT_FILE for
 *           CATPAGE, otherwise MAN_FILE).  On success p_lg->whatis and
 *           p_lg->filters are set to newly allocated strings.
 *
 * Returns 0 if the scanner reported failure (yylex returned non-zero), in
 * which case p_lg is left untouched.  Otherwise returns the first byte of
 * the trimmed whatis text, which is non-zero exactly when it is non-empty.
 */
int find_name_decompressed (decompress *d, const char *filename, lexgrog *p_lg)
{
	int ret;

	/* Reset all scanner-wide state for this page. */
	decomp = d;
	fname = filename;
	*(p_name = newname) = '\0';
	memset (filters, '_', sizeof (filters));
	fill_mode = true;
	waiting_for_quote = false;

	if (p_lg->type == CATPAGE)
		BEGIN (CAT_FILE);
	else
		BEGIN (MAN_FILE);

	drop_effective_privs ();

	yyrestart (NULL);
	ret = yylex ();

	regain_effective_privs ();

	decompress_wait (decomp);

	if (ret)
		return 0;
	else {
		/* One extra byte so the string stays terminated even when
		 * every filter has been detected.
		 */
		char f_tmp[MAX_FILTERS + 1];
		int j, k;

		/* wipe out any leading or trailing spaces; the lower bound
		 * keeps the scan inside newname when it holds only spaces
		 */
		p_name = strchr (newname, '\0');
		while (p_name > newname && *(p_name - 1) == ' ')
			p_name--;
		*p_name = '\0';
		for (p_name = newname; *p_name == ' '; p_name++);
		p_lg->whatis = xstrdup (p_name);

		/* "-" means no filters; otherwise the letters of the
		 * detected filters in enum order
		 */
		memset (f_tmp, '\0', sizeof (f_tmp));
		f_tmp[0] = '-';
		for (j = k = 0; j < MAX_FILTERS; j++)
			if (filters[j] != '_')
				f_tmp[k++] = filters[j];
		p_lg->filters = xstrdup (f_tmp);

		return p_name[0];
	}
}