LCOV - code coverage report
Current view: top level - preproc/soelim - soelim.cpp (source / functions) Hit Total Coverage
Test: GNU roff Lines: 150 225 66.7 %
Date: 2026-01-16 17:51:41 Functions: 4 5 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Copyright (C) 1989-2025 Free Software Foundation, Inc.
       2             :      Written by James Clark (jjc@jclark.com)
       3             : 
       4             : This file is part of groff, the GNU roff typesetting system.
       5             : 
       6             : groff is free software; you can redistribute it and/or modify it under
       7             : the terms of the GNU General Public License as published by the Free
       8             : Software Foundation, either version 3 of the License, or
       9             : (at your option) any later version.
      10             : 
      11             : groff is distributed in the hope that it will be useful, but WITHOUT ANY
      12             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14             : for more details.
      15             : 
      16             : You should have received a copy of the GNU General Public License
      17             : along with this program.  If not, see <http://www.gnu.org/licenses/>. */
      18             : 
      19             : #ifdef HAVE_CONFIG_H
      20             : #include <config.h>
      21             : #endif
      22             : 
      23             : #include <assert.h>
      24             : #include <ctype.h>
      25             : #include <errno.h>
      26             : #include <stdlib.h>
      27             : 
      28             : #include <getopt.h> // getopt_long()
      29             : 
      30             : #include "lib.h"
      31             : 
      32             : #include "errarg.h"
      33             : #include "error.h"
      34             : #include "stringclass.h"
      35             : #include "nonposix.h"
      36             : #include "searchpath.h"
      37             : #include "lf.h"
      38             : 
      39             : // Initialize inclusion search path with only the current directory.
      40             : static search_path include_search_path(0 /* nullptr */, 0 /* nullptr */,
      41             :                                        0, 1);
      42             : 
                       // Option flags.  main() sets each one from the command line;
                       // set_location() and do_file() read them.
      43             : bool want_att_compat = false; // -C: treat ".so"/".lf" as a request whatever character follows
      44             : bool want_raw_output = false; // -r: write no location records at all
      45             : bool want_tex_output = false; // -t: write TeX comments instead of ".lf" requests
      46             : 
      47             : extern "C" const char *Version_string; // printed by the -v/--version option
      48             : 
      49             : // forward declaration
      50             : static bool do_file(const char *); // needed because do_so() calls it before its definition
      51             : 
      52           0 : void usage(FILE *stream)
      53             : {
                         // Write the three-line command synopsis to `stream`.  When
                         // `stream` is stdout (how main() calls this for --help), also
                         // append a short description of the program.  The text is kept
                         // pure ASCII: "included-file" is spelled with '-' throughout.
      54           0 :   fprintf(stream, "usage: %s [-Crt] [-I dir] [input-file ...]\n"
      55             :           "usage: %s {-v | --version}\n"
      56             :           "usage: %s --help\n",
      57             :           program_name, program_name, program_name);
      58           0 :   if (stdout == stream)
      59           0 :     fputs("\n"
      60             : "GNU soelim eliminates source requests in roff(7) and other text\n"
      61             : "files; it replaces lines of the form \".so included-file\" within\n"
      62             : "each text input-file with the contents of included-file recursively,\n"
      63             : "flattening a tree of documents.  By default, it writes roff \"lf\"\n"
      64             : "requests as well to record the name and line number of each\n"
      65             : "input-file and included-file.  Use the -t option to produce TeX\n"
      66             : "comments instead of roff requests.  Use the -r option to write\n"
      67             : "neither.  See the soelim(1) manual page.\n",
      68             :           stream);
      69           0 : }
      70             : 
      71          77 : int main(int argc, char **argv)
      72             : {
                         // Parse the options, then run do_file() on each operand, or on
                         // "-" when there are none.  Returns 1 if any do_file() call
                         // failed and 0 otherwise; usage errors exit with status 2.
      73          77 :   program_name = argv[0];
      74             :   int opt;
      75             :   static const struct option long_options[] = {
                           // CHAR_MAX + 1 lies outside the range of any short option
                           // character, so --help cannot collide with one.
      76             :     { "help", no_argument, 0 /* nullptr */, CHAR_MAX + 1 },
      77             :     { "version", no_argument, 0 /* nullptr */, 'v' },
      78             :     { 0 /* nullptr */, 0, 0 /* nullptr */, 0 }
      79             :   };
                         // The leading ':' in the option string makes getopt_long()
                         // return ':' for a missing option argument (handled below)
                         // rather than '?', and leaves the diagnostics to this function.
      80         107 :   while ((opt = getopt_long(argc, argv, ":CI:rtv", long_options,
      81             :                             0 /* nullptr */))
      82         107 :          != EOF)
      83          30 :     switch (opt) {
      84           0 :     case 'v':
      85           0 :       printf("GNU soelim (groff) version %s\n", Version_string);
      86           0 :       exit(EXIT_SUCCESS);
      87             :       break;
      88           2 :     case 'C':
      89           2 :       want_att_compat = true;
      90           2 :       break;
      91          27 :     case 'I':
                           // Each -I argument is handed to the include search path.
      92          27 :       include_search_path.command_line_dir(optarg);
      93          27 :       break;
      94           1 :     case 'r':
      95           1 :       want_raw_output = true;
      96           1 :       break;
      97           0 :     case 't':
      98           0 :       want_tex_output = true;
      99           0 :       break;
     100           0 :     case CHAR_MAX + 1: // --help
     101           0 :       usage(stdout);
     102           0 :       exit(EXIT_SUCCESS);
     103             :       break;
     104           0 :     case '?':
                           // A nonzero optopt holds the offending short option
                           // character.  Otherwise report the argument just consumed;
                           // presumably that is an unrecognized long option (TODO
                           // confirm against getopt_long's behavior).
     105           0 :       if (optopt != 0)
     106           0 :         error("unrecognized command-line option '%1'", char(optopt));
     107             :       else
     108           0 :         error("unrecognized command-line option '%1'",
     109           0 :               argv[(optind - 1)]);
     110           0 :       usage(stderr);
     111           0 :       exit(2);
     112             :       break;
     113           0 :     case ':':
     114           0 :       error("command-line option '%1' requires an argument",
     115           0 :            char(optopt));
     116           0 :       usage(stderr);
     117           0 :       exit(2);
     118             :       break;
     119           0 :     default:
                           // Comparing 0 with a string literal is always false, so this
                           // assertion always fires and carries its own message.
     120           0 :       assert(0 == "unhandled getopt_long return value");
     121             :     }
                         // Count the operands that do_file() could not process.
     122          77 :   int nbad = 0;
     123          77 :   if (optind >= argc)
     124          63 :     nbad += !do_file("-");
     125             :   else
     126          28 :     for (int i = optind; i < argc; i++)
     127          14 :       nbad += !do_file(argv[i]);
                         // Output written via putchar()/printf() is not checked at each
                         // call; detect write failures once, here.
     128          77 :   if (ferror(stdout))
     129           0 :     fatal("error status on standard output stream");
     130          77 :   if (fflush(stdout) < 0)
     131           0 :     fatal("cannot flush standard output stream: %1", strerror(errno));
     132          77 :   return (nbad != 0);
     133             : }
     134             : 
     135          79 : void set_location()
     136             : {
                         // Write a record of the current input position
                         // (current_filename, current_lineno) to standard output: a
                         // roff ".lf" request by default, a TeX comment when
                         // want_tex_output is set, and nothing when want_raw_output is
                         // set.
     137          79 :   if (!want_raw_output) {
     138          76 :     if (!want_tex_output)
                             // Prefix the file name with a double quote unless it
                             // already begins with one.  No closing quote is written.
     139          76 :       printf(".lf %d %s%s\n", current_lineno,
     140          76 :         ('"' == current_filename[0]) ? "" : "\"", current_filename);
     141             :     else
     142             :       // XXX: Should we quote the file name?  What's TeX-conventional?
     143           0 :       printf("%% file %s, line %d\n", current_filename, current_lineno);
     144             :   }
     145          79 : }
     146             : 
     147           1 : void do_so(const char *line)
     148             : {
                         // Handle one ".so" request.  `line` is the text that followed
                         // ".so" on the input line; do_file() passes it terminated by
                         // '\n' and '\0'.  If a valid file name can be extracted and
                         // do_file() processes that file, its output replaces the
                         // request.  Otherwise the request is written out unchanged.
     149           1 :   const char *p = line;
     150           2 :   while (*p == ' ')
     151           1 :     p++;
     152           1 :   string filename;
     153           1 :   bool is_filename_valid = true;
     154           1 :   const char *q = p;
                         // Skip one leading double quote; it is not part of the name.
     155           1 :   if ('"' == *q)
     156           0 :     q++;
                         // Copy the rest of the line into `filename`, translating the
                         // few escape sequences accepted here and rejecting the rest.
     157          14 :   for (; is_filename_valid && (*q != '\0') && (*q != '\n'); q++)
     158          13 :     if (*q == '\\') {
     159           0 :       switch (*++q) {
     160           0 :       case 'e':
     161             :       case '\\':
                             // "\e" and "\\" both stand for a literal backslash.
     162           0 :         filename += '\\';
     163           0 :         break;
     164           0 :       case ' ':
     165           0 :         warning("escaping a space is unnecessary and not compatible"
     166             :                 " with troff syntax");
     167           0 :         filename += ' ';
     168             :         // TODO: groff 1.24.0 release + 2 years?
     169             :         // is_filename_valid = false; // or fall through
     170           0 :         break;
     171           0 :       default:
                             // Any other escape, including a backslash at the end of
                             // the line, makes the name unusable and ends the loop.
     172           0 :         is_filename_valid = false;
     173           0 :         break;
     174             :       }
     175             :     }
     176             :     else
     177          13 :       filename += char(*q);
     178           1 :   if (is_filename_valid && (filename.length() > 0)) {
                           // contents() is passed where a C string is expected, so
                           // terminate it explicitly; presumably the string class does
                           // not do so itself (TODO confirm in stringclass.h).
     179           1 :     filename += '\0';
                           // Save the includer's position; do_file() overwrites both
                           // globals.
     180           1 :     const char *fn = current_filename;
     181           1 :     int ln = current_lineno;
                           // do_file() already counted the ".so" line before calling us.
                           // Step back for the duration of the call, presumably so that
                           // a diagnostic from do_file() cites the ".so" line itself
                           // (TODO confirm how error() uses current_lineno).
     182           1 :     current_lineno--;
     183           1 :     if (do_file(filename.contents())) {
     184           1 :       current_filename = fn;
     185           1 :       current_lineno = ln;
                             // Tell downstream tools that input resumes in the includer.
     186           1 :       set_location();
     187           1 :       return;
     188             :     }
                           // do_file() failed before touching the globals; undo the
                           // decrement.
     189           0 :     current_lineno++;
     190             :   }
                         // Could not expand the request: write it back out as it was
                         // read.
     191           0 :   fputs(".so", stdout);
     192           0 :   fputs(line, stdout);
     193             : }
     194             : 
     195          78 : static bool do_file(const char *filename)
     196             : {
                         // Copy the file `filename` (looked up via include_search_path)
                         // to standard output, expanding ".so" requests through do_so()
                         // and passing ".lf" requests through after interpreting them.
                         // Returns false if the file cannot be opened (after reporting
                         // the error) and true otherwise.  Overwrites current_filename
                         // and current_lineno; current_filename is null on a successful
                         // return.
     197          78 :   char *file_name_in_path = 0 /* nullptr */;
     198          78 :   FILE *fp = include_search_path.open_file_cautiously(filename,
     199             :       &file_name_in_path);
                         // Save errno at once, before later calls can change it.
     200          78 :   int err = errno;
     201         156 :   string whole_filename(filename);
                         // Prefer the name reported by the search path, except for "-".
     202          78 :   if (strcmp(filename, "-") && file_name_in_path != 0 /* nullptr */)
     203          15 :     whole_filename = file_name_in_path;
     204          78 :   whole_filename += '\0';
     205          78 :   free(file_name_in_path);
     206          78 :   if (0 /* nullptr */ == fp) {
     207           0 :     error("cannot open '%1': %2", whole_filename.contents(),
     208           0 :           strerror(err));
     209           0 :     return false;
     210             :   }
     211          78 :   normalize_file_name_for_lf_request(whole_filename);
                         // current_filename now points into whole_filename, a local; it
                         // is reset to null before this function returns.
     212          78 :   current_filename = whole_filename.contents();
     213          78 :   current_lineno = 1;
     214          78 :   set_location();
                         // Scanner states.  START: at the beginning of an input line.
                         // MIDDLE: inside a line that is being copied through.  HAD_DOT,
                         // HAD_s, HAD_so, HAD_l, HAD_lf: the line so far is ".", ".s",
                         // ".so", ".l" or ".lf"; those characters are withheld from the
                         // output until the request is either recognized or ruled out.
     215          78 :   enum { START, MIDDLE, HAD_DOT, HAD_s, HAD_so, HAD_l, HAD_lf } state
     216             :       = START;
     217             :   for (;;) {
     218      652074 :     int c = getc(fp);
     219      652074 :     if (c == EOF)
     220          78 :       break;
     221      651996 :     switch (state) {
     222       27448 :     case START:
     223       27448 :       if (c == '.')
     224       14266 :         state = HAD_DOT;
     225             :       else {
     226       13182 :         putchar(c);
     227       13182 :         if (c == '\n') {
     228        1587 :           current_lineno++;
     229        1587 :           state = START;
     230             :         }
     231             :         else
     232       11595 :           state = MIDDLE;
     233             :       }
     234       27448 :       break;
     235      605603 :     case MIDDLE:
     236      605603 :       putchar(c);
     237      605603 :       if (c == '\n') {
     238       23804 :         current_lineno++;
     239       23804 :         state = START;
     240             :       }
     241      605603 :       break;
     242       14266 :     case HAD_DOT:
     243       14266 :       if (c == 's')
     244        2917 :         state = HAD_s;
     245       11349 :       else if (c == 'l')
     246         881 :         state = HAD_l;
     247             :       else {
                             // Not a request handled here: release the withheld ".".
     248       10468 :         putchar('.');
     249       10468 :         putchar(c);
     250       10468 :         if (c == '\n') {
     251        1185 :           current_lineno++;
     252        1185 :           state = START;
     253             :         }
     254             :         else
     255        9283 :           state = MIDDLE;
     256             :       }
     257       14266 :       break;
     258        2917 :     case HAD_s:
     259        2917 :       if (c == 'o')
     260          12 :         state = HAD_so;
     261             :       else  {
     262        2905 :         putchar('.');
     263        2905 :         putchar('s');
     264        2905 :         putchar(c);
     265        2905 :         if (c == '\n') {
     266           0 :           current_lineno++;
     267           0 :           state = START;
     268             :         }
     269             :         else
     270        2905 :           state = MIDDLE;
     271             :       }
     272        2917 :       break;
     273          12 :     case HAD_so:
                           // ".so" counts as a request only when followed by a space or
                           // a newline, unless AT&T compatibility mode accepts any
                           // following character.
     274          12 :       if (c == ' ' || c == '\n' || want_att_compat) {
                             // Collect the rest of the line, starting with `c`, up to
                             // but not including the newline (or up to end of file).
     275           1 :         string line;
     276          15 :         for (; c != EOF && c != '\n'; c = getc(fp))
     277          14 :           line += c;
     278           1 :         current_lineno++;
     279           1 :         line += '\n';
     280           1 :         line += '\0';
     281           1 :         do_so(line.contents());
     282           1 :         state = START;
     283             :       }
     284             :       else {
     285          11 :         fputs(".so", stdout);
     286          11 :         putchar(c);
     287          11 :         state = MIDDLE;
     288             :       }
     289          12 :       break;
     290         881 :     case HAD_l:
     291         881 :       if (c == 'f')
     292         869 :         state = HAD_lf;
     293             :       else {
     294          12 :         putchar('.');
     295          12 :         putchar('l');
     296          12 :         putchar(c);
     297          12 :         if (c == '\n') {
     298           0 :           current_lineno++;
     299           0 :           state = START;
     300             :         }
     301             :         else
     302          12 :           state = MIDDLE;
     303             :       }
     304         881 :       break;
     305         869 :     case HAD_lf:
                           // Same recognition rule as for ".so" above.
     306         869 :       if (c == ' ' || c == '\n' || want_att_compat) {
     307         869 :         string line;
     308        6201 :         for (; c != EOF && c != '\n'; c = getc(fp))
     309        5332 :           line += c;
     310         869 :         current_lineno++;
     311         869 :         line += '\n';
     312         869 :         line += '\0';
                             // Declared in lf.h; presumably updates current_filename
                             // and current_lineno from the request's arguments (TODO
                             // confirm).  The request itself is then copied through
                             // unchanged.
     313         869 :         interpret_lf_request_arguments(line.contents());
     314         869 :         printf(".lf%s", line.contents());
     315         869 :         state = START;
     316             :       }
     317             :       else {
     318           0 :         fputs(".lf", stdout);
     319           0 :         putchar(c);
     320           0 :         state = MIDDLE;
     321             :       }
     322         869 :       break;
     323           0 :     default:
     324           0 :       assert(0 == "unhandled state in file parser");
     325             :     }
     326      651996 :   }
                         // End of input.  Release any withheld request prefix, and end
                         // the output with a newline if the last input line had none.
     327          78 :   switch (state) {
     328           0 :   case HAD_DOT:
     329           0 :     fputs(".\n", stdout);
     330           0 :     break;
     331           0 :   case HAD_l:
     332           0 :     fputs(".l\n", stdout);
     333           0 :     break;
     334           0 :   case HAD_s:
     335           0 :     fputs(".s\n", stdout);
     336           0 :     break;
     337           0 :   case HAD_lf:
     338           0 :     fputs(".lf\n", stdout);
     339           0 :     break;
     340           0 :   case HAD_so:
     341           0 :     fputs(".so\n", stdout);
     342           0 :     break;
     343           2 :   case MIDDLE:
     344           2 :     putc('\n', stdout);
     345           2 :     break;
     346          76 :   case START:
     347          76 :     break;
     348             :   }
                         // Never close stdin; presumably open_file_cautiously() returns
                         // it for "-" (TODO confirm in searchpath.h).
     349          78 :   if (fp != stdin)
     350          15 :     if (fclose(fp) < 0)
     351           0 :       fatal("cannot close '%1': %2", whole_filename.contents(),
     352           0 :             strerror(errno));
                         // whole_filename is about to be destroyed, so do not leave
                         // current_filename pointing into it.
     353          78 :   current_filename = 0 /* nullptr */;
     354          78 :   return true;
     355             : }
     356             : 
     357             : // Local Variables:
     358             : // fill-column: 72
     359             : // mode: C++
     360             : // End:
     361             : // vim: set cindent noexpandtab shiftwidth=2 textwidth=72:

Generated by: LCOV version 1.14