LCOV - code coverage report
Current view: top level - roff/troff - input.cpp (source / functions) Hit Total Coverage
Test: GNU roff Lines: 4930 6863 71.8 %
Date: 2026-01-16 17:51:41 Functions: 396 471 84.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Copyright 1989-2024 Free Software Foundation, Inc.
       2             :              2021-2025 G. Branden Robinson
       3             : 
       4             :      Written by James Clark (jjc@jclark.com)
       5             : 
       6             : This file is part of groff, the GNU roff typesetting system.
       7             : 
       8             : groff is free software; you can redistribute it and/or modify it under
       9             : the terms of the GNU General Public License as published by the Free
      10             : Software Foundation, either version 3 of the License, or
      11             : (at your option) any later version.
      12             : 
      13             : groff is distributed in the hope that it will be useful, but WITHOUT ANY
      14             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      15             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      16             : for more details.
      17             : 
      18             : You should have received a copy of the GNU General Public License
      19             : along with this program.  If not, see <http://www.gnu.org/licenses/>. */
      20             : 
      21             : #ifdef HAVE_CONFIG_H
      22             : #include <config.h>
      23             : #endif
      24             : 
      25             : #include <assert.h>
      26             : #include <errno.h> // ENOENT, errno
      27             : #include <locale.h> // setlocale()
      28             : #include <stdcountof.h>
      29             : #include <stdio.h> // EOF, FILE, clearerr(), fclose(), fflush(),
      30             :                    // fileno(), fopen(), fprintf(), fseek(), getc(),
      31             :                    // pclose(), popen(), printf(), SEEK_SET, snprintf(),
      32             :                    // sprintf(), setbuf(), stderr, stdin, stdout,
      33             :                    // ungetc()
      34             : #include <stdlib.h> // atoi(), exit(), EXIT_FAILURE, EXIT_SUCCESS,
      35             :                     // free(), getenv(), putenv(), strtol(), system()
      36             : #include <string.h> // strcpy(), strdup(), strerror()
      37             : 
      38             : #include <getopt.h> // getopt_long()
      39             : 
      40             : #include <stack>
      41             : 
      42             : #include "json-encode.h" // json_encode_char()
      43             : 
      44             : #include "troff.h"
      45             : #include "dictionary.h"
      46             : #include "hvunits.h"
      47             : #include "stringclass.h"
      48             : #include "mtsm.h"
      49             : #include "env.h"
      50             : #include "request.h"
      51             : #include "node.h"
      52             : #include "token.h"
      53             : #include "div.h"
      54             : #include "reg.h"
      55             : #include "font.h"
      56             : #include "charinfo.h"
      57             : #include "macropath.h"
      58             : #include "input.h"
      59             : #include "defs.h"
      60             : #include "unicode.h"
      61             : #include "curtime.h"
      62             : 
      63             : // needed for getpid() and isatty()
      64             : #include "posix.h"
      65             : #include "nonposix.h"
      66             : 
// String constants: affixes for macro file names and the names of the
// two startup files.
#define MACRO_PREFIX "tmac."
#define MACRO_POSTFIX ".tmac"
#define INITIAL_STARTUP_FILE "troffrc"
#define FINAL_STARTUP_FILE   "troffrc-end"
// initial value of `input_stack::limit`
#define DEFAULT_INPUT_STACK_LIMIT 1000

// The build may predefine DEFAULT_WARNING_MASK; this is the fallback.
#ifndef DEFAULT_WARNING_MASK
// warnings that are enabled by default
#define DEFAULT_WARNING_MASK \
     (WARN_CHAR|WARN_BREAK|WARN_SPACE|WARN_FONT|WARN_FILE)
#endif
      78             : 
// Defined elsewhere, with C linkage.
extern "C" const char *program_name;
extern "C" const char *Version_string;

// initial size for input buffers that need to grow arbitrarily
static const int default_buffer_size = 16;

#ifdef COLUMN
void init_column_requests();
#endif /* COLUMN */

// forward declarations
static node *read_drawing_command();
static void read_drawing_command_color_arguments(token &);
static void push_token(const token &);
static void unsafe_transparent_throughput_file_request();
#ifdef COLUMN
void vjustify();
#endif /* COLUMN */
static void transparent_throughput_file_request();

// The token most recently read; the request handlers in this file
// inspect it (e.g., `tok.ch()`, `tok.description()`).
token tok;
bool was_invoked_with_regular_control_character = false;
bool using_character_classes = false;
static bool permit_color_output = true;
bool want_color_output = true;
static bool want_backtraces = false;
char *pipe_command = 0 /* nullptr */;
// NOTE(review): 256 entries each--presumably one per 8-bit character
// code; confirm against the code that indexes these tables.
charinfo *charset_table[256];
unsigned char hpf_code_table[256];

static unsigned int warning_mask = DEFAULT_WARNING_MASK;
static bool want_errors_inhibited = false;
static bool want_input_ignored = false;

static void enable_warning(const char *);
static void disable_warning(const char *);

static symbol end_of_input_macro_name;
static symbol blank_line_macro_name;
static symbol leading_spaces_macro_name;
static bool want_att_compat = false;
bool want_abstract_output = false;
bool want_nodes_dumped = false;
bool want_output_suppressed = false;
bool is_writing_html = false;
static int suppression_level = 0;       // depth of nested \O escapes

bool in_nroff_mode = false;
bool is_device_ps_or_pdf = false;

// Keep track of whether \f, \F, \D'F...', \H, \m, \M, \O[345], \R, \s,
// or \S has been processed in token::next().
static bool have_formattable_input = false;
// `have_formattable_input` is reset immediately upon reading a new
// input line, but we need more state information because the input line
// might have been continued/interrupted with `\c`.
// Consider:
//   \f[TB]\m[red]hello\c
//   \f[]\m[]
// (`input_stack::get()` copies the flag above into this one each time
// it returns a newline, just before clearing the flag above.)
static bool have_formattable_input_on_interrupted_line = false;

bool device_has_tcommand = false;       // 't' output command supported
static bool want_unsafe_requests = false;       // be safer by default

static bool have_multiple_params = false;       // \[e aa], \*[foo bar]

// The initializer evaluates to -4.0.
double spread_limit = -3.0 - 1.0;       // negative means deactivated

double warn_scale;
char warn_scaling_unit;
bool want_html_debugging = true;        // enable more diagnostics

search_path *mac_path = &safer_macro_path;

// Initialize inclusion search path with only the current directory.
search_path include_search_path(0 /* nullptr */, 0 /* nullptr */, 0, 1);
     155             : 
// Prototypes for file-local (static) helpers.  Commented-out names
// document the parameters.
static int read_char_in_copy_mode(node ** /* nd; 0 to discard */,
                                  bool /* is_defining */ = false,
                                  bool /* handle_escaped_E */ = false);
static void copy_mode_error(const char *,
                            const errarg & = empty_errarg,
                            const errarg & = empty_errarg,
                            const errarg & = empty_errarg);

enum read_mode { ALLOW_EMPTY, WITH_ARGS, NO_ARGS };
static symbol read_escape_parameter(read_mode = NO_ARGS);
static symbol read_long_escape_parameters(read_mode = NO_ARGS);
static void interpolate_string(symbol);
static void interpolate_string_with_args(symbol);
static void interpolate_macro(symbol, bool = false);
static void interpolate_number_format(symbol);
static void interpolate_environment_variable(symbol);

static symbol composite_glyph_name(symbol);
static void interpolate_positional_parameter(symbol);
static request_or_macro *lookup_request(symbol);
static bool read_delimited_measurement(units * /* n */,
    unsigned char /* si */);
static bool read_delimited_measurement(units * /* n */,
    unsigned char /* si */, units /* prev_value */);
static symbol read_input_until_terminator(bool /* required */,
    unsigned char /* end_char */, bool /* want_identifier */ = false);
static bool read_line_rule_expression(units * /* res */,
    unsigned char /* si */, charinfo ** /* cp */);
static bool read_size(int *);
static symbol read_delimited_identifier();
static void init_registers();
static void trapping_blank_line();

class input_iterator;
input_iterator *make_temp_iterator(const char *);
const char *input_char_description(int);

void process_input_stack();
void chop_macro();      // declare to avoid friend name injection

// Default escape and control characters.  `escape_char` holds the
// escape character currently in effect; `escape_off_request()` sets it
// to 0.
static const unsigned char default_escape_char = (unsigned char)('\\');
static unsigned char escape_char = default_escape_char;
static const unsigned char default_control_char = (unsigned char)('.');
static const unsigned char default_no_break_control_char
  = (unsigned char)('\'');
     201             : 
     202       17821 : static void assign_escape_character_request()
     203             : {
     204       17821 :   unsigned char ec = 0U;
     205       17821 :   bool is_invalid = false;
     206       17821 :   if (has_arg()) {
     207         122 :     if (tok.ch() == 0U)
     208           0 :       is_invalid = true;
     209             :     else
     210         122 :       ec = tok.ch();
     211             :   }
     212             :   else
     213       17699 :     ec = default_escape_char;
     214       17821 :   bool do_nothing = false;
     215             :   static const char already_cc[] = "the control character is already";
     216             :   static const char already_nbcc[] = "the no-break control character is"
     217             :                                      " already";
     218       17821 :   const char *already_message = 0 /* nullptr */;
     219       17821 :   if (curenv->get_control_character() == ec) {
     220           0 :       already_message = already_cc;
     221           0 :       do_nothing = true;
     222             :   }
     223       17821 :   else if (curenv->get_no_break_control_character() == ec) {
     224           0 :       already_message = already_nbcc;
     225           0 :       do_nothing = true;
     226             :   }
     227       17821 :   if (do_nothing)
     228           0 :     error("ignoring escape character change request; %1%2 %3",
     229             :           is_invalid ? "cannot select invalid escape character, and"
     230           0 :           : "", already_message, input_char_description(ec));
     231       17821 :   else if (is_invalid) {
     232           0 :     error("cannot select %1 as escape character; using '%2'",
     233           0 :           tok.description(), char(default_escape_char));
     234           0 :     escape_char = default_escape_char;
     235             :   }
     236             :   else
     237       17821 :     escape_char = ec;
     238       17821 :   skip_line();
     239       17821 : }
     240             : 
     241       17503 : void escape_off_request()
     242             : {
     243       17503 :   escape_char = 0U;
     244       17503 :   skip_line();
     245       17503 : }
     246             : 
     247             : static unsigned char saved_escape_char = '\\';
     248             : 
     249           0 : void save_escape_char_request()
     250             : {
     251           0 :   saved_escape_char = escape_char;
     252           0 :   skip_line();
     253           0 : }
     254             : 
     255           0 : void restore_escape_char_request()
     256             : {
     257           0 :   escape_char = saved_escape_char;
     258           0 :   skip_line();
     259           0 : }
     260             : 
     261           0 : void assign_control_character_request()
     262             : {
     263           0 :   unsigned char cc = 0U;
     264           0 :   bool is_invalid = false;
     265           0 :   if (has_arg()) {
     266           0 :     if (tok.ch() == 0U)
     267           0 :       is_invalid = true;
     268             :     else
     269           0 :       cc = tok.ch();
     270             :   }
     271             :   else
     272           0 :     cc = default_control_char;
     273           0 :   bool do_nothing = false;
     274           0 :   char already_ec[] = "the escape character is already";
     275           0 :   char already_nbcc[] = "the no-break control character is already";
     276           0 :   char *already_message = 0 /* nullptr */;
     277           0 :   if (cc == escape_char) {
     278           0 :       already_message = already_ec;
     279           0 :       do_nothing = true;
     280             :   }
     281           0 :   else if (curenv->get_no_break_control_character() == cc) {
     282           0 :       already_message = already_nbcc;
     283           0 :       do_nothing = true;
     284             :   }
     285           0 :   bool assignment_worked = false;
     286           0 :   if (do_nothing)
     287           0 :     error("ignoring control character change request; %1%2 %3",
     288             :           is_invalid ? "cannot select invalid control character, and"
     289           0 :           : "", already_message, input_char_description(cc));
     290           0 :   else if (is_invalid) {
     291           0 :     error("cannot select %1 as control character; using '%2'",
     292           0 :           tok.description(), char(default_control_char));
     293             :     assignment_worked
     294           0 :       = curenv->set_control_character(default_control_char);
     295             :   }
     296             :   else
     297           0 :     assignment_worked = curenv->set_control_character(cc);
     298           0 :   assert(assignment_worked);
     299           0 :   skip_line();
     300           0 : }
     301             : 
     302           3 : void assign_no_break_control_character_request()
     303             : {
     304           3 :   unsigned char nbcc = 0U;
     305           3 :   bool is_invalid = false;
     306           3 :   if (has_arg()) {
     307           2 :     if (tok.ch() == 0U)
     308           0 :       is_invalid = true;
     309             :     else
     310           2 :       nbcc = tok.ch();
     311             :   }
     312             :   else
     313           1 :     nbcc = default_no_break_control_char;
     314           3 :   bool do_nothing = false;
     315           3 :   char already_ec[] = "the escape character is already";
     316           3 :   char already_cc[] = "the (breaking) control character is already";
     317           3 :   char *already_message = 0 /* nullptr */;
     318           3 :   if (nbcc == escape_char) {
     319           0 :       already_message = already_ec;
     320           0 :       do_nothing = true;
     321             :   }
     322           3 :   else if (curenv->get_control_character() == nbcc) {
     323           0 :       already_message = already_cc;
     324           0 :       do_nothing = true;
     325             :   }
     326           3 :   bool assignment_worked = false;
     327           3 :   if (do_nothing)
     328           0 :     error("ignoring no-break control character change request; %1%2 %3",
     329             :           is_invalid ? "cannot select invalid no-break control"
     330             :                        " character, and"
     331           0 :           : "", already_message, input_char_description(nbcc));
     332           3 :   else if (is_invalid) {
     333           0 :     error("cannot select %1 as no-break control character;"
     334             :           " using \"%2\"", tok.description(),
     335           0 :           default_no_break_control_char);
     336             :     assignment_worked
     337           0 :       = curenv->set_no_break_control_character(default_no_break_control_char);
     338             :   }
     339             :   else
     340           3 :     assignment_worked = curenv->set_no_break_control_character(nbcc);
     341           3 :   assert(assignment_worked);
     342           3 :   skip_line();
     343           3 : }
     344             : 
     345             : struct arg_list;
     346             : 
     347             : class input_iterator {
     348             : public:
     349             :   input_iterator();
     350             :   input_iterator(bool /* is_div */);
     351    18685680 :   virtual ~input_iterator() {}
     352             :   int get(node **);
     353             :   friend class input_stack;
     354             :   bool is_diversion;
     355             :   statem *diversion_state;
     356             : protected:
     357             :   const unsigned char *ptr;
     358             :   const unsigned char *endptr;
     359             :   input_iterator *next;
     360             : private:
     361             :   virtual int fill(node **);
     362             :   virtual int peek();
     363     3005956 :   virtual bool has_args() { return false; }
     364           0 :   virtual int nargs() { return 0; }
     365           0 :   virtual input_iterator *get_arg(int) { return 0 /* nullptr */; }
     366           0 :   virtual arg_list *get_arg_list() { return 0 /* nullptr */; }
     367           0 :   virtual symbol get_macro_name() { return NULL_SYMBOL; }
     368           0 :   virtual bool space_follows_arg(int) { return false; }
     369     1272316 :   virtual bool get_break_flag() { return false; }
     370      301234 :   virtual bool get_location(bool /* allow_macro */,
     371             :                             const char ** /* filep */,
     372      301234 :                             int * /* linep */) { return false; }
     373          34 :   virtual void backtrace() {}
     374           3 :   virtual bool set_location(const char *, int) { return false; }
     375           0 :   virtual bool next_file(FILE *, const char *) { return false; }
     376           0 :   virtual void shift(int) {}
     377    17507309 :   virtual int is_boundary() {return 0; } // three-valued Boolean :-|
     378           0 :   virtual bool is_file() { return false; }
     379       26398 :   virtual bool is_macro() { return false; }
     380      214068 :   virtual void set_att_compat(bool) {}
     381      214068 :   virtual bool get_att_compat() { return false; }
     382             : };
     383             : 
     384     9318574 : input_iterator::input_iterator()
     385     9318574 : : is_diversion(false), ptr(0 /* nullptr */), endptr(0 /* nullptr */)
     386             : {
     387     9318574 : }
     388             : 
     389     9437049 : input_iterator::input_iterator(bool is_div)
     390     9437049 : : is_diversion(is_div), ptr(0 /* nullptr */), endptr(0 /* nullptr */)
     391             : {
     392     9437049 : }
     393             : 
     394    10169254 : int input_iterator::fill(node **)
     395             : {
     396    10169254 :   return EOF;
     397             : }
     398             : 
     399           0 : int input_iterator::peek()
     400             : {
     401           0 :   return EOF;
     402             : }
     403             : 
     404    26794625 : inline int input_iterator::get(node **p)
     405             : {
     406    26794625 :   return ptr < endptr ? *ptr++ : fill(p);
     407             : }
     408             : 
     409             : class input_boundary : public input_iterator {
     410             : public:
     411     2139492 :   int is_boundary() { return 1; }
     412             : };
     413             : 
     414             : class input_return_boundary : public input_iterator {
     415             : public:
     416       39600 :   int is_boundary() { return 2; }
     417             : };
     418             : 
     419             : class file_iterator : public input_iterator {
     420             :   FILE *fp;
     421             :   int lineno;
     422             :   char *filename;
     423             :   bool was_popened;
     424             :   bool seen_newline;
     425             :   bool seen_escape;
     426             :   enum { BUF_SIZE = 512 };
     427             :   unsigned char buf[BUF_SIZE];
     428             :   void close();
     429             : public:
     430             :   file_iterator(FILE *, const char *, bool = false);
     431             :   ~file_iterator();
     432             :   int fill(node **);
     433             :   int peek();
     434             :   bool get_location(bool /* allow_macro */, const char ** /* filep */,
     435             :                     int * /* linep */);
     436             :   void backtrace();
     437             :   bool set_location(const char *, int);
     438             :   bool next_file(FILE *, const char *);
     439          43 :   bool is_file() { return true; }
     440             : };
     441             : 
     442       17106 : file_iterator::file_iterator(FILE *f, const char *fn, bool popened)
     443             : : fp(f), lineno(1), was_popened(popened),
     444       17106 :   seen_newline(false), seen_escape(false)
     445             : {
     446       17106 :   filename = strdup(const_cast<char *>(fn));
     447       17106 :   if ((font::use_charnames_in_special) && (fn != 0 /* nullptr */)) {
     448         618 :     if (!the_output)
     449          45 :       init_output();
     450         618 :     the_output->put_filename(fn, popened);
     451             :   }
     452       17106 : }
     453             : 
     454       34182 : file_iterator::~file_iterator()
     455             : {
     456       17091 :   close();
     457       34182 : }
     458             : 
     459       17091 : void file_iterator::close()
     460             : {
     461       17091 :   if (fp == stdin)
     462        1381 :     clearerr(stdin);
     463       15710 :   else if (was_popened)
     464          96 :     pclose(fp);
     465             :   else
     466       15614 :     fclose(fp);
     467       17091 : }
     468             : 
     469           0 : bool file_iterator::next_file(FILE *f, const char *s)
     470             : {
     471           0 :   close();
     472           0 :   fp = f;
     473           0 :   set_location(s, 1);
     474           0 :   seen_newline = false;
     475           0 :   seen_escape = false;
     476           0 :   was_popened = false;
     477           0 :   ptr = 0 /* nullptr */;
     478           0 :   endptr = 0 /* nullptr */;
     479           0 :   return true;
     480             : }
     481             : 
     482     5372584 : int file_iterator::fill(node **)
     483             : {
     484     5372584 :   if (seen_newline)
     485     5355425 :     lineno++;
     486     5372584 :   seen_newline = false;
     487     5372584 :   unsigned char *p = buf;
     488     5372584 :   ptr = p;
     489     5372584 :   unsigned char *e = p + BUF_SIZE;
     490   140893187 :   while (p < e) {
     491   140893159 :     int c = getc(fp);
     492   140893159 :     if (EOF == c)
     493       17073 :       break;
     494   140876086 :     if (is_invalid_input_char(c))
     495           9 :       warning(WARN_INPUT, "invalid input character code %1", c);
     496             :     else {
     497   140876077 :       *p++ = c;
     498   140876077 :       if ('\n' == c) {
     499     5355483 :         seen_escape = false;
     500     5355483 :         seen_newline = true;
     501     5355483 :         break;
     502             :       }
     503   135520594 :       seen_escape = ('\\' == c); // XXX: should be (escape_char == c)?
     504             :     }
     505             :   }
     506     5372584 :   if (p > buf) {
     507     5355536 :     endptr = p;
     508     5355536 :     return *ptr++;
     509             :   }
     510             :   else {
     511       17048 :     endptr = p;
     512       17048 :     return EOF;
     513             :   }
     514             : }
     515             : 
     516           6 : int file_iterator::peek()
     517             : {
     518           6 :   int c = getc(fp);
     519           6 :   while (is_invalid_input_char(c)) {
     520           0 :     warning(WARN_INPUT, "invalid input character code %1", c);
     521           0 :     c = getc(fp);
     522             :   }
     523           6 :   if (c != EOF)
     524           6 :     ungetc(c, fp);
     525           6 :   return c;
     526             : }
     527             : 
     528     1214318 : bool file_iterator::get_location(bool /*allow_macro*/,
     529             :                                  const char **filenamep, int *linenop)
     530             : {
     531     1214318 :   *linenop = lineno;
     532     1214318 :   assert(filename != 0 /* nullptr */);
     533     1214318 :   if (0 /* nullptr */ == filename)
     534           0 :     return false;
     535     1214318 :   if (strcmp(filename, "-") == 0)
     536       11041 :     *filenamep = "<standard input>";
     537             :   else
     538     1203277 :     *filenamep = filename;
     539     1214318 :   return true;
     540             : }
     541             : 
     542          35 : void file_iterator::backtrace()
     543             : {
     544             :   const char *f;
     545             :   int n;
     546             :   // Get side effect of filename rewrite if stdin.
     547          35 :   (void) get_location(false /* allow macro */, &f, &n);
     548          35 :   if (program_name != 0 /* nullptr */)
     549          35 :     errprint("%1: ", program_name);
     550          35 :   errprint("backtrace: %3 '%1':%2\n", f, n,
     551          35 :            was_popened ? "pipe" : "file");
     552          35 : }
     553             : 
     554       22852 : bool file_iterator::set_location(const char *f, int ln)
     555             : {
     556       22852 :   if (f != 0 /* nullptr */)
     557        1394 :     filename = const_cast<char *>(f);
     558       22852 :   lineno = ln;
     559       22852 :   return true;
     560             : }
     561             : 
// Sentinel iterator; `input_stack::top` initially points at it, and
// `input_stack::finish_get()` compares `top` against it.
input_iterator nil_iterator;

// The input stack.  Every member is static, so the class serves as a
// namespace around a single stack of `input_iterator` objects whose
// topmost entry is `top`.
class input_stack {
public:
  static int get(node **);
  static int peek();
  static void push(input_iterator *);
  static input_iterator *get_arg(int);
  static arg_list *get_arg_list();
  static symbol get_macro_name();
  static bool space_follows_arg(int);
  static int get_break_flag();
  static int nargs();
  static bool get_location(bool /* allow_macro */,
                           const char ** /* filep */,
                           int * /* linep */);
  static bool set_location(const char *, int);
  static void backtrace();
  static void next_file(FILE *, const char *);
  static void end_file();
  static void shift(int n);
  static void add_boundary();
  static void add_return_boundary();
  static int is_return_boundary();
  static void remove_boundary();
  static int get_level();
  static int get_div_level();
  static void increase_level();
  static void decrease_level();
  static void clear();
  static void pop_macro();
  static void set_att_compat(bool);
  static bool get_att_compat();
  static statem *get_diversion_state();
  static void check_end_diversion(input_iterator *t);
  static int limit;             // starts at DEFAULT_INPUT_STACK_LIMIT
  static int div_level;         // value returned by get_div_level()
  static statem *diversion_state;
private:
  static input_iterator *top;   // topmost iterator
  static int level;             // value returned by get_level()
  // Slow path of get(), taken when the top buffer is exhausted.
  static int finish_get(node **);
  static int finish_peek();
};
     606             : 
// Definitions of input_stack's static data members.  The stack starts
// out with only the sentinel `nil_iterator` on it.
input_iterator *input_stack::top = &nil_iterator;
int input_stack::level = 0;
int input_stack::limit = DEFAULT_INPUT_STACK_LIMIT;
int input_stack::div_level = 0;
statem *input_stack::diversion_state = 0 /* nullptr */;
bool suppress_push = false;
     613             : 
     614             : 
     615    32912914 : inline int input_stack::get_level()
     616             : {
     617    32912914 :   return level;
     618             : }
     619             : 
     620      130923 : inline void input_stack::increase_level()
     621             : {
     622      130923 :   level++;
     623      130923 : }
     624             : 
     625      130923 : inline void input_stack::decrease_level()
     626             : {
     627      130923 :   level--;
     628      130923 : }
     629             : 
     630     8897752 : inline int input_stack::get_div_level()
     631             : {
     632     8897752 :   return div_level;
     633             : }
     634             : 
     635   923638300 : inline int input_stack::get(node **np)
     636             : {
     637   923638300 :   int res = (top->ptr < top->endptr) ? *top->ptr++ : finish_get(np);
     638   923638300 :   if (res == '\n') {
     639    35830546 :     have_formattable_input_on_interrupted_line = have_formattable_input;
     640    35830546 :     have_formattable_input = false;
     641             :   }
     642   923638300 :   return res;
     643             : }
     644             : 
     645    83643088 : int input_stack::finish_get(node **np)
     646             : {
     647             :   for (;;) {
     648    83643088 :     int c = top->fill(np);
     649    83643088 :     if (c != EOF || top->is_boundary())
     650    66262338 :       return c;
     651    17380750 :     if (top == &nil_iterator)
     652        6590 :       break;
     653    17374160 :     input_iterator *tem = top;
     654    17374160 :     check_end_diversion(tem);
     655             : #if defined(DEBUGGING)
     656             :     if (want_html_debugging)
     657             :       if (tem->is_diversion)
     658             :         fprintf(stderr,
     659             :                 "in diversion level = %d\n", input_stack::get_div_level());
     660             : #endif
     661    17374160 :     top = top->next;
     662    17374160 :     level--;
     663    17374160 :     delete tem;
     664    17374160 :     if (top->ptr < top->endptr)
     665    15477606 :       return *top->ptr++;
     666     1896554 :   }
     667        6590 :   assert(level == 0);
     668        6590 :   return EOF;
     669             : }
     670             : 
     671     9006938 : inline int input_stack::peek()
     672             : {
     673     9006938 :   return (top->ptr < top->endptr) ? *top->ptr : finish_peek();
     674             : }
     675             : 
     676    17775210 : void input_stack::check_end_diversion(input_iterator *t)
     677             : {
     678    17775210 :   if (t->is_diversion) {
     679       46733 :     div_level--;
     680       46733 :     if (diversion_state != 0 /* nullptr */)
     681         241 :       delete diversion_state;
     682       46733 :     diversion_state = t->diversion_state;
     683             :   }
     684    17775210 : }
     685             : 
     686        4457 : int input_stack::finish_peek()
     687             : {
     688             :   for (;;) {
     689        4457 :     int c = top->peek();
     690        4457 :     if (c != EOF || top->is_boundary())
     691        4326 :       return c;
     692         131 :     if (top == &nil_iterator)
     693           0 :       break;
     694         131 :     input_iterator *tem = top;
     695         131 :     check_end_diversion(tem);
     696         131 :     top = top->next;
     697         131 :     level--;
     698         131 :     delete tem;
     699         131 :     if (top->ptr < top->endptr)
     700         131 :       return *top->ptr;
     701           0 :   }
     702           0 :   assert(level == 0);
     703           0 :   return EOF;
     704             : }
     705             : 
     706      261248 : void input_stack::add_boundary()
     707             : {
     708      261248 :   push(new input_boundary);
     709      261248 : }
     710             : 
     711       13200 : void input_stack::add_return_boundary()
     712             : {
     713       13200 :   push(new input_return_boundary);
     714       13200 : }
     715             : 
     716      697920 : int input_stack::is_return_boundary()
     717             : {
     718      697920 :   return top->is_boundary() == 2;
     719             : }
     720             : 
     721      261248 : void input_stack::remove_boundary()
     722             : {
     723      261248 :   assert(top->is_boundary());
     724      261248 :   input_iterator *temp = top->next;
     725      261248 :   check_end_diversion(top);
     726             : 
     727      261248 :   delete top;
     728      261248 :   top = temp;
     729      261248 :   level--;
     730      261248 : }
     731             : 
     732    17984645 : void input_stack::push(input_iterator *in)
     733             : {
     734    17984645 :   if (in == 0)
     735      207126 :     return;
     736    17777519 :   if (++level > limit && limit > 0)
     737           0 :     fatal("input stack limit of %1 levels exceeded"
     738           0 :           " (probable infinite loop)", limit);
     739    17777519 :   in->next = top;
     740    17777519 :   top = in;
     741    17777519 :   if (top->is_diversion) {
     742       46733 :     div_level++;
     743       46733 :     in->diversion_state = diversion_state;
     744       46733 :     diversion_state = curenv->construct_state(false);
     745             : #if defined(DEBUGGING)
     746             :     if (want_html_debugging) {
     747             :       curenv->dump_troff_state();
     748             :       fflush(stderr);
     749             :     }
     750             : #endif
     751             :   }
     752             : #if defined(DEBUGGING)
     753             :   if (want_html_debugging)
     754             :     if (top->is_diversion) {
     755             :       fprintf(stderr,
     756             :               "in diversion level = %d\n", input_stack::get_div_level());
     757             :       fflush(stderr);
     758             :     }
     759             : #endif
     760             : }
     761             : 
     762    13844223 : statem *get_diversion_state()
     763             : {
     764    13844223 :   return input_stack::get_diversion_state();
     765             : }
     766             : 
     767    13844223 : statem *input_stack::get_diversion_state()
     768             : {
     769    13844223 :   if (0 /* nullptr */ == diversion_state)
     770    13796189 :     return 0 /* nullptr */;
     771             :   else
     772       48034 :     return new statem(diversion_state);
     773             : }
     774             : 
     775     2664893 : input_iterator *input_stack::get_arg(int i)
     776             : {
     777             :   input_iterator *p;
     778     4533267 :   for (p = top; p != 0 /* nullptr */; p = p->next)
     779     4533267 :     if (p->has_args())
     780     2664893 :       return p->get_arg(i);
     781           0 :   return 0 /* nullptr */;
     782             : }
     783             : 
     784        1497 : arg_list *input_stack::get_arg_list()
     785             : {
     786             :   input_iterator *p;
     787        2247 :   for (p = top; p != 0 /* nullptr */; p = p->next)
     788        1944 :     if (p->has_args())
     789        1194 :       return p->get_arg_list();
     790         303 :   return 0 /* nullptr */;
     791             : }
     792             : 
     793        1497 : symbol input_stack::get_macro_name()
     794             : {
     795             :   input_iterator *p;
     796        2247 :   for (p = top; p != 0 /* nullptr */; p = p->next)
     797        1944 :     if (p->has_args())
     798        1194 :       return p->get_macro_name();
     799         303 :   return NULL_SYMBOL;
     800             : }
     801             : 
     802           0 : bool input_stack::space_follows_arg(int i)
     803             : {
     804             :   input_iterator *p;
     805           0 :   for (p = top; p != 0 /* nullptr */; p = p->next)
     806           0 :     if (p->has_args())
     807           0 :       return p->space_follows_arg(i);
     808           0 :   return false;
     809             : }
     810             : 
     811     9504705 : int input_stack::get_break_flag()
     812             : {
     813     9504705 :   return top->get_break_flag();
     814             : }
     815             : 
     816      267784 : void input_stack::shift(int n)
     817             : {
     818      680194 :   for (input_iterator *p = top; p != 0 /* nullptr */; p = p->next)
     819      680194 :     if (p->has_args()) {
     820      267784 :       p->shift(n);
     821      267784 :       return;
     822             :     }
     823             : }
     824             : 
     825      944483 : int input_stack::nargs()
     826             : {
     827     1668155 :   for (input_iterator *p =top; p != 0 /* nullptr */; p = p->next)
     828     1668152 :     if (p->has_args())
     829      944480 :       return p->nargs();
     830           3 :   return 0;
     831             : }
     832             : 
     833     4548488 : bool input_stack::get_location(bool allow_macro, const char **filenamep,
     834             :                               int *linenop)
     835             : {
     836     4855374 :   for (input_iterator *p = top; p != 0 /* nullptr */; p = p->next)
     837     4853512 :     if (p->get_location(allow_macro, filenamep, linenop))
     838     4546626 :       return true;
     839        1862 :   return false;
     840             : }
     841             : 
     842          34 : void input_stack::backtrace()
     843             : {
     844         131 :   for (input_iterator *p = top; p != 0 /* nullptr */; p = p->next)
     845          97 :     p->backtrace();
     846          34 : }
     847             : 
     848       22852 : bool input_stack::set_location(const char *filename, int lineno)
     849             : {
     850       22855 :   for (input_iterator *p = top; p != 0 /* nullptr */; p = p->next)
     851       22855 :     if (p->set_location(filename, lineno))
     852       22852 :       return true;
     853           0 :   return false;
     854             : }
     855             : 
     856           0 : void input_stack::next_file(FILE *fp, const char *s)
     857             : {
     858             :   input_iterator **pp;
     859           0 :   for (pp = &top; *pp != &nil_iterator; pp = &(*pp)->next)
     860           0 :     if ((*pp)->next_file(fp, s))
     861           0 :       return;
     862           0 :   if (++level > limit && limit > 0)
     863           0 :     fatal("input stack limit of %1 levels exceeded", limit);
     864           0 :   *pp = new file_iterator(fp, s);
     865           0 :   (*pp)->next = &nil_iterator;
     866             : }
     867             : 
     868          43 : void input_stack::end_file()
     869             : {
     870          43 :   for (input_iterator **pp = &top;
     871          43 :        *pp != &nil_iterator;
     872           0 :        pp = &(*pp)->next)
     873          43 :     if ((*pp)->is_file()) {
     874          43 :       input_iterator *tem = *pp;
     875          43 :       check_end_diversion(tem);
     876          43 :       *pp = (*pp)->next;
     877          43 :       delete tem;
     878          43 :       level--;
     879          43 :       return;
     880             :     }
     881             : }
     882             : 
     883          32 : void input_stack::clear()
     884             : {
     885          32 :   int nboundaries = 0;
     886         270 :   while (top != &nil_iterator) {
     887         238 :     if (top->is_boundary())
     888           1 :       nboundaries++;
     889         238 :     input_iterator *tem = top;
     890         238 :     check_end_diversion(tem);
     891         238 :     top = top->next;
     892         238 :     level--;
     893         238 :     delete tem;
     894             :   }
     895             :   // Keep while_request happy.
     896          33 :   for (; nboundaries > 0; --nboundaries)
     897           1 :     add_return_boundary();
     898          32 : }
     899             : 
     900      112992 : void input_stack::pop_macro()
     901             : {
     902      112992 :   int nboundaries = 0;
     903      112992 :   bool is_macro = false;
     904       26398 :   do {
     905      139390 :     if (top->next == &nil_iterator)
     906           0 :       break;
     907      139390 :     if (top->is_boundary())
     908       13199 :       nboundaries++;
     909      139390 :     is_macro = top->is_macro();
     910      139390 :     input_iterator *tem = top;
     911      139390 :     check_end_diversion(tem);
     912      139390 :     top = top->next;
     913      139390 :     level--;
     914      139390 :     delete tem;
     915      139390 :   } while (!is_macro);
     916             :   // Keep while_request happy.
     917      126191 :   for (; nboundaries > 0; --nboundaries)
     918       13199 :     add_return_boundary();
     919      112992 : }
     920             : 
     921     2498331 : inline void input_stack::set_att_compat(bool b)
     922             : {
     923     2498331 :   top->set_att_compat(b);
     924     2498331 : }
     925             : 
     926     2493756 : inline bool input_stack::get_att_compat()
     927             : {
     928     2493756 :   return top->get_att_compat();
     929             : }
     930             : 
     931           0 : void backtrace_request()
     932             : {
     933           0 :   input_stack::backtrace();
     934           0 :   fflush(stderr);
     935           0 :   skip_line();
     936           0 : }
     937             : 
     938          43 : void next_file()
     939             : {
     940          43 :   char *filename = 0 /* nullptr */;
     941          43 :   if (has_arg(true /* peek */)) {
     942           0 :     filename = read_rest_of_line_as_argument();
     943           0 :     tok.next();
     944             :   }
     945          43 :   if (0 /* nullptr */ == filename)
     946          43 :     input_stack::end_file();
     947             :   else {
     948           0 :     errno = 0;
     949           0 :     FILE *fp = include_search_path.open_file_cautiously(filename);
     950           0 :     if (0 /* nullptr */ == fp)
     951           0 :       error("cannot open '%1': %2", filename, strerror(errno));
     952             :     else
     953           0 :       input_stack::next_file(fp, filename);
     954             :   }
     955             :   // TODO: Add `filename` to file name set.
     956          43 :   tok.next();
     957          43 : }
     958             : 
     959      267784 : void shift()
     960             : {
     961             :   int n;
     962      267784 :   if (!has_arg() || !read_integer(&n))
     963      236253 :     n = 1;
     964      267784 :   input_stack::shift(n);
     965      267784 :   skip_line();
     966      267784 : }
     967             : 
     968             : // TODO: return unsigned char (future: grochar)?  We handle EOF here.
     969   108425188 : static char read_char_in_escape_sequence_parameter(bool allow_space
     970             :                                                    = false)
     971             : {
     972   108425188 :   int c = read_char_in_copy_mode(0 /* nullptr */,
     973             :                                  false /* is_defining */,
     974             :                                  true /* handle_escaped_E  */);
     975   108425188 :   switch (c) {
     976           0 :   case EOF:
     977           0 :     copy_mode_error("end of input in escape sequence");
     978           0 :     return '\0';
     979   108355619 :   default:
     980   108355619 :     if (!is_invalid_input_char(c))
     981   108355619 :       break;
     982             :     // fall through
     983             :   case '\n':
     984           0 :     if (c == '\n')
     985           0 :       input_stack::push(make_temp_iterator("\n"));
     986             :     // fall through
     987             :   case ' ':
     988       69569 :     if (c == ' ' && allow_space)
     989       69569 :       break;
     990             :     // fall through
     991             :   case '\t':
     992             :   case '\001':
     993             :   case '\b':
     994           0 :     copy_mode_error("%1 is not allowed in an escape sequence argument",
     995           0 :                     input_char_description(c));
     996           0 :     return '\0';
     997             :   }
     998   108425188 :   return c;
     999             : }
    1000             : 
    1001      169398 : static symbol read_two_char_escape_parameter()
    1002             : {
    1003             :   char buf[3];
    1004      169398 :   buf[0] = read_char_in_escape_sequence_parameter();
    1005      169398 :   if (buf[0] != '\0') {
    1006      169398 :     buf[1] = read_char_in_escape_sequence_parameter();
    1007      169398 :     if (buf[1] == '\0')
    1008           0 :       buf[0] = '\0';
    1009             :     else
    1010      169398 :       buf[2] = '\0';
    1011             :   }
    1012      169398 :   return symbol(buf);
    1013             : }
    1014             : 
    1015    13989373 : static symbol read_long_escape_parameters(read_mode mode)
    1016             : {
    1017    13989373 :   int start_level = input_stack::get_level();
    1018    13989373 :   int buf_size = default_buffer_size;
    1019    13989373 :   char *buf = 0 /* nullptr */;
    1020             :   try {
    1021             :     // C++03: new char[buf_size]();
    1022    13989373 :     buf = new char[buf_size];
    1023             :   }
    1024           0 :   catch (const std::bad_alloc &e) {
    1025           0 :     fatal("cannot allocate %1 bytes to read input line", buf_size);
    1026             :   }
    1027    13989373 :   (void) memset(buf, 0, (buf_size * sizeof(char)));
    1028    13989373 :   int i = 0;
    1029             :   char c;
    1030    13989373 :   bool have_char = false;
    1031             :   for (;;) {
    1032    91610719 :     c = read_char_in_escape_sequence_parameter(have_char
    1033    91610719 :                                                && (WITH_ARGS == mode));
    1034    91610719 :     if ('\0' == c) {
    1035           0 :       delete[] buf;
    1036           0 :       return NULL_SYMBOL;
    1037             :     }
    1038    91610719 :     have_char = true;
    1039    91610719 :     if ((WITH_ARGS == mode) && (' ' == c))
    1040       69569 :       break;
    1041    91541150 :     if (i + 2 > buf_size) {
    1042      606483 :       char *old_buf = buf;
    1043      606483 :       int new_buf_size = buf_size * 2;
    1044             :       // C++03: new char[new_buf_size]();
    1045             :       try {
    1046      606483 :         buf = new char[new_buf_size];
    1047             :       }
    1048           0 :       catch (const std::bad_alloc &e) {
    1049           0 :         fatal("cannot allocate %1 bytes to read input line", buf_size);
    1050             :       }
    1051      606483 :       (void) memset(buf, 0, (new_buf_size * sizeof(char)));
    1052      606483 :       (void) memcpy(buf, old_buf, buf_size);
    1053      606483 :       buf_size = new_buf_size;
    1054      606483 :       delete[] old_buf;
    1055             :     }
    1056    91541150 :     if ((']' == c) && (input_stack::get_level() == start_level))
    1057    13919804 :       break;
    1058    77621346 :     buf[i++] = c;
    1059    77621346 :   }
    1060    13989373 :   buf[i] = '\0';
    1061    13989373 :   if (0 == i) {
    1062       42379 :     if (mode != ALLOW_EMPTY)
    1063             :       // XXX: `.device \[]` passes through as-is but `\X \[]` doesn't,
    1064             :       // landing here.  Implement almost-but-not-quite-copy-mode?
    1065           0 :       copy_mode_error("empty escape sequence argument");
    1066       42379 :     return EMPTY_SYMBOL;
    1067             :   }
    1068    13946994 :   if (' ' == c)
    1069       69569 :     have_multiple_params = true;
    1070    13946994 :   symbol s(buf);
    1071    13946994 :   delete[] buf;
    1072    13946994 :   return s;
    1073             : }
    1074             : 
    1075     7803371 : static symbol read_escape_parameter(read_mode mode)
    1076             : {
    1077     7803371 :   char c = read_char_in_escape_sequence_parameter();
    1078     7803371 :   if ('\0' == c)
    1079           0 :     return NULL_SYMBOL;
    1080     7803371 :   if ('(' == c)
    1081        8207 :     return read_two_char_escape_parameter();
    1082     7795164 :   if (('[' == c) && !want_att_compat)
    1083     5253723 :     return read_long_escape_parameters(mode);
    1084             :   char buf[2];
    1085     2541441 :   buf[0] = c;
    1086     2541441 :   buf[1] = '\0';
    1087     2541441 :   return symbol(buf);
    1088             : }
    1089             : 
    1090     8672302 : static symbol read_increment_and_escape_parameter(int *incp)
    1091             : {
    1092     8672302 :   char c = read_char_in_escape_sequence_parameter();
    1093     8672302 :   switch (c) {
    1094           0 :   case 0:
    1095           0 :     *incp = 0;
    1096           0 :     return NULL_SYMBOL;
    1097      157772 :   case '(':
    1098      157772 :     *incp = 0;
    1099      157772 :     return read_two_char_escape_parameter();
    1100      476085 :   case '+':
    1101      476085 :     *incp = 1;
    1102      476085 :     return read_escape_parameter();
    1103       98022 :   case '-':
    1104       98022 :     *incp = -1;
    1105       98022 :     return read_escape_parameter();
    1106     7935562 :   case '[':
    1107     7935562 :     if (!want_att_compat) {
    1108     7935562 :       *incp = 0;
    1109     7935562 :       return read_long_escape_parameters();
    1110             :     }
    1111           0 :     break;
    1112             :   }
    1113        4861 :   *incp = 0;
    1114             :   char buf[2];
    1115        4861 :   buf[0] = c;
    1116        4861 :   buf[1] = '\0';
    1117        4861 :   return symbol(buf);
    1118             : }
    1119             : 
    1120             : // In copy mode, we don't tokenize normally; characters on the input
    1121             : // stream are typically read into the contents of an existing node (like
    1122             : // a string or macro definition), or discarded.  A handful of escape
    1123             : // sequences (\n, etc.) interpolate as they do outside of copy mode.
    1124   205980202 : static int read_char_in_copy_mode(node **nd,
    1125             :                                   bool is_defining,
    1126             :                                   bool handle_escaped_E)
    1127             : {
    1128             :   for (;;) {
    1129   205980202 :     int c = input_stack::get(nd);
    1130   205980202 :     if (c == PUSH_GROFF_MODE) {
    1131     1054433 :       input_stack::set_att_compat(want_att_compat);
    1132     1054433 :       want_att_compat = false;
    1133     1054433 :       continue;
    1134             :     }
    1135   204925769 :     if (c == PUSH_COMP_MODE) {
    1136           0 :       input_stack::set_att_compat(want_att_compat);
    1137           0 :       want_att_compat = true;
    1138           0 :       continue;
    1139             :     }
    1140   204925769 :     if (c == POP_GROFFCOMP_MODE) {
    1141     1059102 :       want_att_compat = input_stack::get_att_compat();
    1142     1059102 :       continue;
    1143             :     }
    1144   203866667 :     if (c == BEGIN_QUOTE) {
    1145      130923 :       input_stack::increase_level();
    1146      130923 :       continue;
    1147             :     }
    1148   203735744 :     if (c == END_QUOTE) {
    1149      130923 :       input_stack::decrease_level();
    1150      130923 :       continue;
    1151             :     }
    1152   203604821 :     if (c == DOUBLE_QUOTE)
    1153      464636 :       continue;
    1154   203140185 :     if ((c == ESCAPE_E) && handle_escaped_E)
    1155        1511 :       c = escape_char;
    1156   203140185 :     if (c == ESCAPE_NEWLINE) {
    1157       42095 :       if (is_defining)
    1158         810 :         return c;
    1159           0 :       do {
    1160       41285 :         c = input_stack::get(nd);
    1161       41285 :       } while (c == ESCAPE_NEWLINE);
    1162             :     }
    1163   203139375 :     if ((c != escape_char) || (0U == escape_char))
    1164   194695036 :       return c;
    1165     8444339 :   again:
    1166     8444339 :     c = input_stack::peek();
    1167     8444339 :     switch (c) {
    1168           0 :     case 0:
    1169           0 :       return escape_char;
    1170      281439 :     case '"':
    1171      281439 :       (void) input_stack::get(0 /* nullptr */);
    1172     7122459 :       while ((c = input_stack::get(0 /* nullptr */)) != '\n'
    1173     7122459 :              && c != EOF)
    1174             :         ;
    1175      281439 :       return c;
    1176        5961 :     case '#':                   // Like \" but newline is ignored.
    1177        5961 :       (void) input_stack::get(0 /* nullptr */);
    1178      312884 :       while ((c = input_stack::get(0 /* nullptr */)) != '\n')
    1179      306923 :         if (c == EOF)
    1180           0 :           return EOF;
    1181        5961 :       break;
    1182     1030821 :     case '$':
    1183             :       {
    1184     1030821 :         (void) input_stack::get(0 /* nullptr */);
    1185     1030821 :         symbol s = read_escape_parameter();
    1186     1030821 :         if (!(s.is_null() || s.is_empty()))
    1187     1030821 :           interpolate_positional_parameter(s);
    1188     1030821 :         break;
    1189             :       }
    1190     2704940 :     case '*':
    1191             :       {
    1192     2704940 :         (void) input_stack::get(0 /* nullptr */);
    1193     2704940 :         symbol s = read_escape_parameter(WITH_ARGS);
    1194     2704940 :         if (!(s.is_null() || s.is_empty())) {
    1195     2704940 :           if (have_multiple_params) {
    1196         156 :             have_multiple_params = false;
    1197         156 :             interpolate_string_with_args(s);
    1198             :           }
    1199             :           else
    1200     2704784 :             interpolate_string(s);
    1201             :         }
    1202     2704940 :         break;
    1203             :       }
    1204           1 :     case 'a':
    1205           1 :       (void) input_stack::get(0 /* nullptr */);
    1206           1 :       return '\001';
    1207         573 :     case 'e':
    1208         573 :       (void) input_stack::get(0 /* nullptr */);
    1209         573 :       return ESCAPE_e;
    1210       42268 :     case 'E':
    1211       42268 :       (void) input_stack::get(0 /* nullptr */);
    1212       42268 :       if (handle_escaped_E)
    1213           0 :         goto again;
    1214       42268 :       return ESCAPE_E;
    1215     1791336 :     case 'n':
    1216             :       {
    1217     1791336 :         (void) input_stack::get(0 /* nullptr */);
    1218             :         int inc;
    1219     1791336 :         symbol s = read_increment_and_escape_parameter(&inc);
    1220     1791336 :         if (!(s.is_null() || s.is_empty()))
    1221     1791336 :           interpolate_register(s, inc);
    1222     1791336 :         break;
    1223             :       }
    1224         366 :     case 'g':
    1225             :       {
    1226         366 :         (void) input_stack::get(0 /* nullptr */);
    1227         366 :         symbol s = read_escape_parameter();
    1228         366 :         if (!(s.is_null() || s.is_empty()))
    1229         366 :           interpolate_number_format(s);
    1230         366 :         break;
    1231             :       }
    1232         976 :     case 't':
    1233         976 :       (void) input_stack::get(0 /* nullptr */);
    1234         976 :       return '\t';
    1235           2 :     case 'V':
    1236             :       {
    1237           2 :         (void) input_stack::get(0 /* nullptr */);
    1238           2 :         symbol s = read_escape_parameter();
    1239           2 :         if (!(s.is_null() || s.is_empty()))
    1240           2 :           interpolate_environment_variable(s);
    1241           2 :         break;
    1242             :       }
    1243      339830 :     case '\n':
    1244      339830 :       (void) input_stack::get(0 /* nullptr */);
    1245      339830 :       if (is_defining)
    1246      338272 :         return ESCAPE_NEWLINE;
    1247        1558 :       break;
    1248        3803 :     case ' ':
    1249        3803 :       (void) input_stack::get(0 /* nullptr */);
    1250        3803 :       return ESCAPE_SPACE;
    1251        8894 :     case '~':
    1252        8894 :       (void) input_stack::get(0 /* nullptr */);
    1253        8894 :       return ESCAPE_TILDE;
    1254        6809 :     case ':':
    1255        6809 :       (void) input_stack::get(0 /* nullptr */);
    1256        6809 :       return ESCAPE_COLON;
    1257       12100 :     case '|':
    1258       12100 :       (void) input_stack::get(0 /* nullptr */);
    1259       12100 :       return ESCAPE_BAR;
    1260        1666 :     case '^':
    1261        1666 :       (void) input_stack::get(0 /* nullptr */);
    1262        1666 :       return ESCAPE_CIRCUMFLEX;
    1263      177124 :     case '{':
    1264      177124 :       (void) input_stack::get(0 /* nullptr */);
    1265      177124 :       return ESCAPE_LEFT_BRACE;
    1266      177440 :     case '}':
    1267      177440 :       (void) input_stack::get(0 /* nullptr */);
    1268      177440 :       return ESCAPE_RIGHT_BRACE;
    1269         353 :     case '`':
    1270         353 :       (void) input_stack::get(0 /* nullptr */);
    1271         353 :       return ESCAPE_LEFT_QUOTE;
    1272         987 :     case '\'':
    1273         987 :       (void) input_stack::get(0 /* nullptr */);
    1274         987 :       return ESCAPE_RIGHT_QUOTE;
    1275        8662 :     case '-':
    1276        8662 :       (void) input_stack::get(0 /* nullptr */);
    1277        8662 :       return ESCAPE_HYPHEN;
    1278           0 :     case '_':
    1279           0 :       (void) input_stack::get(0 /* nullptr */);
    1280           0 :       return ESCAPE_UNDERSCORE;
    1281       11175 :     case 'c':
    1282       11175 :       (void) input_stack::get(0 /* nullptr */);
    1283       11175 :       return ESCAPE_c;
    1284       10595 :     case '!':
    1285       10595 :       (void) input_stack::get(0 /* nullptr */);
    1286       10595 :       return ESCAPE_BANG;
    1287       31038 :     case '?':
    1288       31038 :       (void) input_stack::get(0 /* nullptr */);
    1289       31038 :       return ESCAPE_QUESTION;
    1290        9008 :     case '&':
    1291        9008 :       (void) input_stack::get(0 /* nullptr */);
    1292        9008 :       return ESCAPE_AMPERSAND;
    1293        2437 :     case ')':
    1294        2437 :       (void) input_stack::get(0 /* nullptr */);
    1295        2437 :       return ESCAPE_RIGHT_PARENTHESIS;
    1296         386 :     case '.':
    1297         386 :       (void) input_stack::get(0 /* nullptr */);
    1298         386 :       return c;
    1299        9986 :     case '%':
    1300        9986 :       (void) input_stack::get(0 /* nullptr */);
    1301        9986 :       return ESCAPE_PERCENT;
    1302     1773363 :     default:
    1303     1773363 :       if (c == escape_char) {
    1304     1025765 :         (void) input_stack::get(0 /* nullptr */);
    1305     1025765 :         return c;
    1306             :       }
    1307             :       else
    1308      747598 :         return escape_char;
    1309             :     }
    1310     8375001 :   }
    1311             : }
    1312             : 
    1313             : // \a or \t
    1314             : class non_interpreted_char_node : public node { // Node holding a single nonzero byte.
    1315             :   unsigned char c; // the stored byte; the constructor asserts it is nonzero
    1316             : public:
    1317             :   non_interpreted_char_node(unsigned char);
    1318             :   void asciify(macro *); // deletes this node; the macro argument is unused
    1319             :   node *copy(); // returns a newly allocated node holding the same byte
    1320             :   bool interpret(macro *); // appends the stored byte to the macro; returns true
    1321             :   bool is_same_as(node *); // compares the stored bytes of both nodes
    1322             :   const char *type(); // returns a fixed descriptive string
    1323             :   bool causes_tprint(); // always false
    1324             :   bool is_tag(); // always false
    1325             : };
    1326             : 
    1327           0 : bool non_interpreted_char_node::is_same_as(node *nd) // True when both nodes store the same byte.
    1328             : { // NOTE(review): the static_cast below is unchecked; presumably callers only pass a node of this same type -- confirm.
    1329           0 :   return c == static_cast<non_interpreted_char_node *>(nd)->c;
    1330             : }
    1331             : 
    1332           0 : const char *non_interpreted_char_node::type() // Returns a fixed, human-readable name for this node class.
    1333             : {
    1334           0 :   return "non-interpreted character node";
    1335             : }
    1336             : 
    1337           0 : bool non_interpreted_char_node::causes_tprint() // Always reports false for this node class.
    1338             : {
    1339           0 :   return false;
    1340             : }
    1341             : 
    1342           2 : bool non_interpreted_char_node::is_tag() // Always reports false for this node class.
    1343             : {
    1344           2 :   return false;
    1345             : }
    1346             : 
    1347         326 : non_interpreted_char_node::non_interpreted_char_node(unsigned char cc) : c(cc) // Store the byte `cc` in the node.
    1348             : {
    1349         326 :   assert(cc != 0U); // a zero byte is not a valid payload for this node
    1350         326 : }
    1351             : 
    1352           0 : void non_interpreted_char_node::asciify(macro *) // Discards the node; the (unnamed) macro argument is not touched.
    1353             : {
    1354           0 :   delete this; // the object must not be used after this call returns
    1355           0 : }
    1356             : 
    1357           0 : node *non_interpreted_char_node::copy() // Returns a newly allocated node holding the same byte.
    1358             : {
    1359           0 :   return new non_interpreted_char_node(c);
    1360             : }
    1361             : 
    1362         323 : bool non_interpreted_char_node::interpret(macro *mac) // Appends the stored byte to `mac`.
    1363             : {
    1364         323 :   mac->append(c);
    1365         323 :   return true; // unconditionally true
    1366             : }
    1367             : 
    1368             : // forward declarations (file-local functions declared here; their definitions are not in this part of the file)
    1369             : static void do_width();
    1370             : static node *do_non_interpreted();
    1371             : static node *do_device_extension();
    1372             : static node *do_suppress(symbol nm);
    1373             : static void do_register();
    1374             : 
    1375             : dictionary color_dictionary(501); // Maps color names to `color` objects. NOTE(review): 501 is presumably an initial table size -- confirm against the dictionary constructor.
    1376             : 
    1377      143299 : static color *lookup_color(symbol nm) // Resolve the color name `nm` to a color object.
    1378             : { // Returns &default_color for default_symbol, otherwise the color_dictionary entry, or null if there is none.
    1379      143299 :   assert(!nm.is_null()); // callers must filter out null names first
    1380      143299 :   if (nm == default_symbol)
    1381       12352 :     return &default_color;
    1382      130947 :   color *c = static_cast<color *>(color_dictionary.lookup(nm));
    1383      130947 :   if (0 == c /* nullptr */)
    1384           0 :     warning(WARN_COLOR, "color '%1' not defined", nm.contents()); // warn, but still return the null pointer below
    1385      130947 :   return c;
    1386             : }
    1387             : 
    1388      110863 : void do_stroke_color(symbol nm) // \m: set the stroke color of the current environment by name
    1389             : {
    1390      110863 :   if (nm.is_null()) // null name: nothing to do
    1391           0 :     return;
    1392      110863 :   if (nm.is_empty()) // empty name: switch back to the previous stroke color
    1393         216 :     curenv->set_stroke_color(curenv->get_prev_stroke_color());
    1394             :   else {
    1395      110647 :     color *tem = lookup_color(nm); // null (with a warning) if the name is not defined
    1396      110647 :     if (tem != 0 /* nullptr */)
    1397      110647 :       curenv->set_stroke_color(tem);
    1398             :     else // unknown name: the stroke color is left unchanged
    1399           0 :       (void) color_dictionary.lookup(nm, new color(nm)); // registers a new color(nm) under the name; presumably so later lookups find it -- TODO confirm
    1400             :   }
    1401             : }
    1402             : 
    1403       65073 : void do_fill_color(symbol nm) // \M: set the fill color of the current environment by name
    1404             : {
    1405       65073 :   if (nm.is_null()) // null name: nothing to do
    1406           0 :     return;
    1407       65073 :   if (nm.is_empty()) // empty name: switch back to the previous fill color
    1408       32421 :     curenv->set_fill_color(curenv->get_prev_fill_color());
    1409             :   else {
    1410       32652 :     color *tem = lookup_color(nm); // null (with a warning) if the name is not defined
    1411       32652 :     if (tem != 0 /* nullptr */)
    1412       32652 :       curenv->set_fill_color(tem);
    1413             :     else // unknown name: the fill color is left unchanged
    1414           0 :       (void) color_dictionary.lookup(nm, new color(nm)); // registers a new color(nm) under the name; presumably so later lookups find it -- TODO confirm
    1415             :   }
    1416             : }
    1417             : 
    1418        1980 : static unsigned int read_color_channel_value(const char *scheme, // Read one color channel value from the input via read_measurement().
    1419             :                                              const char *col) // `scheme` and `col` are used only in diagnostic messages.
    1420             : { // Returns the value read, 0 on a read failure or negative value, or color::MAX_COLOR_VAL when the value is too large.
    1421             :   units val;
    1422        1980 :   if (!read_measurement(&val, (unsigned char)('f'))) { // TODO: grochar
    1423           0 :     warning(WARN_COLOR, "%1 in %2 definition set to 0", col, scheme);
    1424           0 :     tok.next(); // advance past the current token before giving up on this channel
    1425           0 :     return 0;
    1426             :   }
    1427        1980 :   if (val < 0) {
    1428           0 :     warning(WARN_RANGE, "%1 cannot be negative: set to 0", col);
    1429           0 :     return 0;
    1430             :   }
    1431        1980 :   if (val > color::MAX_COLOR_VAL+1) { // strictly greater: a value of exactly MAX_COLOR_VAL+1 is not clamped here
    1432           0 :     warning(WARN_RANGE, "%1 cannot be greater than 1", col);
    1433             :     // we change 0x10000 to 0xffff -- NOTE(review): this branch only clamps values above MAX_COLOR_VAL+1; the boundary value itself is returned unchanged below, so any such mapping presumably happens in the caller or in class color -- confirm
    1434           0 :     return color::MAX_COLOR_VAL;
    1435             :   }
    1436        1980 :   return (unsigned int)(val);
    1437             : }
    1438             : 
    1439      147600 : static color *read_rgb(unsigned char end = 0U) // Read an RGB color specification from the input.
    1440             : { // Returns a newly allocated color the caller takes over, or null (after a warning) on failure.
    1441             :   symbol component = read_input_until_terminator(false /* required */,
    1442      147600 :                                                  end);
    1443      147600 :   if (component.is_null()) {
    1444           0 :     warning(WARN_COLOR, "missing rgb color values");
    1445           0 :     return 0 /* nullptr */;
    1446             :   }
    1447      147600 :   const char *s = component.contents();
    1448      147600 :   color *col = new color;
    1449      147600 :   if ('#' == *s) { // leading '#': let color::read_rgb() parse the whole string
    1450      147004 :     if (!col->read_rgb(s)) {
    1451           0 :       warning(WARN_COLOR, "expecting rgb color definition,"
    1452           0 :               " not '%1'", s);
    1453           0 :       delete col; // do not leak the half-built color
    1454           0 :       return 0 /* nullptr */;
    1455             :     }
    1456             :   }
    1457             :   else { // otherwise re-scan the text as three channel measurements
    1458         596 :     if (!end)
    1459         596 :       input_stack::push(make_temp_iterator(" ")); // pushed before `s`, so presumably read after it -- TODO confirm stack order
    1460         596 :     input_stack::push(make_temp_iterator(s));
    1461         596 :     tok.next();
    1462         596 :     unsigned int r = read_color_channel_value("rgb color",
    1463             :                                               "red component");
    1464         596 :     unsigned int g = read_color_channel_value("rgb color",
    1465             :                                               "green component");
    1466         596 :     unsigned int b = read_color_channel_value("rgb color",
    1467             :                                               "blue component");
    1468         596 :     col->set_rgb(r, g, b);
    1469             :   }
    1470      147600 :   return col;
    1471             : }
    1472             : 
    1473           0 : static color *read_cmy(unsigned char end = 0U) // Read a CMY color specification from the input.
    1474             : { // Returns a newly allocated color the caller takes over, or null (after a warning) on failure.
    1475             :   symbol component = read_input_until_terminator(false /* required */,
    1476           0 :                                                  end);
    1477           0 :   if (component.is_null()) {
    1478           0 :     warning(WARN_COLOR, "missing cmy color values");
    1479           0 :     return 0 /* nullptr */;
    1480             :   }
    1481           0 :   const char *s = component.contents();
    1482           0 :   color *col = new color;
    1483           0 :   if ('#' == *s) { // leading '#': let color::read_cmy() parse the whole string
    1484           0 :     if (!col->read_cmy(s)) {
    1485           0 :       warning(WARN_COLOR, "expecting cmy color definition,"
    1486           0 :               " not '%1'", s);
    1487           0 :       delete col; // do not leak the half-built color
    1488           0 :       return 0 /* nullptr */;
    1489             :     }
    1490             :   }
    1491             :   else { // otherwise re-scan the text as three channel measurements
    1492           0 :     if (!end)
    1493           0 :       input_stack::push(make_temp_iterator(" ")); // pushed before `s`, so presumably read after it -- TODO confirm stack order
    1494           0 :     input_stack::push(make_temp_iterator(s));
    1495           0 :     tok.next();
    1496           0 :     unsigned int c = read_color_channel_value("cmy color",
    1497             :                                               "cyan component");
    1498           0 :     unsigned int m = read_color_channel_value("cmy color",
    1499             :                                               "magenta component");
    1500           0 :     unsigned int y = read_color_channel_value("cmy color",
    1501             :                                               "yellow component");
    1502           0 :     col->set_cmy(c, m, y);
    1503             :   }
    1504           0 :   return col;
    1505             : }
    1506             : 
    1507           0 : static color *read_cmyk(unsigned char end = 0U) // Read a CMYK color specification from the input.
    1508             : { // Returns a newly allocated color the caller takes over, or null (after a warning) on failure.
    1509             :   symbol component = read_input_until_terminator(false /* required */,
    1510           0 :                                                  end);
    1511           0 :   if (component.is_null()) {
    1512           0 :     warning(WARN_COLOR, "missing cmyk color values");
    1513           0 :     return 0 /* nullptr */;
    1514             :   }
    1515           0 :   const char *s = component.contents();
    1516           0 :   color *col = new color;
    1517           0 :   if ('#' == *s) { // leading '#': let color::read_cmyk() parse the whole string
    1518           0 :     if (!col->read_cmyk(s)) {
    1519           0 :       warning(WARN_COLOR, "expecting cmyk color definition,"
    1520           0 :               " not '%1'", s);
    1521           0 :       delete col; // do not leak the half-built color
    1522           0 :       return 0 /* nullptr */;
    1523             :     }
    1524             :   }
    1525             :   else { // otherwise re-scan the text as four channel measurements
    1526           0 :     if (!end)
    1527           0 :       input_stack::push(make_temp_iterator(" ")); // pushed before `s`, so presumably read after it -- TODO confirm stack order
    1528           0 :     input_stack::push(make_temp_iterator(s));
    1529           0 :     tok.next();
    1530           0 :     unsigned int c = read_color_channel_value("cmyk color",
    1531             :                                               "cyan component");
    1532           0 :     unsigned int m = read_color_channel_value("cmyk color",
    1533             :                                               "magenta component");
    1534           0 :     unsigned int y = read_color_channel_value("cmyk color",
    1535             :                                               "yellow component");
    1536           0 :     unsigned int k = read_color_channel_value("cmyk color",
    1537             :                                               "black component");
    1538           0 :     col->set_cmyk(c, m, y, k);
    1539             :   }
    1540           0 :   return col;
    1541             : }
    1542             : 
    1543         192 : static color *read_gray(unsigned char end = 0U) // Read a gray-level color specification from the input.
    1544             : { // Returns a newly allocated color the caller takes over, or null (after a warning) on failure.
    1545             :   symbol component = read_input_until_terminator(false /* required */,
    1546         192 :                                                  end);
    1547         192 :   if (component.is_null()) {
    1548           0 :     warning(WARN_COLOR, "missing gray value");
    1549           0 :     return 0 /* nullptr */;
    1550             :   }
    1551         192 :   const char *s = component.contents();
    1552         192 :   color *col = new color;
    1553         192 :   if ('#' == *s) { // leading '#': let color::read_gray() parse the whole string
    1554           0 :     if (!col->read_gray(s)) {
    1555           0 :       warning(WARN_COLOR, "expecting gray definition,"
    1556           0 :               " not '%1'", s);
    1557           0 :       delete col; // do not leak the half-built color
    1558           0 :       return 0 /* nullptr */;
    1559             :     }
    1560             :   }
    1561             :   else { // otherwise re-scan the text as a single channel measurement
    1562         192 :     if (!end)
    1563           3 :       input_stack::push(make_temp_iterator("\n")); // a newline here, where the rgb/cmy/cmyk readers push a space
    1564         192 :     input_stack::push(make_temp_iterator(s));
    1565         192 :     tok.next();
    1566         192 :     unsigned int g = read_color_channel_value("gray", "gray value");
    1567         192 :     col->set_gray(g);
    1568             :   }
    1569         192 :   return col;
    1570             : }
    1571             : 
    1572           0 : static void activate_color() // Request handler: set want_color_output from an optional integer argument.
    1573             : {
    1574             :   int n;
    1575           0 :   bool is_color_desired = false;
    1576           0 :   if (has_arg() && read_integer(&n))
    1577           0 :     is_color_desired = (n > 0); // a readable argument enables color only when positive
    1578             :   else
    1579           0 :     is_color_desired = true; // taken both with no argument and when the argument cannot be read as an integer
    1580           0 :   if (is_color_desired && !permit_color_output) { // color was requested but is not permitted
    1581           0 :     error("color output disabled via command line");
    1582           0 :     is_color_desired = false;
    1583             :   }
    1584           0 :   want_color_output = is_color_desired;
    1585           0 :   skip_line();
    1586           0 : }
    1587             : 
    1588      147603 : static void define_color() // Request handler: read "<name> <color space> <values>" and store the color in color_dictionary.
    1589             : {
    1590      147603 :   if (!has_arg()) {
    1591           0 :     warning(WARN_MISSING, "color definition request expects arguments");
    1592           0 :     skip_line();
    1593           0 :     return;
    1594             :   }
    1595      147603 :   symbol color_name = read_long_identifier();
    1596             :   // Testing has_arg() should have ensured this.
    1597      147603 :   assert(color_name != 0 /* nullptr */);
    1598      147603 :   if (color_name == default_symbol) { // the default color is reserved
    1599           0 :     warning(WARN_COLOR, "default color cannot be redefined");
    1600           0 :     skip_line();
    1601           0 :     return;
    1602             :   }
    1603      147603 :   symbol color_space = read_long_identifier();
    1604      147603 :   if (color_space.is_null()) {
    1605           0 :     warning(WARN_MISSING, "missing color space in color definition"
    1606             :             " request");
    1607           0 :     skip_line();
    1608           0 :     return;
    1609             :   }
    1610             :   color *col; // assigned by every branch below that does not return
    1611      147603 :   if (strcmp(color_space.contents(), "rgb") == 0)
    1612      147600 :     col = read_rgb();
    1613           3 :   else if (strcmp(color_space.contents(), "cmyk") == 0)
    1614           0 :     col = read_cmyk();
    1615           3 :   else if (strcmp(color_space.contents(), "gray") == 0)
    1616           3 :     col = read_gray();
    1617           0 :   else if (strcmp(color_space.contents(), "grey") == 0) // alternate spelling, handled like "gray"
    1618           0 :     col = read_gray();
    1619           0 :   else if (strcmp(color_space.contents(), "cmy") == 0)
    1620           0 :     col = read_cmy();
    1621             :   else {
    1622           0 :     warning(WARN_COLOR, "unknown color space '%1';"
    1623             :             " use 'rgb', 'cmyk', 'gray' or 'cmy'",
    1624           0 :             color_space.contents());
    1625           0 :     skip_line();
    1626           0 :     return;
    1627             :   }
    1628      147603 :   if (col != 0 /* nullptr */) { // the readers return null after warning about bad values
    1629      147603 :     col->nm = color_name;
    1630      147603 :     (void) color_dictionary.lookup(color_name, col); // stores col under the name; NOTE(review): what happens to a previous entry is not visible here -- confirm
    1631             :   }
    1632      147603 :   skip_line();
    1633             : }
    1634             : 
    1635           0 : static void print_color_request() // Request handler: report named colors, or every color in color_dictionary, via errprint().
    1636             : {
    1637           0 :   symbol key;
    1638             :   color *value;
    1639           0 :   if (has_arg()) { // arguments given: report only the named colors
    1640           0 :     do {
    1641           0 :       key = read_identifier();
    1642           0 :       value = static_cast<color *>(color_dictionary.lookup(key));
    1643           0 :       if (value != 0 /* nullptr */) // names with no dictionary entry are skipped without a diagnostic
    1644           0 :         errprint("%1\t%2\n", key.contents(), value->print_color());
    1645           0 :     } while (has_arg());
    1646             :   }
    1647             :   else { // no arguments: report every entry in the dictionary
    1648           0 :     dictionary_iterator iter(color_dictionary);
    1649             :     // We must use the nuclear `reinterpret_cast` operator because GNU
    1650             :     // troff's dictionary types use a pre-STL approach to containers.
    1651           0 :     while (iter.get(&key, reinterpret_cast<void **>(&value))) {
    1652           0 :       assert(!key.is_null());
    1653           0 :       assert(value != 0 /* nullptr */);
    1654           0 :       errprint("%1\t%2\n", key.contents(), value->print_color());
    1655             :     }
    1656             :   }
    1657           0 :   fflush(stderr); // presumably errprint() writes to stderr -- TODO confirm
    1658           0 :   skip_line();
    1659           0 : }
    1660             : 
    1661         110 : node *do_overstrike() // \o: build an overstrike_node from the delimited argument
    1662             : { // Returns the new node, or null if the delimiter is rejected (AT&T mode) or an argument token has no charinfo.
    1663         110 :   overstrike_node *osnode = new overstrike_node;
    1664         110 :   int start_level = input_stack::get_level(); // NOTE(review): sampled before the delimiter is read; do_bracket() samples it afterwards -- confirm which is intended
    1665         220 :   token start_token;
    1666         110 :   start_token.next(); // the opening delimiter
    1667         110 :   if (!want_att_compat && !start_token.is_usable_as_delimiter())
    1668           0 :     warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
    1669           0 :                         " is deprecated", start_token.description()); // describe the delimiter just read, not the global `tok`
    1670         110 :   else if (want_att_compat
    1671         110 :            && !start_token.is_usable_as_delimiter(false,
    1672             :                   DELIMITER_ATT_STRING_EXPRESSION)) {
    1673           0 :     warning(WARN_DELIM, "overstriking escape sequence"
    1674             :             " does not accept %1 as a delimiter",
    1675           0 :             start_token.description());
    1676           0 :     delete osnode;
    1677           0 :     return 0 /* nullptr */;
    1678             :   }
    1679             :   // TODO: groff 1.24.0 release + 2 years?
    1680             : #if 0
    1681             :   if (!start_token.is_usable_as_delimiter(true /* report error */)) {
    1682             :     delete osnode;
    1683             :     return 0 /* nullptr */;
    1684             :   }
    1685             : #endif
    1686             :   for (;;) {
    1687         330 :     tok.next();
    1688         330 :     if (tok.is_newline() || tok.is_eof()) {
    1689             :       // token::description() writes to static, class-wide storage, so
    1690             :       // we must allocate a copy of it before issuing the next
    1691             :       // diagnostic.
    1692           0 :       char *delimdesc = strdup(start_token.description());
    1693           0 :       warning(WARN_DELIM, "missing closing delimiter in overstrike"
    1694             :               " escape sequence; expected %1, got %2", delimdesc,
    1695           0 :               tok.description());
    1696           0 :       free(delimdesc);
    1697           0 :       break; // the node built so far is still returned
    1698             :     }
    1699         330 :     if (tok == start_token // closing delimiter: outside AT&T mode it must also be at the starting input level
    1700         330 :         && (want_att_compat || input_stack::get_level() == start_level))
    1701         110 :       break;
    1702         220 :     if (tok.is_horizontal_motion())
    1703           0 :       osnode->overstrike(tok.nd->copy());
    1704         220 :     else if (tok.is_unstretchable_space()) { // represented as a horizontal motion of one space width
    1705           0 :       node *n = new hmotion_node(curenv->get_space_width(),
    1706           0 :                                  curenv->get_fill_color());
    1707           0 :       osnode->overstrike(n);
    1708             :     }
    1709             :     else {
    1710             :       // TODO: In theory, we could accept spaces and horizontal motions.
    1711         220 :       charinfo *ci = tok.get_charinfo(true /* required */);
    1712         220 :       if (0 /* nullptr */ == ci) {
    1713           0 :         error("%1 is not supported in an overstrike escape sequence"
    1714           0 :               " argument", tok.description());
    1715           0 :         delete osnode;
    1716           0 :         return 0 /* nullptr */;
    1717             :       }
    1718             :       else {
    1719         220 :         node *n = curenv->make_char_node(ci);
    1720         220 :         if (n != 0 /* nullptr */) // a null result is skipped without a diagnostic
    1721         220 :           osnode->overstrike(n);
    1722             :       }
    1723             :     }
    1724         220 :   }
    1725         110 :   return osnode;
    1726             : }
    1727             : 
    1728           1 : static node *do_bracket() // \b: build a bracket_node from the delimited argument
    1729             : { // Returns the new node, or null if the delimiter is rejected (AT&T mode) or an argument token has no charinfo.
    1730           1 :   bracket_node *bracketnode = new bracket_node;
    1731           2 :   token start_token;
    1732           1 :   start_token.next(); // the opening delimiter
    1733           1 :   if (!want_att_compat && !start_token.is_usable_as_delimiter())
    1734           0 :     warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
    1735           0 :                         " is deprecated", start_token.description()); // describe the delimiter just read, not the global `tok`
    1736           1 :   else if (want_att_compat
    1737           1 :            && !start_token.is_usable_as_delimiter(false,
    1738             :                   DELIMITER_ATT_STRING_EXPRESSION)) {
    1739           0 :     warning(WARN_DELIM, "bracket-building escape sequence"
    1740             :             " does not accept %1 as a delimiter",
    1741           0 :             start_token.description());
    1742           0 :     delete bracketnode;
    1743           0 :     return 0 /* nullptr */;
    1744             :   }
    1745             :   // TODO: groff 1.24.0 release + 2 years?
    1746             : #if 0
    1747             :   if (!start_token.is_usable_as_delimiter(true /* report error */)) {
    1748             :     delete bracketnode;
    1749             :     return 0 /* nullptr */;
    1750             :   }
    1751             : #endif
    1752           1 :   int start_level = input_stack::get_level(); // input level right after the opening delimiter
    1753             :   for (;;) {
    1754           4 :     tok.next();
    1755           4 :     if (tok.is_newline() || tok.is_eof()) {
    1756             :       // token::description() writes to static, class-wide storage, so
    1757             :       // we must allocate a copy of it before issuing the next
    1758             :       // diagnostic.
    1759           0 :       char *delimdesc = strdup(start_token.description());
    1760           0 :       warning(WARN_DELIM, "missing closing delimiter in"
    1761             :               " bracket-building escape sequence; expected %1, got"
    1762           0 :               " %2", delimdesc, tok.description());
    1763           0 :       free(delimdesc);
    1764           0 :       break; // the node built so far is still returned
    1765             :     }
    1766           4 :     if (tok == start_token // closing delimiter: outside AT&T mode it must also be at the starting input level
    1767           4 :         && (want_att_compat || input_stack::get_level() == start_level))
    1768           1 :       break;
    1769             :     // TODO: In theory, we could accept spaces and horizontal motions.
    1770           3 :     charinfo *ci = tok.get_charinfo(true /* required */);
    1771           3 :     if (0 /* nullptr */ == ci) {
    1772           0 :       error("%1 is not supported in a bracket-building escape sequence"
    1773           0 :             " argument", tok.description());
    1774           0 :       delete bracketnode;
    1775           0 :       return 0 /* nullptr */;
    1776             :     }
    1777             :     else {
    1778           3 :       node *n = curenv->make_char_node(ci);
    1779           3 :       if (n != 0 /* nullptr */) // a null result is skipped without a diagnostic
    1780           3 :         bracketnode->bracket(n);
    1781             :     }
    1782           3 :   }
    1783           1 :   return bracketnode;
    1784             : }
    1785             : 
    1786       22350 : static const char *do_name_test() // \A: scan a delimited argument and report whether every token in it has a nonzero ch()
    1787             : { // Returns "1" or "0", or null if the delimiter is rejected in AT&T compatibility mode.
    1788       44700 :   token start_token;
    1789       22350 :   start_token.next(); // the opening delimiter
    1790       22350 :   if (!want_att_compat && !start_token.is_usable_as_delimiter())
    1791           0 :     warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
    1792           0 :                         " is deprecated", start_token.description()); // describe the delimiter just read, not the global `tok`
    1793       22350 :   else if (want_att_compat
    1794       22350 :            && !start_token.is_usable_as_delimiter(false,
    1795             :                   DELIMITER_ATT_STRING_EXPRESSION)) {
    1796           0 :     warning(WARN_DELIM, "name test escape sequence"
    1797             :             " does not accept %1 as a delimiter",
    1798           0 :             start_token.description());
    1799           0 :     return 0 /* nullptr */;
    1800             :   }
    1801             :   // TODO: groff 1.24.0 release + 2 years?
    1802             : #if 0
    1803             :   if (!start_token.is_usable_as_delimiter(true /* report error */))
    1804             :     return 0 /* nullptr */;
    1805             : #endif
    1806       22350 :   int start_level = input_stack::get_level(); // input level right after the opening delimiter
    1807       22350 :   bool got_bad_char = false; // set once any token reports ch() == 0
    1808       22350 :   bool got_some_char = false; // set once the argument contains at least one token
    1809             :   for (;;) {
    1810       95232 :     tok.next();
    1811       95232 :     if (tok.is_newline() || tok.is_eof()) {
    1812             :       // token::description() writes to static, class-wide storage, so
    1813             :       // we must allocate a copy of it before issuing the next
    1814             :       // diagnostic.
    1815           0 :       char *delimdesc = strdup(start_token.description());
    1816           0 :       warning(WARN_DELIM, "missing closing delimiter in identifier"
    1817             :               " validation escape sequence; expected %1, got %2",
    1818           0 :               delimdesc, tok.description());
    1819           0 :       free(delimdesc);
    1820           0 :       break; // the result below still reflects the tokens seen so far
    1821             :     }
    1822       95232 :     if (tok == start_token // closing delimiter: outside AT&T mode it must also be at the starting input level
    1823       95232 :         && (want_att_compat || input_stack::get_level() == start_level))
    1824       22350 :       break;
    1825       72882 :     if (tok.ch() == 0U)
    1826        4428 :       got_bad_char = true;
    1827       72882 :     got_some_char = true;
    1828       72882 :   }
    1829       22350 :   return (got_some_char && !got_bad_char) ? "1" : "0"; // an empty argument yields "0"
    1830             : }
    1831             : 
    1832       10333 : static const char *do_expr_test() // \B: test whether the delimited argument is readable by read_measurement()
    1833             : { // Returns "1" or "0", or null if the opening delimiter is rejected.
    1834       20666 :   token start_token;
    1835       10333 :   start_token.next(); // the opening delimiter
    1836       20666 :   if (!want_att_compat
    1837       10333 :       && !start_token.is_usable_as_delimiter(true /* report error */))
    1838           0 :     return 0 /* nullptr */;
    1839       10333 :   else if (want_att_compat
    1840       10333 :            && !start_token.is_usable_as_delimiter(false,
    1841             :                   DELIMITER_ATT_NUMERIC_EXPRESSION)) {
    1842           0 :     warning(WARN_DELIM, "numeric expression test escape sequence"
    1843             :             " does not accept %1 as a delimiter",
    1844           0 :             start_token.description());
    1845           0 :     return 0 /* nullptr */;
    1846             :   }
    1847       10333 :   int start_level = input_stack::get_level(); // input level right after the opening delimiter
    1848       10333 :   tok.next();
    1849             :   // disable all warning and error messages temporarily
    1850       10333 :   unsigned int saved_warning_mask = warning_mask;
    1851       10333 :   bool saved_want_errors_inhibited = want_errors_inhibited;
    1852       10333 :   warning_mask = 0;
    1853       10333 :   want_errors_inhibited = true;
    1854             :   int dummy; // receives the parsed value, which is never used; only success matters
    1855             :   // TODO: grochar
    1856       10333 :   bool result = read_measurement(&dummy, (unsigned char)('u'),
    1857             :                                  true /* is_mandatory */);
    1858       10333 :   warning_mask = saved_warning_mask; // restore diagnostics
    1859       10333 :   want_errors_inhibited = saved_want_errors_inhibited;
    1860             :   // read_measurement() has left `token` pointing at the input character
    1861             :   // after the end of the expression.
    1862       10333 :   if (tok == start_token && input_stack::get_level() == start_level) // unlike the sibling escapes, the level check here is not relaxed by want_att_compat
    1863       10262 :     return (result ? "1" : "0");
    1864             :   // There may be garbage after the expression but before the closing
    1865             :   // delimiter.  Eat it.
    1866             :   for (;;) {
    1867         505 :     if (tok.is_newline() || tok.is_eof()) {
    1868           0 :       char *delimdesc = strdup(start_token.description()); // copied so the second description() call below cannot overwrite it
    1869           0 :       warning(WARN_DELIM, "missing closing delimiter in numeric"
    1870             :               " expression validation escape sequence; expected %1,"
    1871           0 :               " got %2", delimdesc, tok.description());
    1872           0 :       free(delimdesc);
    1873           0 :       break;
    1874             :     }
    1875         505 :     tok.next();
    1876         505 :     if (tok == start_token && input_stack::get_level() == start_level)
    1877          71 :       break;
    1878         434 :   }
    1879          71 :   return "0"; // trailing garbage makes the test fail even if the expression itself was readable
    1880             : }
    1881             : 
    1882             : #if 0
    1883             : static node *do_zero_width_output() // Disabled variant (inside `#if 0`): formats the argument in a temporary environment.
    1884             : {
    1885             :   token start_token;
    1886             :   start_token.next(); // the opening delimiter
    1887             :   int start_level = input_stack::get_level();
    1888             :   environment env(curenv); // temporary environment constructed from the current one
    1889             :   environment *oldenv = curenv;
    1890             :   curenv = &env; // tokens processed below go to the temporary environment
    1891             :   for (;;) {
    1892             :     tok.next();
    1893             :     if (tok.is_newline() || tok.is_eof()) {
    1894             :       error("missing closing delimiter");
    1895             :       break;
    1896             :     }
    1897             :     if (tok == start_token // closing delimiter: outside AT&T mode it must also be at the starting input level
    1898             :         && (want_att_compat || input_stack::get_level() == start_level))
    1899             :       break;
    1900             :     tok.process();
    1901             :   }
    1902             :   curenv = oldenv; // restore the caller's environment
    1903             :   node *rev = env.extract_output_line();
    1904             :   node *n = 0 /* nullptr */;
    1905             :   while (rev != 0 /* nullptr */) { // reverse the singly linked node list in place
    1906             :     node *tem = rev;
    1907             :     rev = rev->next;
    1908             :     tem->next = n;
    1909             :     n = tem;
    1910             :   }
    1911             :   return new zero_width_node(n);
    1912             : }
    1913             : 
    1914             : #else
    1915             : 
    1916             : // It's undesirable for \Z to change environments, because then
    1917             : // \n(.w won't work as expected.
    1918             : 
    1919         851 : static node *do_zero_width_output() // \Z
    1920             : {
    1921         851 :   node *rev = new dummy_node;
    1922         851 :   node *n = 0 /* nullptr */;
    1923        1702 :   token start_token;
    1924         851 :   start_token.next();
    1925         851 :   if (!want_att_compat && !start_token.is_usable_as_delimiter())
    1926           0 :     warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
    1927           0 :                         " is deprecated", tok.description());
    1928         851 :   else if (want_att_compat
    1929         851 :            && !start_token.is_usable_as_delimiter(false,
    1930             :                   DELIMITER_ATT_STRING_EXPRESSION)) {
    1931           0 :     warning(WARN_DELIM, "zero-width sequence escape sequence"
    1932             :             " does not accept %1 as a delimiter",
    1933           0 :             start_token.description());
    1934           0 :     return 0 /* nullptr */;
    1935             :   }
    1936             :   // TODO: groff 1.24.0 release + 2 years?
    1937             : #if 0
    1938             :   if (!start_token.is_usable_as_delimiter(true /* report error */)) {
    1939             :     delete rev;
    1940             :     return 0 /* nullptr */;
    1941             :   }
    1942             : #endif
    1943         851 :   int start_level = input_stack::get_level();
    1944             :   for (;;) {
    1945        4930 :     tok.next();
    1946        4930 :     if (tok.is_newline() || tok.is_eof()) {
    1947             :       // token::description() writes to static, class-wide storage, so
    1948             :       // we must allocate a copy of it before issuing the next
    1949             :       // diagnostic.
    1950           0 :       char *delimdesc = strdup(start_token.description());
    1951           0 :       warning(WARN_DELIM, "missing closing delimiter in zero-width"
    1952             :               " output escape sequence; expected %1, got %2", delimdesc,
    1953           0 :               tok.description());
    1954           0 :       free(delimdesc);
    1955           0 :       break;
    1956             :     }
    1957        4930 :     if (tok == start_token
    1958        4930 :         && (want_att_compat || input_stack::get_level() == start_level))
    1959         851 :       break;
    1960             :     // XXX: does the initial dummy node leak if this fails?
    1961        4079 :     if (!tok.add_to_zero_width_node_list(&rev))
    1962           0 :       error("%1 is not allowed in a zero-width output escape"
    1963           0 :             " sequence argument", tok.description());
    1964        4079 :   }
    1965        5127 :   while (rev != 0 /* nullptr */) {
    1966        4276 :     node *tem = rev;
    1967        4276 :     rev = rev->next;
    1968        4276 :     tem->next = n;
    1969        4276 :     n = tem;
    1970             :   }
    1971         851 :   return new zero_width_node(n);
    1972             : }
    1973             : 
    1974             : #endif
    1975             : 
    1976     8919500 : token_node *node::get_token_node()
    1977             : {
    1978     8919500 :   return 0 /* nullptr */;
    1979             : }
    1980             : 
    1981             : class token_node : public node {
    1982             : public:
    1983             :   token tk;
    1984             :   token_node(const token &t);
    1985             :   void asciify(macro *);
    1986             :   node *copy();
    1987             :   token_node *get_token_node();
    1988             :   bool is_same_as(node *);
    1989             :   const char *type();
    1990             :   bool causes_tprint();
    1991             :   bool is_tag();
    1992             : };
    1993             : 
    1994     1108980 : token_node::token_node(const token &t) : tk(t)
    1995             : {
    1996     1108980 : }
    1997             : 
    1998           0 : void token_node::asciify(macro *)
    1999             : {
    2000           0 :   assert(0 == "attempting to 'asciify' a `token_node`");
    2001             :   delete this;
    2002             : }
    2003             : 
    2004      896850 : node *token_node::copy()
    2005             : {
    2006      896850 :   return new token_node(tk);
    2007             : }
    2008             : 
    2009      896850 : token_node *token_node::get_token_node()
    2010             : {
    2011      896850 :   return this;
    2012             : }
    2013             : 
    2014           0 : bool token_node::is_same_as(node *nd)
    2015             : {
    2016           0 :   return (tk == static_cast<token_node *>(nd)->tk);
    2017             : }
    2018             : 
    2019           0 : const char *token_node::type()
    2020             : {
    2021           0 :   return "token node";
    2022             : }
    2023             : 
    2024           0 : bool token_node::causes_tprint()
    2025             : {
    2026           0 :   return false;
    2027             : }
    2028             : 
    2029           0 : bool token_node::is_tag()
    2030             : {
    2031           0 :   return false;
    2032             : }
    2033             : 
    2034      642083 : token::token() : nd(0 /* nullptr */), type(TOKEN_EMPTY)
    2035             : {
    2036      642083 : }
    2037             : 
    2038     6378604 : token::~token()
    2039             : {
    2040     3189302 :   delete nd;
    2041     3189302 : }
    2042             : 
    2043     2547219 : token::token(const token &t)
    2044     2547219 : : nm(t.nm), c(t.c), val(t.val), dim(t.dim), type(t.type)
    2045             : {
    2046     2547219 :   if (t.nd != 0 /* nullptr */)
    2047         490 :     nd = t.nd->copy();
    2048             :   else
    2049     2546729 :     nd = 0 /* nullptr */;
    2050     2547219 : }
    2051             : 
    2052      947750 : void token::operator=(const token &t)
    2053             : {
    2054      947750 :   delete nd;
    2055      947750 :   nm = t.nm;
    2056      947750 :   if (t.nd != 0 /* nullptr */)
    2057         245 :     nd = t.nd->copy();
    2058             :   else
    2059      947505 :     nd = 0 /* nullptr */;
    2060      947750 :   c = t.c;
    2061      947750 :   val = t.val;
    2062      947750 :   dim = t.dim;
    2063      947750 :   type = t.type;
    2064      947750 : }
    2065             : 
    2066    79731051 : void token::skip_spaces()
    2067             : {
    2068    79731051 :   while (is_space())
    2069    21974737 :     next();
    2070    57756314 : }
    2071             : 
    2072        9614 : void token::diagnose_non_character()
    2073             : {
    2074             :   // TODO: What about
    2075             :   //   is_space()
    2076             :   //   is_stretchable_space()
    2077             :   //   is_unstrechable_space()
    2078             :   //   is_horizontal_motion()
    2079             :   //   is_horizontal_whitespace()
    2080             :   //   is_leader()
    2081             :   //   is_backspace()
    2082             :   //   is_dummy()
    2083             :   //   is_transparent()
    2084             :   //   is_transparent_dummy()
    2085             :   //   is_left_brace()
    2086             :   //   is_page_ejector()
    2087             :   //   is_hyphen_indicator()
    2088             :   //   is_zero_width_break()
    2089             :   // ?
    2090        9614 :   if (!is_newline() && !is_eof() && !is_right_brace() && !is_tab())
    2091           0 :     error("expected ordinary, special, or indexed character, got %1;"
    2092           0 :           " ignoring", description());
    2093        9614 : }
    2094             : 
    2095             : // Indicate whether an argument lies ahead on the current line in the
    2096             : // input stream, skipping over spaces.  This function is therefore not
    2097             : // appropriate for use when handling requests or escape sequences that
    2098             : // don't use space to separate their arguments, as with `.tr aAbB` or
    2099             : // `\o'^e'`.
    2100             : //
    2101             : // Specify `want_peek` if request reads the next argument in copy mode,
    2102             : // or otherwise must interpret it specially, as when reading a
    2103             : // conditional expression (`if`, `ie`, `while`), or expecting a
    2104             : // delimited argument (`tl`).
    2105    14341142 : bool has_arg(bool want_peek)
    2106             : {
    2107    14341142 :   if (tok.is_newline() || tok.is_eof())
    2108     2592698 :     return false;
    2109    11748444 :   if (want_peek) {
    2110             :     int c;
    2111             :     for (;;) {
    2112      562599 :       c = input_stack::peek();
    2113      562599 :       if (' ' == c)
    2114          18 :         (void) read_char_in_copy_mode(0 /* nullptr */);
    2115             :       else
    2116      562581 :         break;
    2117             :     }
    2118      562581 :     return !(('\n' == c) || (EOF == c));
    2119             :   }
    2120             :   else {
    2121    11185863 :     tok.skip_spaces();
    2122    11185863 :     return !(tok.is_newline() || tok.is_eof());
    2123             :   }
    2124             : }
    2125             : 
    2126       32767 : void token::make_space()
    2127             : {
    2128       32767 :   type = TOKEN_SPACE;
    2129       32767 : }
    2130             : 
    2131      103557 : void token::make_newline()
    2132             : {
    2133      103557 :   type = TOKEN_NEWLINE;
    2134      103557 : }
    2135             : 
    2136   239468043 : void token::next()
    2137             : {
    2138   239468043 :   if (nd != 0 /* nullptr */) {
    2139         998 :     delete nd;
    2140         998 :     nd = 0 /* nullptr */;
    2141             :   }
    2142             :   units x;
    2143             :   for (;;) {
    2144   255706213 :     node *n = 0 /* nullptr */;
    2145   255706213 :     int cc = input_stack::get(&n);
    2146   255706213 :     if ((cc != escape_char) || 0U == escape_char) {
    2147   242349091 :     handle_ordinary_char:
    2148   242349338 :       switch (cc) {
    2149           3 :       case INPUT_NO_BREAK_SPACE:
    2150           3 :           type = TOKEN_STRETCHABLE_SPACE;
    2151   239468043 :           return;
    2152           1 :       case INPUT_SOFT_HYPHEN:
    2153           1 :           type = TOKEN_HYPHEN_INDICATOR;
    2154           1 :           return;
    2155     1443898 :       case PUSH_GROFF_MODE:
    2156     1443898 :         input_stack::set_att_compat(want_att_compat);
    2157     1443898 :         want_att_compat = false;
    2158     3066661 :         continue;
    2159           0 :       case PUSH_COMP_MODE:
    2160           0 :         input_stack::set_att_compat(want_att_compat);
    2161           0 :         want_att_compat = true;
    2162           0 :         continue;
    2163     1434654 :       case POP_GROFFCOMP_MODE:
    2164     1434654 :         want_att_compat = input_stack::get_att_compat();
    2165     1434654 :         continue;
    2166           0 :       case BEGIN_QUOTE:
    2167           0 :         input_stack::increase_level();
    2168           0 :         continue;
    2169           0 :       case END_QUOTE:
    2170           0 :         input_stack::decrease_level();
    2171           0 :         continue;
    2172      188109 :       case DOUBLE_QUOTE:
    2173      188109 :         continue;
    2174      952372 :       case EOF:
    2175      952372 :         type = TOKEN_EOF;
    2176      952372 :         return;
    2177         125 :       case TRANSPARENT_FILE_REQUEST:
    2178             :       case TITLE_REQUEST:
    2179             :       case COPY_FILE_REQUEST:
    2180             : #ifdef COLUMN
    2181             :       case VJUSTIFY_REQUEST:
    2182             : #endif /* COLUMN */
    2183         125 :         type = TOKEN_REQUEST;
    2184         125 :         c = cc;
    2185         125 :         return;
    2186       42312 :       case BEGIN_TRAP:
    2187       42312 :         type = TOKEN_BEGIN_TRAP;
    2188       42312 :         return;
    2189       42030 :       case END_TRAP:
    2190       42030 :         type = TOKEN_END_TRAP;
    2191       42030 :         return;
    2192          60 :       case LAST_PAGE_EJECTOR:
    2193          60 :         seen_last_page_ejector = true;
    2194             :         // fall through
    2195        4856 :       case PAGE_EJECTOR:
    2196        4856 :         type = TOKEN_PAGE_EJECTOR;
    2197        4856 :         return;
    2198             :       case ESCAPE_PERCENT:
    2199       83843 :       ESCAPE_PERCENT:
    2200       83843 :         type = TOKEN_HYPHEN_INDICATOR;
    2201       83843 :         return;
    2202             :       case ESCAPE_SPACE:
    2203        6209 :       ESCAPE_SPACE:
    2204        6209 :         type = TOKEN_UNSTRETCHABLE_SPACE;
    2205        6209 :         return;
    2206             :       case ESCAPE_TILDE:
    2207        9853 :       ESCAPE_TILDE:
    2208        9853 :         type = TOKEN_STRETCHABLE_SPACE;
    2209        9853 :         return;
    2210             :       case ESCAPE_COLON:
    2211       51200 :       ESCAPE_COLON:
    2212       51200 :         type = TOKEN_ZERO_WIDTH_BREAK;
    2213       51200 :         return;
    2214             :       case ESCAPE_e:
    2215        5666 :       ESCAPE_e:
    2216        5666 :         type = TOKEN_ESCAPE;
    2217        5666 :         return;
    2218      137769 :       case ESCAPE_E:
    2219      137769 :         goto handle_escape_char;
    2220             :       case ESCAPE_BAR:
    2221        5349 :       ESCAPE_BAR:
    2222        5349 :         type = TOKEN_HORIZONTAL_MOTION;
    2223        5349 :         nd = new hmotion_node(curenv->get_narrow_space_width(),
    2224        5349 :                               curenv->get_fill_color());
    2225        5349 :         return;
    2226             :       case ESCAPE_CIRCUMFLEX:
    2227         310 :       ESCAPE_CIRCUMFLEX:
    2228         310 :         type = TOKEN_HORIZONTAL_MOTION;
    2229         310 :         nd = new hmotion_node(curenv->get_half_narrow_space_width(),
    2230         310 :                               curenv->get_fill_color());
    2231         310 :         return;
    2232     2403611 :       case ESCAPE_NEWLINE:
    2233     2403611 :         have_formattable_input = false;
    2234     2403611 :         break;
    2235             :       case ESCAPE_LEFT_BRACE:
    2236     1405911 :       ESCAPE_LEFT_BRACE:
    2237     1405911 :         type = TOKEN_LEFT_BRACE;
    2238     1405911 :         return;
    2239             :       case ESCAPE_RIGHT_BRACE:
    2240     1232591 :       ESCAPE_RIGHT_BRACE:
    2241     1232591 :         type = TOKEN_RIGHT_BRACE;
    2242     1232591 :         return;
    2243             :       case ESCAPE_LEFT_QUOTE:
    2244          12 :       ESCAPE_LEFT_QUOTE:
    2245          12 :         type = TOKEN_SPECIAL_CHAR;
    2246          12 :         nm = symbol("ga");
    2247          12 :         return;
    2248             :       case ESCAPE_RIGHT_QUOTE:
    2249          28 :       ESCAPE_RIGHT_QUOTE:
    2250          28 :         type = TOKEN_SPECIAL_CHAR;
    2251          28 :         nm = symbol("aa");
    2252          28 :         return;
    2253             :       case ESCAPE_HYPHEN:
    2254       18677 :       ESCAPE_HYPHEN:
    2255       18677 :         type = TOKEN_SPECIAL_CHAR;
    2256       18677 :         nm = symbol("-");
    2257       18677 :         return;
    2258             :       case ESCAPE_UNDERSCORE:
    2259           0 :       ESCAPE_UNDERSCORE:
    2260           0 :         type = TOKEN_SPECIAL_CHAR;
    2261           0 :         nm = symbol("ul");
    2262           0 :         return;
    2263             :       case ESCAPE_c:
    2264      112600 :       ESCAPE_c:
    2265      112600 :         type = TOKEN_INTERRUPT;
    2266      112600 :         return;
    2267             :       case ESCAPE_BANG:
    2268       24769 :       ESCAPE_BANG:
    2269       24769 :         type = TOKEN_TRANSPARENT;
    2270       24769 :         return;
    2271             :       case ESCAPE_QUESTION:
    2272      457945 :       ESCAPE_QUESTION:
    2273      457945 :         nd = do_non_interpreted();
    2274      457945 :         if (nd != 0 /* nullptr */) {
    2275      457945 :           type = TOKEN_NODE;
    2276      457945 :           return;
    2277             :         }
    2278           0 :         break;
    2279             :       case ESCAPE_AMPERSAND:
    2280       79406 :       ESCAPE_AMPERSAND:
    2281       79406 :         type = TOKEN_DUMMY;
    2282       79406 :         return;
    2283             :       case ESCAPE_RIGHT_PARENTHESIS:
    2284       19742 :       ESCAPE_RIGHT_PARENTHESIS:
    2285       19742 :         type = TOKEN_TRANSPARENT_DUMMY;
    2286       19742 :         return;
    2287           6 :       case '\b':
    2288           6 :         type = TOKEN_BACKSPACE;
    2289           6 :         return;
    2290    44980308 :       case ' ':
    2291    44980308 :         type = TOKEN_SPACE;
    2292    44980308 :         return;
    2293      878014 :       case '\t':
    2294      878014 :         type = TOKEN_TAB;
    2295      878014 :         return;
    2296    12232383 :       case '\n':
    2297    12232383 :         type = TOKEN_NEWLINE;
    2298    12232383 :         return;
    2299         287 :       case '\001':
    2300         287 :         type = TOKEN_LEADER;
    2301         287 :         return;
    2302     9816350 :       case 0:
    2303             :         {
    2304     9816350 :           assert(n != 0 /* nullptr */);
    2305     9816350 :           token_node *tn = n->get_token_node();
    2306     9816350 :           if (tn != 0 /* nullptr */) {
    2307      896850 :             *this = tn->tk;
    2308      896850 :             delete tn;
    2309             :           }
    2310             :           else {
    2311     8919500 :             nd = n;
    2312     8919500 :             type = TOKEN_NODE;
    2313             :           }
    2314             :         }
    2315     9816350 :         return;
    2316   164617681 :       default:
    2317   164617681 :         type = TOKEN_CHAR;
    2318   164617681 :         c = cc;
    2319   167684342 :         return;
    2320     2403611 :       }
    2321             :     }
    2322             :     else {
    2323    13494891 :     handle_escape_char:
    2324    13494891 :       cc = input_stack::get(&n);
    2325    13494891 :       switch (cc) {
    2326        3419 :       case '(':
    2327        3419 :         nm = read_two_char_escape_parameter();
    2328        3419 :         type = TOKEN_SPECIAL_CHAR;
    2329        3419 :         return;
    2330           0 :       case EOF:
    2331           0 :         type = TOKEN_EOF;
    2332           0 :         error("end of input after escape character");
    2333           0 :         return;
    2334          12 :       case '`':
    2335          12 :         goto ESCAPE_LEFT_QUOTE;
    2336           6 :       case '\'':
    2337           6 :         goto ESCAPE_RIGHT_QUOTE;
    2338        3572 :       case '-':
    2339        3572 :         goto ESCAPE_HYPHEN;
    2340           0 :       case '_':
    2341           0 :         goto ESCAPE_UNDERSCORE;
    2342        7760 :       case '%':
    2343        7760 :         goto ESCAPE_PERCENT;
    2344          17 :       case ' ':
    2345          17 :         goto ESCAPE_SPACE;
    2346        1214 :       case '0':
    2347        1214 :         nd = new hmotion_node(curenv->get_digit_width(),
    2348        1214 :                               curenv->get_fill_color());
    2349        1214 :         type = TOKEN_HORIZONTAL_MOTION;
    2350        1214 :         return;
    2351        1390 :       case '|':
    2352        1390 :         goto ESCAPE_BAR;
    2353          72 :       case '^':
    2354          72 :         goto ESCAPE_CIRCUMFLEX;
    2355       33797 :       case '/':
    2356       33797 :         if (want_att_compat)
    2357           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2358           0 :                   " AT&T troff", char(cc));
    2359       33797 :         type = TOKEN_ITALIC_CORRECTION;
    2360       33797 :         return;
    2361       33795 :       case ',':
    2362       33795 :         if (want_att_compat)
    2363           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2364           0 :                   " AT&T troff", char(cc));
    2365       33795 :         type = TOKEN_NODE;
    2366       33795 :         nd = new left_italic_corrected_node;
    2367       33795 :         return;
    2368       19103 :       case '&':
    2369       19103 :         goto ESCAPE_AMPERSAND;
    2370       15529 :       case ')':
    2371       15529 :         if (want_att_compat)
    2372           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2373           0 :                   " AT&T troff", char(cc));
    2374       15529 :         goto ESCAPE_RIGHT_PARENTHESIS;
    2375       10702 :       case '!':
    2376       10702 :         goto ESCAPE_BANG;
    2377       27242 :       case '?':
    2378       27242 :         if (want_att_compat)
    2379           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2380           0 :                   " AT&T troff", char(cc));
    2381       27242 :         goto ESCAPE_QUESTION;
    2382        3415 :       case '~':
    2383        3415 :         if (want_att_compat)
    2384           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2385           0 :                   " AT&T troff", char(cc));
    2386        3415 :         goto ESCAPE_TILDE;
    2387         118 :       case ':':
    2388         118 :         if (want_att_compat)
    2389           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2390           0 :                   " AT&T troff", char(cc));
    2391         118 :         goto ESCAPE_COLON;
    2392    32038745 :       case '"':
    2393    32038745 :         while ((cc = input_stack::get(0 /* nullptr */)) != '\n'
    2394    32038745 :                && cc != EOF)
    2395             :           ;
    2396      905581 :         if (cc == '\n')
    2397      905581 :           type = TOKEN_NEWLINE;
    2398             :         else
    2399           0 :           type = TOKEN_EOF;
    2400      905581 :         return;
    2401      137458 :       case '#':                 // Like \" but newline is ignored.
    2402      137458 :         if (want_att_compat)
    2403           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2404           0 :                   " AT&T troff", char(cc));
    2405     3446893 :         while ((cc = input_stack::get(0 /* nullptr */)) != '\n')
    2406     3309435 :           if (cc == EOF) {
    2407           0 :             type = TOKEN_EOF;
    2408           0 :             return;
    2409             :           }
    2410      137458 :         break;
    2411     1503503 :       case '$':
    2412             :         {
    2413     1503503 :           symbol s = read_escape_parameter();
    2414     1503503 :           if (!(s.is_null() || s.is_empty()))
    2415     1503503 :             interpolate_positional_parameter(s);
    2416     1503503 :           break;
    2417             :         }
    2418     1716558 :       case '*':
    2419             :         {
    2420     1716558 :           symbol s = read_escape_parameter(WITH_ARGS);
    2421     1716558 :           if (!(s.is_null() || s.is_empty())) {
    2422     1716558 :             if (have_multiple_params) {
    2423        1779 :               have_multiple_params = false;
    2424        1779 :               interpolate_string_with_args(s);
    2425             :             }
    2426             :             else
    2427     1714779 :               interpolate_string(s);
    2428             :           }
    2429     1716558 :           break;
    2430             :         }
    2431         273 :       case 'a':
    2432         273 :         nd = new non_interpreted_char_node('\001');
    2433         273 :         type = TOKEN_NODE;
    2434         273 :         return;
    2435       22350 :       case 'A':
    2436       22350 :         if (want_att_compat)
    2437           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2438           0 :                   " AT&T troff", char(cc));
    2439             :         {
    2440       22350 :           const char *res = do_name_test();
    2441       22350 :           if (0 /* nullptr */ == res)
    2442           0 :             break;
    2443       22350 :           c = *res;
    2444       22350 :           type = TOKEN_CHAR;
    2445             :         }
    2446       22350 :         return;
    2447           1 :       case 'b':
    2448           1 :         nd = do_bracket();
    2449           1 :         if (0 /* nullptr */ == nd)
    2450           0 :           break;
    2451           1 :         type = TOKEN_NODE;
    2452           1 :         return;
    2453       10333 :       case 'B':
    2454       10333 :         if (want_att_compat)
    2455           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2456           0 :                   " AT&T troff", char(cc));
    2457             :         {
    2458       10333 :           const char *res = do_expr_test();
    2459       10333 :           if (0 /* nullptr */ == res)
    2460           0 :             break;
    2461       10333 :           c = *res;
    2462       10333 :           type = TOKEN_CHAR;
    2463             :         }
    2464       10333 :         return;
    2465       29938 :       case 'c':
    2466       29938 :         goto ESCAPE_c;
    2467          53 :       case 'C':
    2468          53 :         nm = read_delimited_identifier();
    2469          53 :         if (nm.is_null())
    2470           1 :           break;
    2471          52 :         type = TOKEN_DELIMITED_SPECIAL_CHAR;
    2472          52 :         return;
    2473         120 :       case 'd':
    2474         120 :         type = TOKEN_NODE;
    2475         120 :         nd = new vmotion_node(curenv->get_size() / 2,
    2476         120 :                               curenv->get_fill_color());
    2477         120 :         return;
    2478      230491 :       case 'D':
    2479      230491 :         nd = read_drawing_command();
    2480      230491 :         if (0 /* nullptr */ == nd)
    2481         189 :           break;
    2482      230302 :         type = TOKEN_NODE;
    2483      230302 :         return;
    2484        4201 :       case 'e':
    2485        4201 :         goto ESCAPE_e;
    2486           0 :       case 'E':
    2487           0 :         if (want_att_compat)
    2488           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2489           0 :                   " AT&T troff", char(cc));
    2490           0 :         goto handle_escape_char;
    2491      127289 :       case 'f':
    2492      127289 :         if (curenv->get_was_line_interrupted()) {
    2493           0 :           warning(WARN_SYNTAX, "ignoring escaped '%1' on input line"
    2494             :                   " after output line continuation escape sequence",
    2495           0 :                   char(cc));
    2496           0 :           break;
    2497             :         }
    2498      127289 :         select_font(read_escape_parameter(ALLOW_EMPTY));
    2499      127289 :         if (!want_att_compat)
    2500      127289 :           have_formattable_input = true;
    2501      127289 :         break;
    2502        2452 :       case 'F':
    2503        2452 :         if (want_att_compat)
    2504           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2505           0 :                   " AT&T troff", char(cc));
    2506        2452 :         curenv->set_family(read_escape_parameter(ALLOW_EMPTY));
    2507        2452 :         have_formattable_input = true;
    2508        2452 :         break;
    2509         260 :       case 'g':
    2510             :         {
    2511         260 :           symbol s = read_escape_parameter();
    2512         260 :           if (!(s.is_null() || s.is_empty()))
    2513         260 :             interpolate_number_format(s);
    2514         260 :           break;
    2515             :         }
    2516      122013 :       case 'h':
    2517      122013 :         if (!read_delimited_measurement(&x, 'm'))
    2518          13 :           break;
    2519      122000 :         type = TOKEN_DELIMITED_HORIZONTAL_MOTION;
    2520      122000 :         nd = new hmotion_node(x, curenv->get_fill_color());
    2521      122000 :         return;
    2522         615 :       case 'H':
    2523             :         // don't take height increments relative to previous height if
    2524             :         // in compatibility mode
    2525         615 :         if (!want_att_compat && curenv->get_char_height()) {
    2526         222 :           if (read_delimited_measurement(&x, 'z',
    2527             :                                          curenv->get_char_height()))
    2528         222 :             curenv->set_char_height(x);
    2529             :         }
    2530             :         else {
    2531         393 :           if (read_delimited_measurement(&x, 'z',
    2532             :                 curenv->get_requested_point_size()))
    2533         393 :             curenv->set_char_height(x);
    2534             :         }
    2535         615 :         if (!want_att_compat)
    2536         615 :           have_formattable_input = true;
    2537         615 :         break;
    2538         480 :       case 'k':
    2539         480 :         nm = read_escape_parameter();
    2540         480 :         if (nm.is_null() || nm.is_empty())
    2541           0 :           break;
    2542         480 :         type = TOKEN_MARK_INPUT;
    2543         480 :         return;
    2544         242 :       case 'l':
    2545             :       case 'L':
    2546             :         {
    2547         242 :           charinfo *s = 0 /* nullptr */;
    2548         242 :           if (!read_line_rule_expression(&x, (cc == 'l' ? 'm': 'v'),
    2549             :                                          &s))
    2550          30 :             break;
    2551         212 :           if (0 /* nullptr */ == s)
    2552          44 :             s = lookup_charinfo(cc == 'l' ? "ru" : "br");
    2553         212 :           type = TOKEN_NODE;
    2554         212 :           node *char_node = curenv->make_char_node(s);
    2555         212 :           if (cc == 'l')
    2556         211 :             nd = new hline_node(x, char_node);
    2557             :           else
    2558           1 :             nd = new vline_node(x, char_node);
    2559         212 :           return;
    2560             :         }
    2561       76964 :       case 'm':
    2562       76964 :         if (want_att_compat)
    2563           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2564           0 :                   " AT&T troff", char(cc));
    2565       76964 :         do_stroke_color(read_escape_parameter(ALLOW_EMPTY));
    2566       76964 :         if (!want_att_compat)
    2567       76964 :           have_formattable_input = true;
    2568       76964 :         break;
    2569       64842 :       case 'M':
    2570       64842 :         if (want_att_compat)
    2571           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2572           0 :                   " AT&T troff", char(cc));
    2573       64842 :         do_fill_color(read_escape_parameter(ALLOW_EMPTY));
    2574       64842 :         if (!want_att_compat)
    2575       64842 :           have_formattable_input = true;
    2576       64842 :         break;
    2577     6880966 :       case 'n':
    2578             :         {
    2579             :           int inc;
    2580     6880966 :           symbol s = read_increment_and_escape_parameter(&inc);
    2581     6880966 :           if (!(s.is_null() || s.is_empty()))
    2582     6880966 :             interpolate_register(s, inc);
    2583     6880966 :           break;
    2584             :         }
    2585       46574 :       case 'N':
    2586             :         // The argument is a glyph index, which is dimensionless.
    2587       46574 :         if (!read_delimited_measurement(&val, 0 /* dimensionless */))
    2588           0 :           break;
    2589       46574 :         type = TOKEN_INDEXED_CHAR;
    2590       46574 :         return;
    2591         110 :       case 'o':
    2592         110 :         nd = do_overstrike();
    2593         110 :         if (0 /* nullptr */ == nd)
    2594           0 :           break;
    2595         110 :         type = TOKEN_NODE;
    2596         110 :         return;
    2597         757 :       case 'O':
    2598         757 :         if (want_att_compat)
    2599           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2600           0 :                   " AT&T troff", char(cc));
    2601         757 :         nd = do_suppress(read_escape_parameter());
    2602         757 :         if (0 /* nullptr */ == nd)
    2603         270 :           break;
    2604         487 :         type = TOKEN_NODE;
    2605         487 :         return;
    2606          16 :       case 'p':
    2607          16 :         type = TOKEN_SPREAD;
    2608          16 :         return;
    2609          42 :       case 'r':
    2610          42 :         type = TOKEN_NODE;
    2611          42 :         nd = new vmotion_node(-curenv->get_size(), curenv->get_fill_color());
    2612          42 :         return;
    2613        6142 :       case 'R':
    2614        6142 :         if (want_att_compat)
    2615           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2616           0 :                   " AT&T troff", char(cc));
    2617        6142 :         do_register();
    2618        6142 :         if (!want_att_compat)
    2619        6142 :           have_formattable_input = true;
    2620        6142 :         break;
    2621        5972 :       case 's':
    2622        5972 :         if (curenv->get_was_line_interrupted()) {
    2623           0 :           warning(WARN_SYNTAX, "ignoring escaped '%1' on input line"
    2624             :                   " after output line continuation escape sequence",
    2625           0 :                   char(cc));
    2626           0 :           break;
    2627             :         }
    2628        5972 :         if (read_size(&x))
    2629        5972 :           curenv->set_size(x);
    2630        5972 :         if (!want_att_compat)
    2631        5970 :           have_formattable_input = true;
    2632        5972 :         break;
    2633          30 :       case 'S':
    2634             :         // The argument is in degrees, which are dimensionless.
    2635          30 :         if (read_delimited_measurement(&x, 0 /* dimensionless */))
    2636          30 :           curenv->set_char_slant(x);
    2637          30 :         if (!want_att_compat)
    2638          30 :           have_formattable_input = true;
    2639          30 :         break;
    2640          53 :       case 't':
    2641          53 :         type = TOKEN_NODE;
    2642          53 :         nd = new non_interpreted_char_node('\t');
    2643          53 :         return;
    2644         120 :       case 'u':
    2645         120 :         type = TOKEN_NODE;
    2646         120 :         nd = new vmotion_node(-curenv->get_size() / 2,
    2647         120 :                               curenv->get_fill_color());
    2648         120 :         return;
    2649      103815 :       case 'v':
    2650      103815 :         if (!read_delimited_measurement(&x, 'v'))
    2651           0 :           break;
    2652      103815 :         type = TOKEN_NODE;
    2653      103815 :         nd = new vmotion_node(x, curenv->get_fill_color());
    2654      103815 :         return;
    2655           1 :       case 'V':
    2656           1 :         if (want_att_compat)
    2657           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2658           0 :                   " AT&T troff", char(cc));
    2659             :         {
    2660           1 :           symbol s = read_escape_parameter();
    2661           1 :           if (!(s.is_null() || s.is_empty()))
    2662           1 :             interpolate_environment_variable(s);
    2663           1 :           break;
    2664             :         }
    2665       26009 :       case 'w':
    2666       26009 :         do_width();
    2667       26009 :         break;
    2668         192 :       case 'x':
    2669         192 :         if (!read_delimited_measurement(&x, 'v'))
    2670           0 :           break;
    2671         192 :         type = TOKEN_NODE;
    2672         192 :         nd = new extra_size_node(x);
    2673         192 :         return;
    2674       70842 :       case 'X':
    2675       70842 :         nd = do_device_extension();
    2676       70842 :         if (0 /* nullptr */ == nd)
    2677           0 :           break;
    2678       70842 :         type = TOKEN_NODE;
    2679       70842 :         return;
    2680          29 :       case 'Y':
    2681          29 :         if (want_att_compat)
    2682           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2683           0 :                   " AT&T troff", char(cc));
    2684             :         {
    2685          29 :           symbol s = read_escape_parameter();
    2686          29 :           if (s.is_null() || s.is_empty())
    2687           0 :             break;
    2688          29 :           request_or_macro *p = lookup_request(s);
    2689          29 :           macro *m = p->to_macro();
    2690          29 :           if (0 /* nullptr */ == m) {
    2691           0 :             error("cannot interpolate '%1' to device-independent"
    2692             :                   " output; it is a request, not a macro",
    2693           0 :                   s.contents());
    2694           0 :             break;
    2695             :           }
    2696          29 :           nd = new device_extension_node(*m);
    2697          29 :           type = TOKEN_NODE;
    2698          29 :           return;
    2699             :         }
    2700          81 :       case 'z':
    2701          81 :         next();
    2702          81 :         if ((TOKEN_NODE == type)
    2703          81 :             || (TOKEN_HORIZONTAL_MOTION == type)
    2704          81 :             || (TOKEN_DELIMITED_HORIZONTAL_MOTION == type))
    2705           0 :           nd = new zero_width_node(nd);
    2706             :         else {
    2707             :           // TODO: In theory, we could accept spaces and horizontal
    2708             :           // motions.
    2709          81 :           charinfo *ci = get_charinfo(true /* required */);
    2710          81 :           if (0 /* nullptr */ == ci) {
    2711           0 :             error("%1 is not supported in a zero-width character"
    2712           0 :                   " escape sequence argument", tok.description());
    2713           0 :             break;
    2714             :           }
    2715          81 :           node *gn = curenv->make_char_node(ci);
    2716          81 :           if (0 /* nullptr */ == gn) {
    2717             :             assert("make_char_node failed to create a character"
    2718             :                    " node");
    2719           3 :             break;
    2720             :           }
    2721          78 :           nd = new zero_width_node(gn);
    2722          78 :           type = TOKEN_NODE;
    2723             :         }
    2724          78 :         return;
    2725         851 :       case 'Z':
    2726         851 :         if (want_att_compat)
    2727           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2728           0 :                   " AT&T troff", char(cc));
    2729         851 :         nd = do_zero_width_output();
    2730         851 :         if (0 /* nullptr */ == nd)
    2731           0 :           break;
    2732         851 :         type = TOKEN_NODE;
    2733         851 :         return;
    2734      115643 :       case '{':
    2735      115643 :         goto ESCAPE_LEFT_BRACE;
    2736      100822 :       case '}':
    2737      100822 :         goto ESCAPE_RIGHT_BRACE;
    2738      218331 :       case '\n':
    2739      218331 :         break;
    2740      800066 :       case '[':
    2741      800066 :         if (want_att_compat)
    2742           0 :           warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
    2743           0 :                   " AT&T troff", char(cc));
    2744      800066 :         if (!want_att_compat) {
    2745      800066 :           symbol s = read_long_escape_parameters(WITH_ARGS);
    2746      800066 :           if (s.is_null() || s.is_empty())
    2747           0 :             break;
    2748      800066 :           if (have_multiple_params) {
    2749       67634 :             have_multiple_params = false;
    2750       67634 :             nm = composite_glyph_name(s);
    2751             :           }
    2752             :           else {
    2753      732432 :             const char *sc = s.contents();
    2754      732432 :             const char *gn = 0 /* nullptr */;
    2755      732432 :             if ((strlen(sc) > 2) && (sc[0] == 'u'))
    2756       99969 :               gn = valid_unicode_code_sequence(sc, 0 /* nullptr */);
    2757      732432 :             if (gn != 0 /* nullptr */) {
    2758       99916 :               const char *gn_decomposed = decompose_unicode(gn);
    2759       99916 :               if (gn_decomposed != 0 /* nullptr */)
    2760        4397 :                 gn = &gn_decomposed[1];
    2761       99916 :               const char *groff_gn = unicode_to_glyph_name(gn);
    2762       99916 :               if (groff_gn != 0 /* nullptr */)
    2763       14229 :                 nm = symbol(groff_gn);
    2764             :               else {
    2765             :                 // ISO C++ does not permit VLAs on the stack.
    2766             :                 // C++03: new char[strlen(gn) + 1 + 1]();
    2767       85687 :                 char *buf = new char[strlen(gn) + 1 + 1];
    2768       85687 :                 (void) memset(buf, 0,
    2769       85687 :                               (strlen(gn) + 1 + 1) * sizeof(char));
    2770       85687 :                 strcpy(buf, "u");
    2771       85687 :                 strcat(buf, gn);
    2772       85687 :                 nm = symbol(buf);
    2773       85687 :                 delete[] buf;
    2774             :               }
    2775             :             }
    2776             :             else
    2777      632516 :               nm = symbol(sc);
    2778             :           }
    2779      800066 :           type = TOKEN_SPECIAL_CHAR;
    2780      800066 :           return;
    2781             :         }
    2782           0 :         goto handle_ordinary_char;
    2783         247 :       default:
    2784         247 :         if ((cc != escape_char) && (cc != '.'))
    2785           0 :           warning(WARN_ESCAPE, "ignoring escape character before %1",
    2786           0 :                   input_char_description(cc));
    2787         247 :         goto handle_ordinary_char;
    2788             :       }
    2789             :     }
    2790    16238170 :   }
    2791             : }
    2792             : 
    2793    16042199 : bool token::operator==(const token &t)
    2794             : {
                         // Report whether this token equals `t`.
                         //
                         // Tokens of different types are never equal.  For tokens of the
                         // same type, only the member relevant to that type is compared:
                         // `c` for TOKEN_CHAR, `nm` for the two special character types,
                         // and `val` for TOKEN_INDEXED_CHAR.  Every other type carries no
                         // payload that is inspected here, so a type match suffices.
    2795    16042199 :   if (type != t.type)
    2796     1280069 :     return false;
    2797    14762130 :   switch (type) {
    2798    14742768 :   case TOKEN_CHAR:
    2799    14742768 :     return c == t.c;
    2800       19350 :   case TOKEN_SPECIAL_CHAR:
    2801             :   case TOKEN_DELIMITED_SPECIAL_CHAR:
    2802       19350 :     return nm == t.nm;
    2803           0 :   case TOKEN_INDEXED_CHAR:
    2804           0 :     return val == t.val;
    2805          12 :   default:
                         // Same type, nothing further to compare.
    2806          12 :     return true;
    2807             :   }
    2808             : }
    2809             : 
    2810      915310 : bool token::operator!=(const token &t)
    2811             : {
                         // Exact negation of operator==; all comparison rules live there.
    2812      915310 :   return !(*this == t);
    2813             : }
    2814             : 
    2815             : // Is the character usable as a delimiter?
    2816             : //
    2817             : // This is used directly only by `do_device_extension()`, because it is
    2818             : // the only escape sequence that reads its argument in copy mode (so it
    2819             : // doesn't tokenize it) and accepts a user-specified delimiter.
                       //
                       // Returns false when `c` is a decimal numeral, `+`, `-`, `(`, `.`,
                       // or `|`, and true for every other value.
    2820     4487629 : static bool is_char_usable_as_delimiter(int c)
    2821             : {
    2822             :   // Reject all characters that can validly begin a numeric expression.
    2823     4487629 :   switch (c) {
    2824     2464251 :   case '0':
    2825             :   case '1':
    2826             :   case '2':
    2827             :   case '3':
    2828             :   case '4':
    2829             :   case '5':
    2830             :   case '6':
    2831             :   case '7':
    2832             :   case '8':
    2833             :   case '9':
    2834             :   case '+':
    2835             :   case '-':
                         // The case labels commented out below are inactive: those
                         // characters reach the default label and are accepted.
    2836             :   // case '/':
    2837             :   // case '*':
    2838             :   // case '%':
    2839             :   // case '<':
    2840             :   // case '>':
    2841             :   // case '=':
    2842             :   // case '&':
    2843             :   // case ':':
    2844             :   case '(':
    2845             :   // case ')':
    2846             :   case '.':
    2847             :   case '|':
    2848     2464251 :     return false;
    2849     2023378 :   default:
    2850     2023378 :     return true;
    2851             :   }
    2852             : }
    2853             : 
    2854           0 : void token::describe_node(char *buf, size_t bufsz)
    2855             : {
                         // Write a short English description of this token's node into
                         // `buf`: the indefinite article (`a` or `an`) followed by the
                         // string returned by `nd->type()`.
                         //
                         // `bufsz` must be the true capacity of `buf` in bytes.  The
                         // whole `bufsz` range is zero-filled before formatting, so
                         // passing a value larger than the real buffer writes past its
                         // end.  Output longer than `bufsz - 1` bytes is truncated by
                         // snprintf().
                         //
                         // A null `nd` trips the assertion; when assertions are compiled
                         // out, a placeholder description is written instead.
    2856           0 :   assert(nd != 0 /* nullptr */);
    2857           0 :   if (0 /* nullptr */ == nd) {
    2858           0 :     (void) snprintf(buf, bufsz, "a null(!) node");
    2859           0 :     return;
    2860             :   }
    2861             :   // Ah, the joys of computational natural language grammar.
    2862           0 :   const char *ndtype = nd->type();
    2863           0 :   const char initial_letter = ndtype[0];
    2864           0 :   bool is_vowelly = false;
    2865             :   // I wonder if Kernighan thought that the presence of set types and an
    2866             :   // "in" operator was one of Pascal's great blunders.  --GBR
                         // Only lowercase vowels are tested; the choice of article
                         // depends solely on the first letter of the type name.
    2867           0 :   if (('a' == initial_letter)
    2868           0 :       || ('e' == initial_letter)
    2869           0 :       || ('i' == initial_letter)
    2870           0 :       || ('o' == initial_letter)
    2871           0 :       || ('u' == initial_letter))
    2872           0 :     is_vowelly = true;
    2873           0 :   (void) memset(buf, 0, bufsz);
    2874           0 :   (void) snprintf(buf, bufsz, "a%s %s", is_vowelly ? "n" : "", ndtype);
    2875             : }
    2876             : 
    2877             : // Is the token a valid delimiter (like `'`)?
                       //
                       // If `report_error` is true, an error diagnostic is issued whenever
                       // the token is rejected.  `context` is consulted only for ordinary
                       // character tokens while `want_att_compat` is set; it selects which
                       // set of AT&T-compatible rules applies.  Outside compatibility
                       // mode, ordinary characters are judged by
                       // `is_char_usable_as_delimiter()`.
                       //
                       // Node tokens, the space-like tokens, delimited horizontal motions,
                       // delimited special characters, newlines, and end of input are
                       // always rejected.  Every remaining token type is accepted.
    2878     4503104 : bool token::is_usable_as_delimiter(bool report_error,
    2879             :                                    enum delimiter_context context)
    2880             : {
    2881     4503104 :   bool is_valid = false;
    2882     4503104 :   switch (type) {
    2883     4487921 :   case TOKEN_CHAR:
    2884     4487921 :     if (!want_att_compat)
    2885     4487629 :       is_valid = is_char_usable_as_delimiter(c);
    2886             :     else {
                             // In compatibility mode the caller must name one of the
                             // AT&T contexts.  The octal tests below admit codes 1
                             // through 011, 014, and 0177 (in ASCII: the control
                             // characters before newline, form feed, and delete).
    2887         292 :       assert(context != DELIMITER_GROFF_EXPRESSION);
    2888         292 :       switch (context) {
    2889         105 :       case DELIMITER_ATT_STRING_EXPRESSION:
    2890         105 :         if (csgraph(c)
    2891         105 :             || (((c > 0) && (c < 012)) || (014 == c) || (0177 == c)))
    2892         105 :           is_valid = true;
    2893         105 :         break;
    2894          92 :       case DELIMITER_ATT_NUMERIC_EXPRESSION:
                               // Start from the same acceptance test as the string
                               // context, then veto the operator characters.
    2895          92 :         if (csgraph(c)
    2896          92 :             || (((c > 0) && (c < 012)) || (014 == c) || (0177 == c)))
    2897          92 :           is_valid = true;
    2898             :         // AT&T troff doesn't accept as numeric expression delimiters
    2899             :         // characters that validly appear in a numeric expression,
    2900             :         // _except_ for numerals, `|`, and `.`.
    2901          92 :         if (('+' == c)
    2902          91 :             || ('-' == c)
    2903          90 :             || ('/' == c)
    2904          89 :             || ('*' == c)
    2905          88 :             || ('%' == c)
    2906          87 :             || ('<' == c)
    2907          86 :             || ('>' == c)
    2908          85 :             || ('=' == c)
    2909          84 :             || ('&' == c)
    2910          83 :             || (':' == c)
    2911          82 :             || ('(' == c)
    2912          81 :             || (')' == c))
    2913          12 :           is_valid = false;
    2914          92 :         break;
    2915          95 :       case DELIMITER_ATT_OUTPUT_COMPARISON_EXPRESSION:
                               // Accept uppercase letters, lowercase letters other than
                               // `e`, `n`, `o`, and `t`, punctuation, and the control
                               // codes listed above; then veto the characters below.
    2916          95 :         if (csupper(c)
    2917          71 :             || (cslower(c)
    2918          17 :                 && (c != 'e')
    2919          17 :                 && (c != 'n')
    2920          17 :                 && (c != 'o')
    2921          17 :                 && (c != 't'))
    2922          54 :             || cspunct(c)
    2923         166 :             || (((c > 0) && (c < 012)) || (014 == c) || (0177 == c)))
    2924          80 :           is_valid = true;
    2925             :         // AT&T troff doesn't accept as conditional expression
    2926             :         // delimiters characters that can validly appear in a numeric
    2927             :         // expression, nor `!`.  We already excluded numerals above.
    2928          95 :         if (('+' == c)
    2929          94 :             || ('-' == c)
    2930          93 :             || ('/' == c)
    2931          90 :             || ('*' == c)
    2932          89 :             || ('%' == c)
    2933          88 :             || ('<' == c)
    2934          87 :             || ('>' == c)
    2935          86 :             || ('=' == c)
    2936          85 :             || ('&' == c)
    2937          84 :             || (':' == c)
    2938          83 :             || ('(' == c)
    2939          82 :             || (')' == c)
    2940          81 :             || ('|' == c)
    2941          80 :             || ('.' == c)
    2942          79 :             || ('!' == c))
    2943          16 :           is_valid = false;
    2944          95 :         break;
    2945           0 :       default:
    2946           0 :         assert(0 == "unhandled case of `context` (enum dcontext)");
    2947             :         break;
    2948             :       }
    2949             :     }
    2950     4487921 :     if (!is_valid && report_error)
    2951          30 :       error("character '%1' is not allowed as a delimiter",
    2952          60 :             static_cast<char>(c));
    2953     4487921 :     return is_valid;
    2954           0 :   case TOKEN_NODE:
    2955           0 :     if (report_error) {
    2956             :       // Reserve a buffer large enough to handle the lengthiest case.
    2957             :       // See `token::description()`.
    2958           0 :       const size_t bufsz
    2959             :         = sizeof "space character horizontal motion node token"
    2960             :           + sizeof "bracketrighttp"
    2961             :           + 2 /* for trailing '"' and '\0' */;
    2962             :       // C++03: char[bufsz]();
    2963             :       static char buf[bufsz];
    2964           0 :       (void) memset(buf, 0, bufsz);
                             // `buf` really is `bufsz` bytes long, so the size passed
                             // here matches the storage that describe_node() clears.
    2965           0 :       describe_node(buf, bufsz);
    2966           0 :       error("%1 is not allowed as a delimiter", buf);
    2967             :     }
    2968           0 :     return false;
    2969           2 :   case TOKEN_SPACE:
    2970             :   case TOKEN_STRETCHABLE_SPACE:
    2971             :   case TOKEN_UNSTRETCHABLE_SPACE:
    2972             :   case TOKEN_DELIMITED_HORIZONTAL_MOTION:
    2973             :   case TOKEN_DELIMITED_SPECIAL_CHAR:
    2974             :   case TOKEN_NEWLINE:
    2975             :   case TOKEN_EOF:
    2976           2 :     if (report_error)
    2977           0 :       error("%1 is not allowed as a delimiter", description());
    2978           2 :     return false;
    2979       15181 :   default:
                         // All other token types are accepted without further checks.
    2980       15181 :     return true;
    2981             :   }
    2982             : }
    2983             : 
    2984     1102494 : const char *token::description()
    2985             : {
                         // Return a human-readable description of this token for use
                         // in diagnostic messages.
                         //
                         // The result is either a string literal or a pointer to the
                         // function-local static `buf`.  `buf` is cleared and reused
                         // on every call, so a caller that needs the text to survive a
                         // later call must copy it first.  Descriptions that do not fit
                         // in `buf` are truncated by snprintf().
    2986             :   // Reserve a buffer large enough to handle the lengthiest cases.  The
    2987             :   // user can still contrive, by accident or otherwise, an arbitrarily
    2988             :   // long identifier.
    2989             :   //   "character code XXX"
    2990             :   //   "special character 'bracketrighttp'"
    2991             :   //   "indexed character -2147483648"
    2992             :   //   "space character horizontal motion node token"
    2993             :   //   "nonexistent special character or class"
    2994             :   // Future:
    2995             :   //   "character code XXX (U+XXXX)" or similar
    2996     1102494 :   const size_t bufsz
    2997             :     = sizeof "space character horizontal motion node token"
    2998             :       + sizeof "bracketrighttp"
    2999             :       + 2 /* for trailing '"' and '\0' */;
    3000             :   static char buf[bufsz];
    3001     1102494 :   (void) memset(buf, 0, bufsz);
    3002     1102494 :   switch (type) {
    3003           0 :   case TOKEN_EMPTY:
    3004           0 :     return "an indeterminate token (at start of input?)";
    3005           0 :   case TOKEN_BACKSPACE:
    3006           0 :     return "a backspace character";
    3007        6412 :   case TOKEN_CHAR:
                         // An apostrophe is quoted with double quotes; other codes
                         // below 128 are quoted with apostrophes; anything else is
                         // reported by its numeric code.
    3008        6412 :     if (INPUT_DELETE == c)
    3009           0 :       return "a delete character";
    3010        6412 :     else if ('\'' == c) {
    3011        6332 :       (void) snprintf(buf, bufsz, "character \"%c\"", c);
    3012        6332 :       return buf;
    3013             :     }
    3014          80 :     else if (c < 128) {
    3015          80 :       (void) snprintf(buf, bufsz, "character '%c'", c);
    3016          80 :       return buf;
    3017             :     }
    3018             :     else {
    3019           0 :       (void) snprintf(buf, bufsz, "character code %d", c);
    3020           0 :       return buf;
    3021             :     }
    3022           6 :   case TOKEN_DUMMY:
    3023           6 :     return "an escaped '&'";
    3024           0 :   case TOKEN_ESCAPE:
    3025           0 :     return "an escaped 'e'";
    3026           0 :   case TOKEN_HYPHEN_INDICATOR:
    3027           0 :     return "an escaped '%'";
    3028           0 :   case TOKEN_INTERRUPT:
    3029           0 :     return "an escaped 'c'";
    3030           0 :   case TOKEN_ITALIC_CORRECTION:
    3031           0 :     return "an escaped '/'";
    3032           0 :   case TOKEN_LEADER:
    3033           0 :     return "a leader character";
    3034           0 :   case TOKEN_LEFT_BRACE:
    3035           0 :     return "an escaped '{'";
    3036           0 :   case TOKEN_MARK_INPUT:
    3037           0 :     return "an escaped 'k'";
    3038          14 :   case TOKEN_NEWLINE:
    3039          14 :     return "a newline";
    3040           0 :   case TOKEN_NODE:
    3041             :     {
    3042             :       static char nodebuf[bufsz - (sizeof " token")];
    3043           0 :       (void) strcpy(nodebuf, "an undescribed node");
                             // Pass the real capacity of `nodebuf`, not `bufsz`.
                             // describe_node() zero-fills the full size it is given,
                             // and `nodebuf` is shorter than `buf` by sizeof " token",
                             // so passing `bufsz` wrote past the end of `nodebuf`.
    3044           0 :       describe_node(nodebuf, sizeof nodebuf);
    3045           0 :       (void) snprintf(buf, bufsz, "%s token", nodebuf);
    3046           0 :       return buf;
    3047             :     }
    3048           1 :   case TOKEN_INDEXED_CHAR:
    3049           1 :     (void) snprintf(buf, bufsz, "indexed character %d",
    3050             :                     character_index());
    3051           1 :     return buf;
    3052     1059588 :   case TOKEN_RIGHT_BRACE:
    3053     1059588 :     return "an escaped '}'";
    3054           0 :   case TOKEN_SPACE:
    3055           0 :     return "a space";
    3056           4 :   case TOKEN_SPECIAL_CHAR:
    3057             :   case TOKEN_DELIMITED_SPECIAL_CHAR:
    3058             :     // We normally use apostrophes for quotation in diagnostic messages,
    3059             :     // but many special character names contain them.  Fall back to
    3060             :     // double quotes if this one does.  A user-defined special character
    3061             :     // name could contain both characters; we expect such users to lie
    3062             :     // comfortably in the bed they made for themselves.
    3063             :     {
    3064           4 :       const char *sc = nm.contents();
    3065           4 :       char qc = '\'';
    3066           4 :       if (strchr(sc, '\'') != 0 /* nullptr */)
    3067           0 :         qc = '"';
    3068             :       // TODO: This truncates the names of impractically long special
    3069             :       // character or character class names.  Do something about that.
    3070             :       // (The truncation is visually indicated by the absence of a
    3071             :       // closing quotation mark.)
    3072             :       static const char special_character[] = "special character";
    3073             :       static const char character_class[] = "character class";
    3074             :       static const char nonexistent[] = "nonexistent special character"
    3075             :                                         " or class";
    3076           4 :       const char *ctype = special_character;
                             // Look the name up without creating it, so that merely
                             // describing a token does not define a new character.
    3077           4 :       charinfo *ci = get_charinfo(false /* required */,
    3078             :                                   true /* suppress creation */);
    3079           4 :       if (0 /* nullptr */ == ci)
    3080           1 :         ctype = nonexistent;
    3081           3 :       else if (ci->is_class())
    3082           1 :         ctype = character_class;
    3083           4 :       (void) snprintf(buf, bufsz, "%s %c%s%c", ctype, qc, sc, qc);
    3084           4 :       return buf;
    3085             :     }
    3086           0 :   case TOKEN_SPREAD:
    3087           0 :     return "an escaped 'p'";
    3088           0 :   case TOKEN_STRETCHABLE_SPACE:
    3089           0 :     return "an escaped '~'";
    3090           0 :   case TOKEN_UNSTRETCHABLE_SPACE:
    3091           0 :     return "an escaped ' '";
    3092           2 :   case TOKEN_DELIMITED_HORIZONTAL_MOTION:
    3093           2 :     return "a parameterized horizontal motion";
    3094           0 :   case TOKEN_HORIZONTAL_MOTION:
    3095           0 :     return "a horizontal motion";
    3096       36467 :   case TOKEN_TAB:
    3097       36467 :     return "a tab character";
    3098           0 :   case TOKEN_TRANSPARENT:
    3099           0 :     return "an escaped '!'";
    3100           0 :   case TOKEN_TRANSPARENT_DUMMY:
    3101           0 :     return "an escaped ')'";
    3102           0 :   case TOKEN_ZERO_WIDTH_BREAK:
    3103           0 :     return "an escaped ':'";
    3104           0 :   case TOKEN_EOF:
    3105           0 :     return "end of input";
    3106           0 :   default:
                         // Reaching this label means a token type was added without a
                         // description; the fallback text keeps release builds usable.
    3107           0 :     assert(0 == "unhandled case of `type` (token)");
    3108             :     return "an undescribed token";
    3109             :   }
    3110             : }
    3111             : 
    3112    12337378 : void skip_line()
    3113             : {
                         // Advance the global token `tok` past the rest of the current
                         // input line: read tokens until a newline is current, then read
                         // one more so that the newline itself is consumed.  If end of
                         // input is reached first, return at once without advancing.
    3114    12337378 :   while (!tok.is_newline())
    3115     1286663 :     if (tok.is_eof())
    3116          17 :       return;
    3117             :     else
    3118     1286646 :       tok.next();
    3119    11050715 :   tok.next();
    3120             : }
    3121             : 
    3122       23802 : void compatible()
    3123             : {
                         // Set the global `want_att_compat` flag from an optional
                         // integer argument: a positive value enables it, any other
                         // value disables it.  When no argument is present, or it cannot
                         // be read as an integer, the flag is enabled.  The rest of the
                         // input line is then discarded.
                         // NOTE(review): presumably the handler for the request that
                         // toggles compatibility mode; confirm against the request
                         // table, which is outside this excerpt.
    3124             :   int n;
    3125       23802 :   if (has_arg() && read_integer(&n))
    3126       23802 :     want_att_compat = (n > 0);
    3127             :   else
    3128           0 :     want_att_compat = true;
    3129       23802 :   skip_line();
    3130       23802 : }
    3131             : 
    3132     6554251 : static void diagnose_missing_identifier(bool required)
    3133             : {
                         // Issue the appropriate diagnostic when no identifier could be
                         // read at the current token `tok`.  `required` says whether
                         // the caller needed one: a plain absence (newline or end of
                         // input) is reported only when it did.
                         //
                         // This function may advance `tok`: after a right brace or tab
                         // it skips any following spaces, right braces, and tabs.
    3134     6554251 :   if (tok.is_newline() || tok.is_eof()) {
    3135     5458196 :     if (required)
    3136           0 :       warning(WARN_MISSING, "missing identifier");
    3137             :   }
    3138     1096055 :   else if (tok.is_right_brace() || tok.is_tab()) {
    3139             :     // token::description() writes to static, class-wide storage, so we
    3140             :     // must allocate a copy of it before issuing the next diagnostic.
                           // NOTE(review): the strdup() result is not checked; a null
                           // `start` would be handed to error() below.  Confirm whether
                           // that is acceptable on allocation failure.
    3141     1096055 :     char *start = strdup(tok.description());
    3142      311213 :     do {
    3143     1407268 :       tok.next();
    3144     1407268 :     } while (tok.is_space() || tok.is_right_brace() || tok.is_tab());
    3145             :     // XXX: unreachable code? --GBR
    3146     1096055 :     if (!tok.is_newline() && !tok.is_eof())
    3147           0 :       error("%1 is not allowed before an argument", start);
    3148     1096055 :     else if (required)
    3149           0 :       warning(WARN_MISSING, "missing identifier");
    3150     1096055 :     free(start);
    3151             :   }
                         // Any other token is an error either way; only the wording
                         // differs, noting that an optional identifier is treated as
                         // missing.
    3152           0 :   else if (required)
    3153           0 :     error("expected identifier, got %1", tok.description());
    3154             :   else
    3155           0 :     error("expected identifier, got %1; treated as missing",
    3156           0 :           tok.description());
    3157     6554251 : }
    3158             : 
                       // Report an error if the current token is not one of those accepted
                       // immediately after an identifier: newline, end of input, space, tab,
                       // right brace, or left brace.  Any other token is described in the
                       // error message; nothing is consumed.
    3159    19275968 : static void diagnose_invalid_identifier()
    3160             : {
    3161    36247206 :   if (!tok.is_newline() && !tok.is_eof() && !tok.is_space()
    3162       72660 :       && !tok.is_tab() && !tok.is_right_brace()
    3163             :       // We don't want to give a warning for .el\{
    3164    36247206 :       && !tok.is_left_brace())
    3165           0 :     error("%1 is not allowed in an identifier", tok.description());
    3166    19275968 : }
    3167             : 
                       // Read an identifier from the input and return it as a `symbol`.
                       //
                       // `required`: forwarded to diagnose_missing_identifier() (directly, or
                       // via read_long_identifier()) to decide whether a missing identifier
                       // deserves a diagnostic.
                       //
                       // When `want_att_compat` is set, an identifier is at most two
                       // characters long and is taken from up to two consecutive tokens after
                       // leading spaces are skipped.  Otherwise the work is delegated to
                       // read_long_identifier().  Returns NULL_SYMBOL if the first token
                       // yields no character.
    3168    25349820 : symbol read_identifier(bool required)
    3169             : {
    3170    25349820 :   if (want_att_compat) {
    3171             :     char buf[3];
    3172       79609 :     tok.skip_spaces();
    3173       79609 :     if ((buf[0] = tok.ch()) != 0U) {
    3174       32567 :       tok.next();
    3175       32567 :       if ((buf[1] = tok.ch()) != 0U) {
    3176       32567 :         buf[2] = '\0';
                               // presumably replaces the second character's token with a
                               // space so the caller sees a separator -- TODO confirm
    3177       32567 :         tok.make_space();
    3178             :       }
    3179             :       else
                               // buf[1] is already 0 here, so `buf` holds a one-character
                               // name; check that the token ending it is acceptable.
    3180           0 :         diagnose_invalid_identifier();
    3181       32567 :       return symbol(buf);
    3182             :     }
    3183             :     else {
    3184       47042 :       diagnose_missing_identifier(required);
    3185       47042 :       return NULL_SYMBOL;
    3186             :     }
    3187             :   }
    3188             :   else
    3189    25270211 :     return read_long_identifier(required);
    3190             : }
    3191             : 
                       // Read an identifier of arbitrary length: a thin wrapper around
                       // read_input_until_terminator() with no terminator character (0U) and
                       // identifier validation switched on.  `required` is passed through.
    3192    25635574 : symbol read_long_identifier(bool required)
    3193             : {
    3194             :   return read_input_until_terminator(required, 0U,
    3195    25635574 :                                      true /* want identifier */);
    3196             : }
    3197             : 
    3198             : // Read bytes from input until reaching a null byte or the specified
    3199             : // `end_char`; construct and return a `symbol` object therefrom.
                       //
                       // `required`: passed to diagnose_missing_identifier() when no bytes
                       // were read, to decide whether that deserves a diagnostic.
                       // `end_char`: byte that ends the read; 0U means "no terminator".
                       // `want_identifier`: if true, reject bytes below ' ' or above 159 with
                       // an error.
                       //
                       // Leading spaces are skipped first.  Returns NULL_SYMBOL if nothing
                       // was read or if an invalid byte was rejected.  When the read stops at
                       // a nonzero `end_char`, that byte is kept as the last byte of the
                       // returned symbol and its token is not consumed; otherwise the token
                       // that stopped the read is checked by diagnose_invalid_identifier().
    3200    25783366 : static symbol read_input_until_terminator(bool required,
    3201             :                                           unsigned char end_char,
    3202             :                                           bool want_identifier)
    3203             : {
    3204    25783366 :   tok.skip_spaces();
    3205    25783366 :   int buf_size = default_buffer_size;
    3206             :   // TODO: grochar
    3207    25783366 :   unsigned char *buf = 0 /* nullptr */;
    3208             :   try {
    3209             :     // C++03: new char[buf_size]();
    3210    25783366 :     buf = new unsigned char[buf_size];
    3211             :   }
    3212           0 :   catch (const std::bad_alloc &e) {
    3213           0 :     fatal("cannot allocate %1 bytes to read input line", buf_size);
    3214             :   }
    3215    25783366 :   (void) memset(buf, 0, (buf_size * sizeof(unsigned char)));
    3216    25783366 :   int i = 0;
    3217    25783366 :   const unsigned char terminator = end_char; // TODO: grochar
    3218             :   for (;;) {
    3219             :     // If `terminator` != 0U we normally have to append a null byte.
                           // Hence room is kept for two more bytes; the buffer doubles when
                           // that room runs out.
    3220   114274928 :     if ((i + 2) > buf_size) {
    3221      937182 :       unsigned char *old_buf = buf; // TODO: grochar
    3222      937182 :       int new_buf_size = buf_size * 2;
    3223             :       // C++03: new char[new_buf_size]();
    3224             :       try {
    3225      937182 :         buf = new unsigned char[new_buf_size];
    3226             :       }
    3227           0 :       catch (const std::bad_alloc &e) {
                               // Report the size that was actually requested.
    3228           0 :         fatal("cannot allocate %1 bytes to read input line", new_buf_size);
    3229             :       }
    3230      937182 :       (void) memset(buf, 0, (new_buf_size * sizeof(unsigned char)));
    3231      937182 :       (void) memcpy(buf, old_buf, (buf_size * sizeof(unsigned char)));
    3232      937182 :       buf_size = new_buf_size;
    3233      937182 :       delete[] old_buf;
    3234             :     }
    3235   114274928 :     buf[i] = tok.ch();
    3236   114274928 :     if ((0U == buf[i]) || (terminator == buf[i]))
    3237             :       break;
    3238    88491562 :     else if (want_identifier && ((buf[i] < ' ') || (buf[i] > 159))) {
    3239             :       // Of C0 controls, Solaris, Heirloom, and Plan 9 troff support
    3240             :       // ^[BCEFG] (only) in identifiers.  DWB 3.3 supports none.
    3241           0 :       assert(buf[i] != ' '); // ensure caller handled spaces
    3242           0 :       error("character code %1 is not allowed in an identifier",
    3243           0 :             static_cast<int>(buf[i]));
    3244           0 :       delete[] buf;
    3245           0 :       return NULL_SYMBOL;
    3246             :     }
    3247    88491562 :     i++;
    3248    88491562 :     tok.next();
    3249    88491562 :   }
    3250    25783366 :   if (0 == i) {
    3251     6507209 :     diagnose_missing_identifier(required);
    3252     6507209 :     delete[] buf;
    3253     6507209 :     return NULL_SYMBOL;
    3254             :   }
                         // Stopped on the terminator: keep it in the buffer and end the
                         // string after it.  Otherwise buf[i] is already the null byte.
    3255    19276157 :   if ((terminator != 0U) && (terminator == buf[i]))
    3256         189 :     buf[i + 1] = '\0';
    3257             :   else
    3258    19275968 :     diagnose_invalid_identifier();
                         // Copy into a plain `char` buffer for the `symbol` constructor.
    3259    19276157 :   char *chbuf = 0 /* nullptr */;
    3260             :   try {
    3261             :     // C++03: new char[buf_size]();
    3262    19276157 :     chbuf = new char[buf_size];
    3263             :   }
    3264           0 :   catch (const std::bad_alloc &e) {
    3265           0 :     fatal("cannot allocate %1 bytes to copy identifier", buf_size);
    3266             :   }
    3267   343286733 :   for (int j = 0; j < buf_size; j++)
    3268   324010576 :     chbuf[j] = static_cast<char>(buf[j]);
    3269    19276157 :   delete[] buf;
    3270    19276157 :   symbol s(chbuf);
    3271    19276157 :   delete[] chbuf;
    3272    19276157 :   return s;
    3273             : }
    3274             : 
                       // Forward declaration; exit_troff() below calls it.
    3275             : static void close_all_streams();
    3276             : 
                       // Carry out end-of-input processing and terminate the program.
                       //
                       // In order: mark the exit as under way, close all streams, tell the
                       // top-level diversion that this is the last page, run the end-of-input
                       // macro if one is defined, perform the environment's final break, end
                       // any diversions, and, if the page length is positive, force the last
                       // page out.  Finally call write_any_trailer_and_exit(EXIT_SUCCESS).
    3277        1403 : void exit_troff()
    3278             : {
    3279        1403 :   is_exit_underway = true;
    3280        1403 :   close_all_streams();
    3281        1403 :   topdiv->set_last_page();
    3282        1403 :   if (!end_of_input_macro_name.is_null()) {
    3283         369 :     spring_trap(end_of_input_macro_name);
    3284         369 :     tok.next();
    3285         369 :     process_input_stack();
    3286             :   }
    3287        1309 :   curenv->final_break();
    3288        1184 :   tok.next();
    3289        1184 :   process_input_stack();
    3290        1184 :   end_diversions();
    3291        1184 :   if (topdiv->get_page_length() > 0) {
    3292        1184 :     is_eoi_macro_finished = true;
    3293        1184 :     topdiv->set_ejecting();
    3294             :     static unsigned char buf[2] = { LAST_PAGE_EJECTOR, '\0' };
    3295             :     // XXX: Ugliness alert.  GNU troff wants to eat its cake and have it
    3296             :     // too, using the explicit `unsigned char` numeric type to represent
    3297             :     // input characters while also using C/C++'s `char` type--of
    3298             :     // undefined signedness--and its literals, including character
    3299             :     // string literals like `"\n"`, in free admixture therewith.
    3300             :     //
    3301             :     // Fixing this the right way means widening the fundamental
    3302             :     // character type of GNU troff formatting operations, possibly to
    3303             :     // `char32_t` (C++11).  That's a heavy lift; see Savannah #40720.
    3304        1184 :     input_stack::push(make_temp_iterator(reinterpret_cast<char *>(buf)));
    3305        1184 :     topdiv->space(topdiv->get_page_length(), true /* forcing */);
    3306         121 :     tok.next();
    3307         121 :     process_input_stack();
    3308             :     // TODO: Resolve the following case and enable the assertion.
    3309             :     // $ printf '.DS\n.DE\n' | ./build/test-groff -ms
    3310             :     // troff: ../src/roff/troff/input.cpp:2937: void exit_troff():
    3311             :     //   Assertion `seen_last_page_ejector' failed.
    3312             :     // .../build/groff: error: troff: Aborted (core dumped)
    3313             :     //assert(seen_last_page_ejector);
    3314           5 :     seen_last_page_ejector = true;      // should be set already
    3315           5 :     topdiv->set_ejecting();
    3316           5 :     push_page_ejector();
    3317           5 :     topdiv->space(topdiv->get_page_length(), true /* forcing */);
    3318           5 :     tok.next();
    3319           5 :     process_input_stack();
    3320             :   }
    3321             :   // TODO: delete pointers in file name set.
    3322           0 :   write_any_trailer_and_exit(EXIT_SUCCESS);
    3323           0 : }
    3324             : 
    3325             : // This implements .ex.  The input stack must be cleared before calling
    3326             : // exit_troff().
                       //
                       // If an exit is already under way (`is_exit_underway`), exit_troff() is
                       // not entered again; the next token is simply read from the
                       // now-cleared input stack.
    3327             : 
    3328          32 : void exit_request()
    3329             : {
    3330          32 :   input_stack::clear();
    3331          32 :   if (is_exit_underway)
    3332          32 :     tok.next();
    3333             :   else
    3334           0 :     exit_troff();
    3335          32 : }
    3336             : 
                       // Pop the current macro off the input stack and read the next token.
                       // If an argument is present and its first token yields a character
                       // (tok.ch() != 0U), one additional macro level is popped first.
    3337      112992 : void return_macro_request()
    3338             : {
    3339      112992 :   if (has_arg() && (tok.ch() != 0U))
    3340           0 :     input_stack::pop_macro();
    3341      112992 :   input_stack::pop_macro();
    3342      112992 :   tok.next();
    3343      112992 : }
    3344             : 
                       // Record the identifier argument as `end_of_input_macro_name`, the
                       // macro that exit_troff() springs before finishing, then skip the rest
                       // of the line.  A missing argument stores whatever read_identifier()
                       // returns for that case.
    3345         462 : void eoi_macro()
    3346             : {
    3347         462 :   end_of_input_macro_name = read_identifier();
    3348         462 :   skip_line();
    3349         462 : }
    3350             : 
                       // Record the identifier argument as `blank_line_macro_name`, the macro
                       // that trapping_blank_line() springs in place of the default blank-line
                       // handling, then skip the rest of the line.
    3351        3302 : void blank_line_macro()
    3352             : {
    3353        3302 :   blank_line_macro_name = read_identifier();
    3354        3302 :   skip_line();
    3355        3302 : }
    3356             : 
                       // Record the identifier argument as `leading_spaces_macro_name`, the
                       // macro that process_input_stack() springs when an input line starts
                       // with spaces, then skip the rest of the line.
    3357        3203 : void leading_spaces_macro()
    3358             : {
    3359        3203 :   leading_spaces_macro_name = read_identifier();
    3360        3203 :   skip_line();
    3361        3203 : }
    3362             : 
                       // Handle a blank input line: spring the macro named by
                       // `blank_line_macro_name` if one is set; otherwise fall back to
                       // blank_line().
    3363        1196 : static void trapping_blank_line()
    3364             : {
    3365        1196 :   if (!blank_line_macro_name.is_null())
    3366        1116 :     spring_trap(blank_line_macro_name);
    3367             :   else
    3368          80 :     blank_line();
    3369        1196 : }
    3370             : 
                       // Saved values of `want_att_compat`; do_request() pushes the current
                       // value before turning the flag off and pops it afterward.
    3371             : std::stack<bool> want_att_compat_stack;
    3372             : 
                       // Interpret the request or macro named by the argument with
                       // `want_att_compat` temporarily switched off.
                       //
                       // With no argument, warn and skip the line.  Otherwise save the flag,
                       // clear it, read the name, invoke it through interpolate_macro(), and
                       // restore the saved flag.  If the name refers to a macro (to_macro()
                       // is non-null), the next token is then read.
    3373      160030 : void do_request()
    3374             : {
    3375      160030 :   if (!has_arg()) {
    3376           0 :     warning(WARN_MISSING, "groff syntax interpretation request expects"
    3377             :             " a request or macro as argument");
    3378           0 :     skip_line();
    3379           0 :     return;
    3380             :   }
    3381      160030 :   want_att_compat_stack.push(want_att_compat);
    3382      160030 :   want_att_compat = false;
    3383      160030 :   symbol nm = read_identifier();
    3384      160030 :   if (nm.is_null())
    3385           0 :     skip_line();
    3386             :   else
    3387      160030 :     interpolate_macro(nm, true /* don't want next token */);
    3388      160030 :   assert(!want_att_compat_stack.empty());
    3389      160030 :   want_att_compat = want_att_compat_stack.top();
    3390      160030 :   want_att_compat_stack.pop();
                         // NOTE(review): when `nm` is null the line has already been
                         // skipped, yet control still reaches lookup_request(nm) and
                         // dereferences its result.  Confirm lookup_request() tolerates a
                         // null name, or return before this point in that case.
    3391      160030 :   request_or_macro *p = lookup_request(nm);
    3392      160030 :   macro *m = p->to_macro();
    3393      160030 :   if (m != 0 /* nullptr */)
    3394        2578 :     tok.next();
    3395             : }
    3396             : 
                       // Call handle_first_page_transition() and return true when all three
                       // hold: the top-level diversion's `before_first_page_status` is
                       // positive, the current diversion is the top-level one, and the
                       // current environment is not a dummy.  Otherwise do nothing and return
                       // false.
    3397    21029452 : inline bool possibly_handle_first_page_transition()
    3398             : {
    3399       63056 :   if ((topdiv->before_first_page_status > 0) && (curdiv == topdiv)
    3400    21092508 :       && !curenv->is_dummy()) {
    3401         615 :     handle_first_page_transition();
    3402         615 :     return true;
    3403             :   }
    3404             :   else
    3405    21028837 :     return false;
    3406             : }
    3407             : 
                       // Return the code to emit for input character code `cc` in transparent
                       // output.
                       //
                       // Codes rejected by is_invalid_input_char() are returned unchanged.
                       // Otherwise the character's special translation, if any, selects one
                       // of ' ', ESCAPE_TILDE, ESCAPE_AMPERSAND, or ESCAPE_PERCENT.  Failing
                       // that, an ordinary translation is used when its target has a nonzero
                       // get_ascii_code(); a target without one is reported as an error and
                       // `cc` is returned unchanged.
    3408      589585 : static int transparent_translate(int cc)
    3409             : {
    3410      589585 :   if (!is_invalid_input_char(cc)) {
    3411      588478 :     charinfo *ci = charset_table[cc];
    3412      588478 :     switch (ci->get_special_translation(true /* transparently */)) {
    3413           0 :     case charinfo::TRANSLATE_SPACE:
    3414           0 :       return ' ';
    3415           0 :     case charinfo::TRANSLATE_STRETCHABLE_SPACE:
    3416           0 :       return ESCAPE_TILDE;
    3417           0 :     case charinfo::TRANSLATE_DUMMY:
    3418           0 :       return ESCAPE_AMPERSAND;
    3419           0 :     case charinfo::TRANSLATE_HYPHEN_INDICATOR:
    3420           0 :       return ESCAPE_PERCENT;
    3421             :     }
    3422             :     // This is really ugly.
    3423      588478 :     ci = ci->get_translation(1);
    3424      588478 :     if (ci != 0 /* nullptr */) {
    3425           8 :       unsigned char c = ci->get_ascii_code();
    3426           8 :       if (c != 0U)
    3427           0 :         return c;
    3428           8 :       error("cannot translate %1 to special character '%2' in"
    3429             :             " device-independent output", input_char_description(cc),
    3430          16 :             ci->nm.contents());
    3431             :     }
    3432             :   }
    3433      589585 :   return cc;
    3434             : }
    3435             : 
                       // Base-class version: always returns false and ignores its argument.
                       // process_input_stack() adds a node to the environment when this
                       // returns false, and deletes the node when it returns true.
    3436     6947672 : bool node::need_reread(bool *)
    3437             : {
    3438     6947672 :   return false;
    3439             : }
    3440             : 
                       // Set to 1 while diverted_space_node::need_reread() is doing its work
                       // and reset to 0 afterward.
    3441             : int global_diverted_space = 0;
    3442             : 
                       // Replay a diverted vertical space.  In fill mode this is handled as a
                       // blank line (trapping_blank_line()); otherwise `n` is passed to the
                       // current diversion's space().  Sets `*bolp` to true and always
                       // returns true.
    3443      526270 : bool diverted_space_node::need_reread(bool *bolp)
    3444             : {
    3445      526270 :   global_diverted_space = 1;
    3446      526270 :   if (curenv->get_fill())
    3447           0 :     trapping_blank_line();
    3448             :   else
    3449      526270 :     curdiv->space(n);
    3450      526270 :   global_diverted_space = 0;
    3451      526270 :   *bolp = true;
    3452      526270 :   return true;
    3453             : }
    3454             : 
                       // Ask the current diversion to copy the file named by `filename`.
                       // Sets `*bolp` to true and always returns true.
    3455           1 : bool diverted_copy_file_node::need_reread(bool *bolp)
    3456             : {
    3457           1 :   curdiv->copy_file(filename.contents());
    3458           1 :   *bolp = true;
    3459           1 :   return true;
    3460             : }
    3461             : 
                       // If the node is flagged `unformat`, re-issue each entry of its
                       // `orig_width` list to the current environment as a space, clear the
                       // flag, and return true.  Otherwise return false.  The argument is
                       // unused.
    3462       42964 : bool word_space_node::need_reread(bool *)
    3463             : {
    3464       42964 :   if (unformat) {
    3465        7172 :     for (width_list *w = orig_width; w != 0 /* nullptr */; w = w->next)
    3466        3586 :       curenv->space(w->width, w->sentence_width);
    3467        3586 :     unformat = 0;
    3468        3586 :     return true;
    3469             :   }
    3470       39378 :   return false;
    3471             : }
    3472             : 
                       // Always returns false; the argument is unused.
    3473        3575 : bool unbreakable_space_node::need_reread(bool *)
    3474             : {
    3475        3575 :   return false;
    3476             : }
    3477             : 
                       // If the node is flagged both `unformat` and `was_tab`, advance the
                       // current environment to its next tab stop, clear `unformat`, and
                       // return true.  Otherwise return false.  The argument is unused.
    3478     1958526 : bool hmotion_node::need_reread(bool *)
    3479             : {
    3480     1958526 :   if (unformat && was_tab) {
    3481           0 :     curenv->advance_to_tab_stop();
    3482           0 :     unformat = 0;
    3483           0 :     return true;
    3484             :   }
    3485     1958526 :   return false;
    3486             : }
    3487             : 
                       // Set by process_input_stack() when an input line begins with spaces:
                       // the number of leading spaces, and that number multiplied by the
                       // space width in units.
    3488             : static int leading_spaces_number = 0;
    3489             : static int leading_spaces_space = 0;
    3490             : 
                       // Main token dispatch loop: examine `tok.type`, act on the token, and
                       // repeat until a TOKEN_EOF is seen, at which point return.
                       //
                       // `reading_beginning_of_input_line` tracks whether the current token
                       // is at the start of an input line.  `trap_bol_stack` saves that flag
                       // at TOKEN_BEGIN_TRAP and restores it at TOKEN_END_TRAP.  A case that
                       // has already advanced `tok` itself sets `ignore_next_token` so that
                       // the tok.next() at the bottom of the loop is skipped.
    3491      713514 : void process_input_stack()
    3492             : {
    3493     1426798 :   std::stack<int> trap_bol_stack;
    3494      713514 :   bool reading_beginning_of_input_line = true;
    3495             :   for (;;) {
    3496    31904451 :     bool ignore_next_token = false;
    3497    31904451 :     switch (tok.type) {
    3498    20020992 :     case token::TOKEN_CHAR:
    3499             :       {
    3500    20020992 :         unsigned char ch = tok.c;
                               // A control character at the start of a line, with no
                               // formattable input pending, introduces a request or macro
                               // call.
    3501    19408260 :         if (reading_beginning_of_input_line && !have_formattable_input
    3502    39561457 :             && (curenv->get_control_character() == ch
    3503      132205 :                 || curenv->get_no_break_control_character() == ch)) {
    3504             :           was_invoked_with_regular_control_character
    3505    19282759 :             = (curenv->get_control_character() == ch);
    3506             :           // skip tabs as well as spaces here
    3507    11674712 :           do {
    3508    30957471 :             tok.next();
    3509    30957471 :           } while (tok.is_horizontal_whitespace());
    3510    19282759 :           symbol nm = read_identifier();
    3511             : #if defined(DEBUGGING)
    3512             :           if (want_html_debugging) {
    3513             :             if (!nm.is_null()) {
    3514             :               if (strcmp(nm.contents(), "test") == 0) {
    3515             :                 fprintf(stderr, "found it!\n");
    3516             :                 fflush(stderr);
    3517             :               }
    3518             :               fprintf(stderr, "interpreting [%s]", nm.contents());
    3519             :               if (strcmp(nm.contents(), "di") == 0 && topdiv != curdiv)
    3520             :                 fprintf(stderr, " currently in diversion: %s",
    3521             :                         curdiv->get_diversion_name());
    3522             :               fprintf(stderr, "\n");
    3523             :               fflush(stderr);
    3524             :             }
    3525             :           }
    3526             : #endif
    3527    19282759 :           if (nm.is_null())
    3528     6245310 :             skip_line();
    3529             :           else {
    3530    13037449 :             interpolate_macro(nm);
    3531             : #if defined(DEBUGGING)
    3532             :             if (want_html_debugging) {
    3533             :               fprintf(stderr, "finished interpreting [%s] and environment state is\n", nm.contents());
    3534             :               curenv->dump_troff_state();
    3535             :             }
    3536             : #endif
    3537             :           }
    3538    19282726 :           ignore_next_token = true;
    3539             :         }
    3540             :         else {
    3541      738233 :           if (possibly_handle_first_page_transition())
    3542             :             ;
    3543             :           else {
                                   // Add this character and every TOKEN_CHAR that follows
                                   // it directly; stop on the first token of another type.
    3544             :             for (;;) {
    3545             : #if defined(DEBUGGING)
    3546             :               if (want_html_debugging) {
    3547             :                 fprintf(stderr, "found [%c]\n", ch); fflush(stderr);
    3548             :               }
    3549             : #endif
    3550     3556484 :               if (curenv->get_was_line_interrupted())
    3551           0 :                 warning(WARN_SYNTAX, "ignoring %1 on input line after"
    3552             :                         " output line continuation escape sequence",
    3553           0 :                         tok.description());
    3554             :               else
    3555     3556484 :                 curenv->add_char(charset_table[ch]);
    3556     3556484 :               tok.next();
    3557     3556484 :               if (tok.type != token::TOKEN_CHAR)
    3558      737892 :                 break;
    3559     2818592 :               ch = tok.c;
    3560             :             }
    3561      737892 :             ignore_next_token = true;
    3562      737892 :             reading_beginning_of_input_line = false;
    3563             :           }
    3564             :         }
    3565    20020959 :         break;
    3566             :       }
    3567       24784 :     case token::TOKEN_TRANSPARENT:
    3568             :       {
                               // Only acted upon at the beginning of an input line: copy
                               // the rest of the line to the current diversion's
                               // transparent output.
    3569       24784 :         if (reading_beginning_of_input_line) {
    3570       24784 :           if (possibly_handle_first_page_transition())
    3571             :             ;
    3572             :           else {
    3573             :             int cc;
    3574      564816 :             do {
    3575             :               node *n;
    3576      589585 :               cc = read_char_in_copy_mode(&n);
    3577      589585 :               if (cc != EOF) {
    3578      589585 :                 if (cc != '\0')
    3579      589585 :                   curdiv->transparent_output(transparent_translate(cc));
    3580             :                 else
    3581           0 :                   curdiv->transparent_output(n);
    3582             :               }
    3583      589585 :             } while (cc != '\n' && cc != EOF);
                                   // Input ended without a newline; supply one.
    3584       24769 :             if (cc == EOF)
    3585           0 :               curdiv->transparent_output('\n');
    3586             :           }
    3587             :         }
    3588       24784 :         break;
    3589             :       }
    3590      751957 :     case token::TOKEN_NEWLINE:
    3591             :       {
                               // A newline at the start of a line is a blank line, unless
                               // the line carries interrupted-line state.
    3592      751957 :         if (reading_beginning_of_input_line
    3593        2301 :             && !have_formattable_input_on_interrupted_line
    3594      754258 :             && !curenv->get_was_previous_line_interrupted())
    3595        1196 :           trapping_blank_line();
    3596             :         else {
    3597      750761 :           curenv->newline();
    3598      750761 :           reading_beginning_of_input_line = true;
    3599             :         }
    3600      751957 :         break;
    3601             :       }
    3602         125 :     case token::TOKEN_REQUEST:
    3603             :       {
    3604         125 :         int request_code = tok.c;
    3605         125 :         tok.next();
    3606             :         switch (request_code) {
    3607         125 :         case TITLE_REQUEST:
    3608         125 :           title();
    3609         125 :           break;
    3610           0 :         case COPY_FILE_REQUEST:
    3611           0 :           unsafe_transparent_throughput_file_request();
    3612           0 :           break;
    3613           0 :         case TRANSPARENT_FILE_REQUEST:
    3614           0 :           transparent_throughput_file_request();
    3615           0 :           break;
    3616             : #ifdef COLUMN
    3617             :         case VJUSTIFY_REQUEST:
    3618             :           vjustify();
    3619             :           break;
    3620             : #endif /* COLUMN */
    3621           0 :         default:
    3622           0 :           assert(0 == "unhandled case of `request_code` (int)");
    3623             :           break;
    3624             :         }
    3625         125 :         ignore_next_token = true;
    3626         125 :         break;
    3627             :       }
    3628      487180 :     case token::TOKEN_SPACE:
    3629             :       {
    3630      487180 :         if (curenv->get_was_line_interrupted())
    3631           0 :           warning(WARN_SYNTAX, "ignoring %1 on input line after"
    3632             :                   " output line continuation escape sequence",
    3633           0 :                   tok.description());
    3634      487180 :         else if (possibly_handle_first_page_transition())
    3635             :           ;
    3636      487180 :         else if (reading_beginning_of_input_line
    3637      487180 :                  && !curenv->get_was_previous_line_interrupted()) {
                                 // Leading spaces: count them all, then either treat the
                                 // line as blank or indent by their total width.
    3638        1042 :           int nspaces = 0;
    3639             :           // save space_width now so that it isn't changed by \f or \s
    3640             :           // which we wouldn't notice here
    3641        1042 :           hunits space_width = curenv->get_space_width();
    3642        3544 :           do {
    3643        4586 :             nspaces += tok.nspaces();
    3644        4586 :             tok.next();
    3645        4586 :           } while (tok.is_space());
    3646        1042 :           if (tok.is_newline())
    3647           0 :             trapping_blank_line();
    3648             :           else {
                                   // The token after the spaces has been read already; push
                                   // it back so the loop's tok.next() retrieves it.
    3649        1042 :             push_token(tok);
    3650        1042 :             leading_spaces_number = nspaces;
    3651        1042 :             leading_spaces_space = space_width.to_units() * nspaces;
    3652        1042 :             if (!leading_spaces_macro_name.is_null())
    3653         153 :               spring_trap(leading_spaces_macro_name);
    3654             :             else {
    3655         889 :               curenv->do_break();
    3656        1778 :               curenv->add_node(new hmotion_node(space_width * nspaces,
    3657         889 :                                                 curenv->get_fill_color()));
    3658             :             }
    3659        1042 :             reading_beginning_of_input_line = false;
    3660             :           }
    3661             :         }
    3662             :         else {
    3663      486138 :           curenv->space();
    3664      486138 :           reading_beginning_of_input_line = false;
    3665             :         }
    3666      487180 :         break;
    3667             :       }
    3668      713284 :     case token::TOKEN_EOF:
    3669     1426568 :       return;
    3670     9479465 :     case token::TOKEN_NODE:
    3671             :     case token::TOKEN_DELIMITED_HORIZONTAL_MOTION:
    3672             :     case token::TOKEN_HORIZONTAL_MOTION:
    3673     9479465 :       if (curenv->get_was_line_interrupted()) {
    3674             :         // We don't want to warn about node types.  They might have been
    3675             :         // interpolated into the input by the formatter itself, as with
    3676             :         // the extra vertical space nodes appended to diversions.
    3677         212 :         if ((token::TOKEN_HORIZONTAL_MOTION == tok.type)
    3678         212 :             || (token::TOKEN_DELIMITED_HORIZONTAL_MOTION == tok.type))
    3679           0 :           warning(WARN_SYNTAX, "ignoring %1 on input line after"
    3680             :                   " output line continuation escape sequence",
    3681           0 :                   tok.description());
    3682             :       }
    3683     9479253 :       else if (possibly_handle_first_page_transition())
    3684             :         ;
                             // A node that reports need_reread() has dealt with itself and
                             // is discarded; any other node is added to the environment.
    3685     9479008 :       else if (tok.nd->need_reread(&reading_beginning_of_input_line)) {
    3686      529857 :         delete tok.nd;
    3687      529857 :         tok.nd = 0;
    3688             :       }
    3689             :       else {
    3690     8949151 :         curenv->add_node(tok.nd);
    3691     8949151 :         tok.nd = 0;
    3692     8949151 :         reading_beginning_of_input_line = false;
    3693     8949151 :         curenv->possibly_break_line(true /* must break here */);
    3694             :       }
    3695     9479465 :       break;
    3696        4856 :     case token::TOKEN_PAGE_EJECTOR:
    3697             :       {
    3698        4856 :         continue_page_eject();
    3699             :         // I think we just want to preserve bol.
    3700             :         // reading_beginning_of_input_line = true;
    3701        4659 :         break;
    3702             :       }
    3703       42312 :     case token::TOKEN_BEGIN_TRAP:
    3704             :       {
                               // Save the line-start flag; the trap's own input starts at
                               // the beginning of a line.
    3705       42312 :         trap_bol_stack.push(reading_beginning_of_input_line);
    3706       42312 :         reading_beginning_of_input_line = true;
    3707       42312 :         have_formattable_input = false;
    3708       42312 :         break;
    3709             :       }
    3710       42030 :     case token::TOKEN_END_TRAP:
    3711             :       {
    3712       42030 :         if (trap_bol_stack.empty())
    3713           0 :           error("spurious end trap token detected!");
    3714             :         else {
    3715       42030 :           reading_beginning_of_input_line = trap_bol_stack.top();
    3716       42030 :           trap_bol_stack.pop();
    3717             :         }
    3718       42030 :         have_formattable_input = false;
    3719             : 
    3720             :         /* I'm not totally happy about this.  But I can't think of any other
    3721             :           way to do it.  Doing an output_pending_lines() whenever a
    3722             :           TOKEN_END_TRAP is detected doesn't work: for example,
    3723             : 
    3724             :           .wh -1i x
    3725             :           .de x
    3726             :           'bp
    3727             :           ..
    3728             :           .wh -.5i y
    3729             :           .de y
    3730             :           .tl ''-%-''
    3731             :           ..
    3732             :           .br
    3733             :           .ll .5i
    3734             :           .sp |\n(.pu-1i-.5v
    3735             :           a\%very\%very\%long\%word
    3736             : 
    3737             :           will print all but the first lines from the word immediately
    3738             :           after the footer, rather than on the next page. */
    3739             : 
    3740       42030 :         if (trap_bol_stack.empty())
    3741       39579 :           curenv->output_pending_lines();
    3742       42030 :         break;
    3743             :       }
    3744       58472 :     case token::TOKEN_INDEXED_CHAR:
    3745             :     case token::TOKEN_SPECIAL_CHAR:
    3746             :     case token::TOKEN_DELIMITED_SPECIAL_CHAR:
    3747       58472 :       if (curenv->get_was_line_interrupted())
    3748           0 :         warning(WARN_SYNTAX, "ignoring %1 on input line after output"
    3749             :                 " line continuation escape sequence",
    3750           0 :                 tok.description());
    3751             :       else {
    3752       58472 :         reading_beginning_of_input_line = false;
    3753       58472 :         tok.process();
    3754             :       }
    3755       58472 :       break;
    3756      278994 :     default:
    3757             :       {
    3758      278994 :         reading_beginning_of_input_line = false;
    3759      278994 :         tok.process();
    3760      278994 :         break;
    3761             :       }
    3762             :     }
    3763    31190937 :     if (!ignore_next_token)
    3764    11170194 :       tok.next();
    3765    31190937 :     was_trap_sprung = false;
    3766    31190937 :   }
    3767             : }
    3768             : 
    3769             : #ifdef WIDOW_CONTROL
    3770             : 
    3771             : void flush_pending_lines()
    3772             : {
    3773             :   while (!tok.is_newline() && !tok.is_eof())
    3774             :     tok.next();
    3775             :   curenv->output_pending_lines();
    3776             :   tok.next();
    3777             : }
    3778             : 
    3779             : #endif /* WIDOW_CONTROL */
    3780             : 
    3781    15900785 : request_or_macro::request_or_macro()
    3782             : {
    3783    15900785 : }
    3784             : 
    3785      157661 : macro *request_or_macro::to_macro()
    3786             : {
    3787      157661 :   return 0 /* nullptr */;
    3788             : }
    3789             : 
    3790      275092 : request::request(REQUEST_FUNCP pp) : p(pp)
    3791             : {
    3792      275092 : }
    3793             : 
    3794    12284816 : void request::invoke(symbol, bool)
    3795             : {
    3796    12284816 :   (*p)();
    3797    12284783 : }
    3798             : 
    3799             : struct char_block {
    3800             :   enum { SIZE = 128 };
    3801             :   unsigned char s[SIZE];
    3802             :   char_block *next;
    3803             :   char_block();
    3804             : };
    3805             : 
    3806     4537440 : char_block::char_block()
    3807     4537440 : : next(0)
    3808             : {
    3809     4537440 : }
    3810             : 
    3811             : class char_list {
    3812             : public:
    3813             :   char_list();
    3814             :   ~char_list();
    3815             :   void append(unsigned char);
    3816             :   void set(unsigned char, int);
    3817             :   unsigned char get(int);
    3818             :   int length();
    3819             : private:
    3820             :   unsigned char *ptr;
    3821             :   int len;
    3822             :   char_block *head;
    3823             :   char_block *tail;
    3824             :   friend class macro_header;
    3825             :   friend class string_iterator;
    3826             : };
    3827             : 
    3828     3628687 : char_list::char_list()
    3829     3628687 : : ptr(0), len(0), head(0), tail(0)
    3830             : {
    3831     3628687 : }
    3832             : 
    3833     9713830 : char_list::~char_list()
    3834             : {
    3835     6680791 :   while (head != 0) {
    3836     3647752 :     char_block *tem = head;
    3837     3647752 :     head = head->next;
    3838     3647752 :     delete tem;
    3839             :   }
    3840     3033039 : }
    3841             : 
    3842   160665253 : int char_list::length()
    3843             : {
    3844   160665253 :   return len;
    3845             : }
    3846             : 
    3847   160673517 : void char_list::append(unsigned char c)
    3848             : {
    3849   160673517 :   if (tail == 0) {
    3850     3628687 :     head = tail = new char_block;
    3851     3628687 :     ptr = tail->s;
    3852             :   }
    3853             :   else {
    3854   157044830 :     if (ptr >= tail->s + char_block::SIZE) {
    3855      908753 :       tail->next = new char_block;
    3856      908753 :       tail = tail->next;
    3857      908753 :       ptr = tail->s;
    3858             :     }
    3859             :   }
    3860   160673517 :   *ptr++ = c;
    3861   160673517 :   len++;
    3862   160673517 : }
    3863             : 
    3864           0 : void char_list::set(unsigned char c, int offset)
    3865             : {
    3866           0 :   assert(len > offset);
    3867             :   // optimization for access at the end
    3868           0 :   int boundary = len - len % char_block::SIZE;
    3869           0 :   if (offset >= boundary) {
    3870           0 :     *(tail->s + offset - boundary) = c;
    3871           0 :     return;
    3872             :   }
    3873           0 :   char_block *tem = head;
    3874           0 :   int l = 0;
    3875             :   for (;;) {
    3876           0 :     l += char_block::SIZE;
    3877           0 :     if (l > offset) {
    3878           0 :       *(tem->s + offset % char_block::SIZE) = c;
    3879           0 :       return;
    3880             :     }
    3881           0 :     tem = tem->next;
    3882             :   }
    3883             : }
    3884             : 
    3885        1006 : unsigned char char_list::get(int offset)
    3886             : {
    3887        1006 :   assert(len > offset);
    3888             :   // optimization for access at the end
    3889        1006 :   int boundary = len - len % char_block::SIZE;
    3890        1006 :   if (offset >= boundary)
    3891        1006 :     return *(tail->s + offset - boundary);
    3892           0 :   char_block *tem = head;
    3893           0 :   int l = 0;
    3894             :   for (;;) {
    3895           0 :     l += char_block::SIZE;
    3896           0 :     if (l > offset)
    3897           0 :       return *(tem->s + offset % char_block::SIZE);
    3898           0 :     tem = tem->next;
    3899             :   }
    3900             : }
    3901             : 
    3902             : class node_list {
    3903             :   node *head;
    3904             :   node *tail;
    3905             : public:
    3906             :   node_list();
    3907             :   ~node_list();
    3908             :   void append(node *);
    3909             :   int length();
    3910             :   node *extract();
    3911             : 
    3912             :   friend class macro_header;
    3913             :   friend class string_iterator;
    3914             : };
    3915             : 
    3916     9215342 : void node_list::append(node *n)
    3917             : {
    3918     9215342 :   if (head == 0 /* nullptr */) {
    3919      256890 :     n->next = 0 /* nullptr */;
    3920      256890 :     head = tail = n;
    3921             :   }
    3922             :   else {
    3923     8958452 :     n->next = 0 /* nullptr */;
    3924     8958452 :     tail = tail->next = n;
    3925             :   }
    3926     9215342 : }
    3927             : 
    3928           0 : int node_list::length()
    3929             : {
    3930           0 :   int total = 0 /* nullptr */;
    3931           0 :   for (node *n = head; n != 0 /* nullptr */; n = n->next)
    3932           0 :     ++total;
    3933           0 :   return total;
    3934             : }
    3935             : 
    3936     3628687 : node_list::node_list()
    3937             : {
    3938     3628687 :   head = tail = 0 /* nullptr */;
    3939     3628687 : }
    3940             : 
    3941           0 : node *node_list::extract()
    3942             : {
    3943           0 :   node *temp = head;
    3944           0 :   head = tail = 0 /* nullptr */;
    3945           0 :   return temp;
    3946             : }
    3947             : 
    3948     6066078 : node_list::~node_list()
    3949             : {
    3950     3033039 :   delete_node_list(head);
    3951     3033039 : }
    3952             : 
    3953             : class macro_header {
    3954             : public:
    3955             :   int count;
    3956             :   char_list cl;
    3957             :   node_list nl;
    3958     3628687 :   macro_header() { count = 1; }
    3959             :   macro_header *copy(int);
    3960             :   void json_dump_macro();
    3961             :   void json_dump_diversion();
    3962             : };
    3963             : 
    3964    15163168 : macro::~macro()
    3965             : {
    3966    14998951 :   if (p != 0 /* nullptr */ && --(p->count) <= 0)
    3967     2074552 :     delete p;
    3968    15163168 : }
    3969             : 
    3970     4390983 : macro::macro()
    3971     4390983 : : is_a_diversion(false), is_a_string(true)
    3972             : {
    3973     4390983 :   if (!input_stack::get_location(true /* allow macro */, &filename,
    3974             :                                  &lineno)) {
    3975        1537 :     filename = 0 /* nullptr */;
    3976        1537 :     lineno = 0 /* nullptr */;
    3977             :   }
    3978     4390983 :   len = 0;
    3979     4390983 :   is_empty_macro = true;
    3980     4390983 :   p = 0; /* nullptr */
    3981     4390983 : }
    3982             : 
    3983    11182922 : macro::macro(const macro &m)
    3984    11182922 : : filename(m.filename), lineno(m.lineno), len(m.len),
    3985    11182922 :   is_empty_macro(m.is_empty_macro), is_a_diversion(m.is_a_diversion),
    3986    11182922 :   is_a_string(m.is_a_string), p(m.p)
    3987             : {
    3988    11182922 :   if (p != 0 /* nullptr */)
    3989    10901976 :     p->count++;
    3990    11182922 : }
    3991             : 
    3992       51788 : macro::macro(bool is_div)
    3993       51788 : : is_a_diversion(is_div)
    3994             : {
    3995       51788 :   if (!input_stack::get_location(true /* allow macro */, &filename,
    3996             :                                  &lineno)) {
    3997           0 :     filename = 0 /* nullptr */;
    3998           0 :     lineno = 0 /* nullptr */;
    3999             :   }
    4000       51788 :   len = 0;
    4001       51788 :   is_empty_macro = true;
    4002             :   // A macro is a string until it contains a newline.
    4003       51788 :   is_a_string = true;
    4004       51788 :   p = 0 /* nullptr */;
    4005       51788 : }
    4006             : 
    4007     9868972 : bool macro::is_diversion()
    4008             : {
    4009     9868972 :   return is_a_diversion;
    4010             : }
    4011             : 
    4012     4419563 : bool macro::is_string()
    4013             : {
    4014     4419563 :   return is_a_string;
    4015             : }
    4016             : 
    4017     1461321 : void macro::clear_string_flag()
    4018             : {
    4019     1461321 :   is_a_string = false;
    4020     1461321 : }
    4021             : 
    4022     1972123 : macro &macro::operator=(const macro &m)
    4023             : {
    4024             :   // don't assign object
    4025     1972123 :   if (m.p != 0 /* nullptr */)
    4026     1587838 :     m.p->count++;
    4027     1972123 :   if (p != 0 /* nullptr */ && --(p->count) <= 0)
    4028      958487 :     delete p;
    4029     1972123 :   p = m.p;
    4030     1972123 :   filename = m.filename;
    4031     1972123 :   lineno = m.lineno;
    4032     1972123 :   len = m.len;
    4033     1972123 :   is_empty_macro = m.is_empty_macro;
    4034     1972123 :   is_a_diversion = m.is_a_diversion;
    4035     1972123 :   is_a_string = m.is_a_string;
    4036     1972123 :   return *this;
    4037             : }
    4038             : 
    4039   151458101 : void macro::append(unsigned char c)
    4040             : {
    4041   151458101 :   assert(c != 0);
    4042   151458101 :   if (p == 0 /* nullptr */)
    4043     3374803 :     p = new macro_header;
    4044   151458101 :   if (p->cl.length() != len) {
    4045         426 :     macro_header *tem = p->copy(len);
    4046         426 :     if (--(p->count) <= 0)
    4047           0 :       delete p;
    4048         426 :     p = tem;
    4049             :   }
    4050   151458101 :   p->cl.append(c);
    4051   151458101 :   ++len;
    4052   151458101 :   if (c != PUSH_GROFF_MODE && c != PUSH_COMP_MODE && c != POP_GROFFCOMP_MODE)
    4053   149407597 :     is_empty_macro = false;
    4054   151458101 : }
    4055             : 
    4056           0 : void macro::set(unsigned char c, int offset)
    4057             : {
    4058           0 :   assert(p != 0 /* nullptr */);
    4059           0 :   assert(c != 0);
    4060           0 :   p->cl.set(c, offset);
    4061           0 : }
    4062             : 
    4063        1006 : unsigned char macro::get(int offset)
    4064             : {
    4065        1006 :   assert(p != 0 /* nullptr */);
    4066        1006 :   return p->cl.get(offset);
    4067             : }
    4068             : 
    4069         200 : int macro::length()
    4070             : {
    4071         200 :   return len;
    4072             : }
    4073             : 
    4074          67 : void macro::append_str(const char *s)
    4075             : {
    4076          67 :   int i = 0;
    4077             : 
    4078          67 :   if (s != 0 /* nullptr */) {
    4079         339 :     while (s[i] != '\0') {
    4080         272 :       append(s[i]);
    4081         272 :       i++;
    4082             :     }
    4083             :   }
    4084          67 : }
    4085             : 
    4086     9207152 : void macro::append(node *n)
    4087             : {
    4088     9207152 :   assert(n != 0 /* nullptr */);
    4089     9207152 :   if (p == 0 /* nullptr */)
    4090      253458 :     p = new macro_header;
    4091     9207152 :   if (p->cl.length() != len) {
    4092           0 :     macro_header *tem = p->copy(len);
    4093           0 :     if (--(p->count) <= 0)
    4094           0 :       delete p;
    4095           0 :     p = tem;
    4096             :   }
    4097     9207152 :   p->cl.append(0);
    4098     9207152 :   p->nl.append(n);
    4099     9207152 :   ++len;
    4100     9207152 :   is_empty_macro = false;
    4101     9207152 : }
    4102             : 
    4103          27 : void macro::append_unsigned(unsigned int i)
    4104             : {
    4105          27 :   unsigned int j = i / 10;
    4106          27 :   if (j != 0)
    4107          18 :     append_unsigned(j);
    4108          27 :   append(((unsigned char)(((int)'0') + i % 10)));
    4109          27 : }
    4110             : 
    4111           9 : void macro::append_int(int i)
    4112             : {
    4113           9 :   if (i < 0) {
    4114           0 :     append('-');
    4115           0 :     i = -i;
    4116             :   }
    4117           9 :   append_unsigned((unsigned int) i);
    4118           9 : }
    4119             : 
    4120           0 : void macro::print_size()
    4121             : {
    4122           0 :   errprint("%1", len);
    4123           0 : }
    4124             : 
    4125             : // Use this only for zero-length macros associated with charinfo objects
    4126             : // that are character classes.
    4127           1 : void macro::dump()
    4128             : {
    4129           1 :   if (filename != 0 /* nullptr */)
    4130           1 :     errprint("file name: \"%1\", line number: %2\n", filename, lineno);
    4131           1 : }
    4132             : 
    4133           2 : void macro::json_dump()
    4134             : {
    4135           2 :   bool need_comma = false;
    4136             :   // XXX: Unfortunately, if you alias or rename a request, the location
    4137             :   // of its invocation site is used for location information instead of
    4138             :   // its true origin.
    4139           2 :   if (filename != 0 /* nullptr */) {
    4140           2 :     symbol fn(filename); // `symbol` because it can't contain nulls.
    4141           2 :     const char *jsonfn = fn.json_extract();
    4142           2 :     errprint("\"file name\": %1", jsonfn);
    4143           2 :     free(const_cast<char *>(jsonfn));
    4144           2 :     fflush(stderr);
    4145           2 :     errprint(", \"starting line number\": %1", lineno);
    4146           2 :     need_comma = true;
    4147             :   }
    4148           2 :   if (need_comma)
    4149           2 :     errprint(", ");
    4150           2 :   errprint("\"length\": %1", len);
    4151           2 :   if (p != 0 /* nullptr */) {
    4152           0 :     errprint(", ");
    4153           0 :     p->json_dump_macro();
    4154           0 :     errprint(", ");
    4155           0 :     p->json_dump_diversion();
    4156             :   }
    4157           2 : }
    4158             : 
    4159             : // make a copy of the first n bytes
    4160             : 
    4161         426 : macro_header *macro_header::copy(int n)
    4162             : {
    4163         426 :   macro_header *p = new macro_header;
    4164         426 :   char_block *bp = cl.head;
    4165         426 :   unsigned char *ptr = bp->s;
    4166         426 :   node *nd = nl.head;
    4167        8690 :   while (--n >= 0) {
    4168        8264 :     if (ptr >= bp->s + char_block::SIZE) {
    4169           0 :       bp = bp->next;
    4170           0 :       ptr = bp->s;
    4171             :     }
    4172        8264 :     unsigned char c = *ptr++;
    4173        8264 :     p->cl.append(c);
    4174        8264 :     if (c == 0) {
    4175        8190 :       p->nl.append(nd->copy());
    4176        8190 :       nd = nd->next;
    4177             :     }
    4178             :   }
    4179         426 :   return p;
    4180             : }
    4181             : 
    4182             : extern void dump_node_list(node *);
    4183             : 
    4184           0 : void macro_header::json_dump_diversion()
    4185             : {
    4186           0 :   errprint("\"node list\": ");
    4187           0 :   dump_node_list(nl.head);
    4188           0 :   fflush(stderr);
    4189           0 : }
    4190             : 
    4191           0 : void macro_header::json_dump_macro()
    4192             : {
    4193           0 :   errprint("\"contents\": \"");
    4194           0 :   int macro_len = cl.length();
    4195           0 :   for (int i = 0; i < macro_len; i++) {
    4196           0 :     json_char jc = json_encode_char(cl.get(i));
    4197             :     // Write out its JSON representation by character by character to
    4198             :     // keep libc string functions from interpreting C escape sequences.
    4199           0 :     for (size_t j = 0; j < jc.len; j++)
    4200           0 :       fputc(jc.buf[j], stderr);
    4201             :   }
    4202           0 :   errprint("\"");
    4203           0 :   fflush(stderr);
    4204           0 : }
    4205             : 
    4206           2 : void print_macro_request()
    4207             : {
    4208             :   request_or_macro *rm;
    4209           2 :   macro *m = 0 /* nullptr */;
    4210           2 :   symbol s;
    4211           2 :   if (has_arg()) {
    4212           0 :     do {
    4213           2 :       s = read_identifier();
    4214           2 :       if (s.is_null())
    4215           0 :         break;
    4216           2 :       rm = static_cast<request_or_macro *>(request_dictionary.lookup(s));
    4217           2 :       if (rm != 0 /* nullptr */)
    4218           2 :         m = rm->to_macro();
    4219           2 :       if (m != 0 /* nullptr */) {
    4220           2 :         errprint("{\"name\": ");
    4221           2 :         s.json_dump();
    4222           2 :         errprint(", ");
    4223           2 :         m->json_dump();
    4224           2 :         errprint("}\n");
    4225           2 :         fflush(stderr);
    4226             :       }
    4227           2 :     } while (has_arg());
    4228             :   }
    4229             :   else {
    4230           0 :     object_dictionary_iterator iter(request_dictionary);
    4231             :     // We must use the nuclear `reinterpret_cast` operator because GNU
    4232             :     // troff's dictionary types use a pre-STL approach to containers.
    4233           0 :     while (iter.get(&s, reinterpret_cast<object **>(&rm))) {
    4234           0 :       assert(!s.is_null());
    4235           0 :       m = rm->to_macro();
    4236           0 :       if (m != 0 /* nullptr */) {
    4237           0 :         errprint("%1\t", s.contents());
    4238           0 :         m->print_size();
    4239           0 :         errprint("\n");
    4240             :       }
    4241             :     }
    4242             :   }
    4243           2 :   fflush(stderr);
    4244           2 :   skip_line();
    4245           2 : }
    4246             : 
    4247             : class string_iterator : public input_iterator {
    4248             :   macro mac;
    4249             :   const char *how_invoked;
    4250             :   bool seen_newline;
    4251             :   int lineno;
    4252             :   char_block *bp;
    4253             :   int count;                    // of characters remaining
    4254             :   node *nd;
    4255             :   bool att_compat;
    4256             :   bool with_break;              // inherited from the caller
    4257             : protected:
    4258             :   symbol nm;
    4259             :   string_iterator();
    4260             : public:
    4261             :   string_iterator(const macro &, const char * = 0 /* nullptr */,
    4262             :                   symbol = NULL_SYMBOL);
    4263             :   int fill(node **);
    4264             :   int peek();
    4265             :   bool get_location(bool /* allow_macro */, const char ** /* filep */,
    4266             :                     int * /* linep */);
    4267             :   void backtrace();
    4268     3331981 :   bool get_break_flag() { return with_break; }
    4269     2284263 :   void set_att_compat(bool b) { att_compat = b; }
    4270     2279688 :   bool get_att_compat() { return att_compat; }
    4271             :   bool is_diversion();
    4272             : };
    4273             : 
    4274     9437049 : string_iterator::string_iterator(const macro &m, const char *p,
    4275     9437049 :     symbol s)
    4276     9437049 : : input_iterator(m.is_a_diversion), mac(m), how_invoked(p),
    4277     9437049 :   seen_newline(false), lineno(1), nm(s)
    4278             : {
    4279     9437049 :   count = mac.len;
    4280     9437049 :   if (count != 0) {
    4281     9156136 :     bp = mac.p->cl.head;
    4282     9156136 :     nd = mac.p->nl.head;
    4283     9156136 :     ptr = endptr = bp->s;
    4284             :   }
    4285             :   else {
    4286      280913 :     bp = 0 /* nullptr */;
    4287      280913 :     nd = 0 /* nullptr */;
    4288      280913 :     ptr = endptr = 0 /* nullptr */;
    4289             :   }
    4290     9437049 :   with_break = input_stack::get_break_flag();
    4291     9437049 : }
    4292             : 
    4293       67634 : string_iterator::string_iterator()
    4294             : {
    4295       67634 :   bp = 0 /* nullptr */;
    4296       67634 :   nd = 0 /* nullptr */;
    4297       67634 :   ptr = endptr = 0 /* nullptr */;
    4298       67634 :   seen_newline = false;
    4299       67634 :   how_invoked = 0 /* nullptr */;
    4300       67634 :   lineno = 1;
    4301       67634 :   count = 0;
    4302       67634 :   with_break = input_stack::get_break_flag();
    4303       67634 : }
    4304             : 
    4305     9868972 : bool string_iterator::is_diversion()
    4306             : {
    4307     9868972 :   return mac.is_diversion();
    4308             : }
    4309             : 
    4310    69594597 : int string_iterator::fill(node **np)
    4311             : {
    4312    69594597 :   if (seen_newline)
    4313    38637710 :     lineno++;
    4314    69594597 :   seen_newline = false;
    4315    69594597 :   if (count <= 0)
    4316     8751882 :     return EOF;
    4317    60842715 :   const unsigned char *p = endptr;
    4318    60842715 :   if (p >= bp->s + char_block::SIZE) {
    4319     4895457 :     bp = bp->next;
    4320     4895457 :     p = bp->s;
    4321             :   }
    4322    60842715 :   if (*p == '\0') {
    4323     9869452 :     if (np != 0 /* nullptr */) {
    4324     9868972 :       *np = nd->copy();
    4325     9868972 :       if (is_diversion())
    4326     8897752 :         (*np)->div_nest_level = input_stack::get_div_level();
    4327             :       else
    4328      971220 :         (*np)->div_nest_level = 0;
    4329             :     }
    4330     9869452 :     nd = nd->next;
    4331     9869452 :     endptr = ptr = p + 1;
    4332     9869452 :     count--;
    4333     9869452 :     return 0;
    4334             :   }
    4335    50973263 :   const unsigned char *e = bp->s + char_block::SIZE;
    4336    50973263 :   if (e - p > count)
    4337    15479235 :     e = p + count;
    4338    50973263 :   ptr = p;
    4339   790068160 :   while (p < e) {
    4340   777859686 :     unsigned char c = *p;
    4341   777859686 :     if (c == '\n' || c == ESCAPE_NEWLINE) {
    4342    38764406 :       seen_newline = true;
    4343    38764406 :       p++;
    4344    38764406 :       break;
    4345             :     }
    4346   739095280 :     if (c == '\0')
    4347         383 :       break;
    4348   739094897 :     p++;
    4349             :   }
    4350    50973263 :   endptr = p;
    4351    50973263 :   count -= p - ptr;
    4352    50973263 :   return *ptr++;
    4353             : }
    4354             : 
    4355        4451 : int string_iterator::peek()
    4356             : {
    4357        4451 :   if (count <= 0)
    4358         131 :     return EOF;
    4359        4320 :   const unsigned char *p = endptr;
    4360        4320 :   if (p >= bp->s + char_block::SIZE) {
    4361        4320 :     p = bp->next->s;
    4362             :   }
    4363        4320 :   return *p;
    4364             : }
    4365             : 
    4366     3337995 : bool string_iterator::get_location(bool allow_macro,
    4367             :                                    const char **filep, int *linep)
    4368             : {
    4369     3337995 :   if (!allow_macro)
    4370        5499 :     return false;
    4371     3332496 :   if (0 /* nullptr */ == mac.filename)
    4372         153 :     return false;
    4373     3332343 :   *filep = mac.filename;
    4374     3332343 :   *linep = mac.lineno + lineno - 1;
    4375     3332343 :   return true;
    4376             : }
    4377             : 
    4378          28 : void string_iterator::backtrace()
    4379             : {
    4380          28 :   if (mac.filename != 0 /* nullptr */) {
    4381          28 :     if (program_name != 0 /* nullptr */)
    4382          28 :       errprint("%1: ", program_name);
    4383          28 :     errprint("backtrace: '%1':%2", mac.filename,
    4384          28 :              (mac.lineno + lineno - 1));
    4385          28 :     if (how_invoked != 0 /* nullptr */) {
    4386          28 :       if (!nm.is_null())
    4387          28 :         errprint(": %1 '%2'", how_invoked, nm.contents());
    4388             :       else
    4389           0 :         errprint(": %1", how_invoked);
    4390             :     }
    4391          28 :     errprint("\n");
    4392             :   }
    4393          28 : }
    4394             : 
    4395             : class temp_iterator : public input_iterator {
    4396             :   unsigned char *base;
    4397             :   temp_iterator(const char *, int len);
    4398             : public:
    4399             :   ~temp_iterator();
    4400             :   friend input_iterator *make_temp_iterator(const char *);
    4401             : };
    4402             : 
    4403     8957966 : inline temp_iterator::temp_iterator(const char *s, int len)
    4404     8957966 : : base(0 /* nullptr */)
    4405             : {
    4406     8957966 :   if (len > 0) {
    4407     8891238 :     base = new unsigned char[len + 1];
    4408     8891238 :     (void) memcpy(base, s, len);
    4409     8891238 :     base[len] = '\0';
    4410     8891238 :     ptr = base;
    4411     8891238 :     endptr = base + len;
    4412             :   }
    4413     8957966 : }
    4414             : 
    4415    17912116 : temp_iterator::~temp_iterator()
    4416             : {
    4417     8956058 :   delete[] base;
    4418    17912116 : }
    4419             : 
    4420             : 
    4421     8957966 : input_iterator *make_temp_iterator(const char *s)
    4422             : {
    4423     8957966 :   if (0 /* nullptr */ == s)
    4424       66529 :     return new temp_iterator(s, 0);
    4425             :   else {
    4426     8891437 :     size_t n = strlen(s);
    4427     8891437 :     return new temp_iterator(s, n);
    4428             :   }
    4429             : }
    4430             : 
    4431             : // this is used when macros with arguments are interpolated
    4432             : 
    4433             : struct arg_list {
    4434             :   macro mac;
    4435             :   bool space_follows;
    4436             :   arg_list *next;
    4437             :   arg_list(const macro &, bool);
    4438             :   arg_list(const arg_list *);
    4439             :   ~arg_list();
    4440             : };
    4441             : 
    4442     1083153 : arg_list::arg_list(const macro &m, bool b)
    4443     1083153 : : mac(m), space_follows(b), next(0 /* nullptr */)
    4444             : {
    4445     1083153 : }
    4446             : 
    4447         640 : arg_list::arg_list(const arg_list *al)
    4448         640 : : next(0 /* nullptr */)
    4449             : {
    4450         640 :   mac = al->mac;
    4451         640 :   space_follows = al->space_follows;
    4452         640 :   arg_list **a = &next;
    4453         640 :   arg_list *p = al->next;
    4454        1037 :   while (p != 0 /* nullptr */) {
    4455         397 :     *a = new arg_list(p->mac, p->space_follows);
    4456         397 :     p = p->next;
    4457         397 :     a = &(*a)->next;
    4458             :   }
    4459         640 : }
    4460             : 
    4461     1083783 : arg_list::~arg_list()
    4462             : {
    4463     1083783 : }
    4464             : 
    4465             : class macro_iterator : public string_iterator {
    4466             :   arg_list *args;
    4467             :   int argc;
    4468             :   bool with_break;              // whether called as .foo or 'foo
    4469             : public:
    4470             :   macro_iterator(symbol, macro &,
    4471             :                  const char * /* how_called */ = "macro",
    4472             :                  bool /* want_arguments_initialized */ = false);
    4473             :   macro_iterator();
    4474             :   ~macro_iterator();
    4475     3879545 :   bool has_args() { return true; }
    4476             :   input_iterator *get_arg(int);
    4477             :   arg_list *get_arg_list();
    4478             :   symbol get_macro_name();
    4479             :   bool space_follows_arg(int);
    4480     4900408 :   bool get_break_flag() { return with_break; }
    4481      944480 :   int nargs() { return argc; }
    4482             :   void add_arg(const macro &, int);
    4483             :   void shift(int);
    4484      112992 :   bool is_macro() { return true; }
    4485             :   bool is_diversion();
    4486             : };
    4487             : 
    4488     2664893 : input_iterator *macro_iterator::get_arg(int i)
    4489             : {
    4490     2664893 :   if (i == 0)
    4491       55570 :     return make_temp_iterator(nm.contents());
    4492     2609323 :   if (i > 0 && i <= argc) {
    4493     2402197 :     arg_list *p = args;
    4494     3678189 :     for (int j = 1; j < i; j++) {
    4495     1275992 :       assert(p != 0);
    4496     1275992 :       p = p->next;
    4497             :     }
    4498     2402197 :     return new string_iterator(p->mac);
    4499             :   }
    4500             :   else
    4501      207126 :     return 0 /* nullptr */;
    4502             : }
    4503             : 
    4504        1194 : arg_list *macro_iterator::get_arg_list()
    4505             : {
    4506        1194 :   return args;
    4507             : }
    4508             : 
    4509        1194 : symbol macro_iterator::get_macro_name()
    4510             : {
    4511        1194 :   return nm;
    4512             : }
    4513             : 
    4514           0 : bool macro_iterator::space_follows_arg(int i)
    4515             : {
    4516           0 :   if ((i > 0) && (i <= argc)) {
    4517           0 :     arg_list *p = args;
    4518           0 :     for (int j = 1; j < i; j++) {
    4519           0 :       assert(p != 0 /* nullptr */);
    4520           0 :       p = p->next;
    4521             :     }
    4522           0 :     return p->space_follows;
    4523             :   }
    4524             :   else
    4525           0 :     return false;
    4526             : }
    4527             : 
    4528     1082756 : void macro_iterator::add_arg(const macro &m, int s)
    4529             : {
    4530             :   arg_list **p;
    4531     2341551 :   for (p = &args; *p != 0 /* nullptr */; p = &((*p)->next))
    4532             :     ;
    4533     1082756 :   *p = new arg_list(m, s);
    4534     1082756 :   ++argc;
    4535     1082756 : }
    4536             : 
    4537      568781 : void macro_iterator::shift(int n)
    4538             : {
    4539      568781 :   while (n > 0 && argc > 0) {
    4540      300997 :     arg_list *tem = args;
    4541      300997 :     args = args->next;
    4542      300997 :     delete tem;
    4543      300997 :     --argc;
    4544      300997 :     --n;
    4545             :   }
    4546      267784 : }
    4547             : 
    4548             : // This gets used by, e.g., .if '\?xxx\?''.
    4549             : 
    4550      228916 : bool operator==(const macro &m1, const macro &m2)
    4551             : {
    4552      228916 :   if (m1.len != m2.len)
    4553       89445 :     return false;
    4554      278942 :   string_iterator iter1(m1);
    4555      278942 :   string_iterator iter2(m2);
    4556      139471 :   int n = m1.len;
    4557      205536 :   while (--n >= 0) {
    4558      188610 :     node *nd1 = 0;
    4559      188610 :     int c1 = iter1.get(&nd1);
    4560      188610 :     assert(c1 != EOF);
    4561      188610 :     node *nd2 = 0;
    4562      188610 :     int c2 = iter2.get(&nd2);
    4563      188610 :     assert(c2 != EOF);
    4564      188610 :     if (c1 != c2) {
    4565      122545 :       if (c1 == 0)
    4566           0 :         delete nd1;
    4567      122545 :       else if (c2 == 0)
    4568           0 :         delete nd2;
    4569      122545 :       return false;
    4570             :     }
    4571       66065 :     if (c1 == 0) {
    4572           0 :       assert(nd1 != 0);
    4573           0 :       assert(nd2 != 0);
    4574           0 :       bool same = nd1->type() == nd2->type() && nd1->is_same_as(nd2);
    4575           0 :       delete nd1;
    4576           0 :       delete nd2;
    4577           0 :       return same;
    4578             :     }
    4579             :   }
    4580       16926 :   return true;
    4581             : }
    4582             : 
    4583    13213126 : static void interpolate_macro(symbol nm, bool do_not_want_next_token)
    4584             : {
    4585             :   request_or_macro *p
    4586    13213126 :     = static_cast<request_or_macro *>(request_dictionary.lookup(nm));
    4587    13213126 :   if (0 /* nullptr */ == p) {
    4588         293 :     bool was_warned = false;
    4589         293 :     const char *s = nm.contents();
    4590         293 :     if (strlen(s) > 2) {
    4591             :       request_or_macro *r;
    4592             :       char buf[3];
    4593         280 :       buf[0] = s[0];
    4594         280 :       buf[1] = s[1];
    4595         280 :       buf[2] = '\0';
    4596             :       r = static_cast<request_or_macro *>
    4597         280 :           (request_dictionary.lookup(symbol(buf)));
    4598         280 :       if (r != 0 /* nullptr */) {
    4599           0 :         macro *m = r->to_macro();
    4600           0 :         if ((0 /* nullptr */ == m) || !m->is_empty()) {
    4601           0 :           warning(WARN_SPACE, "name '%1' not defined (possibly missing"
    4602           0 :                   " space after '%2')", nm.contents(), buf);
    4603           0 :           was_warned = true;
    4604             :         }
    4605             :       }
    4606             :     }
    4607         293 :     if (!was_warned) {
    4608         293 :       warning(WARN_MAC, "name '%1' not defined", nm.contents());
    4609         293 :       p = new macro;
    4610         293 :       request_dictionary.define(nm, p);
    4611             :     }
    4612             :   }
    4613    13213126 :   if (p != 0 /* nullptr */)
    4614    13213126 :     p->invoke(nm, do_not_want_next_token);
    4615             :   else {
    4616           0 :     skip_line();
    4617           0 :     return;
    4618             :   }
    4619             : }
    4620             : 
    4621      928310 : static void decode_macro_call_arguments(macro_iterator *mi)
    4622             : {
    4623      928310 :   if (!tok.is_newline() && !tok.is_eof()) {
    4624             :     node *n;
    4625      538745 :     int c = read_char_in_copy_mode(&n);
    4626             :     for (;;) {
    4627     3209303 :       while (c == ' ')
    4628     1656159 :         c = read_char_in_copy_mode(&n);
    4629     1553144 :       if (c == '\n' || c == EOF)
    4630             :         break;
    4631     2028798 :       macro arg;
    4632     1014399 :       int quote_input_level = 0;
    4633     1014399 :       bool was_warned = false; // about an input tab character
    4634     1014399 :       arg.append(want_att_compat ? PUSH_COMP_MODE : PUSH_GROFF_MODE);
    4635             :       // we store discarded double quotes for \$^
    4636     1014399 :       if (c == '"') {
    4637      238297 :         arg.append(DOUBLE_QUOTE);
    4638      238297 :         quote_input_level = input_stack::get_level();
    4639      238297 :         c = read_char_in_copy_mode(&n);
    4640             :       }
    4641    12079639 :       while (c != EOF && c != '\n'
    4642    12829804 :              && !(c == ' ' && quote_input_level == 0)) {
    4643     2557146 :         if (quote_input_level > 0 && c == '"'
    4644     8573042 :             && (want_att_compat
    4645      246007 :                 || input_stack::get_level() == quote_input_level)) {
    4646      237743 :           arg.append(DOUBLE_QUOTE);
    4647      237743 :           c = read_char_in_copy_mode(&n);
    4648      237743 :           if (c == '"') {
    4649         474 :             arg.append(c);
    4650         474 :             c = read_char_in_copy_mode(&n);
    4651             :           }
    4652             :           else
    4653      237269 :             break;
    4654             :         }
    4655             :         else {
    4656     5532146 :           if (c == 0)
    4657           0 :             arg.append(n);
    4658             :           else {
    4659     5532146 :             if (c == '\t' && quote_input_level == 0 && !was_warned) {
    4660           0 :               warning(WARN_TAB, "tab character in unquoted macro"
    4661             :                       " argument");
    4662           0 :               was_warned = true;
    4663             :             }
    4664     5532146 :             arg.append(c);
    4665             :           }
    4666     5532146 :           c = read_char_in_copy_mode(&n);
    4667             :         }
    4668             :       }
    4669     1014399 :       arg.append(POP_GROFFCOMP_MODE);
    4670     1014399 :       mi->add_arg(arg, (c == ' '));
    4671     1014399 :     }
    4672             :   }
    4673      928310 : }
    4674             : 
    4675       69569 : static void decode_escape_sequence_arguments(macro_iterator *mi)
    4676             : {
    4677             :   node *n;
    4678       69569 :   int c = read_char_in_copy_mode(&n);
    4679             :   for (;;) {
    4680      137929 :     while (c == ' ')
    4681           3 :       c = read_char_in_copy_mode(&n);
    4682      137926 :     if (c == '\n' || c == EOF) {
    4683           0 :       error("missing ']' in parameterized escape sequence");
    4684           0 :       break;
    4685             :     }
    4686      137926 :     if (c == ']')
    4687       69569 :       break;
    4688      136714 :     macro arg;
    4689       68357 :     int quote_input_level = 0;
    4690       68357 :     bool was_warned = false; // about an input tab character
    4691       68357 :     if (c == '"') {
    4692           0 :       quote_input_level = input_stack::get_level();
    4693           0 :       c = read_char_in_copy_mode(&n);
    4694             :     }
    4695      340747 :     while (c != EOF && c != '\n'
    4696      204552 :            && !(c == ']' && quote_input_level == 0)
    4697      340750 :            && !(c == ' ' && quote_input_level == 0)) {
    4698           0 :       if (quote_input_level > 0 && c == '"'
    4699      136195 :           && input_stack::get_level() == quote_input_level) {
    4700           0 :         c = read_char_in_copy_mode(&n);
    4701           0 :         if (c == '"') {
    4702           0 :           arg.append(c);
    4703           0 :           c = read_char_in_copy_mode(&n);
    4704             :         }
    4705             :         else
    4706           0 :           break;
    4707             :       }
    4708             :       else {
    4709      136195 :         if (c == 0)
    4710           0 :           arg.append(n);
    4711             :         else {
    4712      136195 :           if (c == '\t' && quote_input_level == 0 && !was_warned)
    4713             :           {
    4714           0 :             warning(WARN_TAB, "tab character in parameterized escape"
    4715             :                     " sequence");
    4716           0 :             was_warned = true;
    4717             :           }
    4718      136195 :           arg.append(c);
    4719             :         }
    4720      136195 :         c = read_char_in_copy_mode(&n);
    4721             :       }
    4722             :     }
    4723       68357 :     mi->add_arg(arg, (c == ' '));
    4724       68357 :   }
    4725       69569 : }
    4726             : 
    4727      928310 : void macro::invoke(symbol nm, bool do_not_want_next_token)
    4728             : {
    4729      928310 :   macro_iterator *mi = new macro_iterator(nm, *this);
    4730      928310 :   decode_macro_call_arguments(mi);
    4731      928310 :   input_stack::push(mi);
    4732             :   // we must delay tok.next() in case the function has been called by
    4733             :   // do_request to assure proper handling of want_att_compat
    4734      928310 :   if (!do_not_want_next_token)
    4735      925732 :     tok.next();
    4736      928310 : }
    4737             : 
    4738     5961468 : macro *macro::to_macro()
    4739             : {
    4740     5961468 :   return this;
    4741             : }
    4742             : 
    4743        1006 : bool macro::is_empty()
    4744             : {
    4745        1006 :   return (is_empty_macro == true);
    4746             : }
    4747             : 
    4748      974054 : macro_iterator::macro_iterator(symbol s, macro &m,
    4749             :                                const char *how_called,
    4750      974054 :                                bool want_arguments_initialized)
    4751             : : string_iterator(m, how_called, s), args(0 /* nullptr */), argc(0),
    4752      974054 :   with_break(was_invoked_with_regular_control_character)
    4753             : {
    4754      974054 :   if (want_arguments_initialized) {
    4755        1497 :     arg_list *al = input_stack::get_arg_list();
    4756        1497 :     if (al != 0 /* nullptr */) {
    4757         640 :       args = new arg_list(al);
    4758         640 :       argc = input_stack::nargs();
    4759             :     }
    4760             :   }
    4761      974054 : }
    4762             : 
    4763       67634 : macro_iterator::macro_iterator()
    4764             : : args(0 /* nullptr */), argc(0),
    4765       67634 :   with_break(was_invoked_with_regular_control_character)
    4766             : {
    4767       67634 : }
    4768             : 
    4769     3123906 : macro_iterator::~macro_iterator()
    4770             : {
    4771     1824088 :   while (args != 0 /* nullptr */) {
    4772      782786 :     arg_list *tem = args;
    4773      782786 :     args = args->next;
    4774      782786 :     delete tem;
    4775             :   }
    4776     2082604 : }
    4777             : 
    4778             : dictionary composite_dictionary(17);
    4779             : 
    4780       31196 : static void map_composite_character()
    4781             : {
    4782       31196 :   symbol from = read_identifier();
    4783       31196 :   if (from.is_null()) {
    4784           0 :     warning(WARN_MISSING, "composite character mapping request expects"
    4785             :             " arguments");
    4786           0 :     skip_line();
    4787           0 :     return;
    4788             :   }
    4789       31196 :   const char *fc = from.contents();
    4790       31196 :   const char *from_gn = glyph_name_to_unicode(fc);
    4791             :   char errbuf[ERRBUFSZ]; // C++03: char errbuf[ERRBUFSZ]()
    4792       31196 :   if (0 /* nullptr */ == from_gn) {
    4793           0 :     from_gn = valid_unicode_code_sequence(fc, errbuf);
    4794           0 :     if (0 /* nullptr */ == from_gn) {
    4795           0 :       error("invalid composite glyph name '%1': %2", fc, errbuf);
    4796           0 :       skip_line();
    4797           0 :       return;
    4798             :     }
    4799             :   }
    4800       31196 :   const char *from_decomposed = decompose_unicode(from_gn);
    4801       31196 :   if (from_decomposed != 0 /* nullptr */)
    4802           0 :     from_gn = &from_decomposed[1];
    4803       31196 :   symbol to = read_identifier();
    4804       31196 :   if (to.is_null()) {
    4805           0 :     composite_dictionary.remove(symbol(from_gn));
    4806           0 :     skip_line();
    4807           0 :     return;
    4808             :   }
    4809       31196 :   const char *tc = to.contents();
    4810       31196 :   const char *to_gn = glyph_name_to_unicode(tc);
    4811       31196 :   if (0 /* nullptr */ == to_gn) {
    4812       31196 :     to_gn = valid_unicode_code_sequence(tc, errbuf);
    4813       31196 :     if (0 /* nullptr */ == to_gn) {
    4814           0 :       error("invalid composite glyph name '%1': %2", tc, errbuf);
    4815           0 :       skip_line();
    4816           0 :       return;
    4817             :     }
    4818             :   }
    4819       31196 :   const char *to_decomposed = decompose_unicode(to_gn);
    4820       31196 :   if (to_decomposed != 0 /* nullptr */)
    4821           0 :     to_gn = &to_decomposed[1];
    4822       31196 :   if (strcmp(from_gn, to_gn) == 0)
    4823           0 :     composite_dictionary.remove(symbol(from_gn));
    4824             :   else
    4825       31196 :     (void) composite_dictionary.lookup(symbol(from_gn), (void *) to_gn);
    4826       31196 :   skip_line();
    4827             : }
    4828             : 
    4829       67634 : static symbol composite_glyph_name(symbol nm)
    4830             : {
    4831       67634 :   macro_iterator *mi = new macro_iterator();
    4832       67634 :   decode_escape_sequence_arguments(mi);
    4833       67634 :   input_stack::push(mi);
    4834       67634 :   const char *nc = nm.contents();
    4835       67634 :   const char *gn = glyph_name_to_unicode(nc);
    4836       67634 :   if (0 /* nullptr */ == gn) {
    4837           0 :     gn = valid_unicode_code_sequence(nc);
    4838           0 :     if (0 /* nullptr */ == gn) {
    4839           0 :       error("invalid base character '%1' in composite character name",
    4840           0 :             nc);
    4841           0 :       return EMPTY_SYMBOL;
    4842             :     }
    4843             :   }
    4844       67634 :   const char *gn_decomposed = decompose_unicode(gn);
    4845      135268 :   string glyph_name(gn_decomposed ? &gn_decomposed[1] : gn);
    4846      135268 :   string gl;
    4847       67634 :   int n = input_stack::nargs();
    4848      135268 :   for (int i = 1; i <= n; i++) {
    4849       67634 :     glyph_name += '_';
    4850       67634 :     input_iterator *p = input_stack::get_arg(i);
    4851       67634 :     gl.clear();
    4852             :     int c;
    4853      202152 :     while ((c = p->get(0)) != EOF)
    4854      134518 :       if (c != DOUBLE_QUOTE)
    4855      134518 :         gl += c;
    4856       67634 :     gl += '\0';
    4857       67634 :     const char *gc = gl.contents();
    4858       67634 :     const char *u = glyph_name_to_unicode(gc);
    4859       67634 :     if (0 /* nullptr */ == u) {
    4860           0 :       u = valid_unicode_code_sequence(gc);
    4861           0 :       if (0 /* nullptr */ == u) {
    4862           0 :         error("invalid component '%1' in composite glyph name", gc);
    4863           0 :         return EMPTY_SYMBOL;
    4864             :       }
    4865             :     }
    4866       67634 :     const char *decomposed = decompose_unicode(u);
    4867       67634 :     if (decomposed != 0 /* nullptr */)
    4868           0 :       u = &decomposed[1];
    4869       67634 :     void *mapped_composite = composite_dictionary.lookup(symbol(u));
    4870       67634 :     if (mapped_composite != 0 /* nullptr */)
    4871       67634 :       u = static_cast<const char *>(mapped_composite);
    4872       67634 :     glyph_name += u;
    4873             :   }
    4874       67634 :   glyph_name += '\0';
    4875       67634 :   const char *groff_gn = unicode_to_glyph_name(glyph_name.contents());
    4876       67634 :   if (groff_gn != 0 /* nullptr */)
    4877        2973 :     return symbol(groff_gn);
    4878       64661 :   gl.clear();
    4879       64661 :   gl += 'u';
    4880       64661 :   gl += glyph_name;
    4881       64661 :   return symbol(gl.contents());
    4882             : }
    4883             : 
    4884           0 : static void print_composite_character_request()
    4885             : {
    4886           0 :   dictionary_iterator iter(composite_dictionary);
    4887           0 :   symbol key;
    4888             :   char *value;
    4889             :   // We must use the nuclear `reinterpret_cast` operator because GNU
    4890             :   // troff's dictionary types use a pre-STL approach to containers.
    4891           0 :   while (iter.get(&key, reinterpret_cast<void **>(&value))) {
    4892           0 :     assert(!key.is_null());
    4893           0 :     assert(value != 0 /* nullptr */);
    4894           0 :     errprint("%1\t%2\n", key.contents(), value);
    4895             :   }
    4896           0 :   fflush(stderr);
    4897           0 :   skip_line();
    4898           0 : }
    4899             : 
    4900             : bool was_trap_sprung = false;
    4901             : static bool are_traps_postponed = false;
    4902             : symbol postponed_trap;
    4903             : 
    4904       42397 : void spring_trap(symbol nm)
    4905             : {
    4906       42397 :   assert(!nm.is_null());
    4907       42397 :   was_trap_sprung = true;
    4908       42397 :   if (are_traps_postponed) {
    4909          85 :     postponed_trap = nm;
    4910          85 :     return;
    4911             :   }
    4912             :   static char buf[2] = { BEGIN_TRAP, '\0' };
    4913             :   static char buf2[2] = { END_TRAP, '\0' };
    4914       42312 :   input_stack::push(make_temp_iterator(buf2));
    4915       42312 :   request_or_macro *p = lookup_request(nm);
    4916             :   // We don't perform this validation at the time the trap is planted
    4917             :   // because a request name might be replaced by a macro by the time the
    4918             :   // trap springs.
    4919       42312 :   macro *m = p->to_macro();
    4920       42312 :   if (m != 0 /* nullptr */)
    4921       42312 :     input_stack::push(new macro_iterator(nm, *m, "trap-called macro"));
    4922             :   else
    4923           0 :     error("trap failed to spring: '%1' is a request", nm.contents());
    4924       42312 :   input_stack::push(make_temp_iterator(buf));
    4925             : }
    4926             : 
    4927      249530 : void postpone_traps()
    4928             : {
    4929      249530 :   are_traps_postponed = true;
    4930      249530 : }
    4931             : 
    4932      249530 : bool unpostpone_traps()
    4933             : {
    4934      249530 :   are_traps_postponed = false;
    4935      249530 :   if (!postponed_trap.is_null()) {
    4936          85 :     spring_trap(postponed_trap);
    4937          85 :     postponed_trap = NULL_SYMBOL;
    4938          85 :     return true;
    4939             :   }
    4940             :   else
    4941      249445 :     return false;
    4942             : }
    4943             : 
    4944           0 : void read_request()
    4945             : {
    4946           0 :   macro_iterator *mi = new macro_iterator;
    4947           0 :   int reading_from_terminal = isatty(fileno(stdin));
    4948           0 :   int had_prompt = 0;
    4949           0 :   if (has_arg(true /* peek */)) {
    4950           0 :     int c = read_char_in_copy_mode(0 /* nullptr */);
    4951           0 :     while (c == ' ')
    4952           0 :       c = read_char_in_copy_mode(0 /* nullptr */);
    4953           0 :     while (c != EOF && c != '\n' && c != ' ') {
    4954           0 :       if (!is_invalid_input_char(c)) {
    4955           0 :         if (reading_from_terminal)
    4956           0 :           fputc(c, stderr);
    4957           0 :         had_prompt = 1;
    4958             :       }
    4959           0 :       c = read_char_in_copy_mode(0 /* nullptr */);
    4960             :     }
    4961           0 :     if (c == ' ') {
    4962           0 :       tok.make_space();
    4963           0 :       decode_macro_call_arguments(mi);
    4964             :     }
    4965             :   }
    4966           0 :   if (reading_from_terminal) {
    4967           0 :     fputc(had_prompt ? ':' : '\a', stderr);
    4968           0 :     fflush(stderr);
    4969             :   }
    4970           0 :   input_stack::push(mi);
    4971           0 :   macro mac;
    4972           0 :   int nl = 0;
    4973             :   int c;
    4974           0 :   while ((c = getchar()) != EOF) {
    4975           0 :     if (is_invalid_input_char(c))
    4976           0 :       warning(WARN_INPUT, "invalid input character code %1", int(c));
    4977             :     else {
    4978           0 :       if (c == '\n') {
    4979           0 :         if (nl != 0 /* nullptr */)
    4980           0 :           break;
    4981             :         else
    4982           0 :           nl = 1;
    4983             :       }
    4984             :       else
    4985           0 :         nl = 0;
    4986           0 :       mac.append(c);
    4987             :     }
    4988             :   }
    4989           0 :   if (reading_from_terminal)
    4990           0 :     clearerr(stdin);
    4991           0 :   input_stack::push(new string_iterator(mac));
    4992           0 :   tok.next();
    4993           0 : }
    4994             : 
    4995             : enum define_mode { DEFINE_NORMAL, DEFINE_APPEND, DEFINE_IGNORE };
    4996             : enum calling_mode { CALLING_NORMAL, CALLING_INDIRECT };
    4997             : enum comp_mode { COMP_IGNORE, COMP_DISABLE, COMP_ENABLE };
    4998             : 
    4999     1549109 : static void do_define_string(define_mode mode, comp_mode comp)
    5000             : {
    5001     1549109 :   symbol nm;
    5002     1549109 :   node *n = 0 /* nullptr */;
    5003             :   int c;
    5004     1549109 :   nm = read_identifier(true /* required */);
    5005     1549109 :   if (nm.is_null()) {
    5006           0 :     skip_line();
    5007           0 :     return;
    5008             :   }
    5009     1549109 :   if (tok.is_newline())
    5010      268866 :     c = '\n';
    5011     1280243 :   else if (tok.is_tab())
    5012           0 :     c = '\t';
    5013     1280243 :   else if (!tok.is_space()) {
    5014           0 :     skip_line();
    5015           0 :     return;
    5016             :   }
    5017             :   else
    5018     1280243 :     c = read_char_in_copy_mode(&n);
    5019     1809401 :   while (c == ' ')
    5020      260292 :     c = read_char_in_copy_mode(&n);
    5021     1549109 :   if (c == '"')
    5022      329971 :     c = read_char_in_copy_mode(&n);
    5023     3098218 :   macro mac;
    5024             :   request_or_macro *rm
    5025     1549109 :     = static_cast<request_or_macro *>(request_dictionary.lookup(nm));
    5026     1549109 :   macro *mm = rm ? rm->to_macro() : 0 /* nullptr */;
    5027     1549109 :   if (mode == DEFINE_APPEND && mm)
    5028      141515 :     mac = *mm;
    5029     1549109 :   if (comp == COMP_DISABLE)
    5030         229 :     mac.append(PUSH_GROFF_MODE);
    5031     1548880 :   else if (comp == COMP_ENABLE)
    5032           1 :     mac.append(PUSH_COMP_MODE);
    5033    22740199 :   while (c != '\n' && c != EOF) {
    5034    21191090 :     if (c == 0)
    5035         160 :       mac.append(n);
    5036             :     else
    5037    21190930 :       mac.append((unsigned char) c);
    5038    21191090 :     c = read_char_in_copy_mode(&n);
    5039             :   }
    5040     1549109 :   if (comp == COMP_DISABLE || comp == COMP_ENABLE)
    5041         230 :     mac.append(POP_GROFFCOMP_MODE);
    5042     1549109 :   if (!mm) {
    5043      240055 :     mm = new macro;
    5044      240055 :     request_dictionary.define(nm, mm);
    5045             :   }
    5046     1549109 :   *mm = mac;
    5047     1549109 :   tok.next();
    5048             : }
    5049             : 
    5050     1407215 : static void define_string()
    5051             : {
    5052     1407215 :   do_define_string(DEFINE_NORMAL,
    5053             :                    want_att_compat ? COMP_ENABLE : COMP_IGNORE);
    5054     1407215 : }
    5055             : 
    5056           0 : static void define_nocomp_string()
    5057             : {
    5058           0 :   do_define_string(DEFINE_NORMAL, COMP_DISABLE);
    5059           0 : }
    5060             : 
    5061      141665 : static void append_string()
    5062             : {
    5063      141665 :   do_define_string(DEFINE_APPEND,
    5064             :                    want_att_compat ? COMP_ENABLE : COMP_IGNORE);
    5065      141665 : }
    5066             : 
    5067         229 : static void append_nocomp_string()
    5068             : {
    5069         229 :   do_define_string(DEFINE_APPEND, COMP_DISABLE);
    5070         229 : }
    5071             : 
    5072      325453 : static const char *character_mode_description(char_mode mode)
    5073             : {
    5074             :   // C++11: There may be a better way to do this with an enum class;
    5075             :   // we could then store these string literals inside `char_mode`.
    5076      325453 :   const char *modestr = 0 /* nullptr */;
    5077      325453 :   switch (mode) {
    5078       10808 :     case CHAR_NORMAL:
    5079       10808 :       modestr = "";
    5080       10808 :       break;
    5081      300946 :     case CHAR_FALLBACK:
    5082      300946 :       modestr = " fallback";
    5083      300946 :       break;
    5084        3207 :     case CHAR_SPECIAL_FALLBACK:
    5085        3207 :       modestr = " special fallback";
    5086        3207 :       break;
    5087       10492 :     case CHAR_FONT_SPECIFIC_FALLBACK:
    5088       10492 :       modestr = " font-specific fallback";
    5089       10492 :       break;
    5090           0 :     default:
    5091           0 :       assert(0 == "unhandled case of character mode");
    5092             :       break;
    5093             :   }
    5094      325453 :   return modestr;
    5095             : }
    5096             : 
    5097      325449 : void define_character(char_mode mode, const char *font_name)
    5098             : {
    5099      325449 :   const char *modestr = character_mode_description(mode);
    5100      325449 :   tok.skip_spaces();
    5101      325449 :   charinfo *ci = tok.get_charinfo(true /* required */);
    5102      325449 :   if (0 /* nullptr */ == ci) {
    5103           0 :     assert(0 == "attempted to use token without charinfo in character"
    5104             :            " definition request");
    5105             :     skip_line();
    5106           0 :     return;
    5107             :   }
    5108             :   // TODO: If `ci` is already a character class, clobber it.
    5109      325449 :   if (font_name != 0 /* nullptr */) {
    5110       10492 :     string s(font_name);
    5111       10492 :     s += ' ';
    5112       10492 :     s += ci->nm.contents();
    5113       10492 :     s += '\0';
    5114       10492 :     ci = lookup_charinfo(symbol(s.contents()));
    5115             :   }
    5116      325449 :   tok.next();
    5117             :   int c;
    5118      325449 :   node *n = 0 /* nullptr */;
    5119      325449 :   if (tok.is_newline())
    5120           0 :     c = '\n';
    5121      325449 :   else if (tok.is_tab())
    5122           0 :     c = '\t';
    5123      325449 :   else if (!tok.is_space()) {
    5124           0 :     error("ignoring invalid%1 character definition; expected an"
    5125             :           " ordinary, indexed, or special character to define, got %2",
    5126           0 :           modestr, tok.description());
    5127           0 :     skip_line();
    5128           0 :     return;
    5129             :   }
    5130             :   else
    5131      325449 :     c = read_char_in_copy_mode(&n);
    5132      343139 :   while (c == ' ' || c == '\t')
    5133       17690 :     c = read_char_in_copy_mode(&n);
    5134      325449 :   if (c == '"')
    5135        1336 :     c = read_char_in_copy_mode(&n);
    5136      325449 :   macro *m = new macro;
    5137             :   // Construct a macro from input characters; if the input character
    5138             :   // code is 0, we've read a node--append that.
    5139     3060531 :   while (c != '\n' && c != EOF) {
    5140     2735082 :     if (c != 0)
    5141     2735078 :       m->append(static_cast<unsigned char>(c));
    5142             :     else
    5143           4 :       m->append(n);
    5144     2735082 :     c = read_char_in_copy_mode(&n);
    5145             :   }
    5146             :   // Assign the macro to the character, discarding any previous macro.
    5147      325449 :   m = ci->set_macro(m, mode);
    5148      325449 :   if (m != 0 /* nullptr */)
    5149       19537 :     delete m;
    5150      325449 :   tok.next();
    5151             : }
    5152             : 
                       // Request handler that defines a character in CHAR_NORMAL mode.
                       // has_arg() is called in "peek" mode because the definition is to
                       // be read in copy mode.  With no argument, a WARN_MISSING warning
                       // is issued, skip_line() is called, and nothing is defined.
    5153       10804 : static void define_character_request()
    5154             : {
    5155       10804 :   if (!has_arg(true /* peek; we want to read in copy mode */)) {
    5156           0 :     warning(WARN_MISSING, "character definition request expects"
    5157             :             " arguments");
    5158           0 :     skip_line();
    5159           0 :     return;
    5160             :   }
    5161       10804 :   define_character(CHAR_NORMAL);
    5162             : }
    5163             : 
                       // Request handler that defines a character in CHAR_FALLBACK mode.
                       // has_arg() is called in "peek" mode because the definition is to
                       // be read in copy mode.  With no argument, a WARN_MISSING warning
                       // is issued, skip_line() is called, and nothing is defined.
    5164      300946 : static void define_fallback_character_request()
    5165             : {
    5166      300946 :   if (!has_arg(true /* peek; we want to read in copy mode */)) {
    5167           0 :     warning(WARN_MISSING, "fallback character definition request"
    5168             :             " expects arguments");
    5169           0 :     skip_line();
    5170           0 :     return;
    5171             :   }
    5172      300946 :   define_character(CHAR_FALLBACK);
    5173             : }
    5174             : 
                       // Request handler that defines a character in
                       // CHAR_SPECIAL_FALLBACK mode.  has_arg() is called in "peek" mode
                       // because the definition is to be read in copy mode.  With no
                       // argument, a WARN_MISSING warning is issued, skip_line() is
                       // called, and nothing is defined.
    5175        3207 : static void define_special_character_request()
    5176             : {
    5177        3207 :   if (!has_arg(true /* peek; we want to read in copy mode */)) {
    5178           0 :     warning(WARN_MISSING, "special character definition request expects"
    5179             :             " arguments");
    5180           0 :     skip_line();
    5181           0 :     return;
    5182             :   }
    5183        3207 :   define_character(CHAR_SPECIAL_FALLBACK);
    5184             : }
    5185             : 
                       // Request handler that reports on each character given as an
                       // argument.  For every argument token that is a character, the
                       // token's description is written with errprint(), stderr is
                       // flushed, and the character's charinfo is dumped via
                       // charinfo::dump().  Processing stops at a newline, at end of
                       // input, or at the first token that is not a character (which is
                       // reported as an error).  The remainder of the line is skipped.
    5186           5 : static void print_character_request()
    5187             : {
    5188           5 :   if (!has_arg()) {
    5189           0 :     warning(WARN_MISSING, "character report request expects arguments");
    5190           0 :     skip_line();
    5191           0 :     return;
    5192             :   }
    5193             :   charinfo *ci;
    5194             :   do {
    5195          10 :     tok.skip_spaces();
    5196          10 :     if (tok.is_newline() || tok.is_eof())
    5197           5 :       break;
    5198           5 :     if (!tok.is_any_character()) {
    5199           0 :       error("character report request expects characters or character"
    5200           0 :             " classes as arguments; got %1", tok.description());
    5201           0 :       break;
    5202             :     }
                           // Look the character up without creating a charinfo for it; a
                           // null result is silently ignored.
    5203           5 :     ci = tok.get_charinfo(false /* required */,
    5204             :                           true /* suppress creation */);
    5205           5 :     if (ci != 0 /* nullptr */) {
    5206           5 :       errprint("%1\n", tok.description());
    5207           5 :       fflush(stderr);
    5208           5 :       ci->dump();
    5209             :     }
    5210           5 :     tok.next();
    5211             :   } while (true);
    5212           5 :   skip_line();
    5213             : }
    5214             : 
                       // Request handler that removes the definition of each character
                       // given as an argument.  Space and tab tokens between arguments are
                       // passed over.  For a character token, its charinfo is looked up
                       // without being created; if none exists a WARN_CHAR warning is
                       // issued, otherwise its macro is replaced with a null pointer and
                       // the previously assigned macro (if any) is deleted.  A token that
                       // is not a character is reported as an error and ends processing.
                       // The remainder of the line is skipped.
    5215        8749 : static void remove_character()
    5216             : {
    5217        8749 :   if (!has_arg()) {
    5218           0 :     warning(WARN_MISSING, "character definition removal request expects"
    5219             :             " arguments");
    5220           0 :     skip_line();
    5221           0 :     return;
    5222             :   }
    5223       17514 :   while (!tok.is_newline() && !tok.is_eof()) {
    5224        8765 :     if (!tok.is_space() && !tok.is_tab()) {
    5225        8757 :       if (tok.is_any_character()) {
    5226        8757 :         charinfo *ci = tok.get_charinfo(true /* required */,
    5227             :                                         true /* suppress creation */);
    5228        8757 :         if (0 /* nullptr */ == ci)
    5229           0 :            warning(WARN_CHAR, "%1 is not defined", tok.description());
    5230             :         else {
                                   // set_macro() hands back the macro it replaced; this
                                   // function deletes it.
    5231        8757 :           macro *m = ci->set_macro(0 /* nullptr */);
    5232        8757 :           if (m != 0 /* nullptr */)
    5233         228 :             delete m;
    5234             :         }
    5235             :       }
    5236             :       else {
    5237           0 :         error("cannot remove character; %1 is not a character",
    5238           0 :               tok.description());
    5239           0 :         break;
    5240             :       }
    5241             :     }
    5242        8765 :     tok.next();
    5243             :   }
    5244        8749 :   skip_line();
    5245             : }
    5246             : 
                       // Interpolate the string or macro named `nm` by pushing an iterator
                       // over its contents onto the input stack.  If `nm` resolves to
                       // something with no macro representation (to_macro() returns
                       // null), an error is reported and nothing is pushed.  An object
                       // for which is_string() is true is pushed as a string_iterator;
                       // otherwise a macro_iterator is pushed, constructed with the name
                       // returned by input_stack::get_macro_name().
    5247     4419563 : static void interpolate_string(symbol nm)
    5248             : {
    5249     4419563 :   request_or_macro *p = lookup_request(nm);
    5250     4419563 :   macro *m = p->to_macro();
    5251     4419563 :   if (0 /* nullptr */ == m)
    5252           0 :     error("cannot interpolate request '%1'", nm.contents());
    5253             :   else {
    5254     4419563 :     if (m->is_string()) {
    5255     4418066 :       string_iterator *si = new string_iterator(*m, "string", nm);
    5256     4418066 :       input_stack::push(si);
    5257             :      }
    5258             :     else {
    5259             :       // if a macro is called as a string, \$0 doesn't get changed
    5260        1497 :       macro_iterator *mi = new macro_iterator(input_stack::get_macro_name(),
    5261        1497 :                                               *m, "string", 1);
    5262        1497 :       input_stack::push(mi);
    5263             :     }
    5264             :   }
    5265     4419563 : }
    5266             : 
                       // Interpolate the string or macro named `nm`, with arguments.  A
                       // macro_iterator is created for it, handed to
                       // decode_escape_sequence_arguments(), and then pushed onto the
                       // input stack.  If `nm` has no macro representation (to_macro()
                       // returns null), an error is reported and nothing is pushed.
    5267        1935 : static void interpolate_string_with_args(symbol nm)
    5268             : {
    5269        1935 :   request_or_macro *p = lookup_request(nm);
    5270        1935 :   macro *m = p->to_macro();
    5271        1935 :   if (0 /* nullptr */ == m)
    5272           0 :     error("cannot interpolate request '%1'", nm.contents());
    5273             :   else {
    5274        1935 :     macro_iterator *mi = new macro_iterator(nm, *m);
    5275        1935 :     decode_escape_sequence_arguments(mi);
    5276        1935 :     input_stack::push(mi);
    5277             :   }
    5278        1935 : }
    5279             : 
                       // Interpolate the positional parameter selected by `nm` by pushing
                       // an iterator onto the input stack.  The contents of `nm` choose
                       // the behavior:
                       //   (null or empty)  report a copy-mode error
                       //   one digit        push that argument
                       //   "*"              all arguments joined by single spaces, with
                       //                    DOUBLE_QUOTE codes dropped
                       //   "@"              like "*", but each argument is wrapped as
                       //                    '"' BEGIN_QUOTE ... END_QUOTE '"'
                       //   "^"              all arguments, with DOUBLE_QUOTE codes turned
                       //                    into '"' and a space appended after each
                       //                    argument for which space_follows_arg() is true
                       //   anything else    must consist solely of digits; it is
                       //                    converted with atoi() and that argument is
                       //                    pushed, otherwise a copy-mode error is reported
                       // For "*", "@", and "^" nothing is pushed when there are no
                       // arguments.
    5280     2534324 : static void interpolate_positional_parameter(symbol nm)
    5281             : {
    5282     2534324 :   const char *s = nm.contents();
    5283     2534324 :   if (0 /* nullptr */ == s || '\0' == *s)
    5284           0 :     copy_mode_error("missing positional argument number in copy mode");
    5285     2534324 :   else if (s[1] == 0 && csdigit(s[0]))
    5286     2382603 :     input_stack::push(input_stack::get_arg(s[0] - '0'));
    5287      151721 :   else if (s[0] == '*' && s[1] == '\0') {
    5288       62404 :     int limit = input_stack::nargs();
    5289      124808 :     string args;
    5290      145549 :     for (int i = 1; i <= limit; i++) {
                             // Each iterator returned by get_arg() is drained and then
                             // deleted here.
    5291       83145 :       input_iterator *p = input_stack::get_arg(i);
    5292             :       int c;
    5293      902333 :       while ((c = p->get(0)) != EOF)
    5294      819188 :         if (c != DOUBLE_QUOTE)
    5295      776777 :           args += c;
    5296       83145 :       if (i != limit)
    5297       22315 :         args += ' ';
    5298       83145 :       delete p;
    5299             :     }
    5300       62404 :     if (limit > 0) {
                             // Terminate the buffer so contents() can be handed over as a
                             // C string.
    5301       60830 :       args += '\0';
    5302       60830 :       input_stack::push(make_temp_iterator(args.contents()));
    5303       62404 :     }
    5304             :   }
    5305       89317 :   else if (s[0] == '@' && s[1] == '\0') {
    5306       88729 :     int limit = input_stack::nargs();
    5307      177458 :     string args;
    5308      219652 :     for (int i = 1; i <= limit; i++) {
    5309      130923 :       args += '"';
    5310      130923 :       args += char(BEGIN_QUOTE);
    5311      130923 :       input_iterator *p = input_stack::get_arg(i);
    5312             :       int c;
    5313     1985039 :       while ((c = p->get(0)) != EOF)
    5314     1854116 :         if (c != DOUBLE_QUOTE)
    5315     1704904 :           args += c;
    5316      130923 :       args += char(END_QUOTE);
    5317      130923 :       args += '"';
    5318      130923 :       if (i != limit)
    5319       80995 :         args += ' ';
    5320      130923 :       delete p;
    5321             :     }
    5322       88729 :     if (limit > 0) {
    5323       49928 :       args += '\0';
    5324       49928 :       input_stack::push(make_temp_iterator(args.contents()));
    5325       88729 :     }
    5326             :   }
    5327         588 :   else if (s[0] == '^' && s[1] == '\0') {
    5328           0 :     int limit = input_stack::nargs();
    5329           0 :     string args;
                           // NOTE(review): the value peeked here is overwritten before it
                           // is read whenever the loop below runs, and is unused otherwise;
                           // confirm whether peek() is wanted for a side effect.
    5330           0 :     int c = input_stack::peek();
    5331           0 :     for (int i = 1; i <= limit; i++) {
    5332           0 :       input_iterator *p = input_stack::get_arg(i);
    5333           0 :       while ((c = p->get(0)) != EOF) {
    5334           0 :         if (c == DOUBLE_QUOTE)
    5335           0 :           c = '"';
    5336           0 :         args += c;
    5337             :       }
    5338           0 :       if (input_stack::space_follows_arg(i))
    5339           0 :         args += ' ';
    5340           0 :       delete p;
    5341             :     }
    5342           0 :     if (limit > 0) {
    5343           0 :       args += '\0';
    5344           0 :       input_stack::push(make_temp_iterator(args.contents()));
    5345           0 :     }
    5346             :   }
    5347             :   else {
                           // Longer names: validate every byte before converting, and
                           // track printability so the diagnostic can quote the name
                           // only when that is safe.
    5348             :     const char *p;
    5349         588 :     bool is_valid = true;
    5350         588 :     bool is_printable = true;
    5351        1764 :     for (p = s; p != 0 /* nullptr */ && *p != '\0'; p++) {
    5352        1176 :       if (!csdigit(*p))
    5353           0 :         is_valid = false;
    5354        1176 :       if (!csprint(*p))
    5355           0 :         is_printable = false;
    5356             :     }
    5357         588 :     if (!is_valid) {
    5358             :       static const char msg[] = "invalid positional argument number in"
    5359             :                                 " copy mode";
    5360           0 :       if (is_printable)
    5361           0 :         copy_mode_error("%1 '%2'", msg, s);
    5362             :       else
    5363           0 :         copy_mode_error("%1 (unprintable)", msg);
    5364             :     }
    5365             :     else
    5366         588 :       input_stack::push(input_stack::get_arg(atoi(s)));
    5367             :   }
    5368     2534324 : }
    5369             : 
                       // Push the current token `tok` back onto the input with
                       // push_token(), then begin a page on the top-level diversion.
    5370         615 : void handle_first_page_transition()
    5371             : {
    5372         615 :   push_token(tok);
    5373         615 :   topdiv->begin_page();
    5374         615 : }
    5375             : 
    5376             : // Push token `t` back onto the input: wrap it in a token_node,
    5377             : // append that node to a temporary macro, and push a string_iterator
                       // over that macro onto the input stack.
    5378             : 
    5379        1657 : static void push_token(const token &t)
    5380             : {
    5381        1657 :   macro m;
    5382        1657 :   m.append(new token_node(t));
    5383        1657 :   input_stack::push(new string_iterator(m));
    5384        1657 : }
    5385             : 
                       // Push a one-character input source containing PAGE_EJECTOR onto
                       // the input stack.  The buffer is static, so its storage outlives
                       // this call.
    5386        5087 : void push_page_ejector()
    5387             : {
    5388             :   static char buf[2] = { PAGE_EJECTOR, '\0' };
    5389        5087 :   input_stack::push(make_temp_iterator(buf));
    5390        5087 : }
    5391             : 
                       // Arrange for the request identified by the input code `code` to
                       // be processed around the start of a page.  The current token is
                       // pushed back (wrapped in a token_node inside a macro), then a
                       // one-character input source holding `code` is pushed on top of
                       // it, a page is begun on the top-level diversion, and the next
                       // token is read.
                       // NOTE(review): `code` is pushed last, so it is presumably read
                       // before the saved token -- confirm input_stack ordering.
    5392         125 : void handle_initial_request(unsigned char code)
    5393             : {
    5394             :   char buf[2];
    5395         125 :   buf[0] = code;
    5396         125 :   buf[1] = '\0';
    5397         250 :   macro mac;
    5398         125 :   mac.append(new token_node(tok));
    5399         125 :   input_stack::push(new string_iterator(mac));
    5400         125 :   input_stack::push(make_temp_iterator(buf));
    5401         125 :   topdiv->begin_page();
    5402         125 :   tok.next();
    5403         125 : }
    5404             : 
                       // Run handle_initial_request() for the TITLE_REQUEST input code.
    5405         125 : void handle_initial_title()
    5406             : {
    5407         125 :   handle_initial_request(TITLE_REQUEST);
    5408         125 : }
    5409             : 
                       // Shared implementation of the macro definition, macro appendment,
                       // and input-ignoring requests.  The body is read in copy mode up
                       // to a terminating line: one beginning with '.', optionally
                       // followed by spaces or tabs, then the terminator name, then a
                       // space or newline.  The terminator name defaults to "." when
                       // none is given, so the default terminating line is "..".
                       //
                       //   mode     DEFINE_NORMAL and DEFINE_APPEND read a macro name and
                       //            store the body in request_dictionary under it;
                       //            DEFINE_APPEND starts from the existing macro's
                       //            contents if there is one.  Any other mode (ignore()
                       //            passes DEFINE_IGNORE) reads no name and stores
                       //            nothing.
                       //   calling  CALLING_INDIRECT reads up to two identifiers and
                       //            interpolates the strings they name back onto the
                       //            input, so that the macro and terminator names are
                       //            then read from those strings' contents.
                       //   comp     COMP_DISABLE prefixes the body with PUSH_GROFF_MODE
                       //            and COMP_ENABLE with PUSH_COMP_MODE; either one
                       //            suffixes it with POP_GROFFCOMP_MODE when it is
                       //            stored.
                       //
                       // If the terminator name is not ".", it is invoked as a macro once
                       // the definition ends; otherwise the rest of the line is skipped.
                       // Reaching end of input before the terminator is reported as an
                       // error, using the location where the definition started when that
                       // is available.
    5410      103757 : static void do_define_macro(define_mode mode, calling_mode calling,
    5411             :                             comp_mode comp)
    5412             : {
    5413      103757 :   symbol nm, term, dot_symbol(".");
    5414      103757 :   if (calling == CALLING_INDIRECT) {
    5415           0 :     symbol temp1 = read_identifier(true /* required */);
    5416           0 :     if (temp1.is_null()) {
    5417           0 :       skip_line();
    5418           0 :       return;
    5419             :     }
    5420           0 :     symbol temp2 = read_identifier();
                           // Pushed in reverse of the order they are to be read back:
                           // " " <temp1 contents> " " <temp2 contents> "\n".
                           // NOTE(review): assumes input_stack is last-in, first-out.
    5421           0 :     input_stack::push(make_temp_iterator("\n"));
    5422           0 :     if (!temp2.is_null()) {
    5423           0 :       interpolate_string(temp2);
    5424           0 :       input_stack::push(make_temp_iterator(" "));
    5425             :     }
    5426           0 :     interpolate_string(temp1);
    5427           0 :     input_stack::push(make_temp_iterator(" "));
    5428           0 :     tok.next();
    5429             :   }
    5430      103757 :   if (mode == DEFINE_NORMAL || mode == DEFINE_APPEND) {
    5431      100178 :     nm = read_identifier(true /* required */);
    5432      100178 :     if (nm.is_null()) {
    5433           0 :       skip_line();
    5434           0 :       return;
    5435             :     }
    5436             :   }
    5437      103757 :   term = read_identifier(); // terminating name
    5438      103757 :   if (term.is_null())
    5439       88110 :     term = dot_symbol;
                         // Discard anything else on the request line.
    5440      104345 :   while (!tok.is_newline() && !tok.is_eof())
    5441         588 :     tok.next();
                         // Remember where the definition starts for the end-of-file
                         // diagnostics below.
    5442             :   const char *start_filename;
    5443             :   int start_lineno;
    5444             :   bool have_start_location
    5445      103757 :     = input_stack::get_location(false /* allow_macro */,
    5446             :                                 &start_filename,
    5447             :                                 &start_lineno);
    5448             :   node *n;
    5449             :   // doing this here makes the line numbers come out right
    5450      103757 :   int c = read_char_in_copy_mode(&n, true /* is_defining */);
                         // `mac` accumulates the body; `mm` is the existing dictionary
                         // entry for `nm`, if it is a macro.
    5451      207514 :   macro mac;
    5452      103757 :   macro *mm = 0 /* nullptr */;
    5453      103757 :   if (mode == DEFINE_NORMAL || mode == DEFINE_APPEND) {
    5454             :     request_or_macro *rm =
    5455      100178 :       static_cast<request_or_macro *>(request_dictionary.lookup(nm));
    5456      100178 :     if (rm != 0 /* nullptr */)
    5457        4379 :       mm = rm->to_macro();
    5458      100178 :     if (mm != 0 /* nullptr */ && mode == DEFINE_APPEND)
    5459          93 :       mac = *mm;
    5460             :   }
    5461      103757 :   bool reading_beginning_of_input_line = true;
    5462      103757 :   if (comp == COMP_DISABLE)
    5463       10623 :     mac.append(PUSH_GROFF_MODE);
    5464       93134 :   else if (comp == COMP_ENABLE)
    5465           0 :     mac.append(PUSH_COMP_MODE);
    5466             :   for (;;) {
    5467    42760001 :     if (c == '\n')
    5468     1461321 :       mac.clear_string_flag();
    5469    43099083 :     while (c == ESCAPE_NEWLINE) {
    5470      339082 :       if (mode == DEFINE_NORMAL || mode == DEFINE_APPEND)
    5471             :         // TODO: grochar; may need NFD decomposition and UTF-8 encoding
    5472      339059 :         mac.append(static_cast<unsigned char>(c));
    5473      339082 :       c = read_char_in_copy_mode(&n, true /* is_defining */);
    5474             :     }
                           // A '.' at the start of an input line may begin the terminating
                           // line.  `i` counts how many characters of the terminator name
                           // have matched; `d` holds the last character read while
                           // matching.
    5475    42760001 :     if (reading_beginning_of_input_line && (c == '.')) {
    5476     1744869 :       const char *s = term.contents();
    5477     1744869 :       int d = '\0';
    5478             :       // see if it matches term
    5479     1744869 :       int i = 0;
    5480     1744869 :       if (s[0] != '\0') {
    5481     7704581 :         while (((d = read_char_in_copy_mode(&n)) == ' ') || (d == '\t'))
    5482             :           ;
    5483     1744869 :         if (s[0] == d) {
    5484      136551 :           for (i = 1; s[i] != '\0'; i++) {
    5485       32589 :             d = read_char_in_copy_mode(&n);
    5486       32589 :             if (s[i] != d)
    5487         964 :               break;
    5488             :           }
    5489             :         }
    5490             :       }
                             // The whole name matched.  It terminates the definition if a
                             // space or newline follows, or unconditionally when the name
                             // is two characters long and want_att_compat is set.
    5491     3489738 :       if (s[i] == '\0'
    5492     1848831 :           && (((i == 2) && want_att_compat)
    5493      103962 :               || ((d = read_char_in_copy_mode(&n)) == ' ')
    5494      103762 :               || (d == '\n'))) { // we found it
    5495      103757 :         if (d == '\n')
    5496      103557 :           tok.make_newline();
    5497             :         else
    5498         200 :           tok.make_space();
    5499      103757 :         if (mode == DEFINE_APPEND || mode == DEFINE_NORMAL) {
    5500      100178 :           if (!mm) {
    5501       95978 :             mm = new macro;
    5502       95978 :             request_dictionary.define(nm, mm);
    5503             :           }
    5504      100178 :           if (comp == COMP_DISABLE || comp == COMP_ENABLE)
    5505       10623 :             mac.append(POP_GROFFCOMP_MODE);
    5506      100178 :           *mm = mac;
    5507             :         }
    5508      103757 :         if (term != dot_symbol) {
    5509       15647 :           want_input_ignored = false;
    5510       15647 :           interpolate_macro(term);
    5511             :         }
    5512             :         else
    5513       88110 :           skip_line();
    5514      103757 :         return;
    5515             :       }
                             // Not the terminator: store the '.' and the matched prefix of
                             // the name as ordinary body text, then continue with the
                             // first character that did not match.
    5516     1641112 :       if ((mode == DEFINE_APPEND) || (mode == DEFINE_NORMAL)) {
    5517             :         // TODO: grochar; may need NFD decomposition and UTF-8 encoding
    5518     1623641 :         mac.append(static_cast<unsigned char>(c));
    5519     1624922 :         for (int j = 0; j < i; j++)
    5520             :           // TODO: grochar; may need NFD decomposition & UTF-8 encoding
    5521        1281 :           mac.append(static_cast<unsigned char>(s[j]));
    5522             :       }
    5523     1641112 :       c = d;
    5524             :     }
    5525    42656244 :     if (c == EOF) {
    5526           0 :       if ((mode == DEFINE_APPEND) || (mode == DEFINE_NORMAL)) {
    5527           0 :         if (have_start_location)
    5528           0 :           error_with_file_and_line(start_filename, start_lineno,
    5529             :                                    "encountered end of file"
    5530             :                                    " while defining macro '%1'",
    5531           0 :                                    nm.contents());
    5532             :         else
    5533           0 :           error("end of file while defining macro '%1'", nm.contents());
    5534             :       }
    5535             :       else {
    5536             :         static const char msg[] = "encountered end of file while"
    5537             :                                   " ignoring input";
    5538           0 :         if (have_start_location)
    5539           0 :           error_with_file_and_line(start_filename, start_lineno, msg);
    5540             :         else
    5541           0 :           error(msg);
    5542             :       }
    5543           0 :       tok.next();
    5544           0 :       return;
    5545             :     }
                           // A character code of 0 means read_char_in_copy_mode() produced
                           // a node in `n` instead of a character.
    5546    42656244 :     if ((mode == DEFINE_NORMAL) || (mode == DEFINE_APPEND)) {
    5547    41529517 :       if (c == '\0')
    5548           0 :         mac.append(n);
    5549             :       else
    5550             :         // TODO: grochar; may need NFD decomposition and UTF-8 encoding
    5551    41529517 :         mac.append(static_cast<unsigned char>(c));
    5552             :     }
    5553    42656244 :     reading_beginning_of_input_line = (c == '\n');
    5554    42656244 :     c = read_char_in_copy_mode(&n, true /* is_defining */);
    5555    42656244 :   }
    5556             : }
    5557             : 
                       // Define a macro (DEFINE_NORMAL, CALLING_NORMAL).  The compatibility
                       // mode argument is COMP_ENABLE when want_att_compat is set and
                       // COMP_IGNORE otherwise.
    5558       89447 : static void define_macro()
    5559             : {
    5560       89447 :   do_define_macro(DEFINE_NORMAL, CALLING_NORMAL,
    5561             :                   want_att_compat ? COMP_ENABLE : COMP_IGNORE);
    5562       89447 : }
    5563             : 
                       // Define a macro (DEFINE_NORMAL, CALLING_NORMAL) with COMP_DISABLE.
    5564       10623 : static void define_nocomp_macro()
    5565             : {
    5566       10623 :   do_define_macro(DEFINE_NORMAL, CALLING_NORMAL, COMP_DISABLE);
    5567       10623 : }
    5568             : 
                       // Define a macro indirectly (DEFINE_NORMAL, CALLING_INDIRECT).  The
                       // compatibility mode argument is COMP_ENABLE when want_att_compat
                       // is set and COMP_IGNORE otherwise.
    5569           0 : static void define_indirect_macro()
    5570             : {
    5571           0 :   do_define_macro(DEFINE_NORMAL, CALLING_INDIRECT,
    5572             :                   want_att_compat ? COMP_ENABLE : COMP_IGNORE);
    5573           0 : }
    5574             : 
                       // Define a macro indirectly (DEFINE_NORMAL, CALLING_INDIRECT) with
                       // COMP_DISABLE.
    5575           0 : static void define_indirect_nocomp_macro()
    5576             : {
    5577           0 :   do_define_macro(DEFINE_NORMAL, CALLING_INDIRECT, COMP_DISABLE);
    5578           0 : }
    5579             : 
                       // Append to a macro (DEFINE_APPEND, CALLING_NORMAL).  The
                       // compatibility mode argument is COMP_ENABLE when want_att_compat
                       // is set and COMP_IGNORE otherwise.
    5580         108 : static void append_macro()
    5581             : {
    5582         108 :   do_define_macro(DEFINE_APPEND, CALLING_NORMAL,
    5583             :                   want_att_compat ? COMP_ENABLE : COMP_IGNORE);
    5584         108 : }
    5585             : 
                       // Append to a macro (DEFINE_APPEND, CALLING_NORMAL) with
                       // COMP_DISABLE.
    5586           0 : static void append_nocomp_macro()
    5587             : {
    5588           0 :   do_define_macro(DEFINE_APPEND, CALLING_NORMAL, COMP_DISABLE);
    5589           0 : }
    5590             : 
                       // Append to a macro indirectly (DEFINE_APPEND, CALLING_INDIRECT).
                       // The compatibility mode argument is COMP_ENABLE when
                       // want_att_compat is set and COMP_IGNORE otherwise.
    5591           0 : static void append_indirect_macro()
    5592             : {
    5593           0 :   do_define_macro(DEFINE_APPEND, CALLING_INDIRECT,
    5594             :                   want_att_compat ? COMP_ENABLE : COMP_IGNORE);
    5595           0 : }
    5596             : 
                       // Append to a macro indirectly (DEFINE_APPEND, CALLING_INDIRECT)
                       // with COMP_DISABLE.
    5597           0 : static void append_indirect_nocomp_macro()
    5598             : {
    5599           0 :   do_define_macro(DEFINE_APPEND, CALLING_INDIRECT, COMP_DISABLE);
    5600           0 : }
    5601             : 
                       // Read and discard input up to a terminating line.  This reuses
                       // do_define_macro() in DEFINE_IGNORE mode, in which the body is
                       // read but not stored.  want_input_ignored is set for the duration
                       // of the call and cleared afterward.
    5602        3579 : void ignore()
    5603             : {
    5604        3579 :   want_input_ignored = true;
    5605        3579 :   do_define_macro(DEFINE_IGNORE, CALLING_NORMAL, COMP_IGNORE);
    5606        3579 :   want_input_ignored = false;
    5607        3579 : }
    5608             : 
                       // Request handler that removes each name given as an argument from
                       // request_dictionary.  Identifiers are read until
                       // read_identifier() returns a null symbol; the remainder of the
                       // line is then skipped.  With no arguments, a WARN_MISSING warning
                       // is issued instead.
    5609      136468 : void remove_macro()
    5610             : {
    5611      136468 :   if (!has_arg()) {
    5612           0 :     warning(WARN_MISSING, "name removal request expects arguments");
    5613           0 :     skip_line();
    5614           0 :     return;
    5615             :   }
    5616             :   for (;;) {
    5617      295920 :     symbol s = read_identifier();
    5618      295920 :     if (s.is_null())
    5619      136468 :       break;
    5620      159452 :     request_dictionary.remove(s);
    5621      159452 :   }
    5622      136468 :   skip_line();
    5623             : }
    5624             : 
                       // Request handler that reads two identifiers and passes them, in
                       // order, to request_dictionary.rename().  A missing second
                       // identifier produces a WARN_MISSING warning and no renaming.  The
                       // remainder of the line is skipped in every case.
                       // NOTE(review): the warning text calls the second argument an
                       // *existing* name, the same wording alias_macro() uses; confirm
                       // which argument of rename() is the existing name and whether the
                       // message is accurate here.
    5625         716 : void rename_macro()
    5626             : {
    5627         716 :   if (!has_arg()) {
    5628           0 :     warning(WARN_MISSING, "renaming request expects arguments");
    5629           0 :     skip_line();
    5630           0 :     return;
    5631             :   }
    5632         716 :   symbol s1 = read_identifier();
    5633         716 :   assert(s1 != 0 /* nullptr */);
    5634         716 :   if (!s1.is_null()) {
    5635         716 :     symbol s2 = read_identifier();
    5636         716 :     if (s2.is_null())
    5637           0 :       warning(WARN_MISSING, "renaming request expects identifier of"
    5638             :               " existing request, macro, string, or diversion as"
    5639             :               " second argument");
    5640             :     else
    5641         716 :       request_dictionary.rename(s1, s2);
    5642             :   }
    5643         716 :   skip_line();
    5644             : }
    5645             : 
                       // Request handler that reads two identifiers and passes them, in
                       // order, to request_dictionary.alias().  When alias() returns
                       // false, an error naming the second identifier as undefined is
                       // reported.  A missing second identifier produces a WARN_MISSING
                       // warning.  The remainder of the line is skipped in every case.
    5646       44745 : void alias_macro()
    5647             : {
    5648       44745 :   if (!has_arg()) {
    5649           0 :     warning(WARN_MISSING, "name aliasing request expects arguments");
    5650           0 :     skip_line();
    5651           0 :     return;
    5652             :   }
    5653       44745 :   symbol s1 = read_identifier();
    5654       44745 :   assert(s1 != 0 /* nullptr */);
    5655       44745 :   if (!s1.is_null()) {
    5656       44745 :     symbol s2 = read_identifier();
    5657       44745 :     if (s2.is_null())
    5658           0 :       warning(WARN_MISSING, "name aliasing request expects identifier"
    5659             :               " of existing request, macro, string, or diversion as"
    5660             :               " second argument");
    5661             :     else {
    5662       44745 :       if (!request_dictionary.alias(s1, s2))
    5663           0 :         error("cannot alias undefined name '%1'", s2.contents());
    5664             :     }
    5665             :   }
    5666       44745 :   skip_line();
    5667             : }
    5668             : 
                       // Request handler that removes the last element from the macro,
                       // string, or diversion named by its argument.  Requests and empty
                       // objects are rejected with an error.  Trailing mode-switching
                       // markers are handled specially: a trailing POP_GROFFCOMP_MODE
                       // that is immediately preceded by PUSH_GROFF_MODE or
                       // PUSH_COMP_MODE forms an empty pair and is dropped entirely,
                       // repeatedly; a trailing POP_GROFFCOMP_MODE preceded by anything
                       // else is kept by overwriting the element before it.  The
                       // remainder of the line is skipped in every case.
    5669        1006 : void chop_macro()
    5670             : {
    5671        1006 :   if (!has_arg()) {
    5672           0 :     warning(WARN_MISSING, "chop request expects an argument");
    5673           0 :     skip_line();
    5674           0 :     return;
    5675             :   }
    5676        1006 :   symbol s = read_identifier();
    5677        1006 :   assert(s != 0 /* nullptr */);
    5678        1006 :   if (!s.is_null()) {
    5679        1006 :     request_or_macro *p = lookup_request(s);
    5680        1006 :     macro *m = p->to_macro();
    5681        1006 :     if (0 /* nullptr */ == m)
    5682           0 :       error("cannot chop request '%1'", s.contents());
    5683        1006 :     else if (m->is_empty())
    5684           0 :       error("cannot chop empty %1 '%2'",
    5685           0 :             (m->is_diversion() ? "diversion" : "macro or string"),
    5686           0 :             s.contents());
    5687             :     else {
                               // Nonzero while a POP_GROFFCOMP_MODE has been stripped from
                               // the end without a matching PUSH marker before it.
    5688        1006 :       int have_restore = 0;
    5689             :       // We have to check for additional save/restore pairs which could
    5690             :       // be there due to empty am1 requests.
    5691             :       for (;;) {
    5692        1006 :         if (m->get(m->len - 1) != POP_GROFFCOMP_MODE)
    5693        1006 :           break;
    5694           0 :         have_restore = 1;
    5695           0 :         m->len -= 1;
                                 // NOTE(review): m->len is not checked against 0 before
                                 // this get(); presumably a POP marker is always preceded
                                 // by at least one element -- confirm.
    5696           0 :         if (m->get(m->len - 1) != PUSH_GROFF_MODE
    5697           0 :             && m->get(m->len - 1) != PUSH_COMP_MODE)
    5698           0 :           break;
    5699           0 :         have_restore = 0;
    5700           0 :         m->len -= 1;
    5701           0 :         if (m->len == 0)
    5702           0 :           break;
    5703             :       }
    5704        1006 :       if (m->len == 0)
    5705           0 :         error("cannot chop empty object '%1'", s.contents());
    5706             :       else {
                                 // With a pending restore, the last remaining element is
                                 // replaced by the POP marker; the length was already
                                 // reduced by one above.  Otherwise just shorten by one.
    5707        1006 :         if (have_restore)
    5708           0 :           m->set(POP_GROFFCOMP_MODE, m->len - 1);
    5709             :         else
    5710        1006 :           m->len -= 1;
    5711             :       }
    5712             :     }
    5713             :   }
    5714        1006 :   skip_line();
    5715             : }
    5716             : 
                       // Direction of the transformation applied by
                       // do_string_case_transform().
    5717             : enum case_xform_mode { STRING_UPCASE, STRING_DOWNCASE };
    5718             : 
    5719             : // Case-transform each byte of the string argument's contents.
                       // The identifier naming the object is read from the input.  If it
                       // names a request, an error is reported and the line is skipped.
                       // Otherwise a new macro is built in which every element is passed
                       // through cmlower() (STRING_DOWNCASE) or cmupper() (STRING_UPCASE),
                       // except the PUSH_GROFF_MODE, PUSH_COMP_MODE, and
                       // POP_GROFFCOMP_MODE markers, which are copied unchanged.  The new
                       // macro is then defined under the same name.
    5720         201 : void do_string_case_transform(case_xform_mode mode)
    5721             : {
    5722         201 :   assert((mode == STRING_DOWNCASE) || (mode == STRING_UPCASE));
    5723         201 :   symbol s = read_identifier();
    5724         201 :   assert(s != 0 /* nullptr */);
    5725         201 :   if (s.is_null()) {
    5726           0 :     skip_line();
    5727           1 :     return;
    5728             :   }
    5729         201 :   request_or_macro *p = lookup_request(s);
    5730         201 :   macro *m = p->to_macro();
    5731         201 :   if (0 /* nullptr */ == m) {
    5732           1 :     error("cannot apply string case transformation to request '%1'",
    5733           1 :           s.contents());
    5734           1 :     skip_line();
    5735           1 :     return;
    5736             :   }
    5737         400 :   string_iterator iter1(*m);
    5738         200 :   macro *mac = new macro;
    5739         200 :   int len = m->macro::length();
    5740        1975 :   for (int l = 0; l < len; l++) {
    5741        1775 :     int nc, c = iter1.get(0);
    5742        1775 :     if (c == PUSH_GROFF_MODE
    5743        1775 :         || c == PUSH_COMP_MODE
    5744        1775 :         || c == POP_GROFFCOMP_MODE)
    5745           0 :       nc = c;
    5746        1775 :     else if (c == EOF)
    5747           0 :       break;
    5748             :     else
    5749        1775 :       if (mode == STRING_DOWNCASE)
    5750        1720 :         nc = cmlower(c);
    5751             :       else
    5752          55 :         nc = cmupper(c);
    5753        1775 :     mac->append(nc);
    5754             :   }
    5755         200 :   request_dictionary.define(s, mac);
    5756         200 :   tok.next();
    5757             : }
    5758             : 
    5759             : // Lowercase-transform each byte of the string argument's contents.
                       // With no argument, a WARN_MISSING warning is issued and the line
                       // is skipped.
    5760         195 : void stringdown_request() {
    5761         195 :   if (!has_arg()) {
    5762           0 :     warning(WARN_MISSING, "string downcasing request expects an"
    5763             :             " argument");
    5764           0 :     skip_line();
    5765           0 :     return;
    5766             :   }
    5767         195 :   do_string_case_transform(STRING_DOWNCASE);
    5768             : }
    5769             : 
    5770             : // Uppercase-transform each byte of the string argument's contents.
                       // With no argument, a WARN_MISSING warning is issued and the line
                       // is skipped.
    5771           6 : void stringup_request() {
    5772           6 :   if (!has_arg()) {
    5773           0 :     warning(WARN_MISSING, "string upcasing request expects an"
    5774             :             " argument");
    5775           0 :     skip_line();
    5776           0 :     return;
    5777             :   }
    5778           6 :   do_string_case_transform(STRING_UPCASE);
    5779             : }
    5780             : 
    5781      173416 : void substring_request()
    5782             : {
    5783      173416 :   if (!has_arg()) {
    5784           0 :     warning(WARN_MISSING, "substring request expects arguments");
    5785           0 :     skip_line();
    5786           0 :     return;
    5787             :   }
    5788             :   int start;                    // 0, 1, ..., n-1  or  -1, -2, ...
    5789      173416 :   symbol s = read_identifier();
    5790      173416 :   assert(s != 0 /* nullptr */);
    5791      173416 :   if (!s.is_null() && read_integer(&start)) {
    5792      173416 :     request_or_macro *p = lookup_request(s);
    5793      173416 :     macro *m = p->to_macro();
    5794      173416 :     if (0 /* nullptr */ == m)
    5795           0 :       error("cannot extract substring of request '%1'", s.contents());
    5796             :     else {
    5797      173416 :       int end = -1;
    5798      173416 :       if (!has_arg() || read_integer(&end)) {
    5799      173416 :         int real_length = 0;                    // 1, 2, ..., n
    5800      173416 :         string_iterator iter1(*m);
    5801    14598352 :         for (int l = 0; l < m->len; l++) {
    5802    14424936 :           int c = iter1.get(0);
    5803    14424936 :           if (c == PUSH_GROFF_MODE
    5804    14424936 :               || c == PUSH_COMP_MODE
    5805    14424936 :               || c == POP_GROFFCOMP_MODE)
    5806           0 :             continue;
    5807    14424936 :           if (c == EOF)
    5808           0 :             break;
    5809    14424936 :           real_length++;
    5810             :         }
    5811      173416 :         if (start < 0)
    5812        1450 :           start += real_length;
    5813      173416 :         if (end < 0)
    5814       22070 :           end += real_length;
    5815      173416 :         if (start > end) {
    5816          81 :           int tem = start;
    5817          81 :           start = end;
    5818          81 :           end = tem;
    5819             :         }
    5820      173416 :         if (start >= real_length || end < 0) {
    5821           0 :           warning(WARN_RANGE,
    5822             :                   "start and end index of substring out of range");
    5823           0 :           m->len = 0;
    5824           0 :           if (m->p) {
    5825           0 :             if (--(m->p->count) <= 0)
    5826           0 :               delete m->p;
    5827           0 :             m->p = 0;
    5828             :           }
    5829           0 :           skip_line();
    5830           0 :           return;
    5831             :         }
    5832      173416 :         if (start < 0) {
    5833           3 :           warning(WARN_RANGE,
    5834             :                   "start index of substring out of range, set to 0");
    5835           3 :           start = 0;
    5836             :         }
    5837      173416 :         if (end >= real_length) {
    5838           7 :           warning(WARN_RANGE,
    5839             :                   "end index of substring out of range, set to string length");
    5840           7 :           end = real_length - 1;
    5841             :         }
    5842             :         // now extract the substring
    5843      346832 :         string_iterator iter(*m);
    5844             :         int i;
    5845     6951012 :         for (i = 0; i < start; i++) {
    5846     6777596 :           int c = iter.get(0 /* nullptr */);
    5847           0 :           while (c == PUSH_GROFF_MODE
    5848     6777596 :                  || c == PUSH_COMP_MODE
    5849    13555192 :                  || c == POP_GROFFCOMP_MODE)
    5850           0 :             c = iter.get(0 /* nullptr */);
    5851     6777596 :           if (c == EOF)
    5852           0 :             break;
    5853             :         }
    5854      346832 :         macro mac;
    5855      656154 :         for (; i <= end; i++) {
    5856      482738 :           node *nd = 0 /* nullptr */;
    5857      482738 :           int c = iter.get(&nd);
    5858           0 :           while (c == PUSH_GROFF_MODE
    5859      482738 :                  || c == PUSH_COMP_MODE
    5860      965476 :                  || c == POP_GROFFCOMP_MODE)
    5861           0 :             c = iter.get(0 /* nullptr */);
    5862      482738 :           if (c == EOF)
    5863           0 :             break;
    5864      482738 :           if (c == 0)
    5865           0 :             mac.append(nd);
    5866             :           else
    5867      482738 :             mac.append((unsigned char) c);
    5868             :         }
    5869      173416 :         *m = mac;
    5870             :       }
    5871             :     }
    5872             :   }
    5873      173416 :   skip_line();
    5874             : }
    5875             : 
    5876      143341 : void length_request()
    5877             : {
    5878      143341 :   if (!has_arg()) {
    5879           0 :     warning(WARN_MISSING, "length computation request expects"
    5880             :             " arguments");
    5881           0 :     skip_line();
    5882           0 :     return;
    5883             :   }
    5884      143341 :   symbol ret;
    5885      143341 :   ret = read_identifier();
    5886      143341 :   if (ret.is_null()) {
    5887             :     // The identifier was garbage, like `a\&b`.
    5888           0 :     skip_line();
    5889           0 :     return;
    5890             :   }
    5891             :   int c;
    5892             :   node *n;
    5893      143341 :   if (tok.is_newline())
    5894           0 :     c = '\n';
    5895      143341 :   else if (tok.is_tab())
    5896           0 :     c = '\t';
    5897      143341 :   else if (!tok.is_space()) {
    5898           0 :     skip_line();
    5899           0 :     return;
    5900             :   }
    5901             :   else
    5902      143341 :     c = read_char_in_copy_mode(&n);
    5903      143539 :   while (c == ' ')
    5904         198 :     c = read_char_in_copy_mode(&n);
    5905      143341 :   if (c == '"')
    5906       10737 :     c = read_char_in_copy_mode(&n);
    5907      143341 :   int len = 0;
    5908      994831 :   while (c != '\n' && c != EOF) {
    5909      851490 :     ++len;
    5910      851490 :     c = read_char_in_copy_mode(&n);
    5911             :   }
    5912      143341 :   reg *r = static_cast<reg *>(register_dictionary.lookup(ret));
    5913      143341 :   if (r != 0 /* nullptr */)
    5914      137971 :     r->set_value(len);
    5915             :   else
    5916        5370 :     set_register(ret, len);
    5917      143341 :   tok.next();
    5918             : }
    5919             : 
    5920           4 : static void asciify_request()
    5921             : {
    5922           4 :   if (!has_arg()) {
    5923           0 :     warning(WARN_MISSING, "diversion asciification request expects a"
    5924             :             " diversion identifier as argument");
    5925           0 :     skip_line();
    5926           0 :     return;
    5927             :   }
    5928           4 :   symbol s = read_identifier();
    5929           4 :   if (!s.is_null()) {
    5930           4 :     request_or_macro *p = lookup_request(s);
    5931           4 :     macro *m = p->to_macro();
    5932           4 :     if (0 /* nullptr */ == m)
    5933           0 :       error("cannot asciify request '%1'", s.contents());
    5934             :     else {
    5935           8 :       macro am;
    5936           8 :       string_iterator iter(*m);
    5937             :       for (;;) {
    5938         178 :         node *nd = 0 /* nullptr */;
    5939         178 :         int c = iter.get(&nd);
    5940         178 :         if (c == EOF)
    5941           4 :           break;
    5942         174 :         if (c != 0)
    5943           8 :           am.append(c);
    5944             :         else {
    5945         166 :           node *newnd = nd->copy();
    5946         166 :           newnd->asciify(&am);
    5947         166 :           delete nd;
    5948             :         }
    5949         174 :       }
    5950           4 :       *m = am;
    5951             :     }
    5952             :   }
    5953           4 :   skip_line();
    5954             : }
    5955             : 
    5956        1920 : void unformat_macro()
    5957             : {
    5958        1920 :   if (!has_arg()) {
    5959           0 :     warning(WARN_MISSING, "diversion unformatting request expects a"
    5960             :             " diversion identifier as argument");
    5961           0 :     skip_line();
    5962           0 :     return;
    5963             :   }
    5964        1920 :   symbol s = read_identifier();
    5965        1920 :   if (!s.is_null()) {
    5966        1920 :     request_or_macro *p = lookup_request(s);
    5967        1920 :     macro *m = p->to_macro();
    5968        1920 :     if (0 /* nullptr */ == m)
    5969           0 :       error("cannot unformat request '%1'", s.contents());
    5970             :     else {
    5971        3840 :       macro am;
    5972        3840 :       string_iterator iter(*m);
    5973             :       for (;;) {
    5974       55861 :         node *nd = 0 /* nullptr */;
    5975       55861 :         int c = iter.get(&nd);
    5976       55861 :         if (c == EOF)
    5977        1920 :           break;
    5978       53941 :         if (c != 0)
    5979        1649 :           am.append(c);
    5980             :         else {
    5981       52292 :           if (nd->set_unformat_flag())
    5982       48250 :             am.append(nd);
    5983             :         }
    5984       53941 :       }
    5985        1920 :       *m = am;
    5986             :     }
    5987             :   }
    5988        1920 :   skip_line();
    5989             : }
    5990             : 
    5991           3 : static void interpolate_environment_variable(symbol nm)
    5992             : {
    5993           3 :   const char *s = getenv(nm.contents());
    5994           3 :   if ((s != 0 /* nullptr */) && (*s != 0 /* nullptr */))
    5995           1 :     input_stack::push(make_temp_iterator(s));
    5996           3 : }
    5997             : 
    5998     8672390 : void interpolate_register(symbol nm, int inc)
    5999             : {
    6000     8672390 :   reg *r = look_up_register(nm);
    6001     8672390 :   assert(r != 0 /* nullptr */);
    6002     8672390 :   if (inc < 0)
    6003       98022 :     r->decrement();
    6004     8574368 :   else if (inc > 0)
    6005      476085 :     r->increment();
    6006     8672390 :   input_stack::push(make_temp_iterator(r->get_string()));
    6007     8672390 : }
    6008             : 
    6009         626 : static void interpolate_number_format(symbol nm)
    6010             : {
    6011         626 :   reg *r = static_cast<reg *>(register_dictionary.lookup(nm));
    6012         626 :   if (r != 0 /* nullptr */)
    6013         623 :     input_stack::push(make_temp_iterator(r->get_format()));
    6014         626 : }
    6015             : 
    6016         615 : static bool read_delimited_measurement(units *n,
    6017             :                                        unsigned char si,
    6018             :                                        units prev_value)
    6019             : {
    6020        1230 :   token start_token;
    6021         615 :   start_token.next();
    6022         615 :   if (start_token.is_eof()) {
    6023           0 :     error("end of input at start of delimited numeric expression");
    6024           0 :     return false;
    6025             :   }
    6026         615 :   bool is_valid = false;
    6027         615 :   if (!want_att_compat && start_token.is_usable_as_delimiter())
    6028         615 :     is_valid = true;
    6029           0 :   else if (want_att_compat
    6030           0 :            && start_token.is_usable_as_delimiter(false,
    6031             :                   DELIMITER_ATT_NUMERIC_EXPRESSION))
    6032           0 :     is_valid = true;
    6033         615 :   if (!is_valid) {
    6034           0 :     warning(WARN_DELIM, "cannot use %1 to delimit a numeric expression",
    6035           0 :             start_token.description());
    6036           0 :     return false;
    6037             :   }
    6038         615 :   tok.next();
    6039         615 :   if (read_measurement(n, si, prev_value)) {
    6040         615 :     if (start_token != tok) {
    6041             :       // token::description() writes to static, class-wide storage, so
    6042             :       // we must allocate a copy of it before issuing the next
    6043             :       // diagnostic.
    6044           0 :       char *delimdesc = strdup(start_token.description());
    6045           0 :       warning(WARN_DELIM, "closing delimiter does not match;"
    6046           0 :               " expected %1, got %2", delimdesc, tok.description());
    6047           0 :       free(delimdesc);
    6048             :     }
    6049         615 :     return true;
    6050             :   }
    6051           0 :   return false;
    6052             : }
    6053             : 
    6054             : // TODO: Merge into other `read_delimited_measurement()`, using default
    6055             : // argument of 0 for `prev_value`.
    6056      272624 : static bool read_delimited_measurement(units *n, unsigned char si)
    6057             : {
    6058      545248 :   token start_token;
    6059      272624 :   start_token.next();
    6060      272624 :   bool is_valid = false;
    6061      272624 :   if (!want_att_compat && start_token.is_usable_as_delimiter())
    6062      272531 :     is_valid = true;
    6063          93 :   else if (want_att_compat
    6064          93 :            && start_token.is_usable_as_delimiter(false,
    6065             :                   DELIMITER_ATT_NUMERIC_EXPRESSION))
    6066          80 :     is_valid = true;
    6067      272624 :   if (!is_valid) {
    6068          13 :     warning(WARN_DELIM, "cannot use %1 to delimit a numeric expression",
    6069          13 :             start_token.description());
    6070          13 :     return false;
    6071             :   }
    6072      272611 :   tok.next();
    6073      272611 :   if (read_measurement(n, si)) {
    6074      272611 :     if (start_token != tok) {
    6075             :       // token::description() writes to static, class-wide storage, so
    6076             :       // we must allocate a copy of it before issuing the next
    6077             :       // diagnostic.
    6078           0 :       char *delimdesc = strdup(start_token.description());
    6079           0 :       warning(WARN_DELIM, "closing delimiter does not match;"
    6080           0 :               " expected %1, got %2", delimdesc, tok.description());
    6081           0 :       free(delimdesc);
    6082             :     }
    6083      272611 :     return true;
    6084             :   }
    6085           0 :   return false;
    6086             : }
    6087             : 
    6088             : // \l, \L
    6089             : //
    6090             : // Here's some syntax unique to these escape sequences: a horizontal
    6091             : // measurment followed immediately by a character.
    6092         242 : static bool read_line_rule_expression(units *n, unsigned char si,
    6093             :                                       charinfo **cip)
    6094             : {
    6095         242 :   assert(cip != 0 /* nullptr */);
    6096         484 :   token start_token;
    6097         242 :   start_token.next();
    6098         484 :   if (!want_att_compat
    6099         242 :       && !start_token.is_usable_as_delimiter(true /* report error */))
    6100          30 :     return false;
    6101         212 :   else if (want_att_compat
    6102         212 :            && !start_token.is_usable_as_delimiter(true,
    6103             :                   DELIMITER_ATT_NUMERIC_EXPRESSION)) {
    6104           0 :     warning(WARN_DELIM, "line-drawing escape sequence"
    6105             :             " does not accept %1 as a delimiter",
    6106           0 :             start_token.description());
    6107           0 :     return false;
    6108             :   }
    6109         212 :   int start_level = input_stack::get_level();
    6110         212 :   tok.next();
    6111         212 :   if (read_measurement(n, si)) {
    6112         212 :     if (tok.is_dummy() || tok.is_transparent_dummy())
    6113         149 :       tok.next();
    6114         256 :     if (!(start_token == tok
    6115          44 :           && input_stack::get_level() == start_level)) {
    6116         168 :       *cip = tok.get_charinfo(true /* required */);
    6117         168 :       if (0 /* nullptr */ == *cip)
    6118           0 :         assert(0 == "attempted to use token without charinfo in"
    6119             :                " line-drawing escape sequence");
    6120         168 :       tok.next();
    6121             :     }
    6122         424 :     if (!(start_token == tok
    6123         212 :           && input_stack::get_level() == start_level)) {
    6124             :       // token::description() writes to static, class-wide storage, so
    6125             :       // we must allocate a copy of it before issuing the next
    6126             :       // diagnostic.
    6127           0 :       char *delimdesc = strdup(start_token.description());
    6128           0 :       warning(WARN_DELIM, "closing delimiter does not match; expected"
    6129           0 :               " %1, got %2", delimdesc, tok.description());
    6130           0 :       free(delimdesc);
    6131             :     }
    6132         212 :     return true;
    6133             :   }
    6134           0 :   return false;
    6135             : }
    6136             : 
    6137        5972 : static bool read_size(int *x) // \s
    6138             : {
    6139        5972 :   tok.next();
    6140        5972 :   int c = tok.ch(); // safely compares to char literals; TODO: grochar
    6141        5972 :   int inc = 0;
    6142        5972 :   if (c == int('-')) { // TODO: grochar
    6143         209 :     inc = -1;
    6144         209 :     tok.next();
    6145         209 :     c = tok.ch();
    6146             :   }
    6147        5763 :   else if (c == int('+')) { // TODO: grochar
    6148         387 :     inc = 1;
    6149         387 :     tok.next();
    6150         387 :     c = tok.ch();
    6151             :   }
    6152        5972 :   int val = 0;          // pacify compiler
    6153        5972 :   bool contains_invalid_digit = false;
    6154        5972 :   if (c == int('(')) { // TODO: grochar
    6155           0 :     tok.next();
    6156           0 :     c = tok.ch();
    6157           0 :     if (!inc) {
    6158             :       // allow an increment either before or after the left parenthesis
    6159           0 :       if (c == int('-')) { // TODO: grochar
    6160           0 :         inc = -1;
    6161           0 :         tok.next();
    6162           0 :         c = tok.ch();
    6163             :       }
    6164           0 :       else if (c == int('+')) { // TODO: grochar
    6165           0 :         inc = 1;
    6166           0 :         tok.next();
    6167           0 :         c = tok.ch();
    6168             :       }
    6169             :     }
    6170           0 :     if (!csdigit(c))
    6171           0 :       contains_invalid_digit = true;
    6172             :     else {
    6173           0 :       val = c - '0';
    6174           0 :       tok.next();
    6175           0 :       c = tok.ch();
    6176           0 :       if (!csdigit(c))
    6177           0 :         contains_invalid_digit = true;
    6178             :       else {
    6179           0 :         val = val * 10 + (c - '0');
    6180           0 :         val *= sizescale;
    6181             :       }
    6182             :     }
    6183             :   }
    6184        5972 :   else if (csdigit(c)) {
    6185        1636 :     val = c - '0';
    6186        1636 :     if (want_att_compat && !inc && c != '0' && c < '4') {
    6187             :       // Support legacy \sNN syntax.
    6188           2 :       tok.next();
    6189           2 :       c = tok.ch();
    6190           2 :       if (!csdigit(c))
    6191           0 :         contains_invalid_digit = true;
    6192             :       else {
    6193           2 :         val = val * 10 + (c - '0');
    6194           2 :         error("ambiguous type size in escape sequence; rewrite to use"
    6195           2 :               " '%1s(%2' or similar", static_cast<char>(escape_char),
    6196           4 :               val);
    6197             :       }
    6198             :     }
    6199        1636 :     val *= sizescale;
    6200             :   }
    6201        4336 :   else if (!want_att_compat && !tok.is_usable_as_delimiter())
    6202           0 :     warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
    6203           0 :                         " is deprecated", tok.description());
    6204        4336 :   else if (want_att_compat
    6205        4336 :            && !tok.is_usable_as_delimiter(false,
    6206             :                   DELIMITER_ATT_STRING_EXPRESSION)) {
    6207           0 :     warning(WARN_DELIM, "type size escape sequence"
    6208             :             " does not accept %1 as a delimiter",
    6209           0 :             tok.description());
    6210           0 :     return false;
    6211             :   }
    6212             :   // TODO: groff 1.24.0 release + 2 years?
    6213             : #if 0
    6214             :   else if (!tok.is_usable_as_delimiter(true /* report error */))
    6215             :     return false;
    6216             : #endif
    6217             :   else {
    6218        4336 :     token start(tok);
    6219        4336 :     tok.next();
    6220        4336 :     c = tok.ch();
    6221        4336 :     if ((inc == 0) && ((c == '-') || (c == '+'))) {
    6222         471 :       inc = (c == '+') ? 1 : -1;
    6223         471 :       tok.next();
    6224             :     }
    6225        4336 :     if (!read_measurement(&val, (unsigned char)('z'))) // TODO: grochar
    6226           0 :       return false;
    6227             :     // safely compares to char literals; TODO: grochar
    6228        4336 :     int s = start.ch();
    6229        4336 :     int t = tok.ch();
    6230        4336 :     if (!((s == int('[')) && (t == int(']'))) && (start != tok)) {
    6231           0 :       if (s == int('['))
    6232           0 :         error("missing ']' in type size escape sequence");
    6233             :       else {
    6234             :         // token::description() writes to static, class-wide storage, so
    6235             :         // we must allocate a copy of it before issuing the next
    6236             :         // diagnostic.
    6237           0 :         char *delimdesc = strdup(start.description());
    6238           0 :         if (s != t)
    6239           0 :           error("closing delimiter does not match; expected %1, got %2",
    6240           0 :                 delimdesc, tok.description());
    6241           0 :         free(delimdesc);
    6242             :       }
    6243           0 :       return false;
    6244             :     }
    6245             :   }
    6246        5972 :   if (contains_invalid_digit) {
    6247           0 :     if (c != 0U)
    6248           0 :       error("expected valid digit in type size escape sequence, got %1",
    6249           0 :             input_char_description(c));
    6250             :     else
    6251           0 :       error("invalid digit in type size escape sequence");
    6252           0 :     return false;
    6253             :   }
    6254             :   else {
    6255        5972 :     switch (inc) {
    6256        4905 :     case 0:
    6257        4905 :       if (val == 0) {
    6258             :         // special case -- point size 0 means "revert to previous size"
    6259        1794 :         *x = 0;
    6260        1794 :         return true;
    6261             :       }
    6262        3111 :       *x = val;
    6263        3111 :       break;
    6264         480 :     case 1:
    6265         480 :       *x = curenv->get_requested_point_size() + val;
    6266         480 :       break;
    6267         587 :     case -1:
    6268         587 :       *x = curenv->get_requested_point_size() - val;
    6269         587 :       break;
    6270           0 :     default:
    6271           0 :       assert(0 == "unhandled case of type size increment operator");
    6272             :     }
    6273        4178 :     if (*x <= 0) {
    6274           1 :       warning(WARN_RANGE,
    6275             :               "type size escape sequence results in non-positive size"
    6276           1 :               " %1u; setting it to 1u", *x);
    6277           1 :       *x = 1;
    6278             :     }
    6279        4178 :     return true;
    6280             :   }
    6281             : }
    6282             : 
    6283          53 : static symbol read_delimited_identifier()
    6284             : {
    6285         106 :   token start_token;
    6286          53 :   start_token.next();
    6287          53 :   if (start_token.is_eof()) {
    6288           0 :     error("end of input at start of delimited name");
    6289           0 :     return NULL_SYMBOL;
    6290             :   }
    6291          53 :   bool is_valid = false;
    6292          53 :   if (!want_att_compat && start_token.is_usable_as_delimiter())
    6293          52 :     is_valid = true;
    6294           1 :   else if (want_att_compat
    6295           1 :            && start_token.is_usable_as_delimiter(false,
    6296             :                   DELIMITER_ATT_STRING_EXPRESSION))
    6297           0 :     is_valid = true;
    6298          53 :   if (!is_valid) {
    6299           1 :     warning(WARN_DELIM, "cannot use %1 to delimit an identifier",
    6300           1 :             start_token.description());
    6301           1 :     return NULL_SYMBOL;
    6302             :   }
    6303          52 :   int start_level = input_stack::get_level();
    6304          52 :   int buf_size = default_buffer_size;
    6305          52 :   char *buf = 0 /* nullptr */;
    6306             :   try {
    6307             :     // C++03: new char[buf_size]();
    6308          52 :     buf = new char[buf_size];
    6309             :   }
    6310           0 :   catch (const std::bad_alloc &e) {
    6311           0 :     fatal("cannot allocate %1 bytes to read input line", buf_size);
    6312             :   }
    6313          52 :   (void) memset(buf, 0, (buf_size * sizeof(char)));
    6314          52 :   int i = 0;
    6315             :   for (;;) {
    6316         682 :     if ((i + 1) > buf_size) {
    6317           0 :       char *old_buf = buf;
    6318           0 :       int new_buf_size = buf_size * 2;
    6319             :       // C++03: new char[new_buf_size]();
    6320             :       try {
    6321           0 :         buf = new char[new_buf_size];
    6322             :       }
    6323           0 :       catch (const std::bad_alloc &e) {
    6324           0 :         fatal("cannot allocate %1 bytes to read input line", buf_size);
    6325             :       }
    6326           0 :       (void) memset(buf, 0, (new_buf_size * sizeof(char)));
    6327           0 :       (void) memcpy(buf, old_buf, buf_size);
    6328           0 :       buf_size = new_buf_size;
    6329           0 :       delete[] old_buf;
    6330             :     }
    6331         682 :     tok.next();
    6332         682 :     if ((tok == start_token)
    6333         734 :         && (want_att_compat
    6334          52 :             || (input_stack::get_level() == start_level)))
    6335          52 :       break;
    6336         630 :     if ((buf[i] = tok.ch()) == 0U) {
    6337             :       // token::description() writes to static, class-wide storage, so
    6338             :       // we must allocate a copy of it before issuing the next
    6339             :       // diagnostic.
    6340           0 :       char *delimdesc = strdup(start_token.description());
    6341           0 :       if (start_token != tok)
    6342           0 :         error("closing delimiter does not match; expected %1, got %2",
    6343           0 :               delimdesc, tok.description());
    6344           0 :       free(delimdesc);
    6345           0 :       delete[] buf;
    6346           0 :       return NULL_SYMBOL;
    6347             :     }
    6348         630 :     i++;
    6349         630 :   }
    6350          52 :   buf[i] = '\0';
    6351          52 :   if (0 == i) {
    6352           0 :     error("empty delimited name");
    6353           0 :     return NULL_SYMBOL;
    6354             :   }
    6355          52 :   symbol s(buf);
    6356          52 :   delete[] buf;
    6357          52 :   return s;
    6358             : }
    6359             : 
    6360        6142 : static void do_register() // \R
    6361             : {
    6362        6142 :   token start_token;
    6363        6142 :   start_token.next();
    6364        6142 :   if (!want_att_compat && !start_token.is_usable_as_delimiter())
    6365           0 :     warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
    6366           0 :                         " is deprecated", tok.description());
    6367        6142 :   else if (want_att_compat
    6368        6142 :            && !start_token.is_usable_as_delimiter(false,
    6369             :                   DELIMITER_ATT_STRING_EXPRESSION)) {
    6370           0 :     warning(WARN_DELIM, "register assignment escape sequence"
    6371             :             " does not accept %1 as a delimiter",
    6372           0 :             start_token.description());
    6373           0 :     return;
    6374             :   }
    6375             :   // TODO: groff 1.24.0 release + 2 years?
    6376             : #if 0
    6377             :   if (!start_token.is_usable_as_delimiter(true /* report error */)) {
    6378             :     return;
    6379             : #endif
    6380        6142 :   tok.next();
    6381        6142 :   symbol nm = read_long_identifier(true /* required */);
    6382        6142 :   if (nm.is_null())
    6383           0 :     return;
    6384        6142 :   tok.skip_spaces();
    6385        6142 :   reg *r = static_cast<reg *>(register_dictionary.lookup(nm));
    6386             :   int prev_value;
    6387        6142 :   if ((0 /* nullptr */ == r) || !r->get_value(&prev_value))
    6388         176 :     prev_value = 0;
    6389             :   int val;
    6390             :   // TODO: grochar
    6391        6142 :   if (!read_measurement(&val, (unsigned char)('u'), prev_value))
    6392           0 :     return;
    6393             :   // token::description() writes to static, class-wide storage, so we
    6394             :   // must allocate a copy of it before issuing the next diagnostic.
    6395        6142 :   char *delimdesc = strdup(start_token.description());
    6396        6142 :   if (start_token != tok)
    6397           0 :     warning(WARN_DELIM, "closing delimiter does not match; expected %1,"
    6398           0 :             " got %2", delimdesc, tok.description());
    6399        6142 :   free(delimdesc);
    6400        6142 :   if (r != 0 /* nullptr */)
    6401        5966 :     r->set_value(val);
    6402             :   else
    6403         176 :     set_register(nm, val);
    6404             : }
    6405             : 
    6406       26009 : static void do_width() // \w
    6407             : {
    6408       26009 :   token start_token;
    6409       26009 :   start_token.next();
    6410       26009 :   if (!want_att_compat && !start_token.is_usable_as_delimiter())
    6411           0 :     warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
    6412           0 :                         " is deprecated", start_token.description());
    6413       26009 :   else if (want_att_compat
    6414       26009 :            && !start_token.is_usable_as_delimiter(false,
    6415             :                   DELIMITER_ATT_STRING_EXPRESSION)) {
    6416           0 :     warning(WARN_DELIM, "width computation escape sequence"
    6417             :             " does not accept %1 as a delimiter",
    6418           0 :             start_token.description());
    6419           0 :     return;
    6420             :   }
    6421             :   // TODO: groff 1.24.0 release + 2 years?
    6422             : #if 0
    6423             :   if (!start_token.is_usable_as_delimiter(true /* report error */))
    6424             :     return;
    6425             : #endif
    6426       26009 :   int start_level = input_stack::get_level();
    6427       26009 :   environment env(curenv);
    6428       26009 :   environment *oldenv = curenv;
    6429       26009 :   curenv = &env;
    6430             :   for (;;) {
    6431      244007 :     tok.next();
    6432      244007 :     if (tok.is_newline() || tok.is_eof()) {
    6433             :       // token::description() writes to static, class-wide storage, so
    6434             :       // we must allocate a copy of it before issuing the next
    6435             :       // diagnostic.
    6436           0 :       char *delimdesc = strdup(start_token.description());
    6437           0 :       warning(WARN_DELIM, "missing closing delimiter in width"
    6438             :               " computation escape sequence; expected %1, got %2",
    6439           0 :               delimdesc, tok.description());
    6440           0 :       free(delimdesc);
    6441           0 :       break;
    6442             :     }
    6443      244007 :     if (tok == start_token
    6444      244007 :         && (want_att_compat || input_stack::get_level() == start_level))
    6445       26009 :       break;
    6446      217998 :     tok.process();
    6447      217998 :   }
    6448       26009 :   env.wrap_up_tab();
    6449       26009 :   units x = env.get_input_line_position().to_units();
    6450       26009 :   input_stack::push(make_temp_iterator(i_to_a(x)));
    6451       26009 :   env.width_registers();
    6452       26009 :   curenv = oldenv;
    6453       26009 :   have_formattable_input = false;
    6454             : }
    6455             : 
    6456             : charinfo *page_character;
    6457             : 
    6458             : // XXX: The page character is global; shouldn't it be environmental?
    6459             : // Its idiomatic use is in `tl` requests when formatting titles (headers
    6460             : // or footers), which full-service macro packages typically put in their
    6461             : // own environment anyway to ensure that a consistent typeface is used
    6462             : // there regardless of how body text is styled.
    6463           0 : static void page_character_request()
    6464             : {
    6465           0 :   page_character = read_character();
    6466             :   // TODO?: If null pointer, set to `percent_symbol` (see below),
    6467             :   // eliminating test in `read_title_parts()` (also below)?
    6468           0 :   skip_line();
    6469           0 : }
    6470             : 
    6471             : static const symbol percent_symbol("%");
    6472             : 
    6473        3298 : void read_title_parts(node **part, hunits *part_width)
    6474             : {
    6475        3298 :   if (!has_arg())
    6476         188 :     return;
    6477        6220 :   token start(tok);
    6478        3110 :   if (!want_att_compat && !tok.is_usable_as_delimiter())
    6479           0 :     warning(WARN_DELIM, "using %1 as a title request delimiter"
    6480           0 :                         " is deprecated", tok.description());
    6481        3110 :   int start_level = input_stack::get_level();
    6482        3110 :   tok.next();
    6483       12440 :   for (int i = 0; i < 3; i++) {
    6484      122361 :     while (!tok.is_newline() && !tok.is_eof()) {
    6485      122361 :       if ((tok == start)
    6486      131691 :           && (want_att_compat
    6487        9330 :               || input_stack::get_level() == start_level)) {
    6488        9330 :         tok.next();
    6489        9330 :         break;
    6490             :       }
    6491      113031 :       charinfo *ci = tok.get_charinfo();
    6492             :       // It's okay for `ci` to be a null pointer; that will be the case
    6493             :       // if the token is a node: italic corrections, horizontal motions,
    6494             :       // and so forth.  TODO: Is it worth warning about some node types?
    6495      113031 :       if ((ci != 0 /* nullptr */)
    6496      102596 :           && (page_character != 0 /* nullptr */)
    6497      102596 :           && (page_character == ci))
    6498          88 :         interpolate_register(percent_symbol, 0);
    6499             :       else
    6500      112943 :         tok.process();
    6501      113031 :       tok.next();
    6502             :     }
    6503        9330 :     curenv->wrap_up_tab();
    6504        9330 :     part_width[i] = curenv->get_input_line_position();
    6505        9330 :     part[i] = curenv->extract_output_line();
    6506             :   }
    6507        3110 :   while (!tok.is_newline() && !tok.is_eof())
    6508           0 :     tok.next();
    6509             : }
    6510             : 
    6511             : // contents of `\?...\?`
    6512             : class non_interpreted_node : public node {
    6513             :   macro mac;
    6514             : public:
    6515             :   non_interpreted_node(const macro &);
    6516             :   bool interpret(macro *);
    6517             :   void asciify(macro *);
    6518             :   node *copy();
    6519             :   int ends_sentence();
    6520             :   bool is_same_as(node *);
    6521             :   const char *type();
    6522             :   bool causes_tprint();
    6523             :   bool is_tag();
    6524             : };
    6525             : 
    6526      457945 : non_interpreted_node::non_interpreted_node(const macro &m) : mac(m)
    6527             : {
    6528      457945 : }
    6529             : 
    6530         212 : int non_interpreted_node::ends_sentence()
    6531             : {
    6532         212 :   return 2;
    6533             : }
    6534             : 
    6535      228916 : bool non_interpreted_node::is_same_as(node *nd)
    6536             : {
    6537      228916 :   return (mac == static_cast<non_interpreted_node *>(nd)->mac);
    6538             : }
    6539             : 
    6540      457832 : const char *non_interpreted_node::type()
    6541             : {
    6542      457832 :   return "non-interpreted node";
    6543             : }
    6544             : 
    6545           0 : bool non_interpreted_node::causes_tprint()
    6546             : {
    6547           0 :   return false;
    6548             : }
    6549             : 
    6550           0 : bool non_interpreted_node::is_tag()
    6551             : {
    6552           0 :   return false;
    6553             : }
    6554             : 
    6555           0 : void non_interpreted_node::asciify(macro *)
    6556             : {
    6557           0 :   delete this;
    6558           0 : }
    6559             : 
    6560           0 : node *non_interpreted_node::copy()
    6561             : {
    6562           0 :   return new non_interpreted_node(mac);
    6563             : }
    6564             : 
    6565         107 : bool non_interpreted_node::interpret(macro *m)
    6566             : {
    6567         107 :   string_iterator si(mac);
    6568         107 :   node *n = 0 /* nullptr */;
    6569             :   for (;;) {
    6570        1382 :     int c = si.get(&n);
    6571        1382 :     if (c == EOF)
    6572         107 :       break;
    6573        1275 :     if (c == 0)
    6574           0 :       m->append(n);
    6575             :     else
    6576        1275 :       m->append(c);
    6577        1275 :   }
    6578         214 :   return true;
    6579             : }
    6580             : 
    6581      457945 : static node *do_non_interpreted() // \?
    6582             : {
    6583             :   node *n;
    6584             :   int c;
    6585      915890 :   macro mac;
    6586     1637909 :   while (((c = read_char_in_copy_mode(&n)) != ESCAPE_QUESTION)
    6587     1179964 :          && (c != EOF)
    6588     2817873 :          && (c != '\n'))
    6589     1179964 :     if (c == 0)
    6590           0 :       mac.append(n);
    6591             :     else
    6592     1179964 :       mac.append(c);
    6593      457945 :   if (c == EOF || c == '\n') {
    6594           0 :     error("unterminated transparent embedding escape sequence");
    6595           0 :     return 0 /* nullptr */;
    6596             :   }
    6597      457945 :   return new non_interpreted_node(mac);
    6598             : }
    6599             : 
    6600         246 : static void map_special_character_for_device_output(macro *mac,
    6601             :                                                     const char *sc)
    6602             : {
    6603         246 :   if (strcmp("-", sc) == 0)
    6604         178 :     mac->append('-');
    6605          68 :   else if (strcmp("dq", sc) == 0)
    6606           4 :     mac->append('"');
    6607          64 :   else if (strcmp("sh", sc) == 0)
    6608           1 :     mac->append('#');
    6609          63 :   else if (strcmp("Do", sc) == 0)
    6610           1 :     mac->append('$');
    6611          62 :   else if (strcmp("aq", sc) == 0)
    6612           4 :     mac->append('\'');
    6613          58 :   else if (strcmp("sl", sc) == 0)
    6614           1 :     mac->append('/');
    6615          57 :   else if (strcmp("at", sc) == 0)
    6616           1 :     mac->append('@');
    6617          56 :   else if (strcmp("lB", sc) == 0)
    6618           1 :     mac->append('[');
    6619          55 :   else if (strcmp("rs", sc) == 0)
    6620           8 :     mac->append('\\');
    6621          47 :   else if (strcmp("rB", sc) == 0)
    6622           1 :     mac->append(']');
    6623          46 :   else if (strcmp("ha", sc) == 0)
    6624           4 :     mac->append('^');
    6625          42 :   else if (strcmp("lC", sc) == 0)
    6626           1 :     mac->append('{');
    6627          41 :   else if (strcmp("ba", sc) == 0)
    6628           1 :     mac->append('|');
    6629          40 :   else if (strcmp("or", sc) == 0)
    6630           1 :     mac->append('|');
    6631          39 :   else if (strcmp("rC", sc) == 0)
    6632           1 :     mac->append('}');
    6633          38 :   else if (strcmp("ti", sc) == 0)
    6634           7 :     mac->append('~');
    6635             :   else {
    6636          31 :     if (font::use_charnames_in_special) {
    6637           3 :       if (sc[0] != '\0') {
    6638           3 :         mac->append('\\');
    6639           3 :         mac->append('[');
    6640           3 :         int i = 0;
    6641          15 :         while (sc[i] != '\0') {
    6642          12 :           mac->append(sc[i]);
    6643          12 :           i++;
    6644             :         }
    6645           3 :         mac->append(']');
    6646             :       }
    6647             :     }
    6648             :     else {
    6649             :       char errbuf[ERRBUFSZ]; // C++03: char errbuf[ERRBUFSZ]()
    6650          28 :       (void) memset(errbuf, '\0', ERRBUFSZ);
    6651          28 :       const size_t unibufsz = UNIBUFSZ + 1 /* '\0' */;
    6652             :       char character[unibufsz]; // C++03: char errbuf[ERRBUFSZ]()
    6653          28 :       (void) memset(character, '\0', UNIBUFSZ);
    6654             :       // If it looks like something other than an attempt at a Unicode
    6655             :       // special character escape sequence already, try to convert it
    6656             :       // into one.  Output drivers don't (and shouldn't) know anything
    6657             :       // about a troff formatter's special character identifiers.
    6658          28 :       if ((strlen(sc) < 3) || (sc[0] != 'u')) {
    6659           9 :         const char *un = glyph_name_to_unicode(sc);
    6660           9 :         if (un != 0 /* nullptr */)
    6661           8 :           strncpy(character, un, unibufsz);
    6662             :         else {
    6663           1 :           warning(WARN_CHAR, "special character '%1' is not encodable"
    6664           1 :                " in device-independent output", sc);
    6665           2 :           return;
    6666           8 :         }
    6667             :       }
    6668             :       else {
    6669          19 :         const char *un = valid_unicode_code_sequence(sc, errbuf);
    6670          19 :         if (0 /* nullptr */ == un) {
    6671           1 :           warning(WARN_CHAR, "special character '%1' is not encodable"
    6672           1 :                " in device-independent output: %2", sc, errbuf);
    6673           1 :           return;
    6674             :         }
    6675          18 :         strncpy(character, un, unibufsz);
    6676             :       }
    6677          26 :       mac->append_str("\\[u");
    6678          26 :       mac->append_str(character);
    6679          26 :       mac->append(']');
    6680             :     }
    6681             :   }
    6682             : }
    6683             : 
    6684         224 : static void encode_special_character_for_device_output(macro *mac)
    6685             : {
    6686             :   const char *sc;
    6687         224 :   charinfo *ci = tok.get_charinfo(true /* required */);
    6688         224 :   if (0 /* nullptr */ == ci) {
    6689           0 :     assert(0 == "attempted to encode token without charinfo for"
    6690             :            " device extension command output");
    6691             :     return;
    6692             :   }
    6693         224 :   sc = ci->get_symbol()->contents();
    6694         224 :   if (0 /* nullptr */ == sc) {
    6695           0 :     assert(0 == "attempted to encode token containing charinfo with"
    6696             :            " null symbol for device extension command output");
    6697             :     return;
    6698             :   }
    6699         224 :   map_special_character_for_device_output(mac, sc);
    6700             : }
    6701             : 
    6702             : // In troff output, we translate the escape character to '\', but it is
    6703             : // up to the postprocessor to interpret it as such.  (This mostly
    6704             : // matters for device extension commands.)
    6705     1710337 : static void encode_character_for_device_output(macro *mac, const char c)
    6706             : {
    6707     1710337 :   if ('\0' == c) {
    6708             :     // It's a special token, not a character we can write as-is.
    6709         226 :     if (tok.is_stretchable_space()
    6710         226 :              || tok.is_unstretchable_space())
    6711           1 :       mac->append(' ');
    6712         225 :     else if ((tok.is_hyphen_indicator())
    6713         225 :              || tok.is_zero_width_break()
    6714         225 :              || tok.is_dummy()
    6715         450 :              || tok.is_transparent_dummy())
    6716             :       /* do nothing */;
    6717         225 :     else if (tok.is_special_character())
    6718         224 :       encode_special_character_for_device_output(mac);
    6719             :     else
    6720           1 :       warning(WARN_CHAR, "%1 is not encodable in device-independent"
    6721           2 :               " output ('asciify' might help)", tok.description());
    6722             :   }
    6723             :   else {
    6724     1710111 :     if (c == escape_char)
    6725          18 :       mac->append('\\');
    6726             :     else
    6727     1710093 :       mac->append(c);
    6728             :   }
    6729     1710337 : }
    6730             : 
    6731       70842 : static node *do_device_extension() // \X
    6732             : {
    6733      141684 :   token start_token;
    6734       70842 :   start_token.next();
    6735       70842 :   if (!want_att_compat && !start_token.is_usable_as_delimiter())
    6736           0 :     warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
    6737           0 :                         " is deprecated", tok.description());
    6738       70842 :   else if (want_att_compat
    6739       70842 :            && !start_token.is_usable_as_delimiter(false,
    6740             :                   DELIMITER_ATT_STRING_EXPRESSION)) {
    6741           0 :     warning(WARN_DELIM, "device extension command escape sequence"
    6742             :             " does not accept %1 as a delimiter",
    6743           0 :             start_token.description());
    6744           0 :     return 0 /* nullptr */;
    6745             :   }
    6746             :   // TODO: groff 1.24.0 release + 2 years?
    6747             : #if 0
    6748             :   if (!start_token.is_usable_as_delimiter(true /* report error */))
    6749             :     return 0 /* nullptr */;
    6750             : #endif
    6751       70842 :   int start_level = input_stack::get_level();
    6752       70842 :   macro mac;
    6753       70842 :   if ((curdiv == topdiv) && (topdiv->before_first_page_status > 0))
    6754          70 :     topdiv->begin_page();
    6755             :   for (;;) {
    6756     1783501 :     tok.next();
    6757     1783501 :     if (tok.is_newline() || tok.is_eof()) {
    6758             :       // token::description() writes to static, class-wide storage, so
    6759             :       // we must allocate a copy of it before issuing the next
    6760             :       // diagnostic.
    6761          14 :       char *delimdesc = strdup(start_token.description());
    6762          14 :       warning(WARN_DELIM, "missing closing delimiter in device"
    6763             :               " extension escape sequence; expected %1, got %2",
    6764          14 :               delimdesc, tok.description());
    6765          14 :       free(delimdesc);
    6766          14 :       break;
    6767             :     }
    6768     1783487 :     if (tok == start_token
    6769     1783487 :         && (want_att_compat || input_stack::get_level() == start_level))
    6770       70828 :       break;
    6771             :     unsigned char c; // TODO: grochar
    6772     1712659 :     if (tok.is_space())
    6773      242705 :       c = ' ';
    6774             :     // TODO: Stop silently ignoring these when we have a string
    6775             :     // iterator for users and can externalize "sanitization" operations.
    6776             :     // See <https://savannah.gnu.org/bugs/?62264>.
    6777     1469954 :     else if (tok.is_hyphen_indicator())
    6778         797 :       continue;
    6779     1469157 :     else if (tok.is_dummy())
    6780           3 :       continue;
    6781     1469154 :     else if (tok.is_zero_width_break())
    6782        1522 :       continue;
    6783             :     else
    6784     1467632 :       c = tok.ch();
    6785     1710337 :     encode_character_for_device_output(&mac, c);
    6786     1712659 :   }
    6787       70842 :   return new device_extension_node(mac);
    6788             : }
    6789             : 
    6790         206 : static void device_request()
    6791             : {
    6792         206 :   if (!has_arg(true /* peek; we want to read in copy mode */)) {
    6793           0 :     warning(WARN_MISSING, "device extension request expects an"
    6794             :             " argument");
    6795           0 :     skip_line();
    6796           0 :     return;
    6797             :   }
    6798         412 :   macro mac;
    6799             :   int c;
    6800             :   for (;;) {
    6801         206 :     c = read_char_in_copy_mode(0 /* nullptr */);
    6802         206 :     if ('"' == c) {
    6803          14 :       c = read_char_in_copy_mode(0 /* nullptr */);
    6804          14 :       break;
    6805             :     }
    6806         192 :     if (c != ' ' && c != '\t')
    6807         192 :       break;
    6808             :   }
    6809         206 :   if ((curdiv == topdiv) && (topdiv->before_first_page_status > 0))
    6810          37 :     topdiv->begin_page();
    6811        4748 :   for (;
    6812        4954 :       (c != '\0') && (c != '\n') && (c != EOF);
    6813        4748 :        c = read_char_in_copy_mode(0 /* nullptr */)) {
    6814             :     // We may encounter some of the C0 and C1 character codes GNU troff
    6815             :     // uses for special purposes; see src/roff/troff/input.h.  They
    6816             :     // produce nothing in grout.  Warn only about the ones that are left
    6817             :     // for the user's purposes.  Use octal because input.h does.  Ignore
    6818             :     // 8-bit codes in general.  grout is an ISO 646 file format.
    6819        4748 :     if (ESCAPE_TILDE == c) {
    6820           1 :       mac.append('\\');
    6821           1 :       mac.append('~');
    6822             :     }
    6823        4747 :     else if ((c < 015) || (c >= 0177))
    6824           1 :       warning (WARN_SYNTAX, "ignoring character code %1 in device"
    6825           2 :                " extension command request argument", c);
    6826        4746 :     else if (c != '\\')
    6827        4720 :       mac.append(c);
    6828             :     else {
    6829          26 :       int c1 = read_char_in_copy_mode(0 /* nullptr */);
    6830          26 :       if (c1 != '[') {
    6831           3 :         mac.append(c);
    6832           3 :         mac.append(c1);
    6833           3 :         string chardesc = "";
    6834           3 :         if (csprint(c1)) {
    6835           3 :           chardesc += "'";
    6836           3 :           chardesc += char(c1);
    6837           3 :           chardesc += "'";
    6838             :         }
    6839             :         else {
    6840           0 :           chardesc += "character code ";
    6841           0 :           chardesc += i_to_a(c1);
    6842             :         }
    6843           3 :         chardesc += '\0'; // make it safe for .contents()
    6844           3 :         warning (WARN_SYNTAX, "not interpreting escaped %1 in device"
    6845             :                    " extension command request argument",
    6846           6 :                    chardesc.contents());
    6847             :       }
    6848             :       else {
    6849             :         // Does the input resemble a valid (bracket-form) special
    6850             :         // character escape sequence?
    6851          23 :         bool is_valid = false;
    6852          46 :         string sc = "";
    6853          23 :         int c2 = read_char_in_copy_mode(0 /* nullptr */);
    6854         126 :         for (; (c2 != '\0') && (c2 != '\n') && (c2 != EOF);
    6855         103 :              c2 = read_char_in_copy_mode(0 /* nullptr */)) {
    6856             :           // XXX: `map_special_character_for_device_output()` will need
    6857             :           // the closing bracket in the iterator we construct, but a
    6858             :           // composite character mapping mustn't see it.
    6859         126 :           sc += c2;
    6860         126 :           if (']' == c2) {
    6861          23 :             is_valid = true;
    6862          23 :             break;
    6863             :           }
    6864             :         }
    6865          23 :         sc += '\0';
    6866          23 :         if (sc.search(' ') > 0) {
    6867             :           // XXX: TODO
    6868           1 :           error("composite special character escape sequences not yet"
    6869             :                 " supported in device extension command arguments");
    6870           1 :           is_valid = false;
    6871             :         }
    6872          23 :         if (is_valid) {
    6873          22 :           input_stack::push(make_temp_iterator(sc.contents()));
    6874          22 :           symbol s = read_long_escape_parameters(WITH_ARGS);
    6875          22 :           map_special_character_for_device_output(&mac, s.contents());
    6876             :         }
    6877             :         else {
    6878             :           // We couldn't make sense of it.  Write it out as-is.
    6879           1 :           mac.append(c);
    6880           1 :           mac.append(c1);
    6881           1 :           mac.append_str(sc.contents());
    6882             :         }
    6883             :       }
    6884             :     }
    6885             :   }
    6886         206 :   curenv->add_node(new device_extension_node(mac));
    6887         206 :   tok.next();
    6888             : }
    6889             : 
    6890           1 : static void device_macro_request()
    6891             : {
    6892           1 :   symbol s = read_identifier(true /* required */);
    6893           1 :   if (!(s.is_null() || s.is_empty())) {
    6894           1 :     request_or_macro *p = lookup_request(s);
    6895           1 :     macro *m = p->to_macro();
    6896           1 :     if (m != 0 /* nullptr */)
    6897           1 :       curenv->add_node(new device_extension_node(*m));
    6898             :     else
    6899           0 :       error("cannot interpolate '%1' to device-independent output;"
    6900           0 :             " it is a request, not a macro", s.contents());
    6901             :   }
    6902           1 :   skip_line();
    6903           1 : }
    6904             : 
    6905          44 : static void output_request()
    6906             : {
    6907          44 :   if (!has_arg(true /* peek; we want to read in copy mode */)) {
    6908           0 :     warning(WARN_MISSING, "output request expects arguments");
    6909           0 :     skip_line();
    6910           0 :     return;
    6911             :   }
    6912             :   int c;
    6913             :   for (;;) {
    6914          44 :     c = read_char_in_copy_mode(0 /* nullptr */);
    6915          44 :     if ('"' == c) {
    6916          14 :       c = read_char_in_copy_mode(0 /* nullptr */);
    6917          14 :       break;
    6918             :     }
    6919          30 :     if (c != ' ' && c != '\t')
    6920          30 :       break;
    6921             :   }
    6922         314 :   for (;
    6923         358 :        (c != '\n') && (c != EOF);
    6924         314 :        (c = read_char_in_copy_mode(0 /* nullptr */)))
    6925         314 :     topdiv->transparent_output(c);
    6926          44 :   topdiv->transparent_output('\n');
    6927          44 :   tok.next();
    6928             : }
    6929             : 
    6930             : extern int image_no;            // from node.cpp
    6931             : 
    6932         757 : static node *do_suppress(symbol nm) // \O
    6933             : {
    6934         757 :   if (nm.is_null() || nm.is_empty()) {
    6935           0 :     error("output suppression escape sequence requires an argument");
    6936           0 :     return 0 /* nullptr */;
    6937             :   }
    6938         757 :   const char *s = nm.contents();
    6939         757 :   switch (*s) {
    6940         141 :   case '0':
    6941         141 :     if (0 == suppression_level)
    6942             :       // suppress generation of glyphs
    6943         135 :       return new suppress_node(0, 0);
    6944           6 :     break;
    6945         124 :   case '1':
    6946         124 :     if (0 == suppression_level)
    6947             :       // enable generation of glyphs
    6948         118 :       return new suppress_node(1, 0);
    6949           6 :     break;
    6950         123 :   case '2':
    6951         123 :     if (0 == suppression_level)
    6952         117 :       return new suppress_node(1, 1);
    6953           6 :     break;
    6954         123 :   case '3':
    6955         123 :     have_formattable_input = true;
    6956         123 :     suppression_level++;
    6957         123 :     break;
    6958         123 :   case '4':
    6959         123 :     have_formattable_input = true;
    6960         123 :     suppression_level--;
    6961         123 :     break;
    6962         123 :   case '5':
    6963             :     {
    6964         123 :       s++;                      // move over '5'
    6965         123 :       char position = *s;
    6966         123 :       if ('\0' == *s) {
    6967           0 :         error("missing position and file name in output suppression"
    6968             :               " escape sequence");
    6969           0 :         return 0 /* nullptr */;
    6970             :       }
    6971         123 :       if ((position != 'l')
    6972         119 :           && (position != 'r')
    6973         119 :           && (position != 'c')
    6974           5 :           && (position != 'i')) {
    6975           0 :         error("expected position 'l', 'r', 'c', or 'i' in output"
    6976           0 :               " suppression escape sequence, got '%1'", position);
    6977           0 :         return 0 /* nullptr */;
    6978             :       }
    6979         123 :       s++;                      // onto image name
    6980         123 :       if (0 == s /* nullptr */) {
    6981           0 :         error("missing image name in output suppression escape"
    6982             :               " sequence");
    6983           0 :         return 0 /* nullptr */;
    6984             :       }
    6985         123 :       image_no++;
    6986         123 :       if (0 == suppression_level)
    6987         117 :         return new suppress_node(symbol(s), position, image_no);
    6988             :       else
    6989           6 :         have_formattable_input = true;
    6990             :     }
    6991           6 :     break;
    6992           0 :   default:
    6993           0 :     char qc = '\'';
    6994           0 :     if (strchr(s, '\'') != 0 /* nullptr */)
    6995           0 :       qc = '"';
    6996           0 :     error("invalid argument %1%2%3 to output suppression escape"
    6997           0 :           " sequence", qc, *s, qc);
    6998             :   }
    6999         270 :   return 0 /* nullptr */;
    7000             : }
    7001             : // Write this node's contents to `out`: bracket the output with
                       // tprint_start() and tprint_end(), and in between pass every
                       // character read from `mac` through encode_for_stream_output(),
                       // emitting the resulting string one character at a time.
    7002       66977 : void device_extension_node::tprint(troff_output_file *out)
    7003             : {
    7004       66977 :   tprint_start(out);
    7005      133954 :   string_iterator iter(mac);
    7006             :   for (;;) {
    7007     1583415 :     int c = iter.get(0 /* nullptr */);
    7008     1583415 :     if (c != EOF)
                             // The encoding of one input character is a NUL-terminated
                             // string; walk it until the terminator.
    7009     3032883 :       for (const char *s = encode_for_stream_output(c);
    7010     3032883 :            *s != 0 /* NUL */;
    7011             :            s++)
    7012     1516445 :         tprint_char(out, *s);
    7013             :     else
    7014       66977 :       break;
    7015     1516438 :   }
    7016       66977 :   tprint_end(out);
    7017       66977 : }
    7018             : // Report the current input location through `filename` and `lineno`.
                       // This is a thin wrapper: it forwards to
                       // input_stack::get_location() with the "allow macro" flag off and
                       // returns that function's result unchanged.
    7019         208 : int get_file_line(const char **filename, int *lineno)
    7020             : {
    7021         208 :   return input_stack::get_location(false /* allow macro */, filename,
    7022         208 :                                    lineno);
    7023             : }
    7024             : // Read an integer line number and, if more text follows on the
                       // line, a file name, and record them as the current input location
                       // via input_stack::set_location().  If the line number cannot be
                       // read, the location is left alone and the line is skipped.
    7025       22852 : void line_file()
    7026             : {
    7027             :   int n;
    7028       22852 :   if (read_integer(&n)) {
                           // NOTE(review): the value stored is one less than the argument;
                           // presumably the line counter is advanced again when the end of
                           // this request's line is read -- confirm.
    7029       22852 :     if (has_arg(true /* peek */)) {
    7030        1397 :       const char *reported_file_name = read_rest_of_line_as_argument();
    7031        1397 :       (void) input_stack::set_location(reported_file_name, (n - 1));
    7032             :       // TODO: Add `reported_file_name` to file name set.
                             // The rest of the line was consumed as the argument, so
                             // advance the token instead of calling skip_line().
    7033        1397 :       tok.next();
    7034        1397 :       return;
    7035             :     }
                           // No file name given: pass a null name (presumably leaving the
                           // recorded file name unchanged -- confirm against
                           // set_location()).
    7036       21455 :     (void) input_stack::set_location(0 /* nullptr */, (n - 1));
    7037             :   }
    7038       21455 :   skip_line();
    7039             : }
    7040             : // Request handler: turn nroff mode on, then skip the rest of the line.
    7041        1140 : static void nroff_request()
    7042             : {
    7043        1140 :   in_nroff_mode = true;
    7044        1140 :   skip_line();
    7045        1140 : }
    7046             : // Request handler: turn nroff mode off, then skip the rest of the line.
    7047           0 : static void troff_request()
    7048             : {
    7049           0 :   in_nroff_mode = false;
    7050           0 :   skip_line();
    7051           0 : }
    7052             : // Discard the branch of a conditional that is not to be executed.
                       // If `tok` is already a newline, just move past it.  Otherwise read
                       // raw characters from the input stack, tracking brace-escape nesting
                       // in `level`, until a newline is read while `level` <= 0 or the
                       // input is exhausted; then advance `tok`.
    7053     2593683 : static void skip_branch()
    7054             : {
    7055     2593683 :   if (tok.is_newline()) {
    7056           2 :     tok.next();
    7057           2 :     return;
    7058             :   }
    7059     2593681 :   int level = 0;
    7060             :   // ensure that ".if 0\{" works as expected
    7061     2593681 :   if (tok.is_left_brace())
    7062           3 :     level++;
    7063             :   int c;
    7064             :   for (;;) {
    7065   270236480 :     c = input_stack::get(0 /* nullptr */);
    7066   270236480 :     if (c == EOF)
    7067           6 :       break;
                           // Braces can arrive either as the single codes
                           // ESCAPE_LEFT_BRACE/ESCAPE_RIGHT_BRACE or as the escape
                           // character followed by a literal '{' or '}'.
    7068   270236474 :     if (c == ESCAPE_LEFT_BRACE)
    7069     2157257 :       ++level;
    7070   268079217 :     else if (c == ESCAPE_RIGHT_BRACE)
    7071     2171202 :       --level;
    7072   265908015 :     else if ((c == escape_char) && (escape_char != 0U))
    7073    12725143 :       switch (input_stack::get(0 /* nullptr */)) {
    7074      162951 :       case '{':
    7075      162951 :         ++level;
    7076      162951 :         break;
    7077      174139 :       case '}':
    7078      174139 :         --level;
    7079      174139 :         break;
    7080     2270297 :       case '"':
                               // Escape followed by '"': discard through the end of the
                               // line so nothing in it is counted.  `c` ends up as the
                               // newline (or EOF), so the check below still sees the
                               // line end.
    7081     2270297 :         while ((c = input_stack::get(0 /* nullptr */)) != '\n'
    7082     2270297 :                 && c != EOF)
    7083             :           ;
    7084             :       }
    7085             :     /*
    7086             :       Note that the level can properly be < 0, e.g.
    7087             : 
    7088             :         .if 1 \{\
    7089             :         .if 0 \{\
    7090             :         .\}\}
    7091             : 
    7092             :       So don't give an error message in this case.
    7093             :     */
    7094   270236474 :     if (level <= 0 && c == '\n')
    7095     2593675 :       break;
    7096             :   }
    7097     2593681 :   tok.next();
    7098             : }
    7099             : // Prepare to execute the selected branch of a conditional: advance
                       // `tok` past any run of space and left-brace tokens.
    7100     6627912 : static void take_branch()
    7101             : {
    7102     6627912 :   while (tok.is_space() || tok.is_left_brace())
    7103     4257840 :     tok.next();
    7104     2370072 : }
    7105             : // Request handler that only skips spaces at `tok`; nothing else on
                       // the line is consumed here.
    7106      296216 : static void nop_request()
    7107             : {
    7108      296216 :   tok.skip_spaces();
    7109      296216 : }
    7110             : 
    7111             : // Perform a (formatted) output comparison operation, as found in
    7112             : //   .if 'foo'bar'
    7113             : // ...for example.
                       //
                       // On entry `tok` is the opening delimiter.  The first comparand is
                       // processed into one scratch copy of the current environment and
                       // the second into another; the two resulting output lines are then
                       // compared with same_node_list().  A token equal to the delimiter
                       // closes a comparand only if it was read at the same input level
                       // as the opening delimiter, unless AT&T compatibility mode is on.
                       //
                       // Returns true if the comparands' node lists are the same.  Returns
                       // false, after a WARN_DELIM warning, if a newline or end of input
                       // is reached before both comparands are closed.  `curenv` and
                       // `suppress_push` are restored on both return paths.
    7114     1379893 : static bool are_comparands_equal()
    7115             : {
    7116     2759786 :   token delim = tok;
    7117     1379893 :   int delim_level = input_stack::get_level();
    7118     2759786 :   environment env1(curenv);
    7119     2759786 :   environment env2(curenv);
    7120     1379893 :   environment *oldenv = curenv;
    7121     1379893 :   curenv = &env1;
    7122     1379893 :   suppress_push = true;
    7123     4139679 :   for (int i = 0; i < 2; i++) {
    7124             :     for (;;) {
    7125    12337240 :       tok.next();
    7126    12337240 :       if (tok.is_newline() || tok.is_eof()) {
    7127             :         // token::description() writes to static, class-wide storage,
    7128             :         // so we must allocate a copy of it before issuing the next
    7129             :         // diagnostic.
    7130           0 :         char *delimdesc = strdup(delim.description());
    7131           0 :         warning(WARN_DELIM, "missing closing delimiter in output"
    7132             :                 " comparison operator; expected %1, got %2",
    7133           0 :                 delimdesc, tok.description());
    7134           0 :         free(delimdesc);
    7135           0 :         tok.next();
                               // Restore the global state changed above before bailing
                               // out, as the normal exit does; otherwise
                               // `suppress_push` would stay set after a malformed
                               // comparison.
                               // NOTE(review): the normal exit also clears
                               // `have_formattable_input`; confirm whether this path
                               // should do so too.
    7136           0 :         curenv = oldenv;
                               suppress_push = false;
    7137           0 :         return false;
    7138             :       }
    7139    12337240 :       if ((tok == delim)
    7140    15202848 :           && (want_att_compat
    7141     2865608 :               || (input_stack::get_level() == delim_level)))
    7142     2759786 :         break;
    7143     9577454 :       tok.process();
    7144     9577454 :     }
                         // First comparand done; collect the second in its own
                         // environment.
    7145     2759786 :     curenv = &env2;
    7146             :   }
    7147     1379893 :   node *n1 = env1.extract_output_line();
    7148     1379893 :   node *n2 = env2.extract_output_line();
    7149     1379893 :   bool result = same_node_list(n1, n2);
    7150     1379893 :   delete_node_list(n1);
    7151     1379893 :   delete_node_list(n2);
    7152     1379893 :   curenv = oldenv;
    7153     1379893 :   have_formattable_input = false;
    7154     1379893 :   suppress_push = false;
    7155     1379893 :   tok.next();
    7156     1379893 :   return result;
    7157             : }
    7158             : // Outcomes of evaluated if-else requests that have not yet been
                       // consumed: if_else_request() pushes, else_request() pops.
    7159             : static std::stack<bool> if_else_stack;
    7160             : // Parse and evaluate the conditional expression starting at `tok`,
                       // then consume input accordingly: take_branch() when the result is
                       // true, skip_branch() when it is false.
                       //
                       // Any number of leading `!` tokens each toggle inversion of the
                       // result.  The next character selects the test:
                       //   t, n    not in / in nroff mode
                       //   o, e    page number of `topdiv` is odd / even
                       //   d, r    identifier is in request_dictionary / register_dictionary
                       //   m       color name is `default_symbol` or in color_dictionary
                       //   c       character_exists() for the following character
                       //   F       is_font_name() in the current environment's family
                       //   S       is_abstract_style()
                       //   v       always false
                       // A space token yields false; a token usable as a delimiter starts
                       // an output comparison; anything else is read as a numeric
                       // expression, which is true when greater than zero.
                       //
                       // Returns the (possibly inverted) result.  If an operand cannot be
                       // read, the branch is skipped and false is returned without
                       // applying the inversion.
    7161     4444332 : static bool is_conditional_expression_true()
    7162             : {
    7163     4444332 :   bool perform_output_comparison = false;
    7164     4444332 :   bool want_test_sense_inverted = false;
    7165     4444332 :   tok.skip_spaces();
    7166     5176838 :   while (tok.ch() == int('!')) { // TODO: grochar
    7167      732506 :     tok.next();
    7168      732506 :     want_test_sense_inverted = !want_test_sense_inverted;
    7169             :   }
    7170             :   bool result;
    7171     4444332 :   int c = tok.ch(); // safely compares to char literals; TODO: grochar
                         // In AT&T compatibility mode, only warn about the extension
                         // operators here; the chain below still evaluates them.
    7172     4444332 :   if (want_att_compat)
    7173         427 :     switch (c) {
    7174           7 :     case int('F'): // TODO: grochar
    7175             :     case int('S'): // TODO: grochar
    7176             :     case int('c'): // TODO: grochar
    7177             :     case int('d'): // TODO: grochar
    7178             :     case int('m'): // TODO: grochar
    7179             :     case int('r'): // TODO: grochar
    7180             :     case int('v'): // TODO: grochar
    7181           7 :       warning(WARN_SYNTAX,
    7182             :               "conditional expression operator '%1' is not portable to"
    7183             :               " AT&T troff",
    7184           7 :               char(c));
    7185             :               // TODO: "; treating as output comparison delimiter", c);
    7186           7 :       break;
    7187         420 :     default:
    7188         420 :       break;
    7189             :     }
    7190     4444332 :   if (c == int('t')) { // TODO: grochar
    7191       58889 :     tok.next();
    7192       58889 :     result = !in_nroff_mode;
    7193             :   }
    7194     4385443 :   else if (c == int('n')) { // TODO: grochar
    7195        7683 :     tok.next();
    7196        7683 :     result = in_nroff_mode;
    7197             :   }
    7198     4377760 :   else if (c == int('o')) { // TODO: grochar
    7199         892 :     result = (topdiv->get_page_number() & 1); // TODO: dump cleverness
    7200         892 :     tok.next();
    7201             :   }
    7202     4376868 :   else if (c == int('e')) { // TODO: grochar
    7203         397 :     result = !(topdiv->get_page_number() & 1); // TODO: dump cleverness
    7204         397 :     tok.next();
    7205             :   }
    7206             :   // TODO: else if (!want_att_compat) {
    7207             :   // Check for GNU troff extended conditional expression operators.
    7208     4376471 :   else if ((c == int('d') || (c == int('r')))) { // TODO: grochar
    7209      492495 :     tok.next();
    7210      492495 :     symbol nm = read_identifier(true /* required */);
    7211      492495 :     if (nm.is_null()) {
    7212           0 :       skip_branch();
    7213           0 :       return false;
    7214             :     }
    7215      492495 :     result = ((c == 'd')
    7216      492495 :               ? request_dictionary.lookup(nm) != 0 /* nullptr */
    7217      492495 :               : register_dictionary.lookup(nm) != 0 /* nullptr */);
    7218             :   }
    7219     3883976 :   else if (c == 'm') {
    7220        5956 :     tok.next();
    7221        5956 :     symbol nm = read_long_identifier(true /* required */);
    7222        5956 :     if (nm.is_null()) {
    7223           0 :       skip_branch();
    7224           0 :       return false;
    7225             :     }
    7226       11912 :     result = ((nm == default_symbol)
    7227        5956 :               || color_dictionary.lookup(nm) != 0 /* nullptr */);
    7228             :   }
    7229     3878020 :   else if (c == 'c') {
    7230       26117 :     tok.next();
    7231       26117 :     tok.skip_spaces();
    7232             :     // XXX: Mystery: the presence of a character (fortunately) doesn't
    7233             :     // create it if nonexistent even though the default second argument
    7234             :     // to `token::get_charinfo()` (`suppress_creation`) is `false` (see
    7235             :     // "token.h").  Why?
    7236       26117 :     charinfo *ci = tok.get_charinfo(true /* required */);
    7237       26117 :     if (0 == ci /* nullptr */) {
    7238           0 :       skip_branch();
    7239           0 :       return false;
    7240             :     }
    7241       26117 :     result = character_exists(ci, curenv);
    7242       26117 :     tok.next();
    7243             :   }
    7244     3851903 :   else if (c == 'F') {
    7245        7940 :     tok.next();
    7246        7940 :     symbol nm = read_long_identifier(true /* required */);
    7247        7940 :     if (nm.is_null()) {
    7248           0 :       skip_branch();
    7249           0 :       return false;
    7250             :     }
    7251        7940 :     result = is_font_name(curenv->get_family()->nm, nm);
    7252             :   }
    7253     3843963 :   else if (c == 'S') {
    7254           2 :     tok.next();
    7255           2 :     symbol nm = read_long_identifier(true /* required */);
    7256           2 :     if (nm.is_null()) {
    7257           0 :       skip_branch();
    7258           0 :       return false;
    7259             :     }
    7260           2 :     result = is_abstract_style(nm);
    7261             :   }
    7262             :   // vtroff extension
    7263     3843961 :   else if (c == 'v') {
    7264           2 :     tok.next();
    7265           2 :     result = false;
    7266             :   }
    7267     3843959 :   else if (tok.is_space())
    7268           0 :     result = false;
                         // The comparison itself is deferred to after this chain; `result`
                         // is assigned there.
    7269     7687918 :   else if (!want_att_compat
    7270     3843959 :            && tok.is_usable_as_delimiter())
    7271     1379829 :     perform_output_comparison = true;
    7272     2464130 :   else if (want_att_compat
    7273     2464130 :            && tok.is_usable_as_delimiter(false /* report error */,
    7274             :                   DELIMITER_ATT_OUTPUT_COMPARISON_EXPRESSION))
    7275          64 :     perform_output_comparison = true;
    7276             :   else {
    7277             :     // Evaluate numeric expression.
    7278             :     units n;
    7279     2464066 :     if (!read_measurement(&n, (unsigned char)('u'))) { // TODO: grochar
    7280          15 :       skip_branch();
    7281          15 :       return false;
    7282             :     }
    7283             :     else
    7284     2464051 :       result = (n > 0);
    7285             :   }
    7286     4444317 :   if (perform_output_comparison)
    7287     1379893 :     result = are_comparands_equal();
    7288     4444317 :   if (want_test_sense_inverted)
    7289      732505 :     result = !result;
    7290     4444317 :   if (result)
    7291     2021476 :     take_branch();
    7292             :   else
    7293     2422841 :     skip_branch();
    7294     4444317 :   return result;
    7295             : }
    7296             : // Request handler for if-else.  With no arguments, warn and skip the
                       // line.  Otherwise evaluate the conditional expression (which also
                       // takes or skips the branch) and push its outcome on
                       // `if_else_stack`, where else_request() picks it up.
    7297      571975 : static void if_else_request()
    7298             : {
    7299      571975 :   if (!has_arg()) {
    7300           0 :     warning(WARN_MISSING, "if-else request expects arguments");
    7301           0 :     skip_line();
    7302           0 :     return;
    7303             :   }
    7304      571975 :   if_else_stack.push(is_conditional_expression_true());
    7305             : }
    7306             : // Request handler for if-then.  With no arguments, warn and skip the
                       // line.  Otherwise evaluate the conditional expression, which
                       // itself takes or skips the branch; the outcome is not recorded.
    7307     2977289 : static void if_request()
    7308             : {
    7309     2977289 :   if (!has_arg()) {
    7310           0 :     warning(WARN_MISSING, "if-then request expects arguments");
    7311           0 :     skip_line();
    7312           0 :     return;
    7313             :   }
    7314     2977289 :   (void) is_conditional_expression_true();
    7315             : }
    7316             : // Request handler for else.  Pops the most recent outcome from
                       // `if_else_stack` and does the opposite: skip the branch if the
                       // condition was true, take it if false.  With nothing on the stack
                       // the branch is skipped.
    7317      519423 : static void else_request()
    7318             : {
    7319      519423 :   if (if_else_stack.empty())
    7320           0 :     skip_branch();
    7321             :   else {
    7322      519423 :     bool predicate = if_else_stack.top();
    7323      519423 :     if_else_stack.pop();
    7324      519423 :     if (predicate)
    7325      170827 :       skip_branch();
    7326             :     else
    7327      348596 :       take_branch();
    7328             :   }
    7329      519423 : }
    7330             : // State shared by the while, break, and continue request handlers.
    7331             : static int while_depth = 0; // count of while_request() calls now running their loop
    7332             : static bool want_loop_break = false; // set by while_break_request(); read and cleared by while_request()
    7333             : // Request handler for while loops.  With no arguments, warn and skip
                       // the line.  Otherwise:
                       //   1. Copy the current token and then raw input into `mac`, up to
                       //      and including the first newline read while the brace
                       //      nesting `level` is <= 0 (or until input runs out).
                       //   2. If the braces did not balance, report an error.
                       //   3. Else repeatedly push `mac` as input, evaluate the
                       //      conditional expression at its start, and process the input
                       //      stack, until the condition is false, a break was requested,
                       //      or the input stack reports a return boundary.
    7334      210348 : static void while_request()
    7335             : {
    7336      210348 :   if (!has_arg(true /* peek */)) {
    7337           0 :     warning(WARN_MISSING, "while loop request expects arguments");
    7338           0 :     skip_line();
    7339           0 :     return;
    7340             :   }
    7341      420696 :   macro mac;
    7342      210348 :   bool is_char_escaped = false;
    7343      210348 :   int level = 0;
    7344      210348 :   mac.append(new token_node(tok));
    7345             :   for (;;) {
    7346    71903488 :     node *n = 0 /* nullptr */;
    7347    71903488 :     int c = input_stack::get(&n);
    7348    71903488 :     if (c == EOF)
    7349           0 :       break;
                           // A zero character code presumably means the input stack handed
                           // back a node through `n` instead of a character -- confirm
                           // against input_stack::get().
    7350    71903488 :     if (c == 0) {
    7351           0 :       is_char_escaped = false;
    7352           0 :       mac.append(n);
    7353             :     }
    7354    71903488 :     else if (is_char_escaped) {
                             // Previous character was the escape character, so a literal
                             // brace here changes the nesting level.
    7355     3485605 :       if (c == '{')
    7356        7656 :         level += 1;
    7357     3477949 :       else if (c == '}')
    7358        7656 :         level -= 1;
    7359     3485605 :       is_char_escaped = false;
    7360     3485605 :       mac.append(c);
    7361             :     }
    7362             :     else {
    7363    68417883 :       if (c == ESCAPE_LEFT_BRACE)
    7364      739276 :         level += 1;
    7365    67678607 :       else if (c == ESCAPE_RIGHT_BRACE)
    7366      739276 :         level -= 1;
    7367    66939331 :       else if (c == escape_char)
    7368     3485605 :         is_char_escaped = true;
    7369    68417883 :       mac.append(c);
                             // The newline is stored before stopping, so `mac` ends with
                             // it.
    7370    68417883 :       if (c == '\n' && level <= 0)
    7371      210348 :         break;
    7372             :     }
    7373    71693140 :   }
    7374      210348 :   if (level != 0)
    7375           0 :     error("unbalanced brace escape sequences");
    7376             :   else {
    7377      210348 :     while_depth++;
    7378      210348 :     input_stack::add_boundary();
    7379             :     for (;;) {
    7380      895068 :       input_stack::push(new string_iterator(mac, "while loop"));
    7381      895068 :       tok.next();
    7382      895068 :       if (!is_conditional_expression_true()) {
                               // Condition is false: drain what is left of this pass's
                               // input, then leave the loop.
    7383      188000 :         while (input_stack::get(0 /* nullptr */) != EOF)
    7384             :           ;
    7385      188000 :         break;
    7386             :       }
    7387      707068 :       process_input_stack();
    7388      707068 :       if (want_loop_break || input_stack::is_return_boundary()) {
    7389       22348 :         want_loop_break = false;
    7390       22348 :         break;
    7391             :       }
    7392             :     }
    7393      210348 :     input_stack::remove_boundary();
    7394      210348 :     while_depth--;
    7395             :   }
    7396      210348 :   tok.next();
    7397             : }
    7398             : // Request handler for break.  Outside a while loop (`while_depth`
                       // is zero) report an error and skip the line.  Inside one, set
                       // `want_loop_break` so while_request() stops iterating, and discard
                       // input until input_stack::get() returns EOF.
    7399        9148 : static void while_break_request()
    7400             : {
    7401        9148 :   if (!while_depth) {
    7402           0 :     error("cannot 'break' when not in a 'while' loop");
    7403           0 :     skip_line();
    7404             :   }
    7405             :   else {
    7406        9148 :     want_loop_break = true;
    7407      422554 :     while (input_stack::get(0 /* nullptr */) != EOF)
    7408             :       ;
    7409        9148 :     tok.next();
    7410             :   }
    7411        9148 : }
    7412             : // Request handler for continue.  Outside a while loop (`while_depth`
                       // is zero) report an error and skip the line.  Inside one, discard
                       // input until input_stack::get() returns EOF; unlike break, no flag
                       // is set, so while_request() goes on to test the condition again.
    7413       63602 : static void while_continue_request()
    7414             : {
    7415       63602 :   if (!while_depth) {
    7416           0 :     error("cannot 'continue' when not in a 'while' loop");
    7417           0 :     skip_line();
    7418             :   }
    7419             :   else {
    7420    40051839 :     while (input_stack::get(0 /* nullptr */) != EOF)
    7421             :       ;
    7422       63602 :     tok.next();
    7423             :   }
    7424       63602 : }
    7425             : // Shared implementation of the file sourcing requests.  Reads the
                       // rest of the line as a file name, opens it through
                       // `include_search_path`, and on success pushes it on the input
                       // stack.  On failure an error is reported, except that when
                       // `quietly` is true a nonexistent file (ENOENT) is ignored.
                       // Callers in this file check has_arg() first.
                       //
                       // NOTE(review): `filename` is not freed here, whereas
                       // pipe_source_request() uses delete[] on the string it gets from
                       // the same reader; confirm whether file_iterator keeps the pointer
                       // or this is a leak.
    7426          13 : void do_source(bool quietly)
    7427             : {
    7428          13 :   char *filename = read_rest_of_line_as_argument();
                         // Clear errno so the value tested below comes from the open
                         // attempt.
    7429          13 :   errno = 0;
    7430          13 :   FILE *fp = include_search_path.open_file_cautiously(filename);
    7431          13 :   if (fp != 0 /* nullptr */)
    7432          12 :     input_stack::push(new file_iterator(fp, filename));
    7433             :   else
    7434             :     // Suppress diagnostic only if we're operating quietly and it's an
    7435             :     // expected problem.
    7436           1 :     if (!(quietly && (ENOENT == errno)))
    7437           0 :       error("cannot open '%1': %2", filename, strerror(errno));
    7438             :   // TODO: Add `filename` to file name set.
    7439          13 :   tok.next();
    7440          13 : }
    7441             : // Request handler for .so: warn and skip the line if there is no
                       // argument; otherwise source the named file, reporting any failure
                       // to open it.
    7442           1 : void source_request() // .so
    7443             : {
    7444           1 :   if (!has_arg(true /* peek */)) {
    7445           0 :     warning(WARN_MISSING, "file sourcing request expects an argument");
    7446           0 :     skip_line();
    7447           0 :     return;
    7448             :   }
    7449           1 :   do_source(false /* quietly */ );
    7450             : }
    7451             : 
    7452             : // like .so, but silently ignore files that can't be opened due to their
    7453             : // nonexistence
                       //
                       // A missing argument still draws a warning, and do_source() still
                       // reports open failures other than ENOENT.
    7454          12 : void source_quietly_request() // .soquiet
    7455             : {
    7456          12 :   if (!has_arg(true /* peek */)) {
    7457           0 :     warning(WARN_MISSING, "quiet file sourcing request expects an"
    7458             :             " argument");
    7459           0 :     skip_line();
    7460           0 :     return;
    7461             :   }
    7462          12 :   do_source(true /* quietly */ );
    7463             : }
    7464             : // Request handler for .pso: run the rest of the line as a command
                       // with popen() and push the command's output on the input stack.
                       // The request is refused with a warning if it has no argument and
                       // with an error if unsafe requests are not enabled; in both cases
                       // the line is skipped.  A failure to start the command is reported
                       // with the errno text.
    7465          96 : void pipe_source_request() // .pso
    7466             : {
    7467          96 :   if (!has_arg(true /* peek */)) {
    7468           0 :     warning(WARN_MISSING, "piped command source request expects"
    7469             :             " arguments");
    7470           0 :     skip_line();
    7471           0 :     return;
    7472             :   }
    7473          96 :   if (!want_unsafe_requests) {
    7474           0 :     error("piped command source request is not allowed in safer mode");
    7475           0 :     skip_line();
    7476           0 :     return;
    7477             :   }
    7478          96 :   char *pcmd = read_rest_of_line_as_argument();
    7479             :   // `has_arg()` should have ensured that this pointer is non-null.
    7480          96 :   assert(pcmd != 0 /* nullptr */);
                         // The assertion is compiled out when NDEBUG is defined, so a null
                         // command must still be kept away from popen(); stop after the
                         // diagnostic instead of falling through.  The token is advanced
                         // exactly as on the normal exit below.
    7481          96 :   if (0 /* nullptr */ == pcmd) {
    7482           0 :     error("cannot apply piped command source request to empty"
    7483             :           " argument");
                           tok.next();
                           return;
                         }
    7484          96 :   errno = 0;
    7485          96 :   FILE *fp = popen(pcmd, POPEN_RT);
    7486          96 :   if (fp != 0 /* nullptr */)
    7487          96 :     input_stack::push(new file_iterator(fp, pcmd, true /* popened */));
    7488             :   else
    7489           0 :     error("cannot open pipe to process '%1': %2", pcmd,
    7490           0 :           strerror(errno));
                         // NOTE(review): freeing `pcmd` here assumes file_iterator does
                         // not keep the pointer it was given -- confirm.
    7491          96 :   delete[] pcmd;
    7492          96 :   tok.next();
    7493             : }
    7494             : 
    7495             : // .psbb
    7496             : //
    7497             : // Extract bounding box limits from PostScript file, and assign
    7498             : // them to the following four gtroff registers:--
    7499             : //
    7500             : static int llx_reg_contents = 0; // presumably lower-left x -- confirm
    7501             : static int lly_reg_contents = 0; // presumably lower-left y -- confirm
    7502             : static int urx_reg_contents = 0; // presumably upper-right x -- confirm
    7503             : static int ury_reg_contents = 0; // presumably upper-right y -- confirm
    7504             : 
    7505             : // Manifest constants to specify the status of bounding box range
    7506             : // acquisition; (note that PSBB_RANGE_IS_BAD is also suitable for
    7507             : // assignment as a default ordinate property value).
    7508             : //
    7509             : #define PSBB_RANGE_IS_BAD   0 // no valid set of range values could be extracted
    7510             : #define PSBB_RANGE_IS_SET   1 // presumably: range values were parsed -- confirm
    7511             : #define PSBB_RANGE_AT_END   2 // comment defers the values to the trailer ("(atend)")
    7512             : 
    7513             : // Maximum input line length, for DSC conformance, and options to
    7514             : // control how it will be enforced; caller should select either of
    7515             : // DSC_LINE_MAX_IGNORED, to allow partial line collection spread
    7516             : // across multiple calls, or DSC_LINE_MAX_ENFORCE, to truncate
    7517             : // excess length lines at the DSC limit.
    7518             : //
    7519             : // Note that DSC_LINE_MAX_CHECKED is reserved for internal use by
    7520             : // psbb_locator::get_line(), and should not be specified in any call;
    7521             : // also, handling of DSC_LINE_MAX_IGNORED, as a get_line() option,
    7522             : // is currently unimplemented.
    7523             : //
    7524             : #define DSC_LINE_MAX          255
    7525             : #define DSC_LINE_MAX_IGNORED   -1
    7526             : #define DSC_LINE_MAX_ENFORCE    0
    7527             : #define DSC_LINE_MAX_CHECKED    1
    7528             : 
    7529             : // Input characters to be considered as whitespace, when reading
    7530             : // PostScript file comments.
    7531             : //
    7532             : cset white_space("\n\r \t"); // newline, carriage return, space, tab
    7533             : 
    7534             : // Class psbb_locator
    7535             : //
    7536             : // This locally declared and implemented class provides the methods
    7537             : // to be used for retrieval of bounding box properties from a specified
    7538             : // PostScript or PDF file.
    7539             : //
    7540             : class psbb_locator
    7541             : {
    7542             :   public:
    7543             :     // Only the class constructor is exposed publicly; instantiation of
    7544             :     // a class object will retrieve the requisite bounding box properties
    7545             :     // from the specified file, and assign them to gtroff registers.
    7546             :     //
    7547             :     psbb_locator(const char *);
    7548             : 
    7549             :   private:
    7550             :     FILE *fp; // stream for the file; opened by the constructor
    7551             :     const char *filename; // name given to the constructor; used in diagnostics
    7552             :     char buf[2 + DSC_LINE_MAX]; // presumably the line buffer filled by get_line() -- confirm
    7553             :     int llx, lly, urx, ury; // bounding box values; the constructor starts them at 0
    7554             : 
    7555             :     // CRLF handling hook, for get_line() function.
    7556             :     //
    7557             :     int lastc; // the constructor starts this at EOF
    7558             : 
    7559             :     // Private method functions facilitate implementation of the
    7560             :     // class constructor; none are used in any other context.
    7561             :     //
    7562             :     int get_line(int);
    7563             :     inline bool get_header_comment(void);
    7564             :     inline const char *context_args(const char *);
    7565             :     inline const char *context_args(const char *, const char *);
    7566             :     inline const char *bounding_box_args(void);
    7567             :     int parse_bounding_box(const char *);
    7568             :     inline void assign_registers(void);
    7569             :     inline int skip_to_trailer(void);
    7570             : };
    7571             : 
    7572             : // psbb_locator class constructor.
    7573             : //
    7574           5 : psbb_locator::psbb_locator(const char *fname):
    7575           5 : filename(fname), llx(0), lly(0), urx(0), ury(0), lastc(EOF)
    7576             : {
    7577             :   // PS files might contain non-printable characters, such as ^Z
    7578             :   // and CRs not followed by an LF, so open them in binary mode.
    7579             :   //
    7580           5 :   fp = include_search_path.open_file_cautiously(filename, 0, FOPEN_RB);
    7581           5 :   if (fp != 0 /* nullptr */) {
    7582             :     // After successfully opening the file, acquire the first
    7583             :     // line, whence we may determine the file format...
    7584             :     //
    7585           5 :     if (get_line(DSC_LINE_MAX_ENFORCE) == 0)
    7586             :       //
    7587             :       // ...except in the case of an empty file, which we are
    7588             :       // unable to process further.
    7589             :       //
    7590           0 :       error("file '%1' is empty", filename);
    7591             : 
    7592             : # if 0
    7593             :     else if (context_args("%PDF-")) {
    7594             :       // TODO: PDF files specify a /MediaBox, as the equivalent
    7595             :       // of %%BoundingBox; we must implement a handler for this.
    7596             :     }
    7597             : # endif
    7598             : 
    7599           5 :     else if (context_args("%!PS-Adobe-")) {
    7600             :       //
    7601             :       // PostScript files -- strictly, we expect EPS -- should
    7602             :       // specify a %%BoundingBox comment; locate it, initially
    7603             :       // expecting to find it in the comments header...
    7604             :       //
    7605           5 :       const char *context = 0 /* nullptr */;
    7606          26 :       while ((context == 0 /* nullptr */) && get_header_comment()) {
    7607          21 :         if ((context = bounding_box_args()) != 0 /* nullptr */) {
    7608             : 
    7609             :           // When the "%%BoundingBox" comment is found, it may simply
    7610             :           // specify the bounding box property values, or it may defer
    7611             :           // assignment to a similar trailer comment...
    7612             :           //
    7613           5 :           int status = parse_bounding_box(context);
    7614           5 :           if (status == PSBB_RANGE_AT_END) {
    7615             :             //
    7616             :             // ...in which case we must locate the trailer, and search
    7617             :             // for the appropriate specification within it.
    7618             :             //
    7619           0 :             if (skip_to_trailer() > 0) {
    7620           0 :               while ((context = bounding_box_args()) == 0 /* nullptr */
    7621           0 :                      && get_line(DSC_LINE_MAX_ENFORCE) > 0)
    7622             :                 ;
    7623           0 :               if (context != 0 /* nullptr */) {
    7624             :                 //
    7625             :                 // When we find a bounding box specification here...
    7626             :                 //
    7627           0 :                 if ((status = parse_bounding_box(context)) == PSBB_RANGE_AT_END)
    7628             :                   //
    7629             :                   // ...we must ensure it is not a further attempt to defer
    7630             :                   // assignment to a trailer, (which we are already parsing).
    7631             :                   //
    7632           0 :                   error("'(atend)' is not allowed in trailer of '%1'",
    7633           0 :                         filename);
    7634             :               }
    7635             :             }
    7636             :             else
    7637             :               // The trailer could not be found, so there is no context in
    7638             :               // which a trailing %%BoundingBox comment might be located.
    7639             :               //
    7640           0 :               context = 0 /* nullptr */;
    7641             :           }
    7642           5 :           if (status == PSBB_RANGE_IS_BAD) {
    7643             :             //
    7644             :             // This arises when we found a %%BoundingBox comment, but
    7645             :             // we were unable to extract a valid set of range values from
    7646             :             // it; all we can do is diagnose this.
    7647             :             //
    7648           0 :             error("the arguments to the %%%%BoundingBox comment in '%1' are bad",
    7649           0 :                   filename);
    7650             :           }
    7651             :         }
    7652             :       }
    7653           5 :       if (context == 0 /* nullptr */)
    7654             :         //
    7655             :         // Conversely, this arises when no value specifying %%BoundingBox
    7656             :         // comment has been found, in any appropriate location...
    7657             :         //
    7658           0 :         error("%%%%BoundingBox comment not found in '%1'", filename);
    7659             :     }
    7660             :     else
    7661             :       // ...while this indicates that there was no appropriate file format
    7662             :       // identifier, on the first line of the input file.
    7663             :       //
    7664           0 :       error("'%1' does not conform to the Document Structuring Conventions",
    7665           0 :             filename);
    7666             : 
    7667             :     // Regardless of success or failure of bounding box property acquisition,
    7668             :     // we did successfully open an input file, so we must now close it...
    7669             :     //
    7670           5 :     fclose(fp);
    7671             :   }
    7672             :   else
    7673             :     // ...but in this case, we did not successfully open any input file.
    7674             :     //
    7675           0 :     error("cannot open '%1': %2", filename, strerror(errno));
    7676             : 
    7677             :   // Irrespective of whether or not we were able to successfully acquire the
    7678             :   // bounding box properties, we ALWAYS update the associated gtroff registers.
    7679             :   //
    7680           5 :   assign_registers();
    7681           5 : }
    7682             : 
    7683             : // psbb_locator::parse_bounding_box()
    7684             : //
    7685             : // Parse the argument to a %%BoundingBox comment, returning:
    7686             : //   PSBB_RANGE_IS_SET if it contains four numbers,
    7687             : //   PSBB_RANGE_AT_END if it contains "(atend)", or
    7688             : //   PSBB_RANGE_IS_BAD otherwise.
    7689             : //
    7690           5 : int psbb_locator::parse_bounding_box(const char *context)
    7691             : {
    7692             :   // The Document Structuring Conventions say that the numbers
    7693             :   // should be integers.
    7694             :   //
    7695           5 :   int status = PSBB_RANGE_IS_SET;
    7696           5 :   if (sscanf(context, "%d %d %d %d", &llx, &lly, &urx, &ury) != 4) {
    7697             :     //
    7698             :     // Unfortunately some broken applications get this wrong;
    7699             :     // try to parse them as doubles instead...
    7700             :     //
    7701             :     double x1, x2, x3, x4;
    7702           0 :     if (sscanf(context, "%lf %lf %lf %lf", &x1, &x2, &x3, &x4) == 4) {
    7703           0 :       llx = (int) x1;
    7704           0 :       lly = (int) x2;
    7705           0 :       urx = (int) x3;
    7706           0 :       ury = (int) x4;
    7707             :     }
    7708             :     else {
    7709             :       // ...but if we can't parse four numbers, skip over any
    7710             :       // initial whitespace...
    7711             :       //
    7712           0 :       while (*context == '\x20' || *context == '\t')
    7713           0 :         context++;
    7714             : 
    7715             :       // ...before checking for "(atend)", and setting the
    7716             :       // appropriate exit status accordingly.
    7717             :       //
    7718           0 :       status = (context_args("(atend)", context) == 0 /* nullptr */)
    7719           0 :                  ? llx = lly = urx = ury = PSBB_RANGE_IS_BAD
    7720             :                  : PSBB_RANGE_AT_END;
    7721             :     }
    7722             :   }
    7723           5 :   return status;
    7724             : }
    7725             : 
    7726             : // ps_locator::get_line()
    7727             : //
    7728             : // Collect an input record from a PostScript or PDF file.
    7729             : //
    7730             : // Inputs:
    7731             : //   buf       pointer to caller's input buffer.
    7732             : //   fp        FILE stream pointer, whence input is read.
    7733             : //   filename  name of input file, (for diagnostic use only).
    7734             : //   dscopt    DSC_LINE_MAX_ENFORCE or DSC_LINE_MAX_IGNORED.
    7735             : //
    7736             : // Returns the number of input characters stored into caller's
    7737             : // buffer, or zero at end of input stream.
    7738             : //
    7739             : // FIXME: Currently, get_line() always scans an entire line of
    7740             : // input, but returns only as much as will fit in caller's buffer;
    7741             : // the return value is always a positive integer, or zero, with no
    7742             : // way of indicating to caller, that there was more data than the
    7743             : // buffer could accommodate.  A future enhancement could mitigate
    7744             : // this, returning a negative value in the event of truncation, or
    7745             : // even allowing for piecewise retrieval of excessively long lines
    7746             : // in successive reads; (this may be necessary to properly support
    7747             : // DSC_LINE_MAX_IGNORED, which is currently unimplemented).
    7748             : //
    7749          26 : int psbb_locator::get_line(int dscopt)
    7750             : {
    7751          26 :   int c, count = 0;
    7752           0 :   do {
    7753             :     // Collect input characters into caller's buffer, until we
    7754             :     // encounter a line terminator, or end of file...
    7755             :     //
    7756         621 :     while (((c = getc(fp)) != '\n') && (c != '\r') && (c != EOF)) {
    7757         595 :       if ((((lastc = c) < 0x1b) && !white_space(c)) || (c == 0x7f))
    7758             :         //
    7759             :         // ...rejecting any which may be designated as invalid.
    7760             :         //
    7761           0 :         error("invalid input character code %1 in '%2'", int(c), filename);
    7762             : 
    7763             :       // On reading a valid input character, and when there is
    7764             :       // room in caller's buffer...
    7765             :       //
    7766         595 :       else if (count < DSC_LINE_MAX)
    7767             :         //
    7768             :         // ...store it.
    7769             :         //
    7770         595 :         buf[count++] = c;
    7771             : 
    7772             :       // We have a valid input character, but it will not fit
    7773             :       // into caller's buffer; if enforcing DSC conformity...
    7774             :       //
    7775           0 :       else if (dscopt == DSC_LINE_MAX_ENFORCE) {
    7776             :         //
    7777             :         // ...diagnose and truncate.
    7778             :         //
    7779           0 :         dscopt = DSC_LINE_MAX_CHECKED;
    7780           0 :         error("PostScript file '%1' is non-conforming "
    7781           0 :               "because length of line exceeds 255", filename);
    7782             :       }
    7783             :     }
    7784             :     // Reading LF may be a special case: when it immediately
    7785             :     // follows a CR which terminated the preceding input line,
    7786             :     // we deem it to complete a CRLF terminator for the already
    7787             :     // collected preceding line; discard it, and restart input
    7788             :     // collection for the current line.
    7789             :     //
    7790          26 :   } while ((lastc == '\r') && ((lastc = c) == '\n'));
    7791             : 
    7792             :   // For each collected input line, record its actual terminator,
    7793             :   // substitute our preferred LF terminator...
    7794             :   //
    7795          26 :   if (((lastc = c) != EOF) || (count > 0))
    7796          26 :     buf[count++] = '\n';
    7797             : 
    7798             :   // ...and append the required C-string (NUL) terminator, before
    7799             :   // returning the actual count of input characters stored.
    7800             :   //
    7801          26 :   buf[count] = '\0';
    7802          26 :   return count;
    7803             : }
    7804             : 
    7805             : // psbb_locator::context_args()
    7806             : //
    7807             : // Inputs:
    7808             : //   tag   literal text to be matched at start of input line
    7809             : //
    7810             : // Returns a pointer to the trailing substring of the current
    7811             : // input line, following an initial substring matching the "tag"
    7812             : // argument, or 0 if "tag" is not matched.
    7813             : //
    7814          47 : inline const char *psbb_locator::context_args(const char *tag)
    7815             : {
    7816          47 :   return context_args(tag, buf);
    7817             : }
    7818             : 
    7819             : // psbb_locator::context_args()
    7820             : //
    7821             : // Overloaded variant of the preceding function, operating on
    7822             : // an alternative input buffer, (which may represent a terminal
    7823             : // substring of the psbb_locator's primary input line buffer).
    7824             : //
    7825             : // Inputs:
    7826             : //   tag   literal text to be matched at start of buffer
    7827             : //   p     pointer to text to be checked for "tag" match
    7828             : //
    7829             : // Returns a pointer to the trailing substring of the specified
    7830             : // text buffer, following an initial substring matching the "tag"
    7831             : // argument, or 0 if "tag" is not matched.
    7832             : //
    7833          47 : inline const char *psbb_locator::context_args(const char *tag, const char *p)
    7834             : {
    7835          47 :   size_t len = strlen(tag);
    7836          47 :   return (strncmp(tag, p, len) == 0) ? p + len : 0 /* nullptr */;
    7837             : }
    7838             : 
    7839             : // psbb_locator::bounding_box_args()
    7840             : //
    7841             : // Returns a pointer to the arguments string, within the current
    7842             : // input line, when this represents a PostScript "%%BoundingBox:"
    7843             : // comment, or 0 otherwise.
    7844             : //
    7845          21 : inline const char *psbb_locator::bounding_box_args(void)
    7846             : {
    7847          21 :   return context_args("%%BoundingBox:");
    7848             : }
    7849             : 
    7850             : // psbb_locator::assign_registers()
    7851             : //
    7852             : // Copies the bounding box properties established within the
    7853             : // class object, to the associated gtroff registers.
    7854             : //
    7855           5 : inline void psbb_locator::assign_registers(void)
    7856             : {
    7857           5 :   llx_reg_contents = llx;
    7858           5 :   lly_reg_contents = lly;
    7859           5 :   urx_reg_contents = urx;
    7860           5 :   ury_reg_contents = ury;
    7861           5 : }
    7862             : 
    7863             : // psbb_locator::get_header_comment()
    7864             : //
    7865             : // Fetch a line of PostScript input; return true if it complies with
    7866             : // the formatting requirements for header comments, and it is not an
    7867             : // "%%EndComments" line; otherwise return false.
    7868             : //
    7869          21 : inline bool psbb_locator::get_header_comment(void)
    7870             : {
    7871             :   return
    7872             :     // The first necessary requirement, for returning true,
    7873             :     // is that the input line is not empty, (i.e. not EOF).
    7874             :     //
    7875          21 :     get_line(DSC_LINE_MAX_ENFORCE) != 0
    7876             : 
    7877             :     // In header comments, '%X' ('X' any printable character
    7878             :     // except whitespace) is also acceptable.
    7879             :     //
    7880          21 :     && (buf[0] == '%') && !white_space(buf[1])
    7881             : 
    7882             :     // Finally, the input line must not say "%%EndComments".
    7883             :     //
    7884          42 :     && context_args("%%EndComments") == 0 /* nullptr */;
    7885             : }
    7886             : 
    7887             : // psbb_locator::skip_to_trailer()
    7888             : //
    7889             : // Reposition the PostScript input stream, such that the next get_line()
    7890             : // will retrieve the first line, if any, following a "%%Trailer" comment;
    7891             : // returns a positive integer value if the "%%Trailer" comment is found,
    7892             : // or zero if it is not.
    7893             : //
    7894           0 : inline int psbb_locator::skip_to_trailer(void)
    7895             : {
    7896             :   // Begin by considering a chunk of the input file starting 512 bytes
    7897             :   // before its end, and search it for a "%%Trailer" comment; if none is
    7898             :   // found, incrementally double the chunk size while it remains within
    7899             :   // a 32768L byte range, and search again...
    7900             :   //
    7901           0 :   for (ssize_t offset = 512L; offset > 0L; offset <<= 1) {
    7902             :     int status, failed;
    7903           0 :     if ((offset > 32768L) || ((failed = fseek(fp, -offset, SEEK_END)) != 0))
    7904             :       //
    7905             :       // ...ultimately resetting the offset to zero, and simply seeking
    7906             :       // to the start of the file, to terminate the cycle and do a "last
    7907             :       // ditch" search of the entire file, if any backward seek fails, or
    7908             :       // if we reach the arbitrary 32768L byte range limit.
    7909             :       //
    7910           0 :       failed = fseek(fp, offset = 0L, SEEK_SET);
    7911             : 
    7912             :     // Following each successful seek...
    7913             :     //
    7914           0 :     if (!failed) {
    7915             :       //
    7916             :       // ...perform a search by reading lines from the input stream...
    7917             :       //
    7918           0 :       do { status = get_line(DSC_LINE_MAX_ENFORCE);
    7919             :            //
    7920             :            // ...until we either exhaust the available stream data, or
    7921             :            // we have located a "%%Trailer" comment line.
    7922             :            //
    7923             :          } while ((status != 0)
    7924           0 :                   && (context_args("%%Trailer") == 0 /* nullptr */));
    7925           0 :       if (status > 0)
    7926             :         //
    7927             :         // We found the "%%Trailer" comment, so we may immediately
    7928             :         // return, with the stream positioned appropriately...
    7929             :         //
    7930           0 :         return status;
    7931             :     }
    7932             :   }
    7933             :   // ...otherwise, we report that no "%%Trailer" comment was found.
    7934             :   //
    7935           0 :   return 0;
    7936             : }
    7937             : 
    7938           5 : void ps_bbox_request() // .psbb
    7939             : {
    7940           5 :   if (!has_arg(true /* peek */)) {
    7941           0 :     warning(WARN_MISSING, "PostScript file bounding box extraction"
    7942             :             " request expects an argument");
    7943           0 :     skip_line();
    7944           0 :     return;
    7945             :   }
    7946             :   // Parse input line, to extract file name.
    7947             :   //
    7948           5 :   symbol nm = read_long_identifier(true /* required */);
    7949           5 :   if (nm.is_null())
    7950             :     //
    7951             :     // No file name specified: ignore the entire request.
    7952             :     //
    7953           0 :     skip_line();
    7954             :   else {
    7955             :     // File name acquired: swallow the rest of the line.
    7956             :     //
    7957           5 :     while (!tok.is_newline() && !tok.is_eof())
    7958           0 :       tok.next();
    7959           5 :     errno = 0;
    7960             : 
    7961             :     // Update {llx,lly,urx,ury}_reg_contents:
    7962             :     // declaring this class instance achieves this, as an
    7963             :     // intentional side effect of object construction.
    7964             :     //
    7965           5 :     psbb_locator do_ps_file(nm.contents());
    7966             : 
    7967             :     // All done for .psbb; move on, to continue
    7968             :     // input stream processing.
    7969             :     //
    7970           5 :     tok.next();
    7971             :   }
    7972             : }
    7973             : 
    7974             : // Encode a token for output to an operating system file stream.
    7975             : // Express unencodable tokens as null characters.
    7976     1890400 : const char *encode_for_stream_output(int c)
    7977             : {
    7978             :   static char buf[3];
    7979     1890400 :   buf[0] = (0U == escape_char) ? '\\' : escape_char;
    7980     1890400 :   buf[1] = buf[2] = '\0';
    7981     1890400 :   switch (c) {
    7982           0 :   case ESCAPE_QUESTION:
    7983           0 :     buf[1] = '?';
    7984           0 :     break;
    7985           1 :   case ESCAPE_AMPERSAND:
    7986           1 :     buf[1] = '&';
    7987           1 :     break;
    7988           0 :   case ESCAPE_RIGHT_PARENTHESIS:
    7989           0 :     buf[1] = ')';
    7990           0 :     break;
    7991           0 :   case ESCAPE_UNDERSCORE:
    7992           0 :     buf[1] = '_';
    7993           0 :     break;
    7994          38 :   case ESCAPE_BAR:
    7995          38 :     buf[1] = '|';
    7996          38 :     break;
    7997           1 :   case ESCAPE_CIRCUMFLEX:
    7998           1 :     buf[1] = '^';
    7999           1 :     break;
    8000           0 :   case ESCAPE_LEFT_BRACE:
    8001           0 :     buf[1] = '{';
    8002           0 :     break;
    8003           0 :   case ESCAPE_RIGHT_BRACE:
    8004           0 :     buf[1] = '}';
    8005           0 :     break;
    8006           0 :   case ESCAPE_LEFT_QUOTE:
    8007           0 :     buf[1] = '`';
    8008           0 :     break;
    8009           0 :   case ESCAPE_RIGHT_QUOTE:
    8010           0 :     buf[1] = '\'';
    8011           0 :     break;
    8012          14 :   case ESCAPE_HYPHEN:
    8013          14 :     buf[1] = '-';
    8014          14 :     break;
    8015           2 :   case ESCAPE_BANG:
    8016           2 :     buf[1] = '!';
    8017           2 :     break;
    8018           0 :   case ESCAPE_c:
    8019           0 :     buf[1] = 'c';
    8020           0 :     break;
    8021           0 :   case ESCAPE_e:
    8022           0 :     buf[1] = 'e';
    8023           0 :     break;
    8024          24 :   case ESCAPE_E:
    8025          24 :     buf[1] = 'E';
    8026          24 :     break;
    8027         100 :   case ESCAPE_PERCENT:
    8028         100 :     buf[1] = '%';
    8029         100 :     break;
    8030          16 :   case ESCAPE_SPACE:
    8031          16 :     buf[1] = ' ';
    8032          16 :     break;
    8033           2 :   case ESCAPE_TILDE:
    8034           2 :     buf[1] = '~';
    8035           2 :     break;
    8036           0 :   case ESCAPE_COLON:
    8037           0 :     buf[1] = ':';
    8038           0 :     break;
    8039           0 :   case PUSH_GROFF_MODE:
    8040             :   case PUSH_COMP_MODE:
    8041             :   case POP_GROFFCOMP_MODE:
    8042           0 :     buf[0] = '\0';
    8043           0 :     break;
    8044     1890202 :   default:
    8045     1890202 :     if (is_invalid_input_char(c))
    8046           0 :       buf[0] = '\0';
    8047             :     else
    8048     1890202 :       buf[0] = c;
    8049     1890202 :     break;
    8050             :   }
    8051     1890400 :   return buf;
    8052             : }
    8053             : 
    8054           8 : const char *input_char_description(int c)
    8055             : {
    8056           8 :   switch (c) {
    8057           0 :   case '\n':
    8058           0 :     return "a newline character";
    8059           0 :   case '\b':
    8060           0 :     return "a backspace character";
    8061           0 :   case '\001':
    8062           0 :     return "a leader character";
    8063           0 :   case '\t':
    8064           0 :     return "a tab character";
    8065           0 :   case ' ':
    8066           0 :     return "a space character";
    8067           0 :   case '\0':
    8068           0 :     return "a node";
    8069             :   }
    8070           8 :   const size_t bufsz = sizeof "magic character code " + INT_DIGITS + 1;
    8071             :   static char buf[bufsz];
    8072           8 :   (void) memset(buf, 0, bufsz);
    8073           8 :   if (is_invalid_input_char(c)) {
    8074           0 :     const char *s = encode_for_stream_output(c);
    8075           0 :     if (*s) {
    8076           0 :       buf[0] = '\'';
    8077           0 :       strcpy(buf + 1, s);
    8078           0 :       strcat(buf, "'");
    8079           0 :       return buf;
    8080             :     }
    8081           0 :     sprintf(buf, "magic character code %d", c);
    8082           0 :     return buf;
    8083             :   }
    8084           8 :   if (csprint(c)) {
    8085           0 :     if ('\'' == c) {
    8086           0 :       buf[0] = '"';
    8087           0 :       buf[1] = c;
    8088           0 :       buf[2] = '"';
    8089             :     }
    8090             :     else {
    8091           0 :       buf[0] = '\'';
    8092           0 :       buf[1] = c;
    8093           0 :       buf[2] = '\'';
    8094             :     }
    8095           0 :     return buf;
    8096             :   }
    8097           8 :   sprintf(buf, "character code %d", c);
    8098           8 :   return buf;
    8099             : }
    8100             : 
    8101       16164 : void tag()
    8102             : {
    8103       16164 :   if (has_arg(true /* peek */)) {
    8104       16164 :     string s;
    8105             :     int c;
    8106             :     for (;;) {
    8107       16164 :       c = read_char_in_copy_mode(0 /* nullptr */);
    8108       16164 :       if (c == '"') {
    8109           0 :         c = read_char_in_copy_mode(0 /* nullptr */);
    8110           0 :         break;
    8111             :       }
    8112       16164 :       if (c != ' ' && c != '\t')
    8113       16164 :         break;
    8114             :     }
    8115       16164 :     s = "x X ";
    8116      204746 :     for (;
    8117      220910 :          (c != '\n') && (c != EOF);
    8118      204746 :          (c = read_char_in_copy_mode(0 /* nullptr */)))
    8119      204746 :       s += (char) c;
    8120       16164 :     s += '\n';
    8121       16164 :     curenv->add_node(new tag_node(s, 0));
    8122             :   }
    8123       16164 :   tok.next();
    8124       16164 : }
    8125             : 
    8126          50 : void taga()
    8127             : {
    8128          50 :   if (has_arg(true /* peek */)) {
    8129          50 :     string s;
    8130             :     int c;
    8131             :     for (;;) {
    8132          50 :       c = read_char_in_copy_mode(0 /* nullptr */);
    8133          50 :       if (c == '"') {
    8134           0 :         c = read_char_in_copy_mode(0 /* nullptr */);
    8135           0 :         break;
    8136             :       }
    8137          50 :       if (c != ' ' && c != '\t')
    8138          50 :         break;
    8139             :     }
    8140          50 :     s = "x X ";
    8141         650 :     for (;
    8142         700 :          (c != '\n') && (c != EOF);
    8143         650 :          (c = read_char_in_copy_mode(0 /* nullptr */)))
    8144         650 :       s += (char) c;
    8145          50 :     s += '\n';
    8146          50 :     curenv->add_node(new tag_node(s, 1));
    8147             :   }
    8148          50 :   tok.next();
    8149          50 : }
    8150             : 
    8151             : // .tm, .tm1, and .tmc
    8152             : 
    8153             : // TODO: Migrate `tm` (and `ab`) to work like `tm1`, interpreting a
    8154             : // leading `"` as `ds` does (and a bunch of other requests do).
    8155             : //
    8156             : // This would leave `tm1` without a distinct function, so we could
    8157             : // retire it.
    8158             : //
    8159             : // Separately, we could make `tm` (and/or `ab`) do old-style argument
    8160             : // interpretation only in compatibility mode.  We still wouldn't need
    8161             : // `tm1` because a compatibility mode document could say ".do tm foo".
    8162             : 
    8163         568 : static void terminal_write(bool do_append_newline,
    8164             :                            bool interpret_leading_spaces)
    8165             : {
    8166         568 :   if (has_arg(true /* peek */)) {
    8167             :     int c;
    8168             :     for (;;) {
    8169         568 :       c = read_char_in_copy_mode(0 /* nullptr */);
    8170         568 :       if (interpret_leading_spaces && ('"' == c)) {
    8171          21 :         c = read_char_in_copy_mode(0 /* nullptr */);
    8172          21 :         break;
    8173             :       }
    8174         547 :       if ((c != ' ') && (c != '\t'))
    8175         547 :         break;
    8176             :     }
    8177       17325 :     for (;
    8178       17893 :          (c != '\n') && (c != EOF);
    8179       17325 :          (c = read_char_in_copy_mode(0 /* nullptr */)))
    8180       17325 :       fputs(encode_for_stream_output(c), stderr);
    8181             :   }
    8182         568 :   if (do_append_newline)
    8183         547 :     fputc('\n', stderr);
    8184         568 :   fflush(stderr);
    8185         568 :   tok.next();
    8186         568 : }
    8187             : 
    8188             : // old and busted
    8189         529 : static void terminal_message_request() // .tm
    8190             : {
    8191         529 :   terminal_write(true /* do append newline */ ,
    8192             :                  false /* interpret leading spaces */);
    8193         529 : }
    8194             : 
    8195             : // the new hotness
    8196          18 : static void terminal_message1_request() // .tm1
    8197             : {
    8198          18 :   terminal_write(true /* do append newline */ ,
    8199             :                  true /* interpret leading spaces */);
    8200          18 : }
    8201             : 
    8202          21 : static void terminal_message_continuation_request() // .tmc
    8203             : {
    8204          21 :   terminal_write(false /* do append newline */ ,
    8205             :                  true /* interpret leading spaces */);
    8206          21 : }
    8207             : 
    8208             : struct grostream : object {
    8209             :   const symbol filename;
    8210             :   const symbol mode;
    8211             :   FILE * const file;
    8212             :   grostream(const char *fn, symbol m, FILE *fp);
    8213             :   ~grostream();
    8214             : };
    8215             : 
    8216           1 : grostream::grostream(const char *fn, symbol m, FILE *fp)
    8217           1 : : filename(fn), mode(m), file(fp)
    8218             : {
    8219           1 : }
    8220             : 
    8221           2 : grostream::~grostream()
    8222             : {
    8223           2 : }
    8224             : 
    8225             : object_dictionary stream_dictionary(20);
    8226             : 
    8227           0 : static void print_stream_request() // .pstream
    8228             : {
    8229           0 :   object_dictionary_iterator iter(stream_dictionary);
    8230           0 :   symbol stream_name;
    8231             :   grostream *grost;
    8232           0 :   errprint("[");
    8233           0 :   bool need_comma = false;
    8234           0 :   while (iter.get(&stream_name, (object **)&grost)) {
    8235           0 :     assert(!stream_name.is_null());
    8236           0 :     if (stream_name != 0 /* nullptr */) {
    8237           0 :       if (need_comma)
    8238           0 :         errprint(", ");
    8239           0 :       errprint("{\"stream\": ");
    8240           0 :       stream_name.json_dump();
    8241           0 :       errprint(", \"file name\": ");
    8242           0 :       grost->filename.json_dump();
    8243           0 :       errprint(", \"mode\": ");
    8244           0 :       grost->mode.json_dump();
    8245           0 :       errprint("}");
    8246           0 :       fflush(stderr);
    8247           0 :       need_comma = true;
    8248             :     }
    8249             :   }
    8250             :   // !need_comma implies that the list was empty.  JSON convention is to
    8251             :   // put a space between an empty pair of square brackets.
    8252           0 :   if (!need_comma)
    8253           0 :     errprint(" ");
    8254           0 :   errprint("]\n");
    8255           0 :   fflush(stderr);
    8256           0 :   skip_line();
    8257           0 : }
    8258             : 
    8259           1 : static void open_file(bool appending)
    8260             : {
                     :   // Shared implementation of `.open` and `.opena`.  Reads a stream
                     :   // name and then the rest of the line as a file name, opens that
                     :   // file for writing (or for appending if `appending` is true), and
                     :   // records it under the stream name in `stream_dictionary`.  A file
                     :   // already associated with the name is closed first.  If the new
                     :   // file cannot be opened, or the old one cannot be closed, an error
                     :   // is reported and the name is removed from the dictionary.
                     :   // Whenever a stream name was read, the function ends with
                     :   // tok.next() so that callers need no skip_line().
    8261           1 :   symbol stream = read_identifier(true /* required */);
                     :   if (stream.is_null())
                     :     return;
    8263           1 :   char *filename = read_rest_of_line_as_argument();
    8264           1 :   if (filename != 0 /* nullptr */) {
    8265           1 :     const char *mode = appending ? "appending" : "writing";
    8266           1 :     errno = 0;
    8267           1 :     FILE *fp = fopen(filename, appending ? "a" : "w");
    8268           1 :     if (0 /* nullptr */ == fp) {
    8269           0 :       error("cannot open file '%1' for %2: %3", filename, mode,
    8270           0 :             strerror(errno));
    8271             :       // If we already had a key of this name in the dictionary, it's
    8272             :       // invalid now.
    8273           0 :       stream_dictionary.remove(stream);
    8274             :     }
    8275             :     else {
                     :       bool old_file_closed = true;
    8276             :       grostream *oldgrost = static_cast<grostream *>(stream_dictionary
    8277           1 :                                                      .lookup(stream));
    8278           1 :       if (oldgrost != 0 /* nullptr */) {
    8279           0 :         FILE *oldfp = oldgrost->file;
    8280           0 :         assert(oldfp != 0 /* nullptr */);
    8281           0 :         if (oldfp != 0 /* nullptr */ && (fclose(oldfp) != 0)) {
                     :           // The message has three placeholders, so the stream name
                     :           // must be passed too; without it '%2' showed the system
                     :           // error text and '%3' had no argument.
                     :           // NOTE(review): '%1' is the file name from this request,
                     :           // not the name of the file that failed to close.
    8282           0 :           error("cannot close file '%1' already associated with"
    8283           0 :                 " stream '%2': %3", filename, stream.contents(),
                     :                 strerror(errno));
                     :           old_file_closed = false;
    8285             :         }
    8286             :       }
                     :       if (old_file_closed)
    8287           1 :         stream_dictionary.define(stream,
    8288           2 :                                  new grostream(filename, mode, &*fp));
                     :       else {
                     :         // Give up on this request, but don't leak the file just
                     :         // opened.  fclose() disassociates a stream even when it
                     :         // fails, so the old entry must not stay reachable either.
                     :         (void) fclose(fp);
                     :         stream_dictionary.remove(stream);
                     :       }
    8289             :     }
    8290             :   }
                     :   // Reached on every path once a stream name was read, so the input
                     :   // position stays consistent even after an error.
    8291           1 :   tok.next();
    8293             : }
    8294             : 
    8295           1 : static void open_request() // .open
    8296             : {
                     :   // Handle the `.open` request: warn if no arguments follow, refuse
                     :   // with an error unless unsafe requests are enabled, and otherwise
                     :   // open the named file for writing via open_file().
    8297           1 :   if (!has_arg(true /* peek */)) {
    8298           0 :     warning(WARN_MISSING, "file writing request expects arguments");
    8299           0 :     skip_line();
    8301             :   }
    8302           1 :   else if (!want_unsafe_requests) {
    8303           0 :     error("file writing request is not allowed in safer mode");
    8304           0 :     skip_line();
    8305             :   }
    8306             :   else {
                     :     // No skip_line() on this path; open_file() calls
                     :     // read_rest_of_line_as_argument(), tok.next().
    8307           1 :     open_file(false /* appending */);
                     :   }
    8310             : }
    8311             : 
    8312           0 : static void opena_request() // .opena
    8313             : {
                     :   // Handle the `.opena` request: warn if no arguments follow, refuse
                     :   // with an error unless unsafe requests are enabled, and otherwise
                     :   // open the named file for appending via open_file().
    8314           0 :   if (!has_arg(true /* peek */)) {
    8315           0 :     warning(WARN_MISSING, "file appending request expects arguments");
    8316           0 :     skip_line();
    8318             :   }
    8319           0 :   else if (!want_unsafe_requests) {
    8320           0 :     error("file appending request is not allowed in safer mode");
    8321           0 :     skip_line();
    8322             :   }
    8323             :   else {
                     :     // No skip_line() on this path; open_file() calls
                     :     // read_rest_of_line_as_argument(), tok.next().
    8324           0 :     open_file(true /* appending */);
                     :   }
    8327             : }
    8328             : 
    8329           1 : static void close_stream(symbol &stream)
    8330             : {
                     :   // Close the C stream recorded under `stream` in
                     :   // `stream_dictionary` and then remove that entry.  `stream` must
                     :   // not be null.  If no usable entry exists, or fclose() fails, an
                     :   // error is reported and the dictionary is left unchanged.
    8331           1 :   assert(!stream.is_null());
    8334             :   grostream *entry = static_cast<grostream *>(stream_dictionary
    8335           1 :                                               .lookup(stream));
    8337           1 :   FILE *cstream = 0 /* nullptr */;
                     :   if (entry != 0 /* nullptr */)
                     :     cstream = entry->file;
    8338             :   // We shouldn't have stored a null pointer in the first place.
    8339           1 :   assert((0 /* nullptr */ == entry) || (cstream != 0 /* nullptr */));
    8343           1 :   if (0 /* nullptr */ == cstream) {
    8344           0 :     error("cannot close nonexistent stream '%1'", stream.contents());
    8345           0 :     return;
    8346             :   }
    8348           1 :   if (fclose(cstream) != 0) {
    8349           0 :     error("cannot close stream '%1': %2", stream.contents(),
    8350           0 :           strerror(errno));
    8351           0 :     return;
    8352             :   }
    8354           1 :   stream_dictionary.remove(stream);
    8355             : }
    8356             : 
    8357             : // Call this from exit_troff().
    8358        1403 : static void close_all_streams()
    8359             : {
                     :   // Walk `stream_dictionary`, warn about each stream that is still
                     :   // registered, and close it with close_stream().
    8360        1403 :   object_dictionary_iterator iter(stream_dictionary);
                     :   // Receive the value in the pointer type the iterator writes
                     :   // (`object *`) instead of punning it through a `FILE *`.  The
                     :   // value is not used here; close_stream() looks the entry up again
                     :   // by name.
    8361             :   object *entry = 0 /* nullptr */;
    8362        1403 :   symbol stream;
    8363        1403 :   while (iter.get(&stream, &entry)) {
    8364           0 :     assert(!stream.is_null());
    8365           0 :     if (stream != 0 /* nullptr */) {
    8366           0 :       warning(WARN_FILE, "stream '%1' still open; closing",
    8367           0 :               stream.contents());
    8368           0 :       close_stream(stream);
    8369             :     }
    8370             :   }
    8371        1403 : }
    8372             : 
    8373           1 : static void close_request() // .close
    8374             : {
                     :   // Handle the `.close` request: read a stream name and close that
                     :   // stream, or warn if the request has no argument.  The rest of
                     :   // the input line is skipped in either case.
    8375           1 :   if (has_arg(true /* peek */)) {
    8380           1 :     symbol stream = read_identifier();
    8381             :     // Testing has_arg() should have ensured this.
    8382           1 :     assert(stream != 0 /* nullptr */);
    8383           1 :     if (!stream.is_null())
    8384           1 :       close_stream(stream);
                     :   }
                     :   else
    8376           0 :     warning(WARN_MISSING, "stream closing request expects an argument");
    8385           1 :   skip_line();
    8386             : }
    8387             : 
    8388             : // .write and .writec
    8389             : 
    8390           4 : static void do_write_request(bool do_append_newline)
    8391             : {
                     :   // Shared implementation of `.write` and `.writec`.  Reads a stream
                     :   // name, then copies the rest of the input line (read in copy mode)
                     :   // to that stream.  Spaces before the text and one leading double
                     :   // quote are dropped.  A newline is written afterward if
                     :   // `do_append_newline` is true, and the stream is flushed.
    8392           4 :   symbol stream = read_identifier(true /* required */);
    8393           4 :   if (stream.is_null()) {
    8394           0 :     skip_line();
    8395           0 :     return;
    8396             :   }
    8397             :   grostream *entry = static_cast<grostream *>(stream_dictionary
    8398           4 :                                               .lookup(stream));
    8404             :   // Invariant: if the groff stream exists, the backing C stream must.
    8405           4 :   assert((0 /* nullptr */ == entry)
                     :          || (entry->file != 0 /* nullptr */));
    8406           4 :   FILE *out = 0 /* nullptr */;
                     :   if (entry != 0 /* nullptr */)
                     :     out = entry->file;
    8407           4 :   if (0 /* nullptr */ == out) {
    8408           0 :     error("cannot write to nonexistent stream '%1'", stream.contents());
    8409           0 :     skip_line();
    8410           0 :     return;
    8411             :   }
    8412           4 :   if (has_arg(true /* peek */)) {
                     :     int ch;
                     :     // Skip the spaces that separate the stream name from the text.
                     :     do
    8413           3 :       ch = read_char_in_copy_mode(0 /* nullptr */);
    8414           3 :     while (' ' == ch);
                     :     // Drop one leading double quote.
    8416           3 :     if ('"' == ch)
    8417           0 :       ch = read_char_in_copy_mode(0 /* nullptr */);
    8418           6 :     for (; (ch != '\n') && (ch != EOF);
    8420           3 :          ch = read_char_in_copy_mode(0 /* nullptr */))
    8419           3 :       fputs(encode_for_stream_output(ch), out);
    8422             :   }
    8423           4 :   if (do_append_newline)
    8424           4 :     fputc('\n', out);
    8425           4 :   fflush(out);
    8426           4 :   tok.next();
    8427             : }
    8428             : 
    8429           4 : static void stream_write_request() // .write
    8430             : {
                     :   // `.write` ends the text it writes with a newline.
                     :   const bool do_append_newline = true;
    8431           4 :   do_write_request(do_append_newline);
    8432           4 : }
    8433             : 
    8434           0 : static void stream_write_continuation_request() // .writec
    8435             : {
                     :   // `.writec` writes its text without a trailing newline.
                     :   const bool do_append_newline = false;
    8436           0 :   do_write_request(do_append_newline);
    8437           0 : }
    8438             : 
    8439           0 : static void stream_write_macro_request() // .writem
    8440             : {
                     :   // Handle the `.writem` request: read a stream name and then a
                     :   // macro name, and write the contents of that macro to the stream,
                     :   // flushing it afterward.  Errors are reported if the stream does
                     :   // not exist or if the second name refers to a request rather
                     :   // than a macro.  The rest of the input line is always skipped.
    8441           0 :   symbol stream = read_identifier(true /* required */);
    8442           0 :   if (stream.is_null()) {
    8443           0 :     skip_line();
    8444           0 :     return;
    8445             :   }
    8446             :   grostream *grost = static_cast<grostream *>(stream_dictionary
    8447           0 :                                               .lookup(stream));
                     :   // lookup() yields a null pointer for a name that was never
                     :   // opened, so `grost` must be tested before it is dereferenced.
    8448           0 :   FILE *fp = (grost != 0 /* nullptr */) ? grost->file
                     :                                         : 0 /* nullptr */;
    8449           0 :   if (0 /* nullptr */ == fp) {
    8450           0 :     error("no stream named '%1'", stream.contents());
    8451           0 :     skip_line();
    8452           0 :     return;
    8453             :   }
    8454           0 :   symbol s = read_identifier(true /* required */);
    8455           0 :   if (s.is_null()) {
    8456           0 :     skip_line();
    8457           0 :     return;
    8458             :   }
    8459           0 :   request_or_macro *p = lookup_request(s);
    8460           0 :   macro *m = p->to_macro();
    8461           0 :   if (0 /* nullptr */ == m)
    8462           0 :     error("cannot write request '%1' to a stream", s.contents());
    8463             :   else {
    8464           0 :     string_iterator iter(*m);
    8465             :     for (;;) {
    8466           0 :       int c = iter.get(0 /* nullptr */);
    8467           0 :       if (c == EOF)
    8468           0 :         break;
    8469           0 :       fputs(encode_for_stream_output(c), fp);
    8470           0 :     }
    8471           0 :     fflush(fp);
    8472             :   }
    8473           0 :   skip_line();
    8474             : }
    8475             : 
    8476           0 : void warnscale_request() // .warnscale
    8477             : {
                     :   // Handle the `.warnscale` request: set `warn_scale` and
                     :   // `warn_scaling_unit` from the scaling unit character at the
                     :   // current token.  An unrecognized unit draws a warning and is
                     :   // replaced by 'i'.
    8478           0 :   if (!has_arg()) {
    8479           0 :     warning(WARN_MISSING, "warning scaling unit configuration request"
    8480             :             " expects a scaling unit argument");
    8481           0 :     skip_line();
    8482           0 :     return;
    8483             :   }
    8484           0 :   int unit = tok.ch(); // safely compares to char literals; TODO: grochar
                     :   switch (unit) {
    8485           0 :   case 'u':
    8486           0 :     warn_scale = 1.0;
                     :     break;
    8487           0 :   case 'i':
    8488           0 :     warn_scale = double(units_per_inch);
                     :     break;
    8489           0 :   case 'c':
    8490           0 :     warn_scale = double(units_per_inch / 2.54);
                     :     break;
    8491           0 :   case 'p':
    8492           0 :     warn_scale = double(units_per_inch / 72.0);
                     :     break;
    8493           0 :   case 'P':
    8494           0 :     warn_scale = double(units_per_inch / 6.0);
                     :     break;
    8495             :   default:
    8496           0 :     warning(WARN_SCALE,
    8497             :             "%1 is not a valid scaling unit; using 'i'",
    8498           0 :             tok.description());
    8499           0 :     unit = 'i';
    8500           0 :     warn_scale = double(units_per_inch);
                     :     break;
    8501             :   }
    8502           0 :   warn_scaling_unit = unit;
    8503           0 :   skip_line();
    8504             : }
    8505             : 
    8506           0 : void spreadwarn_request() // .spreadwarn
    8507             : {
                     :   // Handle the `.spreadwarn` request.  With a horizontal measurement
                     :   // argument (default unit 'm'), set `spread_limit` to that
                     :   // measurement divided by the current environment's size, or by
                     :   // `hresolution` if that size is zero.  Negative arguments are
                     :   // treated as zero.  Without a usable argument, toggle the limit.
    8508           0 :   hunits limit;
    8509           0 :   if (!has_arg() || !read_hunits(&limit, 'm')) {
                     :     // no arg toggles on/off without changing value; we mirror at
                     :     // -0.5 to make zero a valid value
    8517           0 :     spread_limit = -spread_limit - 1;
                     :     skip_line();
                     :     return;
                     :   }
    8510           0 :   if (limit < 0)
    8511           0 :     limit = 0;
    8512           0 :   hunits em = curenv->get_size();
    8513           0 :   spread_limit = (double) limit.to_units()
    8514           0 :                  / (em.is_zero() ? hresolution : em.to_units());
    8520           0 :   skip_line();
    8521           0 : }
    8522             : 
    8523             : // Keep this in sync with "src/libs/libgroff/nametoindex.cpp".
    8524             : // constexpr // C++11 (NOTE(review): presumably a reminder to declare this constexpr once C++11 can be assumed)
    8525             : static const char char_prefix[] = { 'c', 'h', 'a', 'r' }; // four letters, no terminating null character
    8526             : // constexpr // C++11 (NOTE(review): presumably the same reminder as above)
    8527             : static const size_t char_prefix_len = sizeof char_prefix; // number of letters in char_prefix
    8528             : 
    8529        1418 : static void init_charset_table()
    8530             : {
                     :   // Fill `charset_table` with the charinfo for each of the 256 code
                     :   // points, looked up by the name "char" followed by the decimal
                     :   // code.  Each entry gets its code, and letters (per csalpha())
                     :   // get a hyphenation code.  Afterward, assign the initial flags of
                     :   // selected ordinary and special characters and set
                     :   // `page_character`.
    8531             :   char name[16];
    8532        1418 :   (void) strncpy(name, char_prefix, char_prefix_len);
                     :   // The decimal digits are written directly after the prefix.
                     :   char *digits = name + char_prefix_len;
    8533      364426 :   for (int code = 0; code < 256; code++) {
    8534      363008 :     (void) strcpy(digits, i_to_a(code));
    8535      363008 :     charinfo *ci = lookup_charinfo(symbol(name));
                     :     charset_table[code] = ci;
    8536      363008 :     ci->set_ascii_code(code);
    8537      363008 :     if (csalpha(code))
    8538       73736 :       ci->set_hyphenation_code(cmlower(code));
    8539             :   }
                     :   // Flags of ordinary characters, applied in table order.
                     :   static const struct {
                     :     unsigned char code;
                     :     int flags;
                     :   } ordinary_flags[] = {
                     :     { '.', charinfo::ENDS_SENTENCE },
                     :     { '?', charinfo::ENDS_SENTENCE },
                     :     { '!', charinfo::ENDS_SENTENCE },
                     :     { '-', charinfo::ALLOWS_BREAK_AFTER },
                     :     { '"', charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE },
                     :     { '\'', charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE },
                     :     { ')', charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE },
                     :     { ']', charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE },
                     :     { '*', charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE },
                     :   };
                     :   const size_t n_ordinary
                     :     = sizeof ordinary_flags / sizeof ordinary_flags[0];
        1418 :   for (size_t i = 0; i < n_ordinary; i++)
                     :     charset_table[ordinary_flags[i].code]
                     :       ->set_flags(ordinary_flags[i].flags);
                     :   // Flags of special characters, looked up by name in table order.
                     :   static const struct {
                     :     const char *name;
                     :     int flags;
                     :   } special_flags[] = {
                     :     { "dg", charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE },
                     :     { "dd", charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE },
                     :     { "rq", charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE },
                     :     { "cq", charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE },
                     :     { "em", charinfo::ALLOWS_BREAK_AFTER },
                     :     { "hy", charinfo::ALLOWS_BREAK_AFTER },
                     :     { "ul", charinfo::OVERLAPS_HORIZONTALLY },
                     :     { "rn", charinfo::OVERLAPS_HORIZONTALLY },
                     :     { "radicalex", charinfo::OVERLAPS_HORIZONTALLY },
                     :     { "sqrtex", charinfo::OVERLAPS_HORIZONTALLY },
                     :     { "ru", charinfo::OVERLAPS_HORIZONTALLY },
                     :     { "br", charinfo::OVERLAPS_VERTICALLY },
                     :   };
                     :   const size_t n_special
                     :     = sizeof special_flags / sizeof special_flags[0];
                     :   for (size_t i = 0; i < n_special; i++)
                     :     lookup_charinfo(symbol(special_flags[i].name))
                     :       ->set_flags(special_flags[i].flags);
    8561        1418 :   page_character = charset_table['%'];
    8562        1418 : }
    8563             : 
    8564        1418 : static void init_hpf_code_table()
    8565             : {
                     :   // Start every one of the 256 entries of `hpf_code_table` at the
                     :   // value cmlower() gives for its index.
                     :   int code = 0;
    8566      364426 :   while (code < 256) {
    8567      363008 :     hpf_code_table[code] = cmlower(code);
                     :     code++;
                     :   }
    8568        1418 : }
    8569             : 
    8570      177201 : static void do_translate(bool transparently, bool as_input)
    8571             : {
                     :   // Shared implementation of `.tr`, `.trnt`, and `.trin`.  Reads
                     :   // characters from the rest of the line in pairs and records a
                     :   // translation from the first to the second of each pair.  A space,
                     :   // stretchable space, dummy, or hyphen indicator as the second
                     :   // member selects the matching special translation; a first member
                     :   // with nothing after it gets TRANSLATE_SPACE.  A character paired
                     :   // with itself is given a null translation target.
                     :   // `transparently` and `as_input` are handed to the charinfo
                     :   // setters unchanged.
    8572      177201 :   tok.skip_spaces();
    8573      354444 :   for (;;) {
                     :     if (tok.is_newline() || tok.is_eof())
                     :       break;
    8574      177513 :     if (tok.is_space()) {
    8575             :       // This is a really bizarre troff feature.
    8576           0 :       tok.next();
    8577           0 :       translate_space_to_dummy = tok.is_dummy();
    8578           0 :       if (tok.is_newline() || tok.is_eof())
    8579           0 :         break;
    8580           0 :       error("cannot translate space character; ignoring");
    8581           0 :       tok.next();
    8582           0 :       continue;
    8583             :     }
    8584      177513 :     charinfo *from = tok.get_charinfo(true /* required */);
    8585      177513 :     if (0 /* nullptr */ == from) {
    8586           0 :       assert(0 == "attempted to use token without charinfo in character"
    8587             :              " translation request");
    8588             :       break;
    8589             :     }
    8590      177513 :     tok.next();
                     :     // A first member with no partner left on the line.
    8591      177513 :     const bool is_unpaired = tok.is_newline() || tok.is_eof();
    8596      177243 :     if (is_unpaired || tok.is_space()) {
    8592         270 :       from->set_special_translation(charinfo::TRANSLATE_SPACE,
    8593             :                                     transparently);
                     :       if (is_unpaired)
    8594         270 :         break;
    8595             :     }
    8599      177243 :     else if (tok.is_stretchable_space())
    8600        1418 :       from->set_special_translation(charinfo::TRANSLATE_STRETCHABLE_SPACE,
    8601             :                                     transparently);
    8602      175825 :     else if (tok.is_dummy())
    8603           0 :       from->set_special_translation(charinfo::TRANSLATE_DUMMY,
    8604             :                                     transparently);
    8605      175825 :     else if (tok.is_hyphen_indicator())
    8606           0 :       from->set_special_translation(charinfo::TRANSLATE_HYPHEN_INDICATOR,
    8607             :                                     transparently);
    8608             :     else {
    8609      175825 :       charinfo *to = tok.get_charinfo(true /* required */);
    8610      175825 :       if (0 /* nullptr */ == to) {
    8611           0 :         assert(0 == "attempted to use token without charinfo in"
    8612             :                " character translation request");
    8613             :         break;
    8614             :       }
                     :       // A character paired with itself gets a null target.
    8615      175825 :       charinfo *target = (from == to)
                     :                          ? static_cast<charinfo *>(0 /* nullptr */)
                     :                          : to;
    8618      161131 :       from->set_translation(target, transparently, as_input);
    8619             :     }
    8620      177243 :     tok.next();
    8621             :   }
    8622      177201 :   skip_line();
    8623      177201 : }
    8624             : 
    8625       20839 : void translate() // .tr
    8626             : {
                     :   // Handle the `.tr` request: warn and skip the line if there are
                     :   // no arguments; otherwise let do_translate() process them.
    8627       20839 :   if (has_arg()) {
    8633       20839 :     do_translate(true /* transparently */, false /* as_input */);
                     :     return;
                     :   }
    8628           0 :   warning(WARN_MISSING, "character translation request expects"
    8629             :           " sequence of character pairs as argument");
    8630           0 :   skip_line();
    8634             : }
    8635             : 
    8636           0 : void translate_no_transparent() // .trnt
    8637             : {
                     :   // Handle the `.trnt` request: like `.tr`, but do_translate() is
                     :   // called with `transparently` false.
    8638           0 :   if (has_arg()) {
    8644           0 :     do_translate(false /* transparently */, false /* as_input */);
                     :     return;
                     :   }
    8639           0 :   warning(WARN_MISSING, "character non-diversion translation request"
    8640             :           " expects sequence of character pairs as argument");
    8641           0 :   skip_line();
    8645             : }
    8646             : 
    8647      156362 : void translate_input() // .trin
    8648             : {
                     :   // Handle the `.trin` request: like `.tr`, but do_translate() is
                     :   // called with `as_input` true.
    8649      156362 :   if (has_arg()) {
    8655      156362 :     do_translate(true /* transparently */, true /* as_input */);
                     :     return;
                     :   }
    8650           0 :   warning(WARN_MISSING, "character non-asciification translation"
    8651             :           " request expects sequence of character pairs as argument");
    8652           0 :   skip_line();
    8656             : }
    8657             : 
    8658         567 : static void set_character_flags_request() // .cflags
    8659             : {
                     :   // Handle the `.cflags` request: read an integer flag set, then
                     :   // apply it to every character named on the rest of the line.
                     :   // Flag values outside 0..charinfo::CFLAGS_MAX and contradictory
                     :   // combinations are rejected with a warning.  If a character has
                     :   // a translation, the flags go to the translation target instead.
    8660         567 :   if (!has_arg()) {
    8661           0 :     warning(WARN_MISSING, "character flags configuration request"
    8662             :             " expects arguments");
    8663           0 :     skip_line();
    8664           0 :     return;
    8665             :   }
    8666             :   int flags;
    8667         567 :   if (!read_integer(&flags)) {
                     :     skip_line();
                     :     return;
                     :   }
    8668         567 :   if ((flags < 0) || (flags > charinfo::CFLAGS_MAX)) {
    8669           0 :     warning(WARN_RANGE, "character flags must be in range 0..%1,"
    8670           0 :             " got %2", charinfo::CFLAGS_MAX, flags);
    8671           0 :     skip_line();
    8672           0 :     return;
    8673             :   }
                     :   // Each pair of flags below cannot sensibly be set together.
    8674         567 :   const bool has_sentence_conflict
                     :     = (flags & charinfo::ENDS_SENTENCE)
    8675           1 :       && (flags & charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE);
    8676         567 :   const bool has_break_before_conflict
                     :     = (flags & charinfo::ALLOWS_BREAK_BEFORE)
    8677           0 :       && (flags & charinfo::PROHIBITS_BREAK_BEFORE);
    8678         567 :   const bool has_break_after_conflict
                     :     = (flags & charinfo::ALLOWS_BREAK_AFTER)
    8679          28 :       && (flags & charinfo::PROHIBITS_BREAK_AFTER);
                     :   if (has_sentence_conflict || has_break_before_conflict
                     :       || has_break_after_conflict) {
    8680           0 :     warning(WARN_SYNTAX, "ignoring contradictory character flags: "
    8681           0 :             "%1", flags);
    8682           0 :     skip_line();
    8683           0 :     return;
    8684             :   }
    8685         567 :   if (!has_arg()) {
    8686           0 :     warning(WARN_MISSING, "character flags configuration request"
    8687             :             " expects one or more characters to configure");
    8688           0 :     skip_line();
    8689           0 :     return;
    8690             :   }
    8691        1297 :   for (; has_arg(); tok.next()) {
    8692         730 :     charinfo *ci = tok.get_charinfo(true /* required */);
    8693         730 :     if (0 /* nullptr */ == ci) {
    8694           0 :       assert(0 == "attempted to use token without charinfo in"
    8695             :              " character flags assignment request");
                     :       continue;
                     :     }
    8697         730 :     charinfo *target = ci->get_translation();
    8698         730 :     if (0 /* nullptr */ == target)
                     :       target = ci;
    8700         730 :     target->set_flags(flags);
    8703             :   }
    8705         567 :   skip_line();
    8706             : }
    8707             : 
    8708      128375 : static void set_hyphenation_codes() // .hcode
    8709             : {
                     :   // Handle the `.hcode` request: read characters in pairs and give
                     :   // the first of each pair (the destination) the hyphenation code
                     :   // of the second (the source).  An ordinary character paired with
                     :   // itself receives its own character code as its hyphenation
                     :   // code.  Numerals are rejected in either position.  If the
                     :   // destination has a translation that is translatable as input,
                     :   // the translation target receives the same code.  Processing
                     :   // stops at the first error.
    8710      128375 :   if (!has_arg()) {
    8711           0 :     warning(WARN_MISSING, "hyphenation code assignment request expects"
    8712             :             " arguments");
    8713           0 :     skip_line();
    8714           0 :     return;
    8715             :   }
    8716      289416 :   while (has_arg()) {
    8717      161041 :     unsigned char dst_ch = tok.ch();
    8718      161041 :     if (csdigit(dst_ch)) {
    8719           0 :       error("cannot apply a hyphenation code to a numeral");
    8720           0 :       break;
    8721             :     }
    8722      161041 :     charinfo *dst = tok.get_charinfo();
    8723      161041 :     if (('\0' == dst_ch) && (0 /* nullptr */ == dst)) {
    8725           0 :       error("expected ordinary, special, or indexed character,"
    8726           0 :             " got %1", tok.description());
    8727           0 :       break;
    8728             :     }
    8730      161041 :     tok.next();
    8731      161041 :     if (!has_arg()) {
    8732           0 :       error("hyphenation codes must be specified in pairs");
    8733           0 :       break;
    8734             :     }
    8735      161041 :     unsigned char src_ch = tok.ch();
    8736      161041 :     if (csdigit(src_ch)) {
    8737           0 :       error("cannot use the hyphenation code of a numeral");
    8738           0 :       break;
    8739             :     }
    8740      161041 :     unsigned char code = 0U;
    8741      161041 :     charinfo *src = tok.get_charinfo();
    8742      161041 :     if (src != 0 /* nullptr */)
    8743             :       // Common case: assign destination character the hyphenation code
    8744             :       // of the source character.
    8745      161041 :       code = src->get_hyphenation_code();
    8746      161041 :     if ('\0' == src_ch) {
    8747          25 :       if (0 /* nullptr */ == src) {
    8748           0 :         error("expected ordinary, special, or indexed character,"
    8749           0 :               " got %1", tok.description());
    8750           0 :         break;
    8751             :       }
    8752          25 :       code = src->get_hyphenation_code();
    8753             :     }
    8757      161016 :     else if (src_ch == dst_ch)
    8755             :       // If assigning a ordinary character's hyphenation code to itself,
    8756             :       // use its character code point as the value.
    8758       49165 :       code = tok.ch();
    8760      161041 :     dst->set_hyphenation_code(code);
    8761      161041 :     charinfo *translation = dst->get_translation();
    8762      161041 :     if ((translation != 0 /* nullptr */)
                     :         && translation->is_translatable_as_input())
    8763       95060 :       translation->set_hyphenation_code(code);
    8764      161041 :     tok.next();
    8765      161041 :     tok.skip_spaces();
    8766             :   }
    8767      128375 :   skip_line();
    8768             : }
    8769             : 
    8770           0 : void hyphenation_patterns_file_code() // .hpfcode
    8771             : {
                     :   // Handle the `.hpfcode` request: read pairs of integers in the
                     :   // range 0..255 and store the second of each pair in
                     :   // `hpf_code_table` at the index given by the first.  An error
                     :   // announcing the request's planned withdrawal is reported on
                     :   // every call.  Processing stops at the first invalid or missing
                     :   // value.
    8772           0 :   error("hyphenation pattern file code assignment request will be"
    8773             :         " withdrawn in a future groff release; migrate to 'hcode'");
    8774           0 :   if (!has_arg()) {
    8775           0 :     warning(WARN_MISSING, "hyphenation pattern file code assignment"
    8776             :             " request expects arguments");
    8777           0 :     skip_line();
    8778           0 :     return;
    8779             :   }
    8780           0 :   while (!tok.is_newline() && !tok.is_eof()) {
    8781             :     int in_code, out_code;
    8782           0 :     if (!(read_integer(&in_code)
                     :           && (0 <= in_code) && (in_code <= 255))) {
    8797           0 :       error("input hyphenation code must be integer in the range 0..255");
    8798           0 :       break;
                     :     }
    8783           0 :     if (!has_arg()) {
    8784           0 :       error("missing output hyphenation code");
    8785           0 :       break;
    8786             :     }
    8787           0 :     if (!(read_integer(&out_code)
                     :           && (0 <= out_code) && (out_code <= 255))) {
    8792           0 :       error("output hyphenation code must be integer in the range 0..255");
    8793           0 :       break;
    8794             :     }
    8788           0 :     hpf_code_table[in_code] = out_code;
    8789           0 :     tok.skip_spaces();
    8800             :   }
    8801           0 :   skip_line();
    8802             : }
    8803             : 
    8804             : dictionary char_class_dictionary(501); // NOTE(review): presumably holds character classes defined by `.class`, with 501 as an initial table size -- confirm both
    8805             : 
    8806          54 : static void define_class_request() // .class
    8807             : {
    8808          54 :   tok.skip_spaces();
    8809          54 :   symbol nm = read_identifier(true /* required */);
    8810          54 :   if (nm.is_null()) {
    8811           0 :     skip_line();
    8812           1 :     return;
    8813             :   }
    8814          54 :   charinfo *ci = lookup_charinfo(nm);
    8815             :   // Assign the charinfo an empty macro as a hack to record the
    8816             :   // file:line location of its definition.
    8817          54 :   macro *m = new macro;
    8818          54 :   (void) ci->set_macro(m);
    8819          54 :   charinfo *child1 = 0 /* nullptr */, *child2 = 0 /* nullptr */;
    8820          54 :   bool just_chained_a_range_expression = false;
    8821         742 :   while (!tok.is_newline() && !tok.is_eof()) {
    8822         689 :     tok.skip_spaces();
    8823             :     // Chained range expressions like
    8824             :     //   \[u3041]-\[u3096]-\[u30FF]
    8825             :     // are not valid.
    8826             :     // TODO: use grochar
    8827         689 :     if ((child1 != 0 /* nullptr */) && (tok.ch() == int('-'))) {
    8828          35 :       tok.next();
    8829          35 :       child2 = tok.get_charinfo();
    8830          35 :       if (0 /* nullptr */ == child2) {
    8831           0 :         warning(WARN_MISSING,
    8832             :                 "missing end of character range in class '%1'",
    8833           0 :                 nm.contents());
    8834           0 :         skip_line();
    8835           0 :         return;
    8836             :       }
    8837          35 :       if (child1->is_class() || child2->is_class()) {
    8838           0 :         warning(WARN_SYNTAX,
    8839             :                 "a nested character class is not allowed in a range"
    8840             :                 " definition");
    8841           0 :         skip_line();
    8842           0 :         return;
    8843             :       }
    8844          35 :       int u1 = child1->get_unicode_mapping();
    8845          35 :       int u2 = child2->get_unicode_mapping();
    8846          35 :       if (u1 < 0) {
    8847           0 :         warning(WARN_SYNTAX,
    8848             :                 "invalid start value in character range");
    8849           0 :         skip_line();
    8850           0 :         return;
    8851             :       }
    8852          35 :       if (u2 < 0) {
    8853           0 :         warning(WARN_SYNTAX,
    8854             :                 "invalid end value in character range");
    8855           0 :         skip_line();
    8856           0 :         return;
    8857             :       }
    8858          35 :       ci->add_to_class(u1, u2);
    8859          35 :       child1 = child2 = 0 /* nullptr */;
    8860          35 :       just_chained_a_range_expression = true;
    8861             :     }
    8862         654 :     else if (child1 != 0 /* nullptr */) {
    8863         582 :       if (child1->is_class()) {
    8864           0 :         if (ci == child1) {
    8865           0 :           warning(WARN_SYNTAX, "cannot nest character classes");
    8866           0 :           skip_line();
    8867           0 :           return;
    8868             :         }
    8869           0 :         ci->add_to_class(child1);
    8870             :       }
    8871             :       else {
    8872         582 :         int u1 = child1->get_unicode_mapping();
    8873         582 :         if (u1 < 0) {
    8874           0 :           warning(WARN_SYNTAX,
    8875             :                   "invalid character value in class '%1'",
    8876           0 :                   nm.contents());
    8877           0 :           skip_line();
    8878           0 :           return;
    8879             :         }
    8880         582 :         ci->add_to_class(u1);
    8881             :       }
    8882         582 :       child1 = 0 /* nullptr */;
    8883             :     }
    8884         689 :     if (tok.is_any_character())
    8885         688 :       child1 = tok.get_charinfo(true /* required */);
    8886             :     else
    8887             :       // If we encountered a space or nonsense, we cannot be
    8888             :       // interpreting a range expression; there should be no "child1".
    8889           1 :       assert(0 /* nullptr */ == child1);
    8890         689 :     tok.next();
    8891         689 :     if (0 /* nullptr */ == child1) {
    8892           1 :       if (!tok.is_newline())
    8893           1 :         skip_line();
    8894           1 :       break;
    8895             :     }
    8896         688 :     if (just_chained_a_range_expression) {
    8897             :       // Throw away `child1` so we don't duplicatively add the second
    8898             :       // end point of a range as a singleton.  See Savannah #67718.
    8899          35 :       child1 = 0 /* nullptr */;
    8900          35 :       just_chained_a_range_expression = false;
    8901             :     }
    8902             :   }
    8903          54 :   if (child1 != 0 /* nullptr */) {
    8904          36 :     if (child1->is_class()) {
    8905           0 :       if (ci == child1) {
    8906           0 :         warning(WARN_SYNTAX, "cannot nest character classes");
    8907           0 :         skip_line();
    8908           0 :         return;
    8909             :       }
    8910           0 :       ci->add_to_class(child1);
    8911             :     }
    8912             :     else {
    8913          36 :       int u1 = child1->get_unicode_mapping();
    8914          36 :       if (u1 < 0) {
    8915           1 :         warning(WARN_SYNTAX,
    8916             :                 "invalid character value in class '%1'",
    8917           1 :                 nm.contents());
    8918           1 :         skip_line();
    8919           1 :         return;
    8920             :       }
    8921          35 :       ci->add_to_class(u1);
    8922             :     }
    8923          35 :     child1 = 0 /* nullptr */;
    8924             :   }
    8925          53 :   assert(ci != 0 /* nullptr */);
    8926          53 :   if (ci != 0 /* nullptr */ && !ci->is_class()) {
    8927           0 :     warning(WARN_SYNTAX,
    8928             :             "empty class definition for '%1'",
    8929           0 :             nm.contents());
    8930           0 :     skip_line();
    8931           0 :     return;
    8932             :   }
    8933          53 :   (void) char_class_dictionary.lookup(nm, ci);
    8934          53 :   skip_line();
    8935             : }
    8936             : 
    8937             : // forward declaration
    8938             : static charinfo *get_charinfo_by_index(int n,
    8939             :                                        bool suppress_creation = false);
    8940             : 
    8941     1161889 : charinfo *token::get_charinfo(bool required, bool suppress_creation)
    8942             : {
    8943     1161889 :   if (TOKEN_CHAR == type)
    8944      383175 :     return charset_table[c];
    8945      778714 :   if ((TOKEN_SPECIAL_CHAR == type)
    8946       45036 :       || (TOKEN_DELIMITED_SPECIAL_CHAR == type))
    8947      733730 :     return lookup_charinfo(nm, suppress_creation);
    8948       44984 :   if (TOKEN_INDEXED_CHAR == type)
    8949       24934 :     return get_charinfo_by_index(val, suppress_creation);
    8950       20050 :   if (TOKEN_ESCAPE == type) {
    8951           1 :     if (escape_char != 0U)
    8952           1 :       return charset_table[escape_char];
    8953             :     else {
    8954             :       // XXX: Is this possible?  token::add_to_zero_width_node_list()
    8955             :       // and token::process() don't add this token type if the escape
    8956             :       // character is null.  If not, this should be an assert().  Also
    8957             :       // see escape_off_request().
    8958           0 :       error("escaped 'e' used while escape sequences disabled");
    8959           0 :       return 0 /* nullptr */;
    8960             :     }
    8961             :   }
    8962       20049 :   if (required) {
    8963           0 :     if (TOKEN_EOF == type || TOKEN_NEWLINE == type)
    8964           0 :       warning(WARN_MISSING, "missing ordinary, special, or indexed"
    8965             :                             " character");
    8966             :     else
    8967           0 :       error("expected ordinary, special, or indexed character, got %1",
    8968           0 :             description());
    8969             :   }
    8970       20049 :   return 0 /* nullptr */;
    8971             : }
    8972             : 
    8973        1798 : charinfo *read_character(/* TODO?: bool required */)
    8974             : {
    8975        1798 :   tok.skip_spaces();
    8976        1798 :   charinfo *ci = tok.get_charinfo();
    8977             :   // TODO?: if (required && (0 /* nullptr */ == ci))
    8978        1798 :   if (0 /* nullptr */ == ci)
    8979         517 :     tok.diagnose_non_character();
    8980             :   else
    8981        1281 :     tok.next();
    8982        1798 :   return ci;
    8983             : }
    8984             : 
    8985             : // this is for \Z
    8986             : 
    8987        4079 : bool token::add_to_zero_width_node_list(node **pp)
    8988             : {
    8989        4079 :   hunits w;
    8990        4079 :   int s = 0; /* space count, possibly populated by `nspaces()` */
    8991        4079 :   node *n = 0 /* nullptr */;
    8992        4079 :   switch (type) {
    8993        1777 :   case TOKEN_CHAR:
    8994        1777 :     *pp = (*pp)->add_char(charset_table[c], curenv, &w, &s);
    8995        1777 :     break;
    8996          85 :   case TOKEN_DUMMY:
    8997          85 :     n = new dummy_node;
    8998          85 :     break;
    8999           0 :   case TOKEN_ESCAPE:
    9000           0 :     if (escape_char != 0U)
    9001           0 :       *pp = (*pp)->add_char(charset_table[escape_char], curenv, &w, &s);
    9002           0 :     break;
    9003           0 :   case TOKEN_HYPHEN_INDICATOR:
    9004           0 :     *pp = (*pp)->add_discretionary_hyphen();
    9005           0 :     break;
    9006         421 :   case TOKEN_ITALIC_CORRECTION:
    9007         421 :     *pp = (*pp)->add_italic_correction(&w);
    9008         421 :     break;
    9009           0 :   case TOKEN_LEFT_BRACE:
    9010           0 :     break;
    9011           0 :   case TOKEN_MARK_INPUT:
    9012           0 :     set_register(nm, curenv->get_input_line_position().to_units());
    9013           0 :     break;
    9014        1555 :   case TOKEN_NODE:
    9015             :   case TOKEN_DELIMITED_HORIZONTAL_MOTION:
    9016             :   case TOKEN_HORIZONTAL_MOTION:
    9017        1555 :     n = nd;
    9018        1555 :     nd = 0 /* nullptr */;
    9019        1555 :     break;
    9020           0 :   case TOKEN_INDEXED_CHAR:
    9021           0 :     *pp = (*pp)->add_char(get_charinfo_by_index(val), curenv, &w, &s);
    9022           0 :     break;
    9023           0 :   case TOKEN_RIGHT_BRACE:
    9024           0 :     break;
    9025         123 :   case TOKEN_SPACE:
    9026         123 :     n = new hmotion_node(curenv->get_space_width(),
    9027         123 :                          curenv->get_fill_color());
    9028         123 :     break;
    9029         112 :   case TOKEN_SPECIAL_CHAR:
    9030             :   case TOKEN_DELIMITED_SPECIAL_CHAR:
    9031         112 :     *pp = (*pp)->add_char(lookup_charinfo(nm), curenv, &w, &s);
    9032         112 :     break;
    9033           0 :   case TOKEN_STRETCHABLE_SPACE:
    9034           0 :     n = new unbreakable_space_node(curenv->get_space_width(),
    9035           0 :                                    curenv->get_fill_color());
    9036           0 :     break;
    9037           6 :   case TOKEN_UNSTRETCHABLE_SPACE:
    9038           6 :     n = new space_char_hmotion_node(curenv->get_space_width(),
    9039           6 :                                     curenv->get_fill_color());
    9040           6 :     break;
    9041           0 :   case TOKEN_TRANSPARENT_DUMMY:
    9042           0 :     n = new transparent_dummy_node;
    9043           0 :     break;
    9044           0 :   case TOKEN_ZERO_WIDTH_BREAK:
    9045           0 :     n = new space_node(H0, curenv->get_fill_color());
    9046           0 :     n->freeze_space();
    9047           0 :     n->is_escape_colon();
    9048           0 :     break;
    9049           0 :   default:
    9050           0 :     return false;
    9051             :   }
    9052        4079 :   if (n != 0 /* nullptr */) {
    9053        1769 :     n->next = *pp;
    9054        1769 :     *pp = n;
    9055             :   }
    9056        4079 :   return true;
    9057             : }
    9058             : 
    9059    10300002 : void token::process()
    9060             : {
    9061    10300002 :   if (possibly_handle_first_page_transition())
    9062          14 :     return;
    9063    10299988 :   switch (type) {
    9064           0 :   case TOKEN_BACKSPACE:
    9065           0 :     curenv->add_node(new hmotion_node(-curenv->get_space_width(),
    9066           0 :                                       curenv->get_fill_color()));
    9067           0 :     break;
    9068     9261793 :   case TOKEN_CHAR:
    9069             :     // Optimize `curenv->add_char(get_charinfo())` for token type.
    9070     9261793 :     curenv->add_char(charset_table[c]);
    9071     9261793 :     break;
    9072       78856 :   case TOKEN_DUMMY:
    9073       78856 :     curenv->add_node(new dummy_node);
    9074       78856 :     break;
    9075           0 :   case TOKEN_EMPTY:
    9076           0 :     assert(0 == "unhandled empty token");
    9077             :     break;
    9078           0 :   case TOKEN_EOF:
    9079           0 :     assert(0 == "unhandled end-of-file token");
    9080             :     break;
    9081        5560 :   case TOKEN_ESCAPE:
    9082        5560 :     if (escape_char != 0U)
    9083        5560 :       curenv->add_char(charset_table[escape_char]);
    9084        5560 :     break;
    9085           0 :   case TOKEN_BEGIN_TRAP:
    9086             :   case TOKEN_END_TRAP:
    9087             :   case TOKEN_PAGE_EJECTOR:
    9088             :     // these are all handled in process_input_stack()
    9089           0 :     break;
    9090       82840 :   case TOKEN_HYPHEN_INDICATOR:
    9091       82840 :     curenv->add_hyphen_indicator();
    9092       82840 :     break;
    9093      112600 :   case TOKEN_INTERRUPT:
    9094      112600 :     curenv->interrupt();
    9095      112600 :     break;
    9096       33376 :   case TOKEN_ITALIC_CORRECTION:
    9097       33376 :     curenv->add_italic_correction();
    9098       33376 :     break;
    9099         281 :   case TOKEN_LEADER:
    9100         281 :     curenv->advance_to_tab_stop(true /* use_leader */);
    9101         281 :     break;
    9102           0 :   case TOKEN_LEFT_BRACE:
    9103           0 :     break;
    9104         480 :   case TOKEN_MARK_INPUT:
    9105         480 :     set_register(nm, curenv->get_input_line_position().to_units());
    9106         480 :     break;
    9107           0 :   case TOKEN_NEWLINE:
    9108           0 :     curenv->newline();
    9109           0 :     break;
    9110      466323 :   case TOKEN_NODE:
    9111             :   case TOKEN_DELIMITED_HORIZONTAL_MOTION:
    9112             :   case TOKEN_HORIZONTAL_MOTION:
    9113      466323 :     curenv->add_node(nd);
    9114      466323 :     nd = 0 /* nullptr */;
    9115      466323 :     break;
    9116       21640 :   case TOKEN_INDEXED_CHAR:
    9117             :     // Optimize `curenv->add_char(get_charinfo())` for token type.
    9118       21640 :     curenv->add_char(get_charinfo_by_index(val));
    9119       21640 :     break;
    9120           0 :   case TOKEN_REQUEST:
    9121             :     // handled in process_input_stack()
    9122           0 :     break;
    9123           0 :   case TOKEN_RIGHT_BRACE:
    9124           0 :     break;
    9125       94899 :   case TOKEN_SPACE:
    9126       94899 :     curenv->space();
    9127       94899 :     break;
    9128       57122 :   case TOKEN_SPECIAL_CHAR:
    9129             :   case TOKEN_DELIMITED_SPECIAL_CHAR:
    9130             :     // Optimize `curenv->add_char(get_charinfo())` for token type.
    9131       57122 :     curenv->add_char(lookup_charinfo(nm));
    9132       57122 :     break;
    9133          16 :   case TOKEN_SPREAD:
    9134          16 :     curenv->spread();
    9135          16 :     break;
    9136        8431 :   case TOKEN_STRETCHABLE_SPACE:
    9137       16862 :     curenv->add_node(new unbreakable_space_node(curenv->get_space_width(),
    9138        8431 :                                                 curenv->get_fill_color()));
    9139        8431 :     break;
    9140        5771 :   case TOKEN_UNSTRETCHABLE_SPACE:
    9141       11542 :     curenv->add_node(new space_char_hmotion_node(curenv->get_space_width(),
    9142        5771 :                                                  curenv->get_fill_color()));
    9143        5771 :     break;
    9144         817 :   case TOKEN_TAB:
    9145         817 :     curenv->advance_to_tab_stop();
    9146         817 :     break;
    9147           0 :   case TOKEN_TRANSPARENT:
    9148           0 :     break;
    9149       19742 :   case TOKEN_TRANSPARENT_DUMMY:
    9150       19742 :     curenv->add_node(new transparent_dummy_node);
    9151       19742 :     break;
    9152       49441 :   case TOKEN_ZERO_WIDTH_BREAK:
    9153             :     {
    9154       49441 :       node *tmp = new space_node(H0, curenv->get_fill_color());
    9155       49441 :       tmp->freeze_space();
    9156       49441 :       tmp->is_escape_colon();
    9157       49441 :       curenv->add_node(tmp);
    9158       49441 :       break;
    9159             :     }
    9160           0 :   default:
    9161           0 :     assert(0 == "unhandled token type");
    9162             :   }
    9163             : }
    9164             : 
    9165             : class nargs_reg : public reg {
    9166             : public:
    9167             :   const char *get_string();
    9168             : };
    9169             : 
    9170      725076 : const char *nargs_reg::get_string()
    9171             : {
    9172      725076 :   return i_to_a(input_stack::nargs());
    9173             : }
    9174             : 
    9175             : class lineno_reg : public reg {
    9176             : public:
    9177             :   const char *get_string();
    9178             : };
    9179             : 
    9180         121 : const char *lineno_reg::get_string()
    9181             : {
    9182             :   int line;
    9183             :   const char *file;
    9184         121 :   if (!input_stack::get_location(false /* allow macro */, &file, &line))
    9185           3 :     line = 0;
    9186         242 :   return i_to_a(line);
    9187             : }
    9188             : 
    9189             : class writable_lineno_reg : public general_reg {
    9190             : public:
    9191             :   writable_lineno_reg();
    9192             :   void set_value(units);
    9193             :   bool get_value(units *);
    9194             : };
    9195             : 
    9196        1418 : writable_lineno_reg::writable_lineno_reg()
    9197             : {
    9198        1418 : }
    9199             : 
    9200           1 : bool writable_lineno_reg::get_value(units *res)
    9201             : {
    9202             :   int line;
    9203             :   const char *file;
    9204           1 :   if (!input_stack::get_location(false /* allow macro */, &file, &line))
    9205           0 :     return false;
    9206           1 :   *res = line;
    9207           1 :   return true;
    9208             : }
    9209             : 
    9210           0 : void writable_lineno_reg::set_value(units n)
    9211             : {
    9212           0 :   (void) input_stack::set_location(0, n);
    9213           0 : }
    9214             : 
    9215             : class filename_reg : public reg {
    9216             : public:
    9217             :   const char *get_string();
    9218             : };
    9219             : 
    9220        1630 : const char *filename_reg::get_string()
    9221             : {
    9222             :   int line;
    9223             :   const char *file;
    9224        1630 :   if (input_stack::get_location(false /* allow macro */, &file, &line))
    9225        1312 :     return file;
    9226             :   else
    9227         318 :     return 0 /* nullptr */;
    9228             : }
    9229             : 
    9230             : class break_flag_reg : public reg {
    9231             : public:
    9232             :   const char *get_string();
    9233             : };
    9234             : 
    9235          22 : const char *break_flag_reg::get_string()
    9236             : {
    9237          22 :   return i_to_a(input_stack::get_break_flag());
    9238             : }
    9239             : 
    9240             : class enclosing_want_att_compat_reg : public reg {
    9241             : public:
    9242             :   const char *get_string();
    9243             : };
    9244             : 
    9245       10411 : const char *enclosing_want_att_compat_reg::get_string()
    9246             : {
    9247       20820 :   return i_to_a(want_att_compat_stack.empty() ? 0
    9248       20820 :                 : want_att_compat_stack.top());
    9249             : }
    9250             : 
    9251             : class readonly_text_register : public reg {
    9252             :   const char *s;
    9253             : public:
    9254             :   readonly_text_register(const char *);
    9255             :   readonly_text_register(int);
    9256             :   const char *get_string();
    9257             : };
    9258             : 
    9259        4254 : readonly_text_register::readonly_text_register(const char *p) : s(p)
    9260             : {
    9261        4254 : }
    9262             : 
    9263        4254 : readonly_text_register::readonly_text_register(int i)
    9264             : {
    9265        4254 :   s = strdup(i_to_a(i));
    9266        4254 : }
    9267             : 
    9268       11950 : const char *readonly_text_register::get_string()
    9269             : {
    9270       11950 :   return s;
    9271             : }
    9272             : 
    9273        5672 : readonly_register::readonly_register(int *q) : p(q)
    9274             : {
    9275        5672 : }
    9276             : 
    9277        7252 : const char *readonly_register::get_string()
    9278             : {
    9279        7252 :   return i_to_a(*p);
    9280             : }
    9281             : 
    9282        8508 : readonly_boolean_register::readonly_boolean_register(bool *q): p(q)
    9283             : {
    9284        8508 : }
    9285             : 
    9286        2138 : const char *readonly_boolean_register::get_string()
    9287             : {
    9288        2138 :   return i_to_a(*p);
    9289             : }
    9290             : 
    9291             : class readonly_mask_register : public reg {
    9292             :   unsigned int *mask;
    9293             : public:
    9294             :   readonly_mask_register(unsigned int *);
    9295             :   const char *get_string();
    9296             : };
    9297             : 
    9298        1418 : readonly_mask_register::readonly_mask_register(unsigned int * m)
    9299        1418 : : mask(m)
    9300             : {
    9301        1418 : }
    9302             : 
    9303         369 : const char *readonly_mask_register::get_string()
    9304             : {
    9305         369 :   return ui_to_a(*mask);
    9306             : }
    9307             : 
    9308          14 : void abort_request()
    9309             : {
    9310             :   int c;
    9311          14 :   if (tok.is_eof())
    9312           0 :     c = EOF;
    9313          14 :   else if (tok.is_newline())
    9314          13 :     c = '\n';
    9315             :   else {
    9316           1 :     while ((c = read_char_in_copy_mode(0 /* nullptr */)) == ' ')
    9317             :       ;
    9318             :   }
    9319          14 :   if (!(c == EOF || c == '\n')) {
    9320           3 :     for (;
    9321           4 :          (c != '\n') && (c != EOF);
    9322           3 :          (c = read_char_in_copy_mode(0 /* nullptr */)))
    9323           3 :       fputs(encode_for_stream_output(c), stderr);
    9324           1 :     fputc('\n', stderr);
    9325             :   }
    9326          14 :   fflush(stderr);
    9327          14 :   write_any_trailer_and_exit(EXIT_FAILURE);
    9328           0 : }
    9329             : 
    9330             : // Consume the rest of the input line in copy mode and return it as a C
    9331             : // string; if, after spaces, the argument starts with a `"`, discard it,
    9332             : // letting any immediately subsequent spaces populate the returned
    9333             : // string.
    9334             : //
    9335             : // The caller must subsequently call `tok.next()` to advance the input
    9336             : // stream pointer.
    9337             : //
    9338             : // The caller has responsibility for `delete`ing the returned buffer.
    9339       16796 : char *read_rest_of_line_as_argument()
    9340             : {
    9341       16796 :   int buf_size = 256;
    9342       16796 :   char *s = new char[buf_size]; // C++03: new char[buf_size]();
    9343       16796 :   (void) memset(s, 0, (buf_size * sizeof(char)));
    9344       16796 :   int c = read_char_in_copy_mode(0 /* nullptr */);
    9345       16796 :   while (' ' == c)
    9346           0 :     c = read_char_in_copy_mode(0 /* nullptr */);
    9347       16796 :   if ('"' == c)
    9348        1391 :     c = read_char_in_copy_mode(0 /* nullptr */);
    9349       16796 :   int i = 0;
    9350      209619 :   while ((c != '\n') && (c != EOF)) {
    9351      192823 :     if (!is_invalid_input_char(c)) {
    9352      192823 :       if ((i + 2) > buf_size) {
    9353           0 :         char *tem = s;
    9354           0 :         s = new char[buf_size * 2]; // C++03: new char[buf_size * 2]();
    9355           0 :         (void) memset(s, 0, (buf_size * 2 * sizeof(char)));
    9356           0 :         memcpy(s, tem, buf_size);
    9357           0 :         buf_size *= 2;
    9358           0 :         delete[] tem;
    9359             :       }
    9360      192823 :       s[i++] = c;
    9361             :     }
    9362      192823 :     c = read_char_in_copy_mode(0 /* nullptr */);
    9363             :   }
    9364       16796 :   s[i] = '\0';
    9365       16796 :   if (0 == i) {
    9366           3 :     delete[] s;
    9367           3 :     return 0 /* nullptr */;
    9368             :   }
    9369       16793 :   return s;
    9370             : }
    9371             : 
    9372           2 : void pipe_output()
    9373             : {
    9374           2 :   if (!has_arg(true /* peek */)) {
    9375           0 :     warning(WARN_MISSING, "output piping request expects a system"
    9376             :             " command as argument");
    9377           0 :     skip_line();
    9378           0 :     return;
    9379             :   }
    9380           2 :   if (!want_unsafe_requests) {
    9381           0 :     error("output piping request is not allowed in safer mode");
    9382           0 :     skip_line();
    9383           0 :     return;
    9384             :   }
    9385           2 :   if (the_output != 0 /* nullptr */) {
    9386           0 :     error("cannot honor pipe request: output already started");
    9387           0 :     skip_line();
    9388           0 :     return;
    9389             :   }
    9390           2 :   char *pc = read_rest_of_line_as_argument();
    9391             :   // `has_arg()` should have ensured that this pointer is non-null.
    9392           2 :   assert(pc != 0 /* nullptr */);
    9393           2 :   if (0 /* nullptr */ == pc)
    9394           0 :     error("cannot apply pipe request to empty command");
    9395             :   // Are we adding to an existing pipeline?
    9396           2 :   if (pipe_command != 0 /* nullptr */) {
    9397             :     // ISO C++ does not permit VLAs on the stack.
    9398             :     // C++03: new char[strlen(pipe_command) + strlen(pc) + 1 + 1]();
    9399           0 :     char *s = new char[strlen(pipe_command) + strlen(pc) + 1 + 1];
    9400           0 :     (void) memset(s, 0, ((strlen(pipe_command) + strlen(pc) + 1 + 1)
    9401             :                          * sizeof(char)));
    9402           0 :     strcpy(s, pipe_command);
    9403           0 :     strcat(s, "|");
    9404           0 :     strcat(s, pc);
    9405           0 :     delete[] pipe_command;
    9406           0 :     delete[] pc;
    9407           0 :     pipe_command = s;
    9408             :   }
    9409             :   else
    9410           2 :     pipe_command = pc;
    9411           2 :   delete[] pc;
    9412           2 :   tok.next();
    9413             : }
    9414             : 
    9415             : static int system_status;
    9416             : 
    9417           8 : void system_request()
    9418             : {
    9419           8 :   if (!has_arg(true /* peek */)) {
    9420           0 :     warning(WARN_MISSING, "system command execution request expects a"
    9421             :             " system command as argument");
    9422           0 :     skip_line();
    9423           0 :     return;
    9424             :   }
    9425           8 :   if (!want_unsafe_requests) {
    9426           2 :     error("system command execution request is not allowed in safer"
    9427             :           " mode");
    9428           2 :     skip_line();
    9429           2 :     return;
    9430             :   }
    9431           6 :   char *command = read_rest_of_line_as_argument();
    9432             :   // `has_arg()` should have ensured that this pointer is non-null.
    9433           6 :   assert(command != 0 /* nullptr */);
    9434           6 :   if (0 /* nullptr */ == command)
    9435           0 :     error("cannot apply system request to empty command");
    9436             :   else
    9437           6 :     system_status = system(command);
    9438           6 :   delete[] command;
    9439           6 :   tok.next();
    9440             : }
    9441             : 
    9442          15 : static void unsafe_transparent_throughput_file_request()
    9443             : {
    9444          15 :   if (!has_arg(true /* peek */)) {
    9445           0 :     warning(WARN_MISSING, "file throughput request expects a file name"
    9446             :             " as argument");
    9447           0 :     skip_line();
    9448           0 :     return;
    9449             :   }
    9450          15 :   if (!want_unsafe_requests) {
    9451          14 :     error("file throughput request is not allowed in safer mode");
    9452          14 :     skip_line();
    9453          14 :     return;
    9454             :   }
    9455           1 :   if ((curdiv == topdiv) && (topdiv->before_first_page_status > 0)) {
    9456           0 :     handle_initial_request(COPY_FILE_REQUEST);
    9457           0 :     return;
    9458             :   }
    9459           1 :   char *filename = read_rest_of_line_as_argument();
    9460           1 :   if (was_invoked_with_regular_control_character)
    9461           1 :     curenv->do_break();
    9462           1 :   if (filename != 0 /* nullptr */)
    9463           1 :     curdiv->copy_file(filename);
    9464             :   // TODO: Add `filename` to file name set.
    9465           1 :   tok.next();
    9466             : }
    9467             : 
    9468             : #ifdef COLUMN
    9469             : 
    9470             : void vjustify()
    9471             : {
    9472             :   if (!has_arg()) {
    9473             :     warning(WARN_MISSING, "vertical adjustment request expects an"
    9474             :             " argument");
    9475             :     skip_line();
    9476             :     return;
    9477             :   }
    9478             :   if (curdiv == topdiv && topdiv->before_first_page) {
    9479             :     handle_initial_request(VJUSTIFY_REQUEST);
    9480             :     return;
    9481             :   }
    9482             :   symbol type = read_long_identifier(true /* required */);
    9483             :   if (!type.is_null())
    9484             :     curdiv->vjustify(type);
    9485             :   skip_line();
    9486             : }
    9487             : 
    9488             : #endif /* COLUMN */
    9489             : 
    9490           4 : static void transparent_throughput_file_request()
    9491             : {
    9492           4 :   if (!has_arg(true /* peek */)) {
    9493           0 :     warning(WARN_MISSING, "transparent file throughput request expects"
    9494             :             " a file name as argument");
    9495           0 :     skip_line();
    9496           0 :     return;
    9497             :   }
    9498           4 :   if ((curdiv == topdiv) && (topdiv->before_first_page_status > 0)) {
    9499           0 :     handle_initial_request(TRANSPARENT_FILE_REQUEST);
    9500           0 :     return;
    9501             :   }
    9502           4 :   char *filename = read_rest_of_line_as_argument();
    9503           4 :   if (was_invoked_with_regular_control_character)
    9504           3 :     curenv->do_break();
    9505           4 :   if (filename != 0 /* nullptr */) {
    9506           4 :     errno = 0;
    9507           4 :     FILE *fp = include_search_path.open_file_cautiously(filename);
    9508           4 :     if (0 /* nullptr */ == fp)
    9509           0 :       error("cannot open '%1': %2", filename, strerror(errno));
    9510             :     else {
    9511           4 :       if (curdiv != topdiv)
    9512           2 :         curdiv->copy_file(filename);
    9513             :       else {
    9514           2 :         bool reading_beginning_of_input_line = true;
    9515             :         for (;;) {
    9516          66 :           int c = getc(fp);
    9517          66 :           if (c == EOF)
    9518           2 :             break;
    9519          64 :           if (is_invalid_input_char(c))
    9520           0 :             warning(WARN_INPUT, "invalid input character code %1",
    9521           0 :                     int(c));
    9522             :           else {
    9523          64 :             curdiv->transparent_output(c);
    9524          64 :             reading_beginning_of_input_line = c == '\n';
    9525             :           }
    9526          64 :         }
    9527           2 :         if (!reading_beginning_of_input_line)
    9528           0 :           curdiv->transparent_output('\n');
    9529           2 :         fclose(fp);
    9530             :       }
    9531             :     }
    9532             :   }
    9533           4 :   tok.next();
    9534             : }
    9535             : 
    9536             : class page_range {
    9537             :   int first;
    9538             :   int last;
    9539             : public:
    9540             :   page_range *next;
    9541             :   page_range(int, int, page_range *);
    9542             :   int contains(int n);
    9543             : };
    9544             : 
    9545           0 : page_range::page_range(int i, int j, page_range *p)
    9546           0 : : first(i), last(j), next(p)
    9547             : {
    9548           0 : }
    9549             : 
    9550           0 : int page_range::contains(int n)
    9551             : {
    9552           0 :   return n >= first && (last <= 0 || n <= last);
    9553             : }
    9554             : 
    9555             : page_range *output_page_list = 0 /* nullptr */;
    9556             : 
    9557        2655 : bool in_output_page_list(int n)
    9558             : {
    9559        2655 :   if (!output_page_list)
    9560        2655 :     return true;
    9561           0 :   for (page_range *p = output_page_list;
    9562           0 :        p != 0 /* nullptr */;
    9563           0 :        p = p->next)
    9564           0 :     if (p->contains(n))
    9565           0 :       return true;
    9566           0 :   return false;
    9567             : }
    9568             : 
    9569           0 : static void parse_output_page_list(const char *p)
    9570             : {
    9571           0 :   const char *pstart = p; // for diagnostic message
    9572             :   for (;;) {
    9573             :     int i;
    9574           0 :     if (*p == '-')
    9575           0 :       i = 1;
    9576           0 :     else if (csdigit(*p)) {
    9577           0 :       i = 0;
    9578           0 :       do
    9579           0 :         i = i*10 + *p++ - '0';
    9580           0 :       while (csdigit(*p));
    9581             :     }
    9582             :     else
    9583           0 :       break;
    9584             :     int j;
    9585           0 :     if (*p == '-') {
    9586           0 :       p++;
    9587           0 :       j = 0;
    9588           0 :       if (csdigit(*p)) {
    9589           0 :         do
    9590           0 :           j = j*10 + *p++ - '0';
    9591           0 :         while (csdigit(*p));
    9592             :       }
    9593             :     }
    9594             :     else
    9595           0 :       j = i;
    9596           0 :     if (j == 0)
    9597           0 :       last_page_number = -1;
    9598           0 :     else if (last_page_number >= 0 && j > last_page_number)
    9599           0 :       last_page_number = j;
    9600           0 :     output_page_list = new page_range(i, j, output_page_list);
    9601           0 :     if (*p != ',')
    9602           0 :       break;
    9603           0 :     ++p;
    9604           0 :   }
    9605           0 :   if (*p != '\0') {
    9606           0 :     error("ignoring invalid output page list argument '%1'", pstart);
    9607           0 :     output_page_list = 0 /* nullptr */;
    9608             :   }
    9609           0 : }
    9610             : 
    9611         509 : static FILE *open_macro_package(const char *mac, char **path)
    9612             : {
    9613             :   // Try `mac`.tmac first, then tmac.`mac`.  Expect ENOENT errors.
    9614             :   // ISO C++ does not permit VLAs on the stack.
    9615             :   // C++03: new char[strlen(mac) + strlen(MACRO_POSTFIX) + 1]();
    9616         509 :   char *s1 = new char[strlen(mac) + strlen(MACRO_POSTFIX) + 1];
    9617         509 :   (void) memset(s1, 0, ((strlen(mac) + strlen(MACRO_POSTFIX) + 1)
    9618             :                         * sizeof(char)));
    9619         509 :   strcpy(s1, mac);
    9620         509 :   strcat(s1, MACRO_POSTFIX);
    9621         509 :   FILE *fp = mac_path->open_file(s1, path);
    9622         509 :   if ((0 /* nullptr */ == fp) && (ENOENT != errno))
    9623           0 :     error("cannot open macro file '%1': %2", s1, strerror(errno));
    9624         509 :   delete[] s1;
    9625         509 :   if (0 /* nullptr */ == fp) {
    9626             :     // ISO C++ does not permit VLAs on the stack.
    9627             :     // C++03: new char[strlen(mac) + strlen(MACRO_PREFIX) + 1]();
    9628           0 :     char *s2 = new char[strlen(mac) + strlen(MACRO_PREFIX) + 1];
    9629           0 :     (void) memset(s2, 0, ((strlen(mac) + strlen(MACRO_PREFIX) + 1)
    9630             :                           * sizeof(char)));
    9631           0 :     strcpy(s2, MACRO_PREFIX);
    9632           0 :     strcat(s2, mac);
    9633           0 :     fp = mac_path->open_file(s2, path);
    9634           0 :     if ((0 /* nullptr */ == fp) && (ENOENT != errno))
    9635           0 :       error("cannot open macro file '%1': %2", s2, strerror(errno));
    9636           0 :     delete[] s2;
    9637             :   }
    9638         509 :   return fp;
    9639             : }
    9640             : 
    9641         509 : static void process_macro_package_argument(const char *mac)
    9642             : {
    9643             :   char *path;
    9644         509 :   FILE *fp = open_macro_package(mac, &path);
    9645         509 :   if (0 /* nullptr */ == fp)
    9646           0 :     fatal("cannot open macro file named in '-m' command-line argument"
    9647           0 :           " '%1': %2", mac, strerror(errno));
    9648         509 :   const char *s = symbol(path).contents();
    9649         509 :   free(path);
    9650         509 :   input_stack::push(new file_iterator(fp, s));
    9651         509 :   tok.next();
    9652         509 :   process_input_stack();
    9653         509 : }
    9654             : 
    9655        2836 : static void process_startup_file(const char *filename)
    9656             : {
    9657             :   char *path;
    9658        2836 :   search_path *orig_mac_path = mac_path;
    9659        2836 :   mac_path = &config_macro_path;
    9660        2836 :   FILE *fp = mac_path->open_file(filename, &path);
    9661        2836 :   if (fp != 0 /* nullptr */) {
    9662        2836 :     input_stack::push(new file_iterator(fp, symbol(path).contents()));
    9663        2836 :     free(path);
    9664        2836 :     tok.next();
    9665        2836 :     process_input_stack();
    9666             :   }
    9667           0 :   else if (errno != ENOENT)
    9668           0 :     error("cannot open startup file '%1': %2", filename,
    9669           0 :           strerror(errno));
    9670        2836 :   mac_path = orig_mac_path;
    9671        2836 : }
    9672             : 
    9673       12287 : void do_macro_source(bool quietly)
    9674             : {
    9675       12287 :   char *macro_filename = read_rest_of_line_as_argument();
    9676             :   char *path;
    9677       12287 :   FILE *fp = mac_path->open_file(macro_filename, &path);
    9678       12287 :   if (fp != 0 /* nullptr */) {
    9679       12231 :     input_stack::push(new file_iterator(fp, macro_filename));
    9680       12231 :     free(path);
    9681             :   }
    9682             :   else
    9683             :     // Suppress diagnostic only if we're operating quietly and it's an
    9684             :     // expected problem.
    9685          56 :     if (!quietly && (ENOENT == errno))
    9686           0 :       warning(WARN_FILE, "cannot open macro file '%1': %2",
    9687           0 :               macro_filename, strerror(errno));
    9688             :   // TODO: Add `macro_filename` to file name set.
    9689       12287 :   tok.next();
    9690       12287 : }
    9691             : 
    9692       12000 : void macro_source_request() // .mso
    9693             : {
    9694       12000 :   if (!has_arg(true /* peek */)) {
    9695           0 :     warning(WARN_MISSING, "macro file sourcing request expects an"
    9696             :             " argument");
    9697           0 :     skip_line();
    9698           0 :     return;
    9699             :   }
    9700       12000 :   do_macro_source(false /* quietly */ );
    9701             : }
    9702             : 
    9703             : // like .mso, but silently ignore files that can't be opened due to
    9704             : // their nonexistence
    9705         287 : void macro_source_quietly_request() // .msoquiet
    9706             : {
    9707         287 :   if (!has_arg(true /* peek */)) {
    9708           0 :     warning(WARN_MISSING, "quiet macro file sourcing request expects an"
    9709             :             " argument");
    9710           0 :     skip_line();
    9711           0 :     return;
    9712             :   }
    9713         287 :   do_macro_source(true /* quietly */ );
    9714             : }
    9715             : 
    9716        1422 : static void process_input_file(const char *name)
    9717             : {
    9718             :   FILE *fp;
    9719        1422 :   if (strcmp(name, "-") == 0) {
    9720        1396 :     clearerr(stdin);
    9721        1396 :     fp = stdin;
    9722             :   }
    9723             :   else {
    9724          26 :     errno = 0;
    9725          26 :     fp = include_search_path.open_file_cautiously(name);
    9726          26 :     if (0 /* nullptr */ == fp)
    9727           0 :       fatal("cannot open '%1': %2", name, strerror(errno));
    9728             :   }
    9729        1422 :   input_stack::push(new file_iterator(fp, name));
    9730        1422 :   tok.next();
    9731        1422 :   process_input_stack();
    9732        1407 : }
    9733             : 
    9734             : // make sure the_input is empty before calling this
    9735             : 
    9736         186 : static int evaluate_expression(const char *expr, units *res)
    9737             : {
    9738         186 :   input_stack::push(make_temp_iterator(expr));
    9739         186 :   tok.next();
    9740             :   // TODO: grochar
    9741         186 :   int success = read_measurement(res, (unsigned char)('u'));
    9742         186 :   while (input_stack::get(0 /* nullptr */) != EOF)
    9743             :     ;
    9744         186 :   return success;
    9745             : }
    9746             : 
    9747         186 : static void do_register_assignment(const char *s)
    9748             : {
    9749         186 :   const char *p = strchr(s, '=');
    9750         186 :   if (!p) {
    9751             :     char buf[2];
    9752          75 :     buf[0] = s[0];
    9753          75 :     buf[1] = 0;
    9754             :     units n;
    9755          75 :     if (evaluate_expression(s + 1, &n))
    9756          75 :       set_register(buf, n);
    9757             :   }
    9758             :   else {
    9759             :     // ISO C++ does not permit VLAs on the stack.
    9760         111 :     char *buf = new char[p - s + 1]; // C++03: new char[p - s + 1]();
    9761         111 :     (void) memset(buf, 0, ((p - s + 1) * sizeof(char)));
    9762         111 :     (void) memcpy(buf, s, p - s);
    9763         111 :     buf[p - s] = 0;
    9764             :     units n;
    9765         111 :     if (evaluate_expression(p + 1, &n))
    9766         111 :       set_register(buf, n);
    9767         111 :     delete[] buf;
    9768             :   }
    9769         186 : }
    9770             : 
    9771        1526 : static void set_string(const char *name, const char *value)
    9772             : {
    9773        1526 :   macro *m = new macro;
    9774        8676 :   for (const char *p = value; *p != 0 /* nullptr */; p++)
    9775        7150 :     if (!is_invalid_input_char(static_cast<unsigned char>(*p)))
    9776        7150 :       m->append(*p);
    9777        1526 :   request_dictionary.define(name, m);
    9778        1526 : }
    9779             : 
    9780         108 : static void do_string_assignment(const char *s)
    9781             : {
    9782         108 :   const char *p = strchr(s, '=');
    9783         108 :   if (!p) {
    9784             :     char buf[2];
    9785           1 :     buf[0] = s[0];
    9786           1 :     buf[1] = 0;
    9787           1 :     set_string(buf, s + 1);
    9788             :   }
    9789             :   else {
    9790             :     // ISO C++ does not permit VLAs on the stack.
    9791         107 :     char *buf = new char[p - s + 1]; // C++03: new char[p - s + 1]();
    9792         107 :     (void) memset(buf, 0, ((p - s + 1) * sizeof(char)));
    9793         107 :     (void) memcpy(buf, s, p - s);
    9794         107 :     buf[p - s] = 0;
    9795         107 :     set_string(buf, p + 1);
    9796         107 :     delete[] buf;
    9797             :   }
    9798         108 : }
    9799             : 
    9800             : struct string_list {
    9801             :   const char *s;
    9802             :   string_list *next;
    9803         805 :   string_list(const char *ss) : s(ss), next(0) {}
    9804             : };
    9805             : 
    9806             : #if 0
    9807             : static void prepend_string(const char *s, string_list **p)
    9808             : {
    9809             :   string_list *l = new string_list(s);
    9810             :   l->next = *p;
    9811             :   *p = l;
    9812             : }
    9813             : #endif
    9814             : 
    9815         944 : static void add_string(const char *s, string_list **p)
    9816             : {
    9817         944 :   while (*p)
    9818         139 :     p = &((*p)->next);
    9819         805 :   *p = new string_list(s);
    9820         805 : }
    9821             : 
    9822           0 : void usage(FILE *stream, const char *prog)
    9823             : {
    9824           0 :   fprintf(stream,
    9825             : "usage: %s [-abcCEiRSUz] [-d ctext] [-d string=text] [-f font-family]"
    9826             : " [-F font-directory] [-I inclusion-directory] [-m macro-package]"
    9827             : " [-M macro-directory] [-n page-number] [-o page-list]"
    9828             : " [-r cnumeric-expression] [-r register=numeric-expression]"
    9829             : " [-T output-device] [-w warning-category] [-W warning-category]"
    9830             : " [file ...]\n"
    9831             : "usage: %s {-v | --version}\n"
    9832             : "usage: %s --help\n",
    9833             :           prog, prog, prog);
    9834           0 :   if (stdout == stream)
    9835           0 :     fputs(
    9836             : "\n"
    9837             : "GNU troff transforms groff(7) language input into the device-\n"
    9838             : "independent page description language detailed in groff_out(5); it\n"
    9839             : "is the heart of the GNU roff document formatting system.  Many\n"
    9840             : "people prefer to use the groff(1) command, a front end that also\n"
    9841             : "runs preprocessors and output drivers in the appropriate order and\n"
    9842             : "with appropriate options.  See the troff(1) manual page.\n",
    9843             :           stream);
    9844           0 : }
    9845             : 
// Program entry point.
//
// In order: parse the command line; load the output device's 'DESC'
// file and mount the styles and fonts it names; create the top-level
// diversion and initialize the request and register tables; apply the
// queued '-d' and '-r' assignments; process the initial startup file,
// the '-m' macro packages, and the final startup file; and finally
// process the input file operands (or the standard input stream).  The
// program exits via exit_troff().
int main(int argc, char **argv)
{
  program_name = argv[0];
  static char stderr_buf[BUFSIZ];
  setbuf(stderr, stderr_buf);
  int c;
  // Arguments to '-m', '-r', and '-d' are only queued during option
  // parsing; they are acted upon after initialization, below.
  string_list *macros = 0 /* nullptr */;
  string_list *register_assignments = 0 /* nullptr */;
  string_list *string_assignments = 0 /* nullptr */;
  bool want_stdin_read_last = false;
  bool have_explicit_device_argument = false;
  bool have_explicit_default_family = false;
  bool have_explicit_first_page_number = false;
  bool want_startup_macro_files_skipped = false;
  bool is_safer_mode_locked = false; // made true if `-S` explicit
  int next_page_number = 0;     // pacify compiler
  hresolution = vresolution = 1;
  if (getenv("GROFF_DUMP_NODES") != 0 /* nullptr */)
    want_nodes_dumped = true;
  // restore $PATH if called from groff
  char* groff_path = getenv("GROFF_PATH__");
  if (groff_path != 0 /* nullptr */) {
    string e = "PATH";
    e += '=';
    if (*groff_path)
      e += groff_path;
    e += '\0';
    if (putenv(strsave(e.contents())) != 0)
      fatal("cannot update process environment: %1", strerror(errno));
  }
  setlocale(LC_CTYPE, "");
  static const struct option long_options[] = {
    { "help", no_argument, 0 /* nullptr */, CHAR_MAX + 1 },
    { "version", no_argument, 0 /* nullptr */, 'v' },
    { 0, 0, 0, 0 }
  };
  // The 'D' option is accepted only in builds that define DEBUGGING.
#if defined(DEBUGGING)
#define DEBUG_OPTION "D"
#else
#define DEBUG_OPTION ""
#endif
  // The leading ':' in the option string makes getopt_long() return ':'
  // for a missing option argument; that case is handled below.
  while ((c = getopt_long(argc, argv,
                          ":abcCd:Ef:F:iI:m:M:n:o:qr:Rs:StT:Uvw:W:z"
                          DEBUG_OPTION,
                          long_options, 0 /* nullptr */))
         != EOF)
    switch (c) {
    case 'v':
      {
        printf("GNU troff (groff) version %s\n", Version_string);
        exit(EXIT_SUCCESS);
        break;
      }
    case 'I':
      // Search path for .psbb files
      // and most other non-system input files.
      include_search_path.command_line_dir(optarg);
      break;
    case 'T':
      device = optarg;
      have_explicit_device_argument = true;
      is_writing_html = (strcmp(device, "html") == 0);
      break;
    case 'C':
      want_att_compat = true;
      // fall through
    case 'c':
      permit_color_output = false;
      break;
    case 'M':
      // The directory is added to all three macro search paths.
      macro_path.command_line_dir(optarg);
      safer_macro_path.command_line_dir(optarg);
      config_macro_path.command_line_dir(optarg);
      break;
    case 'F':
      font::command_line_font_dir(optarg);
      break;
    case 'm':
      add_string(optarg, &macros);
      break;
    case 'E':
      want_errors_inhibited = true;
      break;
    case 'R':
      want_startup_macro_files_skipped = true;
      break;
    case 'w':
      enable_warning(optarg);
      break;
    case 'W':
      disable_warning(optarg);
      break;
    case 'i':
      want_stdin_read_last = true;
      break;
    case 'b':
      want_backtraces = true;
      break;
    case 'a':
      want_abstract_output = true;
      break;
    case 'z':
      want_output_suppressed = true;
      break;
    case 'n':
      if (sscanf(optarg, "%d", &next_page_number) == 1)
        have_explicit_first_page_number = true;
      else
        error("bad page number");
      break;
    case 'o':
      parse_output_page_list(optarg);
      break;
    case 'd':
      if (*optarg == '\0')
        error("'-d' requires non-empty argument");
      else if (*optarg == '=')
        error("malformed argument to '-d'; string name cannot be empty"
              " or contain an equals sign");
      else
        add_string(optarg, &string_assignments);
      break;
    case 'r':
      if (*optarg == '\0')
        error("'-r' requires non-empty argument");
      else if (*optarg == '=')
        error("malformed argument to '-r'; register name cannot be"
              " empty or contain an equals sign");
      else
        add_string(optarg, &register_assignments);
      break;
    case 'f':
      default_family = symbol(optarg);
      have_explicit_default_family = true;
      break;
    case 'q':
    case 's':
    case 't':
      // silently ignore these
      break;
    case 'S':
      want_unsafe_requests = false;
      is_safer_mode_locked = true;
      break;
    case 'U':
      // An earlier '-S' takes precedence over a later '-U'.
      if (is_safer_mode_locked)
        error("ignoring '-U' option; '-S' already specified");
      else
        want_unsafe_requests = true;
      break;
#if defined(DEBUGGING)
    case 'D':
      want_html_debugging = true;
      break;
#endif
    case CHAR_MAX + 1: // --help
      usage(stdout, argv[0]);
      exit(EXIT_SUCCESS);
      break;
    case '?':
      if (optopt != 0)
        error("unrecognized command-line option '%1'", char(optopt));
      else
        error("unrecognized command-line option '%1'",
              argv[(optind - 1)]);
      usage(stderr, argv[0]);
      exit(2);
      break;            // never reached
    case ':':
      error("command-line option '%1' requires an argument",
            char(optopt));
      usage(stderr, argv[0]);
      exit(2);
      break;            // never reached
    default:
      assert(0 == "unhandled case of command-line option");
    }
  // Unsafe mode switches the macro search path to `macro_path`.
  if (want_unsafe_requests)
    mac_path = &macro_path;
  set_string(".T", device);
  // TODO: Kill this off in groff 1.24.0 release + 2 years.  See env.cpp.
  if ((strcmp("pdf", device) == 0) || strcmp("ps", device) == 0)
    is_device_ps_or_pdf = true;
  init_charset_table();
  init_hpf_code_table();
  if (0 /* nullptr */ == font::load_desc())
    fatal("cannot load 'DESC' description file for device '%1'",
          device);
  // Copy the device parameters that load_desc() made available.
  units_per_inch = font::res;
  hresolution = font::hor;
  vresolution = font::vert;
  sizescale = font::sizescale;
  device_has_tcommand = font::has_tcommand;
  warn_scale = (double) units_per_inch;
  warn_scaling_unit = 'i';
  // An explicit '-f' argument overrides the device's default family.
  if (!have_explicit_default_family && (font::family != 0 /* nullptr */)
      && *font::family != '\0')
    default_family = symbol(font::family);
  font_size::init_size_list(font::sizes);
  int i;
  // `j` is the next mounting position; styles are mounted first and
  // fonts continue from where the styles left off.
  int j = 1;
  if (font::style_table)
    for (i = 0; font::style_table[i] != 0 /* nullptr */; i++)
      // Mounting a style can't actually fail due to a bad style name;
      // that's not determined until the full font name is resolved.
      // The DESC file also can't provoke a problem by requesting over a
      // thousand slots in the style table.
      if (!mount_style(j++, symbol(font::style_table[i])))
        warning(WARN_FONT, "cannot mount style '%1' directed by 'DESC'"
                " file for device '%2'", font::style_table[i], device);
  for (i = 0; font::font_name_table[i] != 0 /* nullptr */; i++, j++)
    // In the DESC file, a font name of 0 (zero) means "leave this
    // position empty".
    if (strcmp(font::font_name_table[i], "0") != 0)
      if (!mount_font(j, symbol(font::font_name_table[i])))
        warning(WARN_FONT, "cannot mount font '%1' directed by 'DESC'"
                " file for device '%2'", font::font_name_table[i],
                device);
  curdiv = topdiv = new top_level_diversion;
  if (have_explicit_first_page_number)
    topdiv->set_next_page_number(next_page_number);
  init_input_requests();
  init_env_requests();
  init_div_requests();
#ifdef COLUMN
  init_column_requests();
#endif /* COLUMN */
  init_node_requests();
  register_dictionary.define(".T",
      new readonly_boolean_register(&have_explicit_device_argument));
  init_registers();
  init_reg_requests();
  init_hyphenation_pattern_requests();
  init_environments();
  // Apply the queued '-d' string assignments, freeing each list node
  // once it has been used.
  while (string_assignments != 0 /* nullptr */) {
    do_string_assignment(string_assignments->s);
    string_list *tem = string_assignments;
    string_assignments = string_assignments->next;
    delete tem;
  }
  // Likewise for the queued '-r' register assignments.
  while (register_assignments != 0 /* nullptr */) {
    do_register_assignment(register_assignments->s);
    string_list *tem = register_assignments;
    register_assignments = register_assignments->next;
    delete tem;
  }
  // The '-m' macro packages are read between the two startup files.
  if (!want_startup_macro_files_skipped)
    process_startup_file(INITIAL_STARTUP_FILE);
  while (macros != 0 /* nullptr */) {
    process_macro_package_argument(macros->s);
    string_list *tem = macros;
    macros = macros->next;
    delete tem;
  }
  if (!want_startup_macro_files_skipped)
    process_startup_file(FINAL_STARTUP_FILE);
  for (i = optind; i < argc; i++)
    process_input_file(argv[i]);
  // Read the standard input stream if there were no file operands or if
  // '-i' asked for it to be read after them.
  if (optind >= argc || want_stdin_read_last)
    process_input_file("-");
  exit_troff();
  return 0;                     // not reached
}
   10109             : 
   10110         246 : void set_warning_mask_request()
   10111             : {
   10112             :   int n;
   10113         246 :   if (has_arg() && read_integer(&n)) {
   10114         246 :     if (n & ~WARN_MAX) {
   10115           0 :       warning(WARN_RANGE, "warning mask must be in range 0..%1, got %2",
   10116           0 :               WARN_MAX, n);
   10117           0 :       n &= WARN_MAX;
   10118             :     }
   10119         246 :     warning_mask = n;
   10120             :   }
   10121             :   else
   10122           0 :     warning_mask = WARN_MAX;
   10123         246 :   skip_line();
   10124         246 : }
   10125             : 
   10126        1418 : static void init_registers()
   10127             : {
   10128        1418 :   struct tm *t = current_time();
   10129        1418 :   set_register("seconds", int(t->tm_sec));
   10130        1418 :   set_register("minutes", int(t->tm_min));
   10131        1418 :   set_register("hours", int(t->tm_hour));
   10132        1418 :   set_register("dw", int(t->tm_wday + 1));
   10133        1418 :   set_register("dy", int(t->tm_mday));
   10134        1418 :   set_register("mo", int(t->tm_mon + 1));
   10135        1418 :   set_register("year", int(1900 + t->tm_year));
   10136        1418 :   set_register("yr", int(t->tm_year));
   10137        1418 :   set_register("$$", getpid());
   10138        1418 :   register_dictionary.define(".A",
   10139        1418 :       new readonly_text_register(want_abstract_output));
   10140        1418 : }
   10141             : 
   10142             : /*
   10143             :  *  registers associated with \O
   10144             :  */
   10145             : 
   // Backing storage for the `opminx`, `opminy`, `opmaxx`, and `opmaxy`
   // registers (bound in init_input_requests()).  -1 is the reset value;
   // check_output_limits() treats a minimum of -1 as "not yet recorded".
   10146             : static int output_reg_minx_contents = -1;
   10147             : static int output_reg_miny_contents = -1;
   10148             : static int output_reg_maxx_contents = -1;
   10149             : static int output_reg_maxy_contents = -1;
   10150             : 
   // Grow the recorded output extent so that it includes the point (x, y).
   //
   // A minimum still holding -1 is overwritten unconditionally; otherwise a
   // minimum is only lowered.  A maximum is only ever raised, so starting
   // from -1 it takes the first coordinate greater than -1.
   10151     6560879 : void check_output_limits(int x, int y)
   10152             : {
   10153     6560879 :   if ((output_reg_minx_contents == -1) || (x < output_reg_minx_contents))
   10154        1360 :     output_reg_minx_contents = x;
   10155     6560879 :   if (x > output_reg_maxx_contents)
   10156       21327 :     output_reg_maxx_contents = x;
   10157     6560879 :   if ((output_reg_miny_contents == -1) || (y < output_reg_miny_contents))
   10158        2366 :     output_reg_miny_contents = y;
   10159     6560879 :   if (y > output_reg_maxy_contents)
   10160       27494 :     output_reg_maxy_contents = y;
   10161     6560879 : }
   10162             : 
   // Return all four output-extent values to -1, the same value they are
   // initialized with, so the next check_output_limits() call starts a
   // fresh extent.
   10163         117 : void reset_output_registers()
   10164             : {
   10165         117 :   output_reg_minx_contents = -1;
   10166         117 :   output_reg_miny_contents = -1;
   10167         117 :   output_reg_maxx_contents = -1;
   10168         117 :   output_reg_maxy_contents = -1;
   10169         117 : }
   10170             : 
   // Register this file's request handlers and built-in registers.
   //
   // The first part binds each request name to its handler function via
   // init_request().  The `fpl` and `vj` requests are registered only when
   // WIDOW_CONTROL and COLUMN, respectively, are defined at compile time.
   //
   // The second part defines registers in `register_dictionary`.  Some are
   // dedicated register classes (e.g. `nargs_reg`, `lineno_reg`); others
   // wrap the address of an existing variable (`variable_reg`,
   // `readonly_register`, `readonly_boolean_register`,
   // `readonly_mask_register`) or a fixed value (`readonly_text_register`).
   10171        1418 : void init_input_requests()
   10172             : {
   10173        1418 :   init_request("ab", abort_request);
   10174        1418 :   init_request("als", alias_macro);
   10175        1418 :   init_request("am", append_macro);
   10176        1418 :   init_request("am1", append_nocomp_macro);
   10177        1418 :   init_request("ami", append_indirect_macro);
   10178        1418 :   init_request("ami1", append_indirect_nocomp_macro);
   10179        1418 :   init_request("as", append_string);
   10180        1418 :   init_request("as1", append_nocomp_string);
   10181        1418 :   init_request("asciify", asciify_request);
   10182        1418 :   init_request("backtrace", backtrace_request);
   10183        1418 :   init_request("blm", blank_line_macro);
   10184        1418 :   init_request("break", while_break_request);
   10185        1418 :   init_request("cc", assign_control_character_request);
   10186        1418 :   init_request("c2", assign_no_break_control_character_request);
   10187        1418 :   init_request("cf", unsafe_transparent_throughput_file_request);
   10188        1418 :   init_request("cflags", set_character_flags_request);
   10189        1418 :   init_request("char", define_character_request);
   10190        1418 :   init_request("chop", chop_macro);
   10191        1418 :   init_request("class", define_class_request);
   10192        1418 :   init_request("close", close_request);
   10193        1418 :   init_request("color", activate_color);
   10194        1418 :   init_request("composite", map_composite_character);
   10195        1418 :   init_request("continue", while_continue_request);
   10196        1418 :   init_request("cp", compatible);
   10197        1418 :   init_request("de", define_macro);
   10198        1418 :   init_request("de1", define_nocomp_macro);
   10199        1418 :   init_request("defcolor", define_color);
   10200        1418 :   init_request("dei", define_indirect_macro);
   10201        1418 :   init_request("dei1", define_indirect_nocomp_macro);
   10202        1418 :   init_request("device", device_request);
   10203        1418 :   init_request("devicem", device_macro_request);
   10204        1418 :   init_request("do", do_request);
   10205        1418 :   init_request("ds", define_string);
   10206        1418 :   init_request("ds1", define_nocomp_string);
   10207        1418 :   init_request("ec", assign_escape_character_request);
   10208        1418 :   init_request("ecr", restore_escape_char_request);
   10209        1418 :   init_request("ecs", save_escape_char_request);
   10210        1418 :   init_request("el", else_request);
   10211        1418 :   init_request("em", eoi_macro);
   10212        1418 :   init_request("eo", escape_off_request);
   10213        1418 :   init_request("ex", exit_request);
   10214        1418 :   init_request("fchar", define_fallback_character_request);
   10215             : #ifdef WIDOW_CONTROL
   10216             :   init_request("fpl", flush_pending_lines);
   10217             : #endif /* WIDOW_CONTROL */
   10218        1418 :   init_request("hcode", set_hyphenation_codes);
   10219        1418 :   init_request("hpfcode", hyphenation_patterns_file_code);
   10220        1418 :   init_request("ie", if_else_request);
   10221        1418 :   init_request("if", if_request);
   10222        1418 :   init_request("ig", ignore);
   10223        1418 :   init_request("length", length_request);
   10224        1418 :   init_request("lf", line_file);
   10225        1418 :   init_request("lsm", leading_spaces_macro);
   10226        1418 :   init_request("mso", macro_source_request);
   10227        1418 :   init_request("msoquiet", macro_source_quietly_request);
   10228        1418 :   init_request("nop", nop_request);
   10229        1418 :   init_request("nroff", nroff_request);
   10230        1418 :   init_request("nx", next_file);
   10231        1418 :   init_request("open", open_request);
   10232        1418 :   init_request("opena", opena_request);
   10233        1418 :   init_request("output", output_request);
   10234        1418 :   init_request("pc", page_character_request);
   10235        1418 :   init_request("pchar", print_character_request);
   10236        1418 :   init_request("pcolor", print_color_request);
   10237        1418 :   init_request("pcomposite", print_composite_character_request);
   10238        1418 :   init_request("pi", pipe_output);
   10239        1418 :   init_request("pm", print_macro_request);
   10240        1418 :   init_request("psbb", ps_bbox_request);
   10241        1418 :   init_request("pso", pipe_source_request);
   10242        1418 :   init_request("pstream", print_stream_request);
   10243        1418 :   init_request("rchar", remove_character);
   10244        1418 :   init_request("rd", read_request);
   10245        1418 :   init_request("return", return_macro_request);
   10246        1418 :   init_request("rm", remove_macro);
   10247        1418 :   init_request("rn", rename_macro);
   10248        1418 :   init_request("schar", define_special_character_request);
   10249        1418 :   init_request("shift", shift);
   10250        1418 :   init_request("so", source_request);
   10251        1418 :   init_request("soquiet", source_quietly_request);
   10252        1418 :   init_request("spreadwarn", spreadwarn_request);
   10253        1418 :   init_request("stringdown", stringdown_request);
   10254        1418 :   init_request("stringup", stringup_request);
   10255        1418 :   init_request("substring", substring_request);
   10256        1418 :   init_request("sy", system_request);
   10257        1418 :   init_request("tag", tag);
   10258        1418 :   init_request("taga", taga);
   10259        1418 :   init_request("tm", terminal_message_request);
   10260        1418 :   init_request("tm1", terminal_message1_request);
   10261        1418 :   init_request("tmc", terminal_message_continuation_request);
   10262        1418 :   init_request("tr", translate);
   10263        1418 :   init_request("trf", transparent_throughput_file_request);
   10264        1418 :   init_request("trin", translate_input);
   10265        1418 :   init_request("trnt", translate_no_transparent);
   10266        1418 :   init_request("troff", troff_request);
   10267        1418 :   init_request("unformat", unformat_macro);
   10268             : #ifdef COLUMN
   10269             :   init_request("vj", vjustify);
   10270             : #endif /* COLUMN */
   10271        1418 :   init_request("warn", set_warning_mask_request);
   10272        1418 :   init_request("warnscale", warnscale_request);
   10273        1418 :   init_request("while", while_request);
   10274        1418 :   init_request("write", stream_write_request);
   10275        1418 :   init_request("writec", stream_write_continuation_request);
   10276        1418 :   init_request("writem", stream_write_macro_request);
                         // Built-in registers follow.
   10277        1418 :   register_dictionary.define(".$", new nargs_reg);
   10278        1418 :   register_dictionary.define(".br", new break_flag_reg);
   10279        1418 :   register_dictionary.define(".C", new readonly_boolean_register(&want_att_compat));
   10280        1418 :   register_dictionary.define(".cp", new enclosing_want_att_compat_reg);
   10281        1418 :   register_dictionary.define(".O", new variable_reg(&suppression_level));
   10282        1418 :   register_dictionary.define(".c", new lineno_reg);
   10283        1418 :   register_dictionary.define(".color", new readonly_boolean_register(&want_color_output));
   10284        1418 :   register_dictionary.define(".F", new filename_reg);
   10285        1418 :   register_dictionary.define(".g", new readonly_text_register(1));
   10286        1418 :   register_dictionary.define(".H", new readonly_register(&hresolution));
   10287        1418 :   register_dictionary.define(".R", new readonly_text_register(INT_MAX));
   10288        1418 :   register_dictionary.define(".U", new readonly_boolean_register(&want_unsafe_requests));
   10289        1418 :   register_dictionary.define(".V", new readonly_register(&vresolution));
   10290        1418 :   register_dictionary.define(".warn", new readonly_mask_register(&warning_mask));
                         // The version strings are defined in another translation
                         // unit; they are declared here, right where they are used.
   10291             :   extern const char *major_version;
   10292        1418 :   register_dictionary.define(".x", new readonly_text_register(major_version));
   10293             :   extern const char *revision;
   10294        1418 :   register_dictionary.define(".Y", new readonly_text_register(revision));
   10295             :   extern const char *minor_version;
   10296        1418 :   register_dictionary.define(".y", new readonly_text_register(minor_version));
   10297        1418 :   register_dictionary.define("c.", new writable_lineno_reg);
   10298        1418 :   register_dictionary.define("llx", new variable_reg(&llx_reg_contents));
   10299        1418 :   register_dictionary.define("lly", new variable_reg(&lly_reg_contents));
   10300        1418 :   register_dictionary.define("lsn", new variable_reg(&leading_spaces_number));
   10301        1418 :   register_dictionary.define("lss", new variable_reg(&leading_spaces_space));
                         // The op* registers expose the output extent maintained by
                         // check_output_limits() and reset_output_registers().
   10302        1418 :   register_dictionary.define("opmaxx",
   10303        1418 :                                new variable_reg(&output_reg_maxx_contents));
   10304        1418 :   register_dictionary.define("opmaxy",
   10305        1418 :                                new variable_reg(&output_reg_maxy_contents));
   10306        1418 :   register_dictionary.define("opminx",
   10307        1418 :                                new variable_reg(&output_reg_minx_contents));
   10308        1418 :   register_dictionary.define("opminy",
   10309        1418 :                                new variable_reg(&output_reg_miny_contents));
   10310        1418 :   register_dictionary.define("slimit",
   10311        1418 :                                new variable_reg(&input_stack::limit));
   10312        1418 :   register_dictionary.define("systat", new variable_reg(&system_status));
   10313        1418 :   register_dictionary.define("urx", new variable_reg(&urx_reg_contents));
   10314        1418 :   register_dictionary.define("ury", new variable_reg(&ury_reg_contents));
   10315        1418 : }
   10316             : 
   // Name-to-object dictionary for requests and macros: init_request() adds
   // `request` objects to it and lookup_request() adds `macro` objects.
   // NOTE(review): 501 is presumably the initial table size -- confirm
   // against the object_dictionary constructor.
   10317             : object_dictionary request_dictionary(501);
   10318             : 
   // Define the name `s` in `request_dictionary` as a newly allocated
   // `request` object wrapping the handler function `f`.
   10319      275092 : void init_request(const char *s, REQUEST_FUNCP f)
   10320             : {
   10321      275092 :   request_dictionary.define(s, new request(f));
   10322      275092 : }
   10323             : 
   // Return the request or macro stored under `nm` in `request_dictionary`.
   //
   // `nm` must not be a null symbol (asserted).  If nothing is defined under
   // that name, a WARN_MAC warning is issued, a newly constructed `macro` is
   // defined under `nm`, and that macro is returned.  The function therefore
   // never returns a null pointer.
   10324     4800417 : static request_or_macro *lookup_request(symbol nm)
   10325             : {
   10326     4800417 :   assert(!nm.is_null());
   10327             :   request_or_macro *p
   10328     4800417 :     = static_cast<request_or_macro *>(request_dictionary.lookup(nm));
   10329     4800417 :   if (0 /* nullptr */ == p) {
   10330        7060 :     warning(WARN_MAC, "name '%1' not defined", nm.contents());
                           // Define the name on first use so later lookups find it.
   10331        7060 :     p = new macro;
   10332        7060 :     request_dictionary.define(nm, p);
   10333             :   }
   10334     4800417 :   return p;
   10335             : }
   10336             : 
   // Format the macro attached to the character `ci` and return the node
   // list taken from the resulting output line.
   //
   // Formatting happens in a temporary environment constructed from `envp`,
   // which becomes `curenv` for the duration of the call and is marked
   // composite.  The function saves and afterwards restores
   // `want_att_compat`, `escape_char`, `tok`, `curenv`, and the macro of
   // `ci`.  While it runs, AT&T compatibility mode is off and the escape
   // character is a backslash.  `have_formattable_input` is cleared before
   // returning.
   10337             : // XXX: move to node.cpp, its only call site?
   10338       50900 : node *charinfo_to_node_list(charinfo *ci, const environment *envp)
   10339             : {
   10340             :   // Don't interpret character definitions in AT&T compatibility mode.
   10341       50900 :   int old_want_att_compat = want_att_compat;
   10342       50900 :   want_att_compat = false;
   10343       50900 :   unsigned char previous_escape_char = escape_char;
   10344       50900 :   escape_char = '\\';
                         // Detach the macro from `ci` while it is being formatted; it
                         // is attached again below.
   10345       50900 :   macro *mac = ci->set_macro(0 /* nullptr */);
   10346       50900 :   assert(mac != 0 /* nullptr */);
   10347       50900 :   environment *oldenv = curenv;
   10348      101800 :   environment env(envp);
   10349       50900 :   curenv = &env;
   10350       50900 :   curenv->set_composite();
   10351       50900 :   token old_tok = tok;
   10352       50900 :   input_stack::add_boundary();
   10353             :   string_iterator *si =
   10354       50900 :     new string_iterator(*mac, "special character", ci->nm);
   10355       50900 :   input_stack::push(si);
   10356             :   // Don't use process_input_stack, because we don't want to recognize
   10357             :   // requests.
   10358             :   for (;;) {
   10359      105041 :     tok.next();
   10360      105041 :     if (tok.is_eof())
   10361       50900 :       break;
   10362       54141 :     if (tok.is_newline()) {
   10363           0 :       error("a newline is not allowed in a composite character"
   10364             :             " escape sequence argument");
                           // Discard whatever remains of the definition.
   10365           0 :       while (!tok.is_eof())
   10366           0 :         tok.next();
   10367           0 :       break;
   10368             :     }
   10369             :     else
   10370       54141 :       tok.process();
   10371             :   }
   10372       50900 :   node *n = curenv->extract_output_line();
   10373       50900 :   input_stack::remove_boundary();
   10374       50900 :   ci->set_macro(mac);
   10375       50900 :   tok = old_tok;
   10376       50900 :   curenv = oldenv;
   10377       50900 :   want_att_compat = old_want_att_compat;
   10378       50900 :   escape_char = previous_escape_char;
   10379       50900 :   have_formattable_input = false;
   10380      101800 :   return n;
   10381             : }
   10382             : 
   // Parse a `\D` drawing escape sequence and return a `draw_node` for it,
   // or a null pointer if nothing is to be drawn.
   //
   // The first token read is the delimiter; the next token's character is
   // the command type.  Type `F` is handed to
   // read_drawing_command_color_arguments() and yields a null pointer.  For
   // every other type, horizontal/vertical measurement pairs are read into a
   // growable array of `hvpair` until the closing delimiter appears.  The
   // final vertical measurement may be absent (`no_last_v`).
   //
   // The argument count is checked for types l, c, e, a, ~ and f; a mismatch
   // is reported with error() but a node is still built.  Other types are
   // passed through unchecked.
   //
   // A null pointer is also returned when the delimiter is rejected, when
   // there are no arguments, when the closing delimiter is missing, or when
   // a measurement cannot be read.
   //
   // Fixes in this revision:
   //  * A failed read_vunits() now sets `had_error` to true.  It was set to
   //    false, so a node was built from a partially read point.
   //  * The deprecated-delimiter warning now describes `start_token`, the
   //    delimiter just read, instead of the global `tok`.
   10383      230491 : static node *read_drawing_command() // \D
   10384             : {
   10385      460982 :   token start_token;
   10386      230491 :   start_token.next();
                         // NOTE(review): when this warning fires, the if/else-if/else
                         // chain skips the parsing branch below and the function
                         // returns a null pointer.  Confirm that a merely deprecated
                         // delimiter is meant to suppress the drawing command.
   10387      230491 :   if (!want_att_compat && !start_token.is_usable_as_delimiter())
   10388           0 :     warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
   10389           0 :                         " is deprecated", start_token.description());
   10390      230491 :   else if (want_att_compat
   10391      230491 :            && !start_token.is_usable_as_delimiter(false,
   10392             :                   DELIMITER_ATT_STRING_EXPRESSION)) {
   10393           0 :     warning(WARN_DELIM, "drawing command escape sequence"
   10394             :             " does not accept %1 as a delimiter",
   10395           0 :             start_token.description());
   10396           0 :     return 0 /* nullptr */;
   10397             :   }
   10398             :   // TODO: groff 1.24.0 release + 2 years?
   10399             : #if 0
   10400             :   if (!start_token.is_usable_as_delimiter(true /* report error */))
   10401             :     return 0 /* nullptr */;
   10402             : #endif
   10403             :   else {
   10404      230491 :     tok.next();
   10405      230491 :     if (tok == start_token)
   10406           0 :       warning(WARN_MISSING, "missing arguments to drawing escape"
   10407             :               " sequence");
   10408             :     else {
   10409      230491 :       int type = tok.ch(); // safely compares to char literals
   10410             :       // TODO: grochar
   10411      230491 :       if (type == 'F') {
   10412         189 :         read_drawing_command_color_arguments(start_token);
   10413         189 :         return 0 /* nullptr */;
   10414             :       }
   10415      230302 :       tok.next();
   10416      230302 :       int maxpoints = 10;
   10417     2533322 :       hvpair *point = new hvpair[maxpoints];
   10418      230302 :       int npoints = 0;
   10419      230302 :       bool no_last_v = false;
   10420      230302 :       bool had_error = false;
   10421             :       int i;
   10422      404649 :       for (i = 0; tok != start_token; i++) {
                               // Double the array when it is full.
   10423      296674 :         if (i == maxpoints) {
   10424           0 :           hvpair *oldpoint = point;
   10425           0 :           point = new hvpair[maxpoints * 2];
   10426           0 :           for (int j = 0; j < maxpoints; j++)
   10427           0 :             point[j] = oldpoint[j];
   10428           0 :           maxpoints *= 2;
   10429           0 :           delete[] oldpoint;
   10430             :         }
   10431      296674 :         if (tok.is_newline() || tok.is_eof()) {
   10432             :           // token::description() writes to static, class-wide storage,
   10433             :           // so we must allocate a copy of it before issuing the next
   10434             :           // diagnostic.
   10435           0 :           char *delimdesc = strdup(start_token.description());
   10436           0 :           warning(WARN_DELIM, "missing closing delimiter in drawing"
   10437             :                   " escape sequence; expected %1, got %2", delimdesc,
   10438           0 :                   tok.description());
   10439           0 :           free(delimdesc);
   10440           0 :           had_error = true;
   10441           0 :           break;
   10442             :         }
                               // Types f and t default to basic units ('u'); all other
                               // types default to 'm'.
   10443      593348 :         if (!read_hunits(&point[i].h,
   10444      296674 :                         type == 'f' || type == 't' ? 'u' : 'm')) {
   10445           0 :           had_error = true;
   10446           0 :           break;
   10447             :         }
   10448      296674 :         ++npoints;
   10449      296674 :         tok.skip_spaces();
   10450      296674 :         point[i].v = V0;
   10451      296674 :         if (tok == start_token) {
   10452      122327 :           no_last_v = true;
   10453      122327 :           break;
   10454             :         }
                               // A failed vertical read is an error just like a failed
                               // horizontal read: build no node from this point.
   10455      174347 :         if (!read_vunits(&point[i].v, 'v')) {
   10456           0 :           had_error = true;
   10457           0 :           break;
   10458             :         }
   10459      174347 :         tok.skip_spaces();
   10460             :       }
                             // Consume the rest of the sequence up to the closing
                             // delimiter, stopping at a newline or end of input.
   10461      230302 :       while (tok != start_token && !tok.is_newline() && !tok.is_eof())
   10462           0 :         tok.next();
   10463      230302 :       if (!had_error) {
   10464      230302 :         switch (type) {
   10465       74421 :         case 'l':
   10466       74421 :           if (npoints != 1 || no_last_v) {
   10467           0 :             error("two arguments needed for line");
   10468           0 :             npoints = 1;
   10469             :           }
   10470       74421 :           break;
   10471         254 :         case 'c':
   10472         254 :           if (npoints != 1 || !no_last_v) {
   10473           0 :             error("one argument needed for circle");
   10474           0 :             npoints = 1;
   10475           0 :             point[0].v = V0;
   10476             :           }
   10477         254 :           break;
   10478          42 :         case 'e':
   10479          42 :           if (npoints != 1 || no_last_v) {
   10480           0 :             error("two arguments needed for ellipse");
   10481           0 :             npoints = 1;
   10482             :           }
   10483          42 :           break;
   10484         211 :         case 'a':
   10485         211 :           if (npoints != 2 || no_last_v) {
   10486           0 :             error("four arguments needed for arc");
   10487           0 :             npoints = 2;
   10488             :           }
   10489         211 :           break;
   10490          16 :         case '~':
   10491          16 :           if (no_last_v)
   10492           0 :             error("even number of arguments needed for spline");
   10493          16 :           break;
   10494           0 :         case 'f':
   10495           0 :           if (npoints != 1 || !no_last_v) {
   10496           0 :             error("one argument needed for gray shade");
   10497           0 :             npoints = 1;
   10498           0 :             point[0].v = V0;
   10499             :           }
                                 // No break: falls through to `default`, which only
                                 // breaks.
   10500             :         default:
   10501             :           // silently pass it through
   10502      155358 :           break;
   10503             :         }
   10504             :         draw_node *dn = new draw_node(type, point, npoints,
   10505      230302 :                                       curenv->get_font_size(),
   10506      230302 :                                       curenv->get_stroke_color(),
   10507      230302 :                                       curenv->get_fill_color());
   10508      230302 :         delete[] point;
   10509      230302 :         return dn;
   10510             :       }
   10511             :       else {
   10512           0 :         delete[] point;
   10513             :       }
   10514             :     }
   10515             :   }
   10516           0 :   return 0 /* nullptr */;
   10517             : }
   10518             : 
   // Read the arguments of a `\D` command of type `F` and, if a color
   // results, make it the fill color of the current environment.
   //
   // `start` is the delimiter token that opened the escape sequence.  The
   // next token's character selects the color scheme:
   //   c -> read_cmy()    d -> default_color    g -> read_gray()
   //   k -> read_cmyk()   r -> read_rgb()
   // Any other scheme character leaves the fill color unchanged.
   //
   // Tokens are then consumed up to the closing delimiter.  If has_arg()
   // reports no further argument first, a WARN_DELIM warning is issued and a
   // newline is pushed onto the input stack.  `have_formattable_input` is set
   // on every path except the early "missing color scheme" return.
   10519         189 : static void read_drawing_command_color_arguments(token &start)
   10520             : {
   10521         189 :   tok.next();
   10522         189 :   if (tok == start) {
   10523           0 :     error("missing color scheme");
   10524           0 :     return;
   10525             :   }
   10526             :   // safely compares to char literals; TODO: grochar
   10527         189 :   int scheme = tok.ch();
   10528         189 :   tok.next();
   10529         189 :   color *col = 0 /* nullptr */;
   10530             :   // TODO: grochar
   10531         189 :   unsigned char end = start.ch();
   10532         189 :   switch (scheme) {
   10533           0 :   case 'c':
   10534           0 :     col = read_cmy(end);
   10535           0 :     break;
   10536           0 :   case 'd':
   10537           0 :     col = &default_color;
   10538           0 :     break;
   10539         189 :   case 'g':
   10540         189 :     col = read_gray(end);
   10541         189 :     break;
   10542           0 :   case 'k':
   10543           0 :     col = read_cmyk(end);
   10544           0 :     break;
   10545           0 :   case 'r':
   10546           0 :     col = read_rgb(end);
   10547           0 :     break;
   10548             :   }
   10549         189 :   if (col != 0 /* nullptr */)
   10550         189 :     curenv->set_fill_color(col);
   10551         189 :   while (tok != start) {
   10552           0 :     if (!has_arg()) {
   10553             :       // token::description() writes to static, class-wide storage, so
   10554             :       // we must allocate a copy of it before issuing the next
   10555             :       // diagnostic.
   10556           0 :       char *delimdesc = strdup(start.description());
   10557           0 :       warning(WARN_DELIM, "missing closing delimiter in color space"
   10558             :               " drawing escape sequence; expected %1, got %2",
   10559           0 :               delimdesc, tok.description());
   10560           0 :       free(delimdesc);
   10561           0 :       input_stack::push(make_temp_iterator("\n"));
   10562           0 :       break;
   10563             :     }
   10564           0 :     tok.next();
   10565             :   }
   10566         189 :   have_formattable_input = true;
   10567             : }
   10568             : 
   // Table mapping warning category names to their mask values; it is
   // searched linearly by lookup_warning().
   //
   // Besides the individual categories there are three aggregate names:
   //   "all"     -> WARN_MAX without WARN_DI, WARN_MAC, and WARN_REG
   //   "w"       -> WARN_MAX
   //   "default" -> DEFAULT_WARNING_MASK
   10569             : static struct warning_category {
   10570             :   const char *name;
   10571             :   unsigned int mask;
   10572             : } warning_table[] = {
   10573             :   { "char", WARN_CHAR },
   10574             :   { "range", WARN_RANGE },
   10575             :   { "break", WARN_BREAK },
   10576             :   { "delim", WARN_DELIM },
   10577             :   { "scale", WARN_SCALE },
   10578             :   { "syntax", WARN_SYNTAX },
   10579             :   { "tab", WARN_TAB },
   10580             :   { "missing", WARN_MISSING },
   10581             :   { "input", WARN_INPUT },
   10582             :   { "escape", WARN_ESCAPE },
   10583             :   { "space", WARN_SPACE },
   10584             :   { "font", WARN_FONT },
   10585             :   { "di", WARN_DI },
   10586             :   { "mac", WARN_MAC },
   10587             :   { "reg", WARN_REG },
   10588             :   { "ig", WARN_IG },
   10589             :   { "color", WARN_COLOR },
   10590             :   { "file", WARN_FILE },
   10591             :   { "all", WARN_MAX & ~(WARN_DI | WARN_MAC | WARN_REG) },
   10592             :   { "w", WARN_MAX },
   10593             :   { "default", DEFAULT_WARNING_MASK },
   10594             : };
   10595             : 
   // Return the mask of the warning category called `name`, or 0 if
   // `warning_table` has no entry with exactly that name.  Callers treat a
   // zero result as "unrecognized category".
   10596         778 : static unsigned int lookup_warning(const char *name)
   10597             : {
   10598        5760 :   for (unsigned int i = 0U; i < countof(warning_table); i++)
   10599        5759 :     if (strcmp(name, warning_table[i].name) == 0)
   10600         777 :       return warning_table[i].mask;
   10601           1 :   return 0U;
   10602             : }
   10603             : 
   // Turn on the warning category called `name` by OR-ing its mask into
   // `warning_mask`.  An unknown name is reported with error() and leaves
   // `warning_mask` unchanged.
   10604         749 : static void enable_warning(const char *name)
   10605             : {
   10606         749 :   unsigned int mask = lookup_warning(name);
   10607         749 :   if (mask != 0U)
   10608         749 :     warning_mask |= mask;
   10609             :   else
   10610           0 :     error("unrecognized warning category '%1'", name);
   10611         749 : }
   10612             : 
   // Turn off the warning category called `name` by clearing its mask bits
   // in `warning_mask`.  An unknown name is reported with error() and leaves
   // `warning_mask` unchanged.
   10613          29 : static void disable_warning(const char *name)
   10614             : {
   10615          29 :   unsigned int mask = lookup_warning(name);
   10616          29 :   if (mask != 0U)
   10617          28 :     warning_mask &= ~mask;
   10618             :   else
   10619           1 :     error("unrecognized warning category '%1'", name);
   10620          29 : }
   10621             : 
   // Report a problem found in copy mode.
   //
   // When `want_input_ignored` is set, the message is downgraded to a
   // WARN_IG warning whose format string is `format` with the prefix
   // "(in ignored input) " prepended.  Otherwise it is reported with
   // error() using `format` unchanged.
   10622           0 : static void copy_mode_error(const char *format,
   10623             :                             const errarg &arg1,
   10624             :                             const errarg &arg2,
   10625             :                             const errarg &arg3)
   10626             : {
   10627           0 :   if (want_input_ignored) {
   10628             :     static const char prefix[] = "(in ignored input) ";
   10629             :     // ISO C++ does not permit VLAs on the stack.
   10630             :     // C++03: new char[sizeof prefix + strlen(format)]();
                           // `sizeof prefix` counts the terminating null byte, so the
                           // buffer has room for both strings and one terminator.
   10631           0 :     char *s = new char[sizeof prefix + strlen(format)];
   10632           0 :     (void) memset(s, 0, (sizeof prefix + (strlen(format)
   10633             :                                           * sizeof(char))));
   10634           0 :     strcpy(s, prefix);
   10635           0 :     strcat(s, format);
   10636           0 :     warning(WARN_IG, s, arg1, arg2, arg3);
   10637           0 :     delete[] s;
   10638             :   }
   10639             :   else
   10640           0 :     error(format, arg1, arg2, arg3);
   10641           0 : }
   10642             : 
   // Kinds of diagnostic that do_error() can emit.  The order matters:
   // do_error() tests `type < FATAL` to decide whether a diagnostic may be
   // inhibited, so FATAL must remain the greatest enumerator.
   10643             : enum error_type { DEBUG, WARNING, OUTPUT_WARNING, ERROR, FATAL };
   10644             : 
   // Write one diagnostic of kind `type` to the standard error stream.
   //
   // Diagnostics other than FATAL are dropped while `want_errors_inhibited`
   // is set.  Otherwise the output is built in this order:
   //   1. a backtrace of the input stack, if `want_backtraces` is set;
   //   2. a location prefix: program name plus file name and line number
   //      when get_file_line() succeeds, otherwise the program name alone;
   //   3. a label for the kind of diagnostic.  For OUTPUT_WARNING the label
   //      also gives the page number and vertical position of the top-level
   //      diversion and, when the current diversion differs, its name and
   //      position.  nroff mode reports positions as line counts; otherwise
   //      they are divided by `warn_scale` and suffixed with
   //      `warn_scaling_unit`;
   //   4. the message, formatted by errprint() from `format` and the three
   //      arguments, followed by a newline and a flush.
   // A FATAL diagnostic then calls write_any_trailer_and_exit(EXIT_FAILURE).
   10645         441 : static void do_error(error_type type,
   10646             :                      const char *format,
   10647             :                      const errarg &arg1,
   10648             :                      const errarg &arg2,
   10649             :                      const errarg &arg3)
   10650             : {
   10651             :   const char *filename;
   10652             :   int lineno;
   10653         441 :   if (want_errors_inhibited && (type < FATAL))
   10654         233 :     return;
   10655         208 :   if (want_backtraces)
   10656          34 :     input_stack::backtrace();
   10657         208 :   if (!get_file_line(&filename, &lineno))
   10658           4 :     filename = 0 /* nullptr */;
   10659         208 :   if (filename != 0 /* nullptr */) {
   10660         204 :     if (program_name != 0 /* nullptr */)
   10661         204 :       errprint("%1:", program_name);
   10662         204 :     errprint("%1:%2: ", filename, lineno);
   10663             :   }
   10664           4 :   else if (program_name != 0 /* nullptr */)
   10665           4 :     fprintf(stderr, "%s: ", program_name);
   10666         208 :   switch (type) {
   10667           2 :   case FATAL:
   10668           2 :     fputs("fatal error: ", stderr);
   10669           2 :     break;
   10670          79 :   case ERROR:
   10671          79 :     fputs("error: ", stderr);
   10672          79 :     break;
   10673          94 :   case WARNING:
   10674          94 :     fputs("warning: ", stderr);
   10675          94 :     break;
   10676           0 :   case DEBUG:
   10677           0 :     fputs("debug: ", stderr);
   10678           0 :     break;
   10679          33 :   case OUTPUT_WARNING:
   10680          33 :     if (in_nroff_mode) {
                             // Report the position as a 1-based line count.
   10681          23 :       int fromtop = (topdiv->get_vertical_position().to_units()
   10682          23 :                      / vresolution) + 1;
   10683          23 :       fprintf(stderr, "warning [page %d, line %d",
   10684             :               topdiv->get_page_number(), fromtop);
   10685          23 :       if (topdiv != curdiv) {
   10686           0 :         int fromdivtop = (curdiv->get_vertical_position().to_units()
   10687           0 :                           / vresolution) + 1;
   10688           0 :         fprintf(stderr, ", diversion '%s', line %d",
   10689             :                 curdiv->get_diversion_name(), fromdivtop);
   10690             :       }
   10691          23 :       fprintf(stderr, "]: ");
   10692             :     }
   10693             :     else {
                             // Report the position scaled by `warn_scale`.
   10694          10 :       double fromtop = topdiv->get_vertical_position().to_units()
   10695          10 :                        / warn_scale;
   10696          10 :       fprintf(stderr, "warning [page %d, %.1f%c",
   10697             :               topdiv->get_page_number(), fromtop, warn_scaling_unit);
   10698          10 :       if (topdiv != curdiv) {
   10699           0 :         double fromtop1 = curdiv->get_vertical_position().to_units()
   10700           0 :                           / warn_scale;
   10701           0 :         fprintf(stderr, " (diversion '%s', %.1f%c)",
   10702             :                 curdiv->get_diversion_name(), fromtop1,
   10703             :                 warn_scaling_unit);
   10704             :       }
   10705          10 :       fprintf(stderr, "]: ");
   10706             :     }
   10707          33 :     break;
   10708             :   }
   10709         208 :   errprint(format, arg1, arg2, arg3);
   10710         208 :   fputc('\n', stderr);
   10711         208 :   fflush(stderr);
   10712         208 :   if (type == FATAL)
   10713           2 :     write_any_trailer_and_exit(EXIT_FAILURE);
   10714             : }
   10715             : 
   10716             : // This function should have no callers in production builds.
   10717           0 : void debug(const char *format,
   10718             :            const errarg &arg1,
   10719             :            const errarg &arg2,
   10720             :            const errarg &arg3)
   10721             : {
   10722           0 :   do_error(DEBUG, format, arg1, arg2, arg3);
   10723           0 : }
   10724             : 
   10725       27765 : int warning(warning_type t,
   10726             :             const char *format,
   10727             :             const errarg &arg1,
   10728             :             const errarg &arg2,
   10729             :             const errarg &arg3)
   10730             : {
   10731       27765 :   if ((t & warning_mask) != 0U) {
   10732          94 :     do_error(WARNING, format, arg1, arg2, arg3);
   10733          94 :     return 1;
   10734             :   }
   10735             :   else
   10736       27671 :     return 0;
   10737             : }
   10738             : 
   10739          40 : int output_warning(warning_type t,
   10740             :                    const char *format,
   10741             :                    const errarg &arg1,
   10742             :                    const errarg &arg2,
   10743             :                    const errarg &arg3)
   10744             : {
   10745          40 :   if ((t & warning_mask) != 0U) {
   10746          33 :     do_error(OUTPUT_WARNING, format, arg1, arg2, arg3);
   10747          33 :     return 1;
   10748             :   }
   10749             :   else
   10750           7 :     return 0;
   10751             : }
   10752             : 
   10753         312 : void error(const char *format,
   10754             :            const errarg &arg1,
   10755             :            const errarg &arg2,
   10756             :            const errarg &arg3)
   10757             : {
   10758         312 :   do_error(ERROR, format, arg1, arg2, arg3);
   10759         312 : }
   10760             : 
   10761           2 : void fatal(const char *format,
   10762             :            const errarg &arg1,
   10763             :            const errarg &arg2,
   10764             :            const errarg &arg3)
   10765             : {
   10766           2 :   do_error(FATAL, format, arg1, arg2, arg3);
   10767           0 : }
   10768             : 
   10769           0 : void fatal_with_file_and_line(const char *filename, int lineno,
   10770             :                               const char *format,
   10771             :                               const errarg &arg1,
   10772             :                               const errarg &arg2,
   10773             :                               const errarg &arg3)
   10774             : {
   10775           0 :   if (program_name != 0 /* nullptr */)
   10776           0 :     fprintf(stderr, "%s:", program_name);
   10777           0 :   fprintf(stderr, "%s:", filename);
   10778           0 :   if (lineno > 0)
   10779           0 :     fprintf(stderr, "%d:", lineno);
   10780           0 :   fputs(" fatal error: ", stderr);
   10781           0 :   errprint(format, arg1, arg2, arg3);
   10782           0 :   fputc('\n', stderr);
   10783           0 :   fflush(stderr);
   10784           0 :   write_any_trailer_and_exit(EXIT_FAILURE);
   10785           0 : }
   10786             : 
   10787           0 : void error_with_file_and_line(const char *filename, int lineno,
   10788             :                               const char *format,
   10789             :                               const errarg &arg1,
   10790             :                               const errarg &arg2,
   10791             :                               const errarg &arg3)
   10792             : {
   10793           0 :   if (program_name != 0 /* nullptr */)
   10794           0 :     fprintf(stderr, "%s:", program_name);
   10795           0 :   fprintf(stderr, "%s:", filename);
   10796           0 :   if (lineno > 0)
   10797           0 :     fprintf(stderr, "%d:", lineno);
   10798           0 :   fputs(" error: ", stderr);
   10799           0 :   errprint(format, arg1, arg2, arg3);
   10800           0 :   fputc('\n', stderr);
   10801           0 :   fflush(stderr);
   10802           0 : }
   10803             : 
   10804             : // This function should have no callers in production builds.
   10805           0 : void debug_with_file_and_line(const char *filename,
   10806             :                               int lineno,
   10807             :                               const char *format,
   10808             :                               const errarg &arg1,
   10809             :                               const errarg &arg2,
   10810             :                               const errarg &arg3)
   10811             : {
   10812           0 :   if (program_name != 0 /* nullptr */)
   10813           0 :     fprintf(stderr, "%s:", program_name);
   10814           0 :   fprintf(stderr, "%s:", filename);
   10815           0 :   if (lineno > 0)
   10816           0 :     fprintf(stderr, "%d:", lineno);
   10817           0 :   fputs(" debug: ", stderr);
   10818           0 :   errprint(format, arg1, arg2, arg3);
   10819           0 :   fputc('\n', stderr);
   10820           0 :   fflush(stderr);
   10821           0 : }
   10822             : 
   10823             : dictionary charinfo_dictionary(501);
   10824             : 
   10825     7840300 : charinfo *lookup_charinfo(symbol nm, bool suppress_creation)
   10826             : {
   10827     7840300 :   void *p = charinfo_dictionary.lookup(nm);
   10828     7840300 :   if (p != 0 /* nullptr */)
   10829     6812382 :     return static_cast<charinfo *>(p);
   10830     1027918 :   if (suppress_creation)
   10831           1 :     return static_cast<charinfo *>(0 /* nullptr */);
   10832             :   else {
   10833     1027917 :     charinfo *cp = new charinfo(nm);
   10834     1027917 :     (void) charinfo_dictionary.lookup(nm, cp);
   10835     1027917 :     return cp;
   10836             :   }
   10837             : }
   10838             : 
   10839             : int charinfo::next_index = 0;
   10840             : 
   10841     1364986 : charinfo::charinfo(symbol s)
   10842             : : translation(0 /* nullptr */), mac(0 /* nullptr */),
   10843             :   special_translation(TRANSLATE_NONE), hyphenation_code(0U),
   10844             :   flags(0U), ascii_code(0U), asciify_code(0U),
   10845             :   is_not_found(false), is_transparently_translatable(true),
   10846     1364986 :   translatable_as_input(false), mode(CHAR_NORMAL), nm(s)
   10847             : {
   10848     1364986 :   index = next_index++;
   10849     1364986 :   number = -1;
   10850     1364986 :   get_flags();
   10851     1364986 : }
   10852             : 
   10853       28204 : int charinfo::get_unicode_mapping()
   10854             : {
   10855       28204 :   if (ascii_code != 0U)
   10856        5697 :     return ascii_code;
   10857       22507 :   return glyph_to_unicode(this);
   10858             : }
   10859             : 
   10860      343785 : void charinfo::set_hyphenation_code(unsigned char c)
   10861             : {
   10862      343785 :   hyphenation_code = c;
   10863      343785 : }
   10864             : 
   10865      175825 : void charinfo::set_translation(charinfo *ci, bool transparently,
   10866             :                                bool as_input)
   10867             : {
   10868      175825 :   translation = ci;
   10869      175825 :   if ((ci != 0 /* nullptr */) && as_input) {
   10870      156362 :     if (hyphenation_code != 0U)
   10871       13948 :       ci->set_hyphenation_code(hyphenation_code);
   10872      156362 :     if (asciify_code != 0U)
   10873           0 :       ci->set_asciify_code(asciify_code);
   10874      156362 :     else if (ascii_code != 0U)
   10875      156362 :       ci->set_asciify_code(ascii_code);
   10876      156362 :     ci->make_translatable_as_input();
   10877             :   }
   10878      175825 :   special_translation = TRANSLATE_NONE;
   10879      175825 :   is_transparently_translatable = transparently;
   10880      175825 : }
   10881             : 
   10882             : // Recompute flags for all entries in the charinfo dictionary.
   10883           8 : void get_flags()
   10884             : {
   10885           8 :   dictionary_iterator iter(charinfo_dictionary);
   10886             :   charinfo *ci;
   10887           8 :   symbol s;
   10888             :   // We must use the nuclear `reinterpret_cast` operator because GNU
   10889             :   // troff's dictionary types use a pre-STL approach to containers.
   10890        9399 :   while (iter.get(&s, reinterpret_cast<void **>(&ci))) {
   10891        9391 :     assert(!s.is_null());
   10892        9391 :     ci->get_flags();
   10893             :   }
   10894           8 :   using_character_classes = false;
   10895           8 : }
   10896             : 
   10897             : // Get the union of all flags affecting this charinfo.
   10898     1374381 : void charinfo::get_flags()
   10899             : {
   10900     1374381 :   dictionary_iterator iter(char_class_dictionary);
   10901             :   charinfo *ci;
   10902     1374381 :   symbol s;
   10903             :   // We must use the nuclear `reinterpret_cast` operator because GNU
   10904             :   // troff's dictionary types use a pre-STL approach to containers.
   10905     1401890 :   while (iter.get(&s, reinterpret_cast<void **>(&ci))) {
   10906       27509 :     assert(!s.is_null());
   10907       27509 :     if (ci->contains(get_unicode_mapping())) {
   10908             : #if defined(DEBUGGING)
   10909             :       if (want_html_debugging)
   10910             :         fprintf(stderr, "charinfo::get_flags %p %s %d\n",
   10911             :                         static_cast<void *>(ci), ci->nm.contents(),
   10912             :                         ci->flags);
   10913             : #endif
   10914         621 :       flags |= ci->flags;
   10915             :     }
   10916             :   }
   10917     1374381 : }
   10918             : 
   10919        1688 : void charinfo::set_special_translation(int cc, bool transparently)
   10920             : {
   10921        1688 :   special_translation = cc;
   10922        1688 :   translation = 0 /* nullptr */;
   10923        1688 :   is_transparently_translatable = transparently;
   10924        1688 : }
   10925             : 
   10926      363008 : void charinfo::set_ascii_code(unsigned char c)
   10927             : {
   10928      363008 :   ascii_code = c;
   10929      363008 : }
   10930             : 
   10931      156362 : void charinfo::set_asciify_code(unsigned char c)
   10932             : {
   10933      156362 :   asciify_code = c;
   10934      156362 : }
   10935             : 
   10936             : // Replace character definition with macro `m`, returning previous
   10937             : // macro if any (if none, return a null pointer).
   10938      110611 : macro *charinfo::set_macro(macro *m)
   10939             : {
   10940      110611 :   macro *tem = mac;
   10941      110611 :   mac = m;
   10942      110611 :   return tem;
   10943             : }
   10944             : 
   10945             : // Replace character definition with macro `m` and update its character
   10946             : // mode to `cm`, returning previous macro if any (if none, return a null
   10947             : // pointer).
   10948      325449 : macro *charinfo::set_macro(macro *m, char_mode cm)
   10949             : {
   10950      325449 :   macro *tem = mac;
   10951      325449 :   mac = m;
   10952      325449 :   mode = cm;
   10953      325449 :   return tem;
   10954             : }
   10955             : 
   10956      337069 : void charinfo::set_number(int n)
   10957             : {
   10958      337069 :   assert(n >= 0);
   10959      337069 :   number = n;
   10960      337069 : }
   10961             : 
   10962       21144 : int charinfo::get_number()
   10963             : {
   10964       21144 :   assert(number >= 0);
   10965       21144 :   return number;
   10966             : }
   10967             : 
   10968       27509 : bool charinfo::contains(int c, bool already_called)
   10969             : {
   10970       27509 :   if (already_called) {
   10971           0 :     warning(WARN_SYNTAX, "nested class detected while processing"
   10972           0 :             " character code %1", c);
   10973           0 :     return false;
   10974             :   }
   10975       27509 :   std::vector<std::pair<int, int> >::const_iterator ranges_iter;
   10976       27509 :   ranges_iter = ranges.begin();
   10977      394494 :   while (ranges_iter != ranges.end()) {
   10978      367606 :     if (c >= ranges_iter->first && c <= ranges_iter->second) {
   10979             : #if defined(DEBUGGING)
   10980             :       if (want_html_debugging)
   10981             :         fprintf(stderr, "charinfo::contains(%d)\n", c);
   10982             : #endif
   10983         621 :       return true;
   10984             :     }
   10985      366985 :     ++ranges_iter;
   10986             :   }
   10987             : 
   10988             :   // Nested classes don't work.  See Savannah #67770.
   10989             : #if 0
   10990             :   std::vector<charinfo *>::const_iterator nested_iter;
   10991             :   nested_iter = nested_classes.begin();
   10992             :   while (nested_iter != nested_classes.end()) {
   10993             :     if ((*nested_iter)->contains(c, true))
   10994             :       return true;
   10995             :     ++nested_iter;
   10996             :   }
   10997             : #endif
   10998             : 
   10999       26888 :   return false;
   11000             : }
   11001             : 
   11002           0 : bool charinfo::contains(symbol s, bool already_called)
   11003             : {
   11004           0 :   if (already_called) {
   11005           0 :     warning(WARN_SYNTAX, "nested class detected while processing symbol"
   11006           0 :             " %1", s.contents());
   11007           0 :     return false;
   11008             :   }
   11009           0 :   const char *unicode = glyph_name_to_unicode(s.contents());
   11010           0 :   if (unicode != 0 /* nullptr */ && strchr(unicode, '_') == 0) {
   11011             :     char *ignore;
   11012           0 :     int c = (int) strtol(unicode, &ignore, 16);
   11013           0 :     return contains(c, true);
   11014             :   }
   11015             :   else
   11016           0 :     return false;
   11017             : }
   11018             : 
   11019           0 : bool charinfo::contains(charinfo *, bool)
   11020             : {
   11021             :   // Werner Lemberg marked this as "TODO" in 2010.
   11022           0 :   assert(0 == "unimplemented member function");
   11023             :   return false;
   11024             : }
   11025             : 
   11026           5 : void charinfo::describe_flags()
   11027             : {
   11028           5 :   if (0U == flags)
   11029           2 :     errprint("(none)\n");
   11030             :   else {
   11031           3 :     char none[] = { '\0' };
   11032           3 :     char comma[] = { ',', ' ', '\0' };
   11033           3 :     char *separator = none;
   11034           3 :     errprint("(");
   11035           3 :     if (flags & ENDS_SENTENCE) {
   11036           0 :       errprint("%1ends sentence", separator);
   11037           0 :       separator = comma;
   11038             :     }
   11039           3 :     if (flags & ALLOWS_BREAK_BEFORE) {
   11040           0 :       errprint("%1allows break before", separator);
   11041           0 :       separator = comma;
   11042             :     }
   11043           3 :     if (flags & ALLOWS_BREAK_AFTER) {
   11044           2 :       errprint("%1allows break after", separator);
   11045           2 :       separator = comma;
   11046             :     }
   11047           3 :     if (flags & OVERLAPS_HORIZONTALLY) {
   11048           0 :       errprint("%1overlaps horizontally", separator);
   11049           0 :       separator = comma;
   11050             :     }
   11051           3 :     if (flags & OVERLAPS_VERTICALLY) {
   11052           0 :       errprint("%1overlaps vertically", separator);
   11053           0 :       separator = comma;
   11054             :     }
   11055           3 :     if (flags & IS_TRANSPARENT_TO_END_OF_SENTENCE) {
   11056           0 :       errprint("%1is transparent to end of sentence", separator);
   11057           0 :       separator = comma;
   11058             :     }
   11059           3 :     if (flags & IGNORES_SURROUNDING_HYPHENATION_CODES) {
   11060           0 :       errprint("%1ignores surrounding hyphenation codes", separator);
   11061           0 :       separator = comma;
   11062             :     }
   11063           3 :     if (flags & PROHIBITS_BREAK_BEFORE) {
   11064           1 :       errprint("%1prohibits break before", separator);
   11065           1 :       separator = comma;
   11066             :     }
   11067           3 :     if (flags & PROHIBITS_BREAK_AFTER) {
   11068           0 :       errprint("%1prohibits break after", separator);
   11069           0 :       separator = comma;
   11070             :     }
   11071           3 :     if (flags & IS_INTERWORD_SPACE) {
   11072           0 :       errprint("%1is interword space", separator);
   11073           0 :       separator = comma;
   11074             :     }
   11075           3 :     errprint(")\n");
   11076             :   }
   11077           5 : }
   11078             : 
   11079           5 : void charinfo::dump_flags()
   11080             : {
   11081           5 :   errprint("  %1flags: %2 ", (is_class() ? "" : "inherent "), flags);
   11082           5 :   describe_flags();
   11083           5 :   if (!is_class()) {
   11084             :     // Report influence of membership in character classes, if any.
   11085           4 :     unsigned int saved_flags = flags;
   11086           4 :     get_flags();
   11087           4 :     if (flags != saved_flags) {
   11088           0 :       errprint("  effective flags: %1 ", flags);
   11089           0 :       describe_flags();
   11090           0 :       flags = saved_flags;
   11091             :     }
   11092             :   }
   11093           5 : }
   11094             : 
   11095           5 : void charinfo::dump()
   11096             : {
   11097           5 :   if (is_class()) {
   11098           1 :     std::vector<std::pair<int, int> >::const_iterator ranges_iter;
   11099           1 :     ranges_iter = ranges.begin();
   11100           1 :     assert(mac != 0 /* nullptr */);
   11101           1 :     errprint("  defined at: ");
   11102           1 :     mac->dump();
   11103           1 :     fflush(stderr);
   11104           1 :     errprint("  contains code points: ");
   11105           1 :     const size_t buflen = sizeof "U+10FFFF";
   11106           1 :     int range_begin = 0;
   11107           1 :     int range_end = 0;
   11108             :     char beg_hexbuf[buflen];
   11109             :     char end_hexbuf[buflen];
   11110           1 :     (void) memset(beg_hexbuf, '\0', buflen);
   11111           1 :     (void) memset(end_hexbuf, '\0', buflen);
   11112           1 :     bool has_ranges = false;
   11113          41 :     while (ranges_iter != ranges.end()) {
   11114          40 :       has_ranges = true;
   11115          40 :       range_begin = ranges_iter->first;
   11116          40 :       range_end = ranges_iter->second;
   11117          40 :       (void) snprintf(beg_hexbuf, buflen, "U+%.4X", range_begin);
   11118          40 :       (void) snprintf(end_hexbuf, buflen, "U+%.4X", range_end);
   11119             :       // TODO: comma-separate?  JSON list?
   11120          40 :       if (range_begin == range_end)
   11121          40 :         errprint("%1 ", beg_hexbuf);
   11122             :       else
   11123           0 :         errprint("%1-%2 ", beg_hexbuf, end_hexbuf);
   11124          40 :       ++ranges_iter;
   11125             :     }
   11126           1 :     if (!has_ranges)
   11127           0 :       errprint("(none)");
   11128           1 :     errprint("\n");
   11129             : #if 0
   11130             :     // Nested classes don't work.  See Savannah #67770.
   11131             :     errprint("  contains nested classes: ");
   11132             :     std::vector<charinfo *>::const_iterator nested_iter;
   11133             :     nested_iter = nested_classes.begin();
   11134             :     bool has_nested_classes = false;
   11135             :     while (nested_iter != nested_classes.end()) {
   11136             :       has_nested_classes = true;
   11137             :       // TODO: Here's where JSON would really pay off.
   11138             :       (*nested_iter)->dump();
   11139             :     }
   11140             :     if (!has_nested_classes)
   11141             :       errprint("(none)");
   11142             :     errprint("\n");
   11143             : #endif
   11144           1 :     dump_flags();
   11145             :   }
   11146             :   else {
   11147           4 :     if (translation != 0 /* nullptr */)
   11148           0 :       errprint("  is translated\n");
   11149             :     else
   11150           4 :       errprint("  is not translated\n");
   11151           4 :     if (mac != 0 /* nullptr */) {
   11152           0 :       errprint("  has a macro: ");
   11153           0 :       mac->json_dump();
   11154           0 :       errprint("\n");
   11155             :     }
   11156             :     else
   11157           4 :       errprint("  does not have a macro\n");
   11158           4 :     errprint("  special translation: %1\n",
   11159           4 :              static_cast<int>(special_translation));
   11160           4 :     errprint("  hyphenation code: %1\n",
   11161           4 :              static_cast<int>(hyphenation_code));
   11162           4 :     dump_flags();
   11163           4 :     errprint("  asciify code: %1\n", static_cast<int>(asciify_code));
   11164           4 :     errprint("  ASCII code: %1\n", static_cast<int>(ascii_code));
   11165             :     // Also see node.cpp::glyph_node::asciify().
   11166           4 :     int mapping = get_unicode_mapping();
   11167           4 :     if (mapping >= 0) {
   11168           3 :       const size_t buflen = 6; // enough for five hex digits + '\0'
   11169             :       char hexbuf[buflen];
   11170           3 :       (void) memset(hexbuf, '\0', buflen);
   11171           3 :       (void) snprintf(hexbuf, buflen, "%.4X", mapping);
   11172           3 :       errprint("  Unicode mapping: U+%1\n", hexbuf);
   11173             :     }
   11174             :     else
   11175           1 :       errprint("  Unicode mapping: none (%1)\n", mapping);
   11176           4 :     errprint("  is%1 found\n", is_not_found ? " not" : "");
   11177           4 :     errprint("  is%1 transparently translatable\n",
   11178           4 :              is_transparently_translatable ? "" : " not");
   11179           4 :     errprint("  is%1 translatable as input\n",
   11180           4 :              translatable_as_input ? "" : " not");
   11181           4 :     const char *modestr = character_mode_description(mode);
   11182           4 :     if (strcmp(modestr, "") == 0)
   11183           4 :       modestr =" normal";
   11184           4 :     errprint("  mode:%1\n", modestr);
   11185             :   }
   11186           5 :   fflush(stderr);
   11187           5 : }
   11188             : 
   11189             : symbol UNNAMED_SYMBOL("---");
   11190             : 
   11191             : // For indexed characters not between 0 and 255, we make a symbol out
   11192             : // of the number and store them in this dictionary.
   11193             : 
   11194             : dictionary indexed_charinfo_dictionary(11);
   11195             : 
   11196     3064483 : static charinfo *get_charinfo_by_index(int n, bool suppress_creation)
   11197             : {
   11198             :   static charinfo *index_table[256];
   11199             : 
   11200     3064483 :   if (n >= 0 && n < 256) {
   11201     2278674 :     charinfo *ci = index_table[n];
   11202     2278674 :     if ((0 /*nullptr */ == ci) && !suppress_creation) {
   11203      194414 :       ci = new charinfo(UNNAMED_SYMBOL);
   11204      194414 :       ci->set_number(n);
   11205      194414 :       index_table[n] = ci;
   11206             :     }
   11207     2278674 :     return ci;
   11208             :   }
   11209             :   else {
   11210      785809 :     symbol ns(i_to_a(n));
   11211             :     charinfo *ci =
   11212      785809 :       static_cast<charinfo *>(indexed_charinfo_dictionary.lookup(ns));
   11213      785809 :     if ((0 /*nullptr */ == ci) && !suppress_creation) {
   11214      142655 :       ci = new charinfo(UNNAMED_SYMBOL);
   11215      142655 :       ci->set_number(n);
   11216      142655 :       (void) indexed_charinfo_dictionary.lookup(ns, ci);
   11217             :     }
   11218      785809 :     return ci;
   11219             :   }
   11220             : }
   11221             : 
   11222             : // This overrides the same function from libgroff; while reading font
   11223             : // definition files it puts single-letter glyph names into
   11224             : // 'charset_table' and converts glyph names of the form '\x' ('x' a
   11225             : // single letter) into 'x'.  Consequently, symbol("x") refers to glyph
   11226             : // name '\x', not 'x'.
   11227             : 
   11228    11619868 : glyph *name_to_glyph(const char *nm)
   11229             : {
   11230             :   charinfo *ci;
   11231    11619868 :   if (nm[1] == 0)
   11232     4991031 :     ci = charset_table[nm[0] & 0xff];
   11233     6628837 :   else if (nm[0] == '\\' && nm[2] == 0)
   11234       10929 :     ci = lookup_charinfo(symbol(nm + 1));
   11235             :   else
   11236     6617908 :     ci = lookup_charinfo(symbol(nm));
   11237    11619868 :   return ci->as_glyph();
   11238             : }
   11239             : 
   11240     3017909 : glyph *number_to_glyph(int n)
   11241             : {
   11242     3017909 :   return get_charinfo_by_index(n)->as_glyph();
   11243             : }
   11244             : 
   11245    26873637 : const char *glyph_to_name(glyph *g)
   11246             : {
   11247             :   // In both libgroff and troff, `charinfo` has `glyph` as a base class.
   11248             :   // But in troff, `charinfo` stores much more information.
   11249    26873637 :   charinfo *ci = reinterpret_cast<charinfo *>(g);
   11250    26873637 :   return ((ci->nm != UNNAMED_SYMBOL) ? ci->nm.contents()
   11251    26873637 :                                      : 0 /* nullptr */);
   11252             : }
   11253             : 
   11254             : // Local Variables:
   11255             : // fill-column: 72
   11256             : // mode: C++
   11257             : // End:
   11258             : // vim: set cindent noexpandtab shiftwidth=2 textwidth=72:

Generated by: LCOV version 1.14