Line data Source code
1 : /* Copyright 1989-2024 Free Software Foundation, Inc.
2 : 2021-2025 G. Branden Robinson
3 :
4 : Written by James Clark (jjc@jclark.com)
5 :
6 : This file is part of groff, the GNU roff typesetting system.
7 :
8 : groff is free software; you can redistribute it and/or modify it under
9 : the terms of the GNU General Public License as published by the Free
10 : Software Foundation, either version 3 of the License, or
11 : (at your option) any later version.
12 :
13 : groff is distributed in the hope that it will be useful, but WITHOUT ANY
14 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 : for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 :
21 : #ifdef HAVE_CONFIG_H
22 : #include <config.h>
23 : #endif
24 :
25 : #include <assert.h>
26 : #include <errno.h> // ENOENT, errno
27 : #include <locale.h> // setlocale()
28 : #include <stdcountof.h>
29 : #include <stdio.h> // EOF, FILE, clearerr(), fclose(), fflush(),
30 : // fileno(), fopen(), fprintf(), fseek(), getc(),
31 : // pclose(), popen(), printf(), SEEK_SET, snprintf(),
32 : // sprintf(), setbuf(), stderr, stdin, stdout,
33 : // ungetc()
34 : #include <stdlib.h> // atoi(), exit(), EXIT_FAILURE, EXIT_SUCCESS,
35 : // free(), getenv(), putenv(), strtol(), system()
36 : #include <string.h> // strcpy(), strdup(), strerror()
37 :
38 : #include <getopt.h> // getopt_long()
39 :
40 : #include <stack>
41 :
42 : #include "json-encode.h" // json_encode_char()
43 :
44 : #include "troff.h"
45 : #include "dictionary.h"
46 : #include "hvunits.h"
47 : #include "stringclass.h"
48 : #include "mtsm.h"
49 : #include "env.h"
50 : #include "request.h"
51 : #include "node.h"
52 : #include "token.h"
53 : #include "div.h"
54 : #include "reg.h"
55 : #include "font.h"
56 : #include "charinfo.h"
57 : #include "macropath.h"
58 : #include "input.h"
59 : #include "defs.h"
60 : #include "unicode.h"
61 : #include "curtime.h"
62 :
63 : // needed for getpid() and isatty()
64 : #include "posix.h"
65 : #include "nonposix.h"
66 :
67 : #define MACRO_PREFIX "tmac."
68 : #define MACRO_POSTFIX ".tmac"
69 : #define INITIAL_STARTUP_FILE "troffrc"
70 : #define FINAL_STARTUP_FILE "troffrc-end"
71 : #define DEFAULT_INPUT_STACK_LIMIT 1000
72 :
73 : #ifndef DEFAULT_WARNING_MASK
74 : // warnings that are enabled by default
75 : #define DEFAULT_WARNING_MASK \
76 : (WARN_CHAR|WARN_BREAK|WARN_SPACE|WARN_FONT|WARN_FILE)
77 : #endif
78 :
79 : extern "C" const char *program_name;
80 : extern "C" const char *Version_string;
81 :
82 : // initial size for input buffers that need to grow arbitrarily
83 : static const int default_buffer_size = 16;
84 :
85 : #ifdef COLUMN
86 : void init_column_requests();
87 : #endif /* COLUMN */
88 :
89 : // forward declarations
90 : static node *read_drawing_command();
91 : static void read_drawing_command_color_arguments(token &);
92 : static void push_token(const token &);
93 : static void unsafe_transparent_throughput_file_request();
94 : #ifdef COLUMN
95 : void vjustify();
96 : #endif /* COLUMN */
97 : static void transparent_throughput_file_request();
98 :
99 : token tok;
100 : bool was_invoked_with_regular_control_character = false;
101 : bool using_character_classes = false;
102 : static bool permit_color_output = true;
103 : bool want_color_output = true;
104 : static bool want_backtraces = false;
105 : char *pipe_command = 0 /* nullptr */;
106 : charinfo *charset_table[256];
107 : unsigned char hpf_code_table[256];
108 :
109 : static unsigned int warning_mask = DEFAULT_WARNING_MASK;
110 : static bool want_errors_inhibited = false;
111 : static bool want_input_ignored = false;
112 :
113 : static void enable_warning(const char *);
114 : static void disable_warning(const char *);
115 :
116 : static symbol end_of_input_macro_name;
117 : static symbol blank_line_macro_name;
118 : static symbol leading_spaces_macro_name;
119 : static bool want_att_compat = false;
120 : bool want_abstract_output = false;
121 : bool want_nodes_dumped = false;
122 : bool want_output_suppressed = false;
123 : bool is_writing_html = false;
124 : static int suppression_level = 0; // depth of nested \O escapes
125 :
126 : bool in_nroff_mode = false;
127 : bool is_device_ps_or_pdf = false;
128 :
129 : // Keep track of whether \f, \F, \D'F...', \H, \m, \M, \O[345], \R, \s,
130 : // or \S has been processed in token::next().
131 : static bool have_formattable_input = false;
132 : // `have_formattable_input` is reset immediately upon reading a new
133 : // input line, but we need more state information because the input line
134 : // might have been continued/interrupted with `\c`.
135 : // Consider:
136 : // \f[TB]\m[red]hello\c
137 : // \f[]\m[]
138 : static bool have_formattable_input_on_interrupted_line = false;
139 :
140 : bool device_has_tcommand = false; // 't' output command supported
141 : static bool want_unsafe_requests = false; // be safer by default
142 :
143 : static bool have_multiple_params = false; // \[e aa], \*[foo bar]
144 :
145 : double spread_limit = -3.0 - 1.0; // negative means deactivated
146 :
147 : double warn_scale;
148 : char warn_scaling_unit;
149 : bool want_html_debugging = true; // enable more diagnostics
150 :
151 : search_path *mac_path = &safer_macro_path;
152 :
153 : // Initialize inclusion search path with only the current directory.
154 : search_path include_search_path(0 /* nullptr */, 0 /* nullptr */, 0, 1);
155 :
156 : static int read_char_in_copy_mode(node ** /* nd; 0 to discard */,
157 : bool /* is_defining */ = false,
158 : bool /* handle_escaped_E */ = false);
159 : static void copy_mode_error(const char *,
160 : const errarg & = empty_errarg,
161 : const errarg & = empty_errarg,
162 : const errarg & = empty_errarg);
163 :
164 : enum read_mode { ALLOW_EMPTY, WITH_ARGS, NO_ARGS };
165 : static symbol read_escape_parameter(read_mode = NO_ARGS);
166 : static symbol read_long_escape_parameters(read_mode = NO_ARGS);
167 : static void interpolate_string(symbol);
168 : static void interpolate_string_with_args(symbol);
169 : static void interpolate_macro(symbol, bool = false);
170 : static void interpolate_number_format(symbol);
171 : static void interpolate_environment_variable(symbol);
172 :
173 : static symbol composite_glyph_name(symbol);
174 : static void interpolate_positional_parameter(symbol);
175 : static request_or_macro *lookup_request(symbol);
176 : static bool read_delimited_measurement(units * /* n */,
177 : unsigned char /* si */);
178 : static bool read_delimited_measurement(units * /* n */,
179 : unsigned char /* si */, units /* prev_value */);
180 : static symbol read_input_until_terminator(bool /* required */,
181 : unsigned char /* end_char */, bool /* want_identifier */ = false);
182 : static bool read_line_rule_expression(units * /* res */,
183 : unsigned char /* si */, charinfo ** /* cp */);
184 : static bool read_size(int *);
185 : static symbol read_delimited_identifier();
186 : static void init_registers();
187 : static void trapping_blank_line();
188 :
189 : class input_iterator;
190 : input_iterator *make_temp_iterator(const char *);
191 : const char *input_char_description(int);
192 :
193 : void process_input_stack();
194 : void chop_macro(); // declare to avoid friend name injection
195 :
196 : static const unsigned char default_escape_char = (unsigned char)('\\');
197 : static unsigned char escape_char = default_escape_char;
198 : static const unsigned char default_control_char = (unsigned char)('.');
199 : static const unsigned char default_no_break_control_char
200 : = (unsigned char)('\'');
201 :
202 17821 : static void assign_escape_character_request()
203 : {
204 17821 : unsigned char ec = 0U;
205 17821 : bool is_invalid = false;
206 17821 : if (has_arg()) {
207 122 : if (tok.ch() == 0U)
208 0 : is_invalid = true;
209 : else
210 122 : ec = tok.ch();
211 : }
212 : else
213 17699 : ec = default_escape_char;
214 17821 : bool do_nothing = false;
215 : static const char already_cc[] = "the control character is already";
216 : static const char already_nbcc[] = "the no-break control character is"
217 : " already";
218 17821 : const char *already_message = 0 /* nullptr */;
219 17821 : if (curenv->get_control_character() == ec) {
220 0 : already_message = already_cc;
221 0 : do_nothing = true;
222 : }
223 17821 : else if (curenv->get_no_break_control_character() == ec) {
224 0 : already_message = already_nbcc;
225 0 : do_nothing = true;
226 : }
227 17821 : if (do_nothing)
228 0 : error("ignoring escape character change request; %1%2 %3",
229 : is_invalid ? "cannot select invalid escape character, and"
230 0 : : "", already_message, input_char_description(ec));
231 17821 : else if (is_invalid) {
232 0 : error("cannot select %1 as escape character; using '%2'",
233 0 : tok.description(), char(default_escape_char));
234 0 : escape_char = default_escape_char;
235 : }
236 : else
237 17821 : escape_char = ec;
238 17821 : skip_line();
239 17821 : }
240 :
241 17503 : void escape_off_request()
242 : {
243 17503 : escape_char = 0U;
244 17503 : skip_line();
245 17503 : }
246 :
247 : static unsigned char saved_escape_char = '\\';
248 :
249 0 : void save_escape_char_request()
250 : {
251 0 : saved_escape_char = escape_char;
252 0 : skip_line();
253 0 : }
254 :
255 0 : void restore_escape_char_request()
256 : {
257 0 : escape_char = saved_escape_char;
258 0 : skip_line();
259 0 : }
260 :
261 0 : void assign_control_character_request()
262 : {
263 0 : unsigned char cc = 0U;
264 0 : bool is_invalid = false;
265 0 : if (has_arg()) {
266 0 : if (tok.ch() == 0U)
267 0 : is_invalid = true;
268 : else
269 0 : cc = tok.ch();
270 : }
271 : else
272 0 : cc = default_control_char;
273 0 : bool do_nothing = false;
274 0 : char already_ec[] = "the escape character is already";
275 0 : char already_nbcc[] = "the no-break control character is already";
276 0 : char *already_message = 0 /* nullptr */;
277 0 : if (cc == escape_char) {
278 0 : already_message = already_ec;
279 0 : do_nothing = true;
280 : }
281 0 : else if (curenv->get_no_break_control_character() == cc) {
282 0 : already_message = already_nbcc;
283 0 : do_nothing = true;
284 : }
285 0 : bool assignment_worked = false;
286 0 : if (do_nothing)
287 0 : error("ignoring control character change request; %1%2 %3",
288 : is_invalid ? "cannot select invalid control character, and"
289 0 : : "", already_message, input_char_description(cc));
290 0 : else if (is_invalid) {
291 0 : error("cannot select %1 as control character; using '%2'",
292 0 : tok.description(), char(default_control_char));
293 : assignment_worked
294 0 : = curenv->set_control_character(default_control_char);
295 : }
296 : else
297 0 : assignment_worked = curenv->set_control_character(cc);
298 0 : assert(assignment_worked);
299 0 : skip_line();
300 0 : }
301 :
302 3 : void assign_no_break_control_character_request()
303 : {
304 3 : unsigned char nbcc = 0U;
305 3 : bool is_invalid = false;
306 3 : if (has_arg()) {
307 2 : if (tok.ch() == 0U)
308 0 : is_invalid = true;
309 : else
310 2 : nbcc = tok.ch();
311 : }
312 : else
313 1 : nbcc = default_no_break_control_char;
314 3 : bool do_nothing = false;
315 3 : char already_ec[] = "the escape character is already";
316 3 : char already_cc[] = "the (breaking) control character is already";
317 3 : char *already_message = 0 /* nullptr */;
318 3 : if (nbcc == escape_char) {
319 0 : already_message = already_ec;
320 0 : do_nothing = true;
321 : }
322 3 : else if (curenv->get_control_character() == nbcc) {
323 0 : already_message = already_cc;
324 0 : do_nothing = true;
325 : }
326 3 : bool assignment_worked = false;
327 3 : if (do_nothing)
328 0 : error("ignoring no-break control character change request; %1%2 %3",
329 : is_invalid ? "cannot select invalid no-break control"
330 : " character, and"
331 0 : : "", already_message, input_char_description(nbcc));
332 3 : else if (is_invalid) {
333 0 : error("cannot select %1 as no-break control character;"
334 : " using \"%2\"", tok.description(),
335 0 : default_no_break_control_char);
336 : assignment_worked
337 0 : = curenv->set_no_break_control_character(default_no_break_control_char);
338 : }
339 : else
340 3 : assignment_worked = curenv->set_no_break_control_character(nbcc);
341 3 : assert(assignment_worked);
342 3 : skip_line();
343 3 : }
344 :
345 : struct arg_list;
346 :
347 : class input_iterator {
348 : public:
349 : input_iterator();
350 : input_iterator(bool /* is_div */);
351 18685680 : virtual ~input_iterator() {}
352 : int get(node **);
353 : friend class input_stack;
354 : bool is_diversion;
355 : statem *diversion_state;
356 : protected:
357 : const unsigned char *ptr;
358 : const unsigned char *endptr;
359 : input_iterator *next;
360 : private:
361 : virtual int fill(node **);
362 : virtual int peek();
363 3005956 : virtual bool has_args() { return false; }
364 0 : virtual int nargs() { return 0; }
365 0 : virtual input_iterator *get_arg(int) { return 0 /* nullptr */; }
366 0 : virtual arg_list *get_arg_list() { return 0 /* nullptr */; }
367 0 : virtual symbol get_macro_name() { return NULL_SYMBOL; }
368 0 : virtual bool space_follows_arg(int) { return false; }
369 1272316 : virtual bool get_break_flag() { return false; }
370 301234 : virtual bool get_location(bool /* allow_macro */,
371 : const char ** /* filep */,
372 301234 : int * /* linep */) { return false; }
373 34 : virtual void backtrace() {}
374 3 : virtual bool set_location(const char *, int) { return false; }
375 0 : virtual bool next_file(FILE *, const char *) { return false; }
376 0 : virtual void shift(int) {}
377 17507309 : virtual int is_boundary() {return 0; } // three-valued Boolean :-|
378 0 : virtual bool is_file() { return false; }
379 26398 : virtual bool is_macro() { return false; }
380 214068 : virtual void set_att_compat(bool) {}
381 214068 : virtual bool get_att_compat() { return false; }
382 : };
383 :
384 9318574 : input_iterator::input_iterator()
385 9318574 : : is_diversion(false), ptr(0 /* nullptr */), endptr(0 /* nullptr */)
386 : {
387 9318574 : }
388 :
389 9437049 : input_iterator::input_iterator(bool is_div)
390 9437049 : : is_diversion(is_div), ptr(0 /* nullptr */), endptr(0 /* nullptr */)
391 : {
392 9437049 : }
393 :
394 10169254 : int input_iterator::fill(node **)
395 : {
396 10169254 : return EOF;
397 : }
398 :
399 0 : int input_iterator::peek()
400 : {
401 0 : return EOF;
402 : }
403 :
404 26794625 : inline int input_iterator::get(node **p)
405 : {
406 26794625 : return ptr < endptr ? *ptr++ : fill(p);
407 : }
408 :
409 : class input_boundary : public input_iterator {
410 : public:
411 2139492 : int is_boundary() { return 1; }
412 : };
413 :
414 : class input_return_boundary : public input_iterator {
415 : public:
416 39600 : int is_boundary() { return 2; }
417 : };
418 :
419 : class file_iterator : public input_iterator {
420 : FILE *fp;
421 : int lineno;
422 : char *filename;
423 : bool was_popened;
424 : bool seen_newline;
425 : bool seen_escape;
426 : enum { BUF_SIZE = 512 };
427 : unsigned char buf[BUF_SIZE];
428 : void close();
429 : public:
430 : file_iterator(FILE *, const char *, bool = false);
431 : ~file_iterator();
432 : int fill(node **);
433 : int peek();
434 : bool get_location(bool /* allow_macro */, const char ** /* filep */,
435 : int * /* linep */);
436 : void backtrace();
437 : bool set_location(const char *, int);
438 : bool next_file(FILE *, const char *);
439 43 : bool is_file() { return true; }
440 : };
441 :
442 17106 : file_iterator::file_iterator(FILE *f, const char *fn, bool popened)
443 : : fp(f), lineno(1), was_popened(popened),
444 17106 : seen_newline(false), seen_escape(false)
445 : {
446 17106 : filename = strdup(const_cast<char *>(fn));
447 17106 : if ((font::use_charnames_in_special) && (fn != 0 /* nullptr */)) {
448 618 : if (!the_output)
449 45 : init_output();
450 618 : the_output->put_filename(fn, popened);
451 : }
452 17106 : }
453 :
454 34182 : file_iterator::~file_iterator()
455 : {
456 17091 : close();
457 34182 : }
458 :
459 17091 : void file_iterator::close()
460 : {
461 17091 : if (fp == stdin)
462 1381 : clearerr(stdin);
463 15710 : else if (was_popened)
464 96 : pclose(fp);
465 : else
466 15614 : fclose(fp);
467 17091 : }
468 :
469 0 : bool file_iterator::next_file(FILE *f, const char *s)
470 : {
471 0 : close();
472 0 : fp = f;
473 0 : set_location(s, 1);
474 0 : seen_newline = false;
475 0 : seen_escape = false;
476 0 : was_popened = false;
477 0 : ptr = 0 /* nullptr */;
478 0 : endptr = 0 /* nullptr */;
479 0 : return true;
480 : }
481 :
482 5372584 : int file_iterator::fill(node **)
483 : {
484 5372584 : if (seen_newline)
485 5355425 : lineno++;
486 5372584 : seen_newline = false;
487 5372584 : unsigned char *p = buf;
488 5372584 : ptr = p;
489 5372584 : unsigned char *e = p + BUF_SIZE;
490 140893187 : while (p < e) {
491 140893159 : int c = getc(fp);
492 140893159 : if (EOF == c)
493 17073 : break;
494 140876086 : if (is_invalid_input_char(c))
495 9 : warning(WARN_INPUT, "invalid input character code %1", c);
496 : else {
497 140876077 : *p++ = c;
498 140876077 : if ('\n' == c) {
499 5355483 : seen_escape = false;
500 5355483 : seen_newline = true;
501 5355483 : break;
502 : }
503 135520594 : seen_escape = ('\\' == c); // XXX: should be (escape_char == c)?
504 : }
505 : }
506 5372584 : if (p > buf) {
507 5355536 : endptr = p;
508 5355536 : return *ptr++;
509 : }
510 : else {
511 17048 : endptr = p;
512 17048 : return EOF;
513 : }
514 : }
515 :
516 6 : int file_iterator::peek()
517 : {
518 6 : int c = getc(fp);
519 6 : while (is_invalid_input_char(c)) {
520 0 : warning(WARN_INPUT, "invalid input character code %1", c);
521 0 : c = getc(fp);
522 : }
523 6 : if (c != EOF)
524 6 : ungetc(c, fp);
525 6 : return c;
526 : }
527 :
528 1214318 : bool file_iterator::get_location(bool /*allow_macro*/,
529 : const char **filenamep, int *linenop)
530 : {
531 1214318 : *linenop = lineno;
532 1214318 : assert(filename != 0 /* nullptr */);
533 1214318 : if (0 /* nullptr */ == filename)
534 0 : return false;
535 1214318 : if (strcmp(filename, "-") == 0)
536 11041 : *filenamep = "<standard input>";
537 : else
538 1203277 : *filenamep = filename;
539 1214318 : return true;
540 : }
541 :
542 35 : void file_iterator::backtrace()
543 : {
544 : const char *f;
545 : int n;
546 : // Get side effect of filename rewrite if stdin.
547 35 : (void) get_location(false /* allow macro */, &f, &n);
548 35 : if (program_name != 0 /* nullptr */)
549 35 : errprint("%1: ", program_name);
550 35 : errprint("backtrace: %3 '%1':%2\n", f, n,
551 35 : was_popened ? "pipe" : "file");
552 35 : }
553 :
554 22852 : bool file_iterator::set_location(const char *f, int ln)
555 : {
556 22852 : if (f != 0 /* nullptr */)
557 1394 : filename = const_cast<char *>(f);
558 22852 : lineno = ln;
559 22852 : return true;
560 : }
561 :
562 : input_iterator nil_iterator;
563 :
564 : class input_stack {
565 : public:
566 : static int get(node **);
567 : static int peek();
568 : static void push(input_iterator *);
569 : static input_iterator *get_arg(int);
570 : static arg_list *get_arg_list();
571 : static symbol get_macro_name();
572 : static bool space_follows_arg(int);
573 : static int get_break_flag();
574 : static int nargs();
575 : static bool get_location(bool /* allow_macro */,
576 : const char ** /* filep */,
577 : int * /* linep */);
578 : static bool set_location(const char *, int);
579 : static void backtrace();
580 : static void next_file(FILE *, const char *);
581 : static void end_file();
582 : static void shift(int n);
583 : static void add_boundary();
584 : static void add_return_boundary();
585 : static int is_return_boundary();
586 : static void remove_boundary();
587 : static int get_level();
588 : static int get_div_level();
589 : static void increase_level();
590 : static void decrease_level();
591 : static void clear();
592 : static void pop_macro();
593 : static void set_att_compat(bool);
594 : static bool get_att_compat();
595 : static statem *get_diversion_state();
596 : static void check_end_diversion(input_iterator *t);
597 : static int limit;
598 : static int div_level;
599 : static statem *diversion_state;
600 : private:
601 : static input_iterator *top;
602 : static int level;
603 : static int finish_get(node **);
604 : static int finish_peek();
605 : };
606 :
607 : input_iterator *input_stack::top = &nil_iterator;
608 : int input_stack::level = 0;
609 : int input_stack::limit = DEFAULT_INPUT_STACK_LIMIT;
610 : int input_stack::div_level = 0;
611 : statem *input_stack::diversion_state = 0 /* nullptr */;
612 : bool suppress_push = false;
613 :
614 :
615 32912914 : inline int input_stack::get_level()
616 : {
617 32912914 : return level;
618 : }
619 :
620 130923 : inline void input_stack::increase_level()
621 : {
622 130923 : level++;
623 130923 : }
624 :
625 130923 : inline void input_stack::decrease_level()
626 : {
627 130923 : level--;
628 130923 : }
629 :
630 8897752 : inline int input_stack::get_div_level()
631 : {
632 8897752 : return div_level;
633 : }
634 :
635 923638300 : inline int input_stack::get(node **np)
636 : {
637 923638300 : int res = (top->ptr < top->endptr) ? *top->ptr++ : finish_get(np);
638 923638300 : if (res == '\n') {
639 35830546 : have_formattable_input_on_interrupted_line = have_formattable_input;
640 35830546 : have_formattable_input = false;
641 : }
642 923638300 : return res;
643 : }
644 :
645 83643088 : int input_stack::finish_get(node **np)
646 : {
647 : for (;;) {
648 83643088 : int c = top->fill(np);
649 83643088 : if (c != EOF || top->is_boundary())
650 66262338 : return c;
651 17380750 : if (top == &nil_iterator)
652 6590 : break;
653 17374160 : input_iterator *tem = top;
654 17374160 : check_end_diversion(tem);
655 : #if defined(DEBUGGING)
656 : if (want_html_debugging)
657 : if (tem->is_diversion)
658 : fprintf(stderr,
659 : "in diversion level = %d\n", input_stack::get_div_level());
660 : #endif
661 17374160 : top = top->next;
662 17374160 : level--;
663 17374160 : delete tem;
664 17374160 : if (top->ptr < top->endptr)
665 15477606 : return *top->ptr++;
666 1896554 : }
667 6590 : assert(level == 0);
668 6590 : return EOF;
669 : }
670 :
671 9006938 : inline int input_stack::peek()
672 : {
673 9006938 : return (top->ptr < top->endptr) ? *top->ptr : finish_peek();
674 : }
675 :
676 17775210 : void input_stack::check_end_diversion(input_iterator *t)
677 : {
678 17775210 : if (t->is_diversion) {
679 46733 : div_level--;
680 46733 : if (diversion_state != 0 /* nullptr */)
681 241 : delete diversion_state;
682 46733 : diversion_state = t->diversion_state;
683 : }
684 17775210 : }
685 :
686 4457 : int input_stack::finish_peek()
687 : {
688 : for (;;) {
689 4457 : int c = top->peek();
690 4457 : if (c != EOF || top->is_boundary())
691 4326 : return c;
692 131 : if (top == &nil_iterator)
693 0 : break;
694 131 : input_iterator *tem = top;
695 131 : check_end_diversion(tem);
696 131 : top = top->next;
697 131 : level--;
698 131 : delete tem;
699 131 : if (top->ptr < top->endptr)
700 131 : return *top->ptr;
701 0 : }
702 0 : assert(level == 0);
703 0 : return EOF;
704 : }
705 :
706 261248 : void input_stack::add_boundary()
707 : {
708 261248 : push(new input_boundary);
709 261248 : }
710 :
711 13200 : void input_stack::add_return_boundary()
712 : {
713 13200 : push(new input_return_boundary);
714 13200 : }
715 :
716 697920 : int input_stack::is_return_boundary()
717 : {
718 697920 : return top->is_boundary() == 2;
719 : }
720 :
721 261248 : void input_stack::remove_boundary()
722 : {
723 261248 : assert(top->is_boundary());
724 261248 : input_iterator *temp = top->next;
725 261248 : check_end_diversion(top);
726 :
727 261248 : delete top;
728 261248 : top = temp;
729 261248 : level--;
730 261248 : }
731 :
732 17984645 : void input_stack::push(input_iterator *in)
733 : {
734 17984645 : if (in == 0)
735 207126 : return;
736 17777519 : if (++level > limit && limit > 0)
737 0 : fatal("input stack limit of %1 levels exceeded"
738 0 : " (probable infinite loop)", limit);
739 17777519 : in->next = top;
740 17777519 : top = in;
741 17777519 : if (top->is_diversion) {
742 46733 : div_level++;
743 46733 : in->diversion_state = diversion_state;
744 46733 : diversion_state = curenv->construct_state(false);
745 : #if defined(DEBUGGING)
746 : if (want_html_debugging) {
747 : curenv->dump_troff_state();
748 : fflush(stderr);
749 : }
750 : #endif
751 : }
752 : #if defined(DEBUGGING)
753 : if (want_html_debugging)
754 : if (top->is_diversion) {
755 : fprintf(stderr,
756 : "in diversion level = %d\n", input_stack::get_div_level());
757 : fflush(stderr);
758 : }
759 : #endif
760 : }
761 :
762 13844223 : statem *get_diversion_state()
763 : {
764 13844223 : return input_stack::get_diversion_state();
765 : }
766 :
767 13844223 : statem *input_stack::get_diversion_state()
768 : {
769 13844223 : if (0 /* nullptr */ == diversion_state)
770 13796189 : return 0 /* nullptr */;
771 : else
772 48034 : return new statem(diversion_state);
773 : }
774 :
775 2664893 : input_iterator *input_stack::get_arg(int i)
776 : {
777 : input_iterator *p;
778 4533267 : for (p = top; p != 0 /* nullptr */; p = p->next)
779 4533267 : if (p->has_args())
780 2664893 : return p->get_arg(i);
781 0 : return 0 /* nullptr */;
782 : }
783 :
784 1497 : arg_list *input_stack::get_arg_list()
785 : {
786 : input_iterator *p;
787 2247 : for (p = top; p != 0 /* nullptr */; p = p->next)
788 1944 : if (p->has_args())
789 1194 : return p->get_arg_list();
790 303 : return 0 /* nullptr */;
791 : }
792 :
793 1497 : symbol input_stack::get_macro_name()
794 : {
795 : input_iterator *p;
796 2247 : for (p = top; p != 0 /* nullptr */; p = p->next)
797 1944 : if (p->has_args())
798 1194 : return p->get_macro_name();
799 303 : return NULL_SYMBOL;
800 : }
801 :
802 0 : bool input_stack::space_follows_arg(int i)
803 : {
804 : input_iterator *p;
805 0 : for (p = top; p != 0 /* nullptr */; p = p->next)
806 0 : if (p->has_args())
807 0 : return p->space_follows_arg(i);
808 0 : return false;
809 : }
810 :
811 9504705 : int input_stack::get_break_flag()
812 : {
813 9504705 : return top->get_break_flag();
814 : }
815 :
816 267784 : void input_stack::shift(int n)
817 : {
818 680194 : for (input_iterator *p = top; p != 0 /* nullptr */; p = p->next)
819 680194 : if (p->has_args()) {
820 267784 : p->shift(n);
821 267784 : return;
822 : }
823 : }
824 :
825 944483 : int input_stack::nargs()
826 : {
827 1668155 : for (input_iterator *p =top; p != 0 /* nullptr */; p = p->next)
828 1668152 : if (p->has_args())
829 944480 : return p->nargs();
830 3 : return 0;
831 : }
832 :
833 4548488 : bool input_stack::get_location(bool allow_macro, const char **filenamep,
834 : int *linenop)
835 : {
836 4855374 : for (input_iterator *p = top; p != 0 /* nullptr */; p = p->next)
837 4853512 : if (p->get_location(allow_macro, filenamep, linenop))
838 4546626 : return true;
839 1862 : return false;
840 : }
841 :
842 34 : void input_stack::backtrace()
843 : {
844 131 : for (input_iterator *p = top; p != 0 /* nullptr */; p = p->next)
845 97 : p->backtrace();
846 34 : }
847 :
848 22852 : bool input_stack::set_location(const char *filename, int lineno)
849 : {
850 22855 : for (input_iterator *p = top; p != 0 /* nullptr */; p = p->next)
851 22855 : if (p->set_location(filename, lineno))
852 22852 : return true;
853 0 : return false;
854 : }
855 :
856 0 : void input_stack::next_file(FILE *fp, const char *s)
857 : {
858 : input_iterator **pp;
859 0 : for (pp = ⊤ *pp != &nil_iterator; pp = &(*pp)->next)
860 0 : if ((*pp)->next_file(fp, s))
861 0 : return;
862 0 : if (++level > limit && limit > 0)
863 0 : fatal("input stack limit of %1 levels exceeded", limit);
864 0 : *pp = new file_iterator(fp, s);
865 0 : (*pp)->next = &nil_iterator;
866 : }
867 :
868 43 : void input_stack::end_file()
869 : {
870 43 : for (input_iterator **pp = ⊤
871 43 : *pp != &nil_iterator;
872 0 : pp = &(*pp)->next)
873 43 : if ((*pp)->is_file()) {
874 43 : input_iterator *tem = *pp;
875 43 : check_end_diversion(tem);
876 43 : *pp = (*pp)->next;
877 43 : delete tem;
878 43 : level--;
879 43 : return;
880 : }
881 : }
882 :
883 32 : void input_stack::clear()
884 : {
885 32 : int nboundaries = 0;
886 270 : while (top != &nil_iterator) {
887 238 : if (top->is_boundary())
888 1 : nboundaries++;
889 238 : input_iterator *tem = top;
890 238 : check_end_diversion(tem);
891 238 : top = top->next;
892 238 : level--;
893 238 : delete tem;
894 : }
895 : // Keep while_request happy.
896 33 : for (; nboundaries > 0; --nboundaries)
897 1 : add_return_boundary();
898 32 : }
899 :
900 112992 : void input_stack::pop_macro()
901 : {
902 112992 : int nboundaries = 0;
903 112992 : bool is_macro = false;
904 26398 : do {
905 139390 : if (top->next == &nil_iterator)
906 0 : break;
907 139390 : if (top->is_boundary())
908 13199 : nboundaries++;
909 139390 : is_macro = top->is_macro();
910 139390 : input_iterator *tem = top;
911 139390 : check_end_diversion(tem);
912 139390 : top = top->next;
913 139390 : level--;
914 139390 : delete tem;
915 139390 : } while (!is_macro);
916 : // Keep while_request happy.
917 126191 : for (; nboundaries > 0; --nboundaries)
918 13199 : add_return_boundary();
919 112992 : }
920 :
921 2498331 : inline void input_stack::set_att_compat(bool b)
922 : {
923 2498331 : top->set_att_compat(b);
924 2498331 : }
925 :
926 2493756 : inline bool input_stack::get_att_compat()
927 : {
928 2493756 : return top->get_att_compat();
929 : }
930 :
931 0 : void backtrace_request()
932 : {
933 0 : input_stack::backtrace();
934 0 : fflush(stderr);
935 0 : skip_line();
936 0 : }
937 :
938 43 : void next_file()
939 : {
940 43 : char *filename = 0 /* nullptr */;
941 43 : if (has_arg(true /* peek */)) {
942 0 : filename = read_rest_of_line_as_argument();
943 0 : tok.next();
944 : }
945 43 : if (0 /* nullptr */ == filename)
946 43 : input_stack::end_file();
947 : else {
948 0 : errno = 0;
949 0 : FILE *fp = include_search_path.open_file_cautiously(filename);
950 0 : if (0 /* nullptr */ == fp)
951 0 : error("cannot open '%1': %2", filename, strerror(errno));
952 : else
953 0 : input_stack::next_file(fp, filename);
954 : }
955 : // TODO: Add `filename` to file name set.
956 43 : tok.next();
957 43 : }
958 :
959 267784 : void shift()
960 : {
961 : int n;
962 267784 : if (!has_arg() || !read_integer(&n))
963 236253 : n = 1;
964 267784 : input_stack::shift(n);
965 267784 : skip_line();
966 267784 : }
967 :
968 : // TODO: return unsigned char (future: grochar)? We handle EOF here.
969 108425188 : static char read_char_in_escape_sequence_parameter(bool allow_space
970 : = false)
971 : {
972 108425188 : int c = read_char_in_copy_mode(0 /* nullptr */,
973 : false /* is_defining */,
974 : true /* handle_escaped_E */);
975 108425188 : switch (c) {
976 0 : case EOF:
977 0 : copy_mode_error("end of input in escape sequence");
978 0 : return '\0';
979 108355619 : default:
980 108355619 : if (!is_invalid_input_char(c))
981 108355619 : break;
982 : // fall through
983 : case '\n':
984 0 : if (c == '\n')
985 0 : input_stack::push(make_temp_iterator("\n"));
986 : // fall through
987 : case ' ':
988 69569 : if (c == ' ' && allow_space)
989 69569 : break;
990 : // fall through
991 : case '\t':
992 : case '\001':
993 : case '\b':
994 0 : copy_mode_error("%1 is not allowed in an escape sequence argument",
995 0 : input_char_description(c));
996 0 : return '\0';
997 : }
998 108425188 : return c;
999 : }
1000 :
1001 169398 : static symbol read_two_char_escape_parameter()
1002 : {
1003 : char buf[3];
1004 169398 : buf[0] = read_char_in_escape_sequence_parameter();
1005 169398 : if (buf[0] != '\0') {
1006 169398 : buf[1] = read_char_in_escape_sequence_parameter();
1007 169398 : if (buf[1] == '\0')
1008 0 : buf[0] = '\0';
1009 : else
1010 169398 : buf[2] = '\0';
1011 : }
1012 169398 : return symbol(buf);
1013 : }
1014 :
1015 13989373 : static symbol read_long_escape_parameters(read_mode mode)
1016 : {
1017 13989373 : int start_level = input_stack::get_level();
1018 13989373 : int buf_size = default_buffer_size;
1019 13989373 : char *buf = 0 /* nullptr */;
1020 : try {
1021 : // C++03: new char[buf_size]();
1022 13989373 : buf = new char[buf_size];
1023 : }
1024 0 : catch (const std::bad_alloc &e) {
1025 0 : fatal("cannot allocate %1 bytes to read input line", buf_size);
1026 : }
1027 13989373 : (void) memset(buf, 0, (buf_size * sizeof(char)));
1028 13989373 : int i = 0;
1029 : char c;
1030 13989373 : bool have_char = false;
1031 : for (;;) {
1032 91610719 : c = read_char_in_escape_sequence_parameter(have_char
1033 91610719 : && (WITH_ARGS == mode));
1034 91610719 : if ('\0' == c) {
1035 0 : delete[] buf;
1036 0 : return NULL_SYMBOL;
1037 : }
1038 91610719 : have_char = true;
1039 91610719 : if ((WITH_ARGS == mode) && (' ' == c))
1040 69569 : break;
1041 91541150 : if (i + 2 > buf_size) {
1042 606483 : char *old_buf = buf;
1043 606483 : int new_buf_size = buf_size * 2;
1044 : // C++03: new char[new_buf_size]();
1045 : try {
1046 606483 : buf = new char[new_buf_size];
1047 : }
1048 0 : catch (const std::bad_alloc &e) {
1049 0 : fatal("cannot allocate %1 bytes to read input line", buf_size);
1050 : }
1051 606483 : (void) memset(buf, 0, (new_buf_size * sizeof(char)));
1052 606483 : (void) memcpy(buf, old_buf, buf_size);
1053 606483 : buf_size = new_buf_size;
1054 606483 : delete[] old_buf;
1055 : }
1056 91541150 : if ((']' == c) && (input_stack::get_level() == start_level))
1057 13919804 : break;
1058 77621346 : buf[i++] = c;
1059 77621346 : }
1060 13989373 : buf[i] = '\0';
1061 13989373 : if (0 == i) {
1062 42379 : if (mode != ALLOW_EMPTY)
1063 : // XXX: `.device \[]` passes through as-is but `\X \[]` doesn't,
1064 : // landing here. Implement almost-but-not-quite-copy-mode?
1065 0 : copy_mode_error("empty escape sequence argument");
1066 42379 : return EMPTY_SYMBOL;
1067 : }
1068 13946994 : if (' ' == c)
1069 69569 : have_multiple_params = true;
1070 13946994 : symbol s(buf);
1071 13946994 : delete[] buf;
1072 13946994 : return s;
1073 : }
1074 :
1075 7803371 : static symbol read_escape_parameter(read_mode mode)
1076 : {
1077 7803371 : char c = read_char_in_escape_sequence_parameter();
1078 7803371 : if ('\0' == c)
1079 0 : return NULL_SYMBOL;
1080 7803371 : if ('(' == c)
1081 8207 : return read_two_char_escape_parameter();
1082 7795164 : if (('[' == c) && !want_att_compat)
1083 5253723 : return read_long_escape_parameters(mode);
1084 : char buf[2];
1085 2541441 : buf[0] = c;
1086 2541441 : buf[1] = '\0';
1087 2541441 : return symbol(buf);
1088 : }
1089 :
1090 8672302 : static symbol read_increment_and_escape_parameter(int *incp)
1091 : {
1092 8672302 : char c = read_char_in_escape_sequence_parameter();
1093 8672302 : switch (c) {
1094 0 : case 0:
1095 0 : *incp = 0;
1096 0 : return NULL_SYMBOL;
1097 157772 : case '(':
1098 157772 : *incp = 0;
1099 157772 : return read_two_char_escape_parameter();
1100 476085 : case '+':
1101 476085 : *incp = 1;
1102 476085 : return read_escape_parameter();
1103 98022 : case '-':
1104 98022 : *incp = -1;
1105 98022 : return read_escape_parameter();
1106 7935562 : case '[':
1107 7935562 : if (!want_att_compat) {
1108 7935562 : *incp = 0;
1109 7935562 : return read_long_escape_parameters();
1110 : }
1111 0 : break;
1112 : }
1113 4861 : *incp = 0;
1114 : char buf[2];
1115 4861 : buf[0] = c;
1116 4861 : buf[1] = '\0';
1117 4861 : return symbol(buf);
1118 : }
1119 :
1120 : // In copy mode, we don't tokenize normally; characters on the input
1121 : // stream are typically read into the contents of an existing node (like
1122 : // a string or macro definition), or discarded. A handful of escape
1123 : // sequences (\n, etc.) interpolate as they do outside of copy mode.
//
// Parameters:
//   nd                passed through to input_stack::get() for ordinary
//                     reads; reads made while handling an escape
//                     sequence pass a null pointer instead.
//   is_defining       when true, an escaped newline is reported to the
//                     caller as ESCAPE_NEWLINE; when false it is
//                     skipped.
//   handle_escaped_E  when true, an ESCAPE_E read from the input is
//                     treated as the escape character, and `\E`
//                     re-dispatches on the character that follows it.
//
// Returns the next character or internal ESCAPE_* code; EOF can be
// returned while skipping a comment.  Mode and quote-level markers are
// consumed here and never returned.
1124 205980202 : static int read_char_in_copy_mode(node **nd,
1125 : bool is_defining,
1126 : bool handle_escaped_E)
1127 : {
1128 : for (;;) {
1129 205980202 : int c = input_stack::get(nd);
// Compatibility-mode markers: stash the current setting on the input
// stack, switch modes, and keep reading.
1130 205980202 : if (c == PUSH_GROFF_MODE) {
1131 1054433 : input_stack::set_att_compat(want_att_compat);
1132 1054433 : want_att_compat = false;
1133 1054433 : continue;
1134 : }
1135 204925769 : if (c == PUSH_COMP_MODE) {
1136 0 : input_stack::set_att_compat(want_att_compat);
1137 0 : want_att_compat = true;
1138 0 : continue;
1139 : }
// Restore the setting stashed by one of the two markers above.
1140 204925769 : if (c == POP_GROFFCOMP_MODE) {
1141 1059102 : want_att_compat = input_stack::get_att_compat();
1142 1059102 : continue;
1143 : }
// Quote markers adjust the input level and are otherwise invisible.
1144 203866667 : if (c == BEGIN_QUOTE) {
1145 130923 : input_stack::increase_level();
1146 130923 : continue;
1147 : }
1148 203735744 : if (c == END_QUOTE) {
1149 130923 : input_stack::decrease_level();
1150 130923 : continue;
1151 : }
1152 203604821 : if (c == DOUBLE_QUOTE)
1153 464636 : continue;
1154 203140185 : if ((c == ESCAPE_E) && handle_escaped_E)
1155 1511 : c = escape_char;
1156 203140185 : if (c == ESCAPE_NEWLINE) {
1157 42095 : if (is_defining)
1158 810 : return c;
// Not defining: swallow any run of escaped newlines.
1159 0 : do {
1160 41285 : c = input_stack::get(nd);
1161 41285 : } while (c == ESCAPE_NEWLINE);
1162 : }
// Anything that is not the escape character is returned as-is; so is
// everything when escape_char is 0.
// NOTE(review): escape_char == 0 presumably means escape processing is
// turned off -- confirm.
1163 203139375 : if ((c != escape_char) || (0U == escape_char))
1164 194695036 : return c;
// We have read the escape character; decide what to do based on the
// character after it, which is only consumed once we recognize it.
1165 8444339 : again:
1166 8444339 : c = input_stack::peek();
1167 8444339 : switch (c) {
1168 0 : case 0:
1169 0 : return escape_char;
// \" comment: discard through end of line and return the terminating
// newline (or EOF).
1170 281439 : case '"':
1171 281439 : (void) input_stack::get(0 /* nullptr */);
1172 7122459 : while ((c = input_stack::get(0 /* nullptr */)) != '\n'
1173 7122459 : && c != EOF)
1174 : ;
1175 281439 : return c;
1176 5961 : case '#': // Like \" but newline is ignored.
1177 5961 : (void) input_stack::get(0 /* nullptr */);
1178 312884 : while ((c = input_stack::get(0 /* nullptr */)) != '\n')
1179 306923 : if (c == EOF)
1180 0 : return EOF;
1181 5961 : break;
// The interpolating escapes below return nothing themselves; after the
// interpolate_*() call we `break` and the outer loop reads again.
// NOTE(review): presumably each interpolate_*() pushes its text onto
// the input stack -- confirm.
1182 1030821 : case '$':
1183 : {
1184 1030821 : (void) input_stack::get(0 /* nullptr */);
1185 1030821 : symbol s = read_escape_parameter();
1186 1030821 : if (!(s.is_null() || s.is_empty()))
1187 1030821 : interpolate_positional_parameter(s);
1188 1030821 : break;
1189 : }
1190 2704940 : case '*':
1191 : {
1192 2704940 : (void) input_stack::get(0 /* nullptr */);
1193 2704940 : symbol s = read_escape_parameter(WITH_ARGS);
1194 2704940 : if (!(s.is_null() || s.is_empty())) {
// The flag was set while reading the name; clear it before use.
1195 2704940 : if (have_multiple_params) {
1196 156 : have_multiple_params = false;
1197 156 : interpolate_string_with_args(s);
1198 : }
1199 : else
1200 2704784 : interpolate_string(s);
1201 : }
1202 2704940 : break;
1203 : }
1204 1 : case 'a':
1205 1 : (void) input_stack::get(0 /* nullptr */);
1206 1 : return '\001';
1207 573 : case 'e':
1208 573 : (void) input_stack::get(0 /* nullptr */);
1209 573 : return ESCAPE_e;
// \E: when the caller asked for it, behave as though the escape
// character had been read again and dispatch on what follows.
1210 42268 : case 'E':
1211 42268 : (void) input_stack::get(0 /* nullptr */);
1212 42268 : if (handle_escaped_E)
1213 0 : goto again;
1214 42268 : return ESCAPE_E;
1215 1791336 : case 'n':
1216 : {
1217 1791336 : (void) input_stack::get(0 /* nullptr */);
1218 : int inc;
1219 1791336 : symbol s = read_increment_and_escape_parameter(&inc);
1220 1791336 : if (!(s.is_null() || s.is_empty()))
1221 1791336 : interpolate_register(s, inc);
1222 1791336 : break;
1223 : }
1224 366 : case 'g':
1225 : {
1226 366 : (void) input_stack::get(0 /* nullptr */);
1227 366 : symbol s = read_escape_parameter();
1228 366 : if (!(s.is_null() || s.is_empty()))
1229 366 : interpolate_number_format(s);
1230 366 : break;
1231 : }
1232 976 : case 't':
1233 976 : (void) input_stack::get(0 /* nullptr */);
1234 976 : return '\t';
1235 2 : case 'V':
1236 : {
1237 2 : (void) input_stack::get(0 /* nullptr */);
1238 2 : symbol s = read_escape_parameter();
1239 2 : if (!(s.is_null() || s.is_empty()))
1240 2 : interpolate_environment_variable(s);
1241 2 : break;
1242 : }
// Escaped newline: reported when defining, otherwise dropped.
1243 339830 : case '\n':
1244 339830 : (void) input_stack::get(0 /* nullptr */);
1245 339830 : if (is_defining)
1246 338272 : return ESCAPE_NEWLINE;
1247 1558 : break;
// The remaining recognized escapes each consume the character and
// return the corresponding internal ESCAPE_* code.
1248 3803 : case ' ':
1249 3803 : (void) input_stack::get(0 /* nullptr */);
1250 3803 : return ESCAPE_SPACE;
1251 8894 : case '~':
1252 8894 : (void) input_stack::get(0 /* nullptr */);
1253 8894 : return ESCAPE_TILDE;
1254 6809 : case ':':
1255 6809 : (void) input_stack::get(0 /* nullptr */);
1256 6809 : return ESCAPE_COLON;
1257 12100 : case '|':
1258 12100 : (void) input_stack::get(0 /* nullptr */);
1259 12100 : return ESCAPE_BAR;
1260 1666 : case '^':
1261 1666 : (void) input_stack::get(0 /* nullptr */);
1262 1666 : return ESCAPE_CIRCUMFLEX;
1263 177124 : case '{':
1264 177124 : (void) input_stack::get(0 /* nullptr */);
1265 177124 : return ESCAPE_LEFT_BRACE;
1266 177440 : case '}':
1267 177440 : (void) input_stack::get(0 /* nullptr */);
1268 177440 : return ESCAPE_RIGHT_BRACE;
1269 353 : case '`':
1270 353 : (void) input_stack::get(0 /* nullptr */);
1271 353 : return ESCAPE_LEFT_QUOTE;
1272 987 : case '\'':
1273 987 : (void) input_stack::get(0 /* nullptr */);
1274 987 : return ESCAPE_RIGHT_QUOTE;
1275 8662 : case '-':
1276 8662 : (void) input_stack::get(0 /* nullptr */);
1277 8662 : return ESCAPE_HYPHEN;
1278 0 : case '_':
1279 0 : (void) input_stack::get(0 /* nullptr */);
1280 0 : return ESCAPE_UNDERSCORE;
1281 11175 : case 'c':
1282 11175 : (void) input_stack::get(0 /* nullptr */);
1283 11175 : return ESCAPE_c;
1284 10595 : case '!':
1285 10595 : (void) input_stack::get(0 /* nullptr */);
1286 10595 : return ESCAPE_BANG;
1287 31038 : case '?':
1288 31038 : (void) input_stack::get(0 /* nullptr */);
1289 31038 : return ESCAPE_QUESTION;
1290 9008 : case '&':
1291 9008 : (void) input_stack::get(0 /* nullptr */);
1292 9008 : return ESCAPE_AMPERSAND;
1293 2437 : case ')':
1294 2437 : (void) input_stack::get(0 /* nullptr */);
1295 2437 : return ESCAPE_RIGHT_PARENTHESIS;
// \. yields a plain '.'.
1296 386 : case '.':
1297 386 : (void) input_stack::get(0 /* nullptr */);
1298 386 : return c;
1299 9986 : case '%':
1300 9986 : (void) input_stack::get(0 /* nullptr */);
1301 9986 : return ESCAPE_PERCENT;
// A doubled escape character collapses to one.  For any other
// character, return the escape character and leave the following
// character unread.
1302 1773363 : default:
1303 1773363 : if (c == escape_char) {
1304 1025765 : (void) input_stack::get(0 /* nullptr */);
1305 1025765 : return c;
1306 : }
1307 : else
1308 747598 : return escape_char;
1309 : }
1310 8375001 : }
1311 : }
1312 :
1313 : // \a or \t
// Node wrapping one non-null character.  interpret() appends that
// character to a macro.
1314 : class non_interpreted_char_node : public node {
// The wrapped character; the constructor asserts it is nonzero.
1315 : unsigned char c;
1316 : public:
1317 : non_interpreted_char_node(unsigned char);
1318 : void asciify(macro *);
1319 : node *copy();
1320 : bool interpret(macro *);
1321 : bool is_same_as(node *);
1322 : const char *type();
1323 : bool causes_tprint();
1324 : bool is_tag();
1325 : };
1326 :
1327 0 : bool non_interpreted_char_node::is_same_as(node *nd)
1328 : {
1329 0 : return c == static_cast<non_interpreted_char_node *>(nd)->c;
1330 : }
1331 :
1332 0 : const char *non_interpreted_char_node::type()
1333 : {
1334 0 : return "non-interpreted character node";
1335 : }
1336 :
1337 0 : bool non_interpreted_char_node::causes_tprint()
1338 : {
1339 0 : return false;
1340 : }
1341 :
1342 2 : bool non_interpreted_char_node::is_tag()
1343 : {
1344 2 : return false;
1345 : }
1346 :
1347 326 : non_interpreted_char_node::non_interpreted_char_node(unsigned char cc) : c(cc)
1348 : {
1349 326 : assert(cc != 0U);
1350 326 : }
1351 :
1352 0 : void non_interpreted_char_node::asciify(macro *)
1353 : {
1354 0 : delete this;
1355 0 : }
1356 :
1357 0 : node *non_interpreted_char_node::copy()
1358 : {
1359 0 : return new non_interpreted_char_node(c);
1360 : }
1361 :
1362 323 : bool non_interpreted_char_node::interpret(macro *mac)
1363 : {
1364 323 : mac->append(c);
1365 323 : return true;
1366 : }
1367 :
1368 : // forward declarations
1369 : static void do_width();
1370 : static node *do_non_interpreted();
1371 : static node *do_device_extension();
1372 : static node *do_suppress(symbol nm);
1373 : static void do_register();
1374 :
// Maps color names (symbols) to `color` objects.  Entries are added by
// define_color() and by the stroke/fill color setters when they meet
// an unknown name.
// NOTE(review): 501 is presumably the initial table size -- confirm
// against the dictionary class.
1375 : dictionary color_dictionary(501);
1376 :
1377 143299 : static color *lookup_color(symbol nm)
1378 : {
1379 143299 : assert(!nm.is_null());
1380 143299 : if (nm == default_symbol)
1381 12352 : return &default_color;
1382 130947 : color *c = static_cast<color *>(color_dictionary.lookup(nm));
1383 130947 : if (0 == c /* nullptr */)
1384 0 : warning(WARN_COLOR, "color '%1' not defined", nm.contents());
1385 130947 : return c;
1386 : }
1387 :
1388 110863 : void do_stroke_color(symbol nm) // \m
1389 : {
1390 110863 : if (nm.is_null())
1391 0 : return;
1392 110863 : if (nm.is_empty())
1393 216 : curenv->set_stroke_color(curenv->get_prev_stroke_color());
1394 : else {
1395 110647 : color *tem = lookup_color(nm);
1396 110647 : if (tem != 0 /* nullptr */)
1397 110647 : curenv->set_stroke_color(tem);
1398 : else
1399 0 : (void) color_dictionary.lookup(nm, new color(nm));
1400 : }
1401 : }
1402 :
1403 65073 : void do_fill_color(symbol nm) // \M
1404 : {
1405 65073 : if (nm.is_null())
1406 0 : return;
1407 65073 : if (nm.is_empty())
1408 32421 : curenv->set_fill_color(curenv->get_prev_fill_color());
1409 : else {
1410 32652 : color *tem = lookup_color(nm);
1411 32652 : if (tem != 0 /* nullptr */)
1412 32652 : curenv->set_fill_color(tem);
1413 : else
1414 0 : (void) color_dictionary.lookup(nm, new color(nm));
1415 : }
1416 : }
1417 :
1418 1980 : static unsigned int read_color_channel_value(const char *scheme,
1419 : const char *col)
1420 : {
1421 : units val;
1422 1980 : if (!read_measurement(&val, (unsigned char)('f'))) { // TODO: grochar
1423 0 : warning(WARN_COLOR, "%1 in %2 definition set to 0", col, scheme);
1424 0 : tok.next();
1425 0 : return 0;
1426 : }
1427 1980 : if (val < 0) {
1428 0 : warning(WARN_RANGE, "%1 cannot be negative: set to 0", col);
1429 0 : return 0;
1430 : }
1431 1980 : if (val > color::MAX_COLOR_VAL+1) {
1432 0 : warning(WARN_RANGE, "%1 cannot be greater than 1", col);
1433 : // we change 0x10000 to 0xffff
1434 0 : return color::MAX_COLOR_VAL;
1435 : }
1436 1980 : return (unsigned int)(val);
1437 : }
1438 :
1439 147600 : static color *read_rgb(unsigned char end = 0U)
1440 : {
1441 : symbol component = read_input_until_terminator(false /* required */,
1442 147600 : end);
1443 147600 : if (component.is_null()) {
1444 0 : warning(WARN_COLOR, "missing rgb color values");
1445 0 : return 0 /* nullptr */;
1446 : }
1447 147600 : const char *s = component.contents();
1448 147600 : color *col = new color;
1449 147600 : if ('#' == *s) {
1450 147004 : if (!col->read_rgb(s)) {
1451 0 : warning(WARN_COLOR, "expecting rgb color definition,"
1452 0 : " not '%1'", s);
1453 0 : delete col;
1454 0 : return 0 /* nullptr */;
1455 : }
1456 : }
1457 : else {
1458 596 : if (!end)
1459 596 : input_stack::push(make_temp_iterator(" "));
1460 596 : input_stack::push(make_temp_iterator(s));
1461 596 : tok.next();
1462 596 : unsigned int r = read_color_channel_value("rgb color",
1463 : "red component");
1464 596 : unsigned int g = read_color_channel_value("rgb color",
1465 : "green component");
1466 596 : unsigned int b = read_color_channel_value("rgb color",
1467 : "blue component");
1468 596 : col->set_rgb(r, g, b);
1469 : }
1470 147600 : return col;
1471 : }
1472 :
1473 0 : static color *read_cmy(unsigned char end = 0U)
1474 : {
1475 : symbol component = read_input_until_terminator(false /* required */,
1476 0 : end);
1477 0 : if (component.is_null()) {
1478 0 : warning(WARN_COLOR, "missing cmy color values");
1479 0 : return 0 /* nullptr */;
1480 : }
1481 0 : const char *s = component.contents();
1482 0 : color *col = new color;
1483 0 : if ('#' == *s) {
1484 0 : if (!col->read_cmy(s)) {
1485 0 : warning(WARN_COLOR, "expecting cmy color definition,"
1486 0 : " not '%1'", s);
1487 0 : delete col;
1488 0 : return 0 /* nullptr */;
1489 : }
1490 : }
1491 : else {
1492 0 : if (!end)
1493 0 : input_stack::push(make_temp_iterator(" "));
1494 0 : input_stack::push(make_temp_iterator(s));
1495 0 : tok.next();
1496 0 : unsigned int c = read_color_channel_value("cmy color",
1497 : "cyan component");
1498 0 : unsigned int m = read_color_channel_value("cmy color",
1499 : "magenta component");
1500 0 : unsigned int y = read_color_channel_value("cmy color",
1501 : "yellow component");
1502 0 : col->set_cmy(c, m, y);
1503 : }
1504 0 : return col;
1505 : }
1506 :
1507 0 : static color *read_cmyk(unsigned char end = 0U)
1508 : {
1509 : symbol component = read_input_until_terminator(false /* required */,
1510 0 : end);
1511 0 : if (component.is_null()) {
1512 0 : warning(WARN_COLOR, "missing cmyk color values");
1513 0 : return 0 /* nullptr */;
1514 : }
1515 0 : const char *s = component.contents();
1516 0 : color *col = new color;
1517 0 : if ('#' == *s) {
1518 0 : if (!col->read_cmyk(s)) {
1519 0 : warning(WARN_COLOR, "expecting cmyk color definition,"
1520 0 : " not '%1'", s);
1521 0 : delete col;
1522 0 : return 0 /* nullptr */;
1523 : }
1524 : }
1525 : else {
1526 0 : if (!end)
1527 0 : input_stack::push(make_temp_iterator(" "));
1528 0 : input_stack::push(make_temp_iterator(s));
1529 0 : tok.next();
1530 0 : unsigned int c = read_color_channel_value("cmyk color",
1531 : "cyan component");
1532 0 : unsigned int m = read_color_channel_value("cmyk color",
1533 : "magenta component");
1534 0 : unsigned int y = read_color_channel_value("cmyk color",
1535 : "yellow component");
1536 0 : unsigned int k = read_color_channel_value("cmyk color",
1537 : "black component");
1538 0 : col->set_cmyk(c, m, y, k);
1539 : }
1540 0 : return col;
1541 : }
1542 :
1543 192 : static color *read_gray(unsigned char end = 0U)
1544 : {
1545 : symbol component = read_input_until_terminator(false /* required */,
1546 192 : end);
1547 192 : if (component.is_null()) {
1548 0 : warning(WARN_COLOR, "missing gray value");
1549 0 : return 0 /* nullptr */;
1550 : }
1551 192 : const char *s = component.contents();
1552 192 : color *col = new color;
1553 192 : if ('#' == *s) {
1554 0 : if (!col->read_gray(s)) {
1555 0 : warning(WARN_COLOR, "expecting gray definition,"
1556 0 : " not '%1'", s);
1557 0 : delete col;
1558 0 : return 0 /* nullptr */;
1559 : }
1560 : }
1561 : else {
1562 192 : if (!end)
1563 3 : input_stack::push(make_temp_iterator("\n"));
1564 192 : input_stack::push(make_temp_iterator(s));
1565 192 : tok.next();
1566 192 : unsigned int g = read_color_channel_value("gray", "gray value");
1567 192 : col->set_gray(g);
1568 : }
1569 192 : return col;
1570 : }
1571 :
1572 0 : static void activate_color()
1573 : {
1574 : int n;
1575 0 : bool is_color_desired = false;
1576 0 : if (has_arg() && read_integer(&n))
1577 0 : is_color_desired = (n > 0);
1578 : else
1579 0 : is_color_desired = true;
1580 0 : if (is_color_desired && !permit_color_output) {
1581 0 : error("color output disabled via command line");
1582 0 : is_color_desired = false;
1583 : }
1584 0 : want_color_output = is_color_desired;
1585 0 : skip_line();
1586 0 : }
1587 :
1588 147603 : static void define_color()
1589 : {
1590 147603 : if (!has_arg()) {
1591 0 : warning(WARN_MISSING, "color definition request expects arguments");
1592 0 : skip_line();
1593 0 : return;
1594 : }
1595 147603 : symbol color_name = read_long_identifier();
1596 : // Testing has_arg() should have ensured this.
1597 147603 : assert(color_name != 0 /* nullptr */);
1598 147603 : if (color_name == default_symbol) {
1599 0 : warning(WARN_COLOR, "default color cannot be redefined");
1600 0 : skip_line();
1601 0 : return;
1602 : }
1603 147603 : symbol color_space = read_long_identifier();
1604 147603 : if (color_space.is_null()) {
1605 0 : warning(WARN_MISSING, "missing color space in color definition"
1606 : " request");
1607 0 : skip_line();
1608 0 : return;
1609 : }
1610 : color *col;
1611 147603 : if (strcmp(color_space.contents(), "rgb") == 0)
1612 147600 : col = read_rgb();
1613 3 : else if (strcmp(color_space.contents(), "cmyk") == 0)
1614 0 : col = read_cmyk();
1615 3 : else if (strcmp(color_space.contents(), "gray") == 0)
1616 3 : col = read_gray();
1617 0 : else if (strcmp(color_space.contents(), "grey") == 0)
1618 0 : col = read_gray();
1619 0 : else if (strcmp(color_space.contents(), "cmy") == 0)
1620 0 : col = read_cmy();
1621 : else {
1622 0 : warning(WARN_COLOR, "unknown color space '%1';"
1623 : " use 'rgb', 'cmyk', 'gray' or 'cmy'",
1624 0 : color_space.contents());
1625 0 : skip_line();
1626 0 : return;
1627 : }
1628 147603 : if (col != 0 /* nullptr */) {
1629 147603 : col->nm = color_name;
1630 147603 : (void) color_dictionary.lookup(color_name, col);
1631 : }
1632 147603 : skip_line();
1633 : }
1634 :
1635 0 : static void print_color_request()
1636 : {
1637 0 : symbol key;
1638 : color *value;
1639 0 : if (has_arg()) {
1640 0 : do {
1641 0 : key = read_identifier();
1642 0 : value = static_cast<color *>(color_dictionary.lookup(key));
1643 0 : if (value != 0 /* nullptr */)
1644 0 : errprint("%1\t%2\n", key.contents(), value->print_color());
1645 0 : } while (has_arg());
1646 : }
1647 : else {
1648 0 : dictionary_iterator iter(color_dictionary);
1649 : // We must use the nuclear `reinterpret_cast` operator because GNU
1650 : // troff's dictionary types use a pre-STL approach to containers.
1651 0 : while (iter.get(&key, reinterpret_cast<void **>(&value))) {
1652 0 : assert(!key.is_null());
1653 0 : assert(value != 0 /* nullptr */);
1654 0 : errprint("%1\t%2\n", key.contents(), value->print_color());
1655 : }
1656 : }
1657 0 : fflush(stderr);
1658 0 : skip_line();
1659 0 : }
1660 :
1661 110 : node *do_overstrike() // \o
1662 : {
1663 110 : overstrike_node *osnode = new overstrike_node;
1664 110 : int start_level = input_stack::get_level();
1665 220 : token start_token;
1666 110 : start_token.next();
1667 110 : if (!want_att_compat && !start_token.is_usable_as_delimiter())
1668 0 : warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
1669 0 : " is deprecated", tok.description());
1670 110 : else if (want_att_compat
1671 110 : && !start_token.is_usable_as_delimiter(false,
1672 : DELIMITER_ATT_STRING_EXPRESSION)) {
1673 0 : warning(WARN_DELIM, "overstriking escape sequence"
1674 : " does not accept %1 as a delimiter",
1675 0 : start_token.description());
1676 0 : delete osnode;
1677 0 : return 0 /* nullptr */;
1678 : }
1679 : // TODO: groff 1.24.0 release + 2 years?
1680 : #if 0
1681 : if (!start_token.is_usable_as_delimiter(true /* report error */)) {
1682 : delete osnode;
1683 : return 0 /* nullptr */;
1684 : }
1685 : #endif
1686 : for (;;) {
1687 330 : tok.next();
1688 330 : if (tok.is_newline() || tok.is_eof()) {
1689 : // token::description() writes to static, class-wide storage, so
1690 : // we must allocate a copy of it before issuing the next
1691 : // diagnostic.
1692 0 : char *delimdesc = strdup(start_token.description());
1693 0 : warning(WARN_DELIM, "missing closing delimiter in overstrike"
1694 : " escape sequence; expected %1, got %2", delimdesc,
1695 0 : tok.description());
1696 0 : free(delimdesc);
1697 0 : break;
1698 : }
1699 330 : if (tok == start_token
1700 330 : && (want_att_compat || input_stack::get_level() == start_level))
1701 110 : break;
1702 220 : if (tok.is_horizontal_motion())
1703 0 : osnode->overstrike(tok.nd->copy());
1704 220 : else if (tok.is_unstretchable_space()) {
1705 0 : node *n = new hmotion_node(curenv->get_space_width(),
1706 0 : curenv->get_fill_color());
1707 0 : osnode->overstrike(n);
1708 : }
1709 : else {
1710 : // TODO: In theory, we could accept spaces and horizontal motions.
1711 220 : charinfo *ci = tok.get_charinfo(true /* required */);
1712 220 : if (0 /* nullptr */ == ci) {
1713 0 : error("%1 is not supported in an overstrike escape sequence"
1714 0 : " argument", tok.description());
1715 0 : delete osnode;
1716 0 : return 0 /* nullptr */;
1717 : }
1718 : else {
1719 220 : node *n = curenv->make_char_node(ci);
1720 220 : if (n != 0 /* nullptr */)
1721 220 : osnode->overstrike(n);
1722 : }
1723 : }
1724 220 : }
1725 110 : return osnode;
1726 : }
1727 :
1728 1 : static node *do_bracket() // \b
1729 : {
1730 1 : bracket_node *bracketnode = new bracket_node;
1731 2 : token start_token;
1732 1 : start_token.next();
1733 1 : if (!want_att_compat && !start_token.is_usable_as_delimiter())
1734 0 : warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
1735 0 : " is deprecated", tok.description());
1736 1 : else if (want_att_compat
1737 1 : && !start_token.is_usable_as_delimiter(false,
1738 : DELIMITER_ATT_STRING_EXPRESSION)) {
1739 0 : warning(WARN_DELIM, "bracket-building escape sequence"
1740 : " does not accept %1 as a delimiter",
1741 0 : start_token.description());
1742 0 : delete bracketnode;
1743 0 : return 0 /* nullptr */;
1744 : }
1745 : // TODO: groff 1.24.0 release + 2 years?
1746 : #if 0
1747 : if (!start_token.is_usable_as_delimiter(true /* report error */)) {
1748 : delete bracketnode;
1749 : return 0 /* nullptr */;
1750 : }
1751 : #endif
1752 1 : int start_level = input_stack::get_level();
1753 : for (;;) {
1754 4 : tok.next();
1755 4 : if (tok.is_newline() || tok.is_eof()) {
1756 : // token::description() writes to static, class-wide storage, so
1757 : // we must allocate a copy of it before issuing the next
1758 : // diagnostic.
1759 0 : char *delimdesc = strdup(start_token.description());
1760 0 : warning(WARN_DELIM, "missing closing delimiter in"
1761 : " bracket-building escape sequence; expected %1, got"
1762 0 : " %2", delimdesc, tok.description());
1763 0 : free(delimdesc);
1764 0 : break;
1765 : }
1766 4 : if (tok == start_token
1767 4 : && (want_att_compat || input_stack::get_level() == start_level))
1768 1 : break;
1769 : // TODO: In theory, we could accept spaces and horizontal motions.
1770 3 : charinfo *ci = tok.get_charinfo(true /* required */);
1771 3 : if (0 /* nullptr */ == ci) {
1772 0 : error("%1 is not supported in a bracket-building escape sequence"
1773 0 : " argument", tok.description());
1774 0 : delete bracketnode;
1775 0 : return 0 /* nullptr */;
1776 : }
1777 : else {
1778 3 : node *n = curenv->make_char_node(ci);
1779 3 : if (n != 0 /* nullptr */)
1780 3 : bracketnode->bracket(n);
1781 : }
1782 3 : }
1783 1 : return bracketnode;
1784 : }
1785 :
1786 22350 : static const char *do_name_test() // \A
1787 : {
1788 44700 : token start_token;
1789 22350 : start_token.next();
1790 22350 : if (!want_att_compat && !start_token.is_usable_as_delimiter())
1791 0 : warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
1792 0 : " is deprecated", tok.description());
1793 22350 : else if (want_att_compat
1794 22350 : && !start_token.is_usable_as_delimiter(false,
1795 : DELIMITER_ATT_STRING_EXPRESSION)) {
1796 0 : warning(WARN_DELIM, "name test escape sequence"
1797 : " does not accept %1 as a delimiter",
1798 0 : start_token.description());
1799 0 : return 0 /* nullptr */;
1800 : }
1801 : // TODO: groff 1.24.0 release + 2 years?
1802 : #if 0
1803 : if (!start_token.is_usable_as_delimiter(true /* report error */))
1804 : return 0 /* nullptr */;
1805 : #endif
1806 22350 : int start_level = input_stack::get_level();
1807 22350 : bool got_bad_char = false;
1808 22350 : bool got_some_char = false;
1809 : for (;;) {
1810 95232 : tok.next();
1811 95232 : if (tok.is_newline() || tok.is_eof()) {
1812 : // token::description() writes to static, class-wide storage, so
1813 : // we must allocate a copy of it before issuing the next
1814 : // diagnostic.
1815 0 : char *delimdesc = strdup(start_token.description());
1816 0 : warning(WARN_DELIM, "missing closing delimiter in identifier"
1817 : " validation escape sequence; expected %1, got %2",
1818 0 : delimdesc, tok.description());
1819 0 : free(delimdesc);
1820 0 : break;
1821 : }
1822 95232 : if (tok == start_token
1823 95232 : && (want_att_compat || input_stack::get_level() == start_level))
1824 22350 : break;
1825 72882 : if (tok.ch() == 0U)
1826 4428 : got_bad_char = true;
1827 72882 : got_some_char = true;
1828 72882 : }
1829 22350 : return (got_some_char && !got_bad_char) ? "1" : "0";
1830 : }
1831 :
1832 10333 : static const char *do_expr_test() // \B
1833 : {
1834 20666 : token start_token;
1835 10333 : start_token.next();
1836 20666 : if (!want_att_compat
1837 10333 : && !start_token.is_usable_as_delimiter(true /* report error */))
1838 0 : return 0 /* nullptr */;
1839 10333 : else if (want_att_compat
1840 10333 : && !start_token.is_usable_as_delimiter(false,
1841 : DELIMITER_ATT_NUMERIC_EXPRESSION)) {
1842 0 : warning(WARN_DELIM, "numeric expression test escape sequence"
1843 : " does not accept %1 as a delimiter",
1844 0 : start_token.description());
1845 0 : return 0 /* nullptr */;
1846 : }
1847 10333 : int start_level = input_stack::get_level();
1848 10333 : tok.next();
1849 : // disable all warning and error messages temporarily
1850 10333 : unsigned int saved_warning_mask = warning_mask;
1851 10333 : bool saved_want_errors_inhibited = want_errors_inhibited;
1852 10333 : warning_mask = 0;
1853 10333 : want_errors_inhibited = true;
1854 : int dummy;
1855 : // TODO: grochar
1856 10333 : bool result = read_measurement(&dummy, (unsigned char)('u'),
1857 : true /* is_mandatory */);
1858 10333 : warning_mask = saved_warning_mask;
1859 10333 : want_errors_inhibited = saved_want_errors_inhibited;
1860 : // read_measurement() has left `token` pointing at the input character
1861 : // after the end of the expression.
1862 10333 : if (tok == start_token && input_stack::get_level() == start_level)
1863 10262 : return (result ? "1" : "0");
1864 : // There may be garbage after the expression but before the closing
1865 : // delimiter. Eat it.
1866 : for (;;) {
1867 505 : if (tok.is_newline() || tok.is_eof()) {
1868 0 : char *delimdesc = strdup(start_token.description());
1869 0 : warning(WARN_DELIM, "missing closing delimiter in numeric"
1870 : " expression validation escape sequence; expected %1,"
1871 0 : " got %2", delimdesc, tok.description());
1872 0 : free(delimdesc);
1873 0 : break;
1874 : }
1875 505 : tok.next();
1876 505 : if (tok == start_token && input_stack::get_level() == start_level)
1877 71 : break;
1878 434 : }
1879 71 : return "0";
1880 : }
1881 :
1882 : #if 0
1883 : static node *do_zero_width_output()
1884 : {
1885 : token start_token;
1886 : start_token.next();
1887 : int start_level = input_stack::get_level();
1888 : environment env(curenv);
1889 : environment *oldenv = curenv;
1890 : curenv = &env;
1891 : for (;;) {
1892 : tok.next();
1893 : if (tok.is_newline() || tok.is_eof()) {
1894 : error("missing closing delimiter");
1895 : break;
1896 : }
1897 : if (tok == start_token
1898 : && (want_att_compat || input_stack::get_level() == start_level))
1899 : break;
1900 : tok.process();
1901 : }
1902 : curenv = oldenv;
1903 : node *rev = env.extract_output_line();
1904 : node *n = 0 /* nullptr */;
1905 : while (rev != 0 /* nullptr */) {
1906 : node *tem = rev;
1907 : rev = rev->next;
1908 : tem->next = n;
1909 : n = tem;
1910 : }
1911 : return new zero_width_node(n);
1912 : }
1913 :
1914 : #else
1915 :
1916 : // It's undesirable for \Z to change environments, because then
1917 : // \n(.w won't work as expected.
1918 :
1919 851 : static node *do_zero_width_output() // \Z
1920 : {
1921 851 : node *rev = new dummy_node;
1922 851 : node *n = 0 /* nullptr */;
1923 1702 : token start_token;
1924 851 : start_token.next();
1925 851 : if (!want_att_compat && !start_token.is_usable_as_delimiter())
1926 0 : warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
1927 0 : " is deprecated", tok.description());
1928 851 : else if (want_att_compat
1929 851 : && !start_token.is_usable_as_delimiter(false,
1930 : DELIMITER_ATT_STRING_EXPRESSION)) {
1931 0 : warning(WARN_DELIM, "zero-width sequence escape sequence"
1932 : " does not accept %1 as a delimiter",
1933 0 : start_token.description());
1934 0 : return 0 /* nullptr */;
1935 : }
1936 : // TODO: groff 1.24.0 release + 2 years?
1937 : #if 0
1938 : if (!start_token.is_usable_as_delimiter(true /* report error */)) {
1939 : delete rev;
1940 : return 0 /* nullptr */;
1941 : }
1942 : #endif
1943 851 : int start_level = input_stack::get_level();
1944 : for (;;) {
1945 4930 : tok.next();
1946 4930 : if (tok.is_newline() || tok.is_eof()) {
1947 : // token::description() writes to static, class-wide storage, so
1948 : // we must allocate a copy of it before issuing the next
1949 : // diagnostic.
1950 0 : char *delimdesc = strdup(start_token.description());
1951 0 : warning(WARN_DELIM, "missing closing delimiter in zero-width"
1952 : " output escape sequence; expected %1, got %2", delimdesc,
1953 0 : tok.description());
1954 0 : free(delimdesc);
1955 0 : break;
1956 : }
1957 4930 : if (tok == start_token
1958 4930 : && (want_att_compat || input_stack::get_level() == start_level))
1959 851 : break;
1960 : // XXX: does the initial dummy node leak if this fails?
1961 4079 : if (!tok.add_to_zero_width_node_list(&rev))
1962 0 : error("%1 is not allowed in a zero-width output escape"
1963 0 : " sequence argument", tok.description());
1964 4079 : }
1965 5127 : while (rev != 0 /* nullptr */) {
1966 4276 : node *tem = rev;
1967 4276 : rev = rev->next;
1968 4276 : tem->next = n;
1969 4276 : n = tem;
1970 : }
1971 851 : return new zero_width_node(n);
1972 : }
1973 :
1974 : #endif
1975 :
1976 8919500 : token_node *node::get_token_node()
1977 : {
1978 8919500 : return 0 /* nullptr */;
1979 : }
1980 :
1981 : class token_node : public node {
1982 : public:
1983 : token tk;
1984 : token_node(const token &t);
1985 : void asciify(macro *);
1986 : node *copy();
1987 : token_node *get_token_node();
1988 : bool is_same_as(node *);
1989 : const char *type();
1990 : bool causes_tprint();
1991 : bool is_tag();
1992 : };
1993 :
1994 1108980 : token_node::token_node(const token &t) : tk(t)
1995 : {
1996 1108980 : }
1997 :
1998 0 : void token_node::asciify(macro *)
1999 : {
2000 0 : assert(0 == "attempting to 'asciify' a `token_node`");
2001 : delete this;
2002 : }
2003 :
2004 896850 : node *token_node::copy()
2005 : {
2006 896850 : return new token_node(tk);
2007 : }
2008 :
2009 896850 : token_node *token_node::get_token_node()
2010 : {
2011 896850 : return this;
2012 : }
2013 :
2014 0 : bool token_node::is_same_as(node *nd)
2015 : {
2016 0 : return (tk == static_cast<token_node *>(nd)->tk);
2017 : }
2018 :
2019 0 : const char *token_node::type()
2020 : {
2021 0 : return "token node";
2022 : }
2023 :
2024 0 : bool token_node::causes_tprint()
2025 : {
2026 0 : return false;
2027 : }
2028 :
2029 0 : bool token_node::is_tag()
2030 : {
2031 0 : return false;
2032 : }
2033 :
2034 642083 : token::token() : nd(0 /* nullptr */), type(TOKEN_EMPTY)
2035 : {
2036 642083 : }
2037 :
2038 6378604 : token::~token()
2039 : {
2040 3189302 : delete nd;
2041 3189302 : }
2042 :
2043 2547219 : token::token(const token &t)
2044 2547219 : : nm(t.nm), c(t.c), val(t.val), dim(t.dim), type(t.type)
2045 : {
2046 2547219 : if (t.nd != 0 /* nullptr */)
2047 490 : nd = t.nd->copy();
2048 : else
2049 2546729 : nd = 0 /* nullptr */;
2050 2547219 : }
2051 :
2052 947750 : void token::operator=(const token &t)
2053 : {
2054 947750 : delete nd;
2055 947750 : nm = t.nm;
2056 947750 : if (t.nd != 0 /* nullptr */)
2057 245 : nd = t.nd->copy();
2058 : else
2059 947505 : nd = 0 /* nullptr */;
2060 947750 : c = t.c;
2061 947750 : val = t.val;
2062 947750 : dim = t.dim;
2063 947750 : type = t.type;
2064 947750 : }
2065 :
2066 79731051 : void token::skip_spaces()
2067 : {
2068 79731051 : while (is_space())
2069 21974737 : next();
2070 57756314 : }
2071 :
2072 9614 : void token::diagnose_non_character()
2073 : {
2074 : // TODO: What about
2075 : // is_space()
2076 : // is_stretchable_space()
2077 : // is_unstrechable_space()
2078 : // is_horizontal_motion()
2079 : // is_horizontal_whitespace()
2080 : // is_leader()
2081 : // is_backspace()
2082 : // is_dummy()
2083 : // is_transparent()
2084 : // is_transparent_dummy()
2085 : // is_left_brace()
2086 : // is_page_ejector()
2087 : // is_hyphen_indicator()
2088 : // is_zero_width_break()
2089 : // ?
2090 9614 : if (!is_newline() && !is_eof() && !is_right_brace() && !is_tab())
2091 0 : error("expected ordinary, special, or indexed character, got %1;"
2092 0 : " ignoring", description());
2093 9614 : }
2094 :
2095 : // Indicate whether an argument lies ahead on the current line in the
2096 : // input stream, skipping over spaces. This function is therefore not
2097 : // appropriate for use when handling requests or escape sequences that
2098 : // don't use space to separate their arguments, as with `.tr aAbB` or
2099 : // `\o'^e'`.
2100 : //
2101 : // Specify `want_peek` if request reads the next argument in copy mode,
2102 : // or otherwise must interpret it specially, as when reading a
2103 : // conditional expression (`if`, `ie`, `while`), or expecting a
2104 : // delimited argument (`tl`).
2105 14341142 : bool has_arg(bool want_peek)
2106 : {
2107 14341142 : if (tok.is_newline() || tok.is_eof())
2108 2592698 : return false;
2109 11748444 : if (want_peek) {
2110 : int c;
2111 : for (;;) {
2112 562599 : c = input_stack::peek();
2113 562599 : if (' ' == c)
2114 18 : (void) read_char_in_copy_mode(0 /* nullptr */);
2115 : else
2116 562581 : break;
2117 : }
2118 562581 : return !(('\n' == c) || (EOF == c));
2119 : }
2120 : else {
2121 11185863 : tok.skip_spaces();
2122 11185863 : return !(tok.is_newline() || tok.is_eof());
2123 : }
2124 : }
2125 :
2126 32767 : void token::make_space()
2127 : {
2128 32767 : type = TOKEN_SPACE;
2129 32767 : }
2130 :
2131 103557 : void token::make_newline()
2132 : {
2133 103557 : type = TOKEN_NEWLINE;
2134 103557 : }
2135 :
2136 239468043 : void token::next()
2137 : {
2138 239468043 : if (nd != 0 /* nullptr */) {
2139 998 : delete nd;
2140 998 : nd = 0 /* nullptr */;
2141 : }
2142 : units x;
2143 : for (;;) {
2144 255706213 : node *n = 0 /* nullptr */;
2145 255706213 : int cc = input_stack::get(&n);
2146 255706213 : if ((cc != escape_char) || 0U == escape_char) {
2147 242349091 : handle_ordinary_char:
2148 242349338 : switch (cc) {
2149 3 : case INPUT_NO_BREAK_SPACE:
2150 3 : type = TOKEN_STRETCHABLE_SPACE;
2151 239468043 : return;
2152 1 : case INPUT_SOFT_HYPHEN:
2153 1 : type = TOKEN_HYPHEN_INDICATOR;
2154 1 : return;
2155 1443898 : case PUSH_GROFF_MODE:
2156 1443898 : input_stack::set_att_compat(want_att_compat);
2157 1443898 : want_att_compat = false;
2158 3066661 : continue;
2159 0 : case PUSH_COMP_MODE:
2160 0 : input_stack::set_att_compat(want_att_compat);
2161 0 : want_att_compat = true;
2162 0 : continue;
2163 1434654 : case POP_GROFFCOMP_MODE:
2164 1434654 : want_att_compat = input_stack::get_att_compat();
2165 1434654 : continue;
2166 0 : case BEGIN_QUOTE:
2167 0 : input_stack::increase_level();
2168 0 : continue;
2169 0 : case END_QUOTE:
2170 0 : input_stack::decrease_level();
2171 0 : continue;
2172 188109 : case DOUBLE_QUOTE:
2173 188109 : continue;
2174 952372 : case EOF:
2175 952372 : type = TOKEN_EOF;
2176 952372 : return;
2177 125 : case TRANSPARENT_FILE_REQUEST:
2178 : case TITLE_REQUEST:
2179 : case COPY_FILE_REQUEST:
2180 : #ifdef COLUMN
2181 : case VJUSTIFY_REQUEST:
2182 : #endif /* COLUMN */
2183 125 : type = TOKEN_REQUEST;
2184 125 : c = cc;
2185 125 : return;
2186 42312 : case BEGIN_TRAP:
2187 42312 : type = TOKEN_BEGIN_TRAP;
2188 42312 : return;
2189 42030 : case END_TRAP:
2190 42030 : type = TOKEN_END_TRAP;
2191 42030 : return;
2192 60 : case LAST_PAGE_EJECTOR:
2193 60 : seen_last_page_ejector = true;
2194 : // fall through
2195 4856 : case PAGE_EJECTOR:
2196 4856 : type = TOKEN_PAGE_EJECTOR;
2197 4856 : return;
2198 : case ESCAPE_PERCENT:
2199 83843 : ESCAPE_PERCENT:
2200 83843 : type = TOKEN_HYPHEN_INDICATOR;
2201 83843 : return;
2202 : case ESCAPE_SPACE:
2203 6209 : ESCAPE_SPACE:
2204 6209 : type = TOKEN_UNSTRETCHABLE_SPACE;
2205 6209 : return;
2206 : case ESCAPE_TILDE:
2207 9853 : ESCAPE_TILDE:
2208 9853 : type = TOKEN_STRETCHABLE_SPACE;
2209 9853 : return;
2210 : case ESCAPE_COLON:
2211 51200 : ESCAPE_COLON:
2212 51200 : type = TOKEN_ZERO_WIDTH_BREAK;
2213 51200 : return;
2214 : case ESCAPE_e:
2215 5666 : ESCAPE_e:
2216 5666 : type = TOKEN_ESCAPE;
2217 5666 : return;
2218 137769 : case ESCAPE_E:
2219 137769 : goto handle_escape_char;
2220 : case ESCAPE_BAR:
2221 5349 : ESCAPE_BAR:
2222 5349 : type = TOKEN_HORIZONTAL_MOTION;
2223 5349 : nd = new hmotion_node(curenv->get_narrow_space_width(),
2224 5349 : curenv->get_fill_color());
2225 5349 : return;
2226 : case ESCAPE_CIRCUMFLEX:
2227 310 : ESCAPE_CIRCUMFLEX:
2228 310 : type = TOKEN_HORIZONTAL_MOTION;
2229 310 : nd = new hmotion_node(curenv->get_half_narrow_space_width(),
2230 310 : curenv->get_fill_color());
2231 310 : return;
2232 2403611 : case ESCAPE_NEWLINE:
2233 2403611 : have_formattable_input = false;
2234 2403611 : break;
2235 : case ESCAPE_LEFT_BRACE:
2236 1405911 : ESCAPE_LEFT_BRACE:
2237 1405911 : type = TOKEN_LEFT_BRACE;
2238 1405911 : return;
2239 : case ESCAPE_RIGHT_BRACE:
2240 1232591 : ESCAPE_RIGHT_BRACE:
2241 1232591 : type = TOKEN_RIGHT_BRACE;
2242 1232591 : return;
2243 : case ESCAPE_LEFT_QUOTE:
2244 12 : ESCAPE_LEFT_QUOTE:
2245 12 : type = TOKEN_SPECIAL_CHAR;
2246 12 : nm = symbol("ga");
2247 12 : return;
2248 : case ESCAPE_RIGHT_QUOTE:
2249 28 : ESCAPE_RIGHT_QUOTE:
2250 28 : type = TOKEN_SPECIAL_CHAR;
2251 28 : nm = symbol("aa");
2252 28 : return;
2253 : case ESCAPE_HYPHEN:
2254 18677 : ESCAPE_HYPHEN:
2255 18677 : type = TOKEN_SPECIAL_CHAR;
2256 18677 : nm = symbol("-");
2257 18677 : return;
2258 : case ESCAPE_UNDERSCORE:
2259 0 : ESCAPE_UNDERSCORE:
2260 0 : type = TOKEN_SPECIAL_CHAR;
2261 0 : nm = symbol("ul");
2262 0 : return;
2263 : case ESCAPE_c:
2264 112600 : ESCAPE_c:
2265 112600 : type = TOKEN_INTERRUPT;
2266 112600 : return;
2267 : case ESCAPE_BANG:
2268 24769 : ESCAPE_BANG:
2269 24769 : type = TOKEN_TRANSPARENT;
2270 24769 : return;
2271 : case ESCAPE_QUESTION:
2272 457945 : ESCAPE_QUESTION:
2273 457945 : nd = do_non_interpreted();
2274 457945 : if (nd != 0 /* nullptr */) {
2275 457945 : type = TOKEN_NODE;
2276 457945 : return;
2277 : }
2278 0 : break;
2279 : case ESCAPE_AMPERSAND:
2280 79406 : ESCAPE_AMPERSAND:
2281 79406 : type = TOKEN_DUMMY;
2282 79406 : return;
2283 : case ESCAPE_RIGHT_PARENTHESIS:
2284 19742 : ESCAPE_RIGHT_PARENTHESIS:
2285 19742 : type = TOKEN_TRANSPARENT_DUMMY;
2286 19742 : return;
2287 6 : case '\b':
2288 6 : type = TOKEN_BACKSPACE;
2289 6 : return;
2290 44980308 : case ' ':
2291 44980308 : type = TOKEN_SPACE;
2292 44980308 : return;
2293 878014 : case '\t':
2294 878014 : type = TOKEN_TAB;
2295 878014 : return;
2296 12232383 : case '\n':
2297 12232383 : type = TOKEN_NEWLINE;
2298 12232383 : return;
2299 287 : case '\001':
2300 287 : type = TOKEN_LEADER;
2301 287 : return;
2302 9816350 : case 0:
2303 : {
2304 9816350 : assert(n != 0 /* nullptr */);
2305 9816350 : token_node *tn = n->get_token_node();
2306 9816350 : if (tn != 0 /* nullptr */) {
2307 896850 : *this = tn->tk;
2308 896850 : delete tn;
2309 : }
2310 : else {
2311 8919500 : nd = n;
2312 8919500 : type = TOKEN_NODE;
2313 : }
2314 : }
2315 9816350 : return;
2316 164617681 : default:
2317 164617681 : type = TOKEN_CHAR;
2318 164617681 : c = cc;
2319 167684342 : return;
2320 2403611 : }
2321 : }
2322 : else {
2323 13494891 : handle_escape_char:
2324 13494891 : cc = input_stack::get(&n);
2325 13494891 : switch (cc) {
2326 3419 : case '(':
2327 3419 : nm = read_two_char_escape_parameter();
2328 3419 : type = TOKEN_SPECIAL_CHAR;
2329 3419 : return;
2330 0 : case EOF:
2331 0 : type = TOKEN_EOF;
2332 0 : error("end of input after escape character");
2333 0 : return;
2334 12 : case '`':
2335 12 : goto ESCAPE_LEFT_QUOTE;
2336 6 : case '\'':
2337 6 : goto ESCAPE_RIGHT_QUOTE;
2338 3572 : case '-':
2339 3572 : goto ESCAPE_HYPHEN;
2340 0 : case '_':
2341 0 : goto ESCAPE_UNDERSCORE;
2342 7760 : case '%':
2343 7760 : goto ESCAPE_PERCENT;
2344 17 : case ' ':
2345 17 : goto ESCAPE_SPACE;
2346 1214 : case '0':
2347 1214 : nd = new hmotion_node(curenv->get_digit_width(),
2348 1214 : curenv->get_fill_color());
2349 1214 : type = TOKEN_HORIZONTAL_MOTION;
2350 1214 : return;
2351 1390 : case '|':
2352 1390 : goto ESCAPE_BAR;
2353 72 : case '^':
2354 72 : goto ESCAPE_CIRCUMFLEX;
2355 33797 : case '/':
2356 33797 : if (want_att_compat)
2357 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2358 0 : " AT&T troff", char(cc));
2359 33797 : type = TOKEN_ITALIC_CORRECTION;
2360 33797 : return;
2361 33795 : case ',':
2362 33795 : if (want_att_compat)
2363 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2364 0 : " AT&T troff", char(cc));
2365 33795 : type = TOKEN_NODE;
2366 33795 : nd = new left_italic_corrected_node;
2367 33795 : return;
2368 19103 : case '&':
2369 19103 : goto ESCAPE_AMPERSAND;
2370 15529 : case ')':
2371 15529 : if (want_att_compat)
2372 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2373 0 : " AT&T troff", char(cc));
2374 15529 : goto ESCAPE_RIGHT_PARENTHESIS;
2375 10702 : case '!':
2376 10702 : goto ESCAPE_BANG;
2377 27242 : case '?':
2378 27242 : if (want_att_compat)
2379 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2380 0 : " AT&T troff", char(cc));
2381 27242 : goto ESCAPE_QUESTION;
2382 3415 : case '~':
2383 3415 : if (want_att_compat)
2384 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2385 0 : " AT&T troff", char(cc));
2386 3415 : goto ESCAPE_TILDE;
2387 118 : case ':':
2388 118 : if (want_att_compat)
2389 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2390 0 : " AT&T troff", char(cc));
2391 118 : goto ESCAPE_COLON;
2392 32038745 : case '"':
2393 32038745 : while ((cc = input_stack::get(0 /* nullptr */)) != '\n'
2394 32038745 : && cc != EOF)
2395 : ;
2396 905581 : if (cc == '\n')
2397 905581 : type = TOKEN_NEWLINE;
2398 : else
2399 0 : type = TOKEN_EOF;
2400 905581 : return;
2401 137458 : case '#': // Like \" but newline is ignored.
2402 137458 : if (want_att_compat)
2403 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2404 0 : " AT&T troff", char(cc));
2405 3446893 : while ((cc = input_stack::get(0 /* nullptr */)) != '\n')
2406 3309435 : if (cc == EOF) {
2407 0 : type = TOKEN_EOF;
2408 0 : return;
2409 : }
2410 137458 : break;
2411 1503503 : case '$':
2412 : {
2413 1503503 : symbol s = read_escape_parameter();
2414 1503503 : if (!(s.is_null() || s.is_empty()))
2415 1503503 : interpolate_positional_parameter(s);
2416 1503503 : break;
2417 : }
2418 1716558 : case '*':
2419 : {
2420 1716558 : symbol s = read_escape_parameter(WITH_ARGS);
2421 1716558 : if (!(s.is_null() || s.is_empty())) {
2422 1716558 : if (have_multiple_params) {
2423 1779 : have_multiple_params = false;
2424 1779 : interpolate_string_with_args(s);
2425 : }
2426 : else
2427 1714779 : interpolate_string(s);
2428 : }
2429 1716558 : break;
2430 : }
2431 273 : case 'a':
2432 273 : nd = new non_interpreted_char_node('\001');
2433 273 : type = TOKEN_NODE;
2434 273 : return;
2435 22350 : case 'A':
2436 22350 : if (want_att_compat)
2437 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2438 0 : " AT&T troff", char(cc));
2439 : {
2440 22350 : const char *res = do_name_test();
2441 22350 : if (0 /* nullptr */ == res)
2442 0 : break;
2443 22350 : c = *res;
2444 22350 : type = TOKEN_CHAR;
2445 : }
2446 22350 : return;
2447 1 : case 'b':
2448 1 : nd = do_bracket();
2449 1 : if (0 /* nullptr */ == nd)
2450 0 : break;
2451 1 : type = TOKEN_NODE;
2452 1 : return;
2453 10333 : case 'B':
2454 10333 : if (want_att_compat)
2455 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2456 0 : " AT&T troff", char(cc));
2457 : {
2458 10333 : const char *res = do_expr_test();
2459 10333 : if (0 /* nullptr */ == res)
2460 0 : break;
2461 10333 : c = *res;
2462 10333 : type = TOKEN_CHAR;
2463 : }
2464 10333 : return;
2465 29938 : case 'c':
2466 29938 : goto ESCAPE_c;
2467 53 : case 'C':
2468 53 : nm = read_delimited_identifier();
2469 53 : if (nm.is_null())
2470 1 : break;
2471 52 : type = TOKEN_DELIMITED_SPECIAL_CHAR;
2472 52 : return;
2473 120 : case 'd':
2474 120 : type = TOKEN_NODE;
2475 120 : nd = new vmotion_node(curenv->get_size() / 2,
2476 120 : curenv->get_fill_color());
2477 120 : return;
2478 230491 : case 'D':
2479 230491 : nd = read_drawing_command();
2480 230491 : if (0 /* nullptr */ == nd)
2481 189 : break;
2482 230302 : type = TOKEN_NODE;
2483 230302 : return;
2484 4201 : case 'e':
2485 4201 : goto ESCAPE_e;
2486 0 : case 'E':
2487 0 : if (want_att_compat)
2488 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2489 0 : " AT&T troff", char(cc));
2490 0 : goto handle_escape_char;
2491 127289 : case 'f':
2492 127289 : if (curenv->get_was_line_interrupted()) {
2493 0 : warning(WARN_SYNTAX, "ignoring escaped '%1' on input line"
2494 : " after output line continuation escape sequence",
2495 0 : char(cc));
2496 0 : break;
2497 : }
2498 127289 : select_font(read_escape_parameter(ALLOW_EMPTY));
2499 127289 : if (!want_att_compat)
2500 127289 : have_formattable_input = true;
2501 127289 : break;
2502 2452 : case 'F':
2503 2452 : if (want_att_compat)
2504 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2505 0 : " AT&T troff", char(cc));
2506 2452 : curenv->set_family(read_escape_parameter(ALLOW_EMPTY));
2507 2452 : have_formattable_input = true;
2508 2452 : break;
2509 260 : case 'g':
2510 : {
2511 260 : symbol s = read_escape_parameter();
2512 260 : if (!(s.is_null() || s.is_empty()))
2513 260 : interpolate_number_format(s);
2514 260 : break;
2515 : }
2516 122013 : case 'h':
2517 122013 : if (!read_delimited_measurement(&x, 'm'))
2518 13 : break;
2519 122000 : type = TOKEN_DELIMITED_HORIZONTAL_MOTION;
2520 122000 : nd = new hmotion_node(x, curenv->get_fill_color());
2521 122000 : return;
2522 615 : case 'H':
2523 : // don't take height increments relative to previous height if
2524 : // in compatibility mode
2525 615 : if (!want_att_compat && curenv->get_char_height()) {
2526 222 : if (read_delimited_measurement(&x, 'z',
2527 : curenv->get_char_height()))
2528 222 : curenv->set_char_height(x);
2529 : }
2530 : else {
2531 393 : if (read_delimited_measurement(&x, 'z',
2532 : curenv->get_requested_point_size()))
2533 393 : curenv->set_char_height(x);
2534 : }
2535 615 : if (!want_att_compat)
2536 615 : have_formattable_input = true;
2537 615 : break;
2538 480 : case 'k':
2539 480 : nm = read_escape_parameter();
2540 480 : if (nm.is_null() || nm.is_empty())
2541 0 : break;
2542 480 : type = TOKEN_MARK_INPUT;
2543 480 : return;
2544 242 : case 'l':
2545 : case 'L':
2546 : {
2547 242 : charinfo *s = 0 /* nullptr */;
2548 242 : if (!read_line_rule_expression(&x, (cc == 'l' ? 'm': 'v'),
2549 : &s))
2550 30 : break;
2551 212 : if (0 /* nullptr */ == s)
2552 44 : s = lookup_charinfo(cc == 'l' ? "ru" : "br");
2553 212 : type = TOKEN_NODE;
2554 212 : node *char_node = curenv->make_char_node(s);
2555 212 : if (cc == 'l')
2556 211 : nd = new hline_node(x, char_node);
2557 : else
2558 1 : nd = new vline_node(x, char_node);
2559 212 : return;
2560 : }
2561 76964 : case 'm':
2562 76964 : if (want_att_compat)
2563 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2564 0 : " AT&T troff", char(cc));
2565 76964 : do_stroke_color(read_escape_parameter(ALLOW_EMPTY));
2566 76964 : if (!want_att_compat)
2567 76964 : have_formattable_input = true;
2568 76964 : break;
2569 64842 : case 'M':
2570 64842 : if (want_att_compat)
2571 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2572 0 : " AT&T troff", char(cc));
2573 64842 : do_fill_color(read_escape_parameter(ALLOW_EMPTY));
2574 64842 : if (!want_att_compat)
2575 64842 : have_formattable_input = true;
2576 64842 : break;
2577 6880966 : case 'n':
2578 : {
2579 : int inc;
2580 6880966 : symbol s = read_increment_and_escape_parameter(&inc);
2581 6880966 : if (!(s.is_null() || s.is_empty()))
2582 6880966 : interpolate_register(s, inc);
2583 6880966 : break;
2584 : }
2585 46574 : case 'N':
2586 : // The argument is a glyph index, which is dimensionless.
2587 46574 : if (!read_delimited_measurement(&val, 0 /* dimensionless */))
2588 0 : break;
2589 46574 : type = TOKEN_INDEXED_CHAR;
2590 46574 : return;
2591 110 : case 'o':
2592 110 : nd = do_overstrike();
2593 110 : if (0 /* nullptr */ == nd)
2594 0 : break;
2595 110 : type = TOKEN_NODE;
2596 110 : return;
2597 757 : case 'O':
2598 757 : if (want_att_compat)
2599 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2600 0 : " AT&T troff", char(cc));
2601 757 : nd = do_suppress(read_escape_parameter());
2602 757 : if (0 /* nullptr */ == nd)
2603 270 : break;
2604 487 : type = TOKEN_NODE;
2605 487 : return;
2606 16 : case 'p':
2607 16 : type = TOKEN_SPREAD;
2608 16 : return;
2609 42 : case 'r':
2610 42 : type = TOKEN_NODE;
2611 42 : nd = new vmotion_node(-curenv->get_size(), curenv->get_fill_color());
2612 42 : return;
2613 6142 : case 'R':
2614 6142 : if (want_att_compat)
2615 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2616 0 : " AT&T troff", char(cc));
2617 6142 : do_register();
2618 6142 : if (!want_att_compat)
2619 6142 : have_formattable_input = true;
2620 6142 : break;
2621 5972 : case 's':
2622 5972 : if (curenv->get_was_line_interrupted()) {
2623 0 : warning(WARN_SYNTAX, "ignoring escaped '%1' on input line"
2624 : " after output line continuation escape sequence",
2625 0 : char(cc));
2626 0 : break;
2627 : }
2628 5972 : if (read_size(&x))
2629 5972 : curenv->set_size(x);
2630 5972 : if (!want_att_compat)
2631 5970 : have_formattable_input = true;
2632 5972 : break;
2633 30 : case 'S':
2634 : // The argument is in degrees, which are dimensionless.
2635 30 : if (read_delimited_measurement(&x, 0 /* dimensionless */))
2636 30 : curenv->set_char_slant(x);
2637 30 : if (!want_att_compat)
2638 30 : have_formattable_input = true;
2639 30 : break;
2640 53 : case 't':
2641 53 : type = TOKEN_NODE;
2642 53 : nd = new non_interpreted_char_node('\t');
2643 53 : return;
2644 120 : case 'u':
2645 120 : type = TOKEN_NODE;
2646 120 : nd = new vmotion_node(-curenv->get_size() / 2,
2647 120 : curenv->get_fill_color());
2648 120 : return;
2649 103815 : case 'v':
2650 103815 : if (!read_delimited_measurement(&x, 'v'))
2651 0 : break;
2652 103815 : type = TOKEN_NODE;
2653 103815 : nd = new vmotion_node(x, curenv->get_fill_color());
2654 103815 : return;
2655 1 : case 'V':
2656 1 : if (want_att_compat)
2657 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2658 0 : " AT&T troff", char(cc));
2659 : {
2660 1 : symbol s = read_escape_parameter();
2661 1 : if (!(s.is_null() || s.is_empty()))
2662 1 : interpolate_environment_variable(s);
2663 1 : break;
2664 : }
2665 26009 : case 'w':
2666 26009 : do_width();
2667 26009 : break;
2668 192 : case 'x':
2669 192 : if (!read_delimited_measurement(&x, 'v'))
2670 0 : break;
2671 192 : type = TOKEN_NODE;
2672 192 : nd = new extra_size_node(x);
2673 192 : return;
2674 70842 : case 'X':
2675 70842 : nd = do_device_extension();
2676 70842 : if (0 /* nullptr */ == nd)
2677 0 : break;
2678 70842 : type = TOKEN_NODE;
2679 70842 : return;
2680 29 : case 'Y':
2681 29 : if (want_att_compat)
2682 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2683 0 : " AT&T troff", char(cc));
2684 : {
2685 29 : symbol s = read_escape_parameter();
2686 29 : if (s.is_null() || s.is_empty())
2687 0 : break;
2688 29 : request_or_macro *p = lookup_request(s);
2689 29 : macro *m = p->to_macro();
2690 29 : if (0 /* nullptr */ == m) {
2691 0 : error("cannot interpolate '%1' to device-independent"
2692 : " output; it is a request, not a macro",
2693 0 : s.contents());
2694 0 : break;
2695 : }
2696 29 : nd = new device_extension_node(*m);
2697 29 : type = TOKEN_NODE;
2698 29 : return;
2699 : }
2700 81 : case 'z':
2701 81 : next();
2702 81 : if ((TOKEN_NODE == type)
2703 81 : || (TOKEN_HORIZONTAL_MOTION == type)
2704 81 : || (TOKEN_DELIMITED_HORIZONTAL_MOTION == type))
2705 0 : nd = new zero_width_node(nd);
2706 : else {
2707 : // TODO: In theory, we could accept spaces and horizontal
2708 : // motions.
2709 81 : charinfo *ci = get_charinfo(true /* required */);
2710 81 : if (0 /* nullptr */ == ci) {
2711 0 : error("%1 is not supported in a zero-width character"
2712 0 : " escape sequence argument", tok.description());
2713 0 : break;
2714 : }
2715 81 : node *gn = curenv->make_char_node(ci);
2716 81 : if (0 /* nullptr */ == gn) {
2717 : assert("make_char_node failed to create a character"
2718 : " node");
2719 3 : break;
2720 : }
2721 78 : nd = new zero_width_node(gn);
2722 78 : type = TOKEN_NODE;
2723 : }
2724 78 : return;
2725 851 : case 'Z':
2726 851 : if (want_att_compat)
2727 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2728 0 : " AT&T troff", char(cc));
2729 851 : nd = do_zero_width_output();
2730 851 : if (0 /* nullptr */ == nd)
2731 0 : break;
2732 851 : type = TOKEN_NODE;
2733 851 : return;
2734 115643 : case '{':
2735 115643 : goto ESCAPE_LEFT_BRACE;
2736 100822 : case '}':
2737 100822 : goto ESCAPE_RIGHT_BRACE;
2738 218331 : case '\n':
2739 218331 : break;
2740 800066 : case '[':
2741 800066 : if (want_att_compat)
2742 0 : warning(WARN_SYNTAX, "an escaped '%1' is not portable to"
2743 0 : " AT&T troff", char(cc));
2744 800066 : if (!want_att_compat) {
2745 800066 : symbol s = read_long_escape_parameters(WITH_ARGS);
2746 800066 : if (s.is_null() || s.is_empty())
2747 0 : break;
2748 800066 : if (have_multiple_params) {
2749 67634 : have_multiple_params = false;
2750 67634 : nm = composite_glyph_name(s);
2751 : }
2752 : else {
2753 732432 : const char *sc = s.contents();
2754 732432 : const char *gn = 0 /* nullptr */;
2755 732432 : if ((strlen(sc) > 2) && (sc[0] == 'u'))
2756 99969 : gn = valid_unicode_code_sequence(sc, 0 /* nullptr */);
2757 732432 : if (gn != 0 /* nullptr */) {
2758 99916 : const char *gn_decomposed = decompose_unicode(gn);
2759 99916 : if (gn_decomposed != 0 /* nullptr */)
2760 4397 : gn = &gn_decomposed[1];
2761 99916 : const char *groff_gn = unicode_to_glyph_name(gn);
2762 99916 : if (groff_gn != 0 /* nullptr */)
2763 14229 : nm = symbol(groff_gn);
2764 : else {
2765 : // ISO C++ does not permit VLAs on the stack.
2766 : // C++03: new char[strlen(gn) + 1 + 1]();
2767 85687 : char *buf = new char[strlen(gn) + 1 + 1];
2768 85687 : (void) memset(buf, 0,
2769 85687 : (strlen(gn) + 1 + 1) * sizeof(char));
2770 85687 : strcpy(buf, "u");
2771 85687 : strcat(buf, gn);
2772 85687 : nm = symbol(buf);
2773 85687 : delete[] buf;
2774 : }
2775 : }
2776 : else
2777 632516 : nm = symbol(sc);
2778 : }
2779 800066 : type = TOKEN_SPECIAL_CHAR;
2780 800066 : return;
2781 : }
2782 0 : goto handle_ordinary_char;
2783 247 : default:
2784 247 : if ((cc != escape_char) && (cc != '.'))
2785 0 : warning(WARN_ESCAPE, "ignoring escape character before %1",
2786 0 : input_char_description(cc));
2787 247 : goto handle_ordinary_char;
2788 : }
2789 : }
2790 16238170 : }
2791 : }
2792 :
2793 16042199 : bool token::operator==(const token &t)
2794 : {
2795 16042199 : if (type != t.type)
2796 1280069 : return false;
2797 14762130 : switch (type) {
2798 14742768 : case TOKEN_CHAR:
2799 14742768 : return c == t.c;
2800 19350 : case TOKEN_SPECIAL_CHAR:
2801 : case TOKEN_DELIMITED_SPECIAL_CHAR:
2802 19350 : return nm == t.nm;
2803 0 : case TOKEN_INDEXED_CHAR:
2804 0 : return val == t.val;
2805 12 : default:
2806 12 : return true;
2807 : }
2808 : }
2809 :
2810 915310 : bool token::operator!=(const token &t)
2811 : {
2812 915310 : return !(*this == t);
2813 : }
2814 :
// Is the character usable as a delimiter?
//
// Returns false for the characters that can validly begin a numeric
// expression (decimal digits, `+`, `-`, `(`, `.`, and `|`), and true
// for everything else.  Other operator characters (`/ * % < > = & :`
// and `)`) are deliberately still accepted.
//
// This is used directly only by `do_device_extension()`, because it is
// the only escape sequence that reads its argument in copy mode (so it
// doesn't tokenize it) and accepts a user-specified delimiter.
static bool is_char_usable_as_delimiter(int c)
{
  if ((c >= '0') && (c <= '9'))
    return false;
  const bool begins_numeric_expression = ('+' == c)
                                         || ('-' == c)
                                         || ('(' == c)
                                         || ('.' == c)
                                         || ('|' == c);
  return !begins_numeric_expression;
}
2853 :
2854 0 : void token::describe_node(char *buf, size_t bufsz)
2855 : {
2856 0 : assert(nd != 0 /* nullptr */);
2857 0 : if (0 /* nullptr */ == nd) {
2858 0 : (void) snprintf(buf, bufsz, "a null(!) node");
2859 0 : return;
2860 : }
2861 : // Ah, the joys of computational natural language grammar.
2862 0 : const char *ndtype = nd->type();
2863 0 : const char initial_letter = ndtype[0];
2864 0 : bool is_vowelly = false;
2865 : // I wonder if Kernighan thought that the presence of set types and an
2866 : // "in" operator was one of Pascal's great blunders. --GBR
2867 0 : if (('a' == initial_letter)
2868 0 : || ('e' == initial_letter)
2869 0 : || ('i' == initial_letter)
2870 0 : || ('o' == initial_letter)
2871 0 : || ('u' == initial_letter))
2872 0 : is_vowelly = true;
2873 0 : (void) memset(buf, 0, bufsz);
2874 0 : (void) snprintf(buf, bufsz, "a%s %s", is_vowelly ? "n" : "", ndtype);
2875 : }
2876 :
2877 : // Is the token a valid delimiter (like `'`)?
2878 4503104 : bool token::is_usable_as_delimiter(bool report_error,
2879 : enum delimiter_context context)
2880 : {
2881 4503104 : bool is_valid = false;
2882 4503104 : switch (type) {
2883 4487921 : case TOKEN_CHAR:
2884 4487921 : if (!want_att_compat)
2885 4487629 : is_valid = is_char_usable_as_delimiter(c);
2886 : else {
2887 292 : assert(context != DELIMITER_GROFF_EXPRESSION);
2888 292 : switch (context) {
2889 105 : case DELIMITER_ATT_STRING_EXPRESSION:
2890 105 : if (csgraph(c)
2891 105 : || (((c > 0) && (c < 012)) || (014 == c) || (0177 == c)))
2892 105 : is_valid = true;
2893 105 : break;
2894 92 : case DELIMITER_ATT_NUMERIC_EXPRESSION:
2895 92 : if (csgraph(c)
2896 92 : || (((c > 0) && (c < 012)) || (014 == c) || (0177 == c)))
2897 92 : is_valid = true;
2898 : // AT&T troff doesn't accept as numeric expression delimiters
2899 : // characters that validly appear in a numeric expression,
2900 : // _except_ for numerals, `|`, and `.`.
2901 92 : if (('+' == c)
2902 91 : || ('-' == c)
2903 90 : || ('/' == c)
2904 89 : || ('*' == c)
2905 88 : || ('%' == c)
2906 87 : || ('<' == c)
2907 86 : || ('>' == c)
2908 85 : || ('=' == c)
2909 84 : || ('&' == c)
2910 83 : || (':' == c)
2911 82 : || ('(' == c)
2912 81 : || (')' == c))
2913 12 : is_valid = false;
2914 92 : break;
2915 95 : case DELIMITER_ATT_OUTPUT_COMPARISON_EXPRESSION:
2916 95 : if (csupper(c)
2917 71 : || (cslower(c)
2918 17 : && (c != 'e')
2919 17 : && (c != 'n')
2920 17 : && (c != 'o')
2921 17 : && (c != 't'))
2922 54 : || cspunct(c)
2923 166 : || (((c > 0) && (c < 012)) || (014 == c) || (0177 == c)))
2924 80 : is_valid = true;
2925 : // AT&T troff doesn't accept as conditional expression
2926 : // delimiters characters that can validly appear in a numeric
2927 : // expression, nor `!`. We already excluded numerals above.
2928 95 : if (('+' == c)
2929 94 : || ('-' == c)
2930 93 : || ('/' == c)
2931 90 : || ('*' == c)
2932 89 : || ('%' == c)
2933 88 : || ('<' == c)
2934 87 : || ('>' == c)
2935 86 : || ('=' == c)
2936 85 : || ('&' == c)
2937 84 : || (':' == c)
2938 83 : || ('(' == c)
2939 82 : || (')' == c)
2940 81 : || ('|' == c)
2941 80 : || ('.' == c)
2942 79 : || ('!' == c))
2943 16 : is_valid = false;
2944 95 : break;
2945 0 : default:
2946 0 : assert(0 == "unhandled case of `context` (enum dcontext)");
2947 : break;
2948 : }
2949 : }
2950 4487921 : if (!is_valid && report_error)
2951 30 : error("character '%1' is not allowed as a delimiter",
2952 60 : static_cast<char>(c));
2953 4487921 : return is_valid;
2954 0 : case TOKEN_NODE:
2955 0 : if (report_error) {
2956 : // Reserve a buffer large enough to handle the lengthiest case.
2957 : // See `token::description()`.
2958 0 : const size_t bufsz
2959 : = sizeof "space character horizontal motion node token"
2960 : + sizeof "bracketrighttp"
2961 : + 2 /* for trailing '"' and '\0' */;
2962 : // C++03: char[bufsz]();
2963 : static char buf[bufsz];
2964 0 : (void) memset(buf, 0, bufsz);
2965 0 : describe_node(buf, bufsz);
2966 0 : error("%1 is not allowed as a delimiter", buf);
2967 : }
2968 0 : return false;
2969 2 : case TOKEN_SPACE:
2970 : case TOKEN_STRETCHABLE_SPACE:
2971 : case TOKEN_UNSTRETCHABLE_SPACE:
2972 : case TOKEN_DELIMITED_HORIZONTAL_MOTION:
2973 : case TOKEN_DELIMITED_SPECIAL_CHAR:
2974 : case TOKEN_NEWLINE:
2975 : case TOKEN_EOF:
2976 2 : if (report_error)
2977 0 : error("%1 is not allowed as a delimiter", description());
2978 2 : return false;
2979 15181 : default:
2980 15181 : return true;
2981 : }
2982 : }
2983 :
2984 1102494 : const char *token::description()
2985 : {
2986 : // Reserve a buffer large enough to handle the lengthiest cases. The
2987 : // user can still contrive, by accident or otherwise, an arbitrarily
2988 : // long identifier.
2989 : // "character code XXX"
2990 : // "special character 'bracketrighttp'"
2991 : // "indexed character -2147483648"
2992 : // "space character horizontal motion node token"
2993 : // "nonexistent special character or class"
2994 : // Future:
2995 : // "character code XXX (U+XXXX)" or similar
2996 1102494 : const size_t bufsz
2997 : = sizeof "space character horizontal motion node token"
2998 : + sizeof "bracketrighttp"
2999 : + 2 /* for trailing '"' and '\0' */;
3000 : static char buf[bufsz];
3001 1102494 : (void) memset(buf, 0, bufsz);
3002 1102494 : switch (type) {
3003 0 : case TOKEN_EMPTY:
3004 0 : return "an indeterminate token (at start of input?)";
3005 0 : case TOKEN_BACKSPACE:
3006 0 : return "a backspace character";
3007 6412 : case TOKEN_CHAR:
3008 6412 : if (INPUT_DELETE == c)
3009 0 : return "a delete character";
3010 6412 : else if ('\'' == c) {
3011 6332 : (void) snprintf(buf, bufsz, "character \"%c\"", c);
3012 6332 : return buf;
3013 : }
3014 80 : else if (c < 128) {
3015 80 : (void) snprintf(buf, bufsz, "character '%c'", c);
3016 80 : return buf;
3017 : }
3018 : else {
3019 0 : (void) snprintf(buf, bufsz, "character code %d", c);
3020 0 : return buf;
3021 : }
3022 6 : case TOKEN_DUMMY:
3023 6 : return "an escaped '&'";
3024 0 : case TOKEN_ESCAPE:
3025 0 : return "an escaped 'e'";
3026 0 : case TOKEN_HYPHEN_INDICATOR:
3027 0 : return "an escaped '%'";
3028 0 : case TOKEN_INTERRUPT:
3029 0 : return "an escaped 'c'";
3030 0 : case TOKEN_ITALIC_CORRECTION:
3031 0 : return "an escaped '/'";
3032 0 : case TOKEN_LEADER:
3033 0 : return "a leader character";
3034 0 : case TOKEN_LEFT_BRACE:
3035 0 : return "an escaped '{'";
3036 0 : case TOKEN_MARK_INPUT:
3037 0 : return "an escaped 'k'";
3038 14 : case TOKEN_NEWLINE:
3039 14 : return "a newline";
3040 0 : case TOKEN_NODE:
3041 : {
3042 : static char nodebuf[bufsz - (sizeof " token")];
3043 0 : (void) strcpy(nodebuf, "an undescribed node");
3044 0 : describe_node(nodebuf, bufsz);
3045 0 : (void) snprintf(buf, bufsz, "%s token", nodebuf);
3046 0 : return buf;
3047 : }
3048 1 : case TOKEN_INDEXED_CHAR:
3049 1 : (void) snprintf(buf, bufsz, "indexed character %d",
3050 : character_index());
3051 1 : return buf;
3052 1059588 : case TOKEN_RIGHT_BRACE:
3053 1059588 : return "an escaped '}'";
3054 0 : case TOKEN_SPACE:
3055 0 : return "a space";
3056 4 : case TOKEN_SPECIAL_CHAR:
3057 : case TOKEN_DELIMITED_SPECIAL_CHAR:
3058 : // We normally use apostrophes for quotation in diagnostic messages,
3059 : // but many special character names contain them. Fall back to
3060 : // double quotes if this one does. A user-defined special character
3061 : // name could contain both characters; we expect such users to lie
3062 : // comfortably in the bed they made for themselves.
3063 : {
3064 4 : const char *sc = nm.contents();
3065 4 : char qc = '\'';
3066 4 : if (strchr(sc, '\'') != 0 /* nullptr */)
3067 0 : qc = '"';
3068 : // TODO: This truncates the names of impractically long special
3069 : // character or character class names. Do something about that.
3070 : // (The truncation is visually indicated by the absence of a
3071 : // closing quotation mark.)
3072 : static const char special_character[] = "special character";
3073 : static const char character_class[] = "character class";
3074 : static const char nonexistent[] = "nonexistent special character"
3075 : " or class";
3076 4 : const char *ctype = special_character;
3077 4 : charinfo *ci = get_charinfo(false /* required */,
3078 : true /* suppress creation */);
3079 4 : if (0 /* nullptr */ == ci)
3080 1 : ctype = nonexistent;
3081 3 : else if (ci->is_class())
3082 1 : ctype = character_class;
3083 4 : (void) snprintf(buf, bufsz, "%s %c%s%c", ctype, qc, sc, qc);
3084 4 : return buf;
3085 : }
3086 0 : case TOKEN_SPREAD:
3087 0 : return "an escaped 'p'";
3088 0 : case TOKEN_STRETCHABLE_SPACE:
3089 0 : return "an escaped '~'";
3090 0 : case TOKEN_UNSTRETCHABLE_SPACE:
3091 0 : return "an escaped ' '";
3092 2 : case TOKEN_DELIMITED_HORIZONTAL_MOTION:
3093 2 : return "a parameterized horizontal motion";
3094 0 : case TOKEN_HORIZONTAL_MOTION:
3095 0 : return "a horizontal motion";
3096 36467 : case TOKEN_TAB:
3097 36467 : return "a tab character";
3098 0 : case TOKEN_TRANSPARENT:
3099 0 : return "an escaped '!'";
3100 0 : case TOKEN_TRANSPARENT_DUMMY:
3101 0 : return "an escaped ')'";
3102 0 : case TOKEN_ZERO_WIDTH_BREAK:
3103 0 : return "an escaped ':'";
3104 0 : case TOKEN_EOF:
3105 0 : return "end of input";
3106 0 : default:
3107 0 : assert(0 == "unhandled case of `type` (token)");
3108 : return "an undescribed token";
3109 : }
3110 : }
3111 :
3112 12337378 : void skip_line()
3113 : {
3114 12337378 : while (!tok.is_newline())
3115 1286663 : if (tok.is_eof())
3116 17 : return;
3117 : else
3118 1286646 : tok.next();
3119 11050715 : tok.next();
3120 : }
3121 :
3122 23802 : void compatible()
3123 : {
3124 : int n;
3125 23802 : if (has_arg() && read_integer(&n))
3126 23802 : want_att_compat = (n > 0);
3127 : else
3128 0 : want_att_compat = true;
3129 23802 : skip_line();
3130 23802 : }
3131 :
3132 6554251 : static void diagnose_missing_identifier(bool required)
3133 : {
3134 6554251 : if (tok.is_newline() || tok.is_eof()) {
3135 5458196 : if (required)
3136 0 : warning(WARN_MISSING, "missing identifier");
3137 : }
3138 1096055 : else if (tok.is_right_brace() || tok.is_tab()) {
3139 : // token::description() writes to static, class-wide storage, so we
3140 : // must allocate a copy of it before issuing the next diagnostic.
3141 1096055 : char *start = strdup(tok.description());
3142 311213 : do {
3143 1407268 : tok.next();
3144 1407268 : } while (tok.is_space() || tok.is_right_brace() || tok.is_tab());
3145 : // XXX: unreachable code? --GBR
3146 1096055 : if (!tok.is_newline() && !tok.is_eof())
3147 0 : error("%1 is not allowed before an argument", start);
3148 1096055 : else if (required)
3149 0 : warning(WARN_MISSING, "missing identifier");
3150 1096055 : free(start);
3151 : }
3152 0 : else if (required)
3153 0 : error("expected identifier, got %1", tok.description());
3154 : else
3155 0 : error("expected identifier, got %1; treated as missing",
3156 0 : tok.description());
3157 6554251 : }
3158 :
3159 19275968 : static void diagnose_invalid_identifier()
3160 : {
3161 36247206 : if (!tok.is_newline() && !tok.is_eof() && !tok.is_space()
3162 72660 : && !tok.is_tab() && !tok.is_right_brace()
3163 : // We don't want to give a warning for .el\{
3164 36247206 : && !tok.is_left_brace())
3165 0 : error("%1 is not allowed in an identifier", tok.description());
3166 19275968 : }
3167 :
3168 25349820 : symbol read_identifier(bool required)
3169 : {
3170 25349820 : if (want_att_compat) {
3171 : char buf[3];
3172 79609 : tok.skip_spaces();
3173 79609 : if ((buf[0] = tok.ch()) != 0U) {
3174 32567 : tok.next();
3175 32567 : if ((buf[1] = tok.ch()) != 0U) {
3176 32567 : buf[2] = '\0';
3177 32567 : tok.make_space();
3178 : }
3179 : else
3180 0 : diagnose_invalid_identifier();
3181 32567 : return symbol(buf);
3182 : }
3183 : else {
3184 47042 : diagnose_missing_identifier(required);
3185 47042 : return NULL_SYMBOL;
3186 : }
3187 : }
3188 : else
3189 25270211 : return read_long_identifier(required);
3190 : }
3191 :
3192 25635574 : symbol read_long_identifier(bool required)
3193 : {
3194 : return read_input_until_terminator(required, 0U,
3195 25635574 : true /* want identifier */);
3196 : }
3197 :
3198 : // Read bytes from input until reaching a null byte or the specified
3199 : // `end_char`; construct and return a `symbol` object therefrom.
3200 25783366 : static symbol read_input_until_terminator(bool required,
3201 : unsigned char end_char,
3202 : bool want_identifier)
3203 : {
3204 25783366 : tok.skip_spaces();
3205 25783366 : int buf_size = default_buffer_size;
3206 : // TODO: grochar
3207 25783366 : unsigned char *buf = 0 /* nullptr */;
3208 : try {
3209 : // C++03: new char[buf_size]();
3210 25783366 : buf = new unsigned char[buf_size];
3211 : }
3212 0 : catch (const std::bad_alloc &e) {
3213 0 : fatal("cannot allocate %1 bytes to read input line", buf_size);
3214 : }
3215 25783366 : (void) memset(buf, 0, (buf_size * sizeof(unsigned char)));
3216 25783366 : int i = 0;
3217 25783366 : const unsigned char terminator = end_char; // TODO: grochar
3218 : for (;;) {
3219 : // If `terminator` != 0U we normally have to append a null byte.
3220 114274928 : if ((i + 2) > buf_size) {
3221 937182 : unsigned char *old_buf = buf; // TODO: grochar
3222 937182 : int new_buf_size = buf_size * 2;
3223 : // C++03: new char[new_buf_size]();
3224 : try {
3225 937182 : buf = new unsigned char[new_buf_size];
3226 : }
3227 0 : catch (const std::bad_alloc &e) {
3228 0 : fatal("cannot allocate %1 bytes to read input line", buf_size);
3229 : }
3230 937182 : (void) memset(buf, 0, (new_buf_size * sizeof(unsigned char)));
3231 937182 : (void) memcpy(buf, old_buf, (buf_size * sizeof(unsigned char)));
3232 937182 : buf_size = new_buf_size;
3233 937182 : delete[] old_buf;
3234 : }
3235 114274928 : buf[i] = tok.ch();
3236 114274928 : if ((0U == buf[i]) || (terminator == buf[i]))
3237 : break;
3238 88491562 : else if (want_identifier && ((buf[i] < ' ') || (buf[i] > 159))) {
3239 : // Of C0 controls, Solaris, Heirloom, and Plan 9 troff support
3240 : // ^[BCEFG] (only) in identifiers. DWB 3.3 supports none.
3241 0 : assert(buf[i] != ' '); // ensure caller handled spaces
3242 0 : error("character code %1 is not allowed in an identifier",
3243 0 : static_cast<int>(buf[i]));
3244 0 : delete[] buf;
3245 0 : return NULL_SYMBOL;
3246 : }
3247 88491562 : i++;
3248 88491562 : tok.next();
3249 88491562 : }
3250 25783366 : if (0 == i) {
3251 6507209 : diagnose_missing_identifier(required);
3252 6507209 : delete[] buf;
3253 6507209 : return NULL_SYMBOL;
3254 : }
3255 19276157 : if ((terminator != 0U) && (terminator == buf[i]))
3256 189 : buf[i + 1] = '\0';
3257 : else
3258 19275968 : diagnose_invalid_identifier();
3259 19276157 : char *chbuf = 0 /* nullptr */;
3260 : try {
3261 : // C++03: new char[buf_size]();
3262 19276157 : chbuf = new char[buf_size];
3263 : }
3264 0 : catch (const std::bad_alloc &e) {
3265 0 : fatal("cannot allocate %1 bytes to copy identifier", buf_size);
3266 : }
3267 343286733 : for (int j = 0; j < buf_size; j++)
3268 324010576 : chbuf[j] = static_cast<char>(buf[j]);
3269 19276157 : delete[] buf;
3270 19276157 : symbol s(chbuf);
3271 19276157 : delete[] chbuf;
3272 19276157 : return s;
3273 : }
3274 :
static void close_all_streams();

// Run the end-of-job sequence and terminate the program.
//
// In order: flag that exit is underway, close all streams, mark the
// top-level diversion as being on its last page, spring the
// end-of-input macro if one is named and process what it pushed,
// perform the environment's final break, end any diversions, and--if
// the page length is positive--force out the remaining page(s).
// Finally, hand off to write_any_trailer_and_exit().
//
// NOTE(review): the calls to process_input_stack() here can run
// arbitrary requests; presumably one of them may itself end the
// program before this function reaches its last statement -- confirm.
void exit_troff()
{
  is_exit_underway = true;
  close_all_streams();
  topdiv->set_last_page();
  if (!end_of_input_macro_name.is_null()) {
    spring_trap(end_of_input_macro_name);
    tok.next();
    process_input_stack();
  }
  curenv->final_break();
  tok.next();
  process_input_stack();
  end_diversions();
  if (topdiv->get_page_length() > 0) {
    is_eoi_macro_finished = true;
    topdiv->set_ejecting();
    // A one-character input string holding LAST_PAGE_EJECTOR; it is
    // pushed onto the input stack below.
    static unsigned char buf[2] = { LAST_PAGE_EJECTOR, '\0' };
    // XXX: Ugliness alert.  GNU troff wants to eat its cake and have it
    // too, using the explicit `unsigned char` numeric type to represent
    // input characters while also using C/C++'s `char` type--of
    // undefined signedness--and its literals, including character
    // string literals like `"\n"`, in free admixture therewith.
    //
    // Fixing this the right way means widening the fundamental
    // character type of GNU troff formatting operations, possibly to
    // `char32_t` (C++11).  That's a heavy lift; see Savannah #40720.
    input_stack::push(make_temp_iterator(reinterpret_cast<char *>(buf)));
    topdiv->space(topdiv->get_page_length(), true /* forcing */);
    tok.next();
    process_input_stack();
    // TODO: Resolve the following case and enable the assertion.
    // $ printf '.DS\n.DE\n' | ./build/test-groff -ms
    // troff: ../src/roff/troff/input.cpp:2937: void exit_troff():
    //   Assertion `seen_last_page_ejector' failed.
    // .../build/groff: error: troff: Aborted (core dumped)
    //assert(seen_last_page_ejector);
    seen_last_page_ejector = true; // should be set already
    topdiv->set_ejecting();
    push_page_ejector();
    topdiv->space(topdiv->get_page_length(), true /* forcing */);
    tok.next();
    process_input_stack();
  }
  // TODO: delete pointers in file name set.
  write_any_trailer_and_exit(EXIT_SUCCESS);
}
3324 :
3325 : // This implements .ex. The input stack must be cleared before calling
3326 : // exit_troff().
3327 :
3328 32 : void exit_request()
3329 : {
3330 32 : input_stack::clear();
3331 32 : if (is_exit_underway)
3332 32 : tok.next();
3333 : else
3334 0 : exit_troff();
3335 32 : }
3336 :
3337 112992 : void return_macro_request()
3338 : {
3339 112992 : if (has_arg() && (tok.ch() != 0U))
3340 0 : input_stack::pop_macro();
3341 112992 : input_stack::pop_macro();
3342 112992 : tok.next();
3343 112992 : }
3344 :
3345 462 : void eoi_macro()
3346 : {
3347 462 : end_of_input_macro_name = read_identifier();
3348 462 : skip_line();
3349 462 : }
3350 :
3351 3302 : void blank_line_macro()
3352 : {
3353 3302 : blank_line_macro_name = read_identifier();
3354 3302 : skip_line();
3355 3302 : }
3356 :
3357 3203 : void leading_spaces_macro()
3358 : {
3359 3203 : leading_spaces_macro_name = read_identifier();
3360 3203 : skip_line();
3361 3203 : }
3362 :
3363 1196 : static void trapping_blank_line()
3364 : {
3365 1196 : if (!blank_line_macro_name.is_null())
3366 1116 : spring_trap(blank_line_macro_name);
3367 : else
3368 80 : blank_line();
3369 1196 : }
3370 :
3371 : std::stack<bool> want_att_compat_stack;
3372 :
3373 160030 : void do_request()
3374 : {
3375 160030 : if (!has_arg()) {
3376 0 : warning(WARN_MISSING, "groff syntax interpretation request expects"
3377 : " a request or macro as argument");
3378 0 : skip_line();
3379 0 : return;
3380 : }
3381 160030 : want_att_compat_stack.push(want_att_compat);
3382 160030 : want_att_compat = false;
3383 160030 : symbol nm = read_identifier();
3384 160030 : if (nm.is_null())
3385 0 : skip_line();
3386 : else
3387 160030 : interpolate_macro(nm, true /* don't want next token */);
3388 160030 : assert(!want_att_compat_stack.empty());
3389 160030 : want_att_compat = want_att_compat_stack.top();
3390 160030 : want_att_compat_stack.pop();
3391 160030 : request_or_macro *p = lookup_request(nm);
3392 160030 : macro *m = p->to_macro();
3393 160030 : if (m != 0 /* nullptr */)
3394 2578 : tok.next();
3395 : }
3396 :
3397 21029452 : inline bool possibly_handle_first_page_transition()
3398 : {
3399 63056 : if ((topdiv->before_first_page_status > 0) && (curdiv == topdiv)
3400 21092508 : && !curenv->is_dummy()) {
3401 615 : handle_first_page_transition();
3402 615 : return true;
3403 : }
3404 : else
3405 21028837 : return false;
3406 : }
3407 :
3408 589585 : static int transparent_translate(int cc)
3409 : {
3410 589585 : if (!is_invalid_input_char(cc)) {
3411 588478 : charinfo *ci = charset_table[cc];
3412 588478 : switch (ci->get_special_translation(true /* transparently */)) {
3413 0 : case charinfo::TRANSLATE_SPACE:
3414 0 : return ' ';
3415 0 : case charinfo::TRANSLATE_STRETCHABLE_SPACE:
3416 0 : return ESCAPE_TILDE;
3417 0 : case charinfo::TRANSLATE_DUMMY:
3418 0 : return ESCAPE_AMPERSAND;
3419 0 : case charinfo::TRANSLATE_HYPHEN_INDICATOR:
3420 0 : return ESCAPE_PERCENT;
3421 : }
3422 : // This is really ugly.
3423 588478 : ci = ci->get_translation(1);
3424 588478 : if (ci != 0 /* nullptr */) {
3425 8 : unsigned char c = ci->get_ascii_code();
3426 8 : if (c != 0U)
3427 0 : return c;
3428 8 : error("cannot translate %1 to special character '%2' in"
3429 : " device-independent output", input_char_description(cc),
3430 16 : ci->nm.contents());
3431 : }
3432 : }
3433 589585 : return cc;
3434 : }
3435 :
3436 6947672 : bool node::need_reread(bool *)
3437 : {
3438 6947672 : return false;
3439 : }
3440 :
3441 : int global_diverted_space = 0;
3442 :
3443 526270 : bool diverted_space_node::need_reread(bool *bolp)
3444 : {
3445 526270 : global_diverted_space = 1;
3446 526270 : if (curenv->get_fill())
3447 0 : trapping_blank_line();
3448 : else
3449 526270 : curdiv->space(n);
3450 526270 : global_diverted_space = 0;
3451 526270 : *bolp = true;
3452 526270 : return true;
3453 : }
3454 :
3455 1 : bool diverted_copy_file_node::need_reread(bool *bolp)
3456 : {
3457 1 : curdiv->copy_file(filename.contents());
3458 1 : *bolp = true;
3459 1 : return true;
3460 : }
3461 :
3462 42964 : bool word_space_node::need_reread(bool *)
3463 : {
3464 42964 : if (unformat) {
3465 7172 : for (width_list *w = orig_width; w != 0 /* nullptr */; w = w->next)
3466 3586 : curenv->space(w->width, w->sentence_width);
3467 3586 : unformat = 0;
3468 3586 : return true;
3469 : }
3470 39378 : return false;
3471 : }
3472 :
3473 3575 : bool unbreakable_space_node::need_reread(bool *)
3474 : {
3475 3575 : return false;
3476 : }
3477 :
3478 1958526 : bool hmotion_node::need_reread(bool *)
3479 : {
3480 1958526 : if (unformat && was_tab) {
3481 0 : curenv->advance_to_tab_stop();
3482 0 : unformat = 0;
3483 0 : return true;
3484 : }
3485 1958526 : return false;
3486 : }
3487 :
// Count of leading spaces on the most recent input line that began
// with spaces, and their total width in units; both are set in
// process_input_stack().
static int leading_spaces_number = 0;
static int leading_spaces_space = 0;

// Main token-processing loop: dispatch on the type of the current
// token `tok` until an end-of-input token is seen, then return.
//
// `reading_beginning_of_input_line` tracks whether the current token
// starts an input line; this decides whether a control character
// introduces a request or macro call, whether a newline counts as a
// blank line, and whether spaces count as leading spaces.  Cases that
// have already advanced `tok` themselves set `ignore_next_token` so
// that the loop does not advance it again.
void process_input_stack()
{
  // Saved beginning-of-line states, one per trap currently executing.
  std::stack<int> trap_bol_stack;
  bool reading_beginning_of_input_line = true;
  for (;;) {
    bool ignore_next_token = false;
    switch (tok.type) {
    case token::TOKEN_CHAR:
      {
        unsigned char ch = tok.c;
        // A control character (regular or no-break) at the start of an
        // input line introduces a request or macro call.
        if (reading_beginning_of_input_line && !have_formattable_input
            && (curenv->get_control_character() == ch
                || curenv->get_no_break_control_character() == ch)) {
          was_invoked_with_regular_control_character
            = (curenv->get_control_character() == ch);
          // skip tabs as well as spaces here
          do {
            tok.next();
          } while (tok.is_horizontal_whitespace());
          symbol nm = read_identifier();
#if defined(DEBUGGING)
          if (want_html_debugging) {
            if (!nm.is_null()) {
              if (strcmp(nm.contents(), "test") == 0) {
                fprintf(stderr, "found it!\n");
                fflush(stderr);
              }
              fprintf(stderr, "interpreting [%s]", nm.contents());
              if (strcmp(nm.contents(), "di") == 0 && topdiv != curdiv)
                fprintf(stderr, " currently in diversion: %s",
                        curdiv->get_diversion_name());
              fprintf(stderr, "\n");
              fflush(stderr);
            }
          }
#endif
          if (nm.is_null())
            skip_line();
          else {
            interpolate_macro(nm);
#if defined(DEBUGGING)
            if (want_html_debugging) {
              fprintf(stderr, "finished interpreting [%s] and environment state is\n", nm.contents());
              curenv->dump_troff_state();
            }
#endif
          }
          ignore_next_token = true;
        }
        else {
          if (possibly_handle_first_page_transition())
            ;
          else {
            // Add this character and every immediately following
            // TOKEN_CHAR token to the environment in one go.
            for (;;) {
#if defined(DEBUGGING)
              if (want_html_debugging) {
                fprintf(stderr, "found [%c]\n", ch); fflush(stderr);
              }
#endif
              if (curenv->get_was_line_interrupted())
                warning(WARN_SYNTAX, "ignoring %1 on input line after"
                        " output line continuation escape sequence",
                        tok.description());
              else
                curenv->add_char(charset_table[ch]);
              tok.next();
              if (tok.type != token::TOKEN_CHAR)
                break;
              ch = tok.c;
            }
            ignore_next_token = true;
            reading_beginning_of_input_line = false;
          }
        }
        break;
      }
    case token::TOKEN_TRANSPARENT:
      {
        // Only acted upon at the start of an input line: copy the rest
        // of the line to the current diversion's transparent output.
        if (reading_beginning_of_input_line) {
          if (possibly_handle_first_page_transition())
            ;
          else {
            int cc;
            do {
              node *n;
              cc = read_char_in_copy_mode(&n);
              if (cc != EOF) {
                // A null character code means the result is in `n`.
                if (cc != '\0')
                  curdiv->transparent_output(transparent_translate(cc));
                else
                  curdiv->transparent_output(n);
              }
            } while (cc != '\n' && cc != EOF);
            // Terminate the line if the input ended without a newline.
            if (cc == EOF)
              curdiv->transparent_output('\n');
          }
        }
        break;
      }
    case token::TOKEN_NEWLINE:
      {
        // A newline at the very start of a line is a blank line, unless
        // the line continues an interrupted one.
        if (reading_beginning_of_input_line
            && !have_formattable_input_on_interrupted_line
            && !curenv->get_was_previous_line_interrupted())
          trapping_blank_line();
        else {
          curenv->newline();
          reading_beginning_of_input_line = true;
        }
        break;
      }
    case token::TOKEN_REQUEST:
      {
        // The request code travels in the token's character field.
        int request_code = tok.c;
        tok.next();
        switch (request_code) {
        case TITLE_REQUEST:
          title();
          break;
        case COPY_FILE_REQUEST:
          unsafe_transparent_throughput_file_request();
          break;
        case TRANSPARENT_FILE_REQUEST:
          transparent_throughput_file_request();
          break;
#ifdef COLUMN
        case VJUSTIFY_REQUEST:
          vjustify();
          break;
#endif /* COLUMN */
        default:
          assert(0 == "unhandled case of `request_code` (int)");
          break;
        }
        ignore_next_token = true;
        break;
      }
    case token::TOKEN_SPACE:
      {
        if (curenv->get_was_line_interrupted())
          warning(WARN_SYNTAX, "ignoring %1 on input line after"
                  " output line continuation escape sequence",
                  tok.description());
        else if (possibly_handle_first_page_transition())
          ;
        else if (reading_beginning_of_input_line
                 && !curenv->get_was_previous_line_interrupted()) {
          // Leading spaces: count them all before deciding what to do.
          int nspaces = 0;
          // save space_width now so that it isn't changed by \f or \s
          // which we wouldn't notice here
          hunits space_width = curenv->get_space_width();
          do {
            nspaces += tok.nspaces();
            tok.next();
          } while (tok.is_space());
          // A line consisting only of spaces is treated as blank.
          if (tok.is_newline())
            trapping_blank_line();
          else {
            // Put back the token that ended the run of spaces.
            push_token(tok);
            leading_spaces_number = nspaces;
            leading_spaces_space = space_width.to_units() * nspaces;
            if (!leading_spaces_macro_name.is_null())
              spring_trap(leading_spaces_macro_name);
            else {
              // No macro named: break, then add a horizontal motion as
              // wide as the leading spaces.
              curenv->do_break();
              curenv->add_node(new hmotion_node(space_width * nspaces,
                                                curenv->get_fill_color()));
            }
            reading_beginning_of_input_line = false;
          }
        }
        else {
          curenv->space();
          reading_beginning_of_input_line = false;
        }
        break;
      }
    case token::TOKEN_EOF:
      return;
    case token::TOKEN_NODE:
    case token::TOKEN_DELIMITED_HORIZONTAL_MOTION:
    case token::TOKEN_HORIZONTAL_MOTION:
      if (curenv->get_was_line_interrupted()) {
        // We don't want to warn about node types.  They might have been
        // interpolated into the input by the formatter itself, as with
        // the extra vertical space nodes appended to diversions.
        if ((token::TOKEN_HORIZONTAL_MOTION == tok.type)
            || (token::TOKEN_DELIMITED_HORIZONTAL_MOTION == tok.type))
          warning(WARN_SYNTAX, "ignoring %1 on input line after"
                  " output line continuation escape sequence",
                  tok.description());
      }
      else if (possibly_handle_first_page_transition())
        ;
      else if (tok.nd->need_reread(&reading_beginning_of_input_line)) {
        // The node replayed itself; it is no longer needed.
        delete tok.nd;
        tok.nd = 0;
      }
      else {
        // Hand the node to the environment and drop our pointer to it.
        curenv->add_node(tok.nd);
        tok.nd = 0;
        reading_beginning_of_input_line = false;
        curenv->possibly_break_line(true /* must break here */);
      }
      break;
    case token::TOKEN_PAGE_EJECTOR:
      {
        continue_page_eject();
        // I think we just want to preserve bol.
        // reading_beginning_of_input_line = true;
        break;
      }
    case token::TOKEN_BEGIN_TRAP:
      {
        // Remember the line state; it is restored at TOKEN_END_TRAP.
        trap_bol_stack.push(reading_beginning_of_input_line);
        reading_beginning_of_input_line = true;
        have_formattable_input = false;
        break;
      }
    case token::TOKEN_END_TRAP:
      {
        if (trap_bol_stack.empty())
          error("spurious end trap token detected!");
        else {
          reading_beginning_of_input_line = trap_bol_stack.top();
          trap_bol_stack.pop();
        }
        have_formattable_input = false;

        /* I'm not totally happy about this.  But I can't think of any other
          way to do it.  Doing an output_pending_lines() whenever a
          TOKEN_END_TRAP is detected doesn't work: for example,

          .wh -1i x
          .de x
          'bp
          ..
          .wh -.5i y
          .de y
          .tl ''-%-''
          ..
          .br
          .ll .5i
          .sp |\n(.pu-1i-.5v
          a\%very\%very\%long\%word

          will print all but the first lines from the word immediately
          after the footer, rather than on the next page. */

        // Hence pending lines are output only once the outermost trap
        // has ended.
        if (trap_bol_stack.empty())
          curenv->output_pending_lines();
        break;
      }
    case token::TOKEN_INDEXED_CHAR:
    case token::TOKEN_SPECIAL_CHAR:
    case token::TOKEN_DELIMITED_SPECIAL_CHAR:
      if (curenv->get_was_line_interrupted())
        warning(WARN_SYNTAX, "ignoring %1 on input line after output"
                " line continuation escape sequence",
                tok.description());
      else {
        reading_beginning_of_input_line = false;
        tok.process();
      }
      break;
    default:
      {
        // Every other token type is left to the token itself.
        reading_beginning_of_input_line = false;
        tok.process();
        break;
      }
    }
    if (!ignore_next_token)
      tok.next();
    was_trap_sprung = false;
  }
}
3768 :
#ifdef WIDOW_CONTROL

// Skip to the end of the current input line (or to end of input), have
// the current environment output its pending lines, and then read the
// next token.
void flush_pending_lines()
{
  for (;;) {
    if (tok.is_newline() || tok.is_eof())
      break;
    tok.next();
  }
  curenv->output_pending_lines();
  tok.next();
}

#endif /* WIDOW_CONTROL */
3780 :
3781 15900785 : request_or_macro::request_or_macro()
3782 : {
3783 15900785 : }
3784 :
3785 157661 : macro *request_or_macro::to_macro()
3786 : {
3787 157661 : return 0 /* nullptr */;
3788 : }
3789 :
3790 275092 : request::request(REQUEST_FUNCP pp) : p(pp)
3791 : {
3792 275092 : }
3793 :
3794 12284816 : void request::invoke(symbol, bool)
3795 : {
3796 12284816 : (*p)();
3797 12284783 : }
3798 :
// One fixed-size chunk of character storage, linked to the next chunk.
// `char_list` strings these together to hold its contents.
struct char_block {
  enum { SIZE = 128 };          // bytes of storage per block
  char_block();
  unsigned char s[SIZE];        // the stored bytes
  char_block *next;             // following block, or null if last
};
3805 :
3806 4537440 : char_block::char_block()
3807 4537440 : : next(0)
3808 : {
3809 4537440 : }
3810 :
3811 : class char_list {
3812 : public:
3813 : char_list();
3814 : ~char_list();
3815 : void append(unsigned char);
3816 : void set(unsigned char, int);
3817 : unsigned char get(int);
3818 : int length();
3819 : private:
3820 : unsigned char *ptr;
3821 : int len;
3822 : char_block *head;
3823 : char_block *tail;
3824 : friend class macro_header;
3825 : friend class string_iterator;
3826 : };
3827 :
3828 3628687 : char_list::char_list()
3829 3628687 : : ptr(0), len(0), head(0), tail(0)
3830 : {
3831 3628687 : }
3832 :
3833 9713830 : char_list::~char_list()
3834 : {
3835 6680791 : while (head != 0) {
3836 3647752 : char_block *tem = head;
3837 3647752 : head = head->next;
3838 3647752 : delete tem;
3839 : }
3840 3033039 : }
3841 :
3842 160665253 : int char_list::length()
3843 : {
3844 160665253 : return len;
3845 : }
3846 :
3847 160673517 : void char_list::append(unsigned char c)
3848 : {
3849 160673517 : if (tail == 0) {
3850 3628687 : head = tail = new char_block;
3851 3628687 : ptr = tail->s;
3852 : }
3853 : else {
3854 157044830 : if (ptr >= tail->s + char_block::SIZE) {
3855 908753 : tail->next = new char_block;
3856 908753 : tail = tail->next;
3857 908753 : ptr = tail->s;
3858 : }
3859 : }
3860 160673517 : *ptr++ = c;
3861 160673517 : len++;
3862 160673517 : }
3863 :
3864 0 : void char_list::set(unsigned char c, int offset)
3865 : {
3866 0 : assert(len > offset);
3867 : // optimization for access at the end
3868 0 : int boundary = len - len % char_block::SIZE;
3869 0 : if (offset >= boundary) {
3870 0 : *(tail->s + offset - boundary) = c;
3871 0 : return;
3872 : }
3873 0 : char_block *tem = head;
3874 0 : int l = 0;
3875 : for (;;) {
3876 0 : l += char_block::SIZE;
3877 0 : if (l > offset) {
3878 0 : *(tem->s + offset % char_block::SIZE) = c;
3879 0 : return;
3880 : }
3881 0 : tem = tem->next;
3882 : }
3883 : }
3884 :
3885 1006 : unsigned char char_list::get(int offset)
3886 : {
3887 1006 : assert(len > offset);
3888 : // optimization for access at the end
3889 1006 : int boundary = len - len % char_block::SIZE;
3890 1006 : if (offset >= boundary)
3891 1006 : return *(tail->s + offset - boundary);
3892 0 : char_block *tem = head;
3893 0 : int l = 0;
3894 : for (;;) {
3895 0 : l += char_block::SIZE;
3896 0 : if (l > offset)
3897 0 : return *(tem->s + offset % char_block::SIZE);
3898 0 : tem = tem->next;
3899 : }
3900 : }
3901 :
3902 : class node_list {
3903 : node *head;
3904 : node *tail;
3905 : public:
3906 : node_list();
3907 : ~node_list();
3908 : void append(node *);
3909 : int length();
3910 : node *extract();
3911 :
3912 : friend class macro_header;
3913 : friend class string_iterator;
3914 : };
3915 :
3916 9215342 : void node_list::append(node *n)
3917 : {
3918 9215342 : if (head == 0 /* nullptr */) {
3919 256890 : n->next = 0 /* nullptr */;
3920 256890 : head = tail = n;
3921 : }
3922 : else {
3923 8958452 : n->next = 0 /* nullptr */;
3924 8958452 : tail = tail->next = n;
3925 : }
3926 9215342 : }
3927 :
3928 0 : int node_list::length()
3929 : {
3930 0 : int total = 0 /* nullptr */;
3931 0 : for (node *n = head; n != 0 /* nullptr */; n = n->next)
3932 0 : ++total;
3933 0 : return total;
3934 : }
3935 :
3936 3628687 : node_list::node_list()
3937 : {
3938 3628687 : head = tail = 0 /* nullptr */;
3939 3628687 : }
3940 :
3941 0 : node *node_list::extract()
3942 : {
3943 0 : node *temp = head;
3944 0 : head = tail = 0 /* nullptr */;
3945 0 : return temp;
3946 : }
3947 :
3948 6066078 : node_list::~node_list()
3949 : {
3950 3033039 : delete_node_list(head);
3951 3033039 : }
3952 :
// Contents of a macro, string, or diversion, shared between `macro`
// objects.  `count` is incremented and decremented by `macro` as
// objects start and stop pointing at this header; the header is
// deleted when it drops to zero.  In `cl`, a zero byte stands for the
// next node held in `nl`.
class macro_header {
public:
  int count;
  char_list cl;
  node_list nl;
  // A fresh header starts with one reference.
  macro_header() { count = 1; }
  // Return a new header holding a copy of the first n bytes.
  macro_header *copy(int);
  void json_dump_macro();
  void json_dump_diversion();
};
3963 :
3964 15163168 : macro::~macro()
3965 : {
3966 14998951 : if (p != 0 /* nullptr */ && --(p->count) <= 0)
3967 2074552 : delete p;
3968 15163168 : }
3969 :
3970 4390983 : macro::macro()
3971 4390983 : : is_a_diversion(false), is_a_string(true)
3972 : {
3973 4390983 : if (!input_stack::get_location(true /* allow macro */, &filename,
3974 : &lineno)) {
3975 1537 : filename = 0 /* nullptr */;
3976 1537 : lineno = 0 /* nullptr */;
3977 : }
3978 4390983 : len = 0;
3979 4390983 : is_empty_macro = true;
3980 4390983 : p = 0; /* nullptr */
3981 4390983 : }
3982 :
3983 11182922 : macro::macro(const macro &m)
3984 11182922 : : filename(m.filename), lineno(m.lineno), len(m.len),
3985 11182922 : is_empty_macro(m.is_empty_macro), is_a_diversion(m.is_a_diversion),
3986 11182922 : is_a_string(m.is_a_string), p(m.p)
3987 : {
3988 11182922 : if (p != 0 /* nullptr */)
3989 10901976 : p->count++;
3990 11182922 : }
3991 :
3992 51788 : macro::macro(bool is_div)
3993 51788 : : is_a_diversion(is_div)
3994 : {
3995 51788 : if (!input_stack::get_location(true /* allow macro */, &filename,
3996 : &lineno)) {
3997 0 : filename = 0 /* nullptr */;
3998 0 : lineno = 0 /* nullptr */;
3999 : }
4000 51788 : len = 0;
4001 51788 : is_empty_macro = true;
4002 : // A macro is a string until it contains a newline.
4003 51788 : is_a_string = true;
4004 51788 : p = 0 /* nullptr */;
4005 51788 : }
4006 :
4007 9868972 : bool macro::is_diversion()
4008 : {
4009 9868972 : return is_a_diversion;
4010 : }
4011 :
4012 4419563 : bool macro::is_string()
4013 : {
4014 4419563 : return is_a_string;
4015 : }
4016 :
4017 1461321 : void macro::clear_string_flag()
4018 : {
4019 1461321 : is_a_string = false;
4020 1461321 : }
4021 :
4022 1972123 : macro ¯o::operator=(const macro &m)
4023 : {
4024 : // don't assign object
4025 1972123 : if (m.p != 0 /* nullptr */)
4026 1587838 : m.p->count++;
4027 1972123 : if (p != 0 /* nullptr */ && --(p->count) <= 0)
4028 958487 : delete p;
4029 1972123 : p = m.p;
4030 1972123 : filename = m.filename;
4031 1972123 : lineno = m.lineno;
4032 1972123 : len = m.len;
4033 1972123 : is_empty_macro = m.is_empty_macro;
4034 1972123 : is_a_diversion = m.is_a_diversion;
4035 1972123 : is_a_string = m.is_a_string;
4036 1972123 : return *this;
4037 : }
4038 :
4039 151458101 : void macro::append(unsigned char c)
4040 : {
4041 151458101 : assert(c != 0);
4042 151458101 : if (p == 0 /* nullptr */)
4043 3374803 : p = new macro_header;
4044 151458101 : if (p->cl.length() != len) {
4045 426 : macro_header *tem = p->copy(len);
4046 426 : if (--(p->count) <= 0)
4047 0 : delete p;
4048 426 : p = tem;
4049 : }
4050 151458101 : p->cl.append(c);
4051 151458101 : ++len;
4052 151458101 : if (c != PUSH_GROFF_MODE && c != PUSH_COMP_MODE && c != POP_GROFFCOMP_MODE)
4053 149407597 : is_empty_macro = false;
4054 151458101 : }
4055 :
4056 0 : void macro::set(unsigned char c, int offset)
4057 : {
4058 0 : assert(p != 0 /* nullptr */);
4059 0 : assert(c != 0);
4060 0 : p->cl.set(c, offset);
4061 0 : }
4062 :
4063 1006 : unsigned char macro::get(int offset)
4064 : {
4065 1006 : assert(p != 0 /* nullptr */);
4066 1006 : return p->cl.get(offset);
4067 : }
4068 :
4069 200 : int macro::length()
4070 : {
4071 200 : return len;
4072 : }
4073 :
4074 67 : void macro::append_str(const char *s)
4075 : {
4076 67 : int i = 0;
4077 :
4078 67 : if (s != 0 /* nullptr */) {
4079 339 : while (s[i] != '\0') {
4080 272 : append(s[i]);
4081 272 : i++;
4082 : }
4083 : }
4084 67 : }
4085 :
4086 9207152 : void macro::append(node *n)
4087 : {
4088 9207152 : assert(n != 0 /* nullptr */);
4089 9207152 : if (p == 0 /* nullptr */)
4090 253458 : p = new macro_header;
4091 9207152 : if (p->cl.length() != len) {
4092 0 : macro_header *tem = p->copy(len);
4093 0 : if (--(p->count) <= 0)
4094 0 : delete p;
4095 0 : p = tem;
4096 : }
4097 9207152 : p->cl.append(0);
4098 9207152 : p->nl.append(n);
4099 9207152 : ++len;
4100 9207152 : is_empty_macro = false;
4101 9207152 : }
4102 :
4103 27 : void macro::append_unsigned(unsigned int i)
4104 : {
4105 27 : unsigned int j = i / 10;
4106 27 : if (j != 0)
4107 18 : append_unsigned(j);
4108 27 : append(((unsigned char)(((int)'0') + i % 10)));
4109 27 : }
4110 :
4111 9 : void macro::append_int(int i)
4112 : {
4113 9 : if (i < 0) {
4114 0 : append('-');
4115 0 : i = -i;
4116 : }
4117 9 : append_unsigned((unsigned int) i);
4118 9 : }
4119 :
4120 0 : void macro::print_size()
4121 : {
4122 0 : errprint("%1", len);
4123 0 : }
4124 :
4125 : // Use this only for zero-length macros associated with charinfo objects
4126 : // that are character classes.
4127 1 : void macro::dump()
4128 : {
4129 1 : if (filename != 0 /* nullptr */)
4130 1 : errprint("file name: \"%1\", line number: %2\n", filename, lineno);
4131 1 : }
4132 :
4133 2 : void macro::json_dump()
4134 : {
4135 2 : bool need_comma = false;
4136 : // XXX: Unfortunately, if you alias or rename a request, the location
4137 : // of its invocation site is used for location information instead of
4138 : // its true origin.
4139 2 : if (filename != 0 /* nullptr */) {
4140 2 : symbol fn(filename); // `symbol` because it can't contain nulls.
4141 2 : const char *jsonfn = fn.json_extract();
4142 2 : errprint("\"file name\": %1", jsonfn);
4143 2 : free(const_cast<char *>(jsonfn));
4144 2 : fflush(stderr);
4145 2 : errprint(", \"starting line number\": %1", lineno);
4146 2 : need_comma = true;
4147 : }
4148 2 : if (need_comma)
4149 2 : errprint(", ");
4150 2 : errprint("\"length\": %1", len);
4151 2 : if (p != 0 /* nullptr */) {
4152 0 : errprint(", ");
4153 0 : p->json_dump_macro();
4154 0 : errprint(", ");
4155 0 : p->json_dump_diversion();
4156 : }
4157 2 : }
4158 :
4159 : // make a copy of the first n bytes
4160 :
4161 426 : macro_header *macro_header::copy(int n)
4162 : {
4163 426 : macro_header *p = new macro_header;
4164 426 : char_block *bp = cl.head;
4165 426 : unsigned char *ptr = bp->s;
4166 426 : node *nd = nl.head;
4167 8690 : while (--n >= 0) {
4168 8264 : if (ptr >= bp->s + char_block::SIZE) {
4169 0 : bp = bp->next;
4170 0 : ptr = bp->s;
4171 : }
4172 8264 : unsigned char c = *ptr++;
4173 8264 : p->cl.append(c);
4174 8264 : if (c == 0) {
4175 8190 : p->nl.append(nd->copy());
4176 8190 : nd = nd->next;
4177 : }
4178 : }
4179 426 : return p;
4180 : }
4181 :
4182 : extern void dump_node_list(node *);
4183 :
4184 0 : void macro_header::json_dump_diversion()
4185 : {
4186 0 : errprint("\"node list\": ");
4187 0 : dump_node_list(nl.head);
4188 0 : fflush(stderr);
4189 0 : }
4190 :
4191 0 : void macro_header::json_dump_macro()
4192 : {
4193 0 : errprint("\"contents\": \"");
4194 0 : int macro_len = cl.length();
4195 0 : for (int i = 0; i < macro_len; i++) {
4196 0 : json_char jc = json_encode_char(cl.get(i));
4197 : // Write out its JSON representation by character by character to
4198 : // keep libc string functions from interpreting C escape sequences.
4199 0 : for (size_t j = 0; j < jc.len; j++)
4200 0 : fputc(jc.buf[j], stderr);
4201 : }
4202 0 : errprint("\"");
4203 0 : fflush(stderr);
4204 0 : }
4205 :
4206 2 : void print_macro_request()
4207 : {
4208 : request_or_macro *rm;
4209 2 : macro *m = 0 /* nullptr */;
4210 2 : symbol s;
4211 2 : if (has_arg()) {
4212 0 : do {
4213 2 : s = read_identifier();
4214 2 : if (s.is_null())
4215 0 : break;
4216 2 : rm = static_cast<request_or_macro *>(request_dictionary.lookup(s));
4217 2 : if (rm != 0 /* nullptr */)
4218 2 : m = rm->to_macro();
4219 2 : if (m != 0 /* nullptr */) {
4220 2 : errprint("{\"name\": ");
4221 2 : s.json_dump();
4222 2 : errprint(", ");
4223 2 : m->json_dump();
4224 2 : errprint("}\n");
4225 2 : fflush(stderr);
4226 : }
4227 2 : } while (has_arg());
4228 : }
4229 : else {
4230 0 : object_dictionary_iterator iter(request_dictionary);
4231 : // We must use the nuclear `reinterpret_cast` operator because GNU
4232 : // troff's dictionary types use a pre-STL approach to containers.
4233 0 : while (iter.get(&s, reinterpret_cast<object **>(&rm))) {
4234 0 : assert(!s.is_null());
4235 0 : m = rm->to_macro();
4236 0 : if (m != 0 /* nullptr */) {
4237 0 : errprint("%1\t", s.contents());
4238 0 : m->print_size();
4239 0 : errprint("\n");
4240 : }
4241 : }
4242 : }
4243 2 : fflush(stderr);
4244 2 : skip_line();
4245 2 : }
4246 :
// Input iterator that reads the contents of a `macro` object: runs of
// bytes from its character blocks, interleaved with the nodes that
// zero bytes stand for.
class string_iterator : public input_iterator {
  macro mac; // private copy; shares the header with the original
  const char *how_invoked; // description used by backtrace(), or null
  bool seen_newline; // last fill() ended just after a newline
  int lineno; // line within the contents, starting at 1
  char_block *bp; // block currently being read
  int count;    // of characters remaining
  node *nd; // next node to hand out
  bool att_compat;
  bool with_break;      // inherited from the caller
protected:
  symbol nm; // name reported by backtrace()
  string_iterator();
public:
  string_iterator(const macro &, const char * = 0 /* nullptr */,
                  symbol = NULL_SYMBOL);
  int fill(node **);
  int peek();
  bool get_location(bool /* allow_macro */, const char ** /* filep */,
                    int * /* linep */);
  void backtrace();
  bool get_break_flag() { return with_break; }
  void set_att_compat(bool b) { att_compat = b; }
  bool get_att_compat() { return att_compat; }
  bool is_diversion();
};
4273 :
4274 9437049 : string_iterator::string_iterator(const macro &m, const char *p,
4275 9437049 : symbol s)
4276 9437049 : : input_iterator(m.is_a_diversion), mac(m), how_invoked(p),
4277 9437049 : seen_newline(false), lineno(1), nm(s)
4278 : {
4279 9437049 : count = mac.len;
4280 9437049 : if (count != 0) {
4281 9156136 : bp = mac.p->cl.head;
4282 9156136 : nd = mac.p->nl.head;
4283 9156136 : ptr = endptr = bp->s;
4284 : }
4285 : else {
4286 280913 : bp = 0 /* nullptr */;
4287 280913 : nd = 0 /* nullptr */;
4288 280913 : ptr = endptr = 0 /* nullptr */;
4289 : }
4290 9437049 : with_break = input_stack::get_break_flag();
4291 9437049 : }
4292 :
4293 67634 : string_iterator::string_iterator()
4294 : {
4295 67634 : bp = 0 /* nullptr */;
4296 67634 : nd = 0 /* nullptr */;
4297 67634 : ptr = endptr = 0 /* nullptr */;
4298 67634 : seen_newline = false;
4299 67634 : how_invoked = 0 /* nullptr */;
4300 67634 : lineno = 1;
4301 67634 : count = 0;
4302 67634 : with_break = input_stack::get_break_flag();
4303 67634 : }
4304 :
4305 9868972 : bool string_iterator::is_diversion()
4306 : {
4307 9868972 : return mac.is_diversion();
4308 : }
4309 :
// Make the next stretch of the contents available and return its
// first item.
//
// Returns EOF when nothing remains.  Returns 0 when the next item is
// a node; if `np` is non-null, a copy of that node is stored through
// it.  Otherwise returns the first byte of a run, leaving `ptr` just
// past that byte and `endptr` at the end of the run.  A run stops
// after a newline or ESCAPE_NEWLINE, before a zero byte, at the end of
// the current block, or when the remaining count is used up.
int string_iterator::fill(node **np)
{
  // The previous run ended with a newline, so a new line starts here.
  if (seen_newline)
    lineno++;
  seen_newline = false;
  if (count <= 0)
    return EOF;
  const unsigned char *p = endptr;
  // Continue in the next block once the current one is used up.
  if (p >= bp->s + char_block::SIZE) {
    bp = bp->next;
    p = bp->s;
  }
  // A zero byte stands for the next node on the node list.
  if (*p == '\0') {
    if (np != 0 /* nullptr */) {
      *np = nd->copy();
      if (is_diversion())
        (*np)->div_nest_level = input_stack::get_div_level();
      else
        (*np)->div_nest_level = 0;
    }
    nd = nd->next;
    endptr = ptr = p + 1;
    count--;
    return 0;
  }
  // Limit the run to the current block and to the bytes remaining.
  const unsigned char *e = bp->s + char_block::SIZE;
  if (e - p > count)
    e = p + count;
  ptr = p;
  while (p < e) {
    unsigned char c = *p;
    if (c == '\n' || c == ESCAPE_NEWLINE) {
      seen_newline = true;
      // The newline itself belongs to this run.
      p++;
      break;
    }
    // Leave a node placeholder for the next call.
    if (c == '\0')
      break;
    p++;
  }
  endptr = p;
  count -= p - ptr;
  return *ptr++;
}
4354 :
4355 4451 : int string_iterator::peek()
4356 : {
4357 4451 : if (count <= 0)
4358 131 : return EOF;
4359 4320 : const unsigned char *p = endptr;
4360 4320 : if (p >= bp->s + char_block::SIZE) {
4361 4320 : p = bp->next->s;
4362 : }
4363 4320 : return *p;
4364 : }
4365 :
4366 3337995 : bool string_iterator::get_location(bool allow_macro,
4367 : const char **filep, int *linep)
4368 : {
4369 3337995 : if (!allow_macro)
4370 5499 : return false;
4371 3332496 : if (0 /* nullptr */ == mac.filename)
4372 153 : return false;
4373 3332343 : *filep = mac.filename;
4374 3332343 : *linep = mac.lineno + lineno - 1;
4375 3332343 : return true;
4376 : }
4377 :
4378 28 : void string_iterator::backtrace()
4379 : {
4380 28 : if (mac.filename != 0 /* nullptr */) {
4381 28 : if (program_name != 0 /* nullptr */)
4382 28 : errprint("%1: ", program_name);
4383 28 : errprint("backtrace: '%1':%2", mac.filename,
4384 28 : (mac.lineno + lineno - 1));
4385 28 : if (how_invoked != 0 /* nullptr */) {
4386 28 : if (!nm.is_null())
4387 28 : errprint(": %1 '%2'", how_invoked, nm.contents());
4388 : else
4389 0 : errprint(": %1", how_invoked);
4390 : }
4391 28 : errprint("\n");
4392 : }
4393 28 : }
4394 :
// Input iterator over a private copy of a short character string.
// Objects are created only through make_temp_iterator().
class temp_iterator : public input_iterator {
  unsigned char *base; // owned copy of the text, or null if empty
  temp_iterator(const char *, int len);
public:
  ~temp_iterator();
  friend input_iterator *make_temp_iterator(const char *);
};
4402 :
4403 8957966 : inline temp_iterator::temp_iterator(const char *s, int len)
4404 8957966 : : base(0 /* nullptr */)
4405 : {
4406 8957966 : if (len > 0) {
4407 8891238 : base = new unsigned char[len + 1];
4408 8891238 : (void) memcpy(base, s, len);
4409 8891238 : base[len] = '\0';
4410 8891238 : ptr = base;
4411 8891238 : endptr = base + len;
4412 : }
4413 8957966 : }
4414 :
4415 17912116 : temp_iterator::~temp_iterator()
4416 : {
4417 8956058 : delete[] base;
4418 17912116 : }
4419 :
4420 :
4421 8957966 : input_iterator *make_temp_iterator(const char *s)
4422 : {
4423 8957966 : if (0 /* nullptr */ == s)
4424 66529 : return new temp_iterator(s, 0);
4425 : else {
4426 8891437 : size_t n = strlen(s);
4427 8891437 : return new temp_iterator(s, n);
4428 : }
4429 : }
4430 :
// This is used when macros with arguments are interpolated.  Each
// element holds one argument; the elements are chained in argument
// order through `next`.

struct arg_list {
  macro mac; // text of the argument
  bool space_follows; // as passed to macro_iterator::add_arg()
  arg_list *next; // following argument, or null
  arg_list(const macro &, bool);
  // Copies the given element and every element chained after it.
  arg_list(const arg_list *);
  ~arg_list();
};
4441 :
4442 1083153 : arg_list::arg_list(const macro &m, bool b)
4443 1083153 : : mac(m), space_follows(b), next(0 /* nullptr */)
4444 : {
4445 1083153 : }
4446 :
4447 640 : arg_list::arg_list(const arg_list *al)
4448 640 : : next(0 /* nullptr */)
4449 : {
4450 640 : mac = al->mac;
4451 640 : space_follows = al->space_follows;
4452 640 : arg_list **a = &next;
4453 640 : arg_list *p = al->next;
4454 1037 : while (p != 0 /* nullptr */) {
4455 397 : *a = new arg_list(p->mac, p->space_follows);
4456 397 : p = p->next;
4457 397 : a = &(*a)->next;
4458 : }
4459 640 : }
4460 :
4461 1083783 : arg_list::~arg_list()
4462 : {
4463 1083783 : }
4464 :
// A string_iterator that additionally carries the arguments of a
// macro call.
class macro_iterator : public string_iterator {
  arg_list *args; // first argument, or null if there are none
  int argc; // number of elements on `args`
  bool with_break;      // whether called as .foo or 'foo
public:
  macro_iterator(symbol, macro &,
                 const char * /* how_called */ = "macro",
                 bool /* want_arguments_initialized */ = false);
  macro_iterator();
  ~macro_iterator();
  bool has_args() { return true; }
  // Argument 0 is the macro's name; 1..argc are the call arguments.
  input_iterator *get_arg(int);
  arg_list *get_arg_list();
  symbol get_macro_name();
  bool space_follows_arg(int);
  bool get_break_flag() { return with_break; }
  int nargs() { return argc; }
  void add_arg(const macro &, int);
  // Discard up to the given number of leading arguments.
  void shift(int);
  bool is_macro() { return true; }
  bool is_diversion();
};
4487 :
4488 2664893 : input_iterator *macro_iterator::get_arg(int i)
4489 : {
4490 2664893 : if (i == 0)
4491 55570 : return make_temp_iterator(nm.contents());
4492 2609323 : if (i > 0 && i <= argc) {
4493 2402197 : arg_list *p = args;
4494 3678189 : for (int j = 1; j < i; j++) {
4495 1275992 : assert(p != 0);
4496 1275992 : p = p->next;
4497 : }
4498 2402197 : return new string_iterator(p->mac);
4499 : }
4500 : else
4501 207126 : return 0 /* nullptr */;
4502 : }
4503 :
4504 1194 : arg_list *macro_iterator::get_arg_list()
4505 : {
4506 1194 : return args;
4507 : }
4508 :
4509 1194 : symbol macro_iterator::get_macro_name()
4510 : {
4511 1194 : return nm;
4512 : }
4513 :
4514 0 : bool macro_iterator::space_follows_arg(int i)
4515 : {
4516 0 : if ((i > 0) && (i <= argc)) {
4517 0 : arg_list *p = args;
4518 0 : for (int j = 1; j < i; j++) {
4519 0 : assert(p != 0 /* nullptr */);
4520 0 : p = p->next;
4521 : }
4522 0 : return p->space_follows;
4523 : }
4524 : else
4525 0 : return false;
4526 : }
4527 :
4528 1082756 : void macro_iterator::add_arg(const macro &m, int s)
4529 : {
4530 : arg_list **p;
4531 2341551 : for (p = &args; *p != 0 /* nullptr */; p = &((*p)->next))
4532 : ;
4533 1082756 : *p = new arg_list(m, s);
4534 1082756 : ++argc;
4535 1082756 : }
4536 :
4537 568781 : void macro_iterator::shift(int n)
4538 : {
4539 568781 : while (n > 0 && argc > 0) {
4540 300997 : arg_list *tem = args;
4541 300997 : args = args->next;
4542 300997 : delete tem;
4543 300997 : --argc;
4544 300997 : --n;
4545 : }
4546 267784 : }
4547 :
4548 : // This gets used by, e.g., .if '\?xxx\?''.
4549 :
4550 228916 : bool operator==(const macro &m1, const macro &m2)
4551 : {
4552 228916 : if (m1.len != m2.len)
4553 89445 : return false;
4554 278942 : string_iterator iter1(m1);
4555 278942 : string_iterator iter2(m2);
4556 139471 : int n = m1.len;
4557 205536 : while (--n >= 0) {
4558 188610 : node *nd1 = 0;
4559 188610 : int c1 = iter1.get(&nd1);
4560 188610 : assert(c1 != EOF);
4561 188610 : node *nd2 = 0;
4562 188610 : int c2 = iter2.get(&nd2);
4563 188610 : assert(c2 != EOF);
4564 188610 : if (c1 != c2) {
4565 122545 : if (c1 == 0)
4566 0 : delete nd1;
4567 122545 : else if (c2 == 0)
4568 0 : delete nd2;
4569 122545 : return false;
4570 : }
4571 66065 : if (c1 == 0) {
4572 0 : assert(nd1 != 0);
4573 0 : assert(nd2 != 0);
4574 0 : bool same = nd1->type() == nd2->type() && nd1->is_same_as(nd2);
4575 0 : delete nd1;
4576 0 : delete nd2;
4577 0 : return same;
4578 : }
4579 : }
4580 16926 : return true;
4581 : }
4582 :
4583 13213126 : static void interpolate_macro(symbol nm, bool do_not_want_next_token)
4584 : {
4585 : request_or_macro *p
4586 13213126 : = static_cast<request_or_macro *>(request_dictionary.lookup(nm));
4587 13213126 : if (0 /* nullptr */ == p) {
4588 293 : bool was_warned = false;
4589 293 : const char *s = nm.contents();
4590 293 : if (strlen(s) > 2) {
4591 : request_or_macro *r;
4592 : char buf[3];
4593 280 : buf[0] = s[0];
4594 280 : buf[1] = s[1];
4595 280 : buf[2] = '\0';
4596 : r = static_cast<request_or_macro *>
4597 280 : (request_dictionary.lookup(symbol(buf)));
4598 280 : if (r != 0 /* nullptr */) {
4599 0 : macro *m = r->to_macro();
4600 0 : if ((0 /* nullptr */ == m) || !m->is_empty()) {
4601 0 : warning(WARN_SPACE, "name '%1' not defined (possibly missing"
4602 0 : " space after '%2')", nm.contents(), buf);
4603 0 : was_warned = true;
4604 : }
4605 : }
4606 : }
4607 293 : if (!was_warned) {
4608 293 : warning(WARN_MAC, "name '%1' not defined", nm.contents());
4609 293 : p = new macro;
4610 293 : request_dictionary.define(nm, p);
4611 : }
4612 : }
4613 13213126 : if (p != 0 /* nullptr */)
4614 13213126 : p->invoke(nm, do_not_want_next_token);
4615 : else {
4616 0 : skip_line();
4617 0 : return;
4618 : }
4619 : }
4620 :
// Read the remainder of the input line in copy mode, split it into
// macro arguments, and add each one to `mi`.  Nothing is read if the
// current token is a newline or end of input.
//
// Arguments are separated by spaces.  An argument that begins with a
// double quote extends to the closing double quote and may contain
// spaces; inside it, two double quotes in a row stand for one.  Every
// stored argument is bracketed by a mode-push byte (chosen by
// `want_att_compat`) and POP_GROFFCOMP_MODE.
static void decode_macro_call_arguments(macro_iterator *mi)
{
  if (!tok.is_newline() && !tok.is_eof()) {
    node *n;
    int c = read_char_in_copy_mode(&n);
    for (;;) {
      // Skip the spaces that separate arguments.
      while (c == ' ')
        c = read_char_in_copy_mode(&n);
      if (c == '\n' || c == EOF)
        break;
      macro arg;
      // Input level at which the opening quote was read; zero while
      // the argument is unquoted.
      int quote_input_level = 0;
      bool was_warned = false; // about an input tab character
      arg.append(want_att_compat ? PUSH_COMP_MODE : PUSH_GROFF_MODE);
      // we store discarded double quotes for \$^
      if (c == '"') {
        arg.append(DOUBLE_QUOTE);
        quote_input_level = input_stack::get_level();
        c = read_char_in_copy_mode(&n);
      }
      while (c != EOF && c != '\n'
             && !(c == ' ' && quote_input_level == 0)) {
        // A double quote can close the argument only if it was read at
        // the input level of the opening quote, unless AT&T
        // compatibility mode is on.
        if (quote_input_level > 0 && c == '"'
            && (want_att_compat
                || input_stack::get_level() == quote_input_level)) {
          arg.append(DOUBLE_QUOTE);
          c = read_char_in_copy_mode(&n);
          if (c == '"') {
            // Doubled quote: keep one and stay inside the argument.
            arg.append(c);
            c = read_char_in_copy_mode(&n);
          }
          else
            break;
        }
        else {
          // A zero return means a node was delivered through `n`.
          if (c == 0)
            arg.append(n);
          else {
            if (c == '\t' && quote_input_level == 0 && !was_warned) {
              warning(WARN_TAB, "tab character in unquoted macro"
                      " argument");
              was_warned = true;
            }
            arg.append(c);
          }
          c = read_char_in_copy_mode(&n);
        }
      }
      arg.append(POP_GROFFCOMP_MODE);
      mi->add_arg(arg, (c == ' '));
    }
  }
}
4674 :
// Read the space-separated arguments of a parameterized escape
// sequence in copy mode, up to the closing ']', and add each one to
// `mi`.  Reaching a newline or end of input first is reported as an
// error.
//
// An argument that begins with a double quote extends to the closing
// double quote and may contain spaces and ']'; inside it, two double
// quotes in a row stand for one.  Unlike macro call arguments, the
// enclosing quotes are not stored.
static void decode_escape_sequence_arguments(macro_iterator *mi)
{
  node *n;
  int c = read_char_in_copy_mode(&n);
  for (;;) {
    // Skip the spaces that separate arguments.
    while (c == ' ')
      c = read_char_in_copy_mode(&n);
    if (c == '\n' || c == EOF) {
      error("missing ']' in parameterized escape sequence");
      break;
    }
    if (c == ']')
      break;
    macro arg;
    // Input level at which the opening quote was read; zero while the
    // argument is unquoted.
    int quote_input_level = 0;
    bool was_warned = false; // about an input tab character
    if (c == '"') {
      quote_input_level = input_stack::get_level();
      c = read_char_in_copy_mode(&n);
    }
    while (c != EOF && c != '\n'
           && !(c == ']' && quote_input_level == 0)
           && !(c == ' ' && quote_input_level == 0)) {
      // A double quote can close the argument only if it was read at
      // the input level of the opening quote.
      if (quote_input_level > 0 && c == '"'
          && input_stack::get_level() == quote_input_level) {
        c = read_char_in_copy_mode(&n);
        if (c == '"') {
          // Doubled quote: keep one and stay inside the argument.
          arg.append(c);
          c = read_char_in_copy_mode(&n);
        }
        else
          break;
      }
      else {
        // A zero return means a node was delivered through `n`.
        if (c == 0)
          arg.append(n);
        else {
          if (c == '\t' && quote_input_level == 0 && !was_warned)
          {
            warning(WARN_TAB, "tab character in parameterized escape"
                    " sequence");
            was_warned = true;
          }
          arg.append(c);
        }
        c = read_char_in_copy_mode(&n);
      }
    }
    mi->add_arg(arg, (c == ' '));
  }
}
4726 :
4727 928310 : void macro::invoke(symbol nm, bool do_not_want_next_token)
4728 : {
4729 928310 : macro_iterator *mi = new macro_iterator(nm, *this);
4730 928310 : decode_macro_call_arguments(mi);
4731 928310 : input_stack::push(mi);
4732 : // we must delay tok.next() in case the function has been called by
4733 : // do_request to assure proper handling of want_att_compat
4734 928310 : if (!do_not_want_next_token)
4735 925732 : tok.next();
4736 928310 : }
4737 :
4738 5961468 : macro *macro::to_macro()
4739 : {
4740 5961468 : return this;
4741 : }
4742 :
4743 1006 : bool macro::is_empty()
4744 : {
4745 1006 : return (is_empty_macro == true);
4746 : }
4747 :
4748 974054 : macro_iterator::macro_iterator(symbol s, macro &m,
4749 : const char *how_called,
4750 974054 : bool want_arguments_initialized)
4751 : : string_iterator(m, how_called, s), args(0 /* nullptr */), argc(0),
4752 974054 : with_break(was_invoked_with_regular_control_character)
4753 : {
4754 974054 : if (want_arguments_initialized) {
4755 1497 : arg_list *al = input_stack::get_arg_list();
4756 1497 : if (al != 0 /* nullptr */) {
4757 640 : args = new arg_list(al);
4758 640 : argc = input_stack::nargs();
4759 : }
4760 : }
4761 974054 : }
4762 :
4763 67634 : macro_iterator::macro_iterator()
4764 : : args(0 /* nullptr */), argc(0),
4765 67634 : with_break(was_invoked_with_regular_control_character)
4766 : {
4767 67634 : }
4768 :
4769 3123906 : macro_iterator::~macro_iterator()
4770 : {
4771 1824088 : while (args != 0 /* nullptr */) {
4772 782786 : arg_list *tem = args;
4773 782786 : args = args->next;
4774 782786 : delete tem;
4775 : }
4776 2082604 : }
4777 :
// Mappings set up by map_composite_character() and consulted by
// composite_glyph_name(): each key is a Unicode code sequence and its
// value is the code sequence to use in its place when it occurs as a
// component of a composite glyph name.
dictionary composite_dictionary(17);
4779 :
4780 31196 : static void map_composite_character()
4781 : {
4782 31196 : symbol from = read_identifier();
4783 31196 : if (from.is_null()) {
4784 0 : warning(WARN_MISSING, "composite character mapping request expects"
4785 : " arguments");
4786 0 : skip_line();
4787 0 : return;
4788 : }
4789 31196 : const char *fc = from.contents();
4790 31196 : const char *from_gn = glyph_name_to_unicode(fc);
4791 : char errbuf[ERRBUFSZ]; // C++03: char errbuf[ERRBUFSZ]()
4792 31196 : if (0 /* nullptr */ == from_gn) {
4793 0 : from_gn = valid_unicode_code_sequence(fc, errbuf);
4794 0 : if (0 /* nullptr */ == from_gn) {
4795 0 : error("invalid composite glyph name '%1': %2", fc, errbuf);
4796 0 : skip_line();
4797 0 : return;
4798 : }
4799 : }
4800 31196 : const char *from_decomposed = decompose_unicode(from_gn);
4801 31196 : if (from_decomposed != 0 /* nullptr */)
4802 0 : from_gn = &from_decomposed[1];
4803 31196 : symbol to = read_identifier();
4804 31196 : if (to.is_null()) {
4805 0 : composite_dictionary.remove(symbol(from_gn));
4806 0 : skip_line();
4807 0 : return;
4808 : }
4809 31196 : const char *tc = to.contents();
4810 31196 : const char *to_gn = glyph_name_to_unicode(tc);
4811 31196 : if (0 /* nullptr */ == to_gn) {
4812 31196 : to_gn = valid_unicode_code_sequence(tc, errbuf);
4813 31196 : if (0 /* nullptr */ == to_gn) {
4814 0 : error("invalid composite glyph name '%1': %2", tc, errbuf);
4815 0 : skip_line();
4816 0 : return;
4817 : }
4818 : }
4819 31196 : const char *to_decomposed = decompose_unicode(to_gn);
4820 31196 : if (to_decomposed != 0 /* nullptr */)
4821 0 : to_gn = &to_decomposed[1];
4822 31196 : if (strcmp(from_gn, to_gn) == 0)
4823 0 : composite_dictionary.remove(symbol(from_gn));
4824 : else
4825 31196 : (void) composite_dictionary.lookup(symbol(from_gn), (void *) to_gn);
4826 31196 : skip_line();
4827 : }
4828 :
4829 67634 : static symbol composite_glyph_name(symbol nm)
4830 : {
4831 67634 : macro_iterator *mi = new macro_iterator();
4832 67634 : decode_escape_sequence_arguments(mi);
4833 67634 : input_stack::push(mi);
4834 67634 : const char *nc = nm.contents();
4835 67634 : const char *gn = glyph_name_to_unicode(nc);
4836 67634 : if (0 /* nullptr */ == gn) {
4837 0 : gn = valid_unicode_code_sequence(nc);
4838 0 : if (0 /* nullptr */ == gn) {
4839 0 : error("invalid base character '%1' in composite character name",
4840 0 : nc);
4841 0 : return EMPTY_SYMBOL;
4842 : }
4843 : }
4844 67634 : const char *gn_decomposed = decompose_unicode(gn);
4845 135268 : string glyph_name(gn_decomposed ? &gn_decomposed[1] : gn);
4846 135268 : string gl;
4847 67634 : int n = input_stack::nargs();
4848 135268 : for (int i = 1; i <= n; i++) {
4849 67634 : glyph_name += '_';
4850 67634 : input_iterator *p = input_stack::get_arg(i);
4851 67634 : gl.clear();
4852 : int c;
4853 202152 : while ((c = p->get(0)) != EOF)
4854 134518 : if (c != DOUBLE_QUOTE)
4855 134518 : gl += c;
4856 67634 : gl += '\0';
4857 67634 : const char *gc = gl.contents();
4858 67634 : const char *u = glyph_name_to_unicode(gc);
4859 67634 : if (0 /* nullptr */ == u) {
4860 0 : u = valid_unicode_code_sequence(gc);
4861 0 : if (0 /* nullptr */ == u) {
4862 0 : error("invalid component '%1' in composite glyph name", gc);
4863 0 : return EMPTY_SYMBOL;
4864 : }
4865 : }
4866 67634 : const char *decomposed = decompose_unicode(u);
4867 67634 : if (decomposed != 0 /* nullptr */)
4868 0 : u = &decomposed[1];
4869 67634 : void *mapped_composite = composite_dictionary.lookup(symbol(u));
4870 67634 : if (mapped_composite != 0 /* nullptr */)
4871 67634 : u = static_cast<const char *>(mapped_composite);
4872 67634 : glyph_name += u;
4873 : }
4874 67634 : glyph_name += '\0';
4875 67634 : const char *groff_gn = unicode_to_glyph_name(glyph_name.contents());
4876 67634 : if (groff_gn != 0 /* nullptr */)
4877 2973 : return symbol(groff_gn);
4878 64661 : gl.clear();
4879 64661 : gl += 'u';
4880 64661 : gl += glyph_name;
4881 64661 : return symbol(gl.contents());
4882 : }
4883 :
4884 0 : static void print_composite_character_request()
4885 : {
4886 0 : dictionary_iterator iter(composite_dictionary);
4887 0 : symbol key;
4888 : char *value;
4889 : // We must use the nuclear `reinterpret_cast` operator because GNU
4890 : // troff's dictionary types use a pre-STL approach to containers.
4891 0 : while (iter.get(&key, reinterpret_cast<void **>(&value))) {
4892 0 : assert(!key.is_null());
4893 0 : assert(value != 0 /* nullptr */);
4894 0 : errprint("%1\t%2\n", key.contents(), value);
4895 : }
4896 0 : fflush(stderr);
4897 0 : skip_line();
4898 0 : }
4899 :
// Set by spring_trap() every time it is called, even if the trap is
// only recorded for later.
bool was_trap_sprung = false;
// While set (between postpone_traps() and unpostpone_traps()),
// spring_trap() records the trap instead of springing it.
static bool are_traps_postponed = false;
// The trap most recently recorded while traps were postponed; an
// earlier recorded trap is overwritten.
symbol postponed_trap;
4903 :
4904 42397 : void spring_trap(symbol nm)
4905 : {
4906 42397 : assert(!nm.is_null());
4907 42397 : was_trap_sprung = true;
4908 42397 : if (are_traps_postponed) {
4909 85 : postponed_trap = nm;
4910 85 : return;
4911 : }
4912 : static char buf[2] = { BEGIN_TRAP, '\0' };
4913 : static char buf2[2] = { END_TRAP, '\0' };
4914 42312 : input_stack::push(make_temp_iterator(buf2));
4915 42312 : request_or_macro *p = lookup_request(nm);
4916 : // We don't perform this validation at the time the trap is planted
4917 : // because a request name might be replaced by a macro by the time the
4918 : // trap springs.
4919 42312 : macro *m = p->to_macro();
4920 42312 : if (m != 0 /* nullptr */)
4921 42312 : input_stack::push(new macro_iterator(nm, *m, "trap-called macro"));
4922 : else
4923 0 : error("trap failed to spring: '%1' is a request", nm.contents());
4924 42312 : input_stack::push(make_temp_iterator(buf));
4925 : }
4926 :
4927 249530 : void postpone_traps()
4928 : {
4929 249530 : are_traps_postponed = true;
4930 249530 : }
4931 :
4932 249530 : bool unpostpone_traps()
4933 : {
4934 249530 : are_traps_postponed = false;
4935 249530 : if (!postponed_trap.is_null()) {
4936 85 : spring_trap(postponed_trap);
4937 85 : postponed_trap = NULL_SYMBOL;
4938 85 : return true;
4939 : }
4940 : else
4941 249445 : return false;
4942 : }
4943 :
4944 0 : void read_request()
4945 : {
4946 0 : macro_iterator *mi = new macro_iterator;
4947 0 : int reading_from_terminal = isatty(fileno(stdin));
4948 0 : int had_prompt = 0;
4949 0 : if (has_arg(true /* peek */)) {
4950 0 : int c = read_char_in_copy_mode(0 /* nullptr */);
4951 0 : while (c == ' ')
4952 0 : c = read_char_in_copy_mode(0 /* nullptr */);
4953 0 : while (c != EOF && c != '\n' && c != ' ') {
4954 0 : if (!is_invalid_input_char(c)) {
4955 0 : if (reading_from_terminal)
4956 0 : fputc(c, stderr);
4957 0 : had_prompt = 1;
4958 : }
4959 0 : c = read_char_in_copy_mode(0 /* nullptr */);
4960 : }
4961 0 : if (c == ' ') {
4962 0 : tok.make_space();
4963 0 : decode_macro_call_arguments(mi);
4964 : }
4965 : }
4966 0 : if (reading_from_terminal) {
4967 0 : fputc(had_prompt ? ':' : '\a', stderr);
4968 0 : fflush(stderr);
4969 : }
4970 0 : input_stack::push(mi);
4971 0 : macro mac;
4972 0 : int nl = 0;
4973 : int c;
4974 0 : while ((c = getchar()) != EOF) {
4975 0 : if (is_invalid_input_char(c))
4976 0 : warning(WARN_INPUT, "invalid input character code %1", int(c));
4977 : else {
4978 0 : if (c == '\n') {
4979 0 : if (nl != 0 /* nullptr */)
4980 0 : break;
4981 : else
4982 0 : nl = 1;
4983 : }
4984 : else
4985 0 : nl = 0;
4986 0 : mac.append(c);
4987 : }
4988 : }
4989 0 : if (reading_from_terminal)
4990 0 : clearerr(stdin);
4991 0 : input_stack::push(new string_iterator(mac));
4992 0 : tok.next();
4993 0 : }
4994 :
// How a definition treats an existing object of the same name.  With
// DEFINE_APPEND, do_define_string() starts from the existing contents;
// DEFINE_IGNORE is not used in this part of the file.
enum define_mode { DEFINE_NORMAL, DEFINE_APPEND, DEFINE_IGNORE };
// Not used in this part of the file.
enum calling_mode { CALLING_NORMAL, CALLING_INDIRECT };
// Whether do_define_string() brackets the definition with mode bytes:
// COMP_DISABLE uses PUSH_GROFF_MODE, COMP_ENABLE uses PUSH_COMP_MODE
// (each closed by POP_GROFFCOMP_MODE), and COMP_IGNORE adds none.
enum comp_mode { COMP_IGNORE, COMP_DISABLE, COMP_ENABLE };
4998 :
4999 1549109 : static void do_define_string(define_mode mode, comp_mode comp)
5000 : {
5001 1549109 : symbol nm;
5002 1549109 : node *n = 0 /* nullptr */;
5003 : int c;
5004 1549109 : nm = read_identifier(true /* required */);
5005 1549109 : if (nm.is_null()) {
5006 0 : skip_line();
5007 0 : return;
5008 : }
5009 1549109 : if (tok.is_newline())
5010 268866 : c = '\n';
5011 1280243 : else if (tok.is_tab())
5012 0 : c = '\t';
5013 1280243 : else if (!tok.is_space()) {
5014 0 : skip_line();
5015 0 : return;
5016 : }
5017 : else
5018 1280243 : c = read_char_in_copy_mode(&n);
5019 1809401 : while (c == ' ')
5020 260292 : c = read_char_in_copy_mode(&n);
5021 1549109 : if (c == '"')
5022 329971 : c = read_char_in_copy_mode(&n);
5023 3098218 : macro mac;
5024 : request_or_macro *rm
5025 1549109 : = static_cast<request_or_macro *>(request_dictionary.lookup(nm));
5026 1549109 : macro *mm = rm ? rm->to_macro() : 0 /* nullptr */;
5027 1549109 : if (mode == DEFINE_APPEND && mm)
5028 141515 : mac = *mm;
5029 1549109 : if (comp == COMP_DISABLE)
5030 229 : mac.append(PUSH_GROFF_MODE);
5031 1548880 : else if (comp == COMP_ENABLE)
5032 1 : mac.append(PUSH_COMP_MODE);
5033 22740199 : while (c != '\n' && c != EOF) {
5034 21191090 : if (c == 0)
5035 160 : mac.append(n);
5036 : else
5037 21190930 : mac.append((unsigned char) c);
5038 21191090 : c = read_char_in_copy_mode(&n);
5039 : }
5040 1549109 : if (comp == COMP_DISABLE || comp == COMP_ENABLE)
5041 230 : mac.append(POP_GROFFCOMP_MODE);
5042 1549109 : if (!mm) {
5043 240055 : mm = new macro;
5044 240055 : request_dictionary.define(nm, mm);
5045 : }
5046 1549109 : *mm = mac;
5047 1549109 : tok.next();
5048 : }
5049 :
5050 1407215 : static void define_string()
5051 : {
5052 1407215 : do_define_string(DEFINE_NORMAL,
5053 : want_att_compat ? COMP_ENABLE : COMP_IGNORE);
5054 1407215 : }
5055 :
5056 0 : static void define_nocomp_string()
5057 : {
5058 0 : do_define_string(DEFINE_NORMAL, COMP_DISABLE);
5059 0 : }
5060 :
5061 141665 : static void append_string()
5062 : {
5063 141665 : do_define_string(DEFINE_APPEND,
5064 : want_att_compat ? COMP_ENABLE : COMP_IGNORE);
5065 141665 : }
5066 :
5067 229 : static void append_nocomp_string()
5068 : {
5069 229 : do_define_string(DEFINE_APPEND, COMP_DISABLE);
5070 229 : }
5071 :
5072 325453 : static const char *character_mode_description(char_mode mode)
5073 : {
5074 : // C++11: There may be a better way to do this with an enum class;
5075 : // we could then store these string literals inside `char_mode`.
5076 325453 : const char *modestr = 0 /* nullptr */;
5077 325453 : switch (mode) {
5078 10808 : case CHAR_NORMAL:
5079 10808 : modestr = "";
5080 10808 : break;
5081 300946 : case CHAR_FALLBACK:
5082 300946 : modestr = " fallback";
5083 300946 : break;
5084 3207 : case CHAR_SPECIAL_FALLBACK:
5085 3207 : modestr = " special fallback";
5086 3207 : break;
5087 10492 : case CHAR_FONT_SPECIFIC_FALLBACK:
5088 10492 : modestr = " font-specific fallback";
5089 10492 : break;
5090 0 : default:
5091 0 : assert(0 == "unhandled case of character mode");
5092 : break;
5093 : }
5094 325453 : return modestr;
5095 : }
5096 :
5097 325449 : void define_character(char_mode mode, const char *font_name)
5098 : {
5099 325449 : const char *modestr = character_mode_description(mode);
5100 325449 : tok.skip_spaces();
5101 325449 : charinfo *ci = tok.get_charinfo(true /* required */);
5102 325449 : if (0 /* nullptr */ == ci) {
5103 0 : assert(0 == "attempted to use token without charinfo in character"
5104 : " definition request");
5105 : skip_line();
5106 0 : return;
5107 : }
5108 : // TODO: If `ci` is already a character class, clobber it.
5109 325449 : if (font_name != 0 /* nullptr */) {
5110 10492 : string s(font_name);
5111 10492 : s += ' ';
5112 10492 : s += ci->nm.contents();
5113 10492 : s += '\0';
5114 10492 : ci = lookup_charinfo(symbol(s.contents()));
5115 : }
5116 325449 : tok.next();
5117 : int c;
5118 325449 : node *n = 0 /* nullptr */;
5119 325449 : if (tok.is_newline())
5120 0 : c = '\n';
5121 325449 : else if (tok.is_tab())
5122 0 : c = '\t';
5123 325449 : else if (!tok.is_space()) {
5124 0 : error("ignoring invalid%1 character definition; expected an"
5125 : " ordinary, indexed, or special character to define, got %2",
5126 0 : modestr, tok.description());
5127 0 : skip_line();
5128 0 : return;
5129 : }
5130 : else
5131 325449 : c = read_char_in_copy_mode(&n);
5132 343139 : while (c == ' ' || c == '\t')
5133 17690 : c = read_char_in_copy_mode(&n);
5134 325449 : if (c == '"')
5135 1336 : c = read_char_in_copy_mode(&n);
5136 325449 : macro *m = new macro;
5137 : // Construct a macro from input characters; if the input character
5138 : // code is 0, we've read a node--append that.
5139 3060531 : while (c != '\n' && c != EOF) {
5140 2735082 : if (c != 0)
5141 2735078 : m->append(static_cast<unsigned char>(c));
5142 : else
5143 4 : m->append(n);
5144 2735082 : c = read_char_in_copy_mode(&n);
5145 : }
5146 : // Assign the macro to the character, discarding any previous macro.
5147 325449 : m = ci->set_macro(m, mode);
5148 325449 : if (m != 0 /* nullptr */)
5149 19537 : delete m;
5150 325449 : tok.next();
5151 : }
5152 :
5153 10804 : static void define_character_request()
5154 : {
5155 10804 : if (!has_arg(true /* peek; we want to read in copy mode */)) {
5156 0 : warning(WARN_MISSING, "character definition request expects"
5157 : " arguments");
5158 0 : skip_line();
5159 0 : return;
5160 : }
5161 10804 : define_character(CHAR_NORMAL);
5162 : }
5163 :
5164 300946 : static void define_fallback_character_request()
5165 : {
5166 300946 : if (!has_arg(true /* peek; we want to read in copy mode */)) {
5167 0 : warning(WARN_MISSING, "fallback character definition request"
5168 : " expects arguments");
5169 0 : skip_line();
5170 0 : return;
5171 : }
5172 300946 : define_character(CHAR_FALLBACK);
5173 : }
5174 :
5175 3207 : static void define_special_character_request()
5176 : {
5177 3207 : if (!has_arg(true /* peek; we want to read in copy mode */)) {
5178 0 : warning(WARN_MISSING, "special character definition request expects"
5179 : " arguments");
5180 0 : skip_line();
5181 0 : return;
5182 : }
5183 3207 : define_character(CHAR_SPECIAL_FALLBACK);
5184 : }
5185 :
5186 5 : static void print_character_request()
5187 : {
5188 5 : if (!has_arg()) {
5189 0 : warning(WARN_MISSING, "character report request expects arguments");
5190 0 : skip_line();
5191 0 : return;
5192 : }
5193 : charinfo *ci;
5194 : do {
5195 10 : tok.skip_spaces();
5196 10 : if (tok.is_newline() || tok.is_eof())
5197 5 : break;
5198 5 : if (!tok.is_any_character()) {
5199 0 : error("character report request expects characters or character"
5200 0 : " classes as arguments; got %1", tok.description());
5201 0 : break;
5202 : }
5203 5 : ci = tok.get_charinfo(false /* required */,
5204 : true /* suppress creation */);
5205 5 : if (ci != 0 /* nullptr */) {
5206 5 : errprint("%1\n", tok.description());
5207 5 : fflush(stderr);
5208 5 : ci->dump();
5209 : }
5210 5 : tok.next();
5211 : } while (true);
5212 5 : skip_line();
5213 : }
5214 :
5215 8749 : static void remove_character()
5216 : {
5217 8749 : if (!has_arg()) {
5218 0 : warning(WARN_MISSING, "character definition removal request expects"
5219 : " arguments");
5220 0 : skip_line();
5221 0 : return;
5222 : }
5223 17514 : while (!tok.is_newline() && !tok.is_eof()) {
5224 8765 : if (!tok.is_space() && !tok.is_tab()) {
5225 8757 : if (tok.is_any_character()) {
5226 8757 : charinfo *ci = tok.get_charinfo(true /* required */,
5227 : true /* suppress creation */);
5228 8757 : if (0 /* nullptr */ == ci)
5229 0 : warning(WARN_CHAR, "%1 is not defined", tok.description());
5230 : else {
5231 8757 : macro *m = ci->set_macro(0 /* nullptr */);
5232 8757 : if (m != 0 /* nullptr */)
5233 228 : delete m;
5234 : }
5235 : }
5236 : else {
5237 0 : error("cannot remove character; %1 is not a character",
5238 0 : tok.description());
5239 0 : break;
5240 : }
5241 : }
5242 8765 : tok.next();
5243 : }
5244 8749 : skip_line();
5245 : }
5246 :
5247 4419563 : static void interpolate_string(symbol nm)
5248 : {
5249 4419563 : request_or_macro *p = lookup_request(nm);
5250 4419563 : macro *m = p->to_macro();
5251 4419563 : if (0 /* nullptr */ == m)
5252 0 : error("cannot interpolate request '%1'", nm.contents());
5253 : else {
5254 4419563 : if (m->is_string()) {
5255 4418066 : string_iterator *si = new string_iterator(*m, "string", nm);
5256 4418066 : input_stack::push(si);
5257 : }
5258 : else {
5259 : // if a macro is called as a string, \$0 doesn't get changed
5260 1497 : macro_iterator *mi = new macro_iterator(input_stack::get_macro_name(),
5261 1497 : *m, "string", 1);
5262 1497 : input_stack::push(mi);
5263 : }
5264 : }
5265 4419563 : }
5266 :
5267 1935 : static void interpolate_string_with_args(symbol nm)
5268 : {
5269 1935 : request_or_macro *p = lookup_request(nm);
5270 1935 : macro *m = p->to_macro();
5271 1935 : if (0 /* nullptr */ == m)
5272 0 : error("cannot interpolate request '%1'", nm.contents());
5273 : else {
5274 1935 : macro_iterator *mi = new macro_iterator(nm, *m);
5275 1935 : decode_escape_sequence_arguments(mi);
5276 1935 : input_stack::push(mi);
5277 : }
5278 1935 : }
5279 :
5280 2534324 : static void interpolate_positional_parameter(symbol nm)
5281 : {
5282 2534324 : const char *s = nm.contents();
5283 2534324 : if (0 /* nullptr */ == s || '\0' == *s)
5284 0 : copy_mode_error("missing positional argument number in copy mode");
5285 2534324 : else if (s[1] == 0 && csdigit(s[0]))
5286 2382603 : input_stack::push(input_stack::get_arg(s[0] - '0'));
5287 151721 : else if (s[0] == '*' && s[1] == '\0') {
5288 62404 : int limit = input_stack::nargs();
5289 124808 : string args;
5290 145549 : for (int i = 1; i <= limit; i++) {
5291 83145 : input_iterator *p = input_stack::get_arg(i);
5292 : int c;
5293 902333 : while ((c = p->get(0)) != EOF)
5294 819188 : if (c != DOUBLE_QUOTE)
5295 776777 : args += c;
5296 83145 : if (i != limit)
5297 22315 : args += ' ';
5298 83145 : delete p;
5299 : }
5300 62404 : if (limit > 0) {
5301 60830 : args += '\0';
5302 60830 : input_stack::push(make_temp_iterator(args.contents()));
5303 62404 : }
5304 : }
5305 89317 : else if (s[0] == '@' && s[1] == '\0') {
5306 88729 : int limit = input_stack::nargs();
5307 177458 : string args;
5308 219652 : for (int i = 1; i <= limit; i++) {
5309 130923 : args += '"';
5310 130923 : args += char(BEGIN_QUOTE);
5311 130923 : input_iterator *p = input_stack::get_arg(i);
5312 : int c;
5313 1985039 : while ((c = p->get(0)) != EOF)
5314 1854116 : if (c != DOUBLE_QUOTE)
5315 1704904 : args += c;
5316 130923 : args += char(END_QUOTE);
5317 130923 : args += '"';
5318 130923 : if (i != limit)
5319 80995 : args += ' ';
5320 130923 : delete p;
5321 : }
5322 88729 : if (limit > 0) {
5323 49928 : args += '\0';
5324 49928 : input_stack::push(make_temp_iterator(args.contents()));
5325 88729 : }
5326 : }
5327 588 : else if (s[0] == '^' && s[1] == '\0') {
5328 0 : int limit = input_stack::nargs();
5329 0 : string args;
5330 0 : int c = input_stack::peek();
5331 0 : for (int i = 1; i <= limit; i++) {
5332 0 : input_iterator *p = input_stack::get_arg(i);
5333 0 : while ((c = p->get(0)) != EOF) {
5334 0 : if (c == DOUBLE_QUOTE)
5335 0 : c = '"';
5336 0 : args += c;
5337 : }
5338 0 : if (input_stack::space_follows_arg(i))
5339 0 : args += ' ';
5340 0 : delete p;
5341 : }
5342 0 : if (limit > 0) {
5343 0 : args += '\0';
5344 0 : input_stack::push(make_temp_iterator(args.contents()));
5345 0 : }
5346 : }
5347 : else {
5348 : const char *p;
5349 588 : bool is_valid = true;
5350 588 : bool is_printable = true;
5351 1764 : for (p = s; p != 0 /* nullptr */ && *p != '\0'; p++) {
5352 1176 : if (!csdigit(*p))
5353 0 : is_valid = false;
5354 1176 : if (!csprint(*p))
5355 0 : is_printable = false;
5356 : }
5357 588 : if (!is_valid) {
5358 : static const char msg[] = "invalid positional argument number in"
5359 : " copy mode";
5360 0 : if (is_printable)
5361 0 : copy_mode_error("%1 '%2'", msg, s);
5362 : else
5363 0 : copy_mode_error("%1 (unprintable)", msg);
5364 : }
5365 : else
5366 588 : input_stack::push(input_stack::get_arg(atoi(s)));
5367 : }
5368 2534324 : }
5369 :
5370 615 : void handle_first_page_transition()
5371 : {
5372 615 : push_token(tok);
5373 615 : topdiv->begin_page();
5374 615 : }
5375 :
5376 : // We push back a token by wrapping it up in a token_node, and
5377 : // wrapping that up in a string_iterator.
5378 :
5379 1657 : static void push_token(const token &t)
5380 : {
5381 1657 : macro m;
5382 1657 : m.append(new token_node(t));
5383 1657 : input_stack::push(new string_iterator(m));
5384 1657 : }
5385 :
5386 5087 : void push_page_ejector()
5387 : {
5388 : static char buf[2] = { PAGE_EJECTOR, '\0' };
5389 5087 : input_stack::push(make_temp_iterator(buf));
5390 5087 : }
5391 :
5392 125 : void handle_initial_request(unsigned char code)
5393 : {
5394 : char buf[2];
5395 125 : buf[0] = code;
5396 125 : buf[1] = '\0';
5397 250 : macro mac;
5398 125 : mac.append(new token_node(tok));
5399 125 : input_stack::push(new string_iterator(mac));
5400 125 : input_stack::push(make_temp_iterator(buf));
5401 125 : topdiv->begin_page();
5402 125 : tok.next();
5403 125 : }
5404 :
5405 125 : void handle_initial_title()
5406 : {
5407 125 : handle_initial_request(TITLE_REQUEST);
5408 125 : }
5409 :
5410 103757 : static void do_define_macro(define_mode mode, calling_mode calling,
5411 : comp_mode comp)
5412 : {
5413 103757 : symbol nm, term, dot_symbol(".");
5414 103757 : if (calling == CALLING_INDIRECT) {
5415 0 : symbol temp1 = read_identifier(true /* required */);
5416 0 : if (temp1.is_null()) {
5417 0 : skip_line();
5418 0 : return;
5419 : }
5420 0 : symbol temp2 = read_identifier();
5421 0 : input_stack::push(make_temp_iterator("\n"));
5422 0 : if (!temp2.is_null()) {
5423 0 : interpolate_string(temp2);
5424 0 : input_stack::push(make_temp_iterator(" "));
5425 : }
5426 0 : interpolate_string(temp1);
5427 0 : input_stack::push(make_temp_iterator(" "));
5428 0 : tok.next();
5429 : }
5430 103757 : if (mode == DEFINE_NORMAL || mode == DEFINE_APPEND) {
5431 100178 : nm = read_identifier(true /* required */);
5432 100178 : if (nm.is_null()) {
5433 0 : skip_line();
5434 0 : return;
5435 : }
5436 : }
5437 103757 : term = read_identifier(); // terminating name
5438 103757 : if (term.is_null())
5439 88110 : term = dot_symbol;
5440 104345 : while (!tok.is_newline() && !tok.is_eof())
5441 588 : tok.next();
5442 : const char *start_filename;
5443 : int start_lineno;
5444 : bool have_start_location
5445 103757 : = input_stack::get_location(false /* allow_macro */,
5446 : &start_filename,
5447 : &start_lineno);
5448 : node *n;
5449 : // doing this here makes the line numbers come out right
5450 103757 : int c = read_char_in_copy_mode(&n, true /* is_defining */);
5451 207514 : macro mac;
5452 103757 : macro *mm = 0 /* nullptr */;
5453 103757 : if (mode == DEFINE_NORMAL || mode == DEFINE_APPEND) {
5454 : request_or_macro *rm =
5455 100178 : static_cast<request_or_macro *>(request_dictionary.lookup(nm));
5456 100178 : if (rm != 0 /* nullptr */)
5457 4379 : mm = rm->to_macro();
5458 100178 : if (mm != 0 /* nullptr */ && mode == DEFINE_APPEND)
5459 93 : mac = *mm;
5460 : }
5461 103757 : bool reading_beginning_of_input_line = true;
5462 103757 : if (comp == COMP_DISABLE)
5463 10623 : mac.append(PUSH_GROFF_MODE);
5464 93134 : else if (comp == COMP_ENABLE)
5465 0 : mac.append(PUSH_COMP_MODE);
5466 : for (;;) {
5467 42760001 : if (c == '\n')
5468 1461321 : mac.clear_string_flag();
5469 43099083 : while (c == ESCAPE_NEWLINE) {
5470 339082 : if (mode == DEFINE_NORMAL || mode == DEFINE_APPEND)
5471 : // TODO: grochar; may need NFD decomposition and UTF-8 encoding
5472 339059 : mac.append(static_cast<unsigned char>(c));
5473 339082 : c = read_char_in_copy_mode(&n, true /* is_defining */);
5474 : }
5475 42760001 : if (reading_beginning_of_input_line && (c == '.')) {
5476 1744869 : const char *s = term.contents();
5477 1744869 : int d = '\0';
5478 : // see if it matches term
5479 1744869 : int i = 0;
5480 1744869 : if (s[0] != '\0') {
5481 7704581 : while (((d = read_char_in_copy_mode(&n)) == ' ') || (d == '\t'))
5482 : ;
5483 1744869 : if (s[0] == d) {
5484 136551 : for (i = 1; s[i] != '\0'; i++) {
5485 32589 : d = read_char_in_copy_mode(&n);
5486 32589 : if (s[i] != d)
5487 964 : break;
5488 : }
5489 : }
5490 : }
5491 3489738 : if (s[i] == '\0'
5492 1848831 : && (((i == 2) && want_att_compat)
5493 103962 : || ((d = read_char_in_copy_mode(&n)) == ' ')
5494 103762 : || (d == '\n'))) { // we found it
5495 103757 : if (d == '\n')
5496 103557 : tok.make_newline();
5497 : else
5498 200 : tok.make_space();
5499 103757 : if (mode == DEFINE_APPEND || mode == DEFINE_NORMAL) {
5500 100178 : if (!mm) {
5501 95978 : mm = new macro;
5502 95978 : request_dictionary.define(nm, mm);
5503 : }
5504 100178 : if (comp == COMP_DISABLE || comp == COMP_ENABLE)
5505 10623 : mac.append(POP_GROFFCOMP_MODE);
5506 100178 : *mm = mac;
5507 : }
5508 103757 : if (term != dot_symbol) {
5509 15647 : want_input_ignored = false;
5510 15647 : interpolate_macro(term);
5511 : }
5512 : else
5513 88110 : skip_line();
5514 103757 : return;
5515 : }
5516 1641112 : if ((mode == DEFINE_APPEND) || (mode == DEFINE_NORMAL)) {
5517 : // TODO: grochar; may need NFD decomposition and UTF-8 encoding
5518 1623641 : mac.append(static_cast<unsigned char>(c));
5519 1624922 : for (int j = 0; j < i; j++)
5520 : // TODO: grochar; may need NFD decomposition & UTF-8 encoding
5521 1281 : mac.append(static_cast<unsigned char>(s[j]));
5522 : }
5523 1641112 : c = d;
5524 : }
5525 42656244 : if (c == EOF) {
5526 0 : if ((mode == DEFINE_APPEND) || (mode == DEFINE_NORMAL)) {
5527 0 : if (have_start_location)
5528 0 : error_with_file_and_line(start_filename, start_lineno,
5529 : "encountered end of file"
5530 : " while defining macro '%1'",
5531 0 : nm.contents());
5532 : else
5533 0 : error("end of file while defining macro '%1'", nm.contents());
5534 : }
5535 : else {
5536 : static const char msg[] = "encountered end of file while"
5537 : " ignoring input";
5538 0 : if (have_start_location)
5539 0 : error_with_file_and_line(start_filename, start_lineno, msg);
5540 : else
5541 0 : error(msg);
5542 : }
5543 0 : tok.next();
5544 0 : return;
5545 : }
5546 42656244 : if ((mode == DEFINE_NORMAL) || (mode == DEFINE_APPEND)) {
5547 41529517 : if (c == '\0')
5548 0 : mac.append(n);
5549 : else
5550 : // TODO: grochar; may need NFD decomposition and UTF-8 encoding
5551 41529517 : mac.append(static_cast<unsigned char>(c));
5552 : }
5553 42656244 : reading_beginning_of_input_line = (c == '\n');
5554 42656244 : c = read_char_in_copy_mode(&n, true /* is_defining */);
5555 42656244 : }
5556 : }
5557 :
5558 89447 : static void define_macro()
5559 : {
5560 89447 : do_define_macro(DEFINE_NORMAL, CALLING_NORMAL,
5561 : want_att_compat ? COMP_ENABLE : COMP_IGNORE);
5562 89447 : }
5563 :
5564 10623 : static void define_nocomp_macro()
5565 : {
5566 10623 : do_define_macro(DEFINE_NORMAL, CALLING_NORMAL, COMP_DISABLE);
5567 10623 : }
5568 :
5569 0 : static void define_indirect_macro()
5570 : {
5571 0 : do_define_macro(DEFINE_NORMAL, CALLING_INDIRECT,
5572 : want_att_compat ? COMP_ENABLE : COMP_IGNORE);
5573 0 : }
5574 :
5575 0 : static void define_indirect_nocomp_macro()
5576 : {
5577 0 : do_define_macro(DEFINE_NORMAL, CALLING_INDIRECT, COMP_DISABLE);
5578 0 : }
5579 :
5580 108 : static void append_macro()
5581 : {
5582 108 : do_define_macro(DEFINE_APPEND, CALLING_NORMAL,
5583 : want_att_compat ? COMP_ENABLE : COMP_IGNORE);
5584 108 : }
5585 :
5586 0 : static void append_nocomp_macro()
5587 : {
5588 0 : do_define_macro(DEFINE_APPEND, CALLING_NORMAL, COMP_DISABLE);
5589 0 : }
5590 :
5591 0 : static void append_indirect_macro()
5592 : {
5593 0 : do_define_macro(DEFINE_APPEND, CALLING_INDIRECT,
5594 : want_att_compat ? COMP_ENABLE : COMP_IGNORE);
5595 0 : }
5596 :
5597 0 : static void append_indirect_nocomp_macro()
5598 : {
5599 0 : do_define_macro(DEFINE_APPEND, CALLING_INDIRECT, COMP_DISABLE);
5600 0 : }
5601 :
5602 3579 : void ignore()
5603 : {
5604 3579 : want_input_ignored = true;
5605 3579 : do_define_macro(DEFINE_IGNORE, CALLING_NORMAL, COMP_IGNORE);
5606 3579 : want_input_ignored = false;
5607 3579 : }
5608 :
5609 136468 : void remove_macro()
5610 : {
5611 136468 : if (!has_arg()) {
5612 0 : warning(WARN_MISSING, "name removal request expects arguments");
5613 0 : skip_line();
5614 0 : return;
5615 : }
5616 : for (;;) {
5617 295920 : symbol s = read_identifier();
5618 295920 : if (s.is_null())
5619 136468 : break;
5620 159452 : request_dictionary.remove(s);
5621 159452 : }
5622 136468 : skip_line();
5623 : }
5624 :
5625 716 : void rename_macro()
5626 : {
5627 716 : if (!has_arg()) {
5628 0 : warning(WARN_MISSING, "renaming request expects arguments");
5629 0 : skip_line();
5630 0 : return;
5631 : }
5632 716 : symbol s1 = read_identifier();
5633 716 : assert(s1 != 0 /* nullptr */);
5634 716 : if (!s1.is_null()) {
5635 716 : symbol s2 = read_identifier();
5636 716 : if (s2.is_null())
5637 0 : warning(WARN_MISSING, "renaming request expects identifier of"
5638 : " existing request, macro, string, or diversion as"
5639 : " second argument");
5640 : else
5641 716 : request_dictionary.rename(s1, s2);
5642 : }
5643 716 : skip_line();
5644 : }
5645 :
5646 44745 : void alias_macro()
5647 : {
5648 44745 : if (!has_arg()) {
5649 0 : warning(WARN_MISSING, "name aliasing request expects arguments");
5650 0 : skip_line();
5651 0 : return;
5652 : }
5653 44745 : symbol s1 = read_identifier();
5654 44745 : assert(s1 != 0 /* nullptr */);
5655 44745 : if (!s1.is_null()) {
5656 44745 : symbol s2 = read_identifier();
5657 44745 : if (s2.is_null())
5658 0 : warning(WARN_MISSING, "name aliasing request expects identifier"
5659 : " of existing request, macro, string, or diversion as"
5660 : " second argument");
5661 : else {
5662 44745 : if (!request_dictionary.alias(s1, s2))
5663 0 : error("cannot alias undefined name '%1'", s2.contents());
5664 : }
5665 : }
5666 44745 : skip_line();
5667 : }
5668 :
5669 1006 : void chop_macro()
5670 : {
5671 1006 : if (!has_arg()) {
5672 0 : warning(WARN_MISSING, "chop request expects an argument");
5673 0 : skip_line();
5674 0 : return;
5675 : }
5676 1006 : symbol s = read_identifier();
5677 1006 : assert(s != 0 /* nullptr */);
5678 1006 : if (!s.is_null()) {
5679 1006 : request_or_macro *p = lookup_request(s);
5680 1006 : macro *m = p->to_macro();
5681 1006 : if (0 /* nullptr */ == m)
5682 0 : error("cannot chop request '%1'", s.contents());
5683 1006 : else if (m->is_empty())
5684 0 : error("cannot chop empty %1 '%2'",
5685 0 : (m->is_diversion() ? "diversion" : "macro or string"),
5686 0 : s.contents());
5687 : else {
5688 1006 : int have_restore = 0;
5689 : // We have to check for additional save/restore pairs which could
5690 : // be there due to empty am1 requests.
5691 : for (;;) {
5692 1006 : if (m->get(m->len - 1) != POP_GROFFCOMP_MODE)
5693 1006 : break;
5694 0 : have_restore = 1;
5695 0 : m->len -= 1;
5696 0 : if (m->get(m->len - 1) != PUSH_GROFF_MODE
5697 0 : && m->get(m->len - 1) != PUSH_COMP_MODE)
5698 0 : break;
5699 0 : have_restore = 0;
5700 0 : m->len -= 1;
5701 0 : if (m->len == 0)
5702 0 : break;
5703 : }
5704 1006 : if (m->len == 0)
5705 0 : error("cannot chop empty object '%1'", s.contents());
5706 : else {
5707 1006 : if (have_restore)
5708 0 : m->set(POP_GROFFCOMP_MODE, m->len - 1);
5709 : else
5710 1006 : m->len -= 1;
5711 : }
5712 : }
5713 : }
5714 1006 : skip_line();
5715 : }
5716 :
// Direction of a string case transformation.
enum case_xform_mode {
  STRING_UPCASE,   // map with cmupper()
  STRING_DOWNCASE  // map with cmlower()
};
5718 :
5719 : // Case-transform each byte of the string argument's contents.
5720 201 : void do_string_case_transform(case_xform_mode mode)
5721 : {
5722 201 : assert((mode == STRING_DOWNCASE) || (mode == STRING_UPCASE));
5723 201 : symbol s = read_identifier();
5724 201 : assert(s != 0 /* nullptr */);
5725 201 : if (s.is_null()) {
5726 0 : skip_line();
5727 1 : return;
5728 : }
5729 201 : request_or_macro *p = lookup_request(s);
5730 201 : macro *m = p->to_macro();
5731 201 : if (0 /* nullptr */ == m) {
5732 1 : error("cannot apply string case transformation to request '%1'",
5733 1 : s.contents());
5734 1 : skip_line();
5735 1 : return;
5736 : }
5737 400 : string_iterator iter1(*m);
5738 200 : macro *mac = new macro;
5739 200 : int len = m->macro::length();
5740 1975 : for (int l = 0; l < len; l++) {
5741 1775 : int nc, c = iter1.get(0);
5742 1775 : if (c == PUSH_GROFF_MODE
5743 1775 : || c == PUSH_COMP_MODE
5744 1775 : || c == POP_GROFFCOMP_MODE)
5745 0 : nc = c;
5746 1775 : else if (c == EOF)
5747 0 : break;
5748 : else
5749 1775 : if (mode == STRING_DOWNCASE)
5750 1720 : nc = cmlower(c);
5751 : else
5752 55 : nc = cmupper(c);
5753 1775 : mac->append(nc);
5754 : }
5755 200 : request_dictionary.define(s, mac);
5756 200 : tok.next();
5757 : }
5758 :
5759 : // Uppercase-transform each byte of the string argument's contents.
5760 195 : void stringdown_request() {
5761 195 : if (!has_arg()) {
5762 0 : warning(WARN_MISSING, "string downcasing request expects an"
5763 : " argument");
5764 0 : skip_line();
5765 0 : return;
5766 : }
5767 195 : do_string_case_transform(STRING_DOWNCASE);
5768 : }
5769 :
5770 : // Lowercase-transform each byte of the string argument's contents.
5771 6 : void stringup_request() {
5772 6 : if (!has_arg()) {
5773 0 : warning(WARN_MISSING, "string upcasing request expects an"
5774 : " argument");
5775 0 : skip_line();
5776 0 : return;
5777 : }
5778 6 : do_string_case_transform(STRING_UPCASE);
5779 : }
5780 :
5781 173416 : void substring_request()
5782 : {
5783 173416 : if (!has_arg()) {
5784 0 : warning(WARN_MISSING, "substring request expects arguments");
5785 0 : skip_line();
5786 0 : return;
5787 : }
5788 : int start; // 0, 1, ..., n-1 or -1, -2, ...
5789 173416 : symbol s = read_identifier();
5790 173416 : assert(s != 0 /* nullptr */);
5791 173416 : if (!s.is_null() && read_integer(&start)) {
5792 173416 : request_or_macro *p = lookup_request(s);
5793 173416 : macro *m = p->to_macro();
5794 173416 : if (0 /* nullptr */ == m)
5795 0 : error("cannot extract substring of request '%1'", s.contents());
5796 : else {
5797 173416 : int end = -1;
5798 173416 : if (!has_arg() || read_integer(&end)) {
5799 173416 : int real_length = 0; // 1, 2, ..., n
5800 173416 : string_iterator iter1(*m);
5801 14598352 : for (int l = 0; l < m->len; l++) {
5802 14424936 : int c = iter1.get(0);
5803 14424936 : if (c == PUSH_GROFF_MODE
5804 14424936 : || c == PUSH_COMP_MODE
5805 14424936 : || c == POP_GROFFCOMP_MODE)
5806 0 : continue;
5807 14424936 : if (c == EOF)
5808 0 : break;
5809 14424936 : real_length++;
5810 : }
5811 173416 : if (start < 0)
5812 1450 : start += real_length;
5813 173416 : if (end < 0)
5814 22070 : end += real_length;
5815 173416 : if (start > end) {
5816 81 : int tem = start;
5817 81 : start = end;
5818 81 : end = tem;
5819 : }
5820 173416 : if (start >= real_length || end < 0) {
5821 0 : warning(WARN_RANGE,
5822 : "start and end index of substring out of range");
5823 0 : m->len = 0;
5824 0 : if (m->p) {
5825 0 : if (--(m->p->count) <= 0)
5826 0 : delete m->p;
5827 0 : m->p = 0;
5828 : }
5829 0 : skip_line();
5830 0 : return;
5831 : }
5832 173416 : if (start < 0) {
5833 3 : warning(WARN_RANGE,
5834 : "start index of substring out of range, set to 0");
5835 3 : start = 0;
5836 : }
5837 173416 : if (end >= real_length) {
5838 7 : warning(WARN_RANGE,
5839 : "end index of substring out of range, set to string length");
5840 7 : end = real_length - 1;
5841 : }
5842 : // now extract the substring
5843 346832 : string_iterator iter(*m);
5844 : int i;
5845 6951012 : for (i = 0; i < start; i++) {
5846 6777596 : int c = iter.get(0 /* nullptr */);
5847 0 : while (c == PUSH_GROFF_MODE
5848 6777596 : || c == PUSH_COMP_MODE
5849 13555192 : || c == POP_GROFFCOMP_MODE)
5850 0 : c = iter.get(0 /* nullptr */);
5851 6777596 : if (c == EOF)
5852 0 : break;
5853 : }
5854 346832 : macro mac;
5855 656154 : for (; i <= end; i++) {
5856 482738 : node *nd = 0 /* nullptr */;
5857 482738 : int c = iter.get(&nd);
5858 0 : while (c == PUSH_GROFF_MODE
5859 482738 : || c == PUSH_COMP_MODE
5860 965476 : || c == POP_GROFFCOMP_MODE)
5861 0 : c = iter.get(0 /* nullptr */);
5862 482738 : if (c == EOF)
5863 0 : break;
5864 482738 : if (c == 0)
5865 0 : mac.append(nd);
5866 : else
5867 482738 : mac.append((unsigned char) c);
5868 : }
5869 173416 : *m = mac;
5870 : }
5871 : }
5872 : }
5873 173416 : skip_line();
5874 : }
5875 :
5876 143341 : void length_request()
5877 : {
5878 143341 : if (!has_arg()) {
5879 0 : warning(WARN_MISSING, "length computation request expects"
5880 : " arguments");
5881 0 : skip_line();
5882 0 : return;
5883 : }
5884 143341 : symbol ret;
5885 143341 : ret = read_identifier();
5886 143341 : if (ret.is_null()) {
5887 : // The identifier was garbage, like `a\&b`.
5888 0 : skip_line();
5889 0 : return;
5890 : }
5891 : int c;
5892 : node *n;
5893 143341 : if (tok.is_newline())
5894 0 : c = '\n';
5895 143341 : else if (tok.is_tab())
5896 0 : c = '\t';
5897 143341 : else if (!tok.is_space()) {
5898 0 : skip_line();
5899 0 : return;
5900 : }
5901 : else
5902 143341 : c = read_char_in_copy_mode(&n);
5903 143539 : while (c == ' ')
5904 198 : c = read_char_in_copy_mode(&n);
5905 143341 : if (c == '"')
5906 10737 : c = read_char_in_copy_mode(&n);
5907 143341 : int len = 0;
5908 994831 : while (c != '\n' && c != EOF) {
5909 851490 : ++len;
5910 851490 : c = read_char_in_copy_mode(&n);
5911 : }
5912 143341 : reg *r = static_cast<reg *>(register_dictionary.lookup(ret));
5913 143341 : if (r != 0 /* nullptr */)
5914 137971 : r->set_value(len);
5915 : else
5916 5370 : set_register(ret, len);
5917 143341 : tok.next();
5918 : }
5919 :
5920 4 : static void asciify_request()
5921 : {
5922 4 : if (!has_arg()) {
5923 0 : warning(WARN_MISSING, "diversion asciification request expects a"
5924 : " diversion identifier as argument");
5925 0 : skip_line();
5926 0 : return;
5927 : }
5928 4 : symbol s = read_identifier();
5929 4 : if (!s.is_null()) {
5930 4 : request_or_macro *p = lookup_request(s);
5931 4 : macro *m = p->to_macro();
5932 4 : if (0 /* nullptr */ == m)
5933 0 : error("cannot asciify request '%1'", s.contents());
5934 : else {
5935 8 : macro am;
5936 8 : string_iterator iter(*m);
5937 : for (;;) {
5938 178 : node *nd = 0 /* nullptr */;
5939 178 : int c = iter.get(&nd);
5940 178 : if (c == EOF)
5941 4 : break;
5942 174 : if (c != 0)
5943 8 : am.append(c);
5944 : else {
5945 166 : node *newnd = nd->copy();
5946 166 : newnd->asciify(&am);
5947 166 : delete nd;
5948 : }
5949 174 : }
5950 4 : *m = am;
5951 : }
5952 : }
5953 4 : skip_line();
5954 : }
5955 :
5956 1920 : void unformat_macro()
5957 : {
5958 1920 : if (!has_arg()) {
5959 0 : warning(WARN_MISSING, "diversion unformatting request expects a"
5960 : " diversion identifier as argument");
5961 0 : skip_line();
5962 0 : return;
5963 : }
5964 1920 : symbol s = read_identifier();
5965 1920 : if (!s.is_null()) {
5966 1920 : request_or_macro *p = lookup_request(s);
5967 1920 : macro *m = p->to_macro();
5968 1920 : if (0 /* nullptr */ == m)
5969 0 : error("cannot unformat request '%1'", s.contents());
5970 : else {
5971 3840 : macro am;
5972 3840 : string_iterator iter(*m);
5973 : for (;;) {
5974 55861 : node *nd = 0 /* nullptr */;
5975 55861 : int c = iter.get(&nd);
5976 55861 : if (c == EOF)
5977 1920 : break;
5978 53941 : if (c != 0)
5979 1649 : am.append(c);
5980 : else {
5981 52292 : if (nd->set_unformat_flag())
5982 48250 : am.append(nd);
5983 : }
5984 53941 : }
5985 1920 : *m = am;
5986 : }
5987 : }
5988 1920 : skip_line();
5989 : }
5990 :
5991 3 : static void interpolate_environment_variable(symbol nm)
5992 : {
5993 3 : const char *s = getenv(nm.contents());
5994 3 : if ((s != 0 /* nullptr */) && (*s != 0 /* nullptr */))
5995 1 : input_stack::push(make_temp_iterator(s));
5996 3 : }
5997 :
5998 8672390 : void interpolate_register(symbol nm, int inc)
5999 : {
6000 8672390 : reg *r = look_up_register(nm);
6001 8672390 : assert(r != 0 /* nullptr */);
6002 8672390 : if (inc < 0)
6003 98022 : r->decrement();
6004 8574368 : else if (inc > 0)
6005 476085 : r->increment();
6006 8672390 : input_stack::push(make_temp_iterator(r->get_string()));
6007 8672390 : }
6008 :
6009 626 : static void interpolate_number_format(symbol nm)
6010 : {
6011 626 : reg *r = static_cast<reg *>(register_dictionary.lookup(nm));
6012 626 : if (r != 0 /* nullptr */)
6013 623 : input_stack::push(make_temp_iterator(r->get_format()));
6014 626 : }
6015 :
6016 615 : static bool read_delimited_measurement(units *n,
6017 : unsigned char si,
6018 : units prev_value)
6019 : {
6020 1230 : token start_token;
6021 615 : start_token.next();
6022 615 : if (start_token.is_eof()) {
6023 0 : error("end of input at start of delimited numeric expression");
6024 0 : return false;
6025 : }
6026 615 : bool is_valid = false;
6027 615 : if (!want_att_compat && start_token.is_usable_as_delimiter())
6028 615 : is_valid = true;
6029 0 : else if (want_att_compat
6030 0 : && start_token.is_usable_as_delimiter(false,
6031 : DELIMITER_ATT_NUMERIC_EXPRESSION))
6032 0 : is_valid = true;
6033 615 : if (!is_valid) {
6034 0 : warning(WARN_DELIM, "cannot use %1 to delimit a numeric expression",
6035 0 : start_token.description());
6036 0 : return false;
6037 : }
6038 615 : tok.next();
6039 615 : if (read_measurement(n, si, prev_value)) {
6040 615 : if (start_token != tok) {
6041 : // token::description() writes to static, class-wide storage, so
6042 : // we must allocate a copy of it before issuing the next
6043 : // diagnostic.
6044 0 : char *delimdesc = strdup(start_token.description());
6045 0 : warning(WARN_DELIM, "closing delimiter does not match;"
6046 0 : " expected %1, got %2", delimdesc, tok.description());
6047 0 : free(delimdesc);
6048 : }
6049 615 : return true;
6050 : }
6051 0 : return false;
6052 : }
6053 :
6054 : // TODO: Merge into other `read_delimited_measurement()`, using default
6055 : // argument of 0 for `prev_value`.
6056 272624 : static bool read_delimited_measurement(units *n, unsigned char si)
6057 : {
6058 545248 : token start_token;
6059 272624 : start_token.next();
6060 272624 : bool is_valid = false;
6061 272624 : if (!want_att_compat && start_token.is_usable_as_delimiter())
6062 272531 : is_valid = true;
6063 93 : else if (want_att_compat
6064 93 : && start_token.is_usable_as_delimiter(false,
6065 : DELIMITER_ATT_NUMERIC_EXPRESSION))
6066 80 : is_valid = true;
6067 272624 : if (!is_valid) {
6068 13 : warning(WARN_DELIM, "cannot use %1 to delimit a numeric expression",
6069 13 : start_token.description());
6070 13 : return false;
6071 : }
6072 272611 : tok.next();
6073 272611 : if (read_measurement(n, si)) {
6074 272611 : if (start_token != tok) {
6075 : // token::description() writes to static, class-wide storage, so
6076 : // we must allocate a copy of it before issuing the next
6077 : // diagnostic.
6078 0 : char *delimdesc = strdup(start_token.description());
6079 0 : warning(WARN_DELIM, "closing delimiter does not match;"
6080 0 : " expected %1, got %2", delimdesc, tok.description());
6081 0 : free(delimdesc);
6082 : }
6083 272611 : return true;
6084 : }
6085 0 : return false;
6086 : }
6087 :
6088 : // \l, \L
6089 : //
6090 : // Here's some syntax unique to these escape sequences: a horizontal
6091 : // measurment followed immediately by a character.
6092 242 : static bool read_line_rule_expression(units *n, unsigned char si,
6093 : charinfo **cip)
6094 : {
6095 242 : assert(cip != 0 /* nullptr */);
6096 484 : token start_token;
6097 242 : start_token.next();
6098 484 : if (!want_att_compat
6099 242 : && !start_token.is_usable_as_delimiter(true /* report error */))
6100 30 : return false;
6101 212 : else if (want_att_compat
6102 212 : && !start_token.is_usable_as_delimiter(true,
6103 : DELIMITER_ATT_NUMERIC_EXPRESSION)) {
6104 0 : warning(WARN_DELIM, "line-drawing escape sequence"
6105 : " does not accept %1 as a delimiter",
6106 0 : start_token.description());
6107 0 : return false;
6108 : }
6109 212 : int start_level = input_stack::get_level();
6110 212 : tok.next();
6111 212 : if (read_measurement(n, si)) {
6112 212 : if (tok.is_dummy() || tok.is_transparent_dummy())
6113 149 : tok.next();
6114 256 : if (!(start_token == tok
6115 44 : && input_stack::get_level() == start_level)) {
6116 168 : *cip = tok.get_charinfo(true /* required */);
6117 168 : if (0 /* nullptr */ == *cip)
6118 0 : assert(0 == "attempted to use token without charinfo in"
6119 : " line-drawing escape sequence");
6120 168 : tok.next();
6121 : }
6122 424 : if (!(start_token == tok
6123 212 : && input_stack::get_level() == start_level)) {
6124 : // token::description() writes to static, class-wide storage, so
6125 : // we must allocate a copy of it before issuing the next
6126 : // diagnostic.
6127 0 : char *delimdesc = strdup(start_token.description());
6128 0 : warning(WARN_DELIM, "closing delimiter does not match; expected"
6129 0 : " %1, got %2", delimdesc, tok.description());
6130 0 : free(delimdesc);
6131 : }
6132 212 : return true;
6133 : }
6134 0 : return false;
6135 : }
6136 :
6137 5972 : static bool read_size(int *x) // \s
6138 : {
6139 5972 : tok.next();
6140 5972 : int c = tok.ch(); // safely compares to char literals; TODO: grochar
6141 5972 : int inc = 0;
6142 5972 : if (c == int('-')) { // TODO: grochar
6143 209 : inc = -1;
6144 209 : tok.next();
6145 209 : c = tok.ch();
6146 : }
6147 5763 : else if (c == int('+')) { // TODO: grochar
6148 387 : inc = 1;
6149 387 : tok.next();
6150 387 : c = tok.ch();
6151 : }
6152 5972 : int val = 0; // pacify compiler
6153 5972 : bool contains_invalid_digit = false;
6154 5972 : if (c == int('(')) { // TODO: grochar
6155 0 : tok.next();
6156 0 : c = tok.ch();
6157 0 : if (!inc) {
6158 : // allow an increment either before or after the left parenthesis
6159 0 : if (c == int('-')) { // TODO: grochar
6160 0 : inc = -1;
6161 0 : tok.next();
6162 0 : c = tok.ch();
6163 : }
6164 0 : else if (c == int('+')) { // TODO: grochar
6165 0 : inc = 1;
6166 0 : tok.next();
6167 0 : c = tok.ch();
6168 : }
6169 : }
6170 0 : if (!csdigit(c))
6171 0 : contains_invalid_digit = true;
6172 : else {
6173 0 : val = c - '0';
6174 0 : tok.next();
6175 0 : c = tok.ch();
6176 0 : if (!csdigit(c))
6177 0 : contains_invalid_digit = true;
6178 : else {
6179 0 : val = val * 10 + (c - '0');
6180 0 : val *= sizescale;
6181 : }
6182 : }
6183 : }
6184 5972 : else if (csdigit(c)) {
6185 1636 : val = c - '0';
6186 1636 : if (want_att_compat && !inc && c != '0' && c < '4') {
6187 : // Support legacy \sNN syntax.
6188 2 : tok.next();
6189 2 : c = tok.ch();
6190 2 : if (!csdigit(c))
6191 0 : contains_invalid_digit = true;
6192 : else {
6193 2 : val = val * 10 + (c - '0');
6194 2 : error("ambiguous type size in escape sequence; rewrite to use"
6195 2 : " '%1s(%2' or similar", static_cast<char>(escape_char),
6196 4 : val);
6197 : }
6198 : }
6199 1636 : val *= sizescale;
6200 : }
6201 4336 : else if (!want_att_compat && !tok.is_usable_as_delimiter())
6202 0 : warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
6203 0 : " is deprecated", tok.description());
6204 4336 : else if (want_att_compat
6205 4336 : && !tok.is_usable_as_delimiter(false,
6206 : DELIMITER_ATT_STRING_EXPRESSION)) {
6207 0 : warning(WARN_DELIM, "type size escape sequence"
6208 : " does not accept %1 as a delimiter",
6209 0 : tok.description());
6210 0 : return false;
6211 : }
6212 : // TODO: groff 1.24.0 release + 2 years?
6213 : #if 0
6214 : else if (!tok.is_usable_as_delimiter(true /* report error */))
6215 : return false;
6216 : #endif
6217 : else {
6218 4336 : token start(tok);
6219 4336 : tok.next();
6220 4336 : c = tok.ch();
6221 4336 : if ((inc == 0) && ((c == '-') || (c == '+'))) {
6222 471 : inc = (c == '+') ? 1 : -1;
6223 471 : tok.next();
6224 : }
6225 4336 : if (!read_measurement(&val, (unsigned char)('z'))) // TODO: grochar
6226 0 : return false;
6227 : // safely compares to char literals; TODO: grochar
6228 4336 : int s = start.ch();
6229 4336 : int t = tok.ch();
6230 4336 : if (!((s == int('[')) && (t == int(']'))) && (start != tok)) {
6231 0 : if (s == int('['))
6232 0 : error("missing ']' in type size escape sequence");
6233 : else {
6234 : // token::description() writes to static, class-wide storage, so
6235 : // we must allocate a copy of it before issuing the next
6236 : // diagnostic.
6237 0 : char *delimdesc = strdup(start.description());
6238 0 : if (s != t)
6239 0 : error("closing delimiter does not match; expected %1, got %2",
6240 0 : delimdesc, tok.description());
6241 0 : free(delimdesc);
6242 : }
6243 0 : return false;
6244 : }
6245 : }
6246 5972 : if (contains_invalid_digit) {
6247 0 : if (c != 0U)
6248 0 : error("expected valid digit in type size escape sequence, got %1",
6249 0 : input_char_description(c));
6250 : else
6251 0 : error("invalid digit in type size escape sequence");
6252 0 : return false;
6253 : }
6254 : else {
6255 5972 : switch (inc) {
6256 4905 : case 0:
6257 4905 : if (val == 0) {
6258 : // special case -- point size 0 means "revert to previous size"
6259 1794 : *x = 0;
6260 1794 : return true;
6261 : }
6262 3111 : *x = val;
6263 3111 : break;
6264 480 : case 1:
6265 480 : *x = curenv->get_requested_point_size() + val;
6266 480 : break;
6267 587 : case -1:
6268 587 : *x = curenv->get_requested_point_size() - val;
6269 587 : break;
6270 0 : default:
6271 0 : assert(0 == "unhandled case of type size increment operator");
6272 : }
6273 4178 : if (*x <= 0) {
6274 1 : warning(WARN_RANGE,
6275 : "type size escape sequence results in non-positive size"
6276 1 : " %1u; setting it to 1u", *x);
6277 1 : *x = 1;
6278 : }
6279 4178 : return true;
6280 : }
6281 : }
6282 :
6283 53 : static symbol read_delimited_identifier()
6284 : {
6285 106 : token start_token;
6286 53 : start_token.next();
6287 53 : if (start_token.is_eof()) {
6288 0 : error("end of input at start of delimited name");
6289 0 : return NULL_SYMBOL;
6290 : }
6291 53 : bool is_valid = false;
6292 53 : if (!want_att_compat && start_token.is_usable_as_delimiter())
6293 52 : is_valid = true;
6294 1 : else if (want_att_compat
6295 1 : && start_token.is_usable_as_delimiter(false,
6296 : DELIMITER_ATT_STRING_EXPRESSION))
6297 0 : is_valid = true;
6298 53 : if (!is_valid) {
6299 1 : warning(WARN_DELIM, "cannot use %1 to delimit an identifier",
6300 1 : start_token.description());
6301 1 : return NULL_SYMBOL;
6302 : }
6303 52 : int start_level = input_stack::get_level();
6304 52 : int buf_size = default_buffer_size;
6305 52 : char *buf = 0 /* nullptr */;
6306 : try {
6307 : // C++03: new char[buf_size]();
6308 52 : buf = new char[buf_size];
6309 : }
6310 0 : catch (const std::bad_alloc &e) {
6311 0 : fatal("cannot allocate %1 bytes to read input line", buf_size);
6312 : }
6313 52 : (void) memset(buf, 0, (buf_size * sizeof(char)));
6314 52 : int i = 0;
6315 : for (;;) {
6316 682 : if ((i + 1) > buf_size) {
6317 0 : char *old_buf = buf;
6318 0 : int new_buf_size = buf_size * 2;
6319 : // C++03: new char[new_buf_size]();
6320 : try {
6321 0 : buf = new char[new_buf_size];
6322 : }
6323 0 : catch (const std::bad_alloc &e) {
6324 0 : fatal("cannot allocate %1 bytes to read input line", buf_size);
6325 : }
6326 0 : (void) memset(buf, 0, (new_buf_size * sizeof(char)));
6327 0 : (void) memcpy(buf, old_buf, buf_size);
6328 0 : buf_size = new_buf_size;
6329 0 : delete[] old_buf;
6330 : }
6331 682 : tok.next();
6332 682 : if ((tok == start_token)
6333 734 : && (want_att_compat
6334 52 : || (input_stack::get_level() == start_level)))
6335 52 : break;
6336 630 : if ((buf[i] = tok.ch()) == 0U) {
6337 : // token::description() writes to static, class-wide storage, so
6338 : // we must allocate a copy of it before issuing the next
6339 : // diagnostic.
6340 0 : char *delimdesc = strdup(start_token.description());
6341 0 : if (start_token != tok)
6342 0 : error("closing delimiter does not match; expected %1, got %2",
6343 0 : delimdesc, tok.description());
6344 0 : free(delimdesc);
6345 0 : delete[] buf;
6346 0 : return NULL_SYMBOL;
6347 : }
6348 630 : i++;
6349 630 : }
6350 52 : buf[i] = '\0';
6351 52 : if (0 == i) {
6352 0 : error("empty delimited name");
6353 0 : return NULL_SYMBOL;
6354 : }
6355 52 : symbol s(buf);
6356 52 : delete[] buf;
6357 52 : return s;
6358 : }
6359 :
6360 6142 : static void do_register() // \R
6361 : {
6362 6142 : token start_token;
6363 6142 : start_token.next();
6364 6142 : if (!want_att_compat && !start_token.is_usable_as_delimiter())
6365 0 : warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
6366 0 : " is deprecated", tok.description());
6367 6142 : else if (want_att_compat
6368 6142 : && !start_token.is_usable_as_delimiter(false,
6369 : DELIMITER_ATT_STRING_EXPRESSION)) {
6370 0 : warning(WARN_DELIM, "register assignment escape sequence"
6371 : " does not accept %1 as a delimiter",
6372 0 : start_token.description());
6373 0 : return;
6374 : }
6375 : // TODO: groff 1.24.0 release + 2 years?
6376 : #if 0
6377 : if (!start_token.is_usable_as_delimiter(true /* report error */)) {
6378 : return;
6379 : #endif
6380 6142 : tok.next();
6381 6142 : symbol nm = read_long_identifier(true /* required */);
6382 6142 : if (nm.is_null())
6383 0 : return;
6384 6142 : tok.skip_spaces();
6385 6142 : reg *r = static_cast<reg *>(register_dictionary.lookup(nm));
6386 : int prev_value;
6387 6142 : if ((0 /* nullptr */ == r) || !r->get_value(&prev_value))
6388 176 : prev_value = 0;
6389 : int val;
6390 : // TODO: grochar
6391 6142 : if (!read_measurement(&val, (unsigned char)('u'), prev_value))
6392 0 : return;
6393 : // token::description() writes to static, class-wide storage, so we
6394 : // must allocate a copy of it before issuing the next diagnostic.
6395 6142 : char *delimdesc = strdup(start_token.description());
6396 6142 : if (start_token != tok)
6397 0 : warning(WARN_DELIM, "closing delimiter does not match; expected %1,"
6398 0 : " got %2", delimdesc, tok.description());
6399 6142 : free(delimdesc);
6400 6142 : if (r != 0 /* nullptr */)
6401 5966 : r->set_value(val);
6402 : else
6403 176 : set_register(nm, val);
6404 : }
6405 :
6406 26009 : static void do_width() // \w
6407 : {
6408 26009 : token start_token;
6409 26009 : start_token.next();
6410 26009 : if (!want_att_compat && !start_token.is_usable_as_delimiter())
6411 0 : warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
6412 0 : " is deprecated", start_token.description());
6413 26009 : else if (want_att_compat
6414 26009 : && !start_token.is_usable_as_delimiter(false,
6415 : DELIMITER_ATT_STRING_EXPRESSION)) {
6416 0 : warning(WARN_DELIM, "width computation escape sequence"
6417 : " does not accept %1 as a delimiter",
6418 0 : start_token.description());
6419 0 : return;
6420 : }
6421 : // TODO: groff 1.24.0 release + 2 years?
6422 : #if 0
6423 : if (!start_token.is_usable_as_delimiter(true /* report error */))
6424 : return;
6425 : #endif
6426 26009 : int start_level = input_stack::get_level();
6427 26009 : environment env(curenv);
6428 26009 : environment *oldenv = curenv;
6429 26009 : curenv = &env;
6430 : for (;;) {
6431 244007 : tok.next();
6432 244007 : if (tok.is_newline() || tok.is_eof()) {
6433 : // token::description() writes to static, class-wide storage, so
6434 : // we must allocate a copy of it before issuing the next
6435 : // diagnostic.
6436 0 : char *delimdesc = strdup(start_token.description());
6437 0 : warning(WARN_DELIM, "missing closing delimiter in width"
6438 : " computation escape sequence; expected %1, got %2",
6439 0 : delimdesc, tok.description());
6440 0 : free(delimdesc);
6441 0 : break;
6442 : }
6443 244007 : if (tok == start_token
6444 244007 : && (want_att_compat || input_stack::get_level() == start_level))
6445 26009 : break;
6446 217998 : tok.process();
6447 217998 : }
6448 26009 : env.wrap_up_tab();
6449 26009 : units x = env.get_input_line_position().to_units();
6450 26009 : input_stack::push(make_temp_iterator(i_to_a(x)));
6451 26009 : env.width_registers();
6452 26009 : curenv = oldenv;
6453 26009 : have_formattable_input = false;
6454 : }
6455 :
6456 : charinfo *page_character;
6457 :
6458 : // XXX: The page character is global; shouldn't it be environmental?
6459 : // Its idiomatic use is in `tl` requests when formatting titles (headers
6460 : // or footers), which full-service macro packages typically put in their
6461 : // own environment anyway to ensure that a consistent typeface is used
6462 : // there regardless of how body text is styled.
6463 0 : static void page_character_request()
6464 : {
6465 0 : page_character = read_character();
6466 : // TODO?: If null pointer, set to `percent_symbol` (see below),
6467 : // eliminating test in `read_title_parts()` (also below)?
6468 0 : skip_line();
6469 0 : }
6470 :
6471 : static const symbol percent_symbol("%");
6472 :
6473 3298 : void read_title_parts(node **part, hunits *part_width)
6474 : {
6475 3298 : if (!has_arg())
6476 188 : return;
6477 6220 : token start(tok);
6478 3110 : if (!want_att_compat && !tok.is_usable_as_delimiter())
6479 0 : warning(WARN_DELIM, "using %1 as a title request delimiter"
6480 0 : " is deprecated", tok.description());
6481 3110 : int start_level = input_stack::get_level();
6482 3110 : tok.next();
6483 12440 : for (int i = 0; i < 3; i++) {
6484 122361 : while (!tok.is_newline() && !tok.is_eof()) {
6485 122361 : if ((tok == start)
6486 131691 : && (want_att_compat
6487 9330 : || input_stack::get_level() == start_level)) {
6488 9330 : tok.next();
6489 9330 : break;
6490 : }
6491 113031 : charinfo *ci = tok.get_charinfo();
6492 : // It's okay for `ci` to be a null pointer; that will be the case
6493 : // if the token is a node: italic corrections, horizontal motions,
6494 : // and so forth. TODO: Is it worth warning about some node types?
6495 113031 : if ((ci != 0 /* nullptr */)
6496 102596 : && (page_character != 0 /* nullptr */)
6497 102596 : && (page_character == ci))
6498 88 : interpolate_register(percent_symbol, 0);
6499 : else
6500 112943 : tok.process();
6501 113031 : tok.next();
6502 : }
6503 9330 : curenv->wrap_up_tab();
6504 9330 : part_width[i] = curenv->get_input_line_position();
6505 9330 : part[i] = curenv->extract_output_line();
6506 : }
6507 3110 : while (!tok.is_newline() && !tok.is_eof())
6508 0 : tok.next();
6509 : }
6510 :
6511 : // contents of `\?...\?`
6512 : class non_interpreted_node : public node {
6513 : macro mac;
6514 : public:
6515 : non_interpreted_node(const macro &);
6516 : bool interpret(macro *);
6517 : void asciify(macro *);
6518 : node *copy();
6519 : int ends_sentence();
6520 : bool is_same_as(node *);
6521 : const char *type();
6522 : bool causes_tprint();
6523 : bool is_tag();
6524 : };
6525 :
6526 457945 : non_interpreted_node::non_interpreted_node(const macro &m) : mac(m)
6527 : {
6528 457945 : }
6529 :
6530 212 : int non_interpreted_node::ends_sentence()
6531 : {
6532 212 : return 2;
6533 : }
6534 :
6535 228916 : bool non_interpreted_node::is_same_as(node *nd)
6536 : {
6537 228916 : return (mac == static_cast<non_interpreted_node *>(nd)->mac);
6538 : }
6539 :
6540 457832 : const char *non_interpreted_node::type()
6541 : {
6542 457832 : return "non-interpreted node";
6543 : }
6544 :
6545 0 : bool non_interpreted_node::causes_tprint()
6546 : {
6547 0 : return false;
6548 : }
6549 :
6550 0 : bool non_interpreted_node::is_tag()
6551 : {
6552 0 : return false;
6553 : }
6554 :
6555 0 : void non_interpreted_node::asciify(macro *)
6556 : {
6557 0 : delete this;
6558 0 : }
6559 :
6560 0 : node *non_interpreted_node::copy()
6561 : {
6562 0 : return new non_interpreted_node(mac);
6563 : }
6564 :
6565 107 : bool non_interpreted_node::interpret(macro *m)
6566 : {
6567 107 : string_iterator si(mac);
6568 107 : node *n = 0 /* nullptr */;
6569 : for (;;) {
6570 1382 : int c = si.get(&n);
6571 1382 : if (c == EOF)
6572 107 : break;
6573 1275 : if (c == 0)
6574 0 : m->append(n);
6575 : else
6576 1275 : m->append(c);
6577 1275 : }
6578 214 : return true;
6579 : }
6580 :
6581 457945 : static node *do_non_interpreted() // \?
6582 : {
6583 : node *n;
6584 : int c;
6585 915890 : macro mac;
6586 1637909 : while (((c = read_char_in_copy_mode(&n)) != ESCAPE_QUESTION)
6587 1179964 : && (c != EOF)
6588 2817873 : && (c != '\n'))
6589 1179964 : if (c == 0)
6590 0 : mac.append(n);
6591 : else
6592 1179964 : mac.append(c);
6593 457945 : if (c == EOF || c == '\n') {
6594 0 : error("unterminated transparent embedding escape sequence");
6595 0 : return 0 /* nullptr */;
6596 : }
6597 457945 : return new non_interpreted_node(mac);
6598 : }
6599 :
6600 246 : static void map_special_character_for_device_output(macro *mac,
6601 : const char *sc)
6602 : {
6603 246 : if (strcmp("-", sc) == 0)
6604 178 : mac->append('-');
6605 68 : else if (strcmp("dq", sc) == 0)
6606 4 : mac->append('"');
6607 64 : else if (strcmp("sh", sc) == 0)
6608 1 : mac->append('#');
6609 63 : else if (strcmp("Do", sc) == 0)
6610 1 : mac->append('$');
6611 62 : else if (strcmp("aq", sc) == 0)
6612 4 : mac->append('\'');
6613 58 : else if (strcmp("sl", sc) == 0)
6614 1 : mac->append('/');
6615 57 : else if (strcmp("at", sc) == 0)
6616 1 : mac->append('@');
6617 56 : else if (strcmp("lB", sc) == 0)
6618 1 : mac->append('[');
6619 55 : else if (strcmp("rs", sc) == 0)
6620 8 : mac->append('\\');
6621 47 : else if (strcmp("rB", sc) == 0)
6622 1 : mac->append(']');
6623 46 : else if (strcmp("ha", sc) == 0)
6624 4 : mac->append('^');
6625 42 : else if (strcmp("lC", sc) == 0)
6626 1 : mac->append('{');
6627 41 : else if (strcmp("ba", sc) == 0)
6628 1 : mac->append('|');
6629 40 : else if (strcmp("or", sc) == 0)
6630 1 : mac->append('|');
6631 39 : else if (strcmp("rC", sc) == 0)
6632 1 : mac->append('}');
6633 38 : else if (strcmp("ti", sc) == 0)
6634 7 : mac->append('~');
6635 : else {
6636 31 : if (font::use_charnames_in_special) {
6637 3 : if (sc[0] != '\0') {
6638 3 : mac->append('\\');
6639 3 : mac->append('[');
6640 3 : int i = 0;
6641 15 : while (sc[i] != '\0') {
6642 12 : mac->append(sc[i]);
6643 12 : i++;
6644 : }
6645 3 : mac->append(']');
6646 : }
6647 : }
6648 : else {
6649 : char errbuf[ERRBUFSZ]; // C++03: char errbuf[ERRBUFSZ]()
6650 28 : (void) memset(errbuf, '\0', ERRBUFSZ);
6651 28 : const size_t unibufsz = UNIBUFSZ + 1 /* '\0' */;
6652 : char character[unibufsz]; // C++03: char errbuf[ERRBUFSZ]()
6653 28 : (void) memset(character, '\0', UNIBUFSZ);
6654 : // If it looks like something other than an attempt at a Unicode
6655 : // special character escape sequence already, try to convert it
6656 : // into one. Output drivers don't (and shouldn't) know anything
6657 : // about a troff formatter's special character identifiers.
6658 28 : if ((strlen(sc) < 3) || (sc[0] != 'u')) {
6659 9 : const char *un = glyph_name_to_unicode(sc);
6660 9 : if (un != 0 /* nullptr */)
6661 8 : strncpy(character, un, unibufsz);
6662 : else {
6663 1 : warning(WARN_CHAR, "special character '%1' is not encodable"
6664 1 : " in device-independent output", sc);
6665 2 : return;
6666 8 : }
6667 : }
6668 : else {
6669 19 : const char *un = valid_unicode_code_sequence(sc, errbuf);
6670 19 : if (0 /* nullptr */ == un) {
6671 1 : warning(WARN_CHAR, "special character '%1' is not encodable"
6672 1 : " in device-independent output: %2", sc, errbuf);
6673 1 : return;
6674 : }
6675 18 : strncpy(character, un, unibufsz);
6676 : }
6677 26 : mac->append_str("\\[u");
6678 26 : mac->append_str(character);
6679 26 : mac->append(']');
6680 : }
6681 : }
6682 : }
6683 :
6684 224 : static void encode_special_character_for_device_output(macro *mac)
6685 : {
6686 : const char *sc;
6687 224 : charinfo *ci = tok.get_charinfo(true /* required */);
6688 224 : if (0 /* nullptr */ == ci) {
6689 0 : assert(0 == "attempted to encode token without charinfo for"
6690 : " device extension command output");
6691 : return;
6692 : }
6693 224 : sc = ci->get_symbol()->contents();
6694 224 : if (0 /* nullptr */ == sc) {
6695 0 : assert(0 == "attempted to encode token containing charinfo with"
6696 : " null symbol for device extension command output");
6697 : return;
6698 : }
6699 224 : map_special_character_for_device_output(mac, sc);
6700 : }
6701 :
6702 : // In troff output, we translate the escape character to '\', but it is
6703 : // up to the postprocessor to interpret it as such. (This mostly
6704 : // matters for device extension commands.)
6705 1710337 : static void encode_character_for_device_output(macro *mac, const char c)
6706 : {
6707 1710337 : if ('\0' == c) {
6708 : // It's a special token, not a character we can write as-is.
6709 226 : if (tok.is_stretchable_space()
6710 226 : || tok.is_unstretchable_space())
6711 1 : mac->append(' ');
6712 225 : else if ((tok.is_hyphen_indicator())
6713 225 : || tok.is_zero_width_break()
6714 225 : || tok.is_dummy()
6715 450 : || tok.is_transparent_dummy())
6716 : /* do nothing */;
6717 225 : else if (tok.is_special_character())
6718 224 : encode_special_character_for_device_output(mac);
6719 : else
6720 1 : warning(WARN_CHAR, "%1 is not encodable in device-independent"
6721 2 : " output ('asciify' might help)", tok.description());
6722 : }
6723 : else {
6724 1710111 : if (c == escape_char)
6725 18 : mac->append('\\');
6726 : else
6727 1710093 : mac->append(c);
6728 : }
6729 1710337 : }
6730 :
6731 70842 : static node *do_device_extension() // \X
6732 : {
6733 141684 : token start_token;
6734 70842 : start_token.next();
6735 70842 : if (!want_att_compat && !start_token.is_usable_as_delimiter())
6736 0 : warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
6737 0 : " is deprecated", tok.description());
6738 70842 : else if (want_att_compat
6739 70842 : && !start_token.is_usable_as_delimiter(false,
6740 : DELIMITER_ATT_STRING_EXPRESSION)) {
6741 0 : warning(WARN_DELIM, "device extension command escape sequence"
6742 : " does not accept %1 as a delimiter",
6743 0 : start_token.description());
6744 0 : return 0 /* nullptr */;
6745 : }
6746 : // TODO: groff 1.24.0 release + 2 years?
6747 : #if 0
6748 : if (!start_token.is_usable_as_delimiter(true /* report error */))
6749 : return 0 /* nullptr */;
6750 : #endif
6751 70842 : int start_level = input_stack::get_level();
6752 70842 : macro mac;
6753 70842 : if ((curdiv == topdiv) && (topdiv->before_first_page_status > 0))
6754 70 : topdiv->begin_page();
6755 : for (;;) {
6756 1783501 : tok.next();
6757 1783501 : if (tok.is_newline() || tok.is_eof()) {
6758 : // token::description() writes to static, class-wide storage, so
6759 : // we must allocate a copy of it before issuing the next
6760 : // diagnostic.
6761 14 : char *delimdesc = strdup(start_token.description());
6762 14 : warning(WARN_DELIM, "missing closing delimiter in device"
6763 : " extension escape sequence; expected %1, got %2",
6764 14 : delimdesc, tok.description());
6765 14 : free(delimdesc);
6766 14 : break;
6767 : }
6768 1783487 : if (tok == start_token
6769 1783487 : && (want_att_compat || input_stack::get_level() == start_level))
6770 70828 : break;
6771 : unsigned char c; // TODO: grochar
6772 1712659 : if (tok.is_space())
6773 242705 : c = ' ';
6774 : // TODO: Stop silently ignoring these when we have a string
6775 : // iterator for users and can externalize "sanitization" operations.
6776 : // See <https://savannah.gnu.org/bugs/?62264>.
6777 1469954 : else if (tok.is_hyphen_indicator())
6778 797 : continue;
6779 1469157 : else if (tok.is_dummy())
6780 3 : continue;
6781 1469154 : else if (tok.is_zero_width_break())
6782 1522 : continue;
6783 : else
6784 1467632 : c = tok.ch();
6785 1710337 : encode_character_for_device_output(&mac, c);
6786 1712659 : }
6787 70842 : return new device_extension_node(mac);
6788 : }
6789 :
6790 206 : static void device_request()
6791 : {
6792 206 : if (!has_arg(true /* peek; we want to read in copy mode */)) {
6793 0 : warning(WARN_MISSING, "device extension request expects an"
6794 : " argument");
6795 0 : skip_line();
6796 0 : return;
6797 : }
6798 412 : macro mac;
6799 : int c;
6800 : for (;;) {
6801 206 : c = read_char_in_copy_mode(0 /* nullptr */);
6802 206 : if ('"' == c) {
6803 14 : c = read_char_in_copy_mode(0 /* nullptr */);
6804 14 : break;
6805 : }
6806 192 : if (c != ' ' && c != '\t')
6807 192 : break;
6808 : }
6809 206 : if ((curdiv == topdiv) && (topdiv->before_first_page_status > 0))
6810 37 : topdiv->begin_page();
6811 4748 : for (;
6812 4954 : (c != '\0') && (c != '\n') && (c != EOF);
6813 4748 : c = read_char_in_copy_mode(0 /* nullptr */)) {
6814 : // We may encounter some of the C0 and C1 character codes GNU troff
6815 : // uses for special purposes; see src/roff/troff/input.h. They
6816 : // produce nothing in grout. Warn only about the ones that are left
6817 : // for the user's purposes. Use octal because input.h does. Ignore
6818 : // 8-bit codes in general. grout is an ISO 646 file format.
6819 4748 : if (ESCAPE_TILDE == c) {
6820 1 : mac.append('\\');
6821 1 : mac.append('~');
6822 : }
6823 4747 : else if ((c < 015) || (c >= 0177))
6824 1 : warning (WARN_SYNTAX, "ignoring character code %1 in device"
6825 2 : " extension command request argument", c);
6826 4746 : else if (c != '\\')
6827 4720 : mac.append(c);
6828 : else {
6829 26 : int c1 = read_char_in_copy_mode(0 /* nullptr */);
6830 26 : if (c1 != '[') {
6831 3 : mac.append(c);
6832 3 : mac.append(c1);
6833 3 : string chardesc = "";
6834 3 : if (csprint(c1)) {
6835 3 : chardesc += "'";
6836 3 : chardesc += char(c1);
6837 3 : chardesc += "'";
6838 : }
6839 : else {
6840 0 : chardesc += "character code ";
6841 0 : chardesc += i_to_a(c1);
6842 : }
6843 3 : chardesc += '\0'; // make it safe for .contents()
6844 3 : warning (WARN_SYNTAX, "not interpreting escaped %1 in device"
6845 : " extension command request argument",
6846 6 : chardesc.contents());
6847 : }
6848 : else {
6849 : // Does the input resemble a valid (bracket-form) special
6850 : // character escape sequence?
6851 23 : bool is_valid = false;
6852 46 : string sc = "";
6853 23 : int c2 = read_char_in_copy_mode(0 /* nullptr */);
6854 126 : for (; (c2 != '\0') && (c2 != '\n') && (c2 != EOF);
6855 103 : c2 = read_char_in_copy_mode(0 /* nullptr */)) {
6856 : // XXX: `map_special_character_for_device_output()` will need
6857 : // the closing bracket in the iterator we construct, but a
6858 : // composite character mapping mustn't see it.
6859 126 : sc += c2;
6860 126 : if (']' == c2) {
6861 23 : is_valid = true;
6862 23 : break;
6863 : }
6864 : }
6865 23 : sc += '\0';
6866 23 : if (sc.search(' ') > 0) {
6867 : // XXX: TODO
6868 1 : error("composite special character escape sequences not yet"
6869 : " supported in device extension command arguments");
6870 1 : is_valid = false;
6871 : }
6872 23 : if (is_valid) {
6873 22 : input_stack::push(make_temp_iterator(sc.contents()));
6874 22 : symbol s = read_long_escape_parameters(WITH_ARGS);
6875 22 : map_special_character_for_device_output(&mac, s.contents());
6876 : }
6877 : else {
6878 : // We couldn't make sense of it. Write it out as-is.
6879 1 : mac.append(c);
6880 1 : mac.append(c1);
6881 1 : mac.append_str(sc.contents());
6882 : }
6883 : }
6884 : }
6885 : }
6886 206 : curenv->add_node(new device_extension_node(mac));
6887 206 : tok.next();
6888 : }
6889 :
6890 1 : static void device_macro_request()
6891 : {
6892 1 : symbol s = read_identifier(true /* required */);
6893 1 : if (!(s.is_null() || s.is_empty())) {
6894 1 : request_or_macro *p = lookup_request(s);
6895 1 : macro *m = p->to_macro();
6896 1 : if (m != 0 /* nullptr */)
6897 1 : curenv->add_node(new device_extension_node(*m));
6898 : else
6899 0 : error("cannot interpolate '%1' to device-independent output;"
6900 0 : " it is a request, not a macro", s.contents());
6901 : }
6902 1 : skip_line();
6903 1 : }
6904 :
6905 44 : static void output_request()
6906 : {
6907 44 : if (!has_arg(true /* peek; we want to read in copy mode */)) {
6908 0 : warning(WARN_MISSING, "output request expects arguments");
6909 0 : skip_line();
6910 0 : return;
6911 : }
6912 : int c;
6913 : for (;;) {
6914 44 : c = read_char_in_copy_mode(0 /* nullptr */);
6915 44 : if ('"' == c) {
6916 14 : c = read_char_in_copy_mode(0 /* nullptr */);
6917 14 : break;
6918 : }
6919 30 : if (c != ' ' && c != '\t')
6920 30 : break;
6921 : }
6922 314 : for (;
6923 358 : (c != '\n') && (c != EOF);
6924 314 : (c = read_char_in_copy_mode(0 /* nullptr */)))
6925 314 : topdiv->transparent_output(c);
6926 44 : topdiv->transparent_output('\n');
6927 44 : tok.next();
6928 : }
6929 :
// Image counter defined in node.cpp; do_suppress() below increments it
// each time it processes a `\O[5...]` argument.
extern int image_no; // from node.cpp
6931 :
6932 757 : static node *do_suppress(symbol nm) // \O
6933 : {
6934 757 : if (nm.is_null() || nm.is_empty()) {
6935 0 : error("output suppression escape sequence requires an argument");
6936 0 : return 0 /* nullptr */;
6937 : }
6938 757 : const char *s = nm.contents();
6939 757 : switch (*s) {
6940 141 : case '0':
6941 141 : if (0 == suppression_level)
6942 : // suppress generation of glyphs
6943 135 : return new suppress_node(0, 0);
6944 6 : break;
6945 124 : case '1':
6946 124 : if (0 == suppression_level)
6947 : // enable generation of glyphs
6948 118 : return new suppress_node(1, 0);
6949 6 : break;
6950 123 : case '2':
6951 123 : if (0 == suppression_level)
6952 117 : return new suppress_node(1, 1);
6953 6 : break;
6954 123 : case '3':
6955 123 : have_formattable_input = true;
6956 123 : suppression_level++;
6957 123 : break;
6958 123 : case '4':
6959 123 : have_formattable_input = true;
6960 123 : suppression_level--;
6961 123 : break;
6962 123 : case '5':
6963 : {
6964 123 : s++; // move over '5'
6965 123 : char position = *s;
6966 123 : if ('\0' == *s) {
6967 0 : error("missing position and file name in output suppression"
6968 : " escape sequence");
6969 0 : return 0 /* nullptr */;
6970 : }
6971 123 : if ((position != 'l')
6972 119 : && (position != 'r')
6973 119 : && (position != 'c')
6974 5 : && (position != 'i')) {
6975 0 : error("expected position 'l', 'r', 'c', or 'i' in output"
6976 0 : " suppression escape sequence, got '%1'", position);
6977 0 : return 0 /* nullptr */;
6978 : }
6979 123 : s++; // onto image name
6980 123 : if (0 == s /* nullptr */) {
6981 0 : error("missing image name in output suppression escape"
6982 : " sequence");
6983 0 : return 0 /* nullptr */;
6984 : }
6985 123 : image_no++;
6986 123 : if (0 == suppression_level)
6987 117 : return new suppress_node(symbol(s), position, image_no);
6988 : else
6989 6 : have_formattable_input = true;
6990 : }
6991 6 : break;
6992 0 : default:
6993 0 : char qc = '\'';
6994 0 : if (strchr(s, '\'') != 0 /* nullptr */)
6995 0 : qc = '"';
6996 0 : error("invalid argument %1%2%3 to output suppression escape"
6997 0 : " sequence", qc, *s, qc);
6998 : }
6999 270 : return 0 /* nullptr */;
7000 : }
7001 :
7002 66977 : void device_extension_node::tprint(troff_output_file *out)
7003 : {
7004 66977 : tprint_start(out);
7005 133954 : string_iterator iter(mac);
7006 : for (;;) {
7007 1583415 : int c = iter.get(0 /* nullptr */);
7008 1583415 : if (c != EOF)
7009 3032883 : for (const char *s = encode_for_stream_output(c);
7010 3032883 : *s != 0 /* nullptr */;
7011 : s++)
7012 1516445 : tprint_char(out, *s);
7013 : else
7014 66977 : break;
7015 1516438 : }
7016 66977 : tprint_end(out);
7017 66977 : }
7018 :
7019 208 : int get_file_line(const char **filename, int *lineno)
7020 : {
7021 208 : return input_stack::get_location(false /* allow macro */, filename,
7022 208 : lineno);
7023 : }
7024 :
7025 22852 : void line_file()
7026 : {
7027 : int n;
7028 22852 : if (read_integer(&n)) {
7029 22852 : if (has_arg(true /* peek */)) {
7030 1397 : const char *reported_file_name = read_rest_of_line_as_argument();
7031 1397 : (void) input_stack::set_location(reported_file_name, (n - 1));
7032 : // TODO: Add `reported_file_name` to file name set.
7033 1397 : tok.next();
7034 1397 : return;
7035 : }
7036 21455 : (void) input_stack::set_location(0 /* nullptr */, (n - 1));
7037 : }
7038 21455 : skip_line();
7039 : }
7040 :
// Select nroff mode (tested by the 'n' and 't' conditional expression
// operators) and discard the rest of the input line.
static void nroff_request()
{
  in_nroff_mode = true;
  skip_line();
}
7046 :
// Leave nroff mode (tested by the 'n' and 't' conditional expression
// operators) and discard the rest of the input line.
static void troff_request()
{
  in_nroff_mode = false;
  skip_line();
}
7052 :
7053 2593683 : static void skip_branch()
7054 : {
7055 2593683 : if (tok.is_newline()) {
7056 2 : tok.next();
7057 2 : return;
7058 : }
7059 2593681 : int level = 0;
7060 : // ensure that ".if 0\{" works as expected
7061 2593681 : if (tok.is_left_brace())
7062 3 : level++;
7063 : int c;
7064 : for (;;) {
7065 270236480 : c = input_stack::get(0 /* nullptr */);
7066 270236480 : if (c == EOF)
7067 6 : break;
7068 270236474 : if (c == ESCAPE_LEFT_BRACE)
7069 2157257 : ++level;
7070 268079217 : else if (c == ESCAPE_RIGHT_BRACE)
7071 2171202 : --level;
7072 265908015 : else if ((c == escape_char) && (escape_char != 0U))
7073 12725143 : switch (input_stack::get(0 /* nullptr */)) {
7074 162951 : case '{':
7075 162951 : ++level;
7076 162951 : break;
7077 174139 : case '}':
7078 174139 : --level;
7079 174139 : break;
7080 2270297 : case '"':
7081 2270297 : while ((c = input_stack::get(0 /* nullptr */)) != '\n'
7082 2270297 : && c != EOF)
7083 : ;
7084 : }
7085 : /*
7086 : Note that the level can properly be < 0, e.g.
7087 :
7088 : .if 1 \{\
7089 : .if 0 \{\
7090 : .\}\}
7091 :
7092 : So don't give an error message in this case.
7093 : */
7094 270236474 : if (level <= 0 && c == '\n')
7095 2593675 : break;
7096 : }
7097 2593681 : tok.next();
7098 : }
7099 :
// Prepare to execute the branch of a conditional that is to be taken:
// advance past any space and left-brace tokens that precede it.
static void take_branch()
{
  while (tok.is_space() || tok.is_left_brace())
    tok.next();
}
7105 :
// Do nothing except skip the spaces that follow the request name;
// whatever comes after them is left on the input for the caller.
static void nop_request()
{
  tok.skip_spaces();
}
7110 :
7111 : // Perform a (formatted) output comparison operation, as found in
7112 : // .if 'foo'bar'
7113 : // ...for example.
7114 1379893 : static bool are_comparands_equal()
7115 : {
7116 2759786 : token delim = tok;
7117 1379893 : int delim_level = input_stack::get_level();
7118 2759786 : environment env1(curenv);
7119 2759786 : environment env2(curenv);
7120 1379893 : environment *oldenv = curenv;
7121 1379893 : curenv = &env1;
7122 1379893 : suppress_push = true;
7123 4139679 : for (int i = 0; i < 2; i++) {
7124 : for (;;) {
7125 12337240 : tok.next();
7126 12337240 : if (tok.is_newline() || tok.is_eof()) {
7127 : // token::description() writes to static, class-wide storage,
7128 : // so we must allocate a copy of it before issuing the next
7129 : // diagnostic.
7130 0 : char *delimdesc = strdup(delim.description());
7131 0 : warning(WARN_DELIM, "missing closing delimiter in output"
7132 : " comparison operator; expected %1, got %2",
7133 0 : delimdesc, tok.description());
7134 0 : free(delimdesc);
7135 0 : tok.next();
7136 0 : curenv = oldenv;
7137 0 : return false;
7138 : }
7139 12337240 : if ((tok == delim)
7140 15202848 : && (want_att_compat
7141 2865608 : || (input_stack::get_level() == delim_level)))
7142 2759786 : break;
7143 9577454 : tok.process();
7144 9577454 : }
7145 2759786 : curenv = &env2;
7146 : }
7147 1379893 : node *n1 = env1.extract_output_line();
7148 1379893 : node *n2 = env2.extract_output_line();
7149 1379893 : bool result = same_node_list(n1, n2);
7150 1379893 : delete_node_list(n1);
7151 1379893 : delete_node_list(n2);
7152 1379893 : curenv = oldenv;
7153 1379893 : have_formattable_input = false;
7154 1379893 : suppress_push = false;
7155 1379893 : tok.next();
7156 1379893 : return result;
7157 : }
7158 :
// Results of if-else conditions whose `else` has not yet been seen:
// if_else_request() pushes each result and else_request() pops it.
static std::stack<bool> if_else_stack;
7160 :
// Evaluate the conditional expression at the current input position
// and then position the input accordingly: take_branch() is called if
// the (possibly inverted) result is true, skip_branch() otherwise.
//
// Any number of leading '!' characters each invert the sense of the
// test.  The next character selects the kind of test:
//   t, n    troff or nroff mode
//   o, e    odd or even page number
//   d, r    named request dictionary entry or register exists
//   m       named color exists
//   c       character exists
//   F       font name is valid for the current family
//   S       name is an abstract style
//   v       always false (vtroff extension)
// A space token yields false.  A token usable as a delimiter starts an
// output comparison; anything else is read as a numeric expression,
// which is true if greater than zero.
//
// Returns the result.  If an operand cannot be read, the branch is
// skipped and false is returned without applying any '!' inversion.
static bool is_conditional_expression_true()
{
  bool perform_output_comparison = false;
  bool want_test_sense_inverted = false;
  tok.skip_spaces();
  while (tok.ch() == int('!')) { // TODO: grochar
    tok.next();
    want_test_sense_inverted = !want_test_sense_inverted;
  }
  bool result;
  int c = tok.ch(); // safely compares to char literals; TODO: grochar
  // In AT&T compatibility mode, warn about operators that are GNU
  // extensions; they are nevertheless still interpreted below.
  if (want_att_compat)
    switch (c) {
    case int('F'): // TODO: grochar
    case int('S'): // TODO: grochar
    case int('c'): // TODO: grochar
    case int('d'): // TODO: grochar
    case int('m'): // TODO: grochar
    case int('r'): // TODO: grochar
    case int('v'): // TODO: grochar
      warning(WARN_SYNTAX,
              "conditional expression operator '%1' is not portable to"
              " AT&T troff",
              char(c));
      // TODO: "; treating as output comparison delimiter", c);
      break;
    default:
      break;
    }
  if (c == int('t')) { // TODO: grochar
    tok.next();
    result = !in_nroff_mode;
  }
  else if (c == int('n')) { // TODO: grochar
    tok.next();
    result = in_nroff_mode;
  }
  else if (c == int('o')) { // TODO: grochar
    result = (topdiv->get_page_number() & 1); // TODO: dump cleverness
    tok.next();
  }
  else if (c == int('e')) { // TODO: grochar
    result = !(topdiv->get_page_number() & 1); // TODO: dump cleverness
    tok.next();
  }
  // TODO: else if (!want_att_compat) {
  // Check for GNU troff extended conditional expression operators.
  else if ((c == int('d') || (c == int('r')))) { // TODO: grochar
    tok.next();
    symbol nm = read_identifier(true /* required */);
    if (nm.is_null()) {
      skip_branch();
      return false;
    }
    result = ((c == 'd')
              ? request_dictionary.lookup(nm) != 0 /* nullptr */
              : register_dictionary.lookup(nm) != 0 /* nullptr */);
  }
  else if (c == 'm') {
    tok.next();
    symbol nm = read_long_identifier(true /* required */);
    if (nm.is_null()) {
      skip_branch();
      return false;
    }
    result = ((nm == default_symbol)
              || color_dictionary.lookup(nm) != 0 /* nullptr */);
  }
  else if (c == 'c') {
    tok.next();
    tok.skip_spaces();
    // XXX: Mystery: the presence of a character (fortunately) doesn't
    // create it if nonexistent even though the default second argument
    // to `token::get_charinfo()` (`suppress_creation`) is `false` (see
    // "token.h").  Why?
    charinfo *ci = tok.get_charinfo(true /* required */);
    if (0 == ci /* nullptr */) {
      skip_branch();
      return false;
    }
    result = character_exists(ci, curenv);
    tok.next();
  }
  else if (c == 'F') {
    tok.next();
    symbol nm = read_long_identifier(true /* required */);
    if (nm.is_null()) {
      skip_branch();
      return false;
    }
    result = is_font_name(curenv->get_family()->nm, nm);
  }
  else if (c == 'S') {
    tok.next();
    symbol nm = read_long_identifier(true /* required */);
    if (nm.is_null()) {
      skip_branch();
      return false;
    }
    result = is_abstract_style(nm);
  }
  // vtroff extension
  else if (c == 'v') {
    tok.next();
    result = false;
  }
  else if (tok.is_space())
    result = false;
  else if (!want_att_compat
           && tok.is_usable_as_delimiter())
    perform_output_comparison = true;
  else if (want_att_compat
           && tok.is_usable_as_delimiter(false /* report error */,
                DELIMITER_ATT_OUTPUT_COMPARISON_EXPRESSION))
    perform_output_comparison = true;
  else {
    // Evaluate numeric expression.
    units n;
    if (!read_measurement(&n, (unsigned char)('u'))) { // TODO: grochar
      skip_branch();
      return false;
    }
    else
      result = (n > 0);
  }
  // `result` is assigned on every path that reaches this point: either
  // by one of the tests above or by the comparison here.
  if (perform_output_comparison)
    result = are_comparands_equal();
  if (want_test_sense_inverted)
    result = !result;
  if (result)
    take_branch();
  else
    skip_branch();
  return result;
}
7296 :
7297 571975 : static void if_else_request()
7298 : {
7299 571975 : if (!has_arg()) {
7300 0 : warning(WARN_MISSING, "if-else request expects arguments");
7301 0 : skip_line();
7302 0 : return;
7303 : }
7304 571975 : if_else_stack.push(is_conditional_expression_true());
7305 : }
7306 :
7307 2977289 : static void if_request()
7308 : {
7309 2977289 : if (!has_arg()) {
7310 0 : warning(WARN_MISSING, "if-then request expects arguments");
7311 0 : skip_line();
7312 0 : return;
7313 : }
7314 2977289 : (void) is_conditional_expression_true();
7315 : }
7316 :
7317 519423 : static void else_request()
7318 : {
7319 519423 : if (if_else_stack.empty())
7320 0 : skip_branch();
7321 : else {
7322 519423 : bool predicate = if_else_stack.top();
7323 519423 : if_else_stack.pop();
7324 519423 : if (predicate)
7325 170827 : skip_branch();
7326 : else
7327 348596 : take_branch();
7328 : }
7329 519423 : }
7330 :
// Number of while loops currently executing; while_request() adjusts
// it, and the break and continue requests refuse to act when it is 0.
static int while_depth = 0;
// Set by while_break_request(); tested and cleared by while_request()
// after each pass through the loop body.
static bool want_loop_break = false;
7333 :
// Implement the while loop request.
//
// First, the remainder of the request -- condition and body -- is
// copied into a temporary macro, up to and including the first newline
// read while the brace nesting level is zero or less.  Then that macro
// is repeatedly pushed onto the input stack: each pass evaluates the
// condition and, if it holds, processes the body.  The loop ends when
// the condition is false, when a break was requested, or when the
// input stack reports a return boundary.
static void while_request()
{
  if (!has_arg(true /* peek */)) {
    warning(WARN_MISSING, "while loop request expects arguments");
    skip_line();
    return;
  }
  macro mac;
  bool is_char_escaped = false;
  int level = 0;
  // The current token was already read; keep it as the macro's start.
  mac.append(new token_node(tok));
  for (;;) {
    node *n = 0 /* nullptr */;
    int c = input_stack::get(&n);
    if (c == EOF)
      break;
    if (c == 0) {
      // A zero character code means the input supplied a node instead.
      is_char_escaped = false;
      mac.append(n);
    }
    else if (is_char_escaped) {
      // Character following the escape character: count brace escapes.
      if (c == '{')
        level += 1;
      else if (c == '}')
        level -= 1;
      is_char_escaped = false;
      mac.append(c);
    }
    else {
      if (c == ESCAPE_LEFT_BRACE)
        level += 1;
      else if (c == ESCAPE_RIGHT_BRACE)
        level -= 1;
      else if (c == escape_char)
        is_char_escaped = true;
      mac.append(c);
      if (c == '\n' && level <= 0)
        break;
    }
  }
  if (level != 0)
    error("unbalanced brace escape sequences");
  else {
    while_depth++;
    input_stack::add_boundary();
    for (;;) {
      input_stack::push(new string_iterator(mac, "while loop"));
      tok.next();
      if (!is_conditional_expression_true()) {
        // Condition failed: drain what is left of this pass's input.
        while (input_stack::get(0 /* nullptr */) != EOF)
          ;
        break;
      }
      process_input_stack();
      if (want_loop_break || input_stack::is_return_boundary()) {
        want_loop_break = false;
        break;
      }
    }
    input_stack::remove_boundary();
    while_depth--;
  }
  tok.next();
}
7398 :
7399 9148 : static void while_break_request()
7400 : {
7401 9148 : if (!while_depth) {
7402 0 : error("cannot 'break' when not in a 'while' loop");
7403 0 : skip_line();
7404 : }
7405 : else {
7406 9148 : want_loop_break = true;
7407 422554 : while (input_stack::get(0 /* nullptr */) != EOF)
7408 : ;
7409 9148 : tok.next();
7410 : }
7411 9148 : }
7412 :
7413 63602 : static void while_continue_request()
7414 : {
7415 63602 : if (!while_depth) {
7416 0 : error("cannot 'continue' when not in a 'while' loop");
7417 0 : skip_line();
7418 : }
7419 : else {
7420 40051839 : while (input_stack::get(0 /* nullptr */) != EOF)
7421 : ;
7422 63602 : tok.next();
7423 : }
7424 63602 : }
7425 :
7426 13 : void do_source(bool quietly)
7427 : {
7428 13 : char *filename = read_rest_of_line_as_argument();
7429 13 : errno = 0;
7430 13 : FILE *fp = include_search_path.open_file_cautiously(filename);
7431 13 : if (fp != 0 /* nullptr */)
7432 12 : input_stack::push(new file_iterator(fp, filename));
7433 : else
7434 : // Suppress diagnostic only if we're operating quietly and it's an
7435 : // expected problem.
7436 1 : if (!(quietly && (ENOENT == errno)))
7437 0 : error("cannot open '%1': %2", filename, strerror(errno));
7438 : // TODO: Add `filename` to file name set.
7439 13 : tok.next();
7440 13 : }
7441 :
7442 1 : void source_request() // .so
7443 : {
7444 1 : if (!has_arg(true /* peek */)) {
7445 0 : warning(WARN_MISSING, "file sourcing request expects an argument");
7446 0 : skip_line();
7447 0 : return;
7448 : }
7449 1 : do_source(false /* quietly */ );
7450 : }
7451 :
7452 : // like .so, but silently ignore files that can't be opened due to their
7453 : // nonexistence
7454 12 : void source_quietly_request() // .soquiet
7455 : {
7456 12 : if (!has_arg(true /* peek */)) {
7457 0 : warning(WARN_MISSING, "quiet file sourcing request expects an"
7458 : " argument");
7459 0 : skip_line();
7460 0 : return;
7461 : }
7462 12 : do_source(true /* quietly */ );
7463 : }
7464 :
7465 96 : void pipe_source_request() // .pso
7466 : {
7467 96 : if (!has_arg(true /* peek */)) {
7468 0 : warning(WARN_MISSING, "piped command source request expects"
7469 : " arguments");
7470 0 : skip_line();
7471 0 : return;
7472 : }
7473 96 : if (!want_unsafe_requests) {
7474 0 : error("piped command source request is not allowed in safer mode");
7475 0 : skip_line();
7476 0 : return;
7477 : }
7478 96 : char *pcmd = read_rest_of_line_as_argument();
7479 : // `has_arg()` should have ensured that this pointer is non-null.
7480 96 : assert(pcmd != 0 /* nullptr */);
7481 96 : if (0 /* nullptr */ == pcmd)
7482 0 : error("cannot apply piped command source request to empty"
7483 : " argument");
7484 96 : errno = 0;
7485 96 : FILE *fp = popen(pcmd, POPEN_RT);
7486 96 : if (fp != 0 /* nullptr */)
7487 96 : input_stack::push(new file_iterator(fp, pcmd, true /* popened */));
7488 : else
7489 0 : error("cannot open pipe to process '%1': %2", pcmd,
7490 0 : strerror(errno));
7491 96 : delete[] pcmd;
7492 96 : tok.next();
7493 : }
7494 :
// .psbb
//
// Extract bounding box limits from a PostScript file, and assign
// them to the following four variables, which
// psbb_locator::assign_registers() updates:
//
static int llx_reg_contents = 0;
static int lly_reg_contents = 0;
static int urx_reg_contents = 0;
static int ury_reg_contents = 0;

// Manifest constants to specify the status of bounding box range
// acquisition; (note that PSBB_RANGE_IS_BAD is also suitable for
// assignment as a default ordinate property value).
//
#define PSBB_RANGE_IS_BAD   0
#define PSBB_RANGE_IS_SET   1
#define PSBB_RANGE_AT_END   2

// Maximum input line length, for DSC conformance, and options to
// control how it will be enforced; caller should select either of
// DSC_LINE_MAX_IGNORED, to allow partial line collection spread
// across multiple calls, or DSC_LINE_MAX_ENFORCE, to truncate
// excess length lines at the DSC limit.
//
// Note that DSC_LINE_MAX_CHECKED is reserved for internal use by
// psbb_locator::get_line(), and should not be specified in any call;
// also, handling of DSC_LINE_MAX_IGNORED, as a get_line() option,
// is currently unimplemented.
//
#define DSC_LINE_MAX          255
#define DSC_LINE_MAX_IGNORED   -1
#define DSC_LINE_MAX_ENFORCE    0
#define DSC_LINE_MAX_CHECKED    1

// Input characters to be considered as whitespace, when reading
// PostScript file comments.
//
cset white_space("\n\r \t");
7533 :
7534 : // Class psbb_locator
7535 : //
7536 : // This locally declared and implemented class provides the methods
7537 : // to be used for retrieval of bounding box properties from a specified
7538 : // PostScript or PDF file.
7539 : //
class psbb_locator
{
  public:
    // Only the class constructor is exposed publicly; instantiation of
    // a class object will retrieve the requisite bounding box properties
    // from the specified file, and assign them to gtroff registers.
    //
    psbb_locator(const char *);

  private:
    // Input stream; opened and closed within the constructor.
    FILE *fp;
    // Name of the input file, as passed to the constructor.
    const char *filename;
    // Current input line; get_line() stores at most DSC_LINE_MAX
    // characters, then an LF and a NUL terminator.
    char buf[2 + DSC_LINE_MAX];
    // Bounding box values; zero until successfully parsed.
    int llx, lly, urx, ury;

    // CRLF handling hook, for get_line() function.
    //
    int lastc;

    // Private method functions facilitate implementation of the
    // class constructor; none are used in any other context.
    //
    int get_line(int);
    inline bool get_header_comment(void);
    inline const char *context_args(const char *);
    inline const char *context_args(const char *, const char *);
    inline const char *bounding_box_args(void);
    int parse_bounding_box(const char *);
    inline void assign_registers(void);
    inline int skip_to_trailer(void);
};
7571 :
// psbb_locator class constructor.
//
// Inputs:
//   fname      name of the file to examine; opened via the include
//              search path.
//
// Opens the file, checks that its first line begins with
// "%!PS-Adobe-", then scans the header comments for "%%BoundingBox:".
// If that comment defers to the trailer with "(atend)", the trailer is
// searched instead.  Each failure is diagnosed with error().  The
// bounding box values, which remain zero unless successfully parsed,
// are always copied out by assign_registers() before returning.
//
psbb_locator::psbb_locator(const char *fname):
  filename(fname), llx(0), lly(0), urx(0), ury(0), lastc(EOF)
{
  // PS files might contain non-printable characters, such as ^Z
  // and CRs not followed by an LF, so open them in binary mode.
  //
  fp = include_search_path.open_file_cautiously(filename, 0, FOPEN_RB);
  if (fp != 0 /* nullptr */) {
    // After successfully opening the file, acquire the first
    // line, whence we may determine the file format...
    //
    if (get_line(DSC_LINE_MAX_ENFORCE) == 0)
      //
      // ...except in the case of an empty file, which we are
      // unable to process further.
      //
      error("file '%1' is empty", filename);

# if 0
    else if (context_args("%PDF-")) {
      // TODO: PDF files specify a /MediaBox, as the equivalent
      // of %%BoundingBox; we must implement a handler for this.
    }
# endif

    else if (context_args("%!PS-Adobe-")) {
      //
      // PostScript files -- strictly, we expect EPS -- should
      // specify a %%BoundingBox comment; locate it, initially
      // expecting to find it in the comments header...
      //
      const char *context = 0 /* nullptr */;
      while ((context == 0 /* nullptr */) && get_header_comment()) {
        if ((context = bounding_box_args()) != 0 /* nullptr */) {

          // When the "%%BoundingBox" comment is found, it may simply
          // specify the bounding box property values, or it may defer
          // assignment to a similar trailer comment...
          //
          int status = parse_bounding_box(context);
          if (status == PSBB_RANGE_AT_END) {
            //
            // ...in which case we must locate the trailer, and search
            // for the appropriate specification within it.
            //
            if (skip_to_trailer() > 0) {
              while ((context = bounding_box_args()) == 0 /* nullptr */
                     && get_line(DSC_LINE_MAX_ENFORCE) > 0)
                ;
              if (context != 0 /* nullptr */) {
                //
                // When we find a bounding box specification here...
                //
                if ((status = parse_bounding_box(context)) == PSBB_RANGE_AT_END)
                  //
                  // ...we must ensure it is not a further attempt to defer
                  // assignment to a trailer, (which we are already parsing).
                  //
                  error("'(atend)' is not allowed in trailer of '%1'",
                        filename);
              }
            }
            else
              // The trailer could not be found, so there is no context in
              // which a trailing %%BoundingBox comment might be located.
              //
              context = 0 /* nullptr */;
          }
          if (status == PSBB_RANGE_IS_BAD) {
            //
            // This arises when we found a %%BoundingBox comment, but
            // we were unable to extract a valid set of range values from
            // it; all we can do is diagnose this.
            //
            error("the arguments to the %%%%BoundingBox comment in '%1' are bad",
                  filename);
          }
        }
      }
      if (context == 0 /* nullptr */)
        //
        // Conversely, this arises when no value specifying %%BoundingBox
        // comment has been found, in any appropriate location...
        //
        error("%%%%BoundingBox comment not found in '%1'", filename);
    }
    else
      // ...while this indicates that there was no appropriate file format
      // identifier, on the first line of the input file.
      //
      error("'%1' does not conform to the Document Structuring Conventions",
            filename);

    // Regardless of success or failure of bounding box property acquisition,
    // we did successfully open an input file, so we must now close it...
    //
    fclose(fp);
  }
  else
    // ...but in this case, we did not successfully open any input file.
    //
    error("cannot open '%1': %2", filename, strerror(errno));

  // Irrespective of whether or not we were able to successfully acquire the
  // bounding box properties, we ALWAYS update the associated gtroff registers.
  //
  assign_registers();
}
7682 :
7683 : // psbb_locator::parse_bounding_box()
7684 : //
7685 : // Parse the argument to a %%BoundingBox comment, returning:
7686 : // PSBB_RANGE_IS_SET if it contains four numbers,
7687 : // PSBB_RANGE_AT_END if it contains "(atend)", or
7688 : // PSBB_RANGE_IS_BAD otherwise.
7689 : //
7690 5 : int psbb_locator::parse_bounding_box(const char *context)
7691 : {
7692 : // The Document Structuring Conventions say that the numbers
7693 : // should be integers.
7694 : //
7695 5 : int status = PSBB_RANGE_IS_SET;
7696 5 : if (sscanf(context, "%d %d %d %d", &llx, &lly, &urx, &ury) != 4) {
7697 : //
7698 : // Unfortunately some broken applications get this wrong;
7699 : // try to parse them as doubles instead...
7700 : //
7701 : double x1, x2, x3, x4;
7702 0 : if (sscanf(context, "%lf %lf %lf %lf", &x1, &x2, &x3, &x4) == 4) {
7703 0 : llx = (int) x1;
7704 0 : lly = (int) x2;
7705 0 : urx = (int) x3;
7706 0 : ury = (int) x4;
7707 : }
7708 : else {
7709 : // ...but if we can't parse four numbers, skip over any
7710 : // initial whitespace...
7711 : //
7712 0 : while (*context == '\x20' || *context == '\t')
7713 0 : context++;
7714 :
7715 : // ...before checking for "(atend)", and setting the
7716 : // appropriate exit status accordingly.
7717 : //
7718 0 : status = (context_args("(atend)", context) == 0 /* nullptr */)
7719 0 : ? llx = lly = urx = ury = PSBB_RANGE_IS_BAD
7720 : : PSBB_RANGE_AT_END;
7721 : }
7722 : }
7723 5 : return status;
7724 : }
7725 :
// psbb_locator::get_line()
//
// Collect an input record from a PostScript or PDF file.
//
// Inputs:
//   dscopt     DSC_LINE_MAX_ENFORCE or DSC_LINE_MAX_IGNORED.
//
// Uses these class members:
//   buf        input buffer, into which the record is stored.
//   fp         FILE stream pointer, whence input is read.
//   filename   name of input file, (for diagnostic use only).
//   lastc      most recent character read, kept across calls so
//              that a CRLF pair is treated as one terminator.
//
// Returns the number of input characters stored into the buffer,
// (including the LF terminator which is always substituted for the
// actual one), or zero at end of input stream.
//
// FIXME: Currently, get_line() always scans an entire line of
// input, but returns only as much as will fit in the buffer;
// the return value is always a positive integer, or zero, with no
// way of indicating to caller, that there was more data than the
// buffer could accommodate.  A future enhancement could mitigate
// this, returning a negative value in the event of truncation, or
// even allowing for piecewise retrieval of excessively long lines
// in successive reads; (this may be necessary to properly support
// DSC_LINE_MAX_IGNORED, which is currently unimplemented).
//
int psbb_locator::get_line(int dscopt)
{
  int c, count = 0;
  do {
    // Collect input characters into caller's buffer, until we
    // encounter a line terminator, or end of file...
    //
    while (((c = getc(fp)) != '\n') && (c != '\r') && (c != EOF)) {
      if ((((lastc = c) < 0x1b) && !white_space(c)) || (c == 0x7f))
        //
        // ...rejecting any which may be designated as invalid.
        //
        error("invalid input character code %1 in '%2'", int(c), filename);

      // On reading a valid input character, and when there is
      // room in caller's buffer...
      //
      else if (count < DSC_LINE_MAX)
        //
        // ...store it.
        //
        buf[count++] = c;

      // We have a valid input character, but it will not fit
      // into caller's buffer; if enforcing DSC conformity...
      //
      else if (dscopt == DSC_LINE_MAX_ENFORCE) {
        //
        // ...diagnose and truncate; changing dscopt ensures that the
        // diagnostic is issued only once per line.
        //
        dscopt = DSC_LINE_MAX_CHECKED;
        error("PostScript file '%1' is non-conforming "
              "because length of line exceeds 255", filename);
      }
    }
    // Reading LF may be a special case: when it immediately
    // follows a CR which terminated the preceding input line,
    // we deem it to complete a CRLF terminator for the already
    // collected preceding line; discard it, and restart input
    // collection for the current line.
    //
  } while ((lastc == '\r') && ((lastc = c) == '\n'));

  // For each collected input line, record its actual terminator,
  // substitute our preferred LF terminator...
  //
  if (((lastc = c) != EOF) || (count > 0))
    buf[count++] = '\n';

  // ...and append the required C-string (NUL) terminator, before
  // returning the actual count of input characters stored.
  //
  buf[count] = '\0';
  return count;
}
7804 :
7805 : // psbb_locator::context_args()
7806 : //
7807 : // Inputs:
7808 : // tag literal text to be matched at start of input line
7809 : //
7810 : // Returns a pointer to the trailing substring of the current
7811 : // input line, following an initial substring matching the "tag"
7812 : // argument, or 0 if "tag" is not matched.
7813 : //
7814 47 : inline const char *psbb_locator::context_args(const char *tag)
7815 : {
7816 47 : return context_args(tag, buf);
7817 : }
7818 :
7819 : // psbb_locator::context_args()
7820 : //
7821 : // Overloaded variant of the preceding function, operating on
7822 : // an alternative input buffer, (which may represent a terminal
7823 : // substring of the psbb_locator's primary input line buffer).
7824 : //
7825 : // Inputs:
7826 : // tag literal text to be matched at start of buffer
7827 : // p pointer to text to be checked for "tag" match
7828 : //
7829 : // Returns a pointer to the trailing substring of the specified
7830 : // text buffer, following an initial substring matching the "tag"
7831 : // argument, or 0 if "tag" is not matched.
7832 : //
7833 47 : inline const char *psbb_locator::context_args(const char *tag, const char *p)
7834 : {
7835 47 : size_t len = strlen(tag);
7836 47 : return (strncmp(tag, p, len) == 0) ? p + len : 0 /* nullptr */;
7837 : }
7838 :
7839 : // psbb_locator::bounding_box_args()
7840 : //
7841 : // Returns a pointer to the arguments string, within the current
7842 : // input line, when this represents a PostScript "%%BoundingBox:"
7843 : // comment, or 0 otherwise.
7844 : //
inline const char *psbb_locator::bounding_box_args(void)
{
  // The tag includes the trailing colon, so the returned pointer
  // addresses whatever follows it on the current input line.
  return context_args("%%BoundingBox:");
}
7849 :
7850 : // psbb_locator::assign_registers()
7851 : //
7852 : // Copies the bounding box properties established within the
7853 : // class object, to the associated gtroff registers.
7854 : //
7855 5 : inline void psbb_locator::assign_registers(void)
7856 : {
7857 5 : llx_reg_contents = llx;
7858 5 : lly_reg_contents = lly;
7859 5 : urx_reg_contents = urx;
7860 5 : ury_reg_contents = ury;
7861 5 : }
7862 :
7863 : // psbb_locator::get_header_comment()
7864 : //
7865 : // Fetch a line of PostScript input; return true if it complies with
7866 : // the formatting requirements for header comments, and it is not an
7867 : // "%%EndComments" line; otherwise return false.
7868 : //
7869 21 : inline bool psbb_locator::get_header_comment(void)
7870 : {
7871 : return
7872 : // The first necessary requirement, for returning true,
7873 : // is that the input line is not empty, (i.e. not EOF).
7874 : //
7875 21 : get_line(DSC_LINE_MAX_ENFORCE) != 0
7876 :
7877 : // In header comments, '%X' ('X' any printable character
7878 : // except whitespace) is also acceptable.
7879 : //
7880 21 : && (buf[0] == '%') && !white_space(buf[1])
7881 :
7882 : // Finally, the input line must not say "%%EndComments".
7883 : //
7884 42 : && context_args("%%EndComments") == 0 /* nullptr */;
7885 : }
7886 :
7887 : // psbb_locator::skip_to_trailer()
7888 : //
7889 : // Reposition the PostScript input stream, such that the next get_line()
7890 : // will retrieve the first line, if any, following a "%%Trailer" comment;
7891 : // returns a positive integer value if the "%%Trailer" comment is found,
7892 : // or zero if it is not.
7893 : //
7894 0 : inline int psbb_locator::skip_to_trailer(void)
7895 : {
7896 : // Begin by considering a chunk of the input file starting 512 bytes
7897 : // before its end, and search it for a "%%Trailer" comment; if none is
7898 : // found, incrementally double the chunk size while it remains within
7899 : // a 32768L byte range, and search again...
7900 : //
7901 0 : for (ssize_t offset = 512L; offset > 0L; offset <<= 1) {
7902 : int status, failed;
7903 0 : if ((offset > 32768L) || ((failed = fseek(fp, -offset, SEEK_END)) != 0))
7904 : //
7905 : // ...ultimately resetting the offset to zero, and simply seeking
7906 : // to the start of the file, to terminate the cycle and do a "last
7907 : // ditch" search of the entire file, if any backward seek fails, or
7908 : // if we reach the arbitrary 32768L byte range limit.
7909 : //
7910 0 : failed = fseek(fp, offset = 0L, SEEK_SET);
7911 :
7912 : // Following each successful seek...
7913 : //
7914 0 : if (!failed) {
7915 : //
7916 : // ...perform a search by reading lines from the input stream...
7917 : //
7918 0 : do { status = get_line(DSC_LINE_MAX_ENFORCE);
7919 : //
7920 : // ...until we either exhaust the available stream data, or
7921 : // we have located a "%%Trailer" comment line.
7922 : //
7923 : } while ((status != 0)
7924 0 : && (context_args("%%Trailer") == 0 /* nullptr */));
7925 0 : if (status > 0)
7926 : //
7927 : // We found the "%%Trailer" comment, so we may immediately
7928 : // return, with the stream positioned appropriately...
7929 : //
7930 0 : return status;
7931 : }
7932 : }
7933 : // ...otherwise, we report that no "%%Trailer" comment was found.
7934 : //
7935 0 : return 0;
7936 : }
7937 :
7938 5 : void ps_bbox_request() // .psbb
7939 : {
7940 5 : if (!has_arg(true /* peek */)) {
7941 0 : warning(WARN_MISSING, "PostScript file bounding box extraction"
7942 : " request expects an argument");
7943 0 : skip_line();
7944 0 : return;
7945 : }
7946 : // Parse input line, to extract file name.
7947 : //
7948 5 : symbol nm = read_long_identifier(true /* required */);
7949 5 : if (nm.is_null())
7950 : //
7951 : // No file name specified: ignore the entire request.
7952 : //
7953 0 : skip_line();
7954 : else {
7955 : // File name acquired: swallow the rest of the line.
7956 : //
7957 5 : while (!tok.is_newline() && !tok.is_eof())
7958 0 : tok.next();
7959 5 : errno = 0;
7960 :
7961 : // Update {llx,lly,urx,ury}_reg_contents:
7962 : // declaring this class instance achieves this, as an
7963 : // intentional side effect of object construction.
7964 : //
7965 5 : psbb_locator do_ps_file(nm.contents());
7966 :
7967 : // All done for .psbb; move on, to continue
7968 : // input stream processing.
7969 : //
7970 5 : tok.next();
7971 : }
7972 : }
7973 :
7974 : // Encode a token for output to an operating system file stream.
7975 : // Express unencodable tokens as null characters.
7976 1890400 : const char *encode_for_stream_output(int c)
7977 : {
7978 : static char buf[3];
7979 1890400 : buf[0] = (0U == escape_char) ? '\\' : escape_char;
7980 1890400 : buf[1] = buf[2] = '\0';
7981 1890400 : switch (c) {
7982 0 : case ESCAPE_QUESTION:
7983 0 : buf[1] = '?';
7984 0 : break;
7985 1 : case ESCAPE_AMPERSAND:
7986 1 : buf[1] = '&';
7987 1 : break;
7988 0 : case ESCAPE_RIGHT_PARENTHESIS:
7989 0 : buf[1] = ')';
7990 0 : break;
7991 0 : case ESCAPE_UNDERSCORE:
7992 0 : buf[1] = '_';
7993 0 : break;
7994 38 : case ESCAPE_BAR:
7995 38 : buf[1] = '|';
7996 38 : break;
7997 1 : case ESCAPE_CIRCUMFLEX:
7998 1 : buf[1] = '^';
7999 1 : break;
8000 0 : case ESCAPE_LEFT_BRACE:
8001 0 : buf[1] = '{';
8002 0 : break;
8003 0 : case ESCAPE_RIGHT_BRACE:
8004 0 : buf[1] = '}';
8005 0 : break;
8006 0 : case ESCAPE_LEFT_QUOTE:
8007 0 : buf[1] = '`';
8008 0 : break;
8009 0 : case ESCAPE_RIGHT_QUOTE:
8010 0 : buf[1] = '\'';
8011 0 : break;
8012 14 : case ESCAPE_HYPHEN:
8013 14 : buf[1] = '-';
8014 14 : break;
8015 2 : case ESCAPE_BANG:
8016 2 : buf[1] = '!';
8017 2 : break;
8018 0 : case ESCAPE_c:
8019 0 : buf[1] = 'c';
8020 0 : break;
8021 0 : case ESCAPE_e:
8022 0 : buf[1] = 'e';
8023 0 : break;
8024 24 : case ESCAPE_E:
8025 24 : buf[1] = 'E';
8026 24 : break;
8027 100 : case ESCAPE_PERCENT:
8028 100 : buf[1] = '%';
8029 100 : break;
8030 16 : case ESCAPE_SPACE:
8031 16 : buf[1] = ' ';
8032 16 : break;
8033 2 : case ESCAPE_TILDE:
8034 2 : buf[1] = '~';
8035 2 : break;
8036 0 : case ESCAPE_COLON:
8037 0 : buf[1] = ':';
8038 0 : break;
8039 0 : case PUSH_GROFF_MODE:
8040 : case PUSH_COMP_MODE:
8041 : case POP_GROFFCOMP_MODE:
8042 0 : buf[0] = '\0';
8043 0 : break;
8044 1890202 : default:
8045 1890202 : if (is_invalid_input_char(c))
8046 0 : buf[0] = '\0';
8047 : else
8048 1890202 : buf[0] = c;
8049 1890202 : break;
8050 : }
8051 1890400 : return buf;
8052 : }
8053 :
8054 8 : const char *input_char_description(int c)
8055 : {
8056 8 : switch (c) {
8057 0 : case '\n':
8058 0 : return "a newline character";
8059 0 : case '\b':
8060 0 : return "a backspace character";
8061 0 : case '\001':
8062 0 : return "a leader character";
8063 0 : case '\t':
8064 0 : return "a tab character";
8065 0 : case ' ':
8066 0 : return "a space character";
8067 0 : case '\0':
8068 0 : return "a node";
8069 : }
8070 8 : const size_t bufsz = sizeof "magic character code " + INT_DIGITS + 1;
8071 : static char buf[bufsz];
8072 8 : (void) memset(buf, 0, bufsz);
8073 8 : if (is_invalid_input_char(c)) {
8074 0 : const char *s = encode_for_stream_output(c);
8075 0 : if (*s) {
8076 0 : buf[0] = '\'';
8077 0 : strcpy(buf + 1, s);
8078 0 : strcat(buf, "'");
8079 0 : return buf;
8080 : }
8081 0 : sprintf(buf, "magic character code %d", c);
8082 0 : return buf;
8083 : }
8084 8 : if (csprint(c)) {
8085 0 : if ('\'' == c) {
8086 0 : buf[0] = '"';
8087 0 : buf[1] = c;
8088 0 : buf[2] = '"';
8089 : }
8090 : else {
8091 0 : buf[0] = '\'';
8092 0 : buf[1] = c;
8093 0 : buf[2] = '\'';
8094 : }
8095 0 : return buf;
8096 : }
8097 8 : sprintf(buf, "character code %d", c);
8098 8 : return buf;
8099 : }
8100 :
8101 16164 : void tag()
8102 : {
8103 16164 : if (has_arg(true /* peek */)) {
8104 16164 : string s;
8105 : int c;
8106 : for (;;) {
8107 16164 : c = read_char_in_copy_mode(0 /* nullptr */);
8108 16164 : if (c == '"') {
8109 0 : c = read_char_in_copy_mode(0 /* nullptr */);
8110 0 : break;
8111 : }
8112 16164 : if (c != ' ' && c != '\t')
8113 16164 : break;
8114 : }
8115 16164 : s = "x X ";
8116 204746 : for (;
8117 220910 : (c != '\n') && (c != EOF);
8118 204746 : (c = read_char_in_copy_mode(0 /* nullptr */)))
8119 204746 : s += (char) c;
8120 16164 : s += '\n';
8121 16164 : curenv->add_node(new tag_node(s, 0));
8122 : }
8123 16164 : tok.next();
8124 16164 : }
8125 :
8126 50 : void taga()
8127 : {
8128 50 : if (has_arg(true /* peek */)) {
8129 50 : string s;
8130 : int c;
8131 : for (;;) {
8132 50 : c = read_char_in_copy_mode(0 /* nullptr */);
8133 50 : if (c == '"') {
8134 0 : c = read_char_in_copy_mode(0 /* nullptr */);
8135 0 : break;
8136 : }
8137 50 : if (c != ' ' && c != '\t')
8138 50 : break;
8139 : }
8140 50 : s = "x X ";
8141 650 : for (;
8142 700 : (c != '\n') && (c != EOF);
8143 650 : (c = read_char_in_copy_mode(0 /* nullptr */)))
8144 650 : s += (char) c;
8145 50 : s += '\n';
8146 50 : curenv->add_node(new tag_node(s, 1));
8147 : }
8148 50 : tok.next();
8149 50 : }
8150 :
8151 : // .tm, .tm1, and .tmc
8152 :
8153 : // TODO: Migrate `tm` (and `ab`) to work like `tm1`, interpreting a
8154 : // leading `"` as `ds` does (and a bunch of other requests do).
8155 : //
8156 : // This would leave `tm1` without a distinct function, so we could
8157 : // retire it.
8158 : //
8159 : // Separately, we could make `tm` (and/or `ab`) do old-style argument
8160 : // interpretation only in compatibility mode. We still wouldn't need
8161 : // `tm1` because a compatibility mode document could say ".do tm foo".
8162 :
8163 568 : static void terminal_write(bool do_append_newline,
8164 : bool interpret_leading_spaces)
8165 : {
8166 568 : if (has_arg(true /* peek */)) {
8167 : int c;
8168 : for (;;) {
8169 568 : c = read_char_in_copy_mode(0 /* nullptr */);
8170 568 : if (interpret_leading_spaces && ('"' == c)) {
8171 21 : c = read_char_in_copy_mode(0 /* nullptr */);
8172 21 : break;
8173 : }
8174 547 : if ((c != ' ') && (c != '\t'))
8175 547 : break;
8176 : }
8177 17325 : for (;
8178 17893 : (c != '\n') && (c != EOF);
8179 17325 : (c = read_char_in_copy_mode(0 /* nullptr */)))
8180 17325 : fputs(encode_for_stream_output(c), stderr);
8181 : }
8182 568 : if (do_append_newline)
8183 547 : fputc('\n', stderr);
8184 568 : fflush(stderr);
8185 568 : tok.next();
8186 568 : }
8187 :
8188 : // old and busted
8189 529 : static void terminal_message_request() // .tm
8190 : {
8191 529 : terminal_write(true /* do append newline */ ,
8192 : false /* interpret leading spaces */);
8193 529 : }
8194 :
8195 : // the new hotness
8196 18 : static void terminal_message1_request() // .tm1
8197 : {
8198 18 : terminal_write(true /* do append newline */ ,
8199 : true /* interpret leading spaces */);
8200 18 : }
8201 :
8202 21 : static void terminal_message_continuation_request() // .tmc
8203 : {
8204 21 : terminal_write(false /* do append newline */ ,
8205 : true /* interpret leading spaces */);
8206 21 : }
8207 :
8208 : struct grostream : object {
8209 : const symbol filename;
8210 : const symbol mode;
8211 : FILE * const file;
8212 : grostream(const char *fn, symbol m, FILE *fp);
8213 : ~grostream();
8214 : };
8215 :
8216 1 : grostream::grostream(const char *fn, symbol m, FILE *fp)
8217 1 : : filename(fn), mode(m), file(fp)
8218 : {
8219 1 : }
8220 :
8221 2 : grostream::~grostream()
8222 : {
8223 2 : }
8224 :
8225 : object_dictionary stream_dictionary(20);
8226 :
8227 0 : static void print_stream_request() // .pstream
8228 : {
8229 0 : object_dictionary_iterator iter(stream_dictionary);
8230 0 : symbol stream_name;
8231 : grostream *grost;
8232 0 : errprint("[");
8233 0 : bool need_comma = false;
8234 0 : while (iter.get(&stream_name, (object **)&grost)) {
8235 0 : assert(!stream_name.is_null());
8236 0 : if (stream_name != 0 /* nullptr */) {
8237 0 : if (need_comma)
8238 0 : errprint(", ");
8239 0 : errprint("{\"stream\": ");
8240 0 : stream_name.json_dump();
8241 0 : errprint(", \"file name\": ");
8242 0 : grost->filename.json_dump();
8243 0 : errprint(", \"mode\": ");
8244 0 : grost->mode.json_dump();
8245 0 : errprint("}");
8246 0 : fflush(stderr);
8247 0 : need_comma = true;
8248 : }
8249 : }
8250 : // !need_comma implies that the list was empty. JSON convention is to
8251 : // put a space between an empty pair of square brackets.
8252 0 : if (!need_comma)
8253 0 : errprint(" ");
8254 0 : errprint("]\n");
8255 0 : fflush(stderr);
8256 0 : skip_line();
8257 0 : }
8258 :
8259 1 : static void open_file(bool appending)
8260 : {
8261 1 : symbol stream = read_identifier(true /* required */);
8262 1 : if (!stream.is_null()) {
8263 1 : char *filename = read_rest_of_line_as_argument();
8264 1 : if (filename != 0 /* nullptr */) {
8265 1 : const char *mode = appending ? "appending" : "writing";
8266 1 : errno = 0;
8267 1 : FILE *fp = fopen(filename, appending ? "a" : "w");
8268 1 : if (0 /* nullptr */ == fp) {
8269 0 : error("cannot open file '%1' for %2: %3", filename, mode,
8270 0 : strerror(errno));
8271 : // If we already had a key of this name in the dictionary, it's
8272 : // invalid now.
8273 0 : stream_dictionary.remove(stream);
8274 : }
8275 : else {
8276 : grostream *oldgrost = static_cast<grostream *>(stream_dictionary
8277 1 : .lookup(stream));
8278 1 : if (oldgrost != 0 /* nullptr */) {
8279 0 : FILE *oldfp = oldgrost->file;
8280 0 : assert(oldfp != 0 /* nullptr */);
8281 0 : if (oldfp != 0 /* nullptr */ && (fclose(oldfp) != 0)) {
8282 0 : error("cannot close file '%1' already associated with"
8283 0 : " stream '%2': %3", filename, strerror(errno));
8284 0 : return;
8285 : }
8286 : }
8287 1 : stream_dictionary.define(stream,
8288 2 : new grostream(filename, mode, &*fp));
8289 : }
8290 : }
8291 1 : tok.next();
8292 : }
8293 : }
8294 :
8295 1 : static void open_request() // .open
8296 : {
8297 1 : if (!has_arg(true /* peek */)) {
8298 0 : warning(WARN_MISSING, "file writing request expects arguments");
8299 0 : skip_line();
8300 0 : return;
8301 : }
8302 1 : if (!want_unsafe_requests) {
8303 0 : error("file writing request is not allowed in safer mode");
8304 0 : skip_line();
8305 : }
8306 : else
8307 1 : open_file(false /* appending */);
8308 : // No skip_line() here; open_file() calls
8309 : // read_rest_of_line_as_argument(), tok.next().
8310 : }
8311 :
8312 0 : static void opena_request() // .opena
8313 : {
8314 0 : if (!has_arg(true /* peek */)) {
8315 0 : warning(WARN_MISSING, "file appending request expects arguments");
8316 0 : skip_line();
8317 0 : return;
8318 : }
8319 0 : if (!want_unsafe_requests) {
8320 0 : error("file appending request is not allowed in safer mode");
8321 0 : skip_line();
8322 : }
8323 : else
8324 0 : open_file(true /* appending */);
8325 : // No skip_line() here; open_file() calls
8326 : // read_rest_of_line_as_argument(), tok.next().
8327 : }
8328 :
8329 1 : static void close_stream(symbol &stream)
8330 : {
8331 1 : assert(!stream.is_null());
8332 1 : bool is_valid = false;
8333 1 : FILE *fp = 0 /* nullptr */;
8334 : grostream *grost = static_cast<grostream *>(stream_dictionary
8335 1 : .lookup(stream));
8336 1 : if (grost != 0 /* nullptr */) {
8337 1 : fp = grost->file;
8338 : // We shouldn't have stored a null pointer in the first place.
8339 1 : assert(fp != 0 /* nullptr */);
8340 1 : if (fp != 0 /* nullptr */)
8341 1 : is_valid = true;
8342 : }
8343 1 : if (!is_valid) {
8344 0 : error("cannot close nonexistent stream '%1'", stream.contents());
8345 0 : return;
8346 : }
8347 : else {
8348 1 : if (fclose(fp) != 0) {
8349 0 : error("cannot close stream '%1': %2", stream.contents(),
8350 0 : strerror(errno));
8351 0 : return;
8352 : }
8353 : }
8354 1 : stream_dictionary.remove(stream);
8355 : }
8356 :
8357 : // Call this from exit_troff().
8358 1403 : static void close_all_streams()
8359 : {
8360 1403 : object_dictionary_iterator iter(stream_dictionary);
8361 : FILE *filestream;
8362 1403 : symbol stream;
8363 1403 : while (iter.get(&stream, (object **)&filestream)) {
8364 0 : assert(!stream.is_null());
8365 0 : if (stream != 0 /* nullptr */) {
8366 0 : warning(WARN_FILE, "stream '%1' still open; closing",
8367 0 : stream.contents());
8368 0 : close_stream(stream);
8369 : }
8370 : }
8371 1403 : }
8372 :
8373 1 : static void close_request() // .close
8374 : {
8375 1 : if (!has_arg(true /* peek */)) {
8376 0 : warning(WARN_MISSING, "stream closing request expects an argument");
8377 0 : skip_line();
8378 0 : return;
8379 : }
8380 1 : symbol stream = read_identifier();
8381 : // Testing has_arg() should have ensured this.
8382 1 : assert(stream != 0 /* nullptr */);
8383 1 : if (!stream.is_null())
8384 1 : close_stream(stream);
8385 1 : skip_line();
8386 : }
8387 :
8388 : // .write and .writec
8389 :
8390 4 : static void do_write_request(bool do_append_newline)
8391 : {
8392 4 : symbol stream = read_identifier(true /* required */);
8393 4 : if (stream.is_null()) {
8394 0 : skip_line();
8395 0 : return;
8396 : }
8397 : grostream *grost = static_cast<grostream *>(stream_dictionary
8398 4 : .lookup(stream));
8399 4 : if (0 /* nullptr */ == grost) {
8400 0 : error("cannot write to nonexistent stream '%1'", stream.contents());
8401 0 : skip_line();
8402 0 : return;
8403 : }
8404 : // Invariant: if the groff stream exists, the backing C stream must.
8405 4 : assert(grost->file != 0 /* nullptr */);
8406 4 : FILE *fp = grost->file;
8407 4 : if (0 /* nullptr */ == fp) {
8408 0 : error("cannot write to nonexistent stream '%1'", stream.contents());
8409 0 : skip_line();
8410 0 : return;
8411 : }
8412 4 : if (has_arg(true /* peek */)) {
8413 3 : int c = read_char_in_copy_mode(0 /* nullptr */);
8414 3 : while (' ' == c)
8415 0 : c = read_char_in_copy_mode(0 /* nullptr */);
8416 3 : if ('"' == c)
8417 0 : c = read_char_in_copy_mode(0 /* nullptr */);
8418 6 : while (c != '\n' && c != EOF) {
8419 3 : fputs(encode_for_stream_output(c), fp);
8420 3 : c = read_char_in_copy_mode(0 /* nullptr */);
8421 : }
8422 : }
8423 4 : if (do_append_newline)
8424 4 : fputc('\n', fp);
8425 4 : fflush(fp);
8426 4 : tok.next();
8427 : }
8428 :
8429 4 : static void stream_write_request() // .write
8430 : {
8431 4 : do_write_request(true /* do append newline */);
8432 4 : }
8433 :
8434 0 : static void stream_write_continuation_request() // .writec
8435 : {
8436 0 : do_write_request(false /* do append newline */);
8437 0 : }
8438 :
8439 0 : static void stream_write_macro_request() // .writem
8440 : {
8441 0 : symbol stream = read_identifier(true /* required */);
8442 0 : if (stream.is_null()) {
8443 0 : skip_line();
8444 0 : return;
8445 : }
8446 : grostream *grost = static_cast<grostream *>(stream_dictionary
8447 0 : .lookup(stream));
8448 0 : FILE *fp = grost->file;
8449 0 : if (0 /* nullptr */ == fp) {
8450 0 : error("no stream named '%1'", stream.contents());
8451 0 : skip_line();
8452 0 : return;
8453 : }
8454 0 : symbol s = read_identifier(true /* required */);
8455 0 : if (s.is_null()) {
8456 0 : skip_line();
8457 0 : return;
8458 : }
8459 0 : request_or_macro *p = lookup_request(s);
8460 0 : macro *m = p->to_macro();
8461 0 : if (0 /* nullptr */ == m)
8462 0 : error("cannot write request '%1' to a stream", s.contents());
8463 : else {
8464 0 : string_iterator iter(*m);
8465 : for (;;) {
8466 0 : int c = iter.get(0 /* nullptr */);
8467 0 : if (c == EOF)
8468 0 : break;
8469 0 : fputs(encode_for_stream_output(c), fp);
8470 0 : }
8471 0 : fflush(fp);
8472 : }
8473 0 : skip_line();
8474 : }
8475 :
8476 0 : void warnscale_request() // .warnscale
8477 : {
8478 0 : if (!has_arg()) {
8479 0 : warning(WARN_MISSING, "warning scaling unit configuration request"
8480 : " expects a scaling unit argument");
8481 0 : skip_line();
8482 0 : return;
8483 : }
8484 0 : int c = tok.ch(); // safely compares to char literals; TODO: grochar
8485 0 : if ('u' == c)
8486 0 : warn_scale = 1.0;
8487 0 : else if ('i' == c)
8488 0 : warn_scale = double(units_per_inch);
8489 0 : else if ('c' == c)
8490 0 : warn_scale = double(units_per_inch / 2.54);
8491 0 : else if ('p' == c)
8492 0 : warn_scale = double(units_per_inch / 72.0);
8493 0 : else if ('P' == c)
8494 0 : warn_scale = double(units_per_inch / 6.0);
8495 : else {
8496 0 : warning(WARN_SCALE,
8497 : "%1 is not a valid scaling unit; using 'i'",
8498 0 : tok.description());
8499 0 : c = 'i';
8500 0 : warn_scale = double(units_per_inch);
8501 : }
8502 0 : warn_scaling_unit = c;
8503 0 : skip_line();
8504 : }
8505 :
8506 0 : void spreadwarn_request() // .spreadwarn
8507 : {
8508 0 : hunits n;
8509 0 : if (has_arg() && read_hunits(&n, 'm')) {
8510 0 : if (n < 0)
8511 0 : n = 0;
8512 0 : hunits em = curenv->get_size();
8513 0 : spread_limit = (double) n.to_units()
8514 0 : / (em.is_zero() ? hresolution : em.to_units());
8515 : }
8516 : else
8517 0 : spread_limit = -spread_limit - 1; // no arg toggles on/off without
8518 : // changing value; we mirror at
8519 : // -0.5 to make zero a valid value
8520 0 : skip_line();
8521 0 : }
8522 :
8523 : // Keep this in sync with "src/libs/libgroff/nametoindex.cpp".
8524 : // constexpr // C++11
8525 : static const char char_prefix[] = { 'c', 'h', 'a', 'r' };
8526 : // constexpr // C++11
8527 : static const size_t char_prefix_len = sizeof char_prefix;
8528 :
8529 1418 : static void init_charset_table()
8530 : {
8531 : char buf[16];
8532 1418 : (void) strncpy(buf, char_prefix, char_prefix_len);
8533 364426 : for (int i = 0; i < 256; i++) {
8534 363008 : (void) strcpy((buf + char_prefix_len), i_to_a(i));
8535 363008 : charset_table[i] = lookup_charinfo(symbol(buf));
8536 363008 : charset_table[i]->set_ascii_code(i);
8537 363008 : if (csalpha(i))
8538 73736 : charset_table[i]->set_hyphenation_code(cmlower(i));
8539 : }
8540 1418 : charset_table['.']->set_flags(charinfo::ENDS_SENTENCE);
8541 1418 : charset_table['?']->set_flags(charinfo::ENDS_SENTENCE);
8542 1418 : charset_table['!']->set_flags(charinfo::ENDS_SENTENCE);
8543 1418 : charset_table['-']->set_flags(charinfo::ALLOWS_BREAK_AFTER);
8544 1418 : charset_table['"']->set_flags(charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE);
8545 1418 : charset_table['\'']->set_flags(charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE);
8546 1418 : charset_table[')']->set_flags(charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE);
8547 1418 : charset_table[']']->set_flags(charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE);
8548 1418 : charset_table['*']->set_flags(charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE);
8549 1418 : lookup_charinfo(symbol("dg"))->set_flags(charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE);
8550 1418 : lookup_charinfo(symbol("dd"))->set_flags(charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE);
8551 1418 : lookup_charinfo(symbol("rq"))->set_flags(charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE);
8552 1418 : lookup_charinfo(symbol("cq"))->set_flags(charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE);
8553 1418 : lookup_charinfo(symbol("em"))->set_flags(charinfo::ALLOWS_BREAK_AFTER);
8554 1418 : lookup_charinfo(symbol("hy"))->set_flags(charinfo::ALLOWS_BREAK_AFTER);
8555 1418 : lookup_charinfo(symbol("ul"))->set_flags(charinfo::OVERLAPS_HORIZONTALLY);
8556 1418 : lookup_charinfo(symbol("rn"))->set_flags(charinfo::OVERLAPS_HORIZONTALLY);
8557 1418 : lookup_charinfo(symbol("radicalex"))->set_flags(charinfo::OVERLAPS_HORIZONTALLY);
8558 1418 : lookup_charinfo(symbol("sqrtex"))->set_flags(charinfo::OVERLAPS_HORIZONTALLY);
8559 1418 : lookup_charinfo(symbol("ru"))->set_flags(charinfo::OVERLAPS_HORIZONTALLY);
8560 1418 : lookup_charinfo(symbol("br"))->set_flags(charinfo::OVERLAPS_VERTICALLY);
8561 1418 : page_character = charset_table['%'];
8562 1418 : }
8563 :
8564 1418 : static void init_hpf_code_table()
8565 : {
8566 364426 : for (int i = 0; i < 256; i++)
8567 363008 : hpf_code_table[i] = cmlower(i);
8568 1418 : }
8569 :
8570 177201 : static void do_translate(bool transparently, bool as_input)
8571 : {
8572 177201 : tok.skip_spaces();
8573 354444 : while (!tok.is_newline() && !tok.is_eof()) {
8574 177513 : if (tok.is_space()) {
8575 : // This is a really bizarre troff feature.
8576 0 : tok.next();
8577 0 : translate_space_to_dummy = tok.is_dummy();
8578 0 : if (tok.is_newline() || tok.is_eof())
8579 0 : break;
8580 0 : error("cannot translate space character; ignoring");
8581 0 : tok.next();
8582 0 : continue;
8583 : }
8584 177513 : charinfo *ci1 = tok.get_charinfo(true /* required */);
8585 177513 : if (0 /* nullptr */ == ci1) {
8586 0 : assert(0 == "attempted to use token without charinfo in character"
8587 : " translation request");
8588 : break;
8589 : }
8590 177513 : tok.next();
8591 177513 : if (tok.is_newline() || tok.is_eof()) {
8592 270 : ci1->set_special_translation(charinfo::TRANSLATE_SPACE,
8593 : transparently);
8594 270 : break;
8595 : }
8596 177243 : if (tok.is_space())
8597 0 : ci1->set_special_translation(charinfo::TRANSLATE_SPACE,
8598 : transparently);
8599 177243 : else if (tok.is_stretchable_space())
8600 1418 : ci1->set_special_translation(charinfo::TRANSLATE_STRETCHABLE_SPACE,
8601 : transparently);
8602 175825 : else if (tok.is_dummy())
8603 0 : ci1->set_special_translation(charinfo::TRANSLATE_DUMMY,
8604 : transparently);
8605 175825 : else if (tok.is_hyphen_indicator())
8606 0 : ci1->set_special_translation(charinfo::TRANSLATE_HYPHEN_INDICATOR,
8607 : transparently);
8608 : else {
8609 175825 : charinfo *ci2 = tok.get_charinfo(true /* required */);
8610 175825 : if (0 /* nullptr */ == ci2) {
8611 0 : assert(0 == "attempted to use token without charinfo in"
8612 : " character translation request");
8613 : break;
8614 : }
8615 175825 : if (ci1 == ci2)
8616 14694 : ci1->set_translation(0 /* nullptr */, transparently, as_input);
8617 : else
8618 161131 : ci1->set_translation(ci2, transparently, as_input);
8619 : }
8620 177243 : tok.next();
8621 : }
8622 177201 : skip_line();
8623 177201 : }
8624 :
8625 20839 : void translate() // .tr
8626 : {
8627 20839 : if (!has_arg()) {
8628 0 : warning(WARN_MISSING, "character translation request expects"
8629 : " sequence of character pairs as argument");
8630 0 : skip_line();
8631 0 : return;
8632 : }
8633 20839 : do_translate(true /* transparently */, false /* as_input */);
8634 : }
8635 :
8636 0 : void translate_no_transparent() // .trnt
8637 : {
8638 0 : if (!has_arg()) {
8639 0 : warning(WARN_MISSING, "character non-diversion translation request"
8640 : " expects sequence of character pairs as argument");
8641 0 : skip_line();
8642 0 : return;
8643 : }
8644 0 : do_translate(false /* transparently */, false /* as_input */);
8645 : }
8646 :
8647 156362 : void translate_input() // .trin
8648 : {
8649 156362 : if (!has_arg()) {
8650 0 : warning(WARN_MISSING, "character non-asciification translation"
8651 : " request expects sequence of character pairs as argument");
8652 0 : skip_line();
8653 0 : return;
8654 : }
8655 156362 : do_translate(true /* transparently */, true /* as_input */);
8656 : }
8657 :
8658 567 : static void set_character_flags_request() // .cflags
8659 : {
8660 567 : if (!has_arg()) {
8661 0 : warning(WARN_MISSING, "character flags configuration request"
8662 : " expects arguments");
8663 0 : skip_line();
8664 0 : return;
8665 : }
8666 : int flags;
8667 567 : if (read_integer(&flags)) {
8668 567 : if ((flags < 0) || (flags > charinfo::CFLAGS_MAX)) {
8669 0 : warning(WARN_RANGE, "character flags must be in range 0..%1,"
8670 0 : " got %2", charinfo::CFLAGS_MAX, flags);
8671 0 : skip_line();
8672 0 : return;
8673 : }
8674 567 : if (((flags & charinfo::ENDS_SENTENCE)
8675 1 : && (flags & charinfo::IS_TRANSPARENT_TO_END_OF_SENTENCE))
8676 567 : || ((flags & charinfo::ALLOWS_BREAK_BEFORE)
8677 0 : && (flags & charinfo::PROHIBITS_BREAK_BEFORE))
8678 567 : || ((flags & charinfo::ALLOWS_BREAK_AFTER)
8679 28 : && (flags & charinfo::PROHIBITS_BREAK_AFTER))) {
8680 0 : warning(WARN_SYNTAX, "ignoring contradictory character flags: "
8681 0 : "%1", flags);
8682 0 : skip_line();
8683 0 : return;
8684 : }
8685 567 : if (!has_arg()) {
8686 0 : warning(WARN_MISSING, "character flags configuration request"
8687 : " expects one or more characters to configure");
8688 0 : skip_line();
8689 0 : return;
8690 : }
8691 1297 : while (has_arg()) {
8692 730 : charinfo *ci = tok.get_charinfo(true /* required */);
8693 730 : if (0 /* nullptr */ == ci)
8694 0 : assert(0 == "attempted to use token without charinfo in"
8695 : " character flags assignment request");
8696 : else {
8697 730 : charinfo *tem = ci->get_translation();
8698 730 : if (tem != 0 /* nullptr */)
8699 0 : ci = tem;
8700 730 : ci->set_flags(flags);
8701 : }
8702 730 : tok.next();
8703 : }
8704 : }
8705 567 : skip_line();
8706 : }
8707 :
8708 128375 : static void set_hyphenation_codes() // .hcode
8709 : {
8710 128375 : if (!has_arg()) {
8711 0 : warning(WARN_MISSING, "hyphenation code assignment request expects"
8712 : " arguments");
8713 0 : skip_line();
8714 0 : return;
8715 : }
8716 289416 : while (has_arg()) {
8717 161041 : unsigned char cdst = tok.ch();
8718 161041 : if (csdigit(cdst)) {
8719 0 : error("cannot apply a hyphenation code to a numeral");
8720 0 : break;
8721 : }
8722 161041 : charinfo *cidst = tok.get_charinfo();
8723 161041 : if ('\0' == cdst) {
8724 65980 : if (0 /* nullptr */ == cidst) {
8725 0 : error("expected ordinary, special, or indexed character,"
8726 0 : " got %1", tok.description());
8727 0 : break;
8728 : }
8729 : }
8730 161041 : tok.next();
8731 161041 : if (!has_arg()) {
8732 0 : error("hyphenation codes must be specified in pairs");
8733 0 : break;
8734 : }
8735 161041 : unsigned char csrc = tok.ch();
8736 161041 : if (csdigit(csrc)) {
8737 0 : error("cannot use the hyphenation code of a numeral");
8738 0 : break;
8739 : }
8740 161041 : unsigned char new_code = 0U;
8741 161041 : charinfo *cisrc = tok.get_charinfo();
8742 161041 : if (cisrc != 0 /* nullptr */)
8743 : // Common case: assign destination character the hyphenation code
8744 : // of the source character.
8745 161041 : new_code = cisrc->get_hyphenation_code();
8746 161041 : if ('\0' == csrc) {
8747 25 : if (0 /* nullptr */ == cisrc) {
8748 0 : error("expected ordinary, special, or indexed character,"
8749 0 : " got %1", tok.description());
8750 0 : break;
8751 : }
8752 25 : new_code = cisrc->get_hyphenation_code();
8753 : }
8754 : else {
8755 : // If assigning a ordinary character's hyphenation code to itself,
8756 : // use its character code point as the value.
8757 161016 : if (csrc == cdst)
8758 49165 : new_code = tok.ch();
8759 : }
8760 161041 : cidst->set_hyphenation_code(new_code);
8761 161041 : if (cidst->get_translation()
8762 161041 : && cidst->get_translation()->is_translatable_as_input())
8763 95060 : cidst->get_translation()->set_hyphenation_code(new_code);
8764 161041 : tok.next();
8765 161041 : tok.skip_spaces();
8766 : }
8767 128375 : skip_line();
8768 : }
8769 :
8770 0 : void hyphenation_patterns_file_code() // .hpfcode
8771 : {
8772 0 : error("hyphenation pattern file code assignment request will be"
8773 : " withdrawn in a future groff release; migrate to 'hcode'");
8774 0 : if (!has_arg()) {
8775 0 : warning(WARN_MISSING, "hyphenation pattern file code assignment"
8776 : " request expects arguments");
8777 0 : skip_line();
8778 0 : return;
8779 : }
8780 0 : while (!tok.is_newline() && !tok.is_eof()) {
8781 : int n1, n2;
8782 0 : if (read_integer(&n1) && ((0 <= n1) && (n1 <= 255))) {
8783 0 : if (!has_arg()) {
8784 0 : error("missing output hyphenation code");
8785 0 : break;
8786 : }
8787 0 : if (read_integer(&n2) && ((0 <= n2) && (n2 <= 255))) {
8788 0 : hpf_code_table[n1] = n2;
8789 0 : tok.skip_spaces();
8790 : }
8791 : else {
8792 0 : error("output hyphenation code must be integer in the range 0..255");
8793 0 : break;
8794 : }
8795 : }
8796 : else {
8797 0 : error("input hyphenation code must be integer in the range 0..255");
8798 0 : break;
8799 : }
8800 : }
8801 0 : skip_line();
8802 : }
8803 :
8804 : dictionary char_class_dictionary(501);
8805 :
8806 54 : static void define_class_request() // .class
8807 : {
8808 54 : tok.skip_spaces();
8809 54 : symbol nm = read_identifier(true /* required */);
8810 54 : if (nm.is_null()) {
8811 0 : skip_line();
8812 1 : return;
8813 : }
8814 54 : charinfo *ci = lookup_charinfo(nm);
8815 : // Assign the charinfo an empty macro as a hack to record the
8816 : // file:line location of its definition.
8817 54 : macro *m = new macro;
8818 54 : (void) ci->set_macro(m);
8819 54 : charinfo *child1 = 0 /* nullptr */, *child2 = 0 /* nullptr */;
8820 54 : bool just_chained_a_range_expression = false;
8821 742 : while (!tok.is_newline() && !tok.is_eof()) {
8822 689 : tok.skip_spaces();
8823 : // Chained range expressions like
8824 : // \[u3041]-\[u3096]-\[u30FF]
8825 : // are not valid.
8826 : // TODO: use grochar
8827 689 : if ((child1 != 0 /* nullptr */) && (tok.ch() == int('-'))) {
8828 35 : tok.next();
8829 35 : child2 = tok.get_charinfo();
8830 35 : if (0 /* nullptr */ == child2) {
8831 0 : warning(WARN_MISSING,
8832 : "missing end of character range in class '%1'",
8833 0 : nm.contents());
8834 0 : skip_line();
8835 0 : return;
8836 : }
8837 35 : if (child1->is_class() || child2->is_class()) {
8838 0 : warning(WARN_SYNTAX,
8839 : "a nested character class is not allowed in a range"
8840 : " definition");
8841 0 : skip_line();
8842 0 : return;
8843 : }
8844 35 : int u1 = child1->get_unicode_mapping();
8845 35 : int u2 = child2->get_unicode_mapping();
8846 35 : if (u1 < 0) {
8847 0 : warning(WARN_SYNTAX,
8848 : "invalid start value in character range");
8849 0 : skip_line();
8850 0 : return;
8851 : }
8852 35 : if (u2 < 0) {
8853 0 : warning(WARN_SYNTAX,
8854 : "invalid end value in character range");
8855 0 : skip_line();
8856 0 : return;
8857 : }
8858 35 : ci->add_to_class(u1, u2);
8859 35 : child1 = child2 = 0 /* nullptr */;
8860 35 : just_chained_a_range_expression = true;
8861 : }
8862 654 : else if (child1 != 0 /* nullptr */) {
8863 582 : if (child1->is_class()) {
8864 0 : if (ci == child1) {
8865 0 : warning(WARN_SYNTAX, "cannot nest character classes");
8866 0 : skip_line();
8867 0 : return;
8868 : }
8869 0 : ci->add_to_class(child1);
8870 : }
8871 : else {
8872 582 : int u1 = child1->get_unicode_mapping();
8873 582 : if (u1 < 0) {
8874 0 : warning(WARN_SYNTAX,
8875 : "invalid character value in class '%1'",
8876 0 : nm.contents());
8877 0 : skip_line();
8878 0 : return;
8879 : }
8880 582 : ci->add_to_class(u1);
8881 : }
8882 582 : child1 = 0 /* nullptr */;
8883 : }
8884 689 : if (tok.is_any_character())
8885 688 : child1 = tok.get_charinfo(true /* required */);
8886 : else
8887 : // If we encountered a space or nonsense, we cannot be
8888 : // interpreting a range expression; there should be no "child1".
8889 1 : assert(0 /* nullptr */ == child1);
8890 689 : tok.next();
8891 689 : if (0 /* nullptr */ == child1) {
8892 1 : if (!tok.is_newline())
8893 1 : skip_line();
8894 1 : break;
8895 : }
8896 688 : if (just_chained_a_range_expression) {
8897 : // Throw away `child1` so we don't duplicatively add the second
8898 : // end point of a range as a singleton. See Savannah #67718.
8899 35 : child1 = 0 /* nullptr */;
8900 35 : just_chained_a_range_expression = false;
8901 : }
8902 : }
8903 54 : if (child1 != 0 /* nullptr */) {
8904 36 : if (child1->is_class()) {
8905 0 : if (ci == child1) {
8906 0 : warning(WARN_SYNTAX, "cannot nest character classes");
8907 0 : skip_line();
8908 0 : return;
8909 : }
8910 0 : ci->add_to_class(child1);
8911 : }
8912 : else {
8913 36 : int u1 = child1->get_unicode_mapping();
8914 36 : if (u1 < 0) {
8915 1 : warning(WARN_SYNTAX,
8916 : "invalid character value in class '%1'",
8917 1 : nm.contents());
8918 1 : skip_line();
8919 1 : return;
8920 : }
8921 35 : ci->add_to_class(u1);
8922 : }
8923 35 : child1 = 0 /* nullptr */;
8924 : }
8925 53 : assert(ci != 0 /* nullptr */);
8926 53 : if (ci != 0 /* nullptr */ && !ci->is_class()) {
8927 0 : warning(WARN_SYNTAX,
8928 : "empty class definition for '%1'",
8929 0 : nm.contents());
8930 0 : skip_line();
8931 0 : return;
8932 : }
8933 53 : (void) char_class_dictionary.lookup(nm, ci);
8934 53 : skip_line();
8935 : }
8936 :
8937 : // forward declaration
8938 : static charinfo *get_charinfo_by_index(int n,
8939 : bool suppress_creation = false);
8940 :
8941 1161889 : charinfo *token::get_charinfo(bool required, bool suppress_creation)
8942 : {
8943 1161889 : if (TOKEN_CHAR == type)
8944 383175 : return charset_table[c];
8945 778714 : if ((TOKEN_SPECIAL_CHAR == type)
8946 45036 : || (TOKEN_DELIMITED_SPECIAL_CHAR == type))
8947 733730 : return lookup_charinfo(nm, suppress_creation);
8948 44984 : if (TOKEN_INDEXED_CHAR == type)
8949 24934 : return get_charinfo_by_index(val, suppress_creation);
8950 20050 : if (TOKEN_ESCAPE == type) {
8951 1 : if (escape_char != 0U)
8952 1 : return charset_table[escape_char];
8953 : else {
8954 : // XXX: Is this possible? token::add_to_zero_width_node_list()
8955 : // and token::process() don't add this token type if the escape
8956 : // character is null. If not, this should be an assert(). Also
8957 : // see escape_off_request().
8958 0 : error("escaped 'e' used while escape sequences disabled");
8959 0 : return 0 /* nullptr */;
8960 : }
8961 : }
8962 20049 : if (required) {
8963 0 : if (TOKEN_EOF == type || TOKEN_NEWLINE == type)
8964 0 : warning(WARN_MISSING, "missing ordinary, special, or indexed"
8965 : " character");
8966 : else
8967 0 : error("expected ordinary, special, or indexed character, got %1",
8968 0 : description());
8969 : }
8970 20049 : return 0 /* nullptr */;
8971 : }
8972 :
8973 1798 : charinfo *read_character(/* TODO?: bool required */)
8974 : {
8975 1798 : tok.skip_spaces();
8976 1798 : charinfo *ci = tok.get_charinfo();
8977 : // TODO?: if (required && (0 /* nullptr */ == ci))
8978 1798 : if (0 /* nullptr */ == ci)
8979 517 : tok.diagnose_non_character();
8980 : else
8981 1281 : tok.next();
8982 1798 : return ci;
8983 : }
8984 :
8985 : // this is for \Z
8986 :
8987 4079 : bool token::add_to_zero_width_node_list(node **pp)
8988 : {
8989 4079 : hunits w;
8990 4079 : int s = 0; /* space count, possibly populated by `nspaces()` */
8991 4079 : node *n = 0 /* nullptr */;
8992 4079 : switch (type) {
8993 1777 : case TOKEN_CHAR:
8994 1777 : *pp = (*pp)->add_char(charset_table[c], curenv, &w, &s);
8995 1777 : break;
8996 85 : case TOKEN_DUMMY:
8997 85 : n = new dummy_node;
8998 85 : break;
8999 0 : case TOKEN_ESCAPE:
9000 0 : if (escape_char != 0U)
9001 0 : *pp = (*pp)->add_char(charset_table[escape_char], curenv, &w, &s);
9002 0 : break;
9003 0 : case TOKEN_HYPHEN_INDICATOR:
9004 0 : *pp = (*pp)->add_discretionary_hyphen();
9005 0 : break;
9006 421 : case TOKEN_ITALIC_CORRECTION:
9007 421 : *pp = (*pp)->add_italic_correction(&w);
9008 421 : break;
9009 0 : case TOKEN_LEFT_BRACE:
9010 0 : break;
9011 0 : case TOKEN_MARK_INPUT:
9012 0 : set_register(nm, curenv->get_input_line_position().to_units());
9013 0 : break;
9014 1555 : case TOKEN_NODE:
9015 : case TOKEN_DELIMITED_HORIZONTAL_MOTION:
9016 : case TOKEN_HORIZONTAL_MOTION:
9017 1555 : n = nd;
9018 1555 : nd = 0 /* nullptr */;
9019 1555 : break;
9020 0 : case TOKEN_INDEXED_CHAR:
9021 0 : *pp = (*pp)->add_char(get_charinfo_by_index(val), curenv, &w, &s);
9022 0 : break;
9023 0 : case TOKEN_RIGHT_BRACE:
9024 0 : break;
9025 123 : case TOKEN_SPACE:
9026 123 : n = new hmotion_node(curenv->get_space_width(),
9027 123 : curenv->get_fill_color());
9028 123 : break;
9029 112 : case TOKEN_SPECIAL_CHAR:
9030 : case TOKEN_DELIMITED_SPECIAL_CHAR:
9031 112 : *pp = (*pp)->add_char(lookup_charinfo(nm), curenv, &w, &s);
9032 112 : break;
9033 0 : case TOKEN_STRETCHABLE_SPACE:
9034 0 : n = new unbreakable_space_node(curenv->get_space_width(),
9035 0 : curenv->get_fill_color());
9036 0 : break;
9037 6 : case TOKEN_UNSTRETCHABLE_SPACE:
9038 6 : n = new space_char_hmotion_node(curenv->get_space_width(),
9039 6 : curenv->get_fill_color());
9040 6 : break;
9041 0 : case TOKEN_TRANSPARENT_DUMMY:
9042 0 : n = new transparent_dummy_node;
9043 0 : break;
9044 0 : case TOKEN_ZERO_WIDTH_BREAK:
9045 0 : n = new space_node(H0, curenv->get_fill_color());
9046 0 : n->freeze_space();
9047 0 : n->is_escape_colon();
9048 0 : break;
9049 0 : default:
9050 0 : return false;
9051 : }
9052 4079 : if (n != 0 /* nullptr */) {
9053 1769 : n->next = *pp;
9054 1769 : *pp = n;
9055 : }
9056 4079 : return true;
9057 : }
9058 :
9059 10300002 : void token::process()
9060 : {
9061 10300002 : if (possibly_handle_first_page_transition())
9062 14 : return;
9063 10299988 : switch (type) {
9064 0 : case TOKEN_BACKSPACE:
9065 0 : curenv->add_node(new hmotion_node(-curenv->get_space_width(),
9066 0 : curenv->get_fill_color()));
9067 0 : break;
9068 9261793 : case TOKEN_CHAR:
9069 : // Optimize `curenv->add_char(get_charinfo())` for token type.
9070 9261793 : curenv->add_char(charset_table[c]);
9071 9261793 : break;
9072 78856 : case TOKEN_DUMMY:
9073 78856 : curenv->add_node(new dummy_node);
9074 78856 : break;
9075 0 : case TOKEN_EMPTY:
9076 0 : assert(0 == "unhandled empty token");
9077 : break;
9078 0 : case TOKEN_EOF:
9079 0 : assert(0 == "unhandled end-of-file token");
9080 : break;
9081 5560 : case TOKEN_ESCAPE:
9082 5560 : if (escape_char != 0U)
9083 5560 : curenv->add_char(charset_table[escape_char]);
9084 5560 : break;
9085 0 : case TOKEN_BEGIN_TRAP:
9086 : case TOKEN_END_TRAP:
9087 : case TOKEN_PAGE_EJECTOR:
9088 : // these are all handled in process_input_stack()
9089 0 : break;
9090 82840 : case TOKEN_HYPHEN_INDICATOR:
9091 82840 : curenv->add_hyphen_indicator();
9092 82840 : break;
9093 112600 : case TOKEN_INTERRUPT:
9094 112600 : curenv->interrupt();
9095 112600 : break;
9096 33376 : case TOKEN_ITALIC_CORRECTION:
9097 33376 : curenv->add_italic_correction();
9098 33376 : break;
9099 281 : case TOKEN_LEADER:
9100 281 : curenv->advance_to_tab_stop(true /* use_leader */);
9101 281 : break;
9102 0 : case TOKEN_LEFT_BRACE:
9103 0 : break;
9104 480 : case TOKEN_MARK_INPUT:
9105 480 : set_register(nm, curenv->get_input_line_position().to_units());
9106 480 : break;
9107 0 : case TOKEN_NEWLINE:
9108 0 : curenv->newline();
9109 0 : break;
9110 466323 : case TOKEN_NODE:
9111 : case TOKEN_DELIMITED_HORIZONTAL_MOTION:
9112 : case TOKEN_HORIZONTAL_MOTION:
9113 466323 : curenv->add_node(nd);
9114 466323 : nd = 0 /* nullptr */;
9115 466323 : break;
9116 21640 : case TOKEN_INDEXED_CHAR:
9117 : // Optimize `curenv->add_char(get_charinfo())` for token type.
9118 21640 : curenv->add_char(get_charinfo_by_index(val));
9119 21640 : break;
9120 0 : case TOKEN_REQUEST:
9121 : // handled in process_input_stack()
9122 0 : break;
9123 0 : case TOKEN_RIGHT_BRACE:
9124 0 : break;
9125 94899 : case TOKEN_SPACE:
9126 94899 : curenv->space();
9127 94899 : break;
9128 57122 : case TOKEN_SPECIAL_CHAR:
9129 : case TOKEN_DELIMITED_SPECIAL_CHAR:
9130 : // Optimize `curenv->add_char(get_charinfo())` for token type.
9131 57122 : curenv->add_char(lookup_charinfo(nm));
9132 57122 : break;
9133 16 : case TOKEN_SPREAD:
9134 16 : curenv->spread();
9135 16 : break;
9136 8431 : case TOKEN_STRETCHABLE_SPACE:
9137 16862 : curenv->add_node(new unbreakable_space_node(curenv->get_space_width(),
9138 8431 : curenv->get_fill_color()));
9139 8431 : break;
9140 5771 : case TOKEN_UNSTRETCHABLE_SPACE:
9141 11542 : curenv->add_node(new space_char_hmotion_node(curenv->get_space_width(),
9142 5771 : curenv->get_fill_color()));
9143 5771 : break;
9144 817 : case TOKEN_TAB:
9145 817 : curenv->advance_to_tab_stop();
9146 817 : break;
9147 0 : case TOKEN_TRANSPARENT:
9148 0 : break;
9149 19742 : case TOKEN_TRANSPARENT_DUMMY:
9150 19742 : curenv->add_node(new transparent_dummy_node);
9151 19742 : break;
9152 49441 : case TOKEN_ZERO_WIDTH_BREAK:
9153 : {
9154 49441 : node *tmp = new space_node(H0, curenv->get_fill_color());
9155 49441 : tmp->freeze_space();
9156 49441 : tmp->is_escape_colon();
9157 49441 : curenv->add_node(tmp);
9158 49441 : break;
9159 : }
9160 0 : default:
9161 0 : assert(0 == "unhandled token type");
9162 : }
9163 : }
9164 :
9165 : class nargs_reg : public reg {
9166 : public:
9167 : const char *get_string();
9168 : };
9169 :
9170 725076 : const char *nargs_reg::get_string()
9171 : {
9172 725076 : return i_to_a(input_stack::nargs());
9173 : }
9174 :
9175 : class lineno_reg : public reg {
9176 : public:
9177 : const char *get_string();
9178 : };
9179 :
9180 121 : const char *lineno_reg::get_string()
9181 : {
9182 : int line;
9183 : const char *file;
9184 121 : if (!input_stack::get_location(false /* allow macro */, &file, &line))
9185 3 : line = 0;
9186 242 : return i_to_a(line);
9187 : }
9188 :
9189 : class writable_lineno_reg : public general_reg {
9190 : public:
9191 : writable_lineno_reg();
9192 : void set_value(units);
9193 : bool get_value(units *);
9194 : };
9195 :
9196 1418 : writable_lineno_reg::writable_lineno_reg()
9197 : {
9198 1418 : }
9199 :
9200 1 : bool writable_lineno_reg::get_value(units *res)
9201 : {
9202 : int line;
9203 : const char *file;
9204 1 : if (!input_stack::get_location(false /* allow macro */, &file, &line))
9205 0 : return false;
9206 1 : *res = line;
9207 1 : return true;
9208 : }
9209 :
9210 0 : void writable_lineno_reg::set_value(units n)
9211 : {
9212 0 : (void) input_stack::set_location(0, n);
9213 0 : }
9214 :
9215 : class filename_reg : public reg {
9216 : public:
9217 : const char *get_string();
9218 : };
9219 :
9220 1630 : const char *filename_reg::get_string()
9221 : {
9222 : int line;
9223 : const char *file;
9224 1630 : if (input_stack::get_location(false /* allow macro */, &file, &line))
9225 1312 : return file;
9226 : else
9227 318 : return 0 /* nullptr */;
9228 : }
9229 :
9230 : class break_flag_reg : public reg {
9231 : public:
9232 : const char *get_string();
9233 : };
9234 :
9235 22 : const char *break_flag_reg::get_string()
9236 : {
9237 22 : return i_to_a(input_stack::get_break_flag());
9238 : }
9239 :
9240 : class enclosing_want_att_compat_reg : public reg {
9241 : public:
9242 : const char *get_string();
9243 : };
9244 :
9245 10411 : const char *enclosing_want_att_compat_reg::get_string()
9246 : {
9247 20820 : return i_to_a(want_att_compat_stack.empty() ? 0
9248 20820 : : want_att_compat_stack.top());
9249 : }
9250 :
9251 : class readonly_text_register : public reg {
9252 : const char *s;
9253 : public:
9254 : readonly_text_register(const char *);
9255 : readonly_text_register(int);
9256 : const char *get_string();
9257 : };
9258 :
9259 4254 : readonly_text_register::readonly_text_register(const char *p) : s(p)
9260 : {
9261 4254 : }
9262 :
9263 4254 : readonly_text_register::readonly_text_register(int i)
9264 : {
9265 4254 : s = strdup(i_to_a(i));
9266 4254 : }
9267 :
9268 11950 : const char *readonly_text_register::get_string()
9269 : {
9270 11950 : return s;
9271 : }
9272 :
9273 5672 : readonly_register::readonly_register(int *q) : p(q)
9274 : {
9275 5672 : }
9276 :
9277 7252 : const char *readonly_register::get_string()
9278 : {
9279 7252 : return i_to_a(*p);
9280 : }
9281 :
9282 8508 : readonly_boolean_register::readonly_boolean_register(bool *q): p(q)
9283 : {
9284 8508 : }
9285 :
9286 2138 : const char *readonly_boolean_register::get_string()
9287 : {
9288 2138 : return i_to_a(*p);
9289 : }
9290 :
9291 : class readonly_mask_register : public reg {
9292 : unsigned int *mask;
9293 : public:
9294 : readonly_mask_register(unsigned int *);
9295 : const char *get_string();
9296 : };
9297 :
9298 1418 : readonly_mask_register::readonly_mask_register(unsigned int * m)
9299 1418 : : mask(m)
9300 : {
9301 1418 : }
9302 :
9303 369 : const char *readonly_mask_register::get_string()
9304 : {
9305 369 : return ui_to_a(*mask);
9306 : }
9307 :
9308 14 : void abort_request()
9309 : {
9310 : int c;
9311 14 : if (tok.is_eof())
9312 0 : c = EOF;
9313 14 : else if (tok.is_newline())
9314 13 : c = '\n';
9315 : else {
9316 1 : while ((c = read_char_in_copy_mode(0 /* nullptr */)) == ' ')
9317 : ;
9318 : }
9319 14 : if (!(c == EOF || c == '\n')) {
9320 3 : for (;
9321 4 : (c != '\n') && (c != EOF);
9322 3 : (c = read_char_in_copy_mode(0 /* nullptr */)))
9323 3 : fputs(encode_for_stream_output(c), stderr);
9324 1 : fputc('\n', stderr);
9325 : }
9326 14 : fflush(stderr);
9327 14 : write_any_trailer_and_exit(EXIT_FAILURE);
9328 0 : }
9329 :
9330 : // Consume the rest of the input line in copy mode and return it as a C
9331 : // string; if, after spaces, the argument starts with a `"`, discard it,
9332 : // letting any immediately subsequent spaces populate the returned
9333 : // string.
9334 : //
9335 : // The caller must subsequently call `tok.next()` to advance the input
9336 : // stream pointer.
9337 : //
9338 : // The caller has responsibility for `delete`ing the returned buffer.
9339 16796 : char *read_rest_of_line_as_argument()
9340 : {
9341 16796 : int buf_size = 256;
9342 16796 : char *s = new char[buf_size]; // C++03: new char[buf_size]();
9343 16796 : (void) memset(s, 0, (buf_size * sizeof(char)));
9344 16796 : int c = read_char_in_copy_mode(0 /* nullptr */);
9345 16796 : while (' ' == c)
9346 0 : c = read_char_in_copy_mode(0 /* nullptr */);
9347 16796 : if ('"' == c)
9348 1391 : c = read_char_in_copy_mode(0 /* nullptr */);
9349 16796 : int i = 0;
9350 209619 : while ((c != '\n') && (c != EOF)) {
9351 192823 : if (!is_invalid_input_char(c)) {
9352 192823 : if ((i + 2) > buf_size) {
9353 0 : char *tem = s;
9354 0 : s = new char[buf_size * 2]; // C++03: new char[buf_size * 2]();
9355 0 : (void) memset(s, 0, (buf_size * 2 * sizeof(char)));
9356 0 : memcpy(s, tem, buf_size);
9357 0 : buf_size *= 2;
9358 0 : delete[] tem;
9359 : }
9360 192823 : s[i++] = c;
9361 : }
9362 192823 : c = read_char_in_copy_mode(0 /* nullptr */);
9363 : }
9364 16796 : s[i] = '\0';
9365 16796 : if (0 == i) {
9366 3 : delete[] s;
9367 3 : return 0 /* nullptr */;
9368 : }
9369 16793 : return s;
9370 : }
9371 :
9372 2 : void pipe_output()
9373 : {
9374 2 : if (!has_arg(true /* peek */)) {
9375 0 : warning(WARN_MISSING, "output piping request expects a system"
9376 : " command as argument");
9377 0 : skip_line();
9378 0 : return;
9379 : }
9380 2 : if (!want_unsafe_requests) {
9381 0 : error("output piping request is not allowed in safer mode");
9382 0 : skip_line();
9383 0 : return;
9384 : }
9385 2 : if (the_output != 0 /* nullptr */) {
9386 0 : error("cannot honor pipe request: output already started");
9387 0 : skip_line();
9388 0 : return;
9389 : }
9390 2 : char *pc = read_rest_of_line_as_argument();
9391 : // `has_arg()` should have ensured that this pointer is non-null.
9392 2 : assert(pc != 0 /* nullptr */);
9393 2 : if (0 /* nullptr */ == pc)
9394 0 : error("cannot apply pipe request to empty command");
9395 : // Are we adding to an existing pipeline?
9396 2 : if (pipe_command != 0 /* nullptr */) {
9397 : // ISO C++ does not permit VLAs on the stack.
9398 : // C++03: new char[strlen(pipe_command) + strlen(pc) + 1 + 1]();
9399 0 : char *s = new char[strlen(pipe_command) + strlen(pc) + 1 + 1];
9400 0 : (void) memset(s, 0, ((strlen(pipe_command) + strlen(pc) + 1 + 1)
9401 : * sizeof(char)));
9402 0 : strcpy(s, pipe_command);
9403 0 : strcat(s, "|");
9404 0 : strcat(s, pc);
9405 0 : delete[] pipe_command;
9406 0 : delete[] pc;
9407 0 : pipe_command = s;
9408 : }
9409 : else
9410 2 : pipe_command = pc;
9411 2 : delete[] pc;
9412 2 : tok.next();
9413 : }
9414 :
9415 : static int system_status;
9416 :
9417 8 : void system_request()
9418 : {
9419 8 : if (!has_arg(true /* peek */)) {
9420 0 : warning(WARN_MISSING, "system command execution request expects a"
9421 : " system command as argument");
9422 0 : skip_line();
9423 0 : return;
9424 : }
9425 8 : if (!want_unsafe_requests) {
9426 2 : error("system command execution request is not allowed in safer"
9427 : " mode");
9428 2 : skip_line();
9429 2 : return;
9430 : }
9431 6 : char *command = read_rest_of_line_as_argument();
9432 : // `has_arg()` should have ensured that this pointer is non-null.
9433 6 : assert(command != 0 /* nullptr */);
9434 6 : if (0 /* nullptr */ == command)
9435 0 : error("cannot apply system request to empty command");
9436 : else
9437 6 : system_status = system(command);
9438 6 : delete[] command;
9439 6 : tok.next();
9440 : }
9441 :
9442 15 : static void unsafe_transparent_throughput_file_request()
9443 : {
9444 15 : if (!has_arg(true /* peek */)) {
9445 0 : warning(WARN_MISSING, "file throughput request expects a file name"
9446 : " as argument");
9447 0 : skip_line();
9448 0 : return;
9449 : }
9450 15 : if (!want_unsafe_requests) {
9451 14 : error("file throughput request is not allowed in safer mode");
9452 14 : skip_line();
9453 14 : return;
9454 : }
9455 1 : if ((curdiv == topdiv) && (topdiv->before_first_page_status > 0)) {
9456 0 : handle_initial_request(COPY_FILE_REQUEST);
9457 0 : return;
9458 : }
9459 1 : char *filename = read_rest_of_line_as_argument();
9460 1 : if (was_invoked_with_regular_control_character)
9461 1 : curenv->do_break();
9462 1 : if (filename != 0 /* nullptr */)
9463 1 : curdiv->copy_file(filename);
9464 : // TODO: Add `filename` to file name set.
9465 1 : tok.next();
9466 : }
9467 :
#ifdef COLUMN

// Handle the vertical adjustment request (built only when COLUMN is
// defined): read an identifier naming the adjustment type and apply it
// to the current diversion.
//
// If the request arrives in the top-level diversion before the first
// page has begun, it is handed to `handle_initial_request()` instead.
// The "before first page" test uses `before_first_page_status > 0`,
// the same test the other requests in this file use.
// NOTE(review): this code is not compiled in default builds; confirm
// against the declaration of `top_level_diversion` before enabling
// COLUMN.
void vjustify()
{
  if (!has_arg()) {
    warning(WARN_MISSING, "vertical adjustment request expects an"
            " argument");
    skip_line();
    return;
  }
  if ((curdiv == topdiv) && (topdiv->before_first_page_status > 0)) {
    handle_initial_request(VJUSTIFY_REQUEST);
    return;
  }
  symbol type = read_long_identifier(true /* required */);
  if (!type.is_null())
    curdiv->vjustify(type);
  skip_line();
}

#endif /* COLUMN */
9489 :
9490 4 : static void transparent_throughput_file_request()
9491 : {
9492 4 : if (!has_arg(true /* peek */)) {
9493 0 : warning(WARN_MISSING, "transparent file throughput request expects"
9494 : " a file name as argument");
9495 0 : skip_line();
9496 0 : return;
9497 : }
9498 4 : if ((curdiv == topdiv) && (topdiv->before_first_page_status > 0)) {
9499 0 : handle_initial_request(TRANSPARENT_FILE_REQUEST);
9500 0 : return;
9501 : }
9502 4 : char *filename = read_rest_of_line_as_argument();
9503 4 : if (was_invoked_with_regular_control_character)
9504 3 : curenv->do_break();
9505 4 : if (filename != 0 /* nullptr */) {
9506 4 : errno = 0;
9507 4 : FILE *fp = include_search_path.open_file_cautiously(filename);
9508 4 : if (0 /* nullptr */ == fp)
9509 0 : error("cannot open '%1': %2", filename, strerror(errno));
9510 : else {
9511 4 : if (curdiv != topdiv)
9512 2 : curdiv->copy_file(filename);
9513 : else {
9514 2 : bool reading_beginning_of_input_line = true;
9515 : for (;;) {
9516 66 : int c = getc(fp);
9517 66 : if (c == EOF)
9518 2 : break;
9519 64 : if (is_invalid_input_char(c))
9520 0 : warning(WARN_INPUT, "invalid input character code %1",
9521 0 : int(c));
9522 : else {
9523 64 : curdiv->transparent_output(c);
9524 64 : reading_beginning_of_input_line = c == '\n';
9525 : }
9526 64 : }
9527 2 : if (!reading_beginning_of_input_line)
9528 0 : curdiv->transparent_output('\n');
9529 2 : fclose(fp);
9530 : }
9531 : }
9532 : }
9533 4 : tok.next();
9534 : }
9535 :
// One entry in a singly linked list of page ranges.  A `last` value of
// zero or less means the range has no upper bound.
class page_range {
  int first;
  int last;
public:
  page_range *next;
  page_range(int, int, page_range *);
  int contains(int n);
};

// Create the range `i`..`j` and link it in front of `p`.
page_range::page_range(int i, int j, page_range *p)
: first(i), last(j), next(p)
{
}

// Return nonzero if page number `n` lies within this range.
int page_range::contains(int n)
{
  if (n < first)
    return 0;
  if (last <= 0)
    return 1; // open-ended range
  return n <= last;
}
9554 :
9555 : page_range *output_page_list = 0 /* nullptr */;
9556 :
9557 2655 : bool in_output_page_list(int n)
9558 : {
9559 2655 : if (!output_page_list)
9560 2655 : return true;
9561 0 : for (page_range *p = output_page_list;
9562 0 : p != 0 /* nullptr */;
9563 0 : p = p->next)
9564 0 : if (p->contains(n))
9565 0 : return true;
9566 0 : return false;
9567 : }
9568 :
9569 0 : static void parse_output_page_list(const char *p)
9570 : {
9571 0 : const char *pstart = p; // for diagnostic message
9572 : for (;;) {
9573 : int i;
9574 0 : if (*p == '-')
9575 0 : i = 1;
9576 0 : else if (csdigit(*p)) {
9577 0 : i = 0;
9578 0 : do
9579 0 : i = i*10 + *p++ - '0';
9580 0 : while (csdigit(*p));
9581 : }
9582 : else
9583 0 : break;
9584 : int j;
9585 0 : if (*p == '-') {
9586 0 : p++;
9587 0 : j = 0;
9588 0 : if (csdigit(*p)) {
9589 0 : do
9590 0 : j = j*10 + *p++ - '0';
9591 0 : while (csdigit(*p));
9592 : }
9593 : }
9594 : else
9595 0 : j = i;
9596 0 : if (j == 0)
9597 0 : last_page_number = -1;
9598 0 : else if (last_page_number >= 0 && j > last_page_number)
9599 0 : last_page_number = j;
9600 0 : output_page_list = new page_range(i, j, output_page_list);
9601 0 : if (*p != ',')
9602 0 : break;
9603 0 : ++p;
9604 0 : }
9605 0 : if (*p != '\0') {
9606 0 : error("ignoring invalid output page list argument '%1'", pstart);
9607 0 : output_page_list = 0 /* nullptr */;
9608 : }
9609 0 : }
9610 :
9611 509 : static FILE *open_macro_package(const char *mac, char **path)
9612 : {
9613 : // Try `mac`.tmac first, then tmac.`mac`. Expect ENOENT errors.
9614 : // ISO C++ does not permit VLAs on the stack.
9615 : // C++03: new char[strlen(mac) + strlen(MACRO_POSTFIX) + 1]();
9616 509 : char *s1 = new char[strlen(mac) + strlen(MACRO_POSTFIX) + 1];
9617 509 : (void) memset(s1, 0, ((strlen(mac) + strlen(MACRO_POSTFIX) + 1)
9618 : * sizeof(char)));
9619 509 : strcpy(s1, mac);
9620 509 : strcat(s1, MACRO_POSTFIX);
9621 509 : FILE *fp = mac_path->open_file(s1, path);
9622 509 : if ((0 /* nullptr */ == fp) && (ENOENT != errno))
9623 0 : error("cannot open macro file '%1': %2", s1, strerror(errno));
9624 509 : delete[] s1;
9625 509 : if (0 /* nullptr */ == fp) {
9626 : // ISO C++ does not permit VLAs on the stack.
9627 : // C++03: new char[strlen(mac) + strlen(MACRO_PREFIX) + 1]();
9628 0 : char *s2 = new char[strlen(mac) + strlen(MACRO_PREFIX) + 1];
9629 0 : (void) memset(s2, 0, ((strlen(mac) + strlen(MACRO_PREFIX) + 1)
9630 : * sizeof(char)));
9631 0 : strcpy(s2, MACRO_PREFIX);
9632 0 : strcat(s2, mac);
9633 0 : fp = mac_path->open_file(s2, path);
9634 0 : if ((0 /* nullptr */ == fp) && (ENOENT != errno))
9635 0 : error("cannot open macro file '%1': %2", s2, strerror(errno));
9636 0 : delete[] s2;
9637 : }
9638 509 : return fp;
9639 : }
9640 :
9641 509 : static void process_macro_package_argument(const char *mac)
9642 : {
9643 : char *path;
9644 509 : FILE *fp = open_macro_package(mac, &path);
9645 509 : if (0 /* nullptr */ == fp)
9646 0 : fatal("cannot open macro file named in '-m' command-line argument"
9647 0 : " '%1': %2", mac, strerror(errno));
9648 509 : const char *s = symbol(path).contents();
9649 509 : free(path);
9650 509 : input_stack::push(new file_iterator(fp, s));
9651 509 : tok.next();
9652 509 : process_input_stack();
9653 509 : }
9654 :
9655 2836 : static void process_startup_file(const char *filename)
9656 : {
9657 : char *path;
9658 2836 : search_path *orig_mac_path = mac_path;
9659 2836 : mac_path = &config_macro_path;
9660 2836 : FILE *fp = mac_path->open_file(filename, &path);
9661 2836 : if (fp != 0 /* nullptr */) {
9662 2836 : input_stack::push(new file_iterator(fp, symbol(path).contents()));
9663 2836 : free(path);
9664 2836 : tok.next();
9665 2836 : process_input_stack();
9666 : }
9667 0 : else if (errno != ENOENT)
9668 0 : error("cannot open startup file '%1': %2", filename,
9669 0 : strerror(errno));
9670 2836 : mac_path = orig_mac_path;
9671 2836 : }
9672 :
9673 12287 : void do_macro_source(bool quietly)
9674 : {
9675 12287 : char *macro_filename = read_rest_of_line_as_argument();
9676 : char *path;
9677 12287 : FILE *fp = mac_path->open_file(macro_filename, &path);
9678 12287 : if (fp != 0 /* nullptr */) {
9679 12231 : input_stack::push(new file_iterator(fp, macro_filename));
9680 12231 : free(path);
9681 : }
9682 : else
9683 : // Suppress diagnostic only if we're operating quietly and it's an
9684 : // expected problem.
9685 56 : if (!quietly && (ENOENT == errno))
9686 0 : warning(WARN_FILE, "cannot open macro file '%1': %2",
9687 0 : macro_filename, strerror(errno));
9688 : // TODO: Add `macro_filename` to file name set.
9689 12287 : tok.next();
9690 12287 : }
9691 :
9692 12000 : void macro_source_request() // .mso
9693 : {
9694 12000 : if (!has_arg(true /* peek */)) {
9695 0 : warning(WARN_MISSING, "macro file sourcing request expects an"
9696 : " argument");
9697 0 : skip_line();
9698 0 : return;
9699 : }
9700 12000 : do_macro_source(false /* quietly */ );
9701 : }
9702 :
9703 : // like .mso, but silently ignore files that can't be opened due to
9704 : // their nonexistence
9705 287 : void macro_source_quietly_request() // .msoquiet
9706 : {
9707 287 : if (!has_arg(true /* peek */)) {
9708 0 : warning(WARN_MISSING, "quiet macro file sourcing request expects an"
9709 : " argument");
9710 0 : skip_line();
9711 0 : return;
9712 : }
9713 287 : do_macro_source(true /* quietly */ );
9714 : }
9715 :
9716 1422 : static void process_input_file(const char *name)
9717 : {
9718 : FILE *fp;
9719 1422 : if (strcmp(name, "-") == 0) {
9720 1396 : clearerr(stdin);
9721 1396 : fp = stdin;
9722 : }
9723 : else {
9724 26 : errno = 0;
9725 26 : fp = include_search_path.open_file_cautiously(name);
9726 26 : if (0 /* nullptr */ == fp)
9727 0 : fatal("cannot open '%1': %2", name, strerror(errno));
9728 : }
9729 1422 : input_stack::push(new file_iterator(fp, name));
9730 1422 : tok.next();
9731 1422 : process_input_stack();
9732 1407 : }
9733 :
9734 : // make sure the_input is empty before calling this
9735 :
9736 186 : static int evaluate_expression(const char *expr, units *res)
9737 : {
9738 186 : input_stack::push(make_temp_iterator(expr));
9739 186 : tok.next();
9740 : // TODO: grochar
9741 186 : int success = read_measurement(res, (unsigned char)('u'));
9742 186 : while (input_stack::get(0 /* nullptr */) != EOF)
9743 : ;
9744 186 : return success;
9745 : }
9746 :
9747 186 : static void do_register_assignment(const char *s)
9748 : {
9749 186 : const char *p = strchr(s, '=');
9750 186 : if (!p) {
9751 : char buf[2];
9752 75 : buf[0] = s[0];
9753 75 : buf[1] = 0;
9754 : units n;
9755 75 : if (evaluate_expression(s + 1, &n))
9756 75 : set_register(buf, n);
9757 : }
9758 : else {
9759 : // ISO C++ does not permit VLAs on the stack.
9760 111 : char *buf = new char[p - s + 1]; // C++03: new char[p - s + 1]();
9761 111 : (void) memset(buf, 0, ((p - s + 1) * sizeof(char)));
9762 111 : (void) memcpy(buf, s, p - s);
9763 111 : buf[p - s] = 0;
9764 : units n;
9765 111 : if (evaluate_expression(p + 1, &n))
9766 111 : set_register(buf, n);
9767 111 : delete[] buf;
9768 : }
9769 186 : }
9770 :
9771 1526 : static void set_string(const char *name, const char *value)
9772 : {
9773 1526 : macro *m = new macro;
9774 8676 : for (const char *p = value; *p != 0 /* nullptr */; p++)
9775 7150 : if (!is_invalid_input_char(static_cast<unsigned char>(*p)))
9776 7150 : m->append(*p);
9777 1526 : request_dictionary.define(name, m);
9778 1526 : }
9779 :
9780 108 : static void do_string_assignment(const char *s)
9781 : {
9782 108 : const char *p = strchr(s, '=');
9783 108 : if (!p) {
9784 : char buf[2];
9785 1 : buf[0] = s[0];
9786 1 : buf[1] = 0;
9787 1 : set_string(buf, s + 1);
9788 : }
9789 : else {
9790 : // ISO C++ does not permit VLAs on the stack.
9791 107 : char *buf = new char[p - s + 1]; // C++03: new char[p - s + 1]();
9792 107 : (void) memset(buf, 0, ((p - s + 1) * sizeof(char)));
9793 107 : (void) memcpy(buf, s, p - s);
9794 107 : buf[p - s] = 0;
9795 107 : set_string(buf, p + 1);
9796 107 : delete[] buf;
9797 : }
9798 108 : }
9799 :
// Node of a singly linked list of strings.  The node stores the
// pointer it is given; it does not copy or own the text.
struct string_list {
  const char *s;
  string_list *next;
  string_list(const char *ss) : s(ss), next(0) {}
};

#if 0
static void prepend_string(const char *s, string_list **p)
{
  string_list *l = new string_list(s);
  l->next = *p;
  *p = l;
}
#endif

// Append a new node holding `s` to the end of the list whose head
// pointer is `*p`.
static void add_string(const char *s, string_list **p)
{
  string_list **tail = p;
  // Walk to the null link that terminates the list.
  while (*tail != 0 /* nullptr */)
    tail = &((*tail)->next);
  *tail = new string_list(s);
}
9821 :
// Write a usage message for the program named `prog` to `stream`.
// When `stream` is the standard output stream, a short description of
// the program follows the synopsis.
void usage(FILE *stream, const char *prog)
{
  fprintf(stream,
"usage: %s [-abcCEiRSUz] [-d ctext] [-d string=text] [-f font-family]"
" [-F font-directory] [-I inclusion-directory] [-m macro-package]"
" [-M macro-directory] [-n page-number] [-o page-list]"
" [-r cnumeric-expression] [-r register=numeric-expression]"
" [-T output-device] [-w warning-category] [-W warning-category]"
" [file ...]\n"
"usage: %s {-v | --version}\n"
"usage: %s --help\n",
          prog, prog, prog);
  if (stream != stdout)
    return;
  // The long description accompanies only a usage message sent to the
  // standard output stream.
  fputs(
"\n"
"GNU troff transforms groff(7) language input into the device-\n"
"independent page description language detailed in groff_out(5); it\n"
"is the heart of the GNU roff document formatting system. Many\n"
"people prefer to use the groff(1) command, a front end that also\n"
"runs preprocessors and output drivers in the appropriate order and\n"
"with appropriate options. See the troff(1) manual page.\n",
        stream);
}
9845 :
9846 1420 : int main(int argc, char **argv)
9847 : {
9848 1420 : program_name = argv[0];
9849 : static char stderr_buf[BUFSIZ];
9850 1420 : setbuf(stderr, stderr_buf);
9851 : int c;
9852 1420 : string_list *macros = 0 /* nullptr */;
9853 1420 : string_list *register_assignments = 0 /* nullptr */;
9854 1420 : string_list *string_assignments = 0 /* nullptr */;
9855 1420 : bool want_stdin_read_last = false;
9856 1420 : bool have_explicit_device_argument = false;
9857 1420 : bool have_explicit_default_family = false;
9858 1420 : bool have_explicit_first_page_number = false;
9859 1420 : bool want_startup_macro_files_skipped = false;
9860 1420 : bool is_safer_mode_locked = false; // made true if `-S` explicit
9861 1420 : int next_page_number = 0; // pacify compiler
9862 1420 : hresolution = vresolution = 1;
9863 1420 : if (getenv("GROFF_DUMP_NODES") != 0 /* nullptr */)
9864 0 : want_nodes_dumped = true;
9865 : // restore $PATH if called from groff
9866 1420 : char* groff_path = getenv("GROFF_PATH__");
9867 1420 : if (groff_path != 0 /* nullptr */) {
9868 2840 : string e = "PATH";
9869 1420 : e += '=';
9870 1420 : if (*groff_path)
9871 1420 : e += groff_path;
9872 1420 : e += '\0';
9873 1420 : if (putenv(strsave(e.contents())) != 0)
9874 0 : fatal("cannot update process environment: %1", strerror(errno));
9875 : }
9876 1420 : setlocale(LC_CTYPE, "");
9877 : static const struct option long_options[] = {
9878 : { "help", no_argument, 0 /* nullptr */, CHAR_MAX + 1 },
9879 : { "version", no_argument, 0 /* nullptr */, 'v' },
9880 : { 0, 0, 0, 0 }
9881 : };
9882 : #if defined(DEBUGGING)
9883 : #define DEBUG_OPTION "D"
9884 : #else
9885 : #define DEBUG_OPTION ""
9886 : #endif
9887 5145 : while ((c = getopt_long(argc, argv,
9888 : ":abcCd:Ef:F:iI:m:M:n:o:qr:Rs:StT:Uvw:W:z"
9889 : DEBUG_OPTION,
9890 : long_options, 0 /* nullptr */))
9891 5145 : != EOF)
9892 3727 : switch (c) {
9893 2 : case 'v':
9894 : {
9895 2 : printf("GNU troff (groff) version %s\n", Version_string);
9896 2 : exit(EXIT_SUCCESS);
9897 : break;
9898 : }
9899 26 : case 'I':
9900 : // Search path for .psbb files
9901 : // and most other non-system input files.
9902 26 : include_search_path.command_line_dir(optarg);
9903 26 : break;
9904 1418 : case 'T':
9905 1418 : device = optarg;
9906 1418 : have_explicit_device_argument = true;
9907 1418 : is_writing_html = (strcmp(device, "html") == 0);
9908 1418 : break;
9909 317 : case 'C':
9910 317 : want_att_compat = true;
9911 : // fall through
9912 317 : case 'c':
9913 317 : permit_color_output = false;
9914 317 : break;
9915 146 : case 'M':
9916 146 : macro_path.command_line_dir(optarg);
9917 146 : safer_macro_path.command_line_dir(optarg);
9918 146 : config_macro_path.command_line_dir(optarg);
9919 146 : break;
9920 0 : case 'F':
9921 0 : font::command_line_font_dir(optarg);
9922 0 : break;
9923 511 : case 'm':
9924 511 : add_string(optarg, ¯os);
9925 511 : break;
9926 0 : case 'E':
9927 0 : want_errors_inhibited = true;
9928 0 : break;
9929 0 : case 'R':
9930 0 : want_startup_macro_files_skipped = true;
9931 0 : break;
9932 749 : case 'w':
9933 749 : enable_warning(optarg);
9934 749 : break;
9935 29 : case 'W':
9936 29 : disable_warning(optarg);
9937 29 : break;
9938 0 : case 'i':
9939 0 : want_stdin_read_last = true;
9940 0 : break;
9941 79 : case 'b':
9942 79 : want_backtraces = true;
9943 79 : break;
9944 47 : case 'a':
9945 47 : want_abstract_output = true;
9946 47 : break;
9947 87 : case 'z':
9948 87 : want_output_suppressed = true;
9949 87 : break;
9950 0 : case 'n':
9951 0 : if (sscanf(optarg, "%d", &next_page_number) == 1)
9952 0 : have_explicit_first_page_number = true;
9953 : else
9954 0 : error("bad page number");
9955 0 : break;
9956 0 : case 'o':
9957 0 : parse_output_page_list(optarg);
9958 0 : break;
9959 108 : case 'd':
9960 108 : if (*optarg == '\0')
9961 0 : error("'-d' requires non-empty argument");
9962 108 : else if (*optarg == '=')
9963 0 : error("malformed argument to '-d'; string name cannot be empty"
9964 : " or contain an equals sign");
9965 : else
9966 108 : add_string(optarg, &string_assignments);
9967 108 : break;
9968 186 : case 'r':
9969 186 : if (*optarg == '\0')
9970 0 : error("'-r' requires non-empty argument");
9971 186 : else if (*optarg == '=')
9972 0 : error("malformed argument to '-r'; register name cannot be"
9973 : " empty or contain an equals sign");
9974 : else
9975 186 : add_string(optarg, ®ister_assignments);
9976 186 : break;
9977 0 : case 'f':
9978 0 : default_family = symbol(optarg);
9979 0 : have_explicit_default_family = true;
9980 0 : break;
9981 0 : case 'q':
9982 : case 's':
9983 : case 't':
9984 : // silently ignore these
9985 0 : break;
9986 1 : case 'S':
9987 1 : want_unsafe_requests = false;
9988 1 : is_safer_mode_locked = true;
9989 1 : break;
9990 21 : case 'U':
9991 21 : if (is_safer_mode_locked)
9992 0 : error("ignoring '-U' option; '-S' already specified");
9993 : else
9994 21 : want_unsafe_requests = true;
9995 21 : break;
9996 : #if defined(DEBUGGING)
9997 : case 'D':
9998 : want_html_debugging = true;
9999 : break;
10000 : #endif
10001 0 : case CHAR_MAX + 1: // --help
10002 0 : usage(stdout, argv[0]);
10003 0 : exit(EXIT_SUCCESS);
10004 : break;
10005 0 : case '?':
10006 0 : if (optopt != 0)
10007 0 : error("unrecognized command-line option '%1'", char(optopt));
10008 : else
10009 0 : error("unrecognized command-line option '%1'",
10010 0 : argv[(optind - 1)]);
10011 0 : usage(stderr, argv[0]);
10012 0 : exit(2);
10013 : break; // never reached
10014 0 : case ':':
10015 0 : error("command-line option '%1' requires an argument",
10016 0 : char(optopt));
10017 0 : usage(stderr, argv[0]);
10018 0 : exit(2);
10019 : break; // never reached
10020 0 : default:
10021 0 : assert(0 == "unhandled case of command-line option");
10022 : }
10023 1418 : if (want_unsafe_requests)
10024 21 : mac_path = ¯o_path;
10025 1418 : set_string(".T", device);
10026 : // TODO: Kill this off in groff 1.24.0 release + 2 years. See env.cpp.
10027 1418 : if ((strcmp("pdf", device) == 0) || strcmp("ps", device) == 0)
10028 199 : is_device_ps_or_pdf = true;
10029 1418 : init_charset_table();
10030 1418 : init_hpf_code_table();
10031 1418 : if (0 /* nullptr */ == font::load_desc())
10032 0 : fatal("cannot load 'DESC' description file for device '%1'",
10033 0 : device);
10034 1418 : units_per_inch = font::res;
10035 1418 : hresolution = font::hor;
10036 1418 : vresolution = font::vert;
10037 1418 : sizescale = font::sizescale;
10038 1418 : device_has_tcommand = font::has_tcommand;
10039 1418 : warn_scale = (double) units_per_inch;
10040 1418 : warn_scaling_unit = 'i';
10041 1418 : if (!have_explicit_default_family && (font::family != 0 /* nullptr */)
10042 238 : && *font::family != '\0')
10043 238 : default_family = symbol(font::family);
10044 1418 : font_size::init_size_list(font::sizes);
10045 : int i;
10046 1418 : int j = 1;
10047 1418 : if (font::style_table)
10048 1390 : for (i = 0; font::style_table[i] != 0 /* nullptr */; i++)
10049 : // Mounting a style can't actually fail due to a bad style name;
10050 : // that's not determined until the full font name is resolved.
10051 : // The DESC file also can't provoke a problem by requesting over a
10052 : // thousand slots in the style table.
10053 1112 : if (!mount_style(j++, symbol(font::style_table[i])))
10054 0 : warning(WARN_FONT, "cannot mount style '%1' directed by 'DESC'"
10055 0 : " file for device '%2'", font::style_table[i], device);
10056 9764 : for (i = 0; font::font_name_table[i] != 0 /* nullptr */; i++, j++)
10057 : // In the DESC file, a font name of 0 (zero) means "leave this
10058 : // position empty".
10059 8346 : if (strcmp(font::font_name_table[i], "0") != 0)
10060 6716 : if (!mount_font(j, symbol(font::font_name_table[i])))
10061 0 : warning(WARN_FONT, "cannot mount font '%1' directed by 'DESC'"
10062 0 : " file for device '%2'", font::font_name_table[i],
10063 0 : device);
10064 1418 : curdiv = topdiv = new top_level_diversion;
10065 1418 : if (have_explicit_first_page_number)
10066 0 : topdiv->set_next_page_number(next_page_number);
10067 1418 : init_input_requests();
10068 1418 : init_env_requests();
10069 1418 : init_div_requests();
10070 : #ifdef COLUMN
10071 : init_column_requests();
10072 : #endif /* COLUMN */
10073 1418 : init_node_requests();
10074 1418 : register_dictionary.define(".T",
10075 1418 : new readonly_boolean_register(&have_explicit_device_argument));
10076 1418 : init_registers();
10077 1418 : init_reg_requests();
10078 1418 : init_hyphenation_pattern_requests();
10079 1418 : init_environments();
10080 1526 : while (string_assignments != 0 /* nullptr */) {
10081 108 : do_string_assignment(string_assignments->s);
10082 108 : string_list *tem = string_assignments;
10083 108 : string_assignments = string_assignments->next;
10084 108 : delete tem;
10085 : }
10086 1604 : while (register_assignments != 0 /* nullptr */) {
10087 186 : do_register_assignment(register_assignments->s);
10088 186 : string_list *tem = register_assignments;
10089 186 : register_assignments = register_assignments->next;
10090 186 : delete tem;
10091 : }
10092 1418 : if (!want_startup_macro_files_skipped)
10093 1418 : process_startup_file(INITIAL_STARTUP_FILE);
10094 1927 : while (macros != 0 /* nullptr */) {
10095 509 : process_macro_package_argument(macros->s);
10096 509 : string_list *tem = macros;
10097 509 : macros = macros->next;
10098 509 : delete tem;
10099 : }
10100 1418 : if (!want_startup_macro_files_skipped)
10101 1418 : process_startup_file(FINAL_STARTUP_FILE);
10102 1448 : for (i = optind; i < argc; i++)
10103 30 : process_input_file(argv[i]);
10104 1418 : if (optind >= argc || want_stdin_read_last)
10105 1392 : process_input_file("-");
10106 1403 : exit_troff();
10107 0 : return 0; // not reached
10108 : }
10109 :
10110 246 : void set_warning_mask_request()
10111 : {
10112 : int n;
10113 246 : if (has_arg() && read_integer(&n)) {
10114 246 : if (n & ~WARN_MAX) {
10115 0 : warning(WARN_RANGE, "warning mask must be in range 0..%1, got %2",
10116 0 : WARN_MAX, n);
10117 0 : n &= WARN_MAX;
10118 : }
10119 246 : warning_mask = n;
10120 : }
10121 : else
10122 0 : warning_mask = WARN_MAX;
10123 246 : skip_line();
10124 246 : }
10125 :
10126 1418 : static void init_registers()
10127 : {
10128 1418 : struct tm *t = current_time();
10129 1418 : set_register("seconds", int(t->tm_sec));
10130 1418 : set_register("minutes", int(t->tm_min));
10131 1418 : set_register("hours", int(t->tm_hour));
10132 1418 : set_register("dw", int(t->tm_wday + 1));
10133 1418 : set_register("dy", int(t->tm_mday));
10134 1418 : set_register("mo", int(t->tm_mon + 1));
10135 1418 : set_register("year", int(1900 + t->tm_year));
10136 1418 : set_register("yr", int(t->tm_year));
10137 1418 : set_register("$$", getpid());
10138 1418 : register_dictionary.define(".A",
10139 1418 : new readonly_text_register(want_abstract_output));
10140 1418 : }
10141 :
10142 : /*
10143 : * registers associated with \O
10144 : */
10145 :
10146 : static int output_reg_minx_contents = -1;
10147 : static int output_reg_miny_contents = -1;
10148 : static int output_reg_maxx_contents = -1;
10149 : static int output_reg_maxy_contents = -1;
10150 :
10151 6560879 : void check_output_limits(int x, int y)
10152 : {
10153 6560879 : if ((output_reg_minx_contents == -1) || (x < output_reg_minx_contents))
10154 1360 : output_reg_minx_contents = x;
10155 6560879 : if (x > output_reg_maxx_contents)
10156 21327 : output_reg_maxx_contents = x;
10157 6560879 : if ((output_reg_miny_contents == -1) || (y < output_reg_miny_contents))
10158 2366 : output_reg_miny_contents = y;
10159 6560879 : if (y > output_reg_maxy_contents)
10160 27494 : output_reg_maxy_contents = y;
10161 6560879 : }
10162 :
10163 117 : void reset_output_registers()
10164 : {
10165 117 : output_reg_minx_contents = -1;
10166 117 : output_reg_miny_contents = -1;
10167 117 : output_reg_maxx_contents = -1;
10168 117 : output_reg_maxy_contents = -1;
10169 117 : }
10170 :
10171 1418 : void init_input_requests()
10172 : {
10173 1418 : init_request("ab", abort_request);
10174 1418 : init_request("als", alias_macro);
10175 1418 : init_request("am", append_macro);
10176 1418 : init_request("am1", append_nocomp_macro);
10177 1418 : init_request("ami", append_indirect_macro);
10178 1418 : init_request("ami1", append_indirect_nocomp_macro);
10179 1418 : init_request("as", append_string);
10180 1418 : init_request("as1", append_nocomp_string);
10181 1418 : init_request("asciify", asciify_request);
10182 1418 : init_request("backtrace", backtrace_request);
10183 1418 : init_request("blm", blank_line_macro);
10184 1418 : init_request("break", while_break_request);
10185 1418 : init_request("cc", assign_control_character_request);
10186 1418 : init_request("c2", assign_no_break_control_character_request);
10187 1418 : init_request("cf", unsafe_transparent_throughput_file_request);
10188 1418 : init_request("cflags", set_character_flags_request);
10189 1418 : init_request("char", define_character_request);
10190 1418 : init_request("chop", chop_macro);
10191 1418 : init_request("class", define_class_request);
10192 1418 : init_request("close", close_request);
10193 1418 : init_request("color", activate_color);
10194 1418 : init_request("composite", map_composite_character);
10195 1418 : init_request("continue", while_continue_request);
10196 1418 : init_request("cp", compatible);
10197 1418 : init_request("de", define_macro);
10198 1418 : init_request("de1", define_nocomp_macro);
10199 1418 : init_request("defcolor", define_color);
10200 1418 : init_request("dei", define_indirect_macro);
10201 1418 : init_request("dei1", define_indirect_nocomp_macro);
10202 1418 : init_request("device", device_request);
10203 1418 : init_request("devicem", device_macro_request);
10204 1418 : init_request("do", do_request);
10205 1418 : init_request("ds", define_string);
10206 1418 : init_request("ds1", define_nocomp_string);
10207 1418 : init_request("ec", assign_escape_character_request);
10208 1418 : init_request("ecr", restore_escape_char_request);
10209 1418 : init_request("ecs", save_escape_char_request);
10210 1418 : init_request("el", else_request);
10211 1418 : init_request("em", eoi_macro);
10212 1418 : init_request("eo", escape_off_request);
10213 1418 : init_request("ex", exit_request);
10214 1418 : init_request("fchar", define_fallback_character_request);
10215 : #ifdef WIDOW_CONTROL
10216 : init_request("fpl", flush_pending_lines);
10217 : #endif /* WIDOW_CONTROL */
10218 1418 : init_request("hcode", set_hyphenation_codes);
10219 1418 : init_request("hpfcode", hyphenation_patterns_file_code);
10220 1418 : init_request("ie", if_else_request);
10221 1418 : init_request("if", if_request);
10222 1418 : init_request("ig", ignore);
10223 1418 : init_request("length", length_request);
10224 1418 : init_request("lf", line_file);
10225 1418 : init_request("lsm", leading_spaces_macro);
10226 1418 : init_request("mso", macro_source_request);
10227 1418 : init_request("msoquiet", macro_source_quietly_request);
10228 1418 : init_request("nop", nop_request);
10229 1418 : init_request("nroff", nroff_request);
10230 1418 : init_request("nx", next_file);
10231 1418 : init_request("open", open_request);
10232 1418 : init_request("opena", opena_request);
10233 1418 : init_request("output", output_request);
10234 1418 : init_request("pc", page_character_request);
10235 1418 : init_request("pchar", print_character_request);
10236 1418 : init_request("pcolor", print_color_request);
10237 1418 : init_request("pcomposite", print_composite_character_request);
10238 1418 : init_request("pi", pipe_output);
10239 1418 : init_request("pm", print_macro_request);
10240 1418 : init_request("psbb", ps_bbox_request);
10241 1418 : init_request("pso", pipe_source_request);
10242 1418 : init_request("pstream", print_stream_request);
10243 1418 : init_request("rchar", remove_character);
10244 1418 : init_request("rd", read_request);
10245 1418 : init_request("return", return_macro_request);
10246 1418 : init_request("rm", remove_macro);
10247 1418 : init_request("rn", rename_macro);
10248 1418 : init_request("schar", define_special_character_request);
10249 1418 : init_request("shift", shift);
10250 1418 : init_request("so", source_request);
10251 1418 : init_request("soquiet", source_quietly_request);
10252 1418 : init_request("spreadwarn", spreadwarn_request);
10253 1418 : init_request("stringdown", stringdown_request);
10254 1418 : init_request("stringup", stringup_request);
10255 1418 : init_request("substring", substring_request);
10256 1418 : init_request("sy", system_request);
10257 1418 : init_request("tag", tag);
10258 1418 : init_request("taga", taga);
10259 1418 : init_request("tm", terminal_message_request);
10260 1418 : init_request("tm1", terminal_message1_request);
10261 1418 : init_request("tmc", terminal_message_continuation_request);
10262 1418 : init_request("tr", translate);
10263 1418 : init_request("trf", transparent_throughput_file_request);
10264 1418 : init_request("trin", translate_input);
10265 1418 : init_request("trnt", translate_no_transparent);
10266 1418 : init_request("troff", troff_request);
10267 1418 : init_request("unformat", unformat_macro);
10268 : #ifdef COLUMN
10269 : init_request("vj", vjustify);
10270 : #endif /* COLUMN */
10271 1418 : init_request("warn", set_warning_mask_request);
10272 1418 : init_request("warnscale", warnscale_request);
10273 1418 : init_request("while", while_request);
10274 1418 : init_request("write", stream_write_request);
10275 1418 : init_request("writec", stream_write_continuation_request);
10276 1418 : init_request("writem", stream_write_macro_request);
10277 1418 : register_dictionary.define(".$", new nargs_reg);
10278 1418 : register_dictionary.define(".br", new break_flag_reg);
10279 1418 : register_dictionary.define(".C", new readonly_boolean_register(&want_att_compat));
10280 1418 : register_dictionary.define(".cp", new enclosing_want_att_compat_reg);
10281 1418 : register_dictionary.define(".O", new variable_reg(&suppression_level));
10282 1418 : register_dictionary.define(".c", new lineno_reg);
10283 1418 : register_dictionary.define(".color", new readonly_boolean_register(&want_color_output));
10284 1418 : register_dictionary.define(".F", new filename_reg);
10285 1418 : register_dictionary.define(".g", new readonly_text_register(1));
10286 1418 : register_dictionary.define(".H", new readonly_register(&hresolution));
10287 1418 : register_dictionary.define(".R", new readonly_text_register(INT_MAX));
10288 1418 : register_dictionary.define(".U", new readonly_boolean_register(&want_unsafe_requests));
10289 1418 : register_dictionary.define(".V", new readonly_register(&vresolution));
10290 1418 : register_dictionary.define(".warn", new readonly_mask_register(&warning_mask));
10291 : extern const char *major_version;
10292 1418 : register_dictionary.define(".x", new readonly_text_register(major_version));
10293 : extern const char *revision;
10294 1418 : register_dictionary.define(".Y", new readonly_text_register(revision));
10295 : extern const char *minor_version;
10296 1418 : register_dictionary.define(".y", new readonly_text_register(minor_version));
10297 1418 : register_dictionary.define("c.", new writable_lineno_reg);
10298 1418 : register_dictionary.define("llx", new variable_reg(&llx_reg_contents));
10299 1418 : register_dictionary.define("lly", new variable_reg(&lly_reg_contents));
10300 1418 : register_dictionary.define("lsn", new variable_reg(&leading_spaces_number));
10301 1418 : register_dictionary.define("lss", new variable_reg(&leading_spaces_space));
10302 1418 : register_dictionary.define("opmaxx",
10303 1418 : new variable_reg(&output_reg_maxx_contents));
10304 1418 : register_dictionary.define("opmaxy",
10305 1418 : new variable_reg(&output_reg_maxy_contents));
10306 1418 : register_dictionary.define("opminx",
10307 1418 : new variable_reg(&output_reg_minx_contents));
10308 1418 : register_dictionary.define("opminy",
10309 1418 : new variable_reg(&output_reg_miny_contents));
10310 1418 : register_dictionary.define("slimit",
10311 1418 : new variable_reg(&input_stack::limit));
10312 1418 : register_dictionary.define("systat", new variable_reg(&system_status));
10313 1418 : register_dictionary.define("urx", new variable_reg(&urx_reg_contents));
10314 1418 : register_dictionary.define("ury", new variable_reg(&ury_reg_contents));
10315 1418 : }
10316 :
10317 : object_dictionary request_dictionary(501);
10318 :
10319 275092 : void init_request(const char *s, REQUEST_FUNCP f)
10320 : {
10321 275092 : request_dictionary.define(s, new request(f));
10322 275092 : }
10323 :
10324 4800417 : static request_or_macro *lookup_request(symbol nm)
10325 : {
10326 4800417 : assert(!nm.is_null());
10327 : request_or_macro *p
10328 4800417 : = static_cast<request_or_macro *>(request_dictionary.lookup(nm));
10329 4800417 : if (0 /* nullptr */ == p) {
10330 7060 : warning(WARN_MAC, "name '%1' not defined", nm.contents());
10331 7060 : p = new macro;
10332 7060 : request_dictionary.define(nm, p);
10333 : }
10334 4800417 : return p;
10335 : }
10336 :
10337 : // XXX: move to node.cpp, its only call site?
10338 50900 : node *charinfo_to_node_list(charinfo *ci, const environment *envp)
10339 : {
10340 : // Don't interpret character definitions in AT&T compatibility mode.
10341 50900 : int old_want_att_compat = want_att_compat;
10342 50900 : want_att_compat = false;
10343 50900 : unsigned char previous_escape_char = escape_char;
10344 50900 : escape_char = '\\';
10345 50900 : macro *mac = ci->set_macro(0 /* nullptr */);
10346 50900 : assert(mac != 0 /* nullptr */);
10347 50900 : environment *oldenv = curenv;
10348 101800 : environment env(envp);
10349 50900 : curenv = &env;
10350 50900 : curenv->set_composite();
10351 50900 : token old_tok = tok;
10352 50900 : input_stack::add_boundary();
10353 : string_iterator *si =
10354 50900 : new string_iterator(*mac, "special character", ci->nm);
10355 50900 : input_stack::push(si);
10356 : // Don't use process_input_stack, because we don't want to recognize
10357 : // requests.
10358 : for (;;) {
10359 105041 : tok.next();
10360 105041 : if (tok.is_eof())
10361 50900 : break;
10362 54141 : if (tok.is_newline()) {
10363 0 : error("a newline is not allowed in a composite character"
10364 : " escape sequence argument");
10365 0 : while (!tok.is_eof())
10366 0 : tok.next();
10367 0 : break;
10368 : }
10369 : else
10370 54141 : tok.process();
10371 : }
10372 50900 : node *n = curenv->extract_output_line();
10373 50900 : input_stack::remove_boundary();
10374 50900 : ci->set_macro(mac);
10375 50900 : tok = old_tok;
10376 50900 : curenv = oldenv;
10377 50900 : want_att_compat = old_want_att_compat;
10378 50900 : escape_char = previous_escape_char;
10379 50900 : have_formattable_input = false;
10380 101800 : return n;
10381 : }
10382 :
10383 230491 : static node *read_drawing_command() // \D
10384 : {
10385 460982 : token start_token;
10386 230491 : start_token.next();
10387 230491 : if (!want_att_compat && !start_token.is_usable_as_delimiter())
10388 0 : warning(WARN_DELIM, "using %1 as an escape sequence delimiter"
10389 0 : " is deprecated", tok.description());
10390 230491 : else if (want_att_compat
10391 230491 : && !start_token.is_usable_as_delimiter(false,
10392 : DELIMITER_ATT_STRING_EXPRESSION)) {
10393 0 : warning(WARN_DELIM, "drawing command escape sequence"
10394 : " does not accept %1 as a delimiter",
10395 0 : start_token.description());
10396 0 : return 0 /* nullptr */;
10397 : }
10398 : // TODO: groff 1.24.0 release + 2 years?
10399 : #if 0
10400 : if (!start_token.is_usable_as_delimiter(true /* report error */))
10401 : return 0 /* nullptr */;
10402 : #endif
10403 : else {
10404 230491 : tok.next();
10405 230491 : if (tok == start_token)
10406 0 : warning(WARN_MISSING, "missing arguments to drawing escape"
10407 : " sequence");
10408 : else {
10409 230491 : int type = tok.ch(); // safely compares to char literals
10410 : // TODO: grochar
10411 230491 : if (type == 'F') {
10412 189 : read_drawing_command_color_arguments(start_token);
10413 189 : return 0 /* nullptr */;
10414 : }
10415 230302 : tok.next();
10416 230302 : int maxpoints = 10;
10417 2533322 : hvpair *point = new hvpair[maxpoints];
10418 230302 : int npoints = 0;
10419 230302 : bool no_last_v = false;
10420 230302 : bool had_error = false;
10421 : int i;
10422 404649 : for (i = 0; tok != start_token; i++) {
10423 296674 : if (i == maxpoints) {
10424 0 : hvpair *oldpoint = point;
10425 0 : point = new hvpair[maxpoints * 2];
10426 0 : for (int j = 0; j < maxpoints; j++)
10427 0 : point[j] = oldpoint[j];
10428 0 : maxpoints *= 2;
10429 0 : delete[] oldpoint;
10430 : }
10431 296674 : if (tok.is_newline() || tok.is_eof()) {
10432 : // token::description() writes to static, class-wide storage,
10433 : // so we must allocate a copy of it before issuing the next
10434 : // diagnostic.
10435 0 : char *delimdesc = strdup(start_token.description());
10436 0 : warning(WARN_DELIM, "missing closing delimiter in drawing"
10437 : " escape sequence; expected %1, got %2", delimdesc,
10438 0 : tok.description());
10439 0 : free(delimdesc);
10440 0 : had_error = true;
10441 0 : break;
10442 : }
10443 593348 : if (!read_hunits(&point[i].h,
10444 296674 : type == 'f' || type == 't' ? 'u' : 'm')) {
10445 0 : had_error = true;
10446 0 : break;
10447 : }
10448 296674 : ++npoints;
10449 296674 : tok.skip_spaces();
10450 296674 : point[i].v = V0;
10451 296674 : if (tok == start_token) {
10452 122327 : no_last_v = true;
10453 122327 : break;
10454 : }
10455 174347 : if (!read_vunits(&point[i].v, 'v')) {
10456 0 : had_error = false;
10457 0 : break;
10458 : }
10459 174347 : tok.skip_spaces();
10460 : }
10461 230302 : while (tok != start_token && !tok.is_newline() && !tok.is_eof())
10462 0 : tok.next();
10463 230302 : if (!had_error) {
10464 230302 : switch (type) {
10465 74421 : case 'l':
10466 74421 : if (npoints != 1 || no_last_v) {
10467 0 : error("two arguments needed for line");
10468 0 : npoints = 1;
10469 : }
10470 74421 : break;
10471 254 : case 'c':
10472 254 : if (npoints != 1 || !no_last_v) {
10473 0 : error("one argument needed for circle");
10474 0 : npoints = 1;
10475 0 : point[0].v = V0;
10476 : }
10477 254 : break;
10478 42 : case 'e':
10479 42 : if (npoints != 1 || no_last_v) {
10480 0 : error("two arguments needed for ellipse");
10481 0 : npoints = 1;
10482 : }
10483 42 : break;
10484 211 : case 'a':
10485 211 : if (npoints != 2 || no_last_v) {
10486 0 : error("four arguments needed for arc");
10487 0 : npoints = 2;
10488 : }
10489 211 : break;
10490 16 : case '~':
10491 16 : if (no_last_v)
10492 0 : error("even number of arguments needed for spline");
10493 16 : break;
10494 0 : case 'f':
10495 0 : if (npoints != 1 || !no_last_v) {
10496 0 : error("one argument needed for gray shade");
10497 0 : npoints = 1;
10498 0 : point[0].v = V0;
10499 : }
10500 : default:
10501 : // silently pass it through
10502 155358 : break;
10503 : }
10504 : draw_node *dn = new draw_node(type, point, npoints,
10505 230302 : curenv->get_font_size(),
10506 230302 : curenv->get_stroke_color(),
10507 230302 : curenv->get_fill_color());
10508 230302 : delete[] point;
10509 230302 : return dn;
10510 : }
10511 : else {
10512 0 : delete[] point;
10513 : }
10514 : }
10515 : }
10516 0 : return 0 /* nullptr */;
10517 : }
10518 :
10519 189 : static void read_drawing_command_color_arguments(token &start)
10520 : {
10521 189 : tok.next();
10522 189 : if (tok == start) {
10523 0 : error("missing color scheme");
10524 0 : return;
10525 : }
10526 : // safely compares to char literals; TODO: grochar
10527 189 : int scheme = tok.ch();
10528 189 : tok.next();
10529 189 : color *col = 0 /* nullptr */;
10530 : // TODO: grochar
10531 189 : unsigned char end = start.ch();
10532 189 : switch (scheme) {
10533 0 : case 'c':
10534 0 : col = read_cmy(end);
10535 0 : break;
10536 0 : case 'd':
10537 0 : col = &default_color;
10538 0 : break;
10539 189 : case 'g':
10540 189 : col = read_gray(end);
10541 189 : break;
10542 0 : case 'k':
10543 0 : col = read_cmyk(end);
10544 0 : break;
10545 0 : case 'r':
10546 0 : col = read_rgb(end);
10547 0 : break;
10548 : }
10549 189 : if (col != 0 /* nullptr */)
10550 189 : curenv->set_fill_color(col);
10551 189 : while (tok != start) {
10552 0 : if (!has_arg()) {
10553 : // token::description() writes to static, class-wide storage, so
10554 : // we must allocate a copy of it before issuing the next
10555 : // diagnostic.
10556 0 : char *delimdesc = strdup(start.description());
10557 0 : warning(WARN_DELIM, "missing closing delimiter in color space"
10558 : " drawing escape sequence; expected %1, got %2",
10559 0 : delimdesc, tok.description());
10560 0 : free(delimdesc);
10561 0 : input_stack::push(make_temp_iterator("\n"));
10562 0 : break;
10563 : }
10564 0 : tok.next();
10565 : }
10566 189 : have_formattable_input = true;
10567 : }
10568 :
10569 : static struct warning_category {
10570 : const char *name;
10571 : unsigned int mask;
10572 : } warning_table[] = {
10573 : { "char", WARN_CHAR },
10574 : { "range", WARN_RANGE },
10575 : { "break", WARN_BREAK },
10576 : { "delim", WARN_DELIM },
10577 : { "scale", WARN_SCALE },
10578 : { "syntax", WARN_SYNTAX },
10579 : { "tab", WARN_TAB },
10580 : { "missing", WARN_MISSING },
10581 : { "input", WARN_INPUT },
10582 : { "escape", WARN_ESCAPE },
10583 : { "space", WARN_SPACE },
10584 : { "font", WARN_FONT },
10585 : { "di", WARN_DI },
10586 : { "mac", WARN_MAC },
10587 : { "reg", WARN_REG },
10588 : { "ig", WARN_IG },
10589 : { "color", WARN_COLOR },
10590 : { "file", WARN_FILE },
10591 : { "all", WARN_MAX & ~(WARN_DI | WARN_MAC | WARN_REG) },
10592 : { "w", WARN_MAX },
10593 : { "default", DEFAULT_WARNING_MASK },
10594 : };
10595 :
10596 778 : static unsigned int lookup_warning(const char *name)
10597 : {
10598 5760 : for (unsigned int i = 0U; i < countof(warning_table); i++)
10599 5759 : if (strcmp(name, warning_table[i].name) == 0)
10600 777 : return warning_table[i].mask;
10601 1 : return 0U;
10602 : }
10603 :
10604 749 : static void enable_warning(const char *name)
10605 : {
10606 749 : unsigned int mask = lookup_warning(name);
10607 749 : if (mask != 0U)
10608 749 : warning_mask |= mask;
10609 : else
10610 0 : error("unrecognized warning category '%1'", name);
10611 749 : }
10612 :
10613 29 : static void disable_warning(const char *name)
10614 : {
10615 29 : unsigned int mask = lookup_warning(name);
10616 29 : if (mask != 0U)
10617 28 : warning_mask &= ~mask;
10618 : else
10619 1 : error("unrecognized warning category '%1'", name);
10620 29 : }
10621 :
10622 0 : static void copy_mode_error(const char *format,
10623 : const errarg &arg1,
10624 : const errarg &arg2,
10625 : const errarg &arg3)
10626 : {
10627 0 : if (want_input_ignored) {
10628 : static const char prefix[] = "(in ignored input) ";
10629 : // ISO C++ does not permit VLAs on the stack.
10630 : // C++03: new char[sizeof prefix + strlen(format)]();
10631 0 : char *s = new char[sizeof prefix + strlen(format)];
10632 0 : (void) memset(s, 0, (sizeof prefix + (strlen(format)
10633 : * sizeof(char))));
10634 0 : strcpy(s, prefix);
10635 0 : strcat(s, format);
10636 0 : warning(WARN_IG, s, arg1, arg2, arg3);
10637 0 : delete[] s;
10638 : }
10639 : else
10640 0 : error(format, arg1, arg2, arg3);
10641 0 : }
10642 :
10643 : enum error_type { DEBUG, WARNING, OUTPUT_WARNING, ERROR, FATAL };
10644 :
10645 441 : static void do_error(error_type type,
10646 : const char *format,
10647 : const errarg &arg1,
10648 : const errarg &arg2,
10649 : const errarg &arg3)
10650 : {
10651 : const char *filename;
10652 : int lineno;
10653 441 : if (want_errors_inhibited && (type < FATAL))
10654 233 : return;
10655 208 : if (want_backtraces)
10656 34 : input_stack::backtrace();
10657 208 : if (!get_file_line(&filename, &lineno))
10658 4 : filename = 0 /* nullptr */;
10659 208 : if (filename != 0 /* nullptr */) {
10660 204 : if (program_name != 0 /* nullptr */)
10661 204 : errprint("%1:", program_name);
10662 204 : errprint("%1:%2: ", filename, lineno);
10663 : }
10664 4 : else if (program_name != 0 /* nullptr */)
10665 4 : fprintf(stderr, "%s: ", program_name);
10666 208 : switch (type) {
10667 2 : case FATAL:
10668 2 : fputs("fatal error: ", stderr);
10669 2 : break;
10670 79 : case ERROR:
10671 79 : fputs("error: ", stderr);
10672 79 : break;
10673 94 : case WARNING:
10674 94 : fputs("warning: ", stderr);
10675 94 : break;
10676 0 : case DEBUG:
10677 0 : fputs("debug: ", stderr);
10678 0 : break;
10679 33 : case OUTPUT_WARNING:
10680 33 : if (in_nroff_mode) {
10681 23 : int fromtop = (topdiv->get_vertical_position().to_units()
10682 23 : / vresolution) + 1;
10683 23 : fprintf(stderr, "warning [page %d, line %d",
10684 : topdiv->get_page_number(), fromtop);
10685 23 : if (topdiv != curdiv) {
10686 0 : int fromdivtop = (curdiv->get_vertical_position().to_units()
10687 0 : / vresolution) + 1;
10688 0 : fprintf(stderr, ", diversion '%s', line %d",
10689 : curdiv->get_diversion_name(), fromdivtop);
10690 : }
10691 23 : fprintf(stderr, "]: ");
10692 : }
10693 : else {
10694 10 : double fromtop = topdiv->get_vertical_position().to_units()
10695 10 : / warn_scale;
10696 10 : fprintf(stderr, "warning [page %d, %.1f%c",
10697 : topdiv->get_page_number(), fromtop, warn_scaling_unit);
10698 10 : if (topdiv != curdiv) {
10699 0 : double fromtop1 = curdiv->get_vertical_position().to_units()
10700 0 : / warn_scale;
10701 0 : fprintf(stderr, " (diversion '%s', %.1f%c)",
10702 : curdiv->get_diversion_name(), fromtop1,
10703 : warn_scaling_unit);
10704 : }
10705 10 : fprintf(stderr, "]: ");
10706 : }
10707 33 : break;
10708 : }
10709 208 : errprint(format, arg1, arg2, arg3);
10710 208 : fputc('\n', stderr);
10711 208 : fflush(stderr);
10712 208 : if (type == FATAL)
10713 2 : write_any_trailer_and_exit(EXIT_FAILURE);
10714 : }
10715 :
10716 : // This function should have no callers in production builds.
10717 0 : void debug(const char *format,
10718 : const errarg &arg1,
10719 : const errarg &arg2,
10720 : const errarg &arg3)
10721 : {
10722 0 : do_error(DEBUG, format, arg1, arg2, arg3);
10723 0 : }
10724 :
10725 27765 : int warning(warning_type t,
10726 : const char *format,
10727 : const errarg &arg1,
10728 : const errarg &arg2,
10729 : const errarg &arg3)
10730 : {
10731 27765 : if ((t & warning_mask) != 0U) {
10732 94 : do_error(WARNING, format, arg1, arg2, arg3);
10733 94 : return 1;
10734 : }
10735 : else
10736 27671 : return 0;
10737 : }
10738 :
10739 40 : int output_warning(warning_type t,
10740 : const char *format,
10741 : const errarg &arg1,
10742 : const errarg &arg2,
10743 : const errarg &arg3)
10744 : {
10745 40 : if ((t & warning_mask) != 0U) {
10746 33 : do_error(OUTPUT_WARNING, format, arg1, arg2, arg3);
10747 33 : return 1;
10748 : }
10749 : else
10750 7 : return 0;
10751 : }
10752 :
10753 312 : void error(const char *format,
10754 : const errarg &arg1,
10755 : const errarg &arg2,
10756 : const errarg &arg3)
10757 : {
10758 312 : do_error(ERROR, format, arg1, arg2, arg3);
10759 312 : }
10760 :
10761 2 : void fatal(const char *format,
10762 : const errarg &arg1,
10763 : const errarg &arg2,
10764 : const errarg &arg3)
10765 : {
10766 2 : do_error(FATAL, format, arg1, arg2, arg3);
10767 0 : }
10768 :
10769 0 : void fatal_with_file_and_line(const char *filename, int lineno,
10770 : const char *format,
10771 : const errarg &arg1,
10772 : const errarg &arg2,
10773 : const errarg &arg3)
10774 : {
10775 0 : if (program_name != 0 /* nullptr */)
10776 0 : fprintf(stderr, "%s:", program_name);
10777 0 : fprintf(stderr, "%s:", filename);
10778 0 : if (lineno > 0)
10779 0 : fprintf(stderr, "%d:", lineno);
10780 0 : fputs(" fatal error: ", stderr);
10781 0 : errprint(format, arg1, arg2, arg3);
10782 0 : fputc('\n', stderr);
10783 0 : fflush(stderr);
10784 0 : write_any_trailer_and_exit(EXIT_FAILURE);
10785 0 : }
10786 :
10787 0 : void error_with_file_and_line(const char *filename, int lineno,
10788 : const char *format,
10789 : const errarg &arg1,
10790 : const errarg &arg2,
10791 : const errarg &arg3)
10792 : {
10793 0 : if (program_name != 0 /* nullptr */)
10794 0 : fprintf(stderr, "%s:", program_name);
10795 0 : fprintf(stderr, "%s:", filename);
10796 0 : if (lineno > 0)
10797 0 : fprintf(stderr, "%d:", lineno);
10798 0 : fputs(" error: ", stderr);
10799 0 : errprint(format, arg1, arg2, arg3);
10800 0 : fputc('\n', stderr);
10801 0 : fflush(stderr);
10802 0 : }
10803 :
10804 : // This function should have no callers in production builds.
10805 0 : void debug_with_file_and_line(const char *filename,
10806 : int lineno,
10807 : const char *format,
10808 : const errarg &arg1,
10809 : const errarg &arg2,
10810 : const errarg &arg3)
10811 : {
10812 0 : if (program_name != 0 /* nullptr */)
10813 0 : fprintf(stderr, "%s:", program_name);
10814 0 : fprintf(stderr, "%s:", filename);
10815 0 : if (lineno > 0)
10816 0 : fprintf(stderr, "%d:", lineno);
10817 0 : fputs(" debug: ", stderr);
10818 0 : errprint(format, arg1, arg2, arg3);
10819 0 : fputc('\n', stderr);
10820 0 : fflush(stderr);
10821 0 : }
10822 :
10823 : dictionary charinfo_dictionary(501);
10824 :
10825 7840300 : charinfo *lookup_charinfo(symbol nm, bool suppress_creation)
10826 : {
10827 7840300 : void *p = charinfo_dictionary.lookup(nm);
10828 7840300 : if (p != 0 /* nullptr */)
10829 6812382 : return static_cast<charinfo *>(p);
10830 1027918 : if (suppress_creation)
10831 1 : return static_cast<charinfo *>(0 /* nullptr */);
10832 : else {
10833 1027917 : charinfo *cp = new charinfo(nm);
10834 1027917 : (void) charinfo_dictionary.lookup(nm, cp);
10835 1027917 : return cp;
10836 : }
10837 : }
10838 :
10839 : int charinfo::next_index = 0;
10840 :
10841 1364986 : charinfo::charinfo(symbol s)
10842 : : translation(0 /* nullptr */), mac(0 /* nullptr */),
10843 : special_translation(TRANSLATE_NONE), hyphenation_code(0U),
10844 : flags(0U), ascii_code(0U), asciify_code(0U),
10845 : is_not_found(false), is_transparently_translatable(true),
10846 1364986 : translatable_as_input(false), mode(CHAR_NORMAL), nm(s)
10847 : {
10848 1364986 : index = next_index++;
10849 1364986 : number = -1;
10850 1364986 : get_flags();
10851 1364986 : }
10852 :
10853 28204 : int charinfo::get_unicode_mapping()
10854 : {
10855 28204 : if (ascii_code != 0U)
10856 5697 : return ascii_code;
10857 22507 : return glyph_to_unicode(this);
10858 : }
10859 :
10860 343785 : void charinfo::set_hyphenation_code(unsigned char c)
10861 : {
10862 343785 : hyphenation_code = c;
10863 343785 : }
10864 :
10865 175825 : void charinfo::set_translation(charinfo *ci, bool transparently,
10866 : bool as_input)
10867 : {
10868 175825 : translation = ci;
10869 175825 : if ((ci != 0 /* nullptr */) && as_input) {
10870 156362 : if (hyphenation_code != 0U)
10871 13948 : ci->set_hyphenation_code(hyphenation_code);
10872 156362 : if (asciify_code != 0U)
10873 0 : ci->set_asciify_code(asciify_code);
10874 156362 : else if (ascii_code != 0U)
10875 156362 : ci->set_asciify_code(ascii_code);
10876 156362 : ci->make_translatable_as_input();
10877 : }
10878 175825 : special_translation = TRANSLATE_NONE;
10879 175825 : is_transparently_translatable = transparently;
10880 175825 : }
10881 :
10882 : // Recompute flags for all entries in the charinfo dictionary.
10883 8 : void get_flags()
10884 : {
10885 8 : dictionary_iterator iter(charinfo_dictionary);
10886 : charinfo *ci;
10887 8 : symbol s;
10888 : // We must use the nuclear `reinterpret_cast` operator because GNU
10889 : // troff's dictionary types use a pre-STL approach to containers.
10890 9399 : while (iter.get(&s, reinterpret_cast<void **>(&ci))) {
10891 9391 : assert(!s.is_null());
10892 9391 : ci->get_flags();
10893 : }
10894 8 : using_character_classes = false;
10895 8 : }
10896 :
10897 : // Get the union of all flags affecting this charinfo.
10898 1374381 : void charinfo::get_flags()
10899 : {
10900 1374381 : dictionary_iterator iter(char_class_dictionary);
10901 : charinfo *ci;
10902 1374381 : symbol s;
10903 : // We must use the nuclear `reinterpret_cast` operator because GNU
10904 : // troff's dictionary types use a pre-STL approach to containers.
10905 1401890 : while (iter.get(&s, reinterpret_cast<void **>(&ci))) {
10906 27509 : assert(!s.is_null());
10907 27509 : if (ci->contains(get_unicode_mapping())) {
10908 : #if defined(DEBUGGING)
10909 : if (want_html_debugging)
10910 : fprintf(stderr, "charinfo::get_flags %p %s %d\n",
10911 : static_cast<void *>(ci), ci->nm.contents(),
10912 : ci->flags);
10913 : #endif
10914 621 : flags |= ci->flags;
10915 : }
10916 : }
10917 1374381 : }
10918 :
10919 1688 : void charinfo::set_special_translation(int cc, bool transparently)
10920 : {
10921 1688 : special_translation = cc;
10922 1688 : translation = 0 /* nullptr */;
10923 1688 : is_transparently_translatable = transparently;
10924 1688 : }
10925 :
10926 363008 : void charinfo::set_ascii_code(unsigned char c)
10927 : {
10928 363008 : ascii_code = c;
10929 363008 : }
10930 :
10931 156362 : void charinfo::set_asciify_code(unsigned char c)
10932 : {
10933 156362 : asciify_code = c;
10934 156362 : }
10935 :
10936 : // Replace character definition with macro `m`, returning previous
10937 : // macro if any (if none, return a null pointer).
10938 110611 : macro *charinfo::set_macro(macro *m)
10939 : {
10940 110611 : macro *tem = mac;
10941 110611 : mac = m;
10942 110611 : return tem;
10943 : }
10944 :
10945 : // Replace character definition with macro `m` and update its character
10946 : // mode to `cm`, returning previous macro if any (if none, return a null
10947 : // pointer).
10948 325449 : macro *charinfo::set_macro(macro *m, char_mode cm)
10949 : {
10950 325449 : macro *tem = mac;
10951 325449 : mac = m;
10952 325449 : mode = cm;
10953 325449 : return tem;
10954 : }
10955 :
10956 337069 : void charinfo::set_number(int n)
10957 : {
10958 337069 : assert(n >= 0);
10959 337069 : number = n;
10960 337069 : }
10961 :
10962 21144 : int charinfo::get_number()
10963 : {
10964 21144 : assert(number >= 0);
10965 21144 : return number;
10966 : }
10967 :
10968 27509 : bool charinfo::contains(int c, bool already_called)
10969 : {
10970 27509 : if (already_called) {
10971 0 : warning(WARN_SYNTAX, "nested class detected while processing"
10972 0 : " character code %1", c);
10973 0 : return false;
10974 : }
10975 27509 : std::vector<std::pair<int, int> >::const_iterator ranges_iter;
10976 27509 : ranges_iter = ranges.begin();
10977 394494 : while (ranges_iter != ranges.end()) {
10978 367606 : if (c >= ranges_iter->first && c <= ranges_iter->second) {
10979 : #if defined(DEBUGGING)
10980 : if (want_html_debugging)
10981 : fprintf(stderr, "charinfo::contains(%d)\n", c);
10982 : #endif
10983 621 : return true;
10984 : }
10985 366985 : ++ranges_iter;
10986 : }
10987 :
10988 : // Nested classes don't work. See Savannah #67770.
10989 : #if 0
10990 : std::vector<charinfo *>::const_iterator nested_iter;
10991 : nested_iter = nested_classes.begin();
10992 : while (nested_iter != nested_classes.end()) {
10993 : if ((*nested_iter)->contains(c, true))
10994 : return true;
10995 : ++nested_iter;
10996 : }
10997 : #endif
10998 :
10999 26888 : return false;
11000 : }
11001 :
11002 0 : bool charinfo::contains(symbol s, bool already_called)
11003 : {
11004 0 : if (already_called) {
11005 0 : warning(WARN_SYNTAX, "nested class detected while processing symbol"
11006 0 : " %1", s.contents());
11007 0 : return false;
11008 : }
11009 0 : const char *unicode = glyph_name_to_unicode(s.contents());
11010 0 : if (unicode != 0 /* nullptr */ && strchr(unicode, '_') == 0) {
11011 : char *ignore;
11012 0 : int c = (int) strtol(unicode, &ignore, 16);
11013 0 : return contains(c, true);
11014 : }
11015 : else
11016 0 : return false;
11017 : }
11018 :
11019 0 : bool charinfo::contains(charinfo *, bool)
11020 : {
11021 : // Werner Lemberg marked this as "TODO" in 2010.
11022 0 : assert(0 == "unimplemented member function");
11023 : return false;
11024 : }
11025 :
11026 5 : void charinfo::describe_flags()
11027 : {
11028 5 : if (0U == flags)
11029 2 : errprint("(none)\n");
11030 : else {
11031 3 : char none[] = { '\0' };
11032 3 : char comma[] = { ',', ' ', '\0' };
11033 3 : char *separator = none;
11034 3 : errprint("(");
11035 3 : if (flags & ENDS_SENTENCE) {
11036 0 : errprint("%1ends sentence", separator);
11037 0 : separator = comma;
11038 : }
11039 3 : if (flags & ALLOWS_BREAK_BEFORE) {
11040 0 : errprint("%1allows break before", separator);
11041 0 : separator = comma;
11042 : }
11043 3 : if (flags & ALLOWS_BREAK_AFTER) {
11044 2 : errprint("%1allows break after", separator);
11045 2 : separator = comma;
11046 : }
11047 3 : if (flags & OVERLAPS_HORIZONTALLY) {
11048 0 : errprint("%1overlaps horizontally", separator);
11049 0 : separator = comma;
11050 : }
11051 3 : if (flags & OVERLAPS_VERTICALLY) {
11052 0 : errprint("%1overlaps vertically", separator);
11053 0 : separator = comma;
11054 : }
11055 3 : if (flags & IS_TRANSPARENT_TO_END_OF_SENTENCE) {
11056 0 : errprint("%1is transparent to end of sentence", separator);
11057 0 : separator = comma;
11058 : }
11059 3 : if (flags & IGNORES_SURROUNDING_HYPHENATION_CODES) {
11060 0 : errprint("%1ignores surrounding hyphenation codes", separator);
11061 0 : separator = comma;
11062 : }
11063 3 : if (flags & PROHIBITS_BREAK_BEFORE) {
11064 1 : errprint("%1prohibits break before", separator);
11065 1 : separator = comma;
11066 : }
11067 3 : if (flags & PROHIBITS_BREAK_AFTER) {
11068 0 : errprint("%1prohibits break after", separator);
11069 0 : separator = comma;
11070 : }
11071 3 : if (flags & IS_INTERWORD_SPACE) {
11072 0 : errprint("%1is interword space", separator);
11073 0 : separator = comma;
11074 : }
11075 3 : errprint(")\n");
11076 : }
11077 5 : }
11078 :
11079 5 : void charinfo::dump_flags()
11080 : {
11081 5 : errprint(" %1flags: %2 ", (is_class() ? "" : "inherent "), flags);
11082 5 : describe_flags();
11083 5 : if (!is_class()) {
11084 : // Report influence of membership in character classes, if any.
11085 4 : unsigned int saved_flags = flags;
11086 4 : get_flags();
11087 4 : if (flags != saved_flags) {
11088 0 : errprint(" effective flags: %1 ", flags);
11089 0 : describe_flags();
11090 0 : flags = saved_flags;
11091 : }
11092 : }
11093 5 : }
11094 :
11095 5 : void charinfo::dump()
11096 : {
11097 5 : if (is_class()) {
11098 1 : std::vector<std::pair<int, int> >::const_iterator ranges_iter;
11099 1 : ranges_iter = ranges.begin();
11100 1 : assert(mac != 0 /* nullptr */);
11101 1 : errprint(" defined at: ");
11102 1 : mac->dump();
11103 1 : fflush(stderr);
11104 1 : errprint(" contains code points: ");
11105 1 : const size_t buflen = sizeof "U+10FFFF";
11106 1 : int range_begin = 0;
11107 1 : int range_end = 0;
11108 : char beg_hexbuf[buflen];
11109 : char end_hexbuf[buflen];
11110 1 : (void) memset(beg_hexbuf, '\0', buflen);
11111 1 : (void) memset(end_hexbuf, '\0', buflen);
11112 1 : bool has_ranges = false;
11113 41 : while (ranges_iter != ranges.end()) {
11114 40 : has_ranges = true;
11115 40 : range_begin = ranges_iter->first;
11116 40 : range_end = ranges_iter->second;
11117 40 : (void) snprintf(beg_hexbuf, buflen, "U+%.4X", range_begin);
11118 40 : (void) snprintf(end_hexbuf, buflen, "U+%.4X", range_end);
11119 : // TODO: comma-separate? JSON list?
11120 40 : if (range_begin == range_end)
11121 40 : errprint("%1 ", beg_hexbuf);
11122 : else
11123 0 : errprint("%1-%2 ", beg_hexbuf, end_hexbuf);
11124 40 : ++ranges_iter;
11125 : }
11126 1 : if (!has_ranges)
11127 0 : errprint("(none)");
11128 1 : errprint("\n");
11129 : #if 0
11130 : // Nested classes don't work. See Savannah #67770.
11131 : errprint(" contains nested classes: ");
11132 : std::vector<charinfo *>::const_iterator nested_iter;
11133 : nested_iter = nested_classes.begin();
11134 : bool has_nested_classes = false;
11135 : while (nested_iter != nested_classes.end()) {
11136 : has_nested_classes = true;
11137 : // TODO: Here's where JSON would really pay off.
11138 : (*nested_iter)->dump();
11139 : }
11140 : if (!has_nested_classes)
11141 : errprint("(none)");
11142 : errprint("\n");
11143 : #endif
11144 1 : dump_flags();
11145 : }
11146 : else {
11147 4 : if (translation != 0 /* nullptr */)
11148 0 : errprint(" is translated\n");
11149 : else
11150 4 : errprint(" is not translated\n");
11151 4 : if (mac != 0 /* nullptr */) {
11152 0 : errprint(" has a macro: ");
11153 0 : mac->json_dump();
11154 0 : errprint("\n");
11155 : }
11156 : else
11157 4 : errprint(" does not have a macro\n");
11158 4 : errprint(" special translation: %1\n",
11159 4 : static_cast<int>(special_translation));
11160 4 : errprint(" hyphenation code: %1\n",
11161 4 : static_cast<int>(hyphenation_code));
11162 4 : dump_flags();
11163 4 : errprint(" asciify code: %1\n", static_cast<int>(asciify_code));
11164 4 : errprint(" ASCII code: %1\n", static_cast<int>(ascii_code));
11165 : // Also see node.cpp::glyph_node::asciify().
11166 4 : int mapping = get_unicode_mapping();
11167 4 : if (mapping >= 0) {
11168 3 : const size_t buflen = 6; // enough for five hex digits + '\0'
11169 : char hexbuf[buflen];
11170 3 : (void) memset(hexbuf, '\0', buflen);
11171 3 : (void) snprintf(hexbuf, buflen, "%.4X", mapping);
11172 3 : errprint(" Unicode mapping: U+%1\n", hexbuf);
11173 : }
11174 : else
11175 1 : errprint(" Unicode mapping: none (%1)\n", mapping);
11176 4 : errprint(" is%1 found\n", is_not_found ? " not" : "");
11177 4 : errprint(" is%1 transparently translatable\n",
11178 4 : is_transparently_translatable ? "" : " not");
11179 4 : errprint(" is%1 translatable as input\n",
11180 4 : translatable_as_input ? "" : " not");
11181 4 : const char *modestr = character_mode_description(mode);
11182 4 : if (strcmp(modestr, "") == 0)
11183 4 : modestr =" normal";
11184 4 : errprint(" mode:%1\n", modestr);
11185 : }
11186 5 : fflush(stderr);
11187 5 : }
11188 :
11189 : symbol UNNAMED_SYMBOL("---");
11190 :
11191 : // For indexed characters not between 0 and 255, we make a symbol out
11192 : // of the number and store them in this dictionary.
11193 :
11194 : dictionary indexed_charinfo_dictionary(11);
11195 :
11196 3064483 : static charinfo *get_charinfo_by_index(int n, bool suppress_creation)
11197 : {
11198 : static charinfo *index_table[256];
11199 :
11200 3064483 : if (n >= 0 && n < 256) {
11201 2278674 : charinfo *ci = index_table[n];
11202 2278674 : if ((0 /*nullptr */ == ci) && !suppress_creation) {
11203 194414 : ci = new charinfo(UNNAMED_SYMBOL);
11204 194414 : ci->set_number(n);
11205 194414 : index_table[n] = ci;
11206 : }
11207 2278674 : return ci;
11208 : }
11209 : else {
11210 785809 : symbol ns(i_to_a(n));
11211 : charinfo *ci =
11212 785809 : static_cast<charinfo *>(indexed_charinfo_dictionary.lookup(ns));
11213 785809 : if ((0 /*nullptr */ == ci) && !suppress_creation) {
11214 142655 : ci = new charinfo(UNNAMED_SYMBOL);
11215 142655 : ci->set_number(n);
11216 142655 : (void) indexed_charinfo_dictionary.lookup(ns, ci);
11217 : }
11218 785809 : return ci;
11219 : }
11220 : }
11221 :
11222 : // This overrides the same function from libgroff; while reading font
11223 : // definition files it puts single-letter glyph names into
11224 : // 'charset_table' and converts glyph names of the form '\x' ('x' a
11225 : // single letter) into 'x'. Consequently, symbol("x") refers to glyph
11226 : // name '\x', not 'x'.
11227 :
11228 11619868 : glyph *name_to_glyph(const char *nm)
11229 : {
11230 : charinfo *ci;
11231 11619868 : if (nm[1] == 0)
11232 4991031 : ci = charset_table[nm[0] & 0xff];
11233 6628837 : else if (nm[0] == '\\' && nm[2] == 0)
11234 10929 : ci = lookup_charinfo(symbol(nm + 1));
11235 : else
11236 6617908 : ci = lookup_charinfo(symbol(nm));
11237 11619868 : return ci->as_glyph();
11238 : }
11239 :
11240 3017909 : glyph *number_to_glyph(int n)
11241 : {
11242 3017909 : return get_charinfo_by_index(n)->as_glyph();
11243 : }
11244 :
11245 26873637 : const char *glyph_to_name(glyph *g)
11246 : {
11247 : // In both libgroff and troff, `charinfo` has `glyph` as a base class.
11248 : // But in troff, `charinfo` stores much more information.
11249 26873637 : charinfo *ci = reinterpret_cast<charinfo *>(g);
11250 26873637 : return ((ci->nm != UNNAMED_SYMBOL) ? ci->nm.contents()
11251 26873637 : : 0 /* nullptr */);
11252 : }
11253 :
11254 : // Local Variables:
11255 : // fill-column: 72
11256 : // mode: C++
11257 : // End:
11258 : // vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
|