Line data Source code
1 : /* Copyright 1989-2025 Free Software Foundation, Inc. 2 : Written by James Clark (jjc@jclark.com) 3 : 4 : This file is part of groff, the GNU roff typesetting system. 5 : 6 : groff is free software; you can redistribute it and/or modify it under 7 : the terms of the GNU General Public License as published by the Free 8 : Software Foundation, either version 3 of the License, or 9 : (at your option) any later version. 10 : 11 : groff is distributed in the hope that it will be useful, but WITHOUT ANY 12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 : for more details. 15 : 16 : You should have received a copy of the GNU General Public License 17 : along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 : 19 : #include <vector> 20 : #include <utility> 21 : 22 : extern bool using_character_classes; // if `.class` is invoked 23 : extern void get_flags(); 24 : 25 : class macro; 26 : 27 : // libgroff has a simpler `charinfo` class that stores much less 28 : // information. 29 : class charinfo : glyph { 30 : static int next_index; 31 : charinfo *translation; 32 : macro *mac; 33 : unsigned char special_translation; 34 : unsigned char hyphenation_code; 35 : unsigned int flags; 36 : unsigned char ascii_code; 37 : unsigned char asciify_code; 38 : bool is_not_found; 39 : bool is_transparently_translatable; 40 : bool translatable_as_input; // asciify_code is active for .asciify 41 : char_mode mode; 42 : // Unicode character classes 43 : std::vector<std::pair<int, int> > ranges; 44 : std::vector<charinfo *> nested_classes; // XXX: see Savannah #67770 45 : public: 46 : // Values for the flags bitmask. See groff manual, description of the 47 : // '.cflags' request. 48 : // 49 : // Keep these symbol names in sync with the subset used in the `enum` 50 : // `break_char_type`; see "node.cpp". 51 : // 52 : // C++11: Use `enum : unsigned int`. 53 : enum { 54 : ENDS_SENTENCE = 0x01, 55 : ALLOWS_BREAK_BEFORE = 0x02, 56 : ALLOWS_BREAK_AFTER = 0x04, 57 : OVERLAPS_HORIZONTALLY = 0x08, 58 : OVERLAPS_VERTICALLY = 0x10, 59 : IS_TRANSPARENT_TO_END_OF_SENTENCE = 0x20, 60 : IGNORES_SURROUNDING_HYPHENATION_CODES = 0x40, 61 : PROHIBITS_BREAK_BEFORE = 0x80, 62 : PROHIBITS_BREAK_AFTER = 0x100, 63 : IS_INTERWORD_SPACE = 0x200, 64 : CFLAGS_MAX = 0x2FF 65 : }; 66 : // 67 : // C++11: Use `enum : unsigned char`. 68 : enum { 69 : TRANSLATE_NONE, 70 : TRANSLATE_SPACE, 71 : TRANSLATE_DUMMY, 72 : TRANSLATE_STRETCHABLE_SPACE, 73 : TRANSLATE_HYPHEN_INDICATOR 74 : }; 75 : symbol nm; 76 : charinfo(symbol); 77 : glyph *as_glyph(); 78 : bool ends_sentence(); 79 : bool overlaps_vertically(); 80 : bool overlaps_horizontally(); 81 : bool allows_break_before(); 82 : bool allows_break_after(); 83 : bool is_transparent_to_end_of_sentence(); 84 : bool ignores_surrounding_hyphenation_codes(); 85 : bool prohibits_break_before(); 86 : bool prohibits_break_after(); 87 : bool is_interword_space(); 88 : unsigned char get_hyphenation_code(); 89 : unsigned char get_ascii_code(); 90 : unsigned char get_asciify_code(); 91 : int get_unicode_mapping(); 92 : void set_hyphenation_code(unsigned char); 93 : void set_ascii_code(unsigned char); 94 : void set_asciify_code(unsigned char); 95 : void make_translatable_as_input(); 96 : bool is_translatable_as_input(); 97 : charinfo *get_translation(bool = false); 98 : void set_translation(charinfo *, bool /* transparently */, 99 : bool /* as_input */); 100 : void get_flags(); 101 : void set_flags(unsigned int); 102 : void set_special_translation(int, bool /* transparently */); 103 : int get_special_translation(bool = false); 104 : macro *set_macro(macro *); 105 : macro *set_macro(macro *, char_mode); 106 : macro *get_macro(); 107 : bool first_time_not_found(); 108 : void set_number(int); 109 : int get_number(); 110 : bool is_numbered(); 111 : bool is_normal(); 112 : bool is_fallback(); 113 : bool is_special(); 114 : symbol *get_symbol(); 115 : void add_to_class(int); 116 : void add_to_class(int, int); 117 : void add_to_class(charinfo *); 118 : bool is_class(); 119 : bool contains(int, bool = false); 120 : bool contains(symbol, bool = false); 121 : bool contains(charinfo *, bool = false); 122 : void describe_flags(); 123 : void dump_flags(); 124 : void dump(); 125 : }; 126 : 127 : extern charinfo *lookup_charinfo(symbol, 128 : bool /* suppress_creation */ = false); 129 : extern charinfo *charset_table[]; 130 : 131 144 : inline bool charinfo::overlaps_horizontally() 132 : { 133 144 : if (using_character_classes) 134 0 : ::get_flags(); 135 144 : return (flags & OVERLAPS_HORIZONTALLY); 136 : } 137 : 138 0 : inline bool charinfo::overlaps_vertically() 139 : { 140 0 : if (using_character_classes) 141 0 : ::get_flags(); 142 0 : return (flags & OVERLAPS_VERTICALLY); 143 : } 144 : 145 12900786 : inline bool charinfo::allows_break_before() 146 : { 147 12900786 : if (using_character_classes) 148 8 : ::get_flags(); 149 12900786 : return (flags & ALLOWS_BREAK_BEFORE); 150 : } 151 : 152 12900786 : inline bool charinfo::allows_break_after() 153 : { 154 12900786 : if (using_character_classes) 155 0 : ::get_flags(); 156 12900786 : return (flags & ALLOWS_BREAK_AFTER); 157 : } 158 : 159 416225 : inline bool charinfo::ends_sentence() 160 : { 161 416225 : if (using_character_classes) 162 0 : ::get_flags(); 163 416225 : return (flags & ENDS_SENTENCE); 164 : } 165 : 166 334643 : inline bool charinfo::is_transparent_to_end_of_sentence() 167 : { 168 334643 : if (using_character_classes) 169 0 : ::get_flags(); 170 334643 : return (flags & IS_TRANSPARENT_TO_END_OF_SENTENCE); 171 : } 172 : 173 12900786 : inline bool charinfo::ignores_surrounding_hyphenation_codes() 174 : { 175 12900786 : if (using_character_classes) 176 0 : ::get_flags(); 177 12900786 : return (flags & IGNORES_SURROUNDING_HYPHENATION_CODES); 178 : } 179 : 180 12900786 : inline bool charinfo::prohibits_break_before() 181 : { 182 12900786 : if (using_character_classes) 183 0 : ::get_flags(); 184 12900786 : return (flags & PROHIBITS_BREAK_BEFORE); 185 : } 186 : 187 12900786 : inline bool charinfo::prohibits_break_after() 188 : { 189 12900786 : if (using_character_classes) 190 0 : ::get_flags(); 191 12900786 : return (flags & PROHIBITS_BREAK_AFTER); 192 : } 193 : 194 12900786 : inline bool charinfo::is_interword_space() 195 : { 196 12900786 : if (using_character_classes) 197 0 : ::get_flags(); 198 12900786 : return (flags & IS_INTERWORD_SPACE); 199 : } 200 : 201 77051 : inline bool charinfo::is_numbered() 202 : { 203 77051 : return (number >= 0); 204 : } 205 : 206 55123 : inline bool charinfo::is_normal() 207 : { 208 55123 : return (mode == CHAR_NORMAL); 209 : } 210 : 211 418 : inline bool charinfo::is_fallback() 212 : { 213 418 : return (mode == CHAR_FALLBACK); 214 : } 215 : 216 214 : inline bool charinfo::is_special() 217 : { 218 214 : return (mode == CHAR_SPECIAL_FALLBACK); 219 : } 220 : 221 13868379 : inline charinfo *charinfo::get_translation(bool for_transparent_throughput) 222 : { 223 588478 : return ((for_transparent_throughput && !is_transparently_translatable) 224 14456857 : ? 0 /* nullptr */ 225 13868379 : : translation); 226 : } 227 : 228 569937 : inline unsigned char charinfo::get_hyphenation_code() 229 : { 230 569937 : return hyphenation_code; 231 : } 232 : 233 13403378 : inline unsigned char charinfo::get_ascii_code() 234 : { 235 13403378 : return ascii_code; 236 : } 237 : 238 114 : inline unsigned char charinfo::get_asciify_code() 239 : { 240 114 : return (translatable_as_input ? asciify_code : 0U); 241 : } 242 : 243 30508 : inline void charinfo::set_flags(unsigned int c) 244 : { 245 30508 : flags = c; 246 30508 : } 247 : 248 72089344 : inline glyph *charinfo::as_glyph() 249 : { 250 72089344 : return this; 251 : } 252 : 253 156362 : inline void charinfo::make_translatable_as_input() 254 : { 255 156362 : translatable_as_input = true; 256 156362 : } 257 : 258 95060 : inline bool charinfo::is_translatable_as_input() 259 : { 260 95060 : return translatable_as_input; 261 : } 262 : 263 13516488 : inline int charinfo::get_special_translation(bool transparently) 264 : { 265 588478 : return (transparently && !is_transparently_translatable 266 13516488 : ? int(TRANSLATE_NONE) 267 13516488 : : special_translation); 268 : } 269 : 270 12960250 : inline macro *charinfo::get_macro() 271 : { 272 12960250 : return mac; 273 : } 274 : 275 103 : inline bool charinfo::first_time_not_found() 276 : { 277 103 : if (is_not_found) 278 75 : return false; 279 : else { 280 28 : is_not_found = true; 281 28 : return true; 282 : } 283 : } 284 : 285 224 : inline symbol *charinfo::get_symbol() 286 : { 287 224 : return &nm; 288 : } 289 : 290 617 : inline void charinfo::add_to_class(int c) 291 : { 292 617 : using_character_classes = true; 293 : // TODO ranges cumbersome for single characters? 294 617 : ranges.push_back(std::pair<int, int>(c, c)); 295 617 : } 296 : 297 35 : inline void charinfo::add_to_class(int lo, 298 : int hi) 299 : { 300 35 : using_character_classes = true; 301 35 : ranges.push_back(std::pair<int, int>(lo, hi)); 302 35 : } 303 : 304 0 : inline void charinfo::add_to_class(charinfo *ci) 305 : { 306 0 : using_character_classes = true; 307 0 : nested_classes.push_back(ci); 308 0 : } 309 : 310 759 : inline bool charinfo::is_class() 311 : { 312 759 : return (!ranges.empty() || !nested_classes.empty()); 313 : } 314 : 315 : // Local Variables: 316 : // fill-column: 72 317 : // mode: C++ 318 : // End: 319 : // vim: set cindent noexpandtab shiftwidth=2 textwidth=72: