Line data Source code
1 : /* Copyright 2002-2024 Free Software Foundation, Inc.
2 : Written by Werner Lemberg <wl@gnu.org>
3 :
4 : This file is part of groff, the GNU roff typesetting system.
5 :
6 : groff is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation, either version 3 of the License, or
9 : (at your option) any later version.
10 :
11 : groff is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 :
19 : #ifdef HAVE_CONFIG_H
20 : #include <config.h>
21 : #endif
22 :
23 : #include <stdcountof.h>
24 :
25 : #include "lib.h"
26 :
27 : #include "stringclass.h"
28 : #include "ptable.h"
29 : #include "unicode.h"
30 :
// Value type stored in glyph_to_unicode_table: it wraps the code point
// string that belongs to one glyph name.
struct glyph_to_unicode_map {
  // Points at one of the string literals in glyph_to_unicode_list.
  // Literals must never be written through, so the pointee is const.
  const char *value;
};
34 :
// Instantiate the PTABLE container for glyph_to_unicode_map.  Both
// macros presumably come from "ptable.h"; check there for what they
// expand to.
declare_ptable(glyph_to_unicode_map)
implement_ptable(glyph_to_unicode_map)

// Maps a glyph name to its glyph_to_unicode_map entry.  It is filled by
// the glyph_to_unicode_init constructor and queried by
// glyph_name_to_unicode().
PTABLE(glyph_to_unicode_map) glyph_to_unicode_table;
39 :
40 : // The entries commented out in the table below aren't easily used in
41 : // glyph names. Getting at the names `[` and `]` would require use of
42 : // `\C`, and getting at `\` would require changing the escape character.
43 : //
44 : // Examples: \C'[' \C']'
45 : // .ec @
46 : // @[\]
47 : //
48 : // TODO: Uncomment them, then?
49 :
// One row of the built-in table.  'key' is a groff glyph name; 'value'
// is the matching Unicode code point written as hexadecimal digits.  A
// glyph that stands for several code points (the f-ligatures) lists
// them joined by '_'.
struct S {
  const char *key;
  const char *value;
};

// Rows are ordered by code point.  Several glyph names may share one
// code point.
S glyph_to_unicode_list[] = {
  // Basic Latin
  { "!", "0021" },
  { "\"", "0022" },
  { "dq", "0022" },
  { "#", "0023" },
  { "sh", "0023" },
  { "$", "0024" },
  { "Do", "0024" },
  { "%", "0025" },
  { "&", "0026" },
  { "aq", "0027" },
  { "(", "0028" },
  { ")", "0029" },
  { "*", "002A" },
  { "+", "002B" },
  { "pl", "002B" },
  { ",", "002C" },
  { ".", "002E" },
  { "/", "002F" },
  { "sl", "002F" },
  { "0", "0030" },
  { "1", "0031" },
  { "2", "0032" },
  { "3", "0033" },
  { "4", "0034" },
  { "5", "0035" },
  { "6", "0036" },
  { "7", "0037" },
  { "8", "0038" },
  { "9", "0039" },
  { ":", "003A" },
  { ";", "003B" },
  { "<", "003C" },
  { "=", "003D" },
  { "eq", "003D" },
  { ">", "003E" },
  { "?", "003F" },
  { "@", "0040" },
  { "at", "0040" },
  { "A", "0041" },
  { "B", "0042" },
  { "C", "0043" },
  { "D", "0044" },
  { "E", "0045" },
  { "F", "0046" },
  { "G", "0047" },
  { "H", "0048" },
  { "I", "0049" },
  { "J", "004A" },
  { "K", "004B" },
  { "L", "004C" },
  { "M", "004D" },
  { "N", "004E" },
  { "O", "004F" },
  { "P", "0050" },
  { "Q", "0051" },
  { "R", "0052" },
  { "S", "0053" },
  { "T", "0054" },
  { "U", "0055" },
  { "V", "0056" },
  { "W", "0057" },
  { "X", "0058" },
  { "Y", "0059" },
  { "Z", "005A" },
  // Disabled: the glyph names '[', '\' and ']' are awkward to type in
  // roff input.
  //{ "[", "005B" },
  { "lB", "005B" },
  //{ "\\", "005C" },
  { "rs", "005C" },
  //{ "]", "005D" },
  { "rB", "005D" },
  { "a^", "005E" },
  { "^", "005E" },
  { "ha", "005E" },
  { "_", "005F" },
  { "ul", "005F" },
  { "ga", "0060" },
  { "a", "0061" },
  { "b", "0062" },
  { "c", "0063" },
  { "d", "0064" },
  { "e", "0065" },
  { "f", "0066" },
  // Ligatures decompose into their component letters.
  { "ff", "0066_0066" },
  { "Fi", "0066_0066_0069" },
  { "Fl", "0066_0066_006C" },
  { "fi", "0066_0069" },
  { "fl", "0066_006C" },
  { "g", "0067" },
  { "h", "0068" },
  { "i", "0069" },
  { "j", "006A" },
  { "k", "006B" },
  { "l", "006C" },
  { "m", "006D" },
  { "n", "006E" },
  { "o", "006F" },
  { "p", "0070" },
  { "q", "0071" },
  { "r", "0072" },
  { "s", "0073" },
  { "t", "0074" },
  { "u", "0075" },
  { "v", "0076" },
  { "w", "0077" },
  { "x", "0078" },
  { "y", "0079" },
  { "z", "007A" },
  { "lC", "007B" },
  { "{", "007B" },
  { "ba", "007C" },
  { "or", "007C" },
  { "|", "007C" },
  { "rC", "007D" },
  { "}", "007D" },
  { "a~", "007E" },
  { "~", "007E" },
  { "ti", "007E" },

  // Latin-1 Supplement
  { "r!", "00A1" },
  { "ct", "00A2" },
  { "Po", "00A3" },
  { "Cs", "00A4" },
  { "Ye", "00A5" },
  { "bb", "00A6" },
  { "sc", "00A7" },
  { "ad", "00A8" },
  { "co", "00A9" },
  { "Of", "00AA" },
  { "Fo", "00AB" },
  { "no", "00AC" },
  { "tno", "00AC" },
  // No row for the soft hyphen U+00AD: it is meaningful only in the
  // input file, not in the output.
  { "rg", "00AE" },
  { "a-", "00AF" },
  { "de", "00B0" },
  { "+-", "00B1" },
  { "t+-", "00B1" },
  { "S2", "00B2" },
  { "S3", "00B3" },
  { "aa", "00B4" },
  { "mc", "00B5" },
  { "ps", "00B6" },
  { "pc", "00B7" },
  { "ac", "00B8" },
  { "S1", "00B9" },
  { "Om", "00BA" },
  { "Fc", "00BB" },
  { "14", "00BC" },
  { "12", "00BD" },
  { "34", "00BE" },
  { "r?", "00BF" },
  { "`A", "00C0" },
  { "'A", "00C1" },
  { "^A", "00C2" },
  { "~A", "00C3" },
  { ":A", "00C4" },
  { "oA", "00C5" },
  { "AE", "00C6" },
  { ",C", "00C7" },
  { "`E", "00C8" },
  { "'E", "00C9" },
  { "^E", "00CA" },
  { ":E", "00CB" },
  { "`I", "00CC" },
  { "'I", "00CD" },
  { "^I", "00CE" },
  { ":I", "00CF" },
  { "-D", "00D0" },
  { "~N", "00D1" },
  { "`O", "00D2" },
  { "'O", "00D3" },
  { "^O", "00D4" },
  { "~O", "00D5" },
  { ":O", "00D6" },
  { "mu", "00D7" },
  { "tmu", "00D7" },
  { "/O", "00D8" },
  { "`U", "00D9" },
  { "'U", "00DA" },
  { "^U", "00DB" },
  { ":U", "00DC" },
  { "'Y", "00DD" },
  { "TP", "00DE" },
  { "ss", "00DF" },
  { "`a", "00E0" },
  { "'a", "00E1" },
  { "^a", "00E2" },
  { "~a", "00E3" },
  { ":a", "00E4" },
  { "oa", "00E5" },
  { "ae", "00E6" },
  { ",c", "00E7" },
  { "`e", "00E8" },
  { "'e", "00E9" },
  { "^e", "00EA" },
  { ":e", "00EB" },
  { "`i", "00EC" },
  { "'i", "00ED" },
  { "^i", "00EE" },
  { ":i", "00EF" },
  { "Sd", "00F0" },
  { "~n", "00F1" },
  { "`o", "00F2" },
  { "'o", "00F3" },
  { "^o", "00F4" },
  { "~o", "00F5" },
  { ":o", "00F6" },
  { "di", "00F7" },
  { "tdi", "00F7" },
  { "/o", "00F8" },
  { "`u", "00F9" },
  { "'u", "00FA" },
  { "^u", "00FB" },
  { ":u", "00FC" },
  { "'y", "00FD" },
  { "Tp", "00FE" },
  { ":y", "00FF" },

  // Latin Extended-A and Latin Extended-B
  { "'C", "0106" },
  { "'c", "0107" },
  { ".i", "0131" },
  { "IJ", "0132" },
  { "ij", "0133" },
  { "/L", "0141" },
  { "/l", "0142" },
  { "OE", "0152" },
  { "oe", "0153" },
  { "vS", "0160" },
  { "vs", "0161" },
  { ":Y", "0178" },
  { "vZ", "017D" },
  { "vz", "017E" },
  { "Fn", "0192" },
  { ".j", "0237" },

  // Spacing Modifier Letters
  { "ah", "02C7" },
  { "ab", "02D8" },
  { "a.", "02D9" },
  { "ao", "02DA" },
  { "ho", "02DB" },
  { "a\"", "02DD" },

  // Greek and Coptic
  { "*A", "0391" },
  { "*B", "0392" },
  { "*G", "0393" },
  { "*D", "0394" },
  { "*E", "0395" },
  { "*Z", "0396" },
  { "*Y", "0397" },
  { "*H", "0398" },
  { "*I", "0399" },
  { "*K", "039A" },
  { "*L", "039B" },
  { "*M", "039C" },
  { "*N", "039D" },
  { "*C", "039E" },
  { "*O", "039F" },
  { "*P", "03A0" },
  { "*R", "03A1" },
  { "*S", "03A3" },
  { "*T", "03A4" },
  { "*U", "03A5" },
  { "*F", "03A6" },
  { "*X", "03A7" },
  { "*Q", "03A8" },
  { "*W", "03A9" },
  { "*a", "03B1" },
  { "*b", "03B2" },
  { "*g", "03B3" },
  { "*d", "03B4" },
  { "*e", "03B5" },
  { "*z", "03B6" },
  { "*y", "03B7" },
  { "*h", "03B8" },
  { "*i", "03B9" },
  { "*k", "03BA" },
  { "*l", "03BB" },
  { "*m", "03BC" },
  { "*n", "03BD" },
  { "*c", "03BE" },
  { "*o", "03BF" },
  { "*p", "03C0" },
  { "*r", "03C1" },
  { "ts", "03C2" },
  { "*s", "03C3" },
  { "*t", "03C4" },
  { "*u", "03C5" },
  // '+f' is the curly variant of phi ...
  { "+f", "03C6" },
  { "*x", "03C7" },
  { "*q", "03C8" },
  { "*w", "03C9" },
  { "+h", "03D1" },
  // ... and '*f' is the stroked variant.
  { "*f", "03D5" },
  { "+p", "03D6" },
  { "+e", "03F5" },

  // General Punctuation
  //
  // Both '-' and 'hy' mean HYPHEN, whose glyph is usually narrower than
  // the MINUS sign.  Man pages that wrongly expect '-' to yield U+002D
  // are broken; readers can either fix such pages or apply the
  // workaround described in the PROBLEMS file.
  { "-", "2010" },
  { "hy", "2010" },
  { "en", "2013" },
  { "em", "2014" },
  { "`", "2018" },
  { "oq", "2018" },
  { "'", "2019" },
  { "cq", "2019" },
  { "bq", "201A" },
  { "lq", "201C" },
  { "rq", "201D" },
  { "Bq", "201E" },
  { "dg", "2020" },
  { "dd", "2021" },
  { "bu", "2022" },
  { "%0", "2030" },
  { "fm", "2032" },
  { "sd", "2033" },
  { "fo", "2039" },
  { "fc", "203A" },
  { "rn", "203E" },
  { "f/", "2044" },

  // Currency Symbols
  { "eu", "20AC" },
  { "Eu", "20AC" },

  // Letterlike Symbols
  { "-h", "210F" },
  { "hbar", "210F" },
  { "Im", "2111" },
  { "wp", "2118" },
  { "Re", "211C" },
  { "tm", "2122" },
  { "Ah", "2135" },

  // Number Forms
  { "18", "215B" },
  { "38", "215C" },
  { "58", "215D" },
  { "78", "215E" },

  // Arrows
  { "<-", "2190" },
  { "ua", "2191" },
  { "->", "2192" },
  { "da", "2193" },
  { "<>", "2194" },
  { "va", "2195" },
  { "CR", "21B5" },
  { "lA", "21D0" },
  { "uA", "21D1" },
  { "rA", "21D2" },
  { "dA", "21D3" },
  { "hA", "21D4" },
  { "vA", "21D5" },

  // Mathematical Operators
  { "fa", "2200" },
  { "pd", "2202" },
  { "te", "2203" },
  { "es", "2205" },
  { "gr", "2207" },
  { "mo", "2208" },
  { "nm", "2209" },
  { "st", "220B" },
  { "product", "220F" },
  { "coproduct", "2210" },
  { "sum", "2211" },
  // 'mi' and '\-' stand for a MINUS sign, yet many man pages use them
  // for the U+002D character that introduces a command-line option.
  // On devices that support copy&paste, such as devhtml and devutf8,
  // the user can apply the workaround described in the PROBLEMS file.
  { "\\-", "2212" },
  { "mi", "2212" },
  { "-+", "2213" },
  { "**", "2217" },
  { "sqrt", "221A" },
  { "sr", "221A" },
  { "pt", "221D" },
  { "if", "221E" },
  { "/_", "2220" },
  { "AN", "2227" },
  { "OR", "2228" },
  { "ca", "2229" },
  { "cu", "222A" },
  { "is", "222B" },
  { "integral", "222B" },
  { "tf", "2234" },
  { "3d", "2234" },
  { "ap", "223C" },
  { "|=", "2243" },
  { "=~", "2245" },
  { "~~", "2248" },
  { "~=", "2248" },
  { "!=", "2260" },
  { "==", "2261" },
  { "ne", "2262" },
  { "<=", "2264" },
  { ">=", "2265" },
  { "<<", "226A" },
  { ">>", "226B" },
  { "sb", "2282" },
  { "sp", "2283" },
  { "nb", "2284" },
  { "nc", "2285" },
  { "ib", "2286" },
  { "ip", "2287" },
  { "c+", "2295" },
  { "c*", "2297" },
  { "pp", "22A5" },
  { "md", "22C5" },

  // Miscellaneous Technical
  { "lc", "2308" },
  { "rc", "2309" },
  { "lf", "230A" },
  { "rf", "230B" },
  { "parenlefttp", "239B" },
  { "parenleftex", "239C" },
  { "parenleftbt", "239D" },
  { "parenrighttp", "239E" },
  { "parenrightex", "239F" },
  { "parenrightbt", "23A0" },
  { "bracketlefttp", "23A1" },
  { "bracketleftex", "23A2" },
  { "bracketleftbt", "23A3" },
  { "bracketrighttp", "23A4" },
  { "bracketrightex", "23A5" },
  { "bracketrightbt", "23A6" },
  { "lt", "23A7" },
  { "bracelefttp", "23A7" },
  { "lk", "23A8" },
  { "braceleftmid", "23A8" },
  { "lb", "23A9" },
  { "braceleftbt", "23A9" },
  { "bv", "23AA" },
  { "braceex", "23AA" },
  { "braceleftex", "23AA" },
  { "bracerightex", "23AA" },
  { "rt", "23AB" },
  { "bracerighttp", "23AB" },
  { "rk", "23AC" },
  { "bracerightmid", "23AC" },
  { "rb", "23AD" },
  { "bracerightbt", "23AD" },
  { "an", "23AF" },

  // Box Drawing, Geometric Shapes, Miscellaneous Symbols, Dingbats
  { "br", "2502" },
  { "sq", "25A1" },
  { "lz", "25CA" },
  { "ci", "25CB" },
  { "lh", "261C" },
  { "rh", "261E" },
  { "SP", "2660" },
  { "CL", "2663" },
  { "HE", "2665" },
  { "DI", "2666" },
  { "OK", "2713" },

  // Miscellaneous Mathematical Symbols-A
  //
  // The left and right angle brackets could map to U+2329,U+232A, to
  // U+3008,U+3009, or to U+27E8,U+27E9.  The first two pairs are
  // double-width characters (see Unicode's 'DerivedEastAsianWidth.txt'
  // file) and hence unsuitable for general use; the third pair is
  // single-width.
  //
  // The devhtml device overrides this mapping, because
  //
  //   http://www.w3.org/TR/html401/sgml/entities.html
  //
  // says that in HTML, '&lang;' and '&rang;' are U+2329,U+232A,
  // respectively.
  { "la", "27E8" },
  { "ra", "27E9" },
};
513 :
514 : // global constructor
515 : static struct glyph_to_unicode_init {
516 : glyph_to_unicode_init();
517 : } _glyph_to_unicode_init;
518 :
519 3948 : glyph_to_unicode_init::glyph_to_unicode_init()
520 : {
521 1705536 : for (size_t i = 0; i < countof(glyph_to_unicode_list); i++) {
522 1701588 : glyph_to_unicode_map *gtu = new glyph_to_unicode_map[1];
523 1701588 : gtu->value = (char *)glyph_to_unicode_list[i].value;
524 1701588 : glyph_to_unicode_table.define(glyph_to_unicode_list[i].key, gtu);
525 : }
526 3948 : }
527 :
528 290744 : const char *glyph_name_to_unicode(const char *s)
529 : {
530 290744 : glyph_to_unicode_map *result = glyph_to_unicode_table.lookup(s);
531 290744 : return result ? result->value : 0 /* nullptr */;
532 : }
533 :
534 : // Local Variables:
535 : // fill-column: 72
536 : // mode: C++
537 : // End:
538 : // vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
|