LCOV - code coverage report
Current view: top level - libs/libgroff - glyphuni.cpp (source / functions) Hit Total Coverage
Test: GNU roff Lines: 10 10 100.0 %
Date: 2026-01-16 17:51:41 Functions: 7 10 70.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Copyright 2002-2024 Free Software Foundation, Inc.
       2             :      Written by Werner Lemberg <wl@gnu.org>
       3             : 
       4             : This file is part of groff, the GNU roff typesetting system.
       5             : 
       6             : groff is free software; you can redistribute it and/or modify it under
       7             : the terms of the GNU General Public License as published by the Free
       8             : Software Foundation, either version 3 of the License, or
       9             : (at your option) any later version.
      10             : 
      11             : groff is distributed in the hope that it will be useful, but WITHOUT ANY
      12             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14             : for more details.
      15             : 
      16             : You should have received a copy of the GNU General Public License
      17             : along with this program.  If not, see <http://www.gnu.org/licenses/>. */
      18             : 
      19             : #ifdef HAVE_CONFIG_H
      20             : #include <config.h>
      21             : #endif
      22             : 
      23             : #include <stdcountof.h>
      24             : 
      25             : #include "lib.h"
      26             : 
      27             : #include "stringclass.h"
      28             : #include "ptable.h"
      29             : #include "unicode.h"
      30             : 
      31             : struct glyph_to_unicode_map {
      32             :   char *value;
      33             : };
      34             : 
      35             : declare_ptable(glyph_to_unicode_map)
      36    48503681 : implement_ptable(glyph_to_unicode_map)
      37             : 
      38             : PTABLE(glyph_to_unicode_map) glyph_to_unicode_table;
      39             : 
      40             : // The entries commented out in the table below aren't easily used in
      41             : // glyph names.  Getting at the names `[` and `]` would require use of
      42             : // `\C`, and getting at `\` would require changing the escape character.
      43             : //
      44             : // Examples: \C'[' \C']'
      45             : //   .ec @
      46             : //   @[\]
      47             : //
      48             : // TODO: Uncomment them, then?
      49             : 
      50             : struct S {
      51             :   const char *key;
      52             :   const char *value;
      53             : } glyph_to_unicode_list[] = {
      54             :   { "!", "0021" },
      55             :   { "\"", "0022" },
      56             :   { "dq", "0022" },
      57             :   { "#", "0023" },
      58             :   { "sh", "0023" },
      59             :   { "$", "0024" },
      60             :   { "Do", "0024" },
      61             :   { "%", "0025" },
      62             :   { "&", "0026" },
      63             :   { "aq", "0027" },
      64             :   { "(", "0028" },
      65             :   { ")", "0029" },
      66             :   { "*", "002A" },
      67             :   { "+", "002B" },
      68             :   { "pl", "002B" },
      69             :   { ",", "002C" },
      70             :   { ".", "002E" },
      71             :   { "/", "002F" },
      72             :   { "sl", "002F" },
      73             :   { "0", "0030" },
      74             :   { "1", "0031" },
      75             :   { "2", "0032" },
      76             :   { "3", "0033" },
      77             :   { "4", "0034" },
      78             :   { "5", "0035" },
      79             :   { "6", "0036" },
      80             :   { "7", "0037" },
      81             :   { "8", "0038" },
      82             :   { "9", "0039" },
      83             :   { ":", "003A" },
      84             :   { ";", "003B" },
      85             :   { "<", "003C" },
      86             :   { "=", "003D" },
      87             :   { "eq", "003D" },
      88             :   { ">", "003E" },
      89             :   { "?", "003F" },
      90             :   { "@", "0040" },
      91             :   { "at", "0040" },
      92             :   { "A", "0041" },
      93             :   { "B", "0042" },
      94             :   { "C", "0043" },
      95             :   { "D", "0044" },
      96             :   { "E", "0045" },
      97             :   { "F", "0046" },
      98             :   { "G", "0047" },
      99             :   { "H", "0048" },
     100             :   { "I", "0049" },
     101             :   { "J", "004A" },
     102             :   { "K", "004B" },
     103             :   { "L", "004C" },
     104             :   { "M", "004D" },
     105             :   { "N", "004E" },
     106             :   { "O", "004F" },
     107             :   { "P", "0050" },
     108             :   { "Q", "0051" },
     109             :   { "R", "0052" },
     110             :   { "S", "0053" },
     111             :   { "T", "0054" },
     112             :   { "U", "0055" },
     113             :   { "V", "0056" },
     114             :   { "W", "0057" },
     115             :   { "X", "0058" },
     116             :   { "Y", "0059" },
     117             :   { "Z", "005A" },
     118             : //{ "[", "005B" },
     119             :   { "lB", "005B" },
     120             : //{ "\\", "005C" },
     121             :   { "rs", "005C" },
     122             : //{ "]", "005D" },
     123             :   { "rB", "005D" },
     124             :   { "a^", "005E" },
     125             :   { "^", "005E" },
     126             :   { "ha", "005E" },
     127             :   { "_", "005F" },
     128             :   { "ul", "005F" },
     129             :   { "ga", "0060" },
     130             :   { "a", "0061" },
     131             :   { "b", "0062" },
     132             :   { "c", "0063" },
     133             :   { "d", "0064" },
     134             :   { "e", "0065" },
     135             :   { "f", "0066" },
     136             :   { "ff", "0066_0066" },
     137             :   { "Fi", "0066_0066_0069" },
     138             :   { "Fl", "0066_0066_006C" },
     139             :   { "fi", "0066_0069" },
     140             :   { "fl", "0066_006C" },
     141             :   { "g", "0067" },
     142             :   { "h", "0068" },
     143             :   { "i", "0069" },
     144             :   { "j", "006A" },
     145             :   { "k", "006B" },
     146             :   { "l", "006C" },
     147             :   { "m", "006D" },
     148             :   { "n", "006E" },
     149             :   { "o", "006F" },
     150             :   { "p", "0070" },
     151             :   { "q", "0071" },
     152             :   { "r", "0072" },
     153             :   { "s", "0073" },
     154             :   { "t", "0074" },
     155             :   { "u", "0075" },
     156             :   { "v", "0076" },
     157             :   { "w", "0077" },
     158             :   { "x", "0078" },
     159             :   { "y", "0079" },
     160             :   { "z", "007A" },
     161             :   { "lC", "007B" },
     162             :   { "{", "007B" },
     163             :   { "ba", "007C" },
     164             :   { "or", "007C" },
     165             :   { "|", "007C" },
     166             :   { "rC", "007D" },
     167             :   { "}", "007D" },
     168             :   { "a~", "007E" },
     169             :   { "~", "007E" },
     170             :   { "ti", "007E" },
     171             :   { "r!", "00A1" },
     172             :   { "ct", "00A2" },
     173             :   { "Po", "00A3" },
     174             :   { "Cs", "00A4" },
     175             :   { "Ye", "00A5" },
     176             :   { "bb", "00A6" },
     177             :   { "sc", "00A7" },
     178             :   { "ad", "00A8" },
     179             :   { "co", "00A9" },
     180             :   { "Of", "00AA" },
     181             :   { "Fo", "00AB" },
     182             :   { "no", "00AC" },
     183             :   { "tno", "00AC" },
     184             :   // The soft hyphen U+00AD is meaningful only in the input file,
     185             :   // not in the output.
     186             :   { "rg", "00AE" },
     187             :   { "a-", "00AF" },
     188             :   { "de", "00B0" },
     189             :   { "+-", "00B1" },
     190             :   { "t+-", "00B1" },
     191             :   { "S2", "00B2" },
     192             :   { "S3", "00B3" },
     193             :   { "aa", "00B4" },
     194             :   { "mc", "00B5" },
     195             :   { "ps", "00B6" },
     196             :   { "pc", "00B7" },
     197             :   { "ac", "00B8" },
     198             :   { "S1", "00B9" },
     199             :   { "Om", "00BA" },
     200             :   { "Fc", "00BB" },
     201             :   { "14", "00BC" },
     202             :   { "12", "00BD" },
     203             :   { "34", "00BE" },
     204             :   { "r?", "00BF" },
     205             :   { "`A", "00C0" },
     206             :   { "'A", "00C1" },
     207             :   { "^A", "00C2" },
     208             :   { "~A", "00C3" },
     209             :   { ":A", "00C4" },
     210             :   { "oA", "00C5" },
     211             :   { "AE", "00C6" },
     212             :   { ",C", "00C7" },
     213             :   { "`E", "00C8" },
     214             :   { "'E", "00C9" },
     215             :   { "^E", "00CA" },
     216             :   { ":E", "00CB" },
     217             :   { "`I", "00CC" },
     218             :   { "'I", "00CD" },
     219             :   { "^I", "00CE" },
     220             :   { ":I", "00CF" },
     221             :   { "-D", "00D0" },
     222             :   { "~N", "00D1" },
     223             :   { "`O", "00D2" },
     224             :   { "'O", "00D3" },
     225             :   { "^O", "00D4" },
     226             :   { "~O", "00D5" },
     227             :   { ":O", "00D6" },
     228             :   { "mu", "00D7" },
     229             :   { "tmu", "00D7" },
     230             :   { "/O", "00D8" },
     231             :   { "`U", "00D9" },
     232             :   { "'U", "00DA" },
     233             :   { "^U", "00DB" },
     234             :   { ":U", "00DC" },
     235             :   { "'Y", "00DD" },
     236             :   { "TP", "00DE" },
     237             :   { "ss", "00DF" },
     238             :   { "`a", "00E0" },
     239             :   { "'a", "00E1" },
     240             :   { "^a", "00E2" },
     241             :   { "~a", "00E3" },
     242             :   { ":a", "00E4" },
     243             :   { "oa", "00E5" },
     244             :   { "ae", "00E6" },
     245             :   { ",c", "00E7" },
     246             :   { "`e", "00E8" },
     247             :   { "'e", "00E9" },
     248             :   { "^e", "00EA" },
     249             :   { ":e", "00EB" },
     250             :   { "`i", "00EC" },
     251             :   { "'i", "00ED" },
     252             :   { "^i", "00EE" },
     253             :   { ":i", "00EF" },
     254             :   { "Sd", "00F0" },
     255             :   { "~n", "00F1" },
     256             :   { "`o", "00F2" },
     257             :   { "'o", "00F3" },
     258             :   { "^o", "00F4" },
     259             :   { "~o", "00F5" },
     260             :   { ":o", "00F6" },
     261             :   { "di", "00F7" },
     262             :   { "tdi", "00F7" },
     263             :   { "/o", "00F8" },
     264             :   { "`u", "00F9" },
     265             :   { "'u", "00FA" },
     266             :   { "^u", "00FB" },
     267             :   { ":u", "00FC" },
     268             :   { "'y", "00FD" },
     269             :   { "Tp", "00FE" },
     270             :   { ":y", "00FF" },
     271             :   { "'C", "0106" },
     272             :   { "'c", "0107" },
     273             :   { ".i", "0131" },
     274             :   { "IJ", "0132" },
     275             :   { "ij", "0133" },
     276             :   { "/L", "0141" },
     277             :   { "/l", "0142" },
     278             :   { "OE", "0152" },
     279             :   { "oe", "0153" },
     280             :   { "vS", "0160" },
     281             :   { "vs", "0161" },
     282             :   { ":Y", "0178" },
     283             :   { "vZ", "017D" },
     284             :   { "vz", "017E" },
     285             :   { "Fn", "0192" },
     286             :   { ".j", "0237" },
     287             :   { "ah", "02C7" },
     288             :   { "ab", "02D8" },
     289             :   { "a.", "02D9" },
     290             :   { "ao", "02DA" },
     291             :   { "ho", "02DB" },
     292             :   { "a\"", "02DD" },
     293             :   { "*A", "0391" },
     294             :   { "*B", "0392" },
     295             :   { "*G", "0393" },
     296             :   { "*D", "0394" },
     297             :   { "*E", "0395" },
     298             :   { "*Z", "0396" },
     299             :   { "*Y", "0397" },
     300             :   { "*H", "0398" },
     301             :   { "*I", "0399" },
     302             :   { "*K", "039A" },
     303             :   { "*L", "039B" },
     304             :   { "*M", "039C" },
     305             :   { "*N", "039D" },
     306             :   { "*C", "039E" },
     307             :   { "*O", "039F" },
     308             :   { "*P", "03A0" },
     309             :   { "*R", "03A1" },
     310             :   { "*S", "03A3" },
     311             :   { "*T", "03A4" },
     312             :   { "*U", "03A5" },
     313             :   { "*F", "03A6" },
     314             :   { "*X", "03A7" },
     315             :   { "*Q", "03A8" },
     316             :   { "*W", "03A9" },
     317             :   { "*a", "03B1" },
     318             :   { "*b", "03B2" },
     319             :   { "*g", "03B3" },
     320             :   { "*d", "03B4" },
     321             :   { "*e", "03B5" },
     322             :   { "*z", "03B6" },
     323             :   { "*y", "03B7" },
     324             :   { "*h", "03B8" },
     325             :   { "*i", "03B9" },
     326             :   { "*k", "03BA" },
     327             :   { "*l", "03BB" },
     328             :   { "*m", "03BC" },
     329             :   { "*n", "03BD" },
     330             :   { "*c", "03BE" },
     331             :   { "*o", "03BF" },
     332             :   { "*p", "03C0" },
     333             :   { "*r", "03C1" },
     334             :   { "ts", "03C2" },
     335             :   { "*s", "03C3" },
     336             :   { "*t", "03C4" },
     337             :   { "*u", "03C5" },
     338             :   // the curly phi variant
     339             :   { "+f", "03C6" },
     340             :   { "*x", "03C7" },
     341             :   { "*q", "03C8" },
     342             :   { "*w", "03C9" },
     343             :   { "+h", "03D1" },
     344             :   // the stroked phi variant
     345             :   { "*f", "03D5" },
     346             :   { "+p", "03D6" },
     347             :   { "+e", "03F5" },
     348             :   // '-' and 'hy' denote a HYPHEN, usually a glyph with a smaller width than
     349             :   // the MINUS sign.  Users who are viewing broken man pages that assume
     350             :   // that '-' denotes a U+002D character can either fix the broken man pages
     351             :   // or apply the workaround described in the PROBLEMS file.
     352             :   { "-", "2010" },
     353             :   { "hy", "2010" },
     354             :   { "en", "2013" },
     355             :   { "em", "2014" },
     356             :   { "`", "2018" },
     357             :   { "oq", "2018" },
     358             :   { "'", "2019" },
     359             :   { "cq", "2019" },
     360             :   { "bq", "201A" },
     361             :   { "lq", "201C" },
     362             :   { "rq", "201D" },
     363             :   { "Bq", "201E" },
     364             :   { "dg", "2020" },
     365             :   { "dd", "2021" },
     366             :   { "bu", "2022" },
     367             :   { "%0", "2030" },
     368             :   { "fm", "2032" },
     369             :   { "sd", "2033" },
     370             :   { "fo", "2039" },
     371             :   { "fc", "203A" },
     372             :   { "rn", "203E" },
     373             :   { "f/", "2044" },
     374             :   { "eu", "20AC" },
     375             :   { "Eu", "20AC" },
     376             :   { "-h", "210F" },
     377             :   { "hbar", "210F" },
     378             :   { "Im", "2111" },
     379             :   { "wp", "2118" },
     380             :   { "Re", "211C" },
     381             :   { "tm", "2122" },
     382             :   { "Ah", "2135" },
     383             :   { "18", "215B" },
     384             :   { "38", "215C" },
     385             :   { "58", "215D" },
     386             :   { "78", "215E" },
     387             :   { "<-", "2190" },
     388             :   { "ua", "2191" },
     389             :   { "->", "2192" },
     390             :   { "da", "2193" },
     391             :   { "<>", "2194" },
     392             :   { "va", "2195" },
     393             :   { "CR", "21B5" },
     394             :   { "lA", "21D0" },
     395             :   { "uA", "21D1" },
     396             :   { "rA", "21D2" },
     397             :   { "dA", "21D3" },
     398             :   { "hA", "21D4" },
     399             :   { "vA", "21D5" },
     400             :   { "fa", "2200" },
     401             :   { "pd", "2202" },
     402             :   { "te", "2203" },
     403             :   { "es", "2205" },
     404             :   { "gr", "2207" },
     405             :   { "mo", "2208" },
     406             :   { "nm", "2209" },
     407             :   { "st", "220B" },
     408             :   { "product", "220F" },
     409             :   { "coproduct", "2210" },
     410             :   { "sum", "2211" },
     411             :   // 'mi' and '\-' represent a MINUS sign.  But it is used in many man pages
     412             :   // to denote the U+002D character that introduces a command-line option.
     413             :   // For devices that support copy&paste, such as devhtml and devutf8, the
     414             :   // user can apply the workaround described in the PROBLEMS file.
     415             :   { "\\-", "2212" },
     416             :   { "mi", "2212" },
     417             :   { "-+", "2213" },
     418             :   { "**", "2217" },
     419             :   { "sqrt", "221A" },
     420             :   { "sr", "221A" },
     421             :   { "pt", "221D" },
     422             :   { "if", "221E" },
     423             :   { "/_", "2220" },
     424             :   { "AN", "2227" },
     425             :   { "OR", "2228" },
     426             :   { "ca", "2229" },
     427             :   { "cu", "222A" },
     428             :   { "is", "222B" },
     429             :   { "integral", "222B" },
     430             :   { "tf", "2234" },
     431             :   { "3d", "2234" },
     432             :   { "ap", "223C" },
     433             :   { "|=", "2243" },
     434             :   { "=~", "2245" },
     435             :   { "~~", "2248" },
     436             :   { "~=", "2248" },
     437             :   { "!=", "2260" },
     438             :   { "==", "2261" },
     439             :   { "ne", "2262" },
     440             :   { "<=", "2264" },
     441             :   { ">=", "2265" },
     442             :   { "<<", "226A" },
     443             :   { ">>", "226B" },
     444             :   { "sb", "2282" },
     445             :   { "sp", "2283" },
     446             :   { "nb", "2284" },
     447             :   { "nc", "2285" },
     448             :   { "ib", "2286" },
     449             :   { "ip", "2287" },
     450             :   { "c+", "2295" },
     451             :   { "c*", "2297" },
     452             :   { "pp", "22A5" },
     453             :   { "md", "22C5" },
     454             :   { "lc", "2308" },
     455             :   { "rc", "2309" },
     456             :   { "lf", "230A" },
     457             :   { "rf", "230B" },
     458             :   { "parenlefttp", "239B" },
     459             :   { "parenleftex", "239C" },
     460             :   { "parenleftbt", "239D" },
     461             :   { "parenrighttp", "239E" },
     462             :   { "parenrightex", "239F" },
     463             :   { "parenrightbt", "23A0" },
     464             :   { "bracketlefttp", "23A1" },
     465             :   { "bracketleftex", "23A2" },
     466             :   { "bracketleftbt", "23A3" },
     467             :   { "bracketrighttp", "23A4" },
     468             :   { "bracketrightex", "23A5" },
     469             :   { "bracketrightbt", "23A6" },
     470             :   { "lt", "23A7" },
     471             :   { "bracelefttp", "23A7" },
     472             :   { "lk", "23A8" },
     473             :   { "braceleftmid", "23A8" },
     474             :   { "lb", "23A9" },
     475             :   { "braceleftbt", "23A9" },
     476             :   { "bv", "23AA" },
     477             :   { "braceex", "23AA" },
     478             :   { "braceleftex", "23AA" },
     479             :   { "bracerightex", "23AA" },
     480             :   { "rt", "23AB" },
     481             :   { "bracerighttp", "23AB" },
     482             :   { "rk", "23AC" },
     483             :   { "bracerightmid", "23AC" },
     484             :   { "rb", "23AD" },
     485             :   { "bracerightbt", "23AD" },
     486             :   { "an", "23AF" },
     487             :   { "br", "2502" },
     488             :   { "sq", "25A1" },
     489             :   { "lz", "25CA" },
     490             :   { "ci", "25CB" },
     491             :   { "lh", "261C" },
     492             :   { "rh", "261E" },
     493             :   { "SP", "2660" },
     494             :   { "CL", "2663" },
     495             :   { "HE", "2665" },
     496             :   { "DI", "2666" },
     497             :   { "OK", "2713" },
     498             :   // The 'left angle bracket' and 'right angle bracket' could be mapped to
     499             :   // either U+2329,U+232A or U+3008,U+3009 or U+27E8,U+27E9.  But the first
     500             :   // and second possibility are double-width characters (see Unicode's
     501             :   // 'DerivedEastAsianWidth.txt' file) and are therefore not suitable for
     502             :   // general use, whereas the third possibility is single-width.
     503             :   //
     504             :   // The devhtml device overrides this mapping, because
     505             :   //
     506             :   //   http://www.w3.org/TR/html401/sgml/entities.html
     507             :   //
     508             :   // says that in HTML, '&lang;' and '&rang;' are U+2329,U+232A,
     509             :   // respectively.
     510             :   { "la", "27E8" },
     511             :   { "ra", "27E9" },
     512             : };
     513             : 
     514             : // global constructor
     515             : static struct glyph_to_unicode_init {
     516             :   glyph_to_unicode_init();
     517             : } _glyph_to_unicode_init;
     518             : 
     519        3948 : glyph_to_unicode_init::glyph_to_unicode_init()
     520             : {
     521     1705536 :   for (size_t i = 0; i < countof(glyph_to_unicode_list); i++) {
     522     1701588 :     glyph_to_unicode_map *gtu = new glyph_to_unicode_map[1];
     523     1701588 :     gtu->value = (char *)glyph_to_unicode_list[i].value;
     524     1701588 :     glyph_to_unicode_table.define(glyph_to_unicode_list[i].key, gtu);
     525             :   }
     526        3948 : }
     527             : 
     528      290744 : const char *glyph_name_to_unicode(const char *s)
     529             : {
     530      290744 :   glyph_to_unicode_map *result = glyph_to_unicode_table.lookup(s);
     531      290744 :   return result ? result->value : 0 /* nullptr */;
     532             : }
     533             : 
     534             : // Local Variables:
     535             : // fill-column: 72
     536             : // mode: C++
     537             : // End:
     538             : // vim: set cindent noexpandtab shiftwidth=2 textwidth=72:

Generated by: LCOV version 1.14