LCOV - code coverage report
Current view: top level - preproc/eqn - text.cpp (source / functions) Hit Total Coverage
Test: GNU roff Lines: 179 326 54.9 %
Date: 2026-01-16 17:51:41 Functions: 26 43 60.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Copyright (C) 1989-2020 Free Software Foundation, Inc.
       2             :      Written by James Clark (jjc@jclark.com)
       3             : 
       4             : This file is part of groff, the GNU roff typesetting system.
       5             : 
       6             : groff is free software; you can redistribute it and/or modify it under
       7             : the terms of the GNU General Public License as published by the Free
       8             : Software Foundation, either version 3 of the License, or
       9             : (at your option) any later version.
      10             : 
      11             : groff is distributed in the hope that it will be useful, but WITHOUT ANY
      12             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14             : for more details.
      15             : 
      16             : You should have received a copy of the GNU General Public License
      17             : along with this program.  If not, see <http://www.gnu.org/licenses/>. */
      18             : 
      19             : #ifdef HAVE_CONFIG_H
      20             : #include <config.h>
      21             : #endif
      22             : 
      23             : #include <stdcountof.h>
      24             : 
      25             : #include <ctype.h>
      26             : #include <stdlib.h>
      27             : #include "eqn.h"
      28             : #include "pbox.h"
      29             : #include "ptable.h"
      30             : 
      31             : struct map {
      32             :   const char *from;
      33             :   const char *to;
      34             : };
      35             : 
      36             : struct map entity_table[] = {
      37             :   // Classic troff special characters
      38             :   {"%", "&shy;"},       // ISOnum
      39             :   {"'", "&acute;"},     // ISOdia
      40             :   {"!=", "&ne;"},       // ISOtech
      41             :   {"**", "&lowast;"},   // ISOtech
      42             :   {"*a", "&alpha;"},    // ISOgrk3
      43             :   {"*A", "A"},
      44             :   {"*b", "&beta;"},     // ISOgrk3
      45             :   {"*B", "B"},
      46             :   {"*d", "&delta;"},    // ISOgrk3
      47             :   {"*D", "&Delta;"},    // ISOgrk3
      48             :   {"*e", "&epsilon;"},  // ISOgrk3
      49             :   {"*E", "E"},
      50             :   {"*f", "&phi;"},      // ISOgrk3
      51             :   {"*F", "&Phi;"},      // ISOgrk3
      52             :   {"*g", "&gamma;"},    // ISOgrk3
      53             :   {"*G", "&Gamma;"},    // ISOgrk3
      54             :   {"*h", "&theta;"},    // ISOgrk3
      55             :   {"*H", "&Theta;"},    // ISOgrk3
      56             :   {"*i", "&iota;"},     // ISOgrk3
      57             :   {"*I", "I"},
      58             :   {"*k", "&kappa;"},    // ISOgrk3
      59             :   {"*K", "K;"},
      60             :   {"*l", "&lambda;"},   // ISOgrk3
      61             :   {"*L", "&Lambda;"},   // ISOgrk3
      62             :   {"*m", "&mu;"},       // ISOgrk3
      63             :   {"*M", "M"},
      64             :   {"*n", "&nu;"},       // ISOgrk3
      65             :   {"*N", "N"},
      66             :   {"*o", "o"},
      67             :   {"*O", "O"},
      68             :   {"*p", "&pi;"},       // ISOgrk3
      69             :   {"*P", "&Pi;"},       // ISOgrk3
      70             :   {"*q", "&psi;"},      // ISOgrk3
      71             :   {"*Q", "&PSI;"},      // ISOgrk3
      72             :   {"*r", "&rho;"},      // ISOgrk3
      73             :   {"*R", "R"},
      74             :   {"*s", "&sigma;"},    // ISOgrk3
      75             :   {"*S", "&Sigma;"},    // ISOgrk3
      76             :   {"*t", "&tau;"},      // ISOgrk3
      77             :   {"*T", "&Tau;"},      // ISOgrk3
      78             :   {"*u", "&upsilon;"},  // ISOgrk3
      79             :   {"*U", "&Upsilon;"},  // ISOgrk3
      80             :   {"*w", "&omega;"},    // ISOgrk3
      81             :   {"*W", "&Omega;"},    // ISOgrk3
      82             :   {"*x", "&chi;"},      // ISOgrk3
      83             :   {"*X", "&Chi;"},      // ISOgrk3
      84             :   {"*y", "&eta;"},      // ISOgrk3
      85             :   {"*Y", "&Eta;"},      // ISOgrk3
      86             :   {"*z", "&zeta;"},     // ISOgrk3
      87             :   {"*Z", "&Zeta;"},     // ISOgrk3
      88             :   {"+-", "&plusmn;"},   // ISOnum
      89             :   {"->", "&rarr;"},  // ISOnum
      90             :   {"12", "&frac12;"},   // ISOnum
      91             :   {"14", "&frac14;"},   // ISOnum
      92             :   {"34", "&frac34;"},   // ISOnum
      93             :   {"<-", "&larr;"},  // ISOnum
      94             :   {"==", "&equiv;"},    // ISOtech
      95             :   {"Fi", "&ffilig;"},   // ISOpub
      96             :   {"Fl", "&ffllig;"},   // ISOpub
      97             :   {"aa", "&acute;"},    // ISOdia
      98             :   {"ap", "&sim;"},      // ISOtech
      99             :   {"bl", "&phonexb;"},  // ISOpub
     100             :   {"br", "&boxv;"},     // ISObox
     101             :   {"bs", "&phone;"},    // ISOpub (for the Bell logo)
     102             :   {"bu", "&bull;"},     // ISOpub
     103             :   {"bv", "&verbar;"},   // ISOnum
     104             :   {"ca", "&cap;"},      // ISOtech
     105             :   {"ci", "&cir;"},      // ISOpub
     106             :   {"co", "&copy;"},     // ISOnum
     107             :   {"ct", "&cent;"},     // ISOnum
     108             :   {"cu", "&cup;"},      // ISOtech
     109             :   {"da", "&darr;"},     // ISOnum
     110             :   {"de", "&deg;"},      // ISOnum
     111             :   {"dg", "&dagger;"},   // ISOpub
     112             :   {"dd", "&Dagger;"},   // ISOpub
     113             :   {"di", "&divide;"},   // ISOnum
     114             :   {"em", "&mdash;"},    // ISOpub
     115             :   {"eq", "&equals;"},   // ISOnum
     116             :   {"es", "&empty;"},    // ISOamso
     117             :   {"ff", "&fflig;"},    // ISOpub
     118             :   {"fi", "&filig;"},    // ISOpub
     119             :   {"fl", "&fllig;"},    // ISOpub
     120             :   {"fm", "&prime;"},    // ISOtech
     121             :   {"ge", "&ge;"},       // ISOtech
     122             :   {"gr", "&nabla;"},    // ISOtech
     123             :   {"hy", "&hyphen;"},   // ISOnum
     124             :   {"ib", "&sube;"},     // ISOtech
     125             :   {"if", "&infin;"},    // ISOtech
     126             :   {"ip", "&supe;"},     // ISOtech
     127             :   {"is", "&int;"},      // ISOtech
     128             :   {"le", "&le;"},       // ISOtech
     129             :   // Some pile characters go here
     130             :   {"mi", "&minus;"},    // ISOtech
     131             :   {"mo", "&isin;"},     // ISOtech
     132             :   {"mu", "&times;"},    // ISOnum
     133             :   {"no", "&not;"},      // ISOnum
     134             :   {"or", "&verbar;"},   // ISOnum
     135             :   {"pl", "&plus;"},     // ISOnum
     136             :   {"pt", "&prop;"},     // ISOtech
     137             :   {"rg", "&trade;"},    // ISOnum
     138             :   // More pile characters go here
     139             :   {"rn", "&macr;"},     // ISOdia
     140             :   {"ru", "&lowbar;"},   // ISOnum
     141             :   {"sb", "&sub;"},      // ISOtech
     142             :   {"sc", "&sect;"},     // ISOnum
     143             :   {"sl", "/"},
     144             :   {"sp", "&sup;"},      // ISOtech
     145             :   {"sq", "&squf;"},     // ISOpub
     146             :   {"sr", "&radic;"},    // ISOtech
     147             :   {"ts", "&sigmav;"},   // ISOgrk3
     148             :   {"ua", "&uarr;"},     // ISOnum
     149             :   {"ul", "_"},
     150             :   {"~=", "&cong;"},     // ISOtech
     151             :   // Extended specials supported by groff; see groff_char(7).
     152             :   // These are listed in the order they occur on that man page.
     153             :   {"-D", "&ETH;"},      // ISOlat: Icelandic uppercase eth
     154             :   {"Sd", "&eth;"},      // ISOlat1: Icelandic lowercase eth
     155             :   {"TP", "&THORN;"},    // ISOlat1: Icelandic uppercase thorn
     156             :   {"Tp", "&thorn;"},    // ISOlat1: Icelandic lowercase thorn
     157             :   {"ss", "&szlig;"},    // ISOlat1
     158             :   // Ligatures
     159             :   // ff, fi, fl, ffi, ffl from old troff go here
     160             :   {"AE", "&AElig;"},    // ISOlat1
     161             :   {"ae", "&aelig;"},    // ISOlat1
     162             :   {"OE", "&OElig;"},    // ISOlat2
     163             :   {"oe", "&oelig;"},    // ISOlat2
     164             :   {"IJ", "&ijlig;"},    // ISOlat2: Dutch IJ ligature
     165             :   {"ij", "&IJlig;"},    // ISOlat2: Dutch ij ligature
     166             :   {".i", "&inodot;"},   // ISOlat2,ISOamso
     167             :   {".j", "&jnodot;"},   // ISOamso (undocumented but in 1.19)
     168             :   // Accented characters
     169             :   {"'A", "&Aacute;"},   // ISOlat1
     170             :   {"'C", "&Cacute;"},   // ISOlat2
     171             :   {"'E", "&Eacute;"},   // ISOlat1
     172             :   {"'I", "&Iacute;"},   // ISOlat1
     173             :   {"'O", "&Oacute;"},   // ISOlat1
     174             :   {"'U", "&Uacute;"},   // ISOlat1
     175             :   {"'Y", "&Yacute;"},   // ISOlat1
     176             :   {"'a", "&aacute;"},   // ISOlat1
     177             :   {"'c", "&cacute;"},   // ISOlat2
     178             :   {"'e", "&eacute;"},   // ISOlat1
     179             :   {"'i", "&iacute;"},   // ISOlat1
     180             :   {"'o", "&oacute;"},   // ISOlat1
     181             :   {"'u", "&uacute;"},   // ISOlat1
     182             :   {"'y", "&yacute;"},   // ISOlat1
     183             :   {":A", "&Auml;"},     // ISOlat1
     184             :   {":E", "&Euml;"},     // ISOlat1
     185             :   {":I", "&Iuml;"},     // ISOlat1
     186             :   {":O", "&Ouml;"},     // ISOlat1
     187             :   {":U", "&Uuml;"},     // ISOlat1
     188             :   {":Y", "&Yuml;"},     // ISOlat2
     189             :   {":a", "&auml;"},     // ISOlat1
     190             :   {":e", "&euml;"},     // ISOlat1
     191             :   {":i", "&iuml;"},     // ISOlat1
     192             :   {":o", "&ouml;"},     // ISOlat1
     193             :   {":u", "&uuml;"},     // ISOlat1
     194             :   {":y", "&yuml;"},     // ISOlat1
     195             :   {"^A", "&Acirc;"},    // ISOlat1
     196             :   {"^E", "&Ecirc;"},    // ISOlat1
     197             :   {"^I", "&Icirc;"},    // ISOlat1
     198             :   {"^O", "&Ocirc;"},    // ISOlat1
     199             :   {"^U", "&Ucirc;"},    // ISOlat1
     200             :   {"^a", "&acirc;"},    // ISOlat1
     201             :   {"^e", "&ecirc;"},    // ISOlat1
     202             :   {"^i", "&icirc;"},    // ISOlat1
     203             :   {"^o", "&ocirc;"},    // ISOlat1
     204             :   {"^u", "&ucirc;"},    // ISOlat1
     205             :   {"`A", "&Agrave;"},   // ISOlat1
     206             :   {"`E", "&Egrave;"},   // ISOlat1
     207             :   {"`I", "&Igrave;"},   // ISOlat1
     208             :   {"`O", "&Ograve;"},   // ISOlat1
     209             :   {"`U", "&Ugrave;"},   // ISOlat1
     210             :   {"`a", "&agrave;"},   // ISOlat1
     211             :   {"`e", "&egrave;"},   // ISOlat1
     212             :   {"`i", "&igrave;"},   // ISOlat1
     213             :   {"`o", "&ograve;"},   // ISOlat1
     214             :   {"`u", "&ugrave;"},   // ISOlat1
     215             :   {"~A", "&Atilde;"},   // ISOlat1
     216             :   {"~N", "&Ntilde;"},   // ISOlat1
     217             :   {"~O", "&Otilde;"},   // ISOlat1
     218             :   {"~a", "&atilde;"},   // ISOlat1
     219             :   {"~n", "&ntilde;"},   // ISOlat1
     220             :   {"~o", "&otilde;"},   // ISOlat1
     221             :   {"vS", "&Scaron;"},   // ISOlat2
     222             :   {"vs", "&scaron;"},   // ISOlat2
     223             :   {"vZ", "&Zcaron;"},   // ISOlat2
     224             :   {"vz", "&zcaron;"},   // ISOlat2
     225             :   {",C", "&Ccedil;"},   // ISOlat1
     226             :   {",c", "&ccedil;"},   // ISOlat1
     227             :   {"/L", "&Lstrok;"},   // ISOlat2: Polish L with a slash
     228             :   {"/l", "&lstrok;"},   // ISOlat2: Polish l with a slash
     229             :   {"/O", "&Oslash;"},   // ISOlat1
     230             :   {"/o", "&oslash;"},   // ISOlat1
     231             :   {"oA", "&Aring;"},    // ISOlat1
     232             :   {"oa", "&aring;"},    // ISOlat1
     233             :   // Accents
     234             :   {"a\"","&dblac;"},       // ISOdia: double acute accent (Hungarian umlaut)
     235             :   {"a-", "&macr;"},     // ISOdia: macron or bar accent
     236             :   {"a.", "&dot;"},      // ISOdia: dot above
     237             :   {"a^", "&circ;"},     // ISOdia: circumflex accent
     238             :   {"aa", "&acute;"},    // ISOdia: acute accent
     239             :   {"ga", "&grave;"},    // ISOdia: grave accent
     240             :   {"ab", "&breve;"},    // ISOdia: breve accent
     241             :   {"ac", "&cedil;"},    // ISOdia: cedilla accent
     242             :   {"ad", "&uml;"},      // ISOdia: umlaut or dieresis
     243             :   {"ah", "&caron;"},    // ISOdia: caron (aka hacek accent)
     244             :   {"ao", "&ring;"},     // ISOdia: ring or circle accent
     245             :   {"a~", "&tilde;"},    // ISOdia: tilde accent
     246             :   {"ho", "&ogon;"},     // ISOdia: hook or ogonek accent
     247             :   {"ha", "^"},              // ASCII circumflex, hat, caret
     248             :   {"ti", "~"},              // ASCII tilde, large tilde
     249             :   // Quotes
     250             :   {"Bq", "&lsquor;"},   // ISOpub: low double comma quote
     251             :   {"bq", "&ldquor;"},   // ISOpub: low single comma quote
     252             :   {"lq", "&ldquo;"},    // ISOnum
     253             :   {"rq", "&rdquo;"},    // ISOpub
     254             :   {"oq", "&lsquo;"},    // ISOnum: single open quote
     255             :   {"cq", "&rsquo;"},    // ISOnum: single closing quote (ASCII 39)
     256             :   {"aq", "&zerosp;'"},  // apostrophe quote
     257             :   {"dq", "\""},                // double quote (ASCII 34)
     258             :   {"Fo", "&laquo;"},    // ISOnum
     259             :   {"Fc", "&raquo;"},    // ISOnum
     260             :   //{"fo", "&fo;"},
     261             :   //{"fc", "&fc;"},
     262             :   // Punctuation
     263             :   {"r!", "&iexcl;"},    // ISOnum
     264             :   {"r?", "&iquest;"},   // ISOnum
     265             :   // Old troff \(em goes here
     266             :   {"en", "&ndash;"},    // ISOpub: en dash
     267             :   // Old troff \(hy goes here 
     268             :   // Brackets
     269             :   {"lB", "&lsqb;"},     // ISOnum: left (square) bracket
     270             :   {"rB", "&rsqb;"},     // ISOnum: right (square) bracket
     271             :   {"lC", "&lcub;"},     // ISOnum: left (curly) brace
     272             :   {"rC", "&rcub;"},     // ISOnum: right (curly) brace
     273             :   {"la", "&lang;"},     // ISOtech: left angle bracket
     274             :   {"ra", "&rang;"},     // ISOtech: right angle bracket
     275             :   // Old troff \(bv goes here
     276             :   // Bracket-pile characters could go here.
     277             :   // Arrows
     278             :   // Old troff \(<- and \(-> go here
     279             :   {"<>", "&harr;"},       // ISOamsa
     280             :   {"da", "&darr;"},     // ISOnum
     281             :   {"ua", "&uarr;"},     // ISOnum
     282             :   {"lA", "&lArr;"},     // ISOtech
     283             :   {"rA", "&rArr;"},     // ISOtech
     284             :   {"hA", "&iff;"},      // ISOtech: horizontal double-headed arrow
     285             :   {"dA", "&dArr;"},     // ISOamsa
     286             :   {"uA", "&uArr;"},     // ISOamsa
     287             :   {"vA", "&vArr;"},     // ISOamsa: vertical double-headed double arrow
     288             :   //{"an", "&an;"},
     289             :   // Lines
     290             :   {"-h", "&planck;"},   // ISOamso: h-bar (Planck's constant)
     291             :   // Old troff \(or goes here
     292             :   {"ba", "&verbar;"},   // ISOnum
     293             :   // Old troff \(br, \{u, \(ul, \(bv go here
     294             :   {"bb", "&brvbar;"},   // ISOnum
     295             :   {"sl", "/"},
     296             :   {"rs", "&bsol;"},     // ISOnum
     297             :   // Text markers
     298             :   // Old troff \(ci, \(bu, \(dd, \(dg go here
     299             :   {"lz", "&loz;"},      // ISOpub
     300             :   // Old troff sq goes here
     301             :   {"ps", "&para;"},     // ISOnum: paragraph or pilcrow sign
     302             :   {"sc", "&sect;"},     // ISOnum (in old troff)
     303             :   // Old troff \(lh, \{h go here
     304             :   {"at", "&commat;"},   // ISOnum
     305             :   {"sh", "&num;"},      // ISOnum
     306             :   //{"CR", "&CR;"},
     307             :   {"OK", "&check;"},    // ISOpub
     308             :   // Legalize
     309             :   // Old troff \(co, \{g go here
     310             :   {"tm", "&trade;"},    // ISOnum
     311             :   // Currency symbols
     312             :   {"Do", "&dollar;"},   // ISOnum
     313             :   {"ct", "&cent;"},     // ISOnum
     314             :   {"eu", "&euro;"},
     315             :   {"Eu", "&euro;"},
     316             :   {"Ye", "&yen;"},      // ISOnum
     317             :   {"Po", "&pound;"},    // ISOnum
     318             :   {"Cs", "&curren;"},   // ISOnum: currency sign
     319             :   {"Fn", "&fnof"},      // ISOtech
     320             :   // Units
     321             :   // Old troff de goes here
     322             :   {"%0", "&permil;"},   // ISOtech: per thousand, per mille sign
     323             :   // Old troff \(fm goes here
     324             :   {"sd", "&Prime;"},    // ISOtech
     325             :   {"mc", "&micro;"},    // ISOnum
     326             :   {"Of", "&ordf;"},     // ISOnum
     327             :   {"Om", "&ordm;"},     // ISOnum
     328             :   // Logical symbols
     329             :   {"AN", "&and;"},      // ISOtech
     330             :   {"OR", "&or;"},       // ISOtech
     331             :   // Old troff \(no goes here
     332             :   {"te", "&exist;"},    // ISOtech: there exists, existential quantifier
     333             :   {"fa", "&forall;"},   // ISOtech: for all, universal quantifier
     334             :   {"st", "&bepsi"},     // ISOamsr: such that
     335             :   {"3d", "&there4;"},   // ISOtech
     336             :   {"tf", "&there4;"},   // ISOtech
     337             :   // Mathematical symbols
     338             :   // Old troff "12", "14", "34" goes here
     339             :   {"S1", "&sup1;"},     // ISOnum
     340             :   {"S2", "&sup2;"},     // ISOnum
     341             :   {"S3", "&sup3;"},     // ISOnum
     342             :   // Old troff \(pl", \-, \(+- go here
     343             :   {"t+-", "&plusmn;"},  // ISOnum
     344             :   {"-+", "&mnplus;"},   // ISOtech
     345             :   {"pc", "&middot;"},   // ISOnum
     346             :   {"md", "&middot;"},   // ISOnum
     347             :   // Old troff \(mu goes here
     348             :   {"tmu", "&times;"},   // ISOnum
     349             :   {"c*", "&otimes;"},   // ISOamsb: multiply sign in a circle
     350             :   {"c+", "&oplus;"},    // ISOamsb: plus sign in a circle
     351             :   // Old troff \(di goes here
     352             :   {"tdi", "&divide;"},  // ISOnum
     353             :   {"f/", "&horbar;"},   // ISOnum: horizintal bar for fractions
     354             :   // Old troff \(** goes here
     355             :   {"<=", "&le;"},    // ISOtech
     356             :   {">=", "&ge;"},    // ISOtech
     357             :   {"<<", "&Lt;"}, // ISOamsr
     358             :   {">>", "&Gt;"}, // ISOamsr
     359             :   {"!=", "&ne;"},       // ISOtech
     360             :   // Old troff \(eq and \(== go here
     361             :   {"=~", "&cong;"},     // ISOamsr
     362             :   // Old troff \(ap goes here
     363             :   {"~~", "&ap;"},       // ISOtech
     364             :   // This appears to be an error in the groff table.  
     365             :   // It clashes with the Bell Labs use of ~= for a congruence sign
     366             :   // {"~=", "&ap;"},    // ISOamsr
     367             :   // Old troff \(pt, \(es, \(mo go here
     368             :   {"nm", "&notin;"},    // ISOtech
     369             :   {"nb", "&nsub;"},     // ISOamsr
     370             :   {"nc", "&nsup;"},     // ISOamsn
     371             :   {"ne", "&nequiv;"},   // ISOamsn
     372             :   // Old troff \(sb, \(sp, \(ib, \(ip, \(ca, \(cu go here
     373             :   {"/_", "&ang;"},      // ISOamso
     374             :   {"pp", "&perp;"},     // ISOtech
     375             :   // Old troff \(is goes here
     376             :   {"sum", "&sum;"},     // ISOamsb
     377             :   {"product", "&prod;"},        // ISOamsb
     378             :   {"gr", "&nabla;"},    // ISOtech
     379             :   // Old troff \(sr. \{n, \(if go here
     380             :   {"Ah", "&aleph;"},    // ISOtech
     381             :   {"Im", "&image;"},    // ISOamso: Fraktur I, imaginary
     382             :   {"Re", "&real;"},     // ISOamso: Fraktur R, real
     383             :   {"wp", "&weierp;"},   // ISOamso
     384             :   {"pd", "&part;"},     // ISOtech: partial differentiation sign
     385             :   // Their table duplicates the Greek letters here.
     386             :   // We list only the variant forms here, mapping them into
     387             :   // the ISO Greek 4 variants (which may or may not be correct :-() 
     388             :   {"+f", "&b.phiv;"},   // ISOgrk4: variant phi
     389             :   {"+h", "&b.thetas;"}, // ISOgrk4: variant theta
     390             :   {"+p", "&b.omega;"},  // ISOgrk4: variant pi, looking like omega
     391             :   // Card symbols
     392             :   {"CL", "&clubs;"},    // ISOpub: club suit
     393             :   {"SP", "&spades;"},   // ISOpub: spade suit
     394             :   {"HE", "&hearts;"},   // ISOpub: heart suit
     395             :   {"DI", "&diams;"},    // ISOpub: diamond suit
     396             : };
     397             : 
     398           0 : const char *special_to_entity(const char *sp)
     399             : {
     400             :   struct map *mp;
     401           0 :   for (mp = entity_table; mp < entity_table + countof(entity_table); mp++) {
     402           0 :     if (strcmp(mp->from, sp) == 0)
     403           0 :       return mp->to;
     404             :   }
     405           0 :   return NULL;
     406             : }
     407             : 
     408             : class char_box : public simple_box {
     409             :   unsigned char c;
     410             :   char next_is_italic;
     411             :   char prev_is_italic;
     412             : public:
     413             :   char_box(unsigned char);
     414             :   void debug_print();
     415             :   void output();
     416             :   int is_char();
     417             :   int left_is_italic();
     418             :   int right_is_italic();
     419             :   void hint(unsigned);
     420             :   void handle_char_type(int, int);
     421             : };
     422             : 
     423             : class special_char_box : public simple_box {
     424             :   char *s;
     425             : public:
     426             :   special_char_box(const char *);
     427             :   ~special_char_box();
     428             :   void output();
     429             :   void debug_print();
     430             :   int is_char();
     431             :   void handle_char_type(int, int);
     432             : };
     433             : 
     434             : enum spacing_type {
     435             :   s_ordinary,
     436             :   s_operator,
     437             :   s_binary,
     438             :   s_relation,
     439             :   s_opening,
     440             :   s_closing,
     441             :   s_punctuation,
     442             :   s_inner,
     443             :   s_suppress
     444             : };
     445             : 
     446             : const char *spacing_type_table[] = {
     447             :   "ordinary",
     448             :   "operator",
     449             :   "binary",
     450             :   "relation",
     451             :   "opening",
     452             :   "closing",
     453             :   "punctuation",
     454             :   "inner",
     455             :   "suppress",
     456             :   0,
     457             : };
     458             : 
     459             : const int DIGIT_TYPE = 0;
     460             : const int LETTER_TYPE = 1;
     461             : 
     462             : const char *font_type_table[] = {
     463             :   "digit",
     464             :   "letter",
     465             :   0,
     466             : };
     467             : 
     468             : struct char_info {
     469             :   int spacing_type;
     470             :   int font_type;
     471             :   char_info();
     472             : };
     473             : 
     474       15402 : char_info::char_info()
     475       15402 : : spacing_type(ORDINARY_TYPE), font_type(DIGIT_TYPE)
     476             : {
     477       15402 : }
     478             : 
     479             : static char_info char_table[256];
     480             : 
     481             : declare_ptable(char_info)
     482        5474 : implement_ptable(char_info)
     483             : 
     484             : PTABLE(char_info) special_char_table;
     485             : 
     486         162 : static int get_special_char_spacing_type(const char *ch)
     487             : {
     488         162 :   char_info *p = special_char_table.lookup(ch);
     489         162 :   return p ? p->spacing_type : ORDINARY_TYPE;
     490             : }
     491             : 
     492         318 : static int get_special_char_font_type(const char *ch)
     493             : {
     494         318 :   char_info *p = special_char_table.lookup(ch);
     495         318 :   return p ? p->font_type : DIGIT_TYPE;
     496             : }
     497             : 
     498         298 : static void set_special_char_type(const char *ch, int st, int ft)
     499             : {
     500         298 :   char_info *p = special_char_table.lookup(ch);
     501         298 :   if (!p) {
     502         596 :     p = new char_info[1];
     503         298 :     special_char_table.define(ch, p);
     504             :   }
     505         298 :   if (st >= 0)
     506         298 :     p->spacing_type = st;
     507         298 :   if (ft >= 0)
     508           0 :     p->font_type = ft;
     509         298 : }
     510             : 
     511          59 : void init_char_table()
     512             : {
     513          59 :   set_special_char_type("pl", s_binary, -1);
     514          59 :   set_special_char_type("mi", s_binary, -1);
     515          59 :   set_special_char_type("eq", s_relation, -1);
     516          59 :   set_special_char_type("<=", s_relation, -1);
     517          59 :   set_special_char_type(">=", s_relation, -1);
     518          59 :   char_table['}'].spacing_type = s_closing;
     519          59 :   char_table[')'].spacing_type = s_closing;
     520          59 :   char_table[']'].spacing_type = s_closing;
     521          59 :   char_table['{'].spacing_type = s_opening;
     522          59 :   char_table['('].spacing_type = s_opening;
     523          59 :   char_table['['].spacing_type = s_opening;
     524          59 :   char_table[','].spacing_type = s_punctuation;
     525          59 :   char_table[';'].spacing_type = s_punctuation;
     526          59 :   char_table[':'].spacing_type = s_punctuation;
     527          59 :   char_table['.'].spacing_type = s_punctuation;
     528          59 :   char_table['>'].spacing_type = s_relation;
     529          59 :   char_table['<'].spacing_type = s_relation;
     530          59 :   char_table['*'].spacing_type = s_binary;
     531       15163 :   for (int i = 0; i < 256; i++)
     532       15104 :     if (csalpha(i))
     533        3068 :       char_table[i].font_type = LETTER_TYPE;
     534          59 : }
     535             : 
     536          61 : static int lookup_spacing_type(const char *type)
     537             : {
     538         218 :   for (int i = 0; spacing_type_table[i] != 0; i++)
     539         218 :     if (strcmp(spacing_type_table[i], type) == 0)
     540          61 :       return i;
     541           0 :   return -1;
     542             : }
     543             : 
     544           3 : static int lookup_font_type(const char *type)
     545             : {
     546           9 :   for (int i = 0; font_type_table[i] != 0; i++)
     547           6 :     if (strcmp(font_type_table[i], type) == 0)
     548           0 :       return i;
     549           3 :   return -1;
     550             : }
     551             : 
     552          58 : void box::set_spacing_type(char *type)
     553             : {
     554          58 :   int t = lookup_spacing_type(type);
     555          58 :   if (t < 0)
     556           0 :     error("unrecognised type '%1'", type);
     557             :   else
     558          58 :     spacing_type = t;
     559          58 :   free(type);
     560          58 : }
     561             : 
     562         863 : char_box::char_box(unsigned char cc)
     563         863 : : c(cc), next_is_italic(0), prev_is_italic(0)
     564             : {
     565         863 :   spacing_type = char_table[c].spacing_type;
     566         863 : }
     567             : 
     568         352 : void char_box::hint(unsigned flags)
     569             : {
     570         352 :   if (flags & HINT_PREV_IS_ITALIC)
     571         191 :     prev_is_italic = 1;
     572         352 :   if (flags & HINT_NEXT_IS_ITALIC)
     573         194 :     next_is_italic = 1;
     574         352 : }
     575             : 
     576        1726 : void char_box::output()
     577             : {
     578        1726 :   if (output_format == troff) {
     579        1726 :     int font_type = char_table[c].font_type;
     580        1726 :     if (font_type != LETTER_TYPE)
     581         564 :       printf("\\f[%s]", current_roman_font);
     582        1726 :     if (!prev_is_italic)
     583        1344 :       fputs("\\,", stdout);
     584        1726 :     if (c == '\\')
     585           0 :       fputs("\\e", stdout);
     586             :     else
     587        1726 :       putchar(c);
     588        1726 :     if (!next_is_italic)
     589        1338 :       fputs("\\/", stdout);
     590             :     else
     591         388 :       fputs("\\&", stdout);               // suppress ligaturing and kerning
     592        1726 :     if (font_type != LETTER_TYPE)
     593         564 :       fputs("\\fP", stdout);
     594             :   }
     595           0 :   else if (output_format == mathml) {
     596           0 :     if (isdigit(c))
     597           0 :       printf("<mn>");
     598           0 :     else if (char_table[c].spacing_type)
     599           0 :       printf("<mo>");
     600             :     else
     601           0 :       printf("<mi>");
     602           0 :     if (c == '<')
     603           0 :       printf("&lt;");
     604           0 :     else if (c == '>')
     605           0 :       printf("&gt;");
     606           0 :     else if (c == '&')
     607           0 :       printf("&amp;");
     608             :     else
     609           0 :       putchar(c);
     610           0 :     if (isdigit(c))
     611           0 :       printf("</mn>");
     612           0 :     else if (char_table[c].spacing_type)
     613           0 :       printf("</mo>");
     614             :     else
     615           0 :       printf("</mi>");
     616             :   }
     617        1726 : }
     618             : 
     619         483 : int char_box::left_is_italic()
     620             : {
     621         483 :   int font_type = char_table[c].font_type;
     622         483 :   return font_type == LETTER_TYPE;
     623             : }
     624             : 
     625         388 : int char_box::right_is_italic()
     626             : {
     627         388 :   int font_type = char_table[c].font_type;
     628         388 :   return font_type == LETTER_TYPE;
     629             : }
     630             : 
     631         167 : int char_box::is_char()
     632             : {
     633         167 :   return 1;
     634             : }
     635             : 
     636           0 : void char_box::debug_print()
     637             : {
     638           0 :   if (c == '\\') {
     639           0 :     putc('\\', stderr);
     640           0 :     putc('\\', stderr);
     641             :   }
     642             :   else
     643           0 :     putc(c, stderr);
     644           0 : }
     645             : 
     646         162 : special_char_box::special_char_box(const char *t)
     647             : {
     648         162 :   s = strsave(t);
     649         162 :   spacing_type = get_special_char_spacing_type(s);
     650         162 : }
     651             : 
     652         324 : special_char_box::~special_char_box()
     653             : {
     654         162 :   free(s);
     655         324 : }
     656             : 
     657         318 : void special_char_box::output()
     658             : {
     659         318 :   if (output_format == troff) {
     660         318 :     int font_type = get_special_char_font_type(s);
     661         318 :     if (font_type != LETTER_TYPE)
     662         318 :       printf("\\f[%s]", current_roman_font);
     663         318 :     printf("\\,\\[%s]\\/", s);
     664         318 :     if (font_type != LETTER_TYPE)
     665         318 :       printf("\\fP");
     666             :   }
     667           0 :   else if (output_format == mathml) {
     668           0 :     const char *entity = special_to_entity(s);
     669           0 :     if (entity != NULL)
     670           0 :       printf("<mo>%s</mo>", entity);
     671             :     else
     672           0 :       printf("<merror>unknown eqn/troff special char %s</merror>", s);
     673             :   }
     674         318 : }
     675             : 
     676           0 : int special_char_box::is_char()
     677             : {
     678           0 :   return 1;
     679             : }
     680             : 
     681           0 : void special_char_box::debug_print()
     682             : {
     683           0 :   fprintf(stderr, "\\[%s]", s);
     684           0 : }
     685             : 
     686             : 
     687           0 : void char_box::handle_char_type(int st, int ft)
     688             : {
     689           0 :   if (st >= 0)
     690           0 :     char_table[c].spacing_type = st;
     691           0 :   if (ft >= 0)
     692           0 :     char_table[c].font_type = ft;
     693           0 : }
     694             : 
     695           3 : void special_char_box::handle_char_type(int st, int ft)
     696             : {
     697           3 :   set_special_char_type(s, st, ft);
     698           3 : }
     699             : 
     700           3 : void set_char_type(const char *type, char *ch)
     701             : {
     702           3 :   assert(ch != 0);
     703           3 :   int st = lookup_spacing_type(type);
     704           3 :   int ft = lookup_font_type(type);
     705           3 :   if (st < 0 && ft < 0) {
     706           0 :     error("bad character type '%1'", type);
     707           0 :     delete[] ch;
     708           0 :     return;
     709             :   }
     710           3 :   box *b = split_text(ch);
     711           3 :   b->handle_char_type(st, ft);
     712           3 :   delete b;
     713             : }
     714             : 
     715             : /* We give primes special treatment so that in "x' sub 2", the "2"
     716             : will be tucked under the prime */
     717             : 
     718             : class prime_box : public pointer_box {
     719             :   box *pb;
     720             : public:
     721             :   prime_box(box *);
     722             :   ~prime_box();
     723             :   int compute_metrics(int style);
     724             :   void output();
     725             :   void compute_subscript_kern();
     726             :   void debug_print();
     727             :   void handle_char_type(int, int);
     728             : };
     729             : 
     730           0 : box *make_prime_box(box *pp)
     731             : {
     732           0 :   return new prime_box(pp);
     733             : }
     734             : 
     735           0 : prime_box::prime_box(box *pp) : pointer_box(pp)
     736             : {
     737           0 :   pb = new special_char_box("fm");
     738           0 : }
     739             : 
     740           0 : prime_box::~prime_box()
     741             : {
     742           0 :   delete pb;
     743           0 : }
     744             : 
     745           0 : int prime_box::compute_metrics(int style)
     746             : {
     747           0 :   int res = p->compute_metrics(style);
     748           0 :   pb->compute_metrics(style);
     749           0 :   printf(".nr " WIDTH_FORMAT " 0\\n[" WIDTH_FORMAT "]"
     750             :          "+\\n[" WIDTH_FORMAT "]\n",
     751           0 :          uid, p->uid, pb->uid);
     752           0 :   printf(".nr " HEIGHT_FORMAT " \\n[" HEIGHT_FORMAT "]"
     753             :          ">?\\n[" HEIGHT_FORMAT "]\n",
     754           0 :          uid, p->uid, pb->uid);
     755           0 :   printf(".nr " DEPTH_FORMAT " \\n[" DEPTH_FORMAT "]"
     756             :          ">?\\n[" DEPTH_FORMAT "]\n",
     757           0 :          uid, p->uid, pb->uid);
     758           0 :   return res;
     759             : }
     760             : 
     761           0 : void prime_box::compute_subscript_kern()
     762             : {
     763           0 :   p->compute_subscript_kern();
     764           0 :   printf(".nr " SUB_KERN_FORMAT " 0\\n[" WIDTH_FORMAT "]"
     765             :          "+\\n[" SUB_KERN_FORMAT "]>?0\n",
     766           0 :          uid, pb->uid, p->uid);
     767           0 : }
     768             : 
     769           0 : void prime_box::output()
     770             : {
     771           0 :   p->output();
     772           0 :   pb->output();
     773           0 : }
     774             : 
     775           0 : void prime_box::handle_char_type(int st, int ft)
     776             : {
     777           0 :   p->handle_char_type(st, ft);
     778           0 :   pb->handle_char_type(st, ft);
     779           0 : }
     780             : 
     781           0 : void prime_box::debug_print()
     782             : {
     783           0 :   p->debug_print();
     784           0 :   putc('\'', stderr);
     785           0 : }
     786             : 
     787         856 : box *split_text(char *text)
     788             : {
     789         856 :   list_box *lb = 0;
     790         856 :   box *fb = 0;
     791         856 :   char *s = text;
     792        1881 :   while (*s != '\0') {
     793        1025 :     char c = *s++;
     794        1025 :     box *b = 0;
     795        1025 :     switch (c) {
     796          32 :     case '+':
     797          32 :       b = new special_char_box("pl");
     798          32 :       break;
     799          30 :     case '-':
     800          30 :       b = new special_char_box("mi");
     801          30 :       break;
     802          42 :     case '=':
     803          42 :       b = new special_char_box("eq");
     804          42 :       break;
     805           0 :     case '\'':
     806           0 :       b = new special_char_box("fm");
     807           0 :       break;
     808           9 :     case '<':
     809           9 :       if (*s == '=') {
     810           6 :         b = new special_char_box("<=");
     811           6 :         s++;
     812           6 :         break;
     813             :       }
     814           3 :       goto normal_char;
     815           3 :     case '>':
     816           3 :       if (*s == '=') {
     817           0 :         b = new special_char_box(">=");
     818           0 :         s++;
     819           0 :         break;
     820             :       }
     821           3 :       goto normal_char;
     822          52 :     case '\\':
     823          52 :       if (*s == '\0') {
     824           0 :         lex_error("bad escape");
     825           0 :         break;
     826             :       }
     827          52 :       c = *s++;
     828          52 :       switch (c) {
     829          43 :       case '(':
     830             :         {
     831             :           char buf[3];
     832          43 :           if (*s != '\0') {
     833          43 :             buf[0] = *s++;
     834          43 :             if (*s != '\0') {
     835          43 :               buf[1] = *s++;
     836          43 :               buf[2] = '\0';
     837          43 :               b = new special_char_box(buf);
     838             :             }
     839             :             else {
     840           0 :               lex_error("bad escape");
     841             :             }
     842             :           }
     843             :           else {
     844           0 :             lex_error("bad escape");
     845             :           }
     846             :         }
     847          43 :         break;
     848           9 :       case '[':
     849             :         {
     850           9 :           char *ch = s;
     851          27 :           while (*s != ']' && *s != '\0')
     852          18 :             s++;
     853           9 :           if (*s == '\0')
     854           0 :             lex_error("bad escape");
     855             :           else {
     856           9 :             *s++ = '\0';
     857           9 :             b = new special_char_box(ch);
     858             :           }
     859             :         }
     860           9 :         break;
     861           0 :       case 'f':
     862             :       case 'g':
     863             :       case 'k':
     864             :       case 'n':
     865             :       case '*':
     866             :         {
     867           0 :           char *escape_start = s - 2;
     868           0 :           switch (*s) {
     869           0 :           case '(':
     870           0 :             if (*++s != '\0')
     871           0 :               ++s;
     872           0 :             break;
     873           0 :           case '[':
     874           0 :             for (++s; *s != '\0' && *s != ']'; s++)
     875             :               ;
     876           0 :             break;
     877             :           }
     878           0 :           if (*s == '\0')
     879           0 :             lex_error("bad escape");
     880             :           else {
     881           0 :             ++s;
     882           0 :             char *buf = new char[s - escape_start + 1];
     883           0 :             memcpy(buf, escape_start, s - escape_start);
     884           0 :             buf[s - escape_start] = '\0';
     885           0 :             b = new quoted_text_box(buf);
     886             :           }
     887             :         }
     888           0 :         break;
     889           0 :       case '-':
     890             :       case '_':
     891             :         {
     892             :           char buf[2];
     893           0 :           buf[0] = c;
     894           0 :           buf[1] = '\0';
     895           0 :           b = new special_char_box(buf);
     896             :         }
     897           0 :         break;
     898           0 :       case '`':
     899           0 :         b = new special_char_box("ga");
     900           0 :         break;
     901           0 :       case '\'':
     902           0 :         b = new special_char_box("aa");
     903           0 :         break;
     904           0 :       case 'e':
     905             :       case '\\':
     906           0 :         b = new char_box('\\');
     907           0 :         break;
     908           0 :       case '^':
     909             :       case '|':
     910             :       case '0':
     911             :         {
     912             :           char buf[3];
     913           0 :           buf[0] = '\\';
     914           0 :           buf[1] = c;
     915           0 :           buf[2] = '\0';
     916           0 :           b = new quoted_text_box(strsave(buf));
     917           0 :           break;
     918             :         }
     919           0 :       default:
     920           0 :         lex_error("unquoted escape");
     921           0 :         b = new quoted_text_box(strsave(s - 2));
     922           0 :         s = strchr(s, '\0');
     923           0 :         break;
     924             :       }
     925          52 :       break;
     926             :     default:
     927         863 :     normal_char:
     928         863 :       b = new char_box(c);
     929         863 :       break;
     930             :     }
     931        1025 :     while (*s == '\'') {
     932           0 :       if (b == 0)
     933           0 :         b = new quoted_text_box(0);
     934           0 :       b = new prime_box(b);
     935           0 :       s++;
     936             :     }
     937        1025 :     if (b != 0) {
     938        1025 :       if (lb != 0)
     939          41 :         lb->append(b);
     940         984 :       else if (fb != 0) {
     941         128 :         lb = new list_box(fb);
     942         128 :         lb->append(b);
     943             :       }
     944             :       else
     945         856 :         fb = b;
     946             :     }
     947             :   }
     948         856 :   free(text);
     949         856 :   if (lb != 0)
     950         128 :     return lb;
     951         728 :   else if (fb != 0)
     952         728 :     return fb;
     953             :   else
     954           0 :     return new quoted_text_box(0);
     955             : }
     956             : 
     957             : // Local Variables:
     958             : // fill-column: 72
     959             : // mode: C++
     960             : // End:
     961             : // vim: set cindent noexpandtab shiftwidth=2 textwidth=72:

Generated by: LCOV version 1.14