Line data Source code
1 : /* Copyright 1989-2025 Free Software Foundation, Inc.
2 : Written by James Clark (jjc@jclark.com)
3 :
4 : This file is part of groff, the GNU roff typesetting system.
5 :
6 : groff is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation, either version 3 of the License, or
9 : (at your option) any later version.
10 :
11 : groff is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 :
19 : #ifdef HAVE_CONFIG_H
20 : #include <config.h>
21 : #endif
22 :
23 : #include <assert.h>
24 : #include <string.h> // strcat(), strcmp(), strcpy(), strlen()
25 : #include <stdlib.h> // calloc()
26 :
27 : #include "cset.h" // csprint()
28 : #include "json-encode.h" // json_char, json_encode_char()
29 : #include "lib.h"
30 :
31 : #include "errarg.h"
32 : #include "error.h"
33 : #include "symbol.h"
34 :
35 : const char **symbol::table = 0 /* nullptr */;
36 : int symbol::table_used = 0; // # of entries in use
37 : int symbol::table_size = 0;
38 : char *symbol::block = 0 /* nullptr */;
39 : size_t symbol::block_size = 0;
40 :
41 : const symbol NULL_SYMBOL;
42 : const symbol EMPTY_SYMBOL("");
43 :
// Make sure no macro of the same name shadows the constant below.
#undef BLOCK_SIZE

// Smallest storage block the symbol constructor allocates for copied
// strings, in bytes.
const int BLOCK_SIZE = 1024;

// Sizes the symbol table grows through, smallest first; the table
// increases in size as necessary.  The trailing 0 marks the end of the
// list.  Add some more if you want.
static const unsigned int table_sizes[] = {
  101,
  503,
  1009,
  2003,
  3001,
  4001,
  5003,
  10007,
  20011,
  40009,
  80021,
  160001,
  500009,
  1000003,
  1500007,
  2000003,
  0
};

// Don't populate table entries above this ratio; the table is enlarged
// once the fraction of slots in use reaches it.
const double FULL_MAX = 0.3;
58 :
// Compute a hash code for the null-terminated string `p`.
//
// This assumes 32-bit unsigned ints; see p436 of Compilers by Aho,
// Sethi & Ullman.  One- and two-character names get special treatment:
// a one-character name hashes to its character code, and the second
// character is combined with a 7-bit shift instead of the 4-bit shift
// used for every later character.
//
// Returns 0 for an empty string.
static unsigned int hash_string(const char *p)
{
  // Read the bytes as unsigned so the result does not depend on
  // whether plain `char` is signed.
  const unsigned char *q = reinterpret_cast<const unsigned char *>(p);
  unsigned int hc = 0;
  if (*q != 0) {
    hc = *q++;
    if (*q != 0) {
      hc <<= 7;
      hc += *q++;
      for (; *q != 0; q++) {
        hc <<= 4;
        hc += *q;
        // Fold the top four bits back into bits 4-7 and clear them, so
        // that the next shift does not simply discard them.  (The test
        // must be "nonzero": folding a zero `g` would do nothing.)
        const unsigned int g = hc & 0xf0000000u;
        if (g != 0) {
          hc ^= g >> 24;
          hc ^= g;
        }
      }
    }
  }
  return hc;
}
82 :
// Accept the address of a variable and do nothing with it; calling
// this tells the compiler that the variable is intentionally unused.
inline void unused(void * /* ignored */)
{
}
85 :
86 67701096 : symbol::symbol(const char *p, int how)
87 : {
88 67701096 : if (p == 0 /* nullptr */) {
89 12771728 : s = 0 /* nullptr */;
90 12771728 : return;
91 : }
92 54929368 : if (*p == 0 /* nullptr */) {
93 2493 : s = "";
94 2493 : return;
95 : }
96 54926875 : if (table == 0 /* nullptr */) {
97 2493 : table_size = table_sizes[0];
98 2493 : table = (const char **)new char*[table_size];
99 254286 : for (int i = 0; i < table_size; i++)
100 251793 : table[i] = 0 /* nullptr */;
101 2493 : table_used = 0;
102 : }
103 54926875 : unsigned int hc = hash_string(p);
104 : const char **pp;
105 162506038 : for (pp = table + hc % table_size;
106 162508719 : *pp != 0 /* nullptr */;
107 107581844 : (pp == table ? pp = table + table_size - 1 : --pp))
108 154572091 : if (strcmp(p, *pp) == 0) {
109 46990247 : s = *pp;
110 46990247 : return;
111 : }
112 7936628 : if (how == MUST_ALREADY_EXIST) {
113 37006 : s = 0 /* nullptr */;
114 37006 : return;
115 : }
116 7899622 : if ((table_used >= (table_size - 1))
117 7899622 : || (table_used >= (table_size * FULL_MAX))) {
118 8599 : const char **old_table = table;
119 8599 : unsigned int old_table_size = table_size;
120 : int i;
121 31315 : for (i = 1; table_sizes[i] <= old_table_size; i++)
122 22716 : if (table_sizes[i] == 0)
123 0 : fatal("too many symbols");
124 8599 : table_size = table_sizes[i];
125 8599 : table_used = 0;
126 8599 : table = (const char **)new char*[table_size];
127 28990022 : for (i = 0; i < table_size; i++)
128 28981423 : table[i] = 0 /* nullptr */;
129 17583504 : for (pp = old_table + old_table_size - 1;
130 17583504 : pp >= old_table;
131 : --pp) {
132 17574905 : symbol temp(*pp, 1); /* insert it into the new table */
133 17574905 : unused(&temp);
134 : }
135 8599 : delete[] old_table;
136 201056 : for (pp = table + hc % table_size;
137 201056 : *pp != 0 /* nullptr */;
138 192457 : (pp == table ? pp = table + table_size - 1 : --pp))
139 : ;
140 : }
141 7899622 : ++table_used;
142 7899622 : if (how == DONT_STORE)
143 5275954 : s = *pp = p;
144 : else {
145 2623668 : size_t len = strlen(p) + 1;
146 2623668 : if ((block == 0 /* nullptr */) || (block_size < len)) {
147 22492 : block_size = len > BLOCK_SIZE ? len : BLOCK_SIZE;
148 22492 : block = new char [block_size];
149 : }
150 2623668 : (void) strcpy(block, p);
151 2623668 : s = *pp = block;
152 2623668 : block += len;
153 2623668 : block_size -= len;
154 : }
155 : }
156 :
157 1442 : symbol concat(symbol s1, symbol s2)
158 : {
159 1442 : char *buf = new char [strlen(s1.contents())
160 1442 : + strlen(s2.contents())
161 1442 : + 1];
162 1442 : strcpy(buf, s1.contents());
163 1442 : strcat(buf, s2.contents());
164 1442 : symbol res(buf);
165 1442 : delete[] buf;
166 1442 : return res;
167 : }
168 :
169 : // Compute length of JSON representation of object.
170 6 : size_t symbol::json_length() const
171 : {
172 6 : size_t len = 0;
173 6 : const char *p = s;
174 : char ch;
175 6 : int nextrachars = 2; // leading and trailing double quotes
176 16 : for (size_t i = 0; p[i] != '\0'; i++, len++) {
177 10 : ch = p[i];
178 10 : assert ((ch >= 0) && (ch <= 127));
179 : // These printable characters require escaping.
180 10 : if (('"' == ch) || ('\\' == ch) || ('/' == ch))
181 0 : nextrachars++;
182 10 : else if (csprint(ch))
183 : ;
184 : else
185 0 : switch (ch) {
186 0 : case '\b':
187 : case '\f':
188 : case '\n':
189 : case '\r':
190 : case '\t':
191 0 : nextrachars++;
192 0 : break;
193 0 : default:
194 0 : nextrachars += 5;
195 : }
196 : }
197 6 : return (len + nextrachars);
198 : }
199 :
200 : // Like `extract()`, but double-quote the string and escape characters
201 : // per JSON. (Unlike groff's `string`, a `symbol` doesn't contain
202 : // embedded null characters.)
203 4 : const char *symbol::json_extract() const
204 : {
205 4 : const char *p = s;
206 : char *r;
207 4 : size_t n = strlen(s);
208 : size_t i;
209 4 : char *q = static_cast<char *>(calloc((this->json_length() + 1),
210 : sizeof (char)));
211 4 : if (q != 0 /* nullptr */) {
212 4 : r = q;
213 4 : *r++ = '"';
214 : json_char ch;
215 10 : for (i = 0; i < n; i++, p++) {
216 6 : ch = json_encode_char(*p);
217 12 : for (size_t j = 0; j < ch.len; j++)
218 6 : *r++ = ch.buf[j];
219 : }
220 4 : *r++ = '"';
221 : }
222 : else
223 0 : return strdup("\"\""); // so it can be free()d
224 4 : *r++ = '\0';
225 4 : return q;
226 : }
227 :
228 : // Dump symbol in JSON representation to standard error stream.
229 2 : void symbol::json_dump() const
230 : {
231 2 : const char *repr = this->json_extract();
232 2 : size_t jsonlen = this->json_length();
233 : // Write it out by character to keep libc string functions from
234 : // interpreting escape sequences.
235 10 : for (size_t i = 0; i < jsonlen; i++)
236 8 : fputc(repr[i], stderr);
237 2 : free(const_cast<char *>(repr));
238 2 : }
239 :
240 : symbol default_symbol("default");
241 :
242 : // Local Variables:
243 : // fill-column: 72
244 : // mode: C++
245 : // End:
246 : // vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
|