1use super::string::{Sid, STANDARD_STRINGS};
9
/// Selects one of the predefined character-code encodings implemented in
/// this module.
///
/// Each variant is backed by a 256-entry lookup table that maps an 8-bit
/// character code to an index into the standard string list. `Standard` is
/// the value produced by `Default`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum PredefinedEncoding {
    /// Codes are resolved through `STANDARD_ENCODING`.
    #[default]
    Standard,
    /// Codes are resolved through `EXPERT_ENCODING`.
    Expert,
    /// Codes are resolved through `ISO_LATIN1_ENCODING` when looking up a
    /// glyph name; `sid` returns `None` for this variant.
    IsoLatin1,
}
22
23impl PredefinedEncoding {
24 pub fn name(&self, code: u8) -> &'static str {
27 let code = code as usize;
28 let sid = match self {
30 Self::Standard => STANDARD_ENCODING[code] as u16,
31 Self::Expert => EXPERT_ENCODING[code],
32 Self::IsoLatin1 => {
33 match code {
42 0x00A0 => return "nonbreakingspace",
43 0x00AD => return "softhyphen",
44 _ => ISO_LATIN1_ENCODING[code],
45 }
46 }
47 };
48 STANDARD_STRINGS
49 .get(sid as usize)
50 .copied()
51 .unwrap_or_default()
52 }
53
54 pub fn sid(&self, code: u8) -> Option<Sid> {
56 let code = code as usize;
57 let sid = match self {
58 Self::Standard => STANDARD_ENCODING[code] as u16,
59 Self::Expert => EXPERT_ENCODING[code],
60 _ => return None,
61 };
62 Some(Sid::new(sid))
63 }
64}
65
/// Lookup table for `PredefinedEncoding::Standard`.
///
/// Indexed by an 8-bit character code. Each entry is the string index that
/// `PredefinedEncoding::name` looks up in `STANDARD_STRINGS` and that
/// `PredefinedEncoding::sid` wraps in a `Sid`. Codes with no assigned glyph
/// hold 0, which the tests in this file expect to resolve to `.notdef`.
///
/// Every value fits in a `u8` (the largest is 149); callers widen it to
/// `u16` before use.
///
/// NOTE(review): presumably mirrors the Standard Encoding table from the CFF
/// specification — confirm against the spec before editing any value.
// Formatting is pinned so each row holds exactly 16 codes (one high nibble).
#[rustfmt::skip]
pub(super) static STANDARD_ENCODING: [u8; 256] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00..=0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10..=0x1F
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, // 0x20..=0x2F
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, // 0x30..=0x3F
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, // 0x40..=0x4F
    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, // 0x50..=0x5F
    65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, // 0x60..=0x6F
    81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 0, // 0x70..=0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80..=0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90..=0x9F
    0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, // 0xA0..=0xAF
    0, 111, 112, 113, 114, 0, 115, 116, 117, 118, 119, 120, 121, 122, 0, 123, // 0xB0..=0xBF
    0, 124, 125, 126, 127, 128, 129, 130, 131, 0, 132, 133, 0, 134, 135, 136, // 0xC0..=0xCF
    137, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0..=0xDF
    0, 138, 0, 139, 0, 0, 0, 0, 140, 141, 142, 143, 0, 0, 0, 0, // 0xE0..=0xEF
    0, 144, 0, 0, 0, 145, 0, 0, 146, 147, 148, 149, 0, 0, 0, 0, // 0xF0..=0xFF
];
87
/// Lookup table for `PredefinedEncoding::Expert`.
///
/// Indexed by an 8-bit character code. Each entry is the string index that
/// `PredefinedEncoding::name` looks up in `STANDARD_STRINGS` and that
/// `PredefinedEncoding::sid` wraps in a `Sid`. Codes with no assigned glyph
/// hold 0, which the tests in this file expect to resolve to `.notdef`.
///
/// Entries are `u16` because values in this table go up to 378.
///
/// NOTE(review): presumably mirrors the Expert Encoding table from the CFF
/// specification — confirm against the spec before editing any value.
// Formatting is pinned so each row holds exactly 16 codes (one high nibble).
#[rustfmt::skip]
pub(super) static EXPERT_ENCODING: [u16; 256] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00..=0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10..=0x1F
    1, 229, 230, 0, 231, 232, 233, 234, 235, 236, 237, 238, 13, 14, 15, 99, // 0x20..=0x2F
    239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 27, 28, 249, 250, 251, 252, // 0x30..=0x3F
    0, 253, 254, 255, 256, 257, 0, 0, 0, 258, 0, 0, 259, 260, 261, 262, // 0x40..=0x4F
    0, 0, 263, 264, 265, 0, 266, 109, 110, 267, 268, 269, 0, 270, 271, 272, // 0x50..=0x5F
    273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, // 0x60..=0x6F
    289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 0, // 0x70..=0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80..=0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90..=0x9F
    0, 304, 305, 306, 0, 0, 307, 308, 309, 310, 311, 0, 312, 0, 0, 313, // 0xA0..=0xAF
    0, 0, 314, 315, 0, 0, 316, 317, 318, 0, 0, 0, 158, 155, 163, 319, // 0xB0..=0xBF
    320, 321, 322, 323, 324, 325, 0, 0, 326, 150, 164, 169, 327, 328, 329, 330, // 0xC0..=0xCF
    331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, // 0xD0..=0xDF
    347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, // 0xE0..=0xEF
    363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, // 0xF0..=0xFF
];
109
/// Lookup table for `PredefinedEncoding::IsoLatin1`.
///
/// Indexed by an 8-bit character code. Each entry is the string index that
/// `PredefinedEncoding::name` looks up in `STANDARD_STRINGS`. Codes with no
/// assigned glyph hold 0, which the tests in this file expect to resolve to
/// `.notdef`.
///
/// The entries for codes 0xA0 (value 1) and 0xAD (value 14) are never read by
/// `name`, which returns the literals `"nonbreakingspace"` and `"softhyphen"`
/// for those codes before consulting this table. `PredefinedEncoding::sid`
/// does not use this table at all.
///
/// NOTE(review): the source of these values is not visible in this file —
/// confirm against the upstream reference before editing any value.
// Formatting is pinned so each row holds exactly 16 codes (one high nibble).
#[rustfmt::skip]
pub(super) static ISO_LATIN1_ENCODING: [u16; 256] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00..=0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10..=0x1F
    1, 2, 3, 4, 5, 6, 7, 104, 9, 10, 11, 12, 13, 14, 15, 16, // 0x20..=0x2F
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, // 0x30..=0x3F
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, // 0x40..=0x4F
    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, // 0x50..=0x5F
    65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, // 0x60..=0x6F
    81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 0, // 0x70..=0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80..=0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90..=0x9F
    1, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 14, 173, 174, // 0xA0..=0xAF
    175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, // 0xB0..=0xBF
    191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, // 0xC0..=0xCF
    207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, // 0xD0..=0xDF
    223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, // 0xE0..=0xEF
    239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, // 0xF0..=0xFF
];
137
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `encoding.name(code)` yields the expected glyph name for
    /// every `(code, name)` pair in `pairs`.
    ///
    /// Marked `#[track_caller]` so a failure is reported at the call site in
    /// the individual test rather than inside this helper.
    #[track_caller]
    fn check_names(pairs: &[(u8, &str)], encoding: PredefinedEncoding) {
        for &(code, expected_name) in pairs {
            let name = encoding.name(code);
            assert_eq!(
                name, expected_name,
                "expected {expected_name}, got {name} for {code}"
            );
        }
    }

    /// Spot-checks glyph names produced by the `Standard` encoding.
    #[test]
    fn standard_encoding_names() {
        const CASES: &[(u8, &str)] = &[
            (0, ".notdef"),
            (32, "space"),
            (33, "exclam"),
            (34, "quotedbl"),
            (35, "numbersign"),
            (42, "asterisk"),
            (43, "plus"),
            (44, "comma"),
            (45, "hyphen"),
            (46, "period"),
            (47, "slash"),
            (48, "zero"),
            (49, "one"),
            (57, "nine"),
            (58, "colon"),
            (61, "equal"),
            (62, "greater"),
            (65, "A"),
            (77, "M"),
            (90, "Z"),
            (95, "underscore"),
            (96, "quoteleft"),
            (97, "a"),
            (109, "m"),
            (122, "z"),
            (164, "fraction"),
            (165, "yen"),
            (166, "florin"),
            (174, "fi"),
            (175, "fl"),
            (188, "ellipsis"),
            (207, "caron"),
            (208, "emdash"),
            (225, "AE"),
            (255, ".notdef"),
        ];
        check_names(CASES, PredefinedEncoding::Standard);
    }

    /// Spot-checks glyph names produced by the `Expert` encoding.
    #[test]
    fn expert_encoding_names() {
        const CASES: &[(u8, &str)] = &[
            (0, ".notdef"),
            (32, "space"),
            (44, "comma"),
            (45, "hyphen"),
            (46, "period"),
            (47, "fraction"),
            (48, "zerooldstyle"),
            (57, "nineoldstyle"),
            (58, "colon"),
            (59, "semicolon"),
            (60, "commasuperior"),
            (61, "threequartersemdash"),
            (62, "periodsuperior"),
            (63, "questionsmall"),
            (65, "asuperior"),
            (84, "tsuperior"),
            (86, "ff"),
            (87, "fi"),
            (88, "fl"),
            (89, "ffi"),
            (90, "ffl"),
            (91, "parenleftinferior"),
            (96, "Gravesmall"),
            (97, "Asmall"),
            (109, "Msmall"),
            (122, "Zsmall"),
            (123, "colonmonetary"),
            (124, "onefitted"),
            (125, "rupiah"),
            (126, "Tildesmall"),
            (188, "onequarter"),
            (200, "zerosuperior"),
            (201, "onesuperior"),
            (219, "nineinferior"),
            (220, "centinferior"),
            (221, "dollarinferior"),
            (222, "periodinferior"),
            (223, "commainferior"),
            (224, "Agravesmall"),
            (225, "Aacutesmall"),
            (226, "Acircumflexsmall"),
            (227, "Atildesmall"),
            (255, "Ydieresissmall"),
        ];
        check_names(CASES, PredefinedEncoding::Expert);
    }

    /// Spot-checks glyph names produced by the `IsoLatin1` encoding,
    /// including the two codes (160 and 173) that are answered with literal
    /// names.
    #[test]
    fn iso_latin1_encoding_names() {
        const CASES: &[(u8, &str)] = &[
            (0, ".notdef"),
            (32, "space"),
            (42, "asterisk"),
            (43, "plus"),
            (44, "comma"),
            (46, "period"),
            (48, "zero"),
            (49, "one"),
            (57, "nine"),
            (58, "colon"),
            (62, "greater"),
            (63, "question"),
            (64, "at"),
            (65, "A"),
            (77, "M"),
            (90, "Z"),
            (95, "underscore"),
            (97, "a"),
            (109, "m"),
            (122, "z"),
            (123, "braceleft"),
            (124, "bar"),
            (125, "braceright"),
            (126, "asciitilde"),
            (160, "nonbreakingspace"),
            (166, "minus"),
            (173, "softhyphen"),
            (187, "Ntilde"),
            (205, "aring"),
            (226, "ugrave"),
            (238, "twodotenleader"),
            (239, "onedotenleader"),
            (240, "zerooldstyle"),
            (249, "nineoldstyle"),
            (255, "bsuperior"),
        ];
        check_names(CASES, PredefinedEncoding::IsoLatin1);
    }
}