Skip to main content

read_fonts/ps/
encoding.rs

//! Predefined encodings.
//!
//! These map font-specific character codes to string ids and, through the
//! standard string table, to glyph names.
//!
//! See "Glyph Organization" at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=18>
//! for an explanation of how charsets, encodings and glyphs are related.
7
8use super::string::{Sid, STANDARD_STRINGS};
9
/// The predefined encodings available to Adobe CFF and Type1 fonts.
///
/// An encoding associates each 8-bit character code with a glyph name.
///
/// See <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=37>.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum PredefinedEncoding {
    /// The "Standard" encoding. This is the default variant.
    #[default]
    Standard,
    /// The "Expert" encoding.
    Expert,
    /// The ISO Latin-1 encoding.
    IsoLatin1,
}
22
23impl PredefinedEncoding {
24    /// Converts a character code to the associated glyph name according to
25    /// the selected encoding.
26    pub fn name(&self, code: u8) -> &'static str {
27        let code = code as usize;
28        // All arrays have 256 entries so code is guaranteed to be in bounds
29        let sid = match self {
30            Self::Standard => STANDARD_ENCODING[code] as u16,
31            Self::Expert => EXPERT_ENCODING[code],
32            Self::IsoLatin1 => {
33                // The standard string set is missing names for non breaking
34                // space and soft hyphen so catch these here and return names
35                // from the Adobe Glyph List.
36                //
37                // nonbreakingspace;00A0
38                // softhyphen;00AD
39                //
40                // See <https://github.com/adobe-type-tools/agl-aglfn/blob/4036a9ca80a62f64f9de4f7321a9a045ad0ecfd6/glyphlist.txt>
41                match code {
42                    0x00A0 => return "nonbreakingspace",
43                    0x00AD => return "softhyphen",
44                    _ => ISO_LATIN1_ENCODING[code],
45                }
46            }
47        };
48        STANDARD_STRINGS
49            .get(sid as usize)
50            .copied()
51            .unwrap_or_default()
52    }
53
54    /// Returns the string id for the given character code.
55    pub fn sid(&self, code: u8) -> Option<Sid> {
56        let code = code as usize;
57        let sid = match self {
58            Self::Standard => STANDARD_ENCODING[code] as u16,
59            Self::Expert => EXPERT_ENCODING[code],
60            _ => return None,
61        };
62        Some(Sid::new(sid))
63    }
64}
65
/// Character code to string id table for the "Standard" encoding.
///
/// The table is indexed by character code and holds one string id per code.
/// Entries of 0 resolve to `.notdef`. Every id in this table fits in a `u8`;
/// the values are widened to `u16` where the table is read.
///
/// See "Standard" encoding at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=37>
/// for this particular mapping.
#[rustfmt::skip]
pub(super) static STANDARD_ENCODING: [u8; 256] = [
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 0x00
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 0x10
      1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16, // 0x20
     17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32, // 0x30
     33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48, // 0x40
     49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64, // 0x50
     65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80, // 0x60
     81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,   0, // 0x70
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 0x80
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 0x90
      0,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, // 0xA0
      0, 111, 112, 113, 114,   0, 115, 116, 117, 118, 119, 120, 121, 122,   0, 123, // 0xB0
      0, 124, 125, 126, 127, 128, 129, 130, 131,   0, 132, 133,   0, 134, 135, 136, // 0xC0
    137,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 0xD0
      0, 138,   0, 139,   0,   0,   0,   0, 140, 141, 142, 143,   0,   0,   0,   0, // 0xE0
      0, 144,   0,   0,   0, 145,   0,   0, 146, 147, 148, 149,   0,   0,   0,   0, // 0xF0
];
87
/// Character code to string id table for the "Expert" encoding.
///
/// The table is indexed by character code and holds one string id per code.
/// Entries of 0 resolve to `.notdef`. The ids here go beyond 255, which is
/// why the element type is `u16`.
///
/// See "Expert" encoding at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=40>
/// for this particular mapping.
#[rustfmt::skip]
pub(super) static EXPERT_ENCODING: [u16; 256] = [
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 0x00
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 0x10
      1, 229, 230,   0, 231, 232, 233, 234, 235, 236, 237, 238,  13,  14,  15,  99, // 0x20
    239, 240, 241, 242, 243, 244, 245, 246, 247, 248,  27,  28, 249, 250, 251, 252, // 0x30
      0, 253, 254, 255, 256, 257,   0,   0,   0, 258,   0,   0, 259, 260, 261, 262, // 0x40
      0,   0, 263, 264, 265,   0, 266, 109, 110, 267, 268, 269,   0, 270, 271, 272, // 0x50
    273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, // 0x60
    289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303,   0, // 0x70
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 0x80
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 0x90
      0, 304, 305, 306,   0,   0, 307, 308, 309, 310, 311,   0, 312,   0,   0, 313, // 0xA0
      0,   0, 314, 315,   0,   0, 316, 317, 318,   0,   0,   0, 158, 155, 163, 319, // 0xB0
    320, 321, 322, 323, 324, 325,   0,   0, 326, 150, 164, 169, 327, 328, 329, 330, // 0xC0
    331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, // 0xD0
    347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, // 0xE0
    363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, // 0xF0
];
109
110/// Maps ISO Latin-1 byte codes (0-255) to Adobe CFF Standard String IDs (SIDs).
111/// SIDs are based on the Adobe CFF Specification, Appendix A.
112/// 
113/// Note that U+00A0 (non breaking space) and U+00AD (soft hyphen) do not have
114/// corresponding SIDs. These are mapped here to space and hyphen, respectively,
115/// but are special cased to return correct values in the public API below.
116/// 
117/// See <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf>
118#[rustfmt::skip]
119pub(super) static ISO_LATIN1_ENCODING: [u16; 256] = [
120      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
121      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
122      1,   2,   3,   4,   5,   6,   7, 104,   9,  10,  11,  12,  13,  14,  15,  16,
123     17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,
124     33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
125     49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
126     65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
127     81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,   0,
128      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
129      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
130      1, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172,  14, 173, 174,
131    175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
132    191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206,
133    207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
134    223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238,
135    239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
136];
137
138#[cfg(test)]
139mod tests {
140    use super::*;
141
142    #[test]
143    fn standard_encoding_names() {
144        let pairs = [
145            (0, ".notdef"),
146            (32, "space"),
147            (33, "exclam"),
148            (34, "quotedbl"),
149            (35, "numbersign"),
150            (42, "asterisk"),
151            (43, "plus"),
152            (44, "comma"),
153            (45, "hyphen"),
154            (46, "period"),
155            (47, "slash"),
156            (48, "zero"),
157            (49, "one"),
158            (57, "nine"),
159            (58, "colon"),
160            (61, "equal"),
161            (62, "greater"),
162            (65, "A"),
163            (77, "M"),
164            (90, "Z"),
165            (95, "underscore"),
166            (96, "quoteleft"),
167            (97, "a"),
168            (109, "m"),
169            (122, "z"),
170            (164, "fraction"),
171            (165, "yen"),
172            (166, "florin"),
173            (174, "fi"),
174            (175, "fl"),
175            (188, "ellipsis"),
176            (207, "caron"),
177            (208, "emdash"),
178            (225, "AE"),
179            (255, ".notdef"),
180        ];
181        check_names(&pairs, PredefinedEncoding::Standard);
182    }
183
184    #[test]
185    fn expert_encoding_names() {
186        let pairs = [
187            (0, ".notdef"),
188            (32, "space"),
189            (44, "comma"),
190            (45, "hyphen"),
191            (46, "period"),
192            (47, "fraction"),
193            (48, "zerooldstyle"),
194            (57, "nineoldstyle"),
195            (58, "colon"),
196            (59, "semicolon"),
197            (60, "commasuperior"),
198            (61, "threequartersemdash"),
199            (62, "periodsuperior"),
200            (63, "questionsmall"),
201            (65, "asuperior"),
202            (84, "tsuperior"),
203            (86, "ff"),
204            (87, "fi"),
205            (88, "fl"),
206            (89, "ffi"),
207            (90, "ffl"),
208            (91, "parenleftinferior"),
209            (96, "Gravesmall"),
210            (97, "Asmall"),
211            (109, "Msmall"),
212            (122, "Zsmall"),
213            (123, "colonmonetary"),
214            (124, "onefitted"),
215            (125, "rupiah"),
216            (126, "Tildesmall"),
217            (188, "onequarter"),
218            (200, "zerosuperior"),
219            (201, "onesuperior"),
220            (219, "nineinferior"),
221            (220, "centinferior"),
222            (221, "dollarinferior"),
223            (222, "periodinferior"),
224            (223, "commainferior"),
225            (224, "Agravesmall"),
226            (225, "Aacutesmall"),
227            (226, "Acircumflexsmall"),
228            (227, "Atildesmall"),
229            (255, "Ydieresissmall"),
230        ];
231        check_names(&pairs, PredefinedEncoding::Expert);
232    }
233
234    #[test]
235    fn iso_latin1_encoding_names() {
236        let pairs = [
237            (0, ".notdef"),
238            (32, "space"),
239            (42, "asterisk"),
240            (43, "plus"),
241            (44, "comma"),
242            (46, "period"),
243            (48, "zero"),
244            (49, "one"),
245            (57, "nine"),
246            (58, "colon"),
247            (62, "greater"),
248            (63, "question"),
249            (64, "at"),
250            (65, "A"),
251            (77, "M"),
252            (90, "Z"),
253            (95, "underscore"),
254            (97, "a"),
255            (109, "m"),
256            (122, "z"),
257            (123, "braceleft"),
258            (124, "bar"),
259            (125, "braceright"),
260            (126, "asciitilde"),
261            (160, "nonbreakingspace"),
262            (166, "minus"),
263            (173, "softhyphen"),
264            (187, "Ntilde"),
265            (205, "aring"),
266            (226, "ugrave"),
267            (238, "twodotenleader"),
268            (239, "onedotenleader"),
269            (240, "zerooldstyle"),
270            (249, "nineoldstyle"),
271            (255, "bsuperior"),
272        ];
273        check_names(&pairs, PredefinedEncoding::IsoLatin1);
274    }
275
276    #[track_caller]
277    fn check_names(pairs: &[(u8, &str)], encoding: PredefinedEncoding) {
278        for (code, expected_name) in pairs.iter().copied() {
279            let name = encoding.name(code);
280            assert_eq!(
281                name, expected_name,
282                "expected {expected_name}, got {name} for {code}"
283            );
284        }
285    }
286}