Skip to main content

read_fonts/tables/
name.rs

1//! The [name (Naming)](https://docs.microsoft.com/en-us/typography/opentype/spec/name) table
2
3include!("../../generated/generated_name.rs");
4
5pub use types::NameId;
6
7impl<'a> Name<'a> {
8    /// The FontData containing the encoded name strings.
9    pub fn string_data(&self) -> FontData<'a> {
10        let base = self.offset_data();
11        let off = self.storage_offset();
12        base.split_off(off as usize).unwrap_or_default()
13    }
14}
15
16impl NameRecord {
17    /// Return a type that can decode the string data for this name entry.
18    ///
19    /// The `data` argument should be the name table's data section, which can
20    /// be retrieved via [`Name::string_data`].
21    pub fn string<'a>(&self, data: FontData<'a>) -> Result<NameString<'a>, ReadError> {
22        let start = self.string_offset().non_null().unwrap_or(0);
23        let end = start + self.length() as usize;
24
25        let data = data
26            .as_bytes()
27            .get(start..end)
28            .ok_or(ReadError::OutOfBounds)?;
29
30        let encoding = Encoding::new(self.platform_id(), self.encoding_id());
31        Ok(NameString { data, encoding })
32    }
33
34    // reference from fonttools:
35    // https://github.com/fonttools/fonttools/blob/c2119229cfb02cdb7c5a63374ef29d3d514259e8/Lib/fontTools/ttLib/tables/_n_a_m_e.py#L509
36    pub fn is_unicode(&self) -> bool {
37        self.platform_id() == 0
38            || (self.platform_id() == 3 && [0, 1, 10].contains(&self.encoding_id()))
39    }
40}
41
42impl LangTagRecord {
43    /// Return a type that can decode the string data for this name entry.
44    pub fn lang_tag<'a>(&self, data: FontData<'a>) -> Result<NameString<'a>, ReadError> {
45        let start = self.lang_tag_offset().non_null().unwrap_or(0);
46        let end = start + self.length() as usize;
47
48        let data = data
49            .as_bytes()
50            .get(start..end)
51            .ok_or(ReadError::OutOfBounds)?;
52
53        let encoding = Encoding::Utf16Be;
54        Ok(NameString { data, encoding })
55    }
56}
57
// Everything below (string decoding and the Mac Roman tables) is from pinot:
// https://github.com/dfrg/pinot/blob/eff5239018ca50290fb890a84da3dd51505da364/src/name.rs
59/// Entry for a name in the naming table.
60///
61/// This provides an iterator over characters.
62#[derive(Copy, Clone, PartialEq, Eq)]
63pub struct NameString<'a> {
64    data: &'a [u8],
65    encoding: Encoding,
66}
67
68impl<'a> NameString<'a> {
69    /// An iterator over the `char`s in this name.
70    pub fn chars(&self) -> CharIter<'a> {
71        CharIter {
72            data: self.data,
73            encoding: self.encoding,
74            pos: 0,
75        }
76    }
77}
78
#[cfg(feature = "experimental_traverse")]
impl<'a> traversal::SomeString<'a> for NameString<'a> {
    /// Returns a boxed iterator over the decoded `char`s of this name.
    fn iter_chars(&self) -> Box<dyn Iterator<Item = char> + 'a> {
        let chars: CharIter<'a> = self.chars();
        Box::new(chars)
    }
}
85
#[cfg(feature = "experimental_traverse")]
impl NameRecord {
    /// Builds the traversal field for this record's string: the raw string
    /// offset paired with the result of resolving it against `data`.
    fn traverse_string<'a>(&self, data: FontData<'a>) -> traversal::FieldType<'a> {
        let resolved = traversal::StringOffset {
            offset: self.string_offset().into(),
            target: self.string(data).map(|name| Box::new(name) as _),
        };
        FieldType::StringOffset(resolved)
    }
}
95
#[cfg(feature = "experimental_traverse")]
impl LangTagRecord {
    /// Builds the traversal field for this language tag: the raw tag offset
    /// paired with the result of resolving it against `data`.
    fn traverse_lang_tag<'a>(&self, data: FontData<'a>) -> traversal::FieldType<'a> {
        let resolved = traversal::StringOffset {
            offset: self.lang_tag_offset().into(),
            target: self.lang_tag(data).map(|tag| Box::new(tag) as _),
        };
        FieldType::StringOffset(resolved)
    }
}
105
106impl<'a> IntoIterator for NameString<'a> {
107    type Item = char;
108    type IntoIter = CharIter<'a>;
109    fn into_iter(self) -> Self::IntoIter {
110        self.chars()
111    }
112}
113
114impl std::fmt::Display for NameString<'_> {
115    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
116        for c in self.chars() {
117            c.fmt(f)?;
118        }
119        Ok(())
120    }
121}
122
123impl std::fmt::Debug for NameString<'_> {
124    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
125        write!(f, "\"{self}\"")
126    }
127}
128
129/// An iterator over the chars of a name record.
130#[derive(Clone)]
131pub struct CharIter<'a> {
132    data: &'a [u8],
133    encoding: Encoding,
134    pos: usize,
135}
136
137impl CharIter<'_> {
138    fn bump_u16(&mut self) -> Option<u16> {
139        let result = self
140            .data
141            .get(self.pos..self.pos + 2)
142            .map(|x| u16::from_be_bytes(x.try_into().unwrap()))?;
143        self.pos += 2;
144        Some(result)
145    }
146
147    fn bump_u8(&mut self) -> Option<u8> {
148        let result = self.data.get(self.pos)?;
149        self.pos += 1;
150        Some(*result)
151    }
152}
153
154impl Iterator for CharIter<'_> {
155    type Item = char;
156
157    fn next(&mut self) -> Option<Self::Item> {
158        if self.pos >= self.data.len() {
159            return None;
160        }
161        let rep = core::char::REPLACEMENT_CHARACTER;
162        let raw_c = match self.encoding {
163            Encoding::Utf16Be => {
164                let c1 = self.bump_u16()? as u32;
165                if (0xD800..0xDC00).contains(&c1) {
166                    let Some(c2) = self.bump_u16() else {
167                        return Some(rep);
168                    };
169                    ((c1 & 0x3FF) << 10) + (c2 as u32 & 0x3FF) + 0x10000
170                } else {
171                    c1
172                }
173            }
174            Encoding::MacRoman => {
175                let c = self.bump_u8()?;
176                MacRomanMapping.decode(c) as u32
177            }
178            _ => return None,
179        };
180        Some(std::char::from_u32(raw_c).unwrap_or(rep))
181    }
182}
183
/// The character encoding of a string stored in the name table.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// Big-endian UTF-16.
    Utf16Be,
    /// Mac OS Roman, one byte per character (see [`MacRomanMapping`]).
    MacRoman,
    /// Any platform/encoding combination that [`Encoding::new`] does not
    /// recognize.
    Unknown,
}

impl Encoding {
    /// Determines the encoding from a record's platform and encoding ids.
    ///
    /// * platform `0` (any encoding id) and platform `3` with encoding id
    ///   `0`, `1` or `10` map to [`Encoding::Utf16Be`];
    /// * platform `1` with encoding id `0` maps to [`Encoding::MacRoman`];
    /// * every other combination maps to [`Encoding::Unknown`].
    pub fn new(platform_id: u16, encoding_id: u16) -> Encoding {
        match (platform_id, encoding_id) {
            (0, _) | (3, 0 | 1 | 10) => Encoding::Utf16Be,
            (1, 0) => Encoding::MacRoman,
            _ => Encoding::Unknown,
        }
    }
}
205
206/// A helper for encoding and decoding Mac OS Roman encoded strings.
207pub struct MacRomanMapping;
208
209impl MacRomanMapping {
210    const START_REMAP: u8 = 128;
211    /// Convert from a mac-roman encoded byte to a `char`
212    pub fn decode(self, raw: u8) -> char {
213        if raw < Self::START_REMAP {
214            raw as char
215        } else {
216            let idx = raw - Self::START_REMAP;
217            char::from_u32(MAC_ROMAN_DECODE[idx as usize] as u32).unwrap()
218        }
219    }
220
221    /// convert from a char to a mac-roman encoded byte, if the char is in the mac-roman charset.
222    pub fn encode(self, c: char) -> Option<u8> {
223        let raw_c = c as u32;
224        let raw_c: u16 = raw_c.try_into().ok()?;
225        if raw_c < Self::START_REMAP as u16 {
226            Some(raw_c as u8)
227        } else {
228            match MAC_ROMAN_ENCODE.binary_search_by_key(&raw_c, |(unic, _)| *unic) {
229                Ok(idx) => Some(MAC_ROMAN_ENCODE[idx].1),
230                Err(_) => None,
231            }
232        }
233    }
234}
235
/// A lookup table for decoding the Mac Roman encoding. This maps the byte
/// values `128..=255` to specific Unicode values.
///
/// Entry `i` holds the Unicode value for the Mac Roman byte `128 + i`; bytes
/// below 128 are not stored because `MacRomanMapping::decode` converts them
/// directly.
#[rustfmt::skip]
static MAC_ROMAN_DECODE: [u16; 128] = [
    196, 197, 199, 201, 209, 214, 220, 225, 224, 226, 228, 227, 229, 231, 233,
    232, 234, 235, 237, 236, 238, 239, 241, 243, 242, 244, 246, 245, 250, 249,
    251, 252, 8224, 176, 162, 163, 167, 8226, 182, 223, 174, 169, 8482, 180,
    168, 8800, 198, 216, 8734, 177, 8804, 8805, 165, 181, 8706, 8721, 8719,
    960, 8747, 170, 186, 937, 230, 248, 191, 161, 172, 8730, 402, 8776, 8710,
    171, 187, 8230, 160, 192, 195, 213, 338, 339, 8211, 8212, 8220, 8221, 8216,
    8217, 247, 9674, 255, 376, 8260, 8364, 8249, 8250, 64257, 64258, 8225, 183,
    8218, 8222, 8240, 194, 202, 193, 203, 200, 205, 206, 207, 204, 211, 212,
    63743, 210, 218, 219, 217, 305, 710, 732, 175, 728, 729, 730, 184, 733,
    731, 711,
];
251
/// A lookup table pairing Unicode values with Mac Roman byte values.
///
/// Each entry is `(unicode, mac_roman)`. The entries are sorted by Unicode
/// value, which is what allows `MacRomanMapping::encode` to find a match with
/// a binary search; keep them sorted when editing.
#[rustfmt::skip]
static MAC_ROMAN_ENCODE: [(u16, u8); 128] = [
    (160, 202), (161, 193), (162, 162), (163, 163),
    (165, 180), (167, 164), (168, 172), (169, 169),
    (170, 187), (171, 199), (172, 194), (174, 168),
    (175, 248), (176, 161), (177, 177), (180, 171),
    (181, 181), (182, 166), (183, 225), (184, 252),
    (186, 188), (187, 200), (191, 192), (192, 203),
    (193, 231), (194, 229), (195, 204), (196, 128),
    (197, 129), (198, 174), (199, 130), (200, 233),
    (201, 131), (202, 230), (203, 232), (204, 237),
    (205, 234), (206, 235), (207, 236), (209, 132),
    (210, 241), (211, 238), (212, 239), (213, 205),
    (214, 133), (216, 175), (217, 244), (218, 242),
    (219, 243), (220, 134), (223, 167), (224, 136),
    (225, 135), (226, 137), (227, 139), (228, 138),
    (229, 140), (230, 190), (231, 141), (232, 143),
    (233, 142), (234, 144), (235, 145), (236, 147),
    (237, 146), (238, 148), (239, 149), (241, 150),
    (242, 152), (243, 151), (244, 153), (245, 155),
    (246, 154), (247, 214), (248, 191), (249, 157),
    (250, 156), (251, 158), (252, 159), (255, 216),
    (305, 245), (338, 206), (339, 207), (376, 217),
    (402, 196), (710, 246), (711, 255), (728, 249),
    (729, 250), (730, 251), (731, 254), (732, 247),
    (733, 253), (937, 189), (960, 185), (8211, 208),
    (8212, 209), (8216, 212), (8217, 213), (8218, 226),
    (8220, 210), (8221, 211), (8222, 227), (8224, 160),
    (8225, 224), (8226, 165), (8230, 201), (8240, 228),
    (8249, 220), (8250, 221), (8260, 218), (8364, 219),
    (8482, 170), (8706, 182), (8710, 198), (8719, 184),
    (8721, 183), (8730, 195), (8734, 176), (8747, 186),
    (8776, 197), (8800, 173), (8804, 178), (8805, 179),
    (9674, 215), (63743, 240), (64257, 222), (64258, 223),
];
288
#[cfg(test)]
mod tests {
    use super::*;

    /// Every char of a string inside the Mac Roman charset must survive an
    /// encode/decode round trip.
    #[test]
    fn mac_roman() {
        const INPUT: &str = "Joachim Müller-Lancé";
        INPUT.chars().for_each(|original| {
            let byte = MacRomanMapping.encode(original).unwrap();
            let round_tripped = MacRomanMapping.decode(byte);
            assert_eq!(round_tripped, original);
        });
    }

    /// A high surrogate with nothing after it decodes to the replacement
    /// character rather than ending iteration early.
    #[test]
    fn lone_surrogate_at_end() {
        // DEVANAGARI LETTER SHORT A (U+0904), unpaired high surrogate (0xD800)
        let bytes: &[u8] = &[0x09, 0x04, 0xD8, 0x00];
        let decoded: Vec<char> = CharIter {
            data: bytes,
            encoding: Encoding::Utf16Be,
            pos: 0,
        }
        .collect();
        assert_eq!(decoded, ['ऄ', std::char::REPLACEMENT_CHARACTER]);
    }
}