read_fonts/tables/postscript/
charset.rs

1//! CFF charset support.
2
3use super::{
4    CharsetFormat0, CharsetFormat1, CharsetFormat2, CharsetRange1, CharsetRange2, CustomCharset,
5    FontData, FontRead, GlyphId, ReadError, StringId,
6};
7
8/// Character set for mapping from glyph to string identifiers.
9///
10/// See <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=21>
11#[derive(Clone)]
12pub struct Charset<'a> {
13    kind: CharsetKind<'a>,
14    num_glyphs: u32,
15}
16
17impl<'a> Charset<'a> {
18    pub fn new(
19        cff_data: FontData<'a>,
20        charset_offset: usize,
21        num_glyphs: u32,
22    ) -> Result<Self, ReadError> {
23        let kind = match charset_offset {
24            0 => CharsetKind::IsoAdobe,
25            1 => CharsetKind::Expert,
26            2 => CharsetKind::ExpertSubset,
27            _ => {
28                let data = cff_data
29                    .split_off(charset_offset)
30                    .ok_or(ReadError::OutOfBounds)?;
31                CharsetKind::Custom(CustomCharset::read(data)?)
32            }
33        };
34        Ok(Self { kind, num_glyphs })
35    }
36
37    pub fn kind(&self) -> &CharsetKind<'a> {
38        &self.kind
39    }
40
41    pub fn num_glyphs(&self) -> u32 {
42        self.num_glyphs
43    }
44
45    /// Returns the string identifier for the given glyph identifier.
46    pub fn string_id(&self, glyph_id: GlyphId) -> Result<StringId, ReadError> {
47        let gid = glyph_id.to_u32();
48        if gid >= self.num_glyphs {
49            return Err(ReadError::OutOfBounds);
50        }
51        match &self.kind {
52            CharsetKind::IsoAdobe => {
53                // The ISOAdobe charset is an identity mapping of gid->sid up
54                // to 228 entries
55                // <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=45>
56                if gid <= 228 {
57                    Ok(StringId::new(gid as u16))
58                } else {
59                    Err(ReadError::OutOfBounds)
60                }
61            }
62            CharsetKind::Expert => EXPERT_CHARSET
63                .get(gid as usize)
64                .copied()
65                .ok_or(ReadError::OutOfBounds)
66                .map(StringId::new),
67            CharsetKind::ExpertSubset => EXPERT_SUBSET_CHARSET
68                .get(gid as usize)
69                .copied()
70                .ok_or(ReadError::OutOfBounds)
71                .map(StringId::new),
72            CharsetKind::Custom(custom) => match custom {
73                CustomCharset::Format0(fmt) => fmt.string_id(glyph_id),
74                CustomCharset::Format1(fmt) => fmt.string_id(glyph_id),
75                CustomCharset::Format2(fmt) => fmt.string_id(glyph_id),
76            },
77        }
78    }
79
80    /// Returns the glyph identifier for the given string identifier.
81    pub fn glyph_id(&self, string_id: StringId) -> Result<GlyphId, ReadError> {
82        let sid = string_id.to_u16();
83        match &self.kind {
84            CharsetKind::IsoAdobe => {
85                // The ISOAdobe charset is an identity mapping of gid->sid up
86                // to 228 entries
87                // <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=45>
88                if sid <= 228 {
89                    Ok(GlyphId::from(sid))
90                } else {
91                    Err(ReadError::OutOfBounds)
92                }
93            }
94            CharsetKind::Expert => EXPERT_CHARSET
95                .iter()
96                .position(|n| *n == sid)
97                .map(|pos| GlyphId::new(pos as u32))
98                .ok_or(ReadError::OutOfBounds),
99            CharsetKind::ExpertSubset => EXPERT_SUBSET_CHARSET
100                .iter()
101                .position(|n| *n == sid)
102                .map(|pos| GlyphId::new(pos as u32))
103                .ok_or(ReadError::OutOfBounds),
104            CharsetKind::Custom(custom) => match custom {
105                CustomCharset::Format0(fmt) => fmt.glyph_id(string_id),
106                CustomCharset::Format1(fmt) => fmt.glyph_id(string_id),
107                CustomCharset::Format2(fmt) => fmt.glyph_id(string_id),
108            },
109        }
110    }
111
112    /// Returns an iterator over all of the glyph and string identifier
113    /// mappings.
114    pub fn iter(&self) -> CharsetIter<'a> {
115        match &self.kind {
116            CharsetKind::IsoAdobe
117            | CharsetKind::Expert
118            | CharsetKind::ExpertSubset
119            | CharsetKind::Custom(CustomCharset::Format0(_)) => {
120                CharsetIter(Iter::Simple(self.clone(), 0))
121            }
122            CharsetKind::Custom(CustomCharset::Format1(custom)) => CharsetIter(Iter::Custom1(
123                RangeIter::new(custom.ranges(), self.num_glyphs),
124            )),
125            CharsetKind::Custom(CustomCharset::Format2(custom)) => CharsetIter(Iter::Custom2(
126                RangeIter::new(custom.ranges(), self.num_glyphs),
127            )),
128        }
129    }
130}
131
132/// Predefined and custom character sets.
133#[derive(Clone)]
134pub enum CharsetKind<'a> {
135    IsoAdobe,
136    Expert,
137    ExpertSubset,
138    Custom(CustomCharset<'a>),
139}
140
141impl CharsetFormat0<'_> {
142    fn string_id(&self, glyph_id: GlyphId) -> Result<StringId, ReadError> {
143        let gid = glyph_id.to_u32() as usize;
144        if gid == 0 {
145            Ok(StringId::new(0))
146        } else {
147            self.glyph()
148                .get(gid - 1)
149                .map(|id| StringId::new(id.get()))
150                .ok_or(ReadError::OutOfBounds)
151        }
152    }
153
154    fn glyph_id(&self, string_id: StringId) -> Result<GlyphId, ReadError> {
155        if string_id.to_u16() == 0 {
156            return Ok(GlyphId::NOTDEF);
157        }
158        self.glyph()
159            .iter()
160            .position(|n| n.get() == string_id.to_u16())
161            .map(|n| GlyphId::from((n as u16).saturating_add(1)))
162            .ok_or(ReadError::OutOfBounds)
163    }
164}
165
166impl CharsetFormat1<'_> {
167    fn string_id(&self, glyph_id: GlyphId) -> Result<StringId, ReadError> {
168        string_id_from_ranges(self.ranges(), glyph_id)
169    }
170
171    fn glyph_id(&self, string_id: StringId) -> Result<GlyphId, ReadError> {
172        glyph_id_from_ranges(self.ranges(), string_id)
173    }
174}
175
176impl CharsetFormat2<'_> {
177    fn string_id(&self, glyph_id: GlyphId) -> Result<StringId, ReadError> {
178        string_id_from_ranges(self.ranges(), glyph_id)
179    }
180
181    fn glyph_id(&self, string_id: StringId) -> Result<GlyphId, ReadError> {
182        glyph_id_from_ranges(self.ranges(), string_id)
183    }
184}
185
186fn string_id_from_ranges<T: CharsetRange>(
187    ranges: &[T],
188    glyph_id: GlyphId,
189) -> Result<StringId, ReadError> {
190    let mut gid = glyph_id.to_u32();
191    // The notdef glyph isn't explicitly mapped so we need to special case
192    // it and add -1 and +1 at a few places when processing ranges
193    if gid == 0 {
194        return Ok(StringId::new(0));
195    }
196    gid -= 1;
197    let mut end = 0u32;
198    // Each range provides the string ids for `n_left + 1` glyphs with
199    // the sequence of string ids starting at `first`. Since the counts
200    // are cumulative, we must scan them all in order until we find
201    // the range that contains our requested glyph.
202    for range in ranges {
203        let next_end = end
204            .checked_add(range.n_left() + 1)
205            .ok_or(ReadError::OutOfBounds)?;
206        if gid < next_end {
207            return (gid - end)
208                .checked_add(range.first())
209                .and_then(|sid| sid.try_into().ok())
210                .ok_or(ReadError::OutOfBounds)
211                .map(StringId::new);
212        }
213        end = next_end;
214    }
215    Err(ReadError::OutOfBounds)
216}
217
218fn glyph_id_from_ranges<T: CharsetRange>(
219    ranges: &[T],
220    string_id: StringId,
221) -> Result<GlyphId, ReadError> {
222    let sid = string_id.to_u16() as u32;
223    // notdef glyph is not explicitly mapped
224    if sid == 0 {
225        return Ok(GlyphId::NOTDEF);
226    }
227    let mut gid = 1u32;
228    for range in ranges {
229        let first = range.first();
230        let n_left = range.n_left();
231        if first <= sid && sid <= (first + n_left) {
232            gid += sid - first;
233            return Ok(GlyphId::new(gid));
234        }
235        gid += n_left + 1;
236    }
237    Err(ReadError::OutOfBounds)
238}
239
/// Common view over the range records of charset formats 1 and 2, allowing
/// the tricky search logic to be written a single time.
trait CharsetRange {
    /// First string identifier of the range, widened to 32 bits.
    fn first(&self) -> u32;

    /// Number of string identifiers that follow `first` in the range, so
    /// that the range covers `n_left + 1` glyphs in total.
    fn n_left(&self) -> u32;
}
246
247impl CharsetRange for CharsetRange1 {
248    fn first(&self) -> u32 {
249        self.first.get() as u32
250    }
251
252    fn n_left(&self) -> u32 {
253        self.n_left as u32
254    }
255}
256
257impl CharsetRange for CharsetRange2 {
258    fn first(&self) -> u32 {
259        self.first.get() as u32
260    }
261
262    fn n_left(&self) -> u32 {
263        self.n_left.get() as u32
264    }
265}
266
267/// Iterator over the glyph and string identifier mappings in a character set.
268#[derive(Clone)]
269pub struct CharsetIter<'a>(Iter<'a>);
270
271impl Iterator for CharsetIter<'_> {
272    type Item = (GlyphId, StringId);
273
274    fn next(&mut self) -> Option<Self::Item> {
275        match &mut self.0 {
276            Iter::Simple(charset, cur) => {
277                let gid = GlyphId::new(*cur);
278                let sid = charset.string_id(gid).ok()?;
279                *cur = cur.checked_add(1)?;
280                Some((gid, sid))
281            }
282            Iter::Custom1(custom) => custom.next(),
283            Iter::Custom2(custom) => custom.next(),
284        }
285    }
286}
287
288#[derive(Clone)]
289enum Iter<'a> {
290    /// Predefined sets and custom format 0 are just array lookups so we use
291    /// the builtin mapping function.
292    Simple(Charset<'a>, u32),
293    Custom1(RangeIter<'a, CharsetRange1>),
294    Custom2(RangeIter<'a, CharsetRange2>),
295}
296
297/// Custom iterator for range based formats.
298///
299/// Each individual lookup requires a linear scan through the ranges so this
300/// provides a more efficient code path for iteration.
301#[derive(Clone)]
302struct RangeIter<'a, T> {
303    ranges: std::slice::Iter<'a, T>,
304    num_glyphs: u32,
305    gid: u32,
306    first: u32,
307    end: u32,
308    prev_end: u32,
309}
310
311impl<'a, T> RangeIter<'a, T>
312where
313    T: CharsetRange,
314{
315    fn new(ranges: &'a [T], num_glyphs: u32) -> Self {
316        let mut ranges = ranges.iter();
317        let (first, end) = next_range(&mut ranges).unwrap_or_default();
318        Self {
319            ranges,
320            num_glyphs,
321            gid: 0,
322            first,
323            end,
324            prev_end: 0,
325        }
326    }
327
328    fn next(&mut self) -> Option<(GlyphId, StringId)> {
329        if self.gid >= self.num_glyphs {
330            return None;
331        }
332        // The notdef glyph isn't explicitly mapped so we need to special case
333        // it and add -1 and +1 at a few places when processing ranges
334        if self.gid == 0 {
335            self.gid += 1;
336            return Some((GlyphId::new(0), StringId::new(0)));
337        }
338        let gid = self.gid - 1;
339        self.gid = self.gid.checked_add(1)?;
340        while gid >= self.end {
341            let (first, end) = next_range(&mut self.ranges)?;
342            self.prev_end = self.end;
343            self.first = first;
344            self.end = self.prev_end.checked_add(end)?;
345        }
346        let sid = self
347            .first
348            .checked_add(gid.checked_sub(self.prev_end)?)?
349            .try_into()
350            .ok()?;
351        Some((GlyphId::new(gid + 1), StringId::new(sid)))
352    }
353}
354
355fn next_range<T: CharsetRange>(ranges: &mut std::slice::Iter<T>) -> Option<(u32, u32)> {
356    ranges
357        .next()
358        .map(|range| (range.first(), range.n_left() + 1))
359}
360
/// String identifiers of the predefined "Expert" charset, indexed by glyph
/// identifier.
///
/// See "Expert" charset at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=47>
#[rustfmt::skip]
const EXPERT_CHARSET: &[u16] = &[
      0,    1,  229,  230,  231,  232,  233,  234,  235,  236,  237,  238,   13,   14,   15,   99, // glyphs   0..=15
    239,  240,  241,  242,  243,  244,  245,  246,  247,  248,   27,   28,  249,  250,  251,  252, // glyphs  16..=31
    253,  254,  255,  256,  257,  258,  259,  260,  261,  262,  263,  264,  265,  266,  109,  110, // glyphs  32..=47
    267,  268,  269,  270,  271,  272,  273,  274,  275,  276,  277,  278,  279,  280,  281,  282, // glyphs  48..=63
    283,  284,  285,  286,  287,  288,  289,  290,  291,  292,  293,  294,  295,  296,  297,  298, // glyphs  64..=79
    299,  300,  301,  302,  303,  304,  305,  306,  307,  308,  309,  310,  311,  312,  313,  314, // glyphs  80..=95
    315,  316,  317,  318,  158,  155,  163,  319,  320,  321,  322,  323,  324,  325,  326,  150, // glyphs  96..=111
    164,  169,  327,  328,  329,  330,  331,  332,  333,  334,  335,  336,  337,  338,  339,  340, // glyphs 112..=127
    341,  342,  343,  344,  345,  346,  347,  348,  349,  350,  351,  352,  353,  354,  355,  356, // glyphs 128..=143
    357,  358,  359,  360,  361,  362,  363,  364,  365,  366,  367,  368,  369,  370,  371,  372, // glyphs 144..=159
    373,  374,  375,  376,  377,  378,                                                             // glyphs 160..=165
];
376
/// String identifiers of the predefined "Expert Subset" charset, indexed by
/// glyph identifier.
///
/// See "Expert Subset" charset at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=49>
#[rustfmt::skip]
const EXPERT_SUBSET_CHARSET: &[u16] = &[
      0,    1,  231,  232,  235,  236,  237,  238,   13,   14,   15,   99,  239,  240,  241,  242, // glyphs  0..=15
    243,  244,  245,  246,  247,  248,   27,   28,  249,  250,  251,  253,  254,  255,  256,  257, // glyphs 16..=31
    258,  259,  260,  261,  262,  263,  264,  265,  266,  109,  110,  267,  268,  269,  270,  272, // glyphs 32..=47
    300,  301,  302,  305,  314,  315,  158,  155,  163,  320,  321,  322,  323,  324,  325,  326, // glyphs 48..=63
    150,  164,  169,  327,  328,  329,  330,  331,  332,  333,  334,  335,  336,  337,  338,  339, // glyphs 64..=79
    340,  341,  342,  343,  344,  345,  346,                                                       // glyphs 80..=86
];
387
#[cfg(test)]
mod tests {
    use super::*;
    use font_test_data::bebuffer::BeBuffer;

    /// Offset at which custom charset data is placed in the test buffers.
    /// Offsets 0, 1 and 2 select the predefined charsets, so custom data has
    /// to be preceded by some padding.
    const CUSTOM_OFFSET: usize = 4;

    #[test]
    fn iso_adobe_charset() {
        // Offset of 0 signifies the ISOAdobe charset
        let charset_offset = 0;
        let num_glyphs = 64;
        // This is an identity mapping
        test_simple_mapping(charset_offset, num_glyphs, |gid| Some(gid.to_u32()));
    }

    #[test]
    fn expert_charset() {
        // Offset 1 signifies the expert charset
        let charset_offset = 1;
        let num_glyphs = 64;
        // This is an array based mapping
        test_simple_mapping(charset_offset, num_glyphs, |gid| {
            EXPERT_CHARSET
                .get(gid.to_u32() as usize)
                .map(|id| u32::from(*id))
        });
    }

    #[test]
    fn expert_subset_charset() {
        // Offset 2 signifies the expert subset charset
        let charset_offset = 2;
        let num_glyphs = 64;
        // This is an array based mapping
        test_simple_mapping(charset_offset, num_glyphs, |gid| {
            EXPERT_SUBSET_CHARSET
                .get(gid.to_u32() as usize)
                .map(|id| u32::from(*id))
        });
    }

    // Common test setup for identity or array based charset mappings
    fn test_simple_mapping(
        charset_offset: usize,
        num_glyphs: u32,
        expected: impl Fn(GlyphId) -> Option<u32>,
    ) {
        let charset = Charset::new(FontData::new(&[]), charset_offset, num_glyphs).unwrap();
        // Test lookup code path in both directions
        for gid in (0..num_glyphs).map(GlyphId::new) {
            let sid = expected(gid).unwrap();
            assert_eq!(charset.string_id(gid).unwrap().to_u16() as u32, sid);
            assert_eq!(charset.glyph_id(StringId::new(sid as u16)).unwrap(), gid);
        }
        // Test iterator code path
        for (gid, sid) in charset.iter() {
            assert_eq!(Some(sid.to_u16() as u32), expected(gid));
        }
        assert_eq!(charset.iter().count() as u32, num_glyphs);
        // Don't map glyphs beyond num_glyphs
        for gid in num_glyphs..u16::MAX as u32 {
            assert!(charset.string_id(GlyphId::new(gid)).is_err());
        }
    }

    #[test]
    fn custom_mapping_format0() {
        let num_glyphs = 6;
        let buf = BeBuffer::new()
            // Add some padding so we can generate an offset greater than 2
            .extend([0u8; CUSTOM_OFFSET])
            // format 0
            .push(0u8)
            // glyph array: each sid is gid * 2
            .extend([2u16, 4, 6, 8, 10]);
        let charset = Charset::new(FontData::new(buf.data()), CUSTOM_OFFSET, num_glyphs).unwrap();
        // Test lookup code path
        for gid in 0..num_glyphs {
            assert_eq!(
                charset.string_id(GlyphId::new(gid)).unwrap().to_u16() as u32,
                gid * 2
            )
        }
        // Test iterator code path, including the order of the glyphs
        assert!(charset.iter().eq(
            (0..num_glyphs).map(|gid| (GlyphId::new(gid), StringId::new((gid * 2) as u16)))
        ));
        assert_eq!(charset.iter().count() as u32, num_glyphs);
        // Test out of bounds glyphs
        for gid in num_glyphs..u16::MAX as u32 {
            assert!(charset.string_id(GlyphId::new(gid)).is_err());
        }
        // Test reverse mapping
        for gid in 0..num_glyphs {
            assert_eq!(
                charset.glyph_id(StringId::new((gid * 2) as u16)),
                Ok(GlyphId::new(gid))
            );
        }
        // String ids that are absent from the glyph array don't map back
        assert!(charset.glyph_id(StringId::new(3)).is_err());
    }

    #[test]
    fn custom_mapping_format1() {
        let num_glyphs = 7;
        let buf = BeBuffer::new()
            // Add some padding so we can generate an offset greater than 2
            .extend([0u8; CUSTOM_OFFSET])
            // format 1
            .push(1u8)
            // Three disjoint range mappings
            .push(8u16)
            .push(2u8)
            .push(1200u16)
            .push(0u8)
            .push(20u16)
            .push(1u8);
        let expected_sids = [0, 8, 9, 10, 1200, 20, 21];
        test_range_mapping(buf.data(), num_glyphs, &expected_sids);
    }

    #[test]
    fn custom_mapping_format2() {
        let buf = BeBuffer::new()
            // Add some padding so we can generate an offset greater than 2
            .extend([0u8; CUSTOM_OFFSET])
            // format 2
            .push(2u8)
            // Three disjoint range mappings
            .push(8u16)
            .push(2u16)
            .push(1200u16)
            .push(0u16)
            .push(20u16)
            .push(800u16);
        let mut expected_sids = vec![0, 8, 9, 10, 1200];
        expected_sids.extend((0..=800).map(|i| i + 20));
        let num_glyphs = expected_sids.len() as u32;
        test_range_mapping(buf.data(), num_glyphs, &expected_sids);
    }

    // Common code for testing range based mappings
    fn test_range_mapping(data: &[u8], num_glyphs: u32, expected_sids: &[u32]) {
        let charset = Charset::new(FontData::new(data), CUSTOM_OFFSET, num_glyphs).unwrap();
        // Test lookup code path
        for (gid, sid) in expected_sids.iter().enumerate() {
            assert_eq!(
                charset.string_id(GlyphId::new(gid as _)).unwrap().to_u16() as u32,
                *sid
            )
        }
        // Test iterator code path
        assert!(charset.iter().eq(expected_sids
            .iter()
            .enumerate()
            .map(|(gid, sid)| (GlyphId::new(gid as u32), StringId::new(*sid as u16)))));
        assert_eq!(charset.iter().count() as u32, num_glyphs);
        // Test out of bounds glyphs
        for gid in num_glyphs..u16::MAX as u32 {
            assert!(charset.string_id(GlyphId::new(gid)).is_err());
        }
        // Test reverse mapping
        for (gid, sid) in expected_sids.iter().enumerate() {
            assert_eq!(
                charset.glyph_id(StringId::new(*sid as u16)),
                Ok(GlyphId::new(gid as u32))
            );
        }
    }
}