Skip to main content

read_fonts/ps/cff/
charset.rs

1//! CFF charset support.
2
3use crate::ps::string::Sid;
4use crate::{FontData, FontRead, GlyphId, ReadError};
5
6#[doc(inline)]
7pub use super::v1::{
8    CharsetFormat0 as Format0, CharsetFormat1 as Format1, CharsetFormat2 as Format2,
9    CharsetRange1 as Range1, CharsetRange2 as Range2, CustomCharset,
10};
11
12/// Character set for mapping from glyph to string identifiers.
13///
14/// See <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=21>
15#[derive(Clone)]
16pub struct Charset<'a> {
17    kind: CharsetKind<'a>,
18    num_glyphs: u32,
19}
20
21impl<'a> Charset<'a> {
22    pub fn new(
23        cff_data: FontData<'a>,
24        charset_offset: usize,
25        num_glyphs: u32,
26    ) -> Result<Self, ReadError> {
27        let kind = match charset_offset {
28            0 => CharsetKind::IsoAdobe,
29            1 => CharsetKind::Expert,
30            2 => CharsetKind::ExpertSubset,
31            _ => {
32                let data = cff_data
33                    .split_off(charset_offset)
34                    .ok_or(ReadError::OutOfBounds)?;
35                CharsetKind::Custom(CustomCharset::read(data)?)
36            }
37        };
38        Ok(Self { kind, num_glyphs })
39    }
40
41    pub fn kind(&self) -> &CharsetKind<'a> {
42        &self.kind
43    }
44
45    pub fn num_glyphs(&self) -> u32 {
46        self.num_glyphs
47    }
48
49    /// Returns the string identifier for the given glyph identifier.
50    pub fn string_id(&self, glyph_id: GlyphId) -> Result<Sid, ReadError> {
51        let gid = glyph_id.to_u32();
52        if gid >= self.num_glyphs {
53            return Err(ReadError::OutOfBounds);
54        }
55        match &self.kind {
56            CharsetKind::IsoAdobe => {
57                // The ISOAdobe charset is an identity mapping of gid->sid up
58                // to 228 entries
59                // <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=45>
60                if gid <= 228 {
61                    Ok(Sid::new(gid as u16))
62                } else {
63                    Err(ReadError::OutOfBounds)
64                }
65            }
66            CharsetKind::Expert => EXPERT_CHARSET
67                .get(gid as usize)
68                .copied()
69                .ok_or(ReadError::OutOfBounds)
70                .map(Sid::new),
71            CharsetKind::ExpertSubset => EXPERT_SUBSET_CHARSET
72                .get(gid as usize)
73                .copied()
74                .ok_or(ReadError::OutOfBounds)
75                .map(Sid::new),
76            CharsetKind::Custom(custom) => match custom {
77                CustomCharset::Format0(fmt) => fmt.string_id(glyph_id),
78                CustomCharset::Format1(fmt) => fmt.string_id(glyph_id),
79                CustomCharset::Format2(fmt) => fmt.string_id(glyph_id),
80            },
81        }
82    }
83
84    /// Returns the glyph identifier for the given string identifier.
85    pub fn glyph_id(&self, string_id: Sid) -> Result<GlyphId, ReadError> {
86        let sid = string_id.to_u16();
87        match &self.kind {
88            CharsetKind::IsoAdobe => {
89                // The ISOAdobe charset is an identity mapping of gid->sid up
90                // to 228 entries
91                // <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=45>
92                if sid <= 228 {
93                    Ok(GlyphId::from(sid))
94                } else {
95                    Err(ReadError::OutOfBounds)
96                }
97            }
98            CharsetKind::Expert => EXPERT_CHARSET
99                .iter()
100                .position(|n| *n == sid)
101                .map(|pos| GlyphId::new(pos as u32))
102                .ok_or(ReadError::OutOfBounds),
103            CharsetKind::ExpertSubset => EXPERT_SUBSET_CHARSET
104                .iter()
105                .position(|n| *n == sid)
106                .map(|pos| GlyphId::new(pos as u32))
107                .ok_or(ReadError::OutOfBounds),
108            CharsetKind::Custom(custom) => match custom {
109                CustomCharset::Format0(fmt) => fmt.glyph_id(string_id),
110                CustomCharset::Format1(fmt) => fmt.glyph_id(string_id),
111                CustomCharset::Format2(fmt) => fmt.glyph_id(string_id),
112            },
113        }
114    }
115
116    /// Returns an iterator over all of the glyph and string identifier
117    /// mappings.
118    pub fn iter(&self) -> Iter<'a> {
119        match &self.kind {
120            CharsetKind::IsoAdobe
121            | CharsetKind::Expert
122            | CharsetKind::ExpertSubset
123            | CharsetKind::Custom(CustomCharset::Format0(_)) => {
124                Iter(IterKind::Simple(self.clone(), 0))
125            }
126            CharsetKind::Custom(CustomCharset::Format1(custom)) => Iter(IterKind::Custom1(
127                RangeIter::new(custom.ranges(), self.num_glyphs),
128            )),
129            CharsetKind::Custom(CustomCharset::Format2(custom)) => Iter(IterKind::Custom2(
130                RangeIter::new(custom.ranges(), self.num_glyphs),
131            )),
132        }
133    }
134}
135
136/// Predefined and custom character sets.
137#[derive(Clone)]
138pub enum CharsetKind<'a> {
139    IsoAdobe,
140    Expert,
141    ExpertSubset,
142    Custom(CustomCharset<'a>),
143}
144
145impl Format0<'_> {
146    fn string_id(&self, glyph_id: GlyphId) -> Result<Sid, ReadError> {
147        let gid = glyph_id.to_u32() as usize;
148        if gid == 0 {
149            Ok(Sid::new(0))
150        } else {
151            self.glyph()
152                .get(gid - 1)
153                .map(|id| Sid::new(id.get()))
154                .ok_or(ReadError::OutOfBounds)
155        }
156    }
157
158    fn glyph_id(&self, string_id: Sid) -> Result<GlyphId, ReadError> {
159        if string_id.to_u16() == 0 {
160            return Ok(GlyphId::NOTDEF);
161        }
162        self.glyph()
163            .iter()
164            .position(|n| n.get() == string_id.to_u16())
165            .map(|n| GlyphId::from((n as u16).saturating_add(1)))
166            .ok_or(ReadError::OutOfBounds)
167    }
168}
169
170impl Format1<'_> {
171    fn string_id(&self, glyph_id: GlyphId) -> Result<Sid, ReadError> {
172        string_id_from_ranges(self.ranges(), glyph_id)
173    }
174
175    fn glyph_id(&self, string_id: Sid) -> Result<GlyphId, ReadError> {
176        glyph_id_from_ranges(self.ranges(), string_id)
177    }
178}
179
180impl Format2<'_> {
181    fn string_id(&self, glyph_id: GlyphId) -> Result<Sid, ReadError> {
182        string_id_from_ranges(self.ranges(), glyph_id)
183    }
184
185    fn glyph_id(&self, string_id: Sid) -> Result<GlyphId, ReadError> {
186        glyph_id_from_ranges(self.ranges(), string_id)
187    }
188}
189
190fn string_id_from_ranges<T: CharsetRange>(
191    ranges: &[T],
192    glyph_id: GlyphId,
193) -> Result<Sid, ReadError> {
194    let mut gid = glyph_id.to_u32();
195    // The notdef glyph isn't explicitly mapped so we need to special case
196    // it and add -1 and +1 at a few places when processing ranges
197    if gid == 0 {
198        return Ok(Sid::new(0));
199    }
200    gid -= 1;
201    let mut end = 0u32;
202    // Each range provides the string ids for `n_left + 1` glyphs with
203    // the sequence of string ids starting at `first`. Since the counts
204    // are cumulative, we must scan them all in order until we find
205    // the range that contains our requested glyph.
206    for range in ranges {
207        let next_end = end
208            .checked_add(range.n_left() + 1)
209            .ok_or(ReadError::OutOfBounds)?;
210        if gid < next_end {
211            return (gid - end)
212                .checked_add(range.first())
213                .and_then(|sid| sid.try_into().ok())
214                .ok_or(ReadError::OutOfBounds)
215                .map(Sid::new);
216        }
217        end = next_end;
218    }
219    Err(ReadError::OutOfBounds)
220}
221
222fn glyph_id_from_ranges<T: CharsetRange>(
223    ranges: &[T],
224    string_id: Sid,
225) -> Result<GlyphId, ReadError> {
226    let sid = string_id.to_u16() as u32;
227    // notdef glyph is not explicitly mapped
228    if sid == 0 {
229        return Ok(GlyphId::NOTDEF);
230    }
231    let mut gid = 1u32;
232    for range in ranges {
233        let first = range.first();
234        let n_left = range.n_left();
235        if first <= sid && sid <= (first + n_left) {
236            gid += sid - first;
237            return Ok(GlyphId::new(gid));
238        }
239        gid += n_left + 1;
240    }
241    Err(ReadError::OutOfBounds)
242}
243
/// Common view over the range records of charset formats 1 and 2.
///
/// Exposing both fields as `u32` lets the range search and the range
/// iterator be written once for both record types.
trait CharsetRange {
    /// First string identifier covered by the range.
    fn first(&self) -> u32;
    /// Number of string identifiers in the range after the first one; the
    /// range covers `n_left + 1` glyphs in total.
    fn n_left(&self) -> u32;
}
250
251impl CharsetRange for Range1 {
252    fn first(&self) -> u32 {
253        self.first.get() as u32
254    }
255
256    fn n_left(&self) -> u32 {
257        self.n_left as u32
258    }
259}
260
261impl CharsetRange for Range2 {
262    fn first(&self) -> u32 {
263        self.first.get() as u32
264    }
265
266    fn n_left(&self) -> u32 {
267        self.n_left.get() as u32
268    }
269}
270
271/// Iterator over the glyph and string identifier mappings in a character set.
272#[derive(Clone)]
273pub struct Iter<'a>(IterKind<'a>);
274
275impl Iterator for Iter<'_> {
276    type Item = (GlyphId, Sid);
277
278    fn next(&mut self) -> Option<Self::Item> {
279        match &mut self.0 {
280            IterKind::Simple(charset, cur) => {
281                let gid = GlyphId::new(*cur);
282                let sid = charset.string_id(gid).ok()?;
283                *cur = cur.checked_add(1)?;
284                Some((gid, sid))
285            }
286            IterKind::Custom1(custom) => custom.next(),
287            IterKind::Custom2(custom) => custom.next(),
288        }
289    }
290}
291
292#[derive(Clone)]
293enum IterKind<'a> {
294    /// Predefined sets and custom format 0 are just array lookups so we use
295    /// the builtin mapping function.
296    Simple(Charset<'a>, u32),
297    Custom1(RangeIter<'a, Range1>),
298    Custom2(RangeIter<'a, Range2>),
299}
300
/// Iteration state for the range based charset formats.
///
/// A single lookup has to scan the ranges linearly from the start, so
/// iteration keeps track of the current range instead of repeating that scan
/// for every glyph.
#[derive(Clone)]
struct RangeIter<'a, T> {
    /// Ranges that have not been entered yet.
    ranges: std::slice::Iter<'a, T>,
    /// Iteration stops once `gid` reaches this value.
    num_glyphs: u32,
    /// Glyph identifier that the next call will yield.
    gid: u32,
    /// First string identifier of the current range.
    first: u32,
    /// Exclusive upper bound of the current range, counted in glyphs
    /// covered by the ranges so far (notdef is not counted).
    end: u32,
    /// Value of `end` for the previous range, i.e. the position at which the
    /// current range starts.
    prev_end: u32,
}
314
315impl<'a, T> RangeIter<'a, T>
316where
317    T: CharsetRange,
318{
319    fn new(ranges: &'a [T], num_glyphs: u32) -> Self {
320        let mut ranges = ranges.iter();
321        let (first, end) = next_range(&mut ranges).unwrap_or_default();
322        Self {
323            ranges,
324            num_glyphs,
325            gid: 0,
326            first,
327            end,
328            prev_end: 0,
329        }
330    }
331
332    fn next(&mut self) -> Option<(GlyphId, Sid)> {
333        if self.gid >= self.num_glyphs {
334            return None;
335        }
336        // The notdef glyph isn't explicitly mapped so we need to special case
337        // it and add -1 and +1 at a few places when processing ranges
338        if self.gid == 0 {
339            self.gid += 1;
340            return Some((GlyphId::new(0), Sid::new(0)));
341        }
342        let gid = self.gid - 1;
343        self.gid = self.gid.checked_add(1)?;
344        while gid >= self.end {
345            let (first, end) = next_range(&mut self.ranges)?;
346            self.prev_end = self.end;
347            self.first = first;
348            self.end = self.prev_end.checked_add(end)?;
349        }
350        let sid = self
351            .first
352            .checked_add(gid.checked_sub(self.prev_end)?)?
353            .try_into()
354            .ok()?;
355        Some((GlyphId::new(gid + 1), Sid::new(sid)))
356    }
357}
358
359fn next_range<T: CharsetRange>(ranges: &mut std::slice::Iter<T>) -> Option<(u32, u32)> {
360    ranges
361        .next()
362        .map(|range| (range.first(), range.n_left() + 1))
363}
364
/// String identifiers of the predefined Expert charset, indexed by glyph
/// identifier (166 entries).
///
/// See "Expert" charset at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=47>
#[rustfmt::skip]
const EXPERT_CHARSET: &[u16] = &[
      0,    1,  229,  230,  231,  232,  233,  234,  235,  236,  237,  238,   13,   14,   15,   99, // gids   0..=15
    239,  240,  241,  242,  243,  244,  245,  246,  247,  248,   27,   28,  249,  250,  251,  252, // gids  16..=31
    253,  254,  255,  256,  257,  258,  259,  260,  261,  262,  263,  264,  265,  266,  109,  110, // gids  32..=47
    267,  268,  269,  270,  271,  272,  273,  274,  275,  276,  277,  278,  279,  280,  281,  282, // gids  48..=63
    283,  284,  285,  286,  287,  288,  289,  290,  291,  292,  293,  294,  295,  296,  297,  298, // gids  64..=79
    299,  300,  301,  302,  303,  304,  305,  306,  307,  308,  309,  310,  311,  312,  313,  314, // gids  80..=95
    315,  316,  317,  318,  158,  155,  163,  319,  320,  321,  322,  323,  324,  325,  326,  150, // gids  96..=111
    164,  169,  327,  328,  329,  330,  331,  332,  333,  334,  335,  336,  337,  338,  339,  340, // gids 112..=127
    341,  342,  343,  344,  345,  346,  347,  348,  349,  350,  351,  352,  353,  354,  355,  356, // gids 128..=143
    357,  358,  359,  360,  361,  362,  363,  364,  365,  366,  367,  368,  369,  370,  371,  372, // gids 144..=159
    373,  374,  375,  376,  377,  378,                                                             // gids 160..=165
];
380
/// String identifiers of the predefined Expert Subset charset, indexed by
/// glyph identifier (87 entries).
///
/// See "Expert Subset" charset at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=49>
#[rustfmt::skip]
const EXPERT_SUBSET_CHARSET: &[u16] = &[
      0,    1,  231,  232,  235,  236,  237,  238,   13,   14,   15,   99,  239,  240,  241,  242, // gids  0..=15
    243,  244,  245,  246,  247,  248,   27,   28,  249,  250,  251,  253,  254,  255,  256,  257, // gids 16..=31
    258,  259,  260,  261,  262,  263,  264,  265,  266,  109,  110,  267,  268,  269,  270,  272, // gids 32..=47
    300,  301,  302,  305,  314,  315,  158,  155,  163,  320,  321,  322,  323,  324,  325,  326, // gids 48..=63
    150,  164,  169,  327,  328,  329,  330,  331,  332,  333,  334,  335,  336,  337,  338,  339, // gids 64..=79
    340,  341,  342,  343,  344,  345,  346,                                                       // gids 80..=86
];
391
#[cfg(test)]
mod tests {
    use super::*;
    use font_test_data::bebuffer::BeBuffer;

    /// Position of the custom charset table inside the test buffers. It has
    /// to be greater than 2 so that it is not taken for a predefined set.
    const TABLE_OFFSET: usize = 4;

    /// Starts a buffer with `TABLE_OFFSET` bytes of padding followed by the
    /// charset format byte.
    fn table_with_format(format: u8) -> BeBuffer {
        BeBuffer::new().extend([0u8; TABLE_OFFSET]).push(format)
    }

    /// Asserts that no glyph from `first_unmapped` up to (but excluding)
    /// `u16::MAX` has a string id.
    fn assert_unmapped_from(charset: &Charset<'_>, first_unmapped: u32) {
        for gid in first_unmapped..u16::MAX as u32 {
            assert_eq!(charset.string_id(GlyphId::new(gid)).ok(), None);
        }
    }

    #[test]
    fn iso_adobe_charset() {
        // Offset 0 selects ISOAdobe, which is an identity mapping
        check_predefined(0, 64, |gid| Some(gid.to_u32()));
    }

    #[test]
    fn expert_charset() {
        // Offset 1 selects the expert charset, an array based mapping
        check_predefined(1, 64, |gid| {
            EXPERT_CHARSET
                .get(gid.to_u32() as usize)
                .map(|id| *id as u32)
        });
    }

    #[test]
    fn expert_subset_charset() {
        // Offset 2 selects the expert subset charset, an array based mapping
        check_predefined(2, 64, |gid| {
            EXPERT_SUBSET_CHARSET
                .get(gid.to_u32() as usize)
                .map(|id| *id as u32)
        });
    }

    // Shared checks for the identity and array based predefined charsets
    fn check_predefined(
        charset_offset: usize,
        num_glyphs: u32,
        expected: impl Fn(GlyphId) -> Option<u32>,
    ) {
        let charset = Charset::new(FontData::new(&[]), charset_offset, num_glyphs).unwrap();
        for gid in (0..num_glyphs).map(GlyphId::new) {
            let sid = expected(gid).unwrap();
            // Forward and reverse lookups must agree
            assert_eq!(charset.string_id(gid).unwrap().to_u16() as u32, sid);
            assert_eq!(charset.glyph_id(Sid::new(sid as _)).unwrap(), gid);
        }
        // Don't map glyphs beyond num_glyphs
        assert_unmapped_from(&charset, num_glyphs);
    }

    #[test]
    fn custom_mapping_format0() {
        let num_glyphs = 6;
        // Format 0 with a glyph array in which each sid is gid * 2
        let buf = table_with_format(0).extend([2u16, 4, 6, 8, 10]);
        let charset = Charset::new(FontData::new(buf.data()), TABLE_OFFSET, num_glyphs).unwrap();
        // Lookup code path
        for gid in 0..num_glyphs {
            let sid = charset.string_id(GlyphId::new(gid)).unwrap();
            assert_eq!(sid.to_u16() as u32, gid * 2);
        }
        // Iterator code path
        for (gid, sid) in charset.iter() {
            assert_eq!(sid.to_u16() as u32, gid.to_u32() * 2);
        }
        assert_eq!(charset.iter().count() as u32, num_glyphs);
        // Out of bounds glyphs
        assert_unmapped_from(&charset, num_glyphs);
    }

    #[test]
    fn custom_mapping_format1() {
        // Format 1 with three disjoint ranges, each written as `first`
        // followed by an 8-bit `n_left`
        let buf = table_with_format(1)
            .push(8u16)
            .push(2u8)
            .push(1200u16)
            .push(0u8)
            .push(20u16)
            .push(1u8);
        let expected_sids = [0, 8, 9, 10, 1200, 20, 21];
        check_ranges(buf.data(), 7, &expected_sids);
    }

    #[test]
    fn custom_mapping_format2() {
        // Format 2 with three disjoint ranges, each written as `first`
        // followed by a 16-bit `n_left`
        let buf = table_with_format(2)
            .push(8u16)
            .push(2u16)
            .push(1200u16)
            .push(0u16)
            .push(20u16)
            .push(800u16);
        let mut expected_sids = vec![0, 8, 9, 10, 1200];
        // The last range covers 801 string ids starting at 20
        expected_sids.extend(20..=820);
        let num_glyphs = expected_sids.len() as u32;
        check_ranges(buf.data(), num_glyphs, &expected_sids);
    }

    // Shared checks for the range based custom charsets
    fn check_ranges(data: &[u8], num_glyphs: u32, expected_sids: &[u32]) {
        let charset = Charset::new(FontData::new(data), TABLE_OFFSET, num_glyphs).unwrap();
        // Lookup code path
        for (gid, sid) in expected_sids.iter().enumerate() {
            let actual = charset.string_id(GlyphId::new(gid as _)).unwrap();
            assert_eq!(actual.to_u16() as u32, *sid);
        }
        // Iterator code path
        let expected_pairs: Vec<_> = expected_sids
            .iter()
            .enumerate()
            .map(|(gid, sid)| (GlyphId::new(gid as u32), Sid::new(*sid as u16)))
            .collect();
        let actual_pairs: Vec<_> = charset.iter().collect();
        assert_eq!(actual_pairs, expected_pairs);
        assert_eq!(actual_pairs.len() as u32, num_glyphs);
        // Out of bounds glyphs
        assert_unmapped_from(&charset, num_glyphs);
        // Reverse mapping
        for (gid, sid) in expected_sids.iter().enumerate() {
            assert_eq!(
                charset.glyph_id(Sid::new(*sid as u16)),
                Ok(GlyphId::new(gid as u32))
            );
        }
    }
}
546}