Skip to main content

read_fonts/ps/cff/
dict.rs

1//! Parsing for PostScript DICTs.
2
3use super::{
4    blend::BlendState,
5    stack::{Number, Stack},
6};
7use crate::{
8    ps::{
9        error::Error,
10        hinting::{Blues, StemSnaps},
11        num::{self, BcdComponents},
12        string::Sid,
13        transform::ScaledFontMatrix,
14    },
15    types::Fixed,
16    Cursor, ReadError,
17};
18use std::ops::Range;
19
/// Operator that can be encountered while reading a PostScript DICT.
///
/// The set corresponds to "Table 9 Top DICT Operator Entries" and
/// "Table 23 Private DICT Operators" at
/// <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf>
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum Operator {
    Version,
    Notice,
    FullName,
    FamilyName,
    Weight,
    FontBbox,
    CharstringsOffset,
    PrivateDictRange,
    VariationStoreOffset,
    Copyright,
    IsFixedPitch,
    ItalicAngle,
    UnderlinePosition,
    UnderlineThickness,
    PaintType,
    CharstringType,
    FontMatrix,
    StrokeWidth,
    FdArrayOffset,
    FdSelectOffset,
    BlueValues,
    OtherBlues,
    FamilyBlues,
    FamilyOtherBlues,
    SubrsOffset,
    VariationStoreIndex,
    BlueScale,
    BlueShift,
    BlueFuzz,
    LanguageGroup,
    ExpansionFactor,
    Encoding,
    Charset,
    UniqueId,
    Xuid,
    SyntheticBase,
    PostScript,
    BaseFontName,
    BaseFontBlend,
    Ros,
    CidFontVersion,
    CidFontRevision,
    CidFontType,
    CidCount,
    UidBase,
    FontName,
    StdHw,
    StdVw,
    DefaultWidthX,
    NominalWidthX,
    Blend,
    StemSnapH,
    StemSnapV,
    ForceBold,
    InitialRandomSeed,
}

impl Operator {
    /// Looks up the operator assigned to a single byte opcode.
    ///
    /// Returns `None` when the opcode has no operator in this table.
    fn from_opcode(opcode: u8) -> Option<Self> {
        // Arms are listed in opcode order. Opcodes 6..=11 and 19..=23 are
        // the Private DICT operators; everything else is a Top DICT
        // operator. Font DICT only uses PrivateDictRange.
        let op = match opcode {
            0 => Self::Version,
            1 => Self::Notice,
            2 => Self::FullName,
            3 => Self::FamilyName,
            4 => Self::Weight,
            5 => Self::FontBbox,
            6 => Self::BlueValues,
            7 => Self::OtherBlues,
            8 => Self::FamilyBlues,
            9 => Self::FamilyOtherBlues,
            10 => Self::StdHw,
            11 => Self::StdVw,
            13 => Self::UniqueId,
            14 => Self::Xuid,
            15 => Self::Charset,
            16 => Self::Encoding,
            17 => Self::CharstringsOffset,
            18 => Self::PrivateDictRange,
            19 => Self::SubrsOffset,
            20 => Self::DefaultWidthX,
            21 => Self::NominalWidthX,
            22 => Self::VariationStoreIndex,
            23 => Self::Blend,
            24 => Self::VariationStoreOffset,
            _ => return None,
        };
        Some(op)
    }

    /// Looks up the operator assigned to the second byte of an escaped
    /// (two byte) opcode.
    ///
    /// Returns `None` when the opcode has no operator in this table.
    fn from_extended_opcode(opcode: u8) -> Option<Self> {
        // Arms are listed in opcode order. Opcodes 9..=14 and 17..=19 are
        // the Private DICT operators; everything else is a Top DICT
        // operator.
        let op = match opcode {
            0 => Self::Copyright,
            1 => Self::IsFixedPitch,
            2 => Self::ItalicAngle,
            3 => Self::UnderlinePosition,
            4 => Self::UnderlineThickness,
            5 => Self::PaintType,
            6 => Self::CharstringType,
            7 => Self::FontMatrix,
            8 => Self::StrokeWidth,
            9 => Self::BlueScale,
            10 => Self::BlueShift,
            11 => Self::BlueFuzz,
            12 => Self::StemSnapH,
            13 => Self::StemSnapV,
            14 => Self::ForceBold,
            17 => Self::LanguageGroup,
            18 => Self::ExpansionFactor,
            19 => Self::InitialRandomSeed,
            20 => Self::SyntheticBase,
            21 => Self::PostScript,
            22 => Self::BaseFontName,
            23 => Self::BaseFontBlend,
            30 => Self::Ros,
            31 => Self::CidFontVersion,
            32 => Self::CidFontRevision,
            33 => Self::CidFontType,
            34 => Self::CidCount,
            35 => Self::UidBase,
            36 => Self::FdArrayOffset,
            37 => Self::FdSelectOffset,
            38 => Self::FontName,
            _ => return None,
        };
        Some(op)
    }
}
158
159/// Either a PostScript DICT operator or a (numeric) operand.
160#[derive(Copy, Clone, PartialEq, Eq, Debug)]
161pub enum Token {
162    /// An operator parsed from a DICT.
163    Operator(Operator),
164    /// A number parsed from a DICT. If the source was in
165    /// binary coded decimal format, then the second field
166    /// contains the parsed components.
167    Operand(Number, Option<BcdComponents>),
168}
169
170impl From<Operator> for Token {
171    fn from(value: Operator) -> Self {
172        Self::Operator(value)
173    }
174}
175
176impl<T> From<T> for Token
177where
178    T: Into<Number>,
179{
180    fn from(value: T) -> Self {
181        Self::Operand(value.into(), None)
182    }
183}
184
185/// Given a byte slice containing DICT data, returns an iterator yielding
186/// raw operands and operators.
187///
188/// This does not perform any additional processing such as type conversion,
189/// delta decoding or blending.
190pub fn tokens(dict_data: &[u8]) -> impl Iterator<Item = Result<Token, Error>> + '_ + Clone {
191    let mut cursor = crate::FontData::new(dict_data).cursor();
192    std::iter::from_fn(move || {
193        if cursor.remaining_bytes() == 0 {
194            None
195        } else {
196            Some(parse_token(&mut cursor))
197        }
198    })
199}
200
201fn parse_token(cursor: &mut Cursor) -> Result<Token, Error> {
202    // Escape opcode for accessing extensions.
203    const ESCAPE: u8 = 12;
204    let b0 = cursor.read::<u8>()?;
205    Ok(if b0 == ESCAPE {
206        let b1 = cursor.read::<u8>()?;
207        Token::Operator(Operator::from_extended_opcode(b1).ok_or(Error::InvalidDictOperator(b1))?)
208    } else {
209        // See <https://learn.microsoft.com/en-us/typography/opentype/spec/cff2#table-3-operand-encoding>
210        match b0 {
211            28 | 29 | 32..=254 => Token::Operand(num::parse_int(cursor, b0)?.into(), None),
212            30 => {
213                let components = BcdComponents::parse(cursor)?;
214                Token::Operand(components.value(false).into(), Some(components))
215            }
216            _ => Token::Operator(Operator::from_opcode(b0).ok_or(Error::InvalidDictOperator(b0))?),
217        }
218    })
219}
220
221/// PostScript DICT Operator with its associated operands.
222#[derive(Clone, PartialEq, Eq, Debug)]
223pub enum Entry {
224    Version(Sid),
225    Notice(Sid),
226    FullName(Sid),
227    FamilyName(Sid),
228    Weight(Sid),
229    FontBbox([Fixed; 4]),
230    CharstringsOffset(usize),
231    PrivateDictRange(Range<usize>),
232    VariationStoreOffset(usize),
233    Copyright(Sid),
234    IsFixedPitch(bool),
235    ItalicAngle(Fixed),
236    UnderlinePosition(Fixed),
237    UnderlineThickness(Fixed),
238    PaintType(i32),
239    CharstringType(i32),
240    FontMatrix(ScaledFontMatrix),
241    StrokeWidth(Fixed),
242    FdArrayOffset(usize),
243    FdSelectOffset(usize),
244    BlueValues(Blues),
245    OtherBlues(Blues),
246    FamilyBlues(Blues),
247    FamilyOtherBlues(Blues),
248    SubrsOffset(usize),
249    VariationStoreIndex(u16),
250    BlueScale(Fixed),
251    BlueShift(Fixed),
252    BlueFuzz(Fixed),
253    LanguageGroup(i32),
254    ExpansionFactor(Fixed),
255    Encoding(usize),
256    Charset(usize),
257    UniqueId(i32),
258    Xuid,
259    SyntheticBase(i32),
260    PostScript(Sid),
261    BaseFontName(Sid),
262    BaseFontBlend,
263    Ros {
264        registry: Sid,
265        ordering: Sid,
266        supplement: Fixed,
267    },
268    CidFontVersion(Fixed),
269    CidFontRevision(Fixed),
270    CidFontType(i32),
271    CidCount(u32),
272    UidBase(i32),
273    FontName(Sid),
274    StdHw(Fixed),
275    StdVw(Fixed),
276    DefaultWidthX(Fixed),
277    NominalWidthX(Fixed),
278    StemSnapH(StemSnaps),
279    StemSnapV(StemSnaps),
280    ForceBold(bool),
281    InitialRandomSeed(i32),
282}
283
284/// Given a byte slice containing DICT data, returns an iterator yielding
285/// each operator with its associated operands.
286///
287/// This performs appropriate type conversions, decodes deltas and applies
288/// blending.
289///
290/// If processing a Private DICT from a CFF2 table and an item variation
291/// store is present, then `blend_state` must be provided.
292pub fn entries<'a>(
293    dict_data: &'a [u8],
294    mut blend_state: Option<BlendState<'a>>,
295) -> impl Iterator<Item = Result<Entry, Error>> + 'a {
296    let mut stack = Stack::new();
297    let mut last_bcd_components = None;
298    let mut cursor = crate::FontData::new(dict_data).cursor();
299    let mut cursor_pos = 0;
300    std::iter::from_fn(move || loop {
301        if cursor.remaining_bytes() == 0 {
302            return None;
303        }
304        let token = match parse_token(&mut cursor) {
305            Ok(token) => token,
306            Err(Error::InvalidDictOperator(_)) => {
307                // Some buggy fonts have invalid dict operators. Clear
308                // the stack and attempt to continue.
309                // FreeType only processes known fields:
310                // <https://gitlab.freedesktop.org/freetype/freetype/-/blob/80a507a6b8e3d2906ad2c8ba69329bd2fb2a85ef/src/cff/cffparse.c#L1328>
311                // And then clears the stack regardless:
312                // <https://gitlab.freedesktop.org/freetype/freetype/-/blob/80a507a6b8e3d2906ad2c8ba69329bd2fb2a85ef/src/cff/cffparse.c#L1469>
313                stack.clear();
314                continue;
315            }
316            Err(e) => return Some(Err(e)),
317        };
318        match token {
319            Token::Operand(number, bcd_components) => {
320                last_bcd_components = bcd_components;
321                match stack.push(number) {
322                    Ok(_) => continue,
323                    Err(e) => return Some(Err(e)),
324                }
325            }
326            Token::Operator(op) => {
327                if op == Operator::Blend || op == Operator::VariationStoreIndex {
328                    let state = match blend_state.as_mut() {
329                        Some(state) => state,
330                        None => return Some(Err(Error::MissingBlendState)),
331                    };
332                    if op == Operator::VariationStoreIndex {
333                        match stack
334                            .get_i32(0)
335                            .and_then(|ix| state.set_store_index(ix as u16))
336                        {
337                            Ok(_) => {}
338                            Err(e) => return Some(Err(e)),
339                        }
340                    }
341                    if op == Operator::Blend {
342                        match stack.apply_blend(state) {
343                            Ok(_) => continue,
344                            Err(e) => return Some(Err(e)),
345                        }
346                    }
347                }
348                if op == Operator::BlueScale {
349                    // FreeType parses BlueScale using a scaling factor of
350                    // 1000, presumably to capture more precision in the
351                    // fractional part. We do the same.
352                    // See <https://gitlab.freedesktop.org/freetype/freetype/-/blob/master/src/cff/cfftoken.h?ref_type=heads#L87>
353                    if let Some(bcd_components) = last_bcd_components.take() {
354                        // If the most recent numeric value was parsed as a
355                        // binary coded decimal then recompute the value using
356                        // the desired scaling and replace it on the stack
357                        stack.pop_fixed().ok()?;
358                        stack.push(bcd_components.value(true)).ok()?;
359                    }
360                }
361                if op == Operator::FontMatrix {
362                    // FontMatrix is also parsed specially... *sigh*
363                    // Redo the entire thing with special scaling factors
364                    // See <https://gitlab.freedesktop.org/freetype/freetype/-/blob/f1cd6dbfa0c98f352b698448f40ac27e8fb3832e/src/cff/cffparse.c#L623>
365                    // Dump the current values
366                    stack.clear();
367                    last_bcd_components = None;
368                    // Now reparse with dynamic scaling
369                    let mut cursor = crate::FontData::new(dict_data).cursor();
370                    cursor.advance_by(cursor_pos);
371                    if let Some(matrix) = ScaledFontMatrix::parse(&mut cursor) {
372                        return Some(Ok(Entry::FontMatrix(matrix)));
373                    }
374                    continue;
375                }
376                last_bcd_components = None;
377                let entry = parse_entry(op, &mut stack);
378                stack.clear();
379                cursor_pos = cursor.position().unwrap_or_default();
380                return Some(entry);
381            }
382        }
383    })
384}
385
386fn parse_entry(op: Operator, stack: &mut Stack) -> Result<Entry, Error> {
387    use Operator::*;
388    Ok(match op {
389        Version => Entry::Version(stack.pop_i32()?.into()),
390        Notice => Entry::Notice(stack.pop_i32()?.into()),
391        FullName => Entry::FullName(stack.pop_i32()?.into()),
392        FamilyName => Entry::FamilyName(stack.pop_i32()?.into()),
393        Weight => Entry::Weight(stack.pop_i32()?.into()),
394        FontBbox => Entry::FontBbox([
395            stack.get_fixed(0)?,
396            stack.get_fixed(1)?,
397            stack.get_fixed(2)?,
398            stack.get_fixed(3)?,
399        ]),
400        CharstringsOffset => Entry::CharstringsOffset(stack.pop_i32()? as usize),
401        PrivateDictRange => {
402            let len = stack.get_i32(0)? as usize;
403            let start = stack.get_i32(1)? as usize;
404            let end = start.checked_add(len).ok_or(ReadError::OutOfBounds)?;
405            Entry::PrivateDictRange(start..end)
406        }
407        VariationStoreOffset => Entry::VariationStoreOffset(stack.pop_i32()? as usize),
408        Copyright => Entry::Copyright(stack.pop_i32()?.into()),
409        IsFixedPitch => Entry::IsFixedPitch(stack.pop_i32()? != 0),
410        ItalicAngle => Entry::ItalicAngle(stack.pop_fixed()?),
411        UnderlinePosition => Entry::UnderlinePosition(stack.pop_fixed()?),
412        UnderlineThickness => Entry::UnderlineThickness(stack.pop_fixed()?),
413        PaintType => Entry::PaintType(stack.pop_i32()?),
414        CharstringType => Entry::CharstringType(stack.pop_i32()?),
415        FontMatrix => unreachable!(),
416        StrokeWidth => Entry::StrokeWidth(stack.pop_fixed()?),
417        FdArrayOffset => Entry::FdArrayOffset(stack.pop_i32()? as usize),
418        FdSelectOffset => Entry::FdSelectOffset(stack.pop_i32()? as usize),
419        BlueValues => {
420            stack.apply_delta_prefix_sum();
421            Entry::BlueValues(Blues::new(stack.fixed_values()))
422        }
423        OtherBlues => {
424            stack.apply_delta_prefix_sum();
425            Entry::OtherBlues(Blues::new(stack.fixed_values()))
426        }
427        FamilyBlues => {
428            stack.apply_delta_prefix_sum();
429            Entry::FamilyBlues(Blues::new(stack.fixed_values()))
430        }
431        FamilyOtherBlues => {
432            stack.apply_delta_prefix_sum();
433            Entry::FamilyOtherBlues(Blues::new(stack.fixed_values()))
434        }
435        SubrsOffset => Entry::SubrsOffset(stack.pop_i32()? as usize),
436        VariationStoreIndex => Entry::VariationStoreIndex(stack.pop_i32()? as u16),
437        BlueScale => Entry::BlueScale(stack.pop_fixed()?),
438        BlueShift => Entry::BlueShift(stack.pop_fixed()?),
439        BlueFuzz => Entry::BlueFuzz(stack.pop_fixed()?),
440        LanguageGroup => Entry::LanguageGroup(stack.pop_i32()?),
441        ExpansionFactor => Entry::ExpansionFactor(stack.pop_fixed()?),
442        Encoding => Entry::Encoding(stack.pop_i32()? as usize),
443        Charset => Entry::Charset(stack.pop_i32()? as usize),
444        UniqueId => Entry::UniqueId(stack.pop_i32()?),
445        Xuid => Entry::Xuid,
446        SyntheticBase => Entry::SyntheticBase(stack.pop_i32()?),
447        PostScript => Entry::PostScript(stack.pop_i32()?.into()),
448        BaseFontName => Entry::BaseFontName(stack.pop_i32()?.into()),
449        BaseFontBlend => Entry::BaseFontBlend,
450        Ros => Entry::Ros {
451            registry: stack.get_i32(0)?.into(),
452            ordering: stack.get_i32(1)?.into(),
453            supplement: stack.get_fixed(2)?,
454        },
455        CidFontVersion => Entry::CidFontVersion(stack.pop_fixed()?),
456        CidFontRevision => Entry::CidFontRevision(stack.pop_fixed()?),
457        CidFontType => Entry::CidFontType(stack.pop_i32()?),
458        CidCount => Entry::CidCount(stack.pop_i32()? as u32),
459        UidBase => Entry::UidBase(stack.pop_i32()?),
460        FontName => Entry::FontName(stack.pop_i32()?.into()),
461        StdHw => Entry::StdHw(stack.pop_fixed()?),
462        StdVw => Entry::StdVw(stack.pop_fixed()?),
463        DefaultWidthX => Entry::DefaultWidthX(stack.pop_fixed()?),
464        NominalWidthX => Entry::NominalWidthX(stack.pop_fixed()?),
465        StemSnapH => {
466            stack.apply_delta_prefix_sum();
467            Entry::StemSnapH(StemSnaps::new(stack.fixed_values()))
468        }
469        StemSnapV => {
470            stack.apply_delta_prefix_sum();
471            Entry::StemSnapV(StemSnaps::new(stack.fixed_values()))
472        }
473        ForceBold => Entry::ForceBold(stack.pop_i32()? != 0),
474        InitialRandomSeed => Entry::InitialRandomSeed(stack.pop_i32()?),
475        // Blend is handled at the layer above
476        Blend => unreachable!(),
477    })
478}
479
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        tables::variations::ItemVariationStore, types::F2Dot14, FontData, FontRead, FontRef,
        TableProvider,
    };
    use font_test_data::bebuffer::BeBuffer;

    /// Raw tokens of the Top DICT in the CFF2 example table.
    #[test]
    fn example_top_dict_tokens() {
        let top_dict_data = &font_test_data::cff2::EXAMPLE[0x5..=0xB];
        let parsed = tokens(top_dict_data)
            .map(Result::unwrap)
            .collect::<Vec<_>>();
        let expected: &[Token] = &[
            68.into(),
            Operator::FdArrayOffset.into(),
            56.into(),
            Operator::CharstringsOffset.into(),
            16.into(),
            Operator::VariationStoreOffset.into(),
        ];
        assert_eq!(parsed.as_slice(), expected);
    }

    /// Decoded entries of the Top DICT in the CFF2 example table.
    #[test]
    fn example_top_dict_entries() {
        let top_dict_data = &font_test_data::cff2::EXAMPLE[0x5..=0xB];
        let parsed = entries(top_dict_data, None)
            .map(Result::unwrap)
            .collect::<Vec<_>>();
        let expected: &[Entry] = &[
            Entry::FdArrayOffset(68),
            Entry::CharstringsOffset(56),
            Entry::VariationStoreOffset(16),
        ];
        assert_eq!(parsed.as_slice(), expected);
    }

    /// Decoded entries of the Private DICT in the CFF2 example table,
    /// blended at the default location.
    #[test]
    fn example_private_dict_entries() {
        fn blues(values: &[f64]) -> Blues {
            Blues::new(values.iter().copied().map(Fixed::from_f64))
        }
        fn stem_snaps(values: &[f64]) -> StemSnaps {
            StemSnaps::new(values.iter().copied().map(Fixed::from_f64))
        }

        let private_dict_data = &font_test_data::cff2::EXAMPLE[0x4f..=0xc0];
        let store_data = FontData::new(&font_test_data::cff2::EXAMPLE[18..]);
        let store = ItemVariationStore::read(store_data).unwrap();
        let coords = &[F2Dot14::from_f32(0.0)];
        let blend_state = BlendState::new(store, coords, 0).unwrap();
        let parsed = entries(private_dict_data, Some(blend_state))
            .map(Result::unwrap)
            .collect::<Vec<_>>();
        let expected: &[Entry] = &[
            Entry::BlueValues(blues(&[
                -20.0, 0.0, 472.0, 490.0, 525.0, 540.0, 645.0, 660.0, 670.0, 690.0, 730.0, 750.0,
            ])),
            Entry::OtherBlues(blues(&[-250.0, -240.0])),
            Entry::FamilyBlues(blues(&[
                -20.0, 0.0, 473.0, 491.0, 525.0, 540.0, 644.0, 659.0, 669.0, 689.0, 729.0, 749.0,
            ])),
            Entry::FamilyOtherBlues(blues(&[-249.0, -239.0])),
            Entry::BlueScale(Fixed::from_f64(0.037506103515625)),
            Entry::BlueFuzz(Fixed::ZERO),
            Entry::StdHw(Fixed::from_f64(55.0)),
            Entry::StdVw(Fixed::from_f64(80.0)),
            Entry::StemSnapH(stem_snaps(&[40.0, 55.0])),
            Entry::StemSnapV(stem_snaps(&[80.0, 90.0])),
            Entry::SubrsOffset(114),
        ];
        assert_eq!(parsed.as_slice(), expected);
    }

    /// Decoded entries of the first Top DICT of a real CFF font.
    #[test]
    fn noto_serif_display_top_dict_entries() {
        let font = FontRef::new(font_test_data::NOTO_SERIF_DISPLAY_TRIMMED).unwrap();
        let cff = font.cff().unwrap();
        let top_dict_data = cff.top_dicts().get(0).unwrap();
        let parsed = entries(top_dict_data, None)
            .map(Result::unwrap)
            .collect::<Vec<_>>();
        let expected: &[Entry] = &[
            Entry::Version(Sid::new(391)),
            Entry::Notice(Sid::new(392)),
            Entry::Copyright(Sid::new(393)),
            Entry::FullName(Sid::new(394)),
            Entry::FamilyName(Sid::new(395)),
            Entry::FontBbox([-693.0, -470.0, 2797.0, 1048.0].map(Fixed::from_f64)),
            Entry::Charset(517),
            Entry::PrivateDictRange(549..587),
            Entry::CharstringsOffset(521),
        ];
        assert_eq!(parsed.as_slice(), expected);
    }

    // Fuzzer caught add with overflow when constructing private DICT
    // range.
    // See <https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=71746>
    // and <https://oss-fuzz.com/testcase?key=4591358306746368>
    #[test]
    fn private_dict_range_avoid_overflow() {
        // A Private DICT that tries to construct a range from -1..(-1 + -1)
        // which overflows when converted to usize
        let private_dict = BeBuffer::new()
            .push(29u8) // integer operator
            .push(-1i32) // integer value
            .push(29u8) // integer operator
            .push(-1i32) // integer value
            .push(18u8) // PrivateDICT operator
            .to_vec();
        // Just don't panic: drain the iterator and ignore what it yields
        for _entry in entries(&private_dict, None) {}
    }

    /// A FontMatrix whose operands are binary coded decimals.
    #[test]
    fn read_font_matrix() {
        let dict_data = [
            30u8, 10, 0, 31, 139, 30, 10, 0, 1, 103, 255, 30, 10, 0, 31, 139, 139, 12, 7,
        ];
        let first = entries(&dict_data, None).next().unwrap().unwrap();
        let matrix = match first {
            Entry::FontMatrix(matrix) => matrix,
            _ => panic!("This was totally a font matrix"),
        };
        // From ttx: <FontMatrix value="0.001 0 0.000167 0.001 0 0"/>
        // But scaled by 1000 because that's how FreeType does it
        let expected = [
            Fixed::ONE,
            Fixed::ZERO,
            Fixed::from_f64(0.167007446289062),
            Fixed::ONE,
            Fixed::ZERO,
            Fixed::ZERO,
        ];
        assert_eq!(matrix.matrix.elements(), expected);
    }

    /// A FontMatrix made of six empty binary coded decimals.
    #[test]
    fn parse_degenerate_font_matrix() {
        let dict_data = [
            30u8, 0x0F, 30, 0x0F, 30, 0x0F, 30, 0x0F, 30, 0x0F, 30, 0x0F, 12, 7,
        ];
        // Don't return a degenerate matrix at all
        let mut parsed = entries(&dict_data, None);
        assert!(parsed.next().is_none());
    }
}
630        ];
631        // Don't return a degenerate matrix at all
632        assert!(entries(&dict_data, None).next().is_none());
633    }
634}