Skip to main content

read_fonts/ps/cff/
encoding.rs

1//! PostScript encodings.
2//!
3//! This maps font specific character codes to string ids.
4//!
5//! See "Glyph Organization" at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=18>
6//! for an explanation of how charsets, encodings and glyphs are related.
7
8use super::charset::Charset;
9use crate::{
10    ps::{encoding::PredefinedEncoding, string::Sid},
11    FontData, GlyphId, ReadError,
12};
13
14#[doc(inline)]
15pub use super::v1::{EncodingRange1 as Range1, EncodingSupplement as Supplement};
16
/// Encoding of a CFF font: associates character codes with glyphs.
///
/// Use [`Encoding::map`] together with the font's [`Charset`] to resolve a
/// character code to a glyph identifier.
///
/// See "Encodings" at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=18>.
#[derive(Clone)]
pub enum Encoding<'a> {
    /// One of the predefined encodings (standard or expert). These are
    /// selected by the special offsets 0 and 1 in [`Encoding::new`] and map
    /// codes to string ids, which the charset then resolves to glyphs.
    Predefined(PredefinedEncoding),
    /// An encoding table parsed from the font data.
    Custom(CustomEncoding<'a>),
}
25
26impl<'a> Encoding<'a> {
27    /// Parses an encoding at the given offset.
28    ///
29    /// Special offsets 0 and 1 are parsed as the predefined standard and
30    /// expert encodings, respectively.
31    pub fn new(data: &'a [u8], offset: usize) -> Result<Self, ReadError> {
32        match offset {
33            0 => Ok(Self::Predefined(PredefinedEncoding::Standard)),
34            1 => Ok(Self::Predefined(PredefinedEncoding::Expert)),
35            _ => CustomEncoding::new(data.get(offset..).ok_or(ReadError::OutOfBounds)?)
36                .map(Self::Custom),
37        }
38    }
39
40    /// Maps a character code to a glyph identifier.
41    pub fn map(&self, charset: &Charset, code: u8) -> Option<GlyphId> {
42        match self {
43            Self::Predefined(predefined) => charset.glyph_id(predefined.sid(code)?).ok(),
44            Self::Custom(custom) => custom.map(charset, code),
45        }
46    }
47}
48
/// Encoding stored in the font itself, in one of two table formats.
///
/// Each variant carries the main table followed by a (possibly empty) list of
/// supplemental entries. A supplemental entry pairs a character code with a
/// string id, which is resolved to a glyph through the charset; the main
/// table maps character codes to glyph ids directly.
#[derive(Clone)]
pub enum CustomEncoding<'a> {
    /// Array of character codes: the code stored at index `i` maps to glyph
    /// id `i + 1` (the notdef glyph at id 0 is implicit and has no entry).
    Format0(&'a [u8], &'a [Supplement]),
    /// Sequence of character code ranges. Glyph ids are assigned
    /// consecutively across the ranges, in order, starting at 1.
    Format1(&'a [Range1], &'a [Supplement]),
}
58
59impl<'a> CustomEncoding<'a> {
60    /// Parses a custom encoding from the given data.
61    pub fn new(data: &'a [u8]) -> Result<Self, ReadError> {
62        let mut cursor = FontData::new(data).cursor();
63        let header = cursor.read::<u8>()?;
64        let has_supplement = header & 0x80 != 0;
65        // Macro because a closure cannot borrow cursor mutably
66        macro_rules! read_supplement {
67            () => {
68                if has_supplement {
69                    let count = cursor.read::<u8>()?;
70                    cursor.read_array::<Supplement>(count as usize)?
71                } else {
72                    &[]
73                }
74            };
75        }
76        let format = header & 0x7F;
77        match format {
78            0 => {
79                let n_codes = cursor.read::<u8>()?;
80                let codes = cursor.read_array(n_codes as usize)?;
81                let supp = read_supplement!();
82                Ok(Self::Format0(codes, supp))
83            }
84            1 => {
85                let n_ranges = cursor.read::<u8>()?;
86                let ranges = cursor.read_array(n_ranges as usize)?;
87                let supp = read_supplement!();
88                Ok(Self::Format1(ranges, supp))
89            }
90            _ => Err(ReadError::InvalidFormat(format as _)),
91        }
92    }
93
94    /// Maps a character code to a glyph identifier.  
95    pub fn map(&self, charset: &Charset, code: u8) -> Option<GlyphId> {
96        let read_sup = |sup: &[Supplement]| {
97            sup.iter()
98                .find(|s| s.code == code)
99                .and_then(|s| charset.glyph_id(Sid::new(s.glyph.get())).ok())
100        };
101        match self {
102            Self::Format0(codes, sup) => read_sup(sup).or_else(|| {
103                codes
104                    .iter()
105                    .position(|c| *c == code)
106                    // notdef is implicit so add one
107                    .map(|gid| GlyphId::new(gid as u32 + 1))
108            }),
109            Self::Format1(ranges, sup) => read_sup(sup).or_else(|| {
110                let mut gid = 1u32;
111                for range in ranges.iter() {
112                    let end = range.first.saturating_add(range.n_left);
113                    if (range.first..=end).contains(&code) {
114                        gid += (code - range.first) as u32;
115                        return Some(GlyphId::new(gid));
116                    }
117                    gid += range.n_left as u32 + 1;
118                }
119                None
120            }),
121        }
122    }
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Charset shared by the standard, custom and supplemental tests.
    fn iso_adobe_charset() -> Charset<'static> {
        Charset::new(Default::default(), 0, 256).unwrap()
    }

    /// Charset used by the expert encoding test.
    fn iso_expert_charset() -> Charset<'static> {
        Charset::new(Default::default(), 1, 256).unwrap()
    }

    /// Every code of the standard encoding must resolve to the glyph that
    /// the charset assigns to the code's string id.
    #[test]
    fn predefined_standard() {
        let charset = iso_adobe_charset();
        let encoding = Encoding::Predefined(PredefinedEncoding::Standard);
        for code in u8::MIN..=u8::MAX {
            let mapped = encoding.map(&charset, code);
            let sid = PredefinedEncoding::Standard.sid(code).unwrap();
            assert_eq!(mapped.unwrap(), charset.glyph_id(sid).unwrap());
        }
    }

    /// Same check as `predefined_standard`, for the expert encoding.
    #[test]
    fn predefined_expert() {
        let charset = iso_expert_charset();
        let encoding = Encoding::Predefined(PredefinedEncoding::Expert);
        for code in u8::MIN..=u8::MAX {
            let mapped = encoding.map(&charset, code);
            let sid = PredefinedEncoding::Expert.sid(code).unwrap();
            assert_eq!(mapped.unwrap(), charset.glyph_id(sid).unwrap());
        }
    }

    /// Format 0: the n-th listed code maps to glyph id n (counting from 1).
    #[test]
    fn custom_format_0() {
        let codes = [3, 8, 9, 10, 11];
        let charset = iso_adobe_charset();
        let encoding = Encoding::Custom(CustomEncoding::Format0(&codes, &[]));
        for (code, gid) in codes.iter().copied().zip(1u32..) {
            assert_eq!(encoding.map(&charset, code), Some(GlyphId::new(gid)));
        }
    }

    /// Format 1: glyph ids run consecutively across ranges; codes outside
    /// every range are unmapped.
    #[test]
    fn custom_format_1() {
        let ranges = [
            Range1 {
                first: 51,
                n_left: 4,
            },
            Range1 {
                first: 250,
                n_left: 5,
            },
        ];
        let charset = iso_adobe_charset();
        let encoding = Encoding::Custom(CustomEncoding::Format1(&ranges, &[]));
        for code in u8::MIN..=u8::MAX {
            let expected = match code {
                // First range covers glyph ids 1..=5.
                51..=55 => Some(GlyphId::new(u32::from(code) - 50)),
                // Second range continues at glyph id 6.
                250..=255 => Some(GlyphId::new(u32::from(code) - 244)),
                _ => None,
            };
            assert_eq!(encoding.map(&charset, code), expected);
        }
    }

    /// Supplemental entries are honored even when the main table is empty.
    #[test]
    fn supplemental() {
        // map 40 -> z and 122 -> parenleft
        let supplement = [(40, 91), (122, 9)].map(|(code, sid)| Supplement {
            code,
            glyph: sid.into(),
        });
        let charset = iso_adobe_charset();
        let encoding = Encoding::Custom(CustomEncoding::Format0(&[], &supplement));

        assert_eq!(encoding.map(&charset, 40).unwrap().to_u32(), 91);
        let z_sid = charset.string_id(GlyphId::new(91)).unwrap();
        assert_eq!(z_sid.resolve_standard().unwrap(), b"z");

        assert_eq!(encoding.map(&charset, 122).unwrap().to_u32(), 9);
        let paren_sid = charset.string_id(GlyphId::new(9)).unwrap();
        assert_eq!(paren_sid.resolve_standard().unwrap(), b"parenleft");
    }
}