Skip to main content

read_fonts/ps/
charmap.rs

1//! Unicode character map generated from glyph names.
2//!
3//! Building a character map depends on the `agl` feature.
4
5#[cfg(feature = "agl")]
6use super::agl;
7use alloc::vec::Vec;
8use types::GlyphId;
9
/// Flag stored in the most significant bit of a charmap code to mark the
/// entry as belonging to a variant glyph such as `A.alt`.
const VARIANT_BIT: u32 = 1 << 31;
12
13/// A Unicode charmap built from glyph names.
14#[derive(Clone, Default, Debug)]
15pub struct Charmap {
16    mapping: Vec<(u32, GlyphId)>,
17}
18
19impl Charmap {
20    /// Create a new unicode charmap for the given sequence of glyph id
21    /// and name pairs.
22    // See <https://gitlab.freedesktop.org/freetype/freetype/-/blob/80a507a6b8e3d2906ad2c8ba69329bd2fb2a85ef/src/psnames/psmodule.c#L313>
23    #[cfg(feature = "agl")]
24    pub fn from_glyph_names<'a>(pairs: impl Iterator<Item = (GlyphId, &'a str)>) -> Self {
25        #[derive(Copy, Clone, PartialEq)]
26        enum State {
27            Unchecked,
28            Include,
29            Exclude,
30        }
31        let mut extra_glyphs = [(State::Unchecked, GlyphId::NOTDEF); 10];
32        let mut mapping = Vec::new();
33        for (gid, name) in pairs {
34            // Check extra glyphs by name
35            if let Some(n) = EXTRA_GLYPH_LIST
36                .iter()
37                .position(|(_, extra_name)| name == *extra_name)
38            {
39                let extra = &mut extra_glyphs[n];
40                if extra.0 == State::Unchecked {
41                    extra.0 = State::Include;
42                    extra.1 = gid;
43                }
44            }
45            // Map to a char
46            let Some(mut ch) = agl::name_to_char(name).map(|ch| ch as u32) else {
47                continue;
48            };
49            if agl::split_variant(name).1.is_some() {
50                // FreeType sets the high bit for variant glyphs
51                ch |= VARIANT_BIT;
52            }
53            // If we have a direct char mapping for an entry in the extra
54            // glyph list then disable it
55            if let Some(n) = EXTRA_GLYPH_LIST
56                .iter()
57                .position(|(extra_ch, _)| ch == *extra_ch)
58            {
59                extra_glyphs[n].0 = State::Exclude;
60            }
61            mapping.push((ch, gid));
62        }
63        for ((extra_ch, _), (state, gid)) in EXTRA_GLYPH_LIST.iter().zip(extra_glyphs) {
64            if state == State::Include {
65                mapping.push((*extra_ch, gid));
66            }
67        }
68        mapping.shrink_to_fit();
69        mapping.sort_unstable_by(|a, b| {
70            // Custom comparison to properly sort base glyphs and variants
71            // <https://gitlab.freedesktop.org/freetype/freetype/-/blob/80a507a6b8e3d2906ad2c8ba69329bd2fb2a85ef/src/psnames/psmodule.c#L182>
72            let a_base = a.0 & !VARIANT_BIT;
73            let b_base = b.0 & !VARIANT_BIT;
74            if a_base == b_base {
75                a.0.cmp(&b.0)
76            } else {
77                a_base.cmp(&b_base)
78            }
79        });
80        Self { mapping }
81    }
82
83    /// Returns the glyph id for the given character.
84    pub fn map(&self, ch: impl Into<u32>) -> Option<GlyphId> {
85        // Custom binary search that falls back to variants if a base
86        // glyph isn't found
87        // <https://gitlab.freedesktop.org/freetype/freetype/-/blob/80a507a6b8e3d2906ad2c8ba69329bd2fb2a85ef/src/psnames/psmodule.c#L412>
88        let ch = ch.into();
89        let mut min = 0;
90        let mut max = self.mapping.len();
91        let mut result = None;
92        while min < max {
93            let mid = min + ((max - min) >> 1);
94            let entry = self.mapping.get(mid)?;
95            if entry.0 == ch {
96                result = Some(entry.1);
97                break;
98            }
99            let base_gid = entry.0 & !VARIANT_BIT;
100            if base_gid == ch {
101                // Remember the variant but keep on search for a base
102                result = Some(entry.1);
103            }
104            if base_gid < ch {
105                min = mid + 1;
106            } else {
107                max = mid;
108            }
109        }
110        result
111    }
112
113    pub fn iter(&self) -> Iter<'_> {
114        Iter(self.mapping.iter().copied())
115    }
116}
117
118/// Iterator for a character map.
119#[derive(Clone)]
120pub struct Iter<'a>(core::iter::Copied<core::slice::Iter<'a, (u32, GlyphId)>>);
121
122impl Iterator for Iter<'_> {
123    type Item = (u32, GlyphId);
124
125    fn next(&mut self) -> Option<Self::Item> {
126        self.0.next()
127    }
128}
129
/// Additional `(code, glyph name)` mappings for glyphs not handled well in
/// AGL.
///
/// When a glyph with the listed name exists and no glyph name resolves to
/// the listed code directly, the charmap builder maps the code to that
/// glyph as well.
/// See <https://gitlab.freedesktop.org/freetype/freetype/-/blob/80a507a6b8e3d2906ad2c8ba69329bd2fb2a85ef/src/psnames/psmodule.c#L218>
#[cfg(feature = "agl")]
#[rustfmt::skip]
const EXTRA_GLYPH_LIST: [(u32, &str); 10] = [
    // WGL 4
    ('\u{0394}' as u32, "Delta"),          // GREEK CAPITAL LETTER DELTA
    ('\u{03A9}' as u32, "Omega"),          // GREEK CAPITAL LETTER OMEGA
    ('\u{2215}' as u32, "fraction"),       // DIVISION SLASH
    ('\u{00AD}' as u32, "hyphen"),         // SOFT HYPHEN
    ('\u{02C9}' as u32, "macron"),         // MODIFIER LETTER MACRON
    ('\u{03BC}' as u32, "mu"),             // GREEK SMALL LETTER MU
    ('\u{2219}' as u32, "periodcentered"), // BULLET OPERATOR
    ('\u{00A0}' as u32, "space"),          // NO-BREAK SPACE
    // Romanian
    ('\u{021A}' as u32, "Tcommaaccent"),   // LATIN CAPITAL LETTER T WITH COMMA BELOW
    ('\u{021B}' as u32, "tcommaaccent"),   // LATIN SMALL LETTER T WITH COMMA BELOW
];
148
#[cfg(test)]
#[cfg(feature = "agl")]
mod tests {
    use super::super::type1::Type1Font;
    use super::*;

    /// Builds a charmap from raw `(glyph id, glyph name)` pairs.
    fn charmap_from(pairs: &[(u32, &str)]) -> Charmap {
        Charmap::from_glyph_names(pairs.iter().map(|&(gid, name)| (GlyphId::new(gid), name)))
    }

    #[test]
    fn cmap() {
        let cmap = charmap_from(&[
            (1, "A"),
            (2, "uni0042"), // B
            (333, "C.alt"),
            (4, "D"),
            (51, "Lcedilla"),
            (22, "Cdot"),
            (8, "aacute"),
            (7, "union"),
        ]);
        let expected = [
            ('A', 1),
            ('B', 2),
            // There is no glyph for "C" itself, only "C.alt", which is
            // selected by design (matching FT)
            ('C', 333),
            ('D', 4),
            ('Ļ', 51),
            ('Ċ', 22),
            ('á', 8),
            ('∪', 7),
        ];
        for (ch, gid) in expected {
            assert_eq!(
                cmap.map(ch),
                Some(GlyphId::new(gid)),
                "wrong glyph for {ch:?}"
            );
        }
    }

    #[test]
    fn cmap_from_type1() {
        let font = Type1Font::new(font_test_data::type1::NOTO_SERIF_REGULAR_SUBSET_PFB).unwrap();
        let cmap = Charmap::from_glyph_names(font.glyph_names());
        // Extracted from FreeType's generated unicode cmap
        let expected = [
            ('H' as u32, 1),
            // H.c2sc which gets encoded as a variant
            ('H' as u32 | VARIANT_BIT, 8),
            ('f' as u32, 2),
            ('i' as u32, 3),
            ('x' as u32, 4),
        ];
        let entries: Vec<_> = cmap.iter().map(|(ch, gid)| (ch, gid.to_u32())).collect();
        assert_eq!(entries, expected);
        assert_eq!(cmap.map('a'), None);
        for (ch, gid) in expected {
            // Variant codes are not characters and must not resolve
            let want = if ch & VARIANT_BIT == 0 {
                Some(GlyphId::new(gid))
            } else {
                None
            };
            assert_eq!(cmap.map(ch), want, "cmap failed for {ch} -> {gid}");
        }
    }

    #[test]
    fn extra_glyphs_override() {
        // With a "hyphen" glyph but no "softhyphen", an additional entry
        // maps the soft-hyphen codepoint to the hyphen glyph
        let cmap = charmap_from(&[(1, "hyphen")]);
        assert_eq!(cmap.mapping.len(), 2);
        for ch in ['\u{00AD}', '\u{002D}'] {
            assert_eq!(cmap.map(ch), Some(GlyphId::new(1)));
        }
    }

    #[test]
    fn extra_glyphs_no_override() {
        // An explicit "softhyphen" glyph suppresses any additional mapping
        let cmap = charmap_from(&[(1, "softhyphen")]);
        assert_eq!(cmap.mapping.len(), 1);
        assert_eq!(cmap.map('\u{00AD}'), Some(GlyphId::new(1)));
    }
}