fonts/
glyph.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::fmt;
6use std::vec::Vec;
7
8use app_units::Au;
9use euclid::default::Point2D;
10use euclid::num::Zero;
11use itertools::Either;
12use log::{debug, error};
13use malloc_size_of_derive::MallocSizeOf;
14use serde::{Deserialize, Serialize};
15
16use crate::{Font, GlyphShapingResult, ShapedGlyph, ShapingFlags, ShapingOptions};
17
18/// GlyphEntry is a port of Gecko's CompressedGlyph scheme for storing glyph data compactly.
19///
20/// In the common case (reasonable glyph advances, no offsets from the font em-box, and one glyph
21/// per character), we pack glyph advance, glyph id, and some flags into a single u32.
22///
23/// In the uncommon case (multiple glyphs per unicode character, large glyph index/advance, or
24/// glyph offsets), we pack the glyph count into GlyphEntry, and store the other glyph information
25/// in DetailedGlyphStore.
26#[derive(Clone, Copy, Debug, Deserialize, MallocSizeOf, PartialEq, Serialize)]
27pub struct GlyphEntry {
28    value: u32,
29}
30
31impl GlyphEntry {
32    fn new(value: u32) -> GlyphEntry {
33        GlyphEntry { value }
34    }
35
36    // Creates a GlyphEntry for the common case
37    fn simple(id: GlyphId, advance: Au) -> GlyphEntry {
38        assert!(is_simple_glyph_id(id));
39        assert!(is_simple_advance(advance));
40
41        let id_mask = id;
42        let Au(advance) = advance;
43        let advance_mask = (advance as u32) << GLYPH_ADVANCE_SHIFT;
44
45        GlyphEntry::new(id_mask | advance_mask | FLAG_IS_SIMPLE_GLYPH)
46    }
47
48    fn complex(detailed_glyph_index: usize) -> GlyphEntry {
49        assert!(detailed_glyph_index as u32 <= u32::MAX >> 1);
50        GlyphEntry::new(detailed_glyph_index as u32)
51    }
52}
53
/// The id of a particular glyph within a font
pub(crate) type GlyphId = u32;

// TODO: make this more type-safe.

// Bit layout of a simple `GlyphEntry` value, from the most significant bit down:
//   bit 31      - entry is a simple glyph
//   bit 30      - originating character is a word separator
//   bits 16..30 - glyph advance; in Au's
//   bits 0..16  - glyph id
const FLAG_CHAR_IS_WORD_SEPARATOR: u32 = 1 << 30;
const FLAG_IS_SIMPLE_GLYPH: u32 = 1 << 31;

const GLYPH_ADVANCE_SHIFT: u32 = 16;
const GLYPH_ADVANCE_MASK: u32 = 0x3FFF << GLYPH_ADVANCE_SHIFT;
const GLYPH_ID_MASK: u32 = 0xFFFF;
66
67// Non-simple glyphs (more than one glyph per char; missing glyph,
68// newline, tab, large advance, or nonzero x/y offsets) may have one
69// or more detailed glyphs associated with them. They are stored in a
70// side array so that there is a 1:1 mapping of GlyphEntry to
71// unicode char.
72
73fn is_simple_glyph_id(id: GlyphId) -> bool {
74    (id & GLYPH_ID_MASK) == id
75}
76
77fn is_simple_advance(advance: Au) -> bool {
78    advance >= Au::zero() && {
79        let unsigned_au = advance.0 as u32;
80        (unsigned_au & (GLYPH_ADVANCE_MASK >> GLYPH_ADVANCE_SHIFT)) == unsigned_au
81    }
82}
83
84// Getters and setters for GlyphEntry. Setter methods are functional,
85// because GlyphEntry is immutable and only a u32 in size.
86impl GlyphEntry {
87    #[inline(always)]
88    fn advance(&self) -> Au {
89        Au::new(((self.value & GLYPH_ADVANCE_MASK) >> GLYPH_ADVANCE_SHIFT) as i32)
90    }
91
92    #[inline]
93    fn id(&self) -> GlyphId {
94        self.value & GLYPH_ID_MASK
95    }
96
97    /// True if the original character was a word separator. These include spaces
98    /// (U+0020), non-breaking spaces (U+00A0), and a few other characters
99    /// non-exhaustively listed in the specification. Other characters may map to the same
100    /// glyphs, but this function does not take mapping into account.
101    ///
102    /// See <https://drafts.csswg.org/css-text/#word-separator>.
103    fn char_is_word_separator(&self) -> bool {
104        self.has_flag(FLAG_CHAR_IS_WORD_SEPARATOR)
105    }
106
107    #[inline(always)]
108    fn set_char_is_word_separator(&mut self) {
109        self.value |= FLAG_CHAR_IS_WORD_SEPARATOR;
110    }
111
112    fn detailed_glyph_index(&self) -> usize {
113        self.value as usize
114    }
115
116    #[inline(always)]
117    fn is_simple(&self) -> bool {
118        self.has_flag(FLAG_IS_SIMPLE_GLYPH)
119    }
120
121    #[inline(always)]
122    fn has_flag(&self, flag: u32) -> bool {
123        (self.value & flag) != 0
124    }
125}
126
127#[derive(Clone, Deserialize, MallocSizeOf, Serialize)]
128pub struct DetailedGlyphEntry {
129    /// The id of the this glyph within the font.
130    id: u32,
131    /// The advance that this glyphs needs ie the distance between where this
132    /// glyph is painted and the next is painted.
133    advance: Au,
134    /// The physical offset that this glyph should be painted with.
135    offset: Option<Point2D<Au>>,
136    /// The number of character this glyph corresponds to in the original string.
137    /// This might be zero and this might be more than one.
138    character_count: usize,
139    /// Whether or not the originating character for this glyph was a word separator
140    is_word_separator: bool,
141}
142
143// This enum is a proxy that's provided to GlyphStore clients when iterating
144// through glyphs (either for a particular TextRun offset, or all glyphs).
145// Rather than eagerly assembling and copying glyph data, it only retrieves
146// values as they are needed from the GlyphStore, using provided offsets.
147#[derive(Clone, Copy)]
148pub enum GlyphInfo<'a> {
149    Simple(&'a GlyphEntry),
150    Detail(&'a DetailedGlyphEntry),
151}
152
153impl GlyphInfo<'_> {
154    pub fn id(self) -> GlyphId {
155        match self {
156            GlyphInfo::Simple(entry) => entry.id(),
157            GlyphInfo::Detail(entry) => entry.id,
158        }
159    }
160
161    #[inline(always)]
162    pub fn advance(self) -> Au {
163        match self {
164            GlyphInfo::Simple(entry) => entry.advance(),
165            GlyphInfo::Detail(entry) => entry.advance,
166        }
167    }
168
169    #[inline]
170    pub fn offset(self) -> Option<Point2D<Au>> {
171        match self {
172            GlyphInfo::Simple(..) => None,
173            GlyphInfo::Detail(entry) => entry.offset,
174        }
175    }
176
177    #[inline]
178    pub fn char_is_word_separator(self) -> bool {
179        match self {
180            GlyphInfo::Simple(entry) => entry.char_is_word_separator(),
181            GlyphInfo::Detail(entry) => entry.is_word_separator,
182        }
183    }
184
185    /// The number of characters that this glyph corresponds to. This may be more
186    /// than one when a single glyph is produced for multiple characters. This may
187    /// be zero when multiple glyphs are produced for a single character.
188    #[inline]
189    pub fn character_count(self) -> usize {
190        match self {
191            GlyphInfo::Simple(..) => 1,
192            GlyphInfo::Detail(entry) => entry.character_count,
193        }
194    }
195}
196
/// Stores the glyph data belonging to a text run.
///
/// Simple glyphs are stored inline in `glyphs`, detailed glyphs are
/// stored as indices into `detailed_glyphs`.
///
/// ~~~ascii
/// +- GlyphStore -----------------------------------+
/// |                  +---+---+---+---+---+---+---+ |
/// | glyphs:          |   | s |   | s |   | s | s | |  d = detailed
/// |                  +-|-+---+-|-+---+-|-+---+---+ |  s = simple
/// |                    |       |       |           |
/// |                    |   +---+-------+           |
/// |                    |   |                       |
/// |                  +-V-+-V-+                     |
/// | detailed_glyphs: | d | d |                     |
/// |                  +---+---+                     |
/// +------------------------------------------------+
/// ~~~
#[derive(Clone, Deserialize, MallocSizeOf, Serialize)]
pub struct GlyphStore {
    // TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector
    // optimization.
    /// A collection of [`GlyphEntry`]s within the [`GlyphStore`]. Each [`GlyphEntry`]
    /// may be simple or detailed. When detailed, there will be a corresponding entry
    /// in [`Self::detailed_glyphs`].
    glyphs: Vec<GlyphEntry>,

    /// A vector of glyphs that cannot fit within a single [`GlyphEntry`] or that
    /// correspond to 0 or more than 1 character in the original string.
    detailed_glyphs: Vec<DetailedGlyphEntry>,

    /// A cache of the advance of the entire glyph store.
    total_advance: Au,

    /// The number of characters that correspond to the glyphs in this [`GlyphStore`].
    total_characters: usize,

    /// A cache of the number of word separators in the entire glyph store.
    /// See <https://drafts.csswg.org/css-text/#word-separator>.
    total_word_separators: usize,

    /// Whether or not this glyph store contains only glyphs for whitespace.
    is_whitespace: bool,

    /// Whether or not this glyph store ends with whitespace glyphs.
    /// Typically whitespace glyphs are placed in a separate store,
    /// but that may not be the case with `white-space: break-spaces`.
    ends_with_whitespace: bool,

    /// Whether or not this glyph store contains only a single glyph for a single
    /// preserved newline.
    is_single_preserved_newline: bool,

    /// Whether or not this [`GlyphStore`] has right-to-left text, which has implications
    /// about the order of the glyphs in the store.
    is_rtl: bool,
}
254
255impl GlyphStore {
256    /// Initializes the glyph store with the given capacity, but doesn't actually add any glyphs.
257    ///
258    /// Use the `add_*` methods to store glyph data.
259    pub(crate) fn new(text: &str, length: usize, options: &ShapingOptions) -> Self {
260        Self {
261            glyphs: Vec::with_capacity(length),
262            detailed_glyphs: Default::default(),
263            total_advance: Au::zero(),
264            total_characters: 0,
265            total_word_separators: 0,
266            is_whitespace: options
267                .flags
268                .contains(ShapingFlags::IS_WHITESPACE_SHAPING_FLAG),
269            ends_with_whitespace: options
270                .flags
271                .contains(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG),
272            is_single_preserved_newline: text.len() == 1 && text.starts_with('\n'),
273            is_rtl: options.flags.contains(ShapingFlags::RTL_FLAG),
274        }
275    }
276
277    /// This constructor turns shaping output from HarfBuzz into a glyph run to be
278    /// used by layout. The idea here is that we add each glyph to the [`GlyphStore`]
279    /// and track to which characters from the original string each glyph
280    /// corresponds. HarfBuzz will either give us glyphs that correspond to
281    /// characters left-to-right or right-to-left. Each character can produce
282    /// multiple glyphs and multiple characters can produce one glyph. HarfBuzz just
283    /// guarantees that the resulting character offsets are in monotone order.
284    pub(crate) fn with_shaped_glyph_data(
285        font: &Font,
286        text: &str,
287        options: &ShapingOptions,
288        shaped_glyph_data: &impl GlyphShapingResult,
289    ) -> Self {
290        debug!(
291            "Shaped: '{text:?}: {:?}",
292            shaped_glyph_data.iter().collect::<Vec<_>>()
293        );
294
295        // Note: Even if we set the `RTL_FLAG` in the options, Harfbuzz may still
296        // give us shaped glyphs in left-to-right order. We need to look at the
297        // actual cluster indices in the shaped run.
298        let shaped_run_is_rtl = shaped_glyph_data.is_rtl();
299        let mut characters = if !shaped_run_is_rtl {
300            Either::Left(text.char_indices())
301        } else {
302            Either::Right(text.char_indices().rev())
303        };
304
305        let mut previous_character_offset = None;
306        let mut glyph_store = GlyphStore::new(text, shaped_glyph_data.len(), options);
307        for mut shaped_glyph in shaped_glyph_data.iter() {
308            // The glyph "cluster" (HarfBuzz terminology) is the byte offset in the string that
309            // this glyph corresponds to. More than one glyph can share a cluster.
310            let glyph_cluster = shaped_glyph.string_byte_offset;
311
312            if let Some(previous_character_offset) = previous_character_offset {
313                if previous_character_offset == glyph_cluster {
314                    glyph_store.add_glyph_for_current_character(&shaped_glyph, options);
315                    continue;
316                }
317            }
318
319            previous_character_offset = Some(glyph_cluster);
320            let mut characters_skipped = 0;
321            let Some(character) = characters.find_map(|(character_offset, character)| {
322                if glyph_cluster == character_offset {
323                    Some(character)
324                } else {
325                    characters_skipped += 1;
326                    None
327                }
328            }) else {
329                error!("HarfBuzz shaping results extended past character count");
330                return glyph_store;
331            };
332
333            shaped_glyph.adjust_for_character(character, options, font);
334
335            // If the we are working from the end of the string to the start and
336            // characters were skipped to produce this glyph, they belong to this
337            // glyph.
338            if shaped_run_is_rtl {
339                glyph_store.add_glyph(character, &shaped_glyph);
340            }
341
342            for _ in 0..characters_skipped {
343                glyph_store.extend_previous_glyph_by_character()
344            }
345
346            // If the we are working from the estart of the string to the end and
347            // characters were skipped to produce this glyph, they belong to the
348            // previous glyph.
349            if !shaped_run_is_rtl {
350                glyph_store.add_glyph(character, &shaped_glyph);
351            }
352        }
353
354        // Consume any remaining characters that belong to the more-recently added glyph.
355        for (_, _) in characters {
356            glyph_store.extend_previous_glyph_by_character();
357        }
358
359        glyph_store
360    }
361
362    #[inline]
363    pub fn total_advance(&self) -> Au {
364        self.total_advance
365    }
366
367    /// Return the number of glyphs stored in this [`GlyphStore`].
368    #[inline]
369    pub fn len(&self) -> usize {
370        self.glyphs.len()
371    }
372
373    /// Whether or not this [`GlyphStore`] has any glyphs.
374    #[inline]
375    pub fn is_empty(&self) -> bool {
376        self.glyphs.is_empty()
377    }
378
379    /// The number of characters (`char`) from the original string that produced this
380    /// [`GlyphStore`].
381    #[inline]
382    pub fn character_count(&self) -> usize {
383        self.total_characters
384    }
385
386    /// Whether or not this [`GlyphStore`] is entirely whitepsace.
387    #[inline]
388    pub fn is_whitespace(&self) -> bool {
389        self.is_whitespace
390    }
391
392    /// Whether or not this [`GlyphStore`] is a single preserved newline.
393    #[inline]
394    pub fn is_single_preserved_newline(&self) -> bool {
395        self.is_single_preserved_newline
396    }
397
398    /// Whether or not this [`GlyphStore`] ends with whitespace.
399    #[inline]
400    pub fn ends_with_whitespace(&self) -> bool {
401        self.ends_with_whitespace
402    }
403
404    /// The number of word separators in this [`GlyphStore`].
405    #[inline]
406    pub fn total_word_separators(&self) -> usize {
407        self.total_word_separators
408    }
409
410    /// The number of characters that were consumed to produce this [`GlyphStore`]. Some
411    /// characters correpond to more than one glyph and some glyphs correspond to more than
412    /// one character.
413    #[inline]
414    pub fn total_characters(&self) -> usize {
415        self.total_characters
416    }
417
418    /// Adds glyph that corresponds to a single character (as far we know) in the originating string.
419    #[inline]
420    pub(crate) fn add_glyph(&mut self, character: char, glyph: &ShapedGlyph) {
421        if !glyph.can_be_simple_glyph() {
422            self.add_detailed_glyph(glyph, Some(character), 1);
423            return;
424        }
425
426        let mut simple_glyph_entry = GlyphEntry::simple(glyph.glyph_id, glyph.advance);
427        if character_is_word_separator(character) {
428            self.total_word_separators += 1;
429            simple_glyph_entry.set_char_is_word_separator();
430        }
431
432        self.total_characters += 1;
433        self.total_advance += glyph.advance;
434        self.glyphs.push(simple_glyph_entry)
435    }
436
437    fn add_detailed_glyph(
438        &mut self,
439        shaped_glyph: &ShapedGlyph,
440        character: Option<char>,
441        character_count: usize,
442    ) {
443        let is_word_separator = character.is_some_and(character_is_word_separator);
444        if is_word_separator {
445            self.total_word_separators += 1;
446        }
447
448        self.total_characters += character_count;
449        self.total_advance += shaped_glyph.advance;
450        self.detailed_glyphs.push(DetailedGlyphEntry {
451            id: shaped_glyph.glyph_id,
452            advance: shaped_glyph.advance,
453            offset: shaped_glyph.offset,
454            character_count,
455            is_word_separator,
456        });
457        self.glyphs
458            .push(GlyphEntry::complex(self.detailed_glyphs.len() - 1));
459    }
460
461    fn extend_previous_glyph_by_character(&mut self) {
462        let detailed_glyph_index = self.ensure_last_glyph_is_detailed();
463        let detailed_glyph = self
464            .detailed_glyphs
465            .get_mut(detailed_glyph_index)
466            .expect("GlyphEntry should have valid index to detailed glyph");
467        detailed_glyph.character_count += 1;
468        self.total_characters += 1;
469    }
470
471    fn add_glyph_for_current_character(
472        &mut self,
473        shaped_glyph: &ShapedGlyph,
474        options: &ShapingOptions,
475    ) {
476        // If this glyph cluster is extending to include another glyph and we applied
477        // letter spacing to the previous glyph, ensure that the letter spacing is only
478        // applied to the last glyph in the cluster. Note that this is unconditionally
479        // converting the previous glyph to a detailed one because it's quite likely that
480        // the advance will not fit into the simple bitmask due to being negative.
481        if let Some(letter_spacing) = options.letter_spacing {
482            if letter_spacing != Au::zero() {
483                let last_glyph_index = self.ensure_last_glyph_is_detailed();
484                self.detailed_glyphs[last_glyph_index].advance -= letter_spacing;
485            }
486        }
487
488        // Add a detailed glyph entry for this new glyph, but it corresponds to a character
489        // we have already started processing. It should not contribute any character count.
490        self.add_detailed_glyph(shaped_glyph, None, 0);
491    }
492
493    /// If the last glyph added to this [`GlyphStore`] was a simple glyph, convert it to a
494    /// detailed one. In either case, return the index into [`Self::detailed_glyphs`] for
495    /// the most recently added glyph.
496    fn ensure_last_glyph_is_detailed(&mut self) -> usize {
497        let last_glyph = self
498            .glyphs
499            .last_mut()
500            .expect("Should never call this before any glyphs have been added.");
501        if !last_glyph.is_simple() {
502            return last_glyph.detailed_glyph_index();
503        }
504
505        self.detailed_glyphs.push(DetailedGlyphEntry {
506            id: last_glyph.id(),
507            advance: last_glyph.advance(),
508            offset: Default::default(),
509            character_count: 1,
510            is_word_separator: last_glyph.char_is_word_separator(),
511        });
512
513        let detailed_glyph_index = self.detailed_glyphs.len() - 1;
514        *last_glyph = GlyphEntry::complex(detailed_glyph_index);
515        detailed_glyph_index
516    }
517
518    pub fn glyphs(&self) -> impl Iterator<Item = GlyphInfo<'_>> + use<'_> {
519        self.glyphs.iter().map(|entry| {
520            if entry.is_simple() {
521                GlyphInfo::Simple(entry)
522            } else {
523                GlyphInfo::Detail(&self.detailed_glyphs[entry.detailed_glyph_index()])
524            }
525        })
526    }
527}
528
529impl ShapedGlyph {
530    fn can_be_simple_glyph(&self) -> bool {
531        is_simple_glyph_id(self.glyph_id) &&
532            is_simple_advance(self.advance) &&
533            self.offset
534                .is_none_or(|offset| offset == Default::default())
535    }
536
537    /// After shaping is complete, some glyphs need their spacing adjusted to take into
538    /// account `letter-spacing`, `word-spacing` and tabs.
539    ///
540    /// TODO: This should all likely move to layout. In particular, proper tab stops
541    /// are context sensitive and be based on the size of the space character in the
542    /// inline formatting context.
543    fn adjust_for_character(
544        &mut self,
545        character: char,
546        shaping_options: &ShapingOptions,
547        font: &Font,
548    ) {
549        // Treat tabs in pre-formatted text as a fixed number of spaces. The glyph id does
550        // not matter here as Servo doesn't render any glyphs for whitespace.
551        if character == '\t' {
552            self.glyph_id = font.glyph_index(' ').unwrap_or_default();
553            self.advance = font.metrics.space_advance * 8;
554        }
555
556        if let Some(letter_spacing) = shaping_options.letter_spacing {
557            self.advance += letter_spacing;
558        };
559
560        // CSS 2.1 ยง 16.4 states that "word spacing affects each space (U+0020) and non-breaking
561        // space (U+00A0) left in the text after the white space processing rules have been
562        // applied. The effect of the property on other word-separator characters is undefined."
563        // We elect to only space the two required code points.
564        if character == ' ' || character == '\u{a0}' {
565            // https://drafts.csswg.org/css-text-3/#word-spacing-property
566            self.advance += shaping_options.word_spacing;
567        }
568    }
569}
570
/// Whether `character` is one of the word separator characters from the
/// non-exhaustive list in the CSS Text Module Level 3 specification.
///
/// See <https://drafts.csswg.org/css-text/#word-separator>.
fn character_is_word_separator(character: char) -> bool {
    const WORD_SEPARATORS: [char; 7] = [
        ' ',         // space
        '\u{00A0}',  // non-breaking space
        '\u{1361}',  // Ethiopic word space
        '\u{10100}', // Aegean word separator
        '\u{10101}', // Aegean word separator
        '\u{1039F}', // Ugaritic word divider
        '\u{1091F}', // Phoenician word separator
    ];
    WORD_SEPARATORS.contains(&character)
}
587
588impl fmt::Debug for GlyphStore {
589    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
590        writeln!(formatter, "GlyphStore:")?;
591        for entry in self.glyphs.iter() {
592            if entry.is_simple() {
593                writeln!(
594                    formatter,
595                    "  simple id={:?} advance={:?}",
596                    entry.id(),
597                    entry.advance()
598                )?;
599                continue;
600            } else {
601                let detailed_glyph = &self.detailed_glyphs[entry.detailed_glyph_index()];
602                writeln!(
603                    formatter,
604                    "  detailed id={:?} advance={:?} characters={:?}",
605                    detailed_glyph.id, detailed_glyph.advance, detailed_glyph.character_count,
606                )?;
607            }
608        }
609        Ok(())
610    }
611}