fonts/glyph.rs
1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::fmt;
6use std::vec::Vec;
7
8use app_units::Au;
9use euclid::default::Point2D;
10use euclid::num::Zero;
11use itertools::Either;
12use log::{debug, error};
13use malloc_size_of_derive::MallocSizeOf;
14use serde::{Deserialize, Serialize};
15
16use crate::{Font, GlyphShapingResult, ShapedGlyph, ShapingFlags, ShapingOptions};
17
/// GlyphEntry is a port of Gecko's CompressedGlyph scheme for storing glyph data compactly.
///
/// In the common case (reasonable glyph advances, no offsets from the font em-box, and one glyph
/// per character), we pack glyph advance, glyph id, and some flags into a single u32.
///
/// In the uncommon case (multiple glyphs per unicode character, large glyph index/advance, or
/// glyph offsets), the `GlyphEntry` only stores the index of a [`DetailedGlyphEntry`], which
/// holds the glyph information instead.
#[derive(Clone, Copy, Debug, Deserialize, MallocSizeOf, PartialEq, Serialize)]
pub struct GlyphEntry {
    /// Either the packed flags, advance and glyph id of a simple glyph, or the index of the
    /// detailed glyph that this entry refers to.
    value: u32,
}
30
31impl GlyphEntry {
32 fn new(value: u32) -> GlyphEntry {
33 GlyphEntry { value }
34 }
35
36 // Creates a GlyphEntry for the common case
37 fn simple(id: GlyphId, advance: Au) -> GlyphEntry {
38 assert!(is_simple_glyph_id(id));
39 assert!(is_simple_advance(advance));
40
41 let id_mask = id;
42 let Au(advance) = advance;
43 let advance_mask = (advance as u32) << GLYPH_ADVANCE_SHIFT;
44
45 GlyphEntry::new(id_mask | advance_mask | FLAG_IS_SIMPLE_GLYPH)
46 }
47
48 fn complex(detailed_glyph_index: usize) -> GlyphEntry {
49 assert!(detailed_glyph_index as u32 <= u32::MAX >> 1);
50 GlyphEntry::new(detailed_glyph_index as u32)
51 }
52}
53
/// The id of a particular glyph within a font.
///
/// Only ids that fit within `GLYPH_ID_MASK` can be stored inline in a simple
/// [`GlyphEntry`]; see `is_simple_glyph_id`.
pub(crate) type GlyphId = u32;
56
57// TODO: make this more type-safe.
58
/// Bit 30: set on a simple entry whose originating character is a word separator.
const FLAG_CHAR_IS_WORD_SEPARATOR: u32 = 1 << 30;
/// Bit 31: set when the entry stores a simple glyph inline.
const FLAG_IS_SIMPLE_GLYPH: u32 = 1 << 31;

/// Position of the glyph advance within a simple entry.
const GLYPH_ADVANCE_SHIFT: u32 = 16;
/// Bits 16..30 of a simple entry: the glyph advance, in Au's.
const GLYPH_ADVANCE_MASK: u32 = 0x3FFF << GLYPH_ADVANCE_SHIFT;
/// Bits 0..16 of a simple entry: the glyph id.
const GLYPH_ID_MASK: u32 = 0xFFFF;
66
67// Non-simple glyphs (more than one glyph per char; missing glyph,
68// newline, tab, large advance, or nonzero x/y offsets) may have one
69// or more detailed glyphs associated with them. They are stored in a
70// side array so that there is a 1:1 mapping of GlyphEntry to
71// unicode char.
72
73fn is_simple_glyph_id(id: GlyphId) -> bool {
74 (id & GLYPH_ID_MASK) == id
75}
76
77fn is_simple_advance(advance: Au) -> bool {
78 advance >= Au::zero() && {
79 let unsigned_au = advance.0 as u32;
80 (unsigned_au & (GLYPH_ADVANCE_MASK >> GLYPH_ADVANCE_SHIFT)) == unsigned_au
81 }
82}
83
84// Getters and setters for GlyphEntry. Setter methods are functional,
85// because GlyphEntry is immutable and only a u32 in size.
86impl GlyphEntry {
87 #[inline(always)]
88 fn advance(&self) -> Au {
89 Au::new(((self.value & GLYPH_ADVANCE_MASK) >> GLYPH_ADVANCE_SHIFT) as i32)
90 }
91
92 #[inline]
93 fn id(&self) -> GlyphId {
94 self.value & GLYPH_ID_MASK
95 }
96
97 /// True if the original character was a word separator. These include spaces
98 /// (U+0020), non-breaking spaces (U+00A0), and a few other characters
99 /// non-exhaustively listed in the specification. Other characters may map to the same
100 /// glyphs, but this function does not take mapping into account.
101 ///
102 /// See <https://drafts.csswg.org/css-text/#word-separator>.
103 fn char_is_word_separator(&self) -> bool {
104 self.has_flag(FLAG_CHAR_IS_WORD_SEPARATOR)
105 }
106
107 #[inline(always)]
108 fn set_char_is_word_separator(&mut self) {
109 self.value |= FLAG_CHAR_IS_WORD_SEPARATOR;
110 }
111
112 fn detailed_glyph_index(&self) -> usize {
113 self.value as usize
114 }
115
116 #[inline(always)]
117 fn is_simple(&self) -> bool {
118 self.has_flag(FLAG_IS_SIMPLE_GLYPH)
119 }
120
121 #[inline(always)]
122 fn has_flag(&self, flag: u32) -> bool {
123 (self.value & flag) != 0
124 }
125}
126
/// The out-of-line representation of a glyph: the same data as a simple [`GlyphEntry`]
/// plus an optional offset and the number of characters the glyph corresponds to.
#[derive(Clone, Deserialize, MallocSizeOf, Serialize)]
pub struct DetailedGlyphEntry {
    /// The id of this glyph within the font.
    id: u32,
    /// The advance that this glyph needs, i.e. the distance between where this
    /// glyph is painted and where the next one is painted.
    advance: Au,
    /// The physical offset that this glyph should be painted with.
    offset: Option<Point2D<Au>>,
    /// The number of characters this glyph corresponds to in the original string.
    /// This might be zero and this might be more than one.
    character_count: usize,
    /// Whether or not the originating character for this glyph was a word separator.
    is_word_separator: bool,
}
142
/// A proxy that is provided to [`GlyphStore`] clients when iterating through glyphs.
///
/// Rather than eagerly assembling and copying glyph data, it only borrows the
/// underlying entry and retrieves values from it as they are needed.
#[derive(Clone, Copy)]
pub enum GlyphInfo<'a> {
    /// A glyph whose data is packed into a [`GlyphEntry`].
    Simple(&'a GlyphEntry),
    /// A glyph whose data is stored in a [`DetailedGlyphEntry`].
    Detail(&'a DetailedGlyphEntry),
}
152
153impl GlyphInfo<'_> {
154 pub fn id(self) -> GlyphId {
155 match self {
156 GlyphInfo::Simple(entry) => entry.id(),
157 GlyphInfo::Detail(entry) => entry.id,
158 }
159 }
160
161 #[inline(always)]
162 pub fn advance(self) -> Au {
163 match self {
164 GlyphInfo::Simple(entry) => entry.advance(),
165 GlyphInfo::Detail(entry) => entry.advance,
166 }
167 }
168
169 #[inline]
170 pub fn offset(self) -> Option<Point2D<Au>> {
171 match self {
172 GlyphInfo::Simple(..) => None,
173 GlyphInfo::Detail(entry) => entry.offset,
174 }
175 }
176
177 #[inline]
178 pub fn char_is_word_separator(self) -> bool {
179 match self {
180 GlyphInfo::Simple(entry) => entry.char_is_word_separator(),
181 GlyphInfo::Detail(entry) => entry.is_word_separator,
182 }
183 }
184
185 /// The number of characters that this glyph corresponds to. This may be more
186 /// than one when a single glyph is produced for multiple characters. This may
187 /// be zero when multiple glyphs are produced for a single character.
188 #[inline]
189 pub fn character_count(self) -> usize {
190 match self {
191 GlyphInfo::Simple(..) => 1,
192 GlyphInfo::Detail(entry) => entry.character_count,
193 }
194 }
195}
196
/// Stores the glyph data belonging to a text run.
///
/// Simple glyphs are stored inline in `glyphs`, detailed glyphs are stored in
/// `detailed_glyphs` and referenced by index from their entry in `glyphs`.
///
/// ~~~ascii
/// +- GlyphStore -----------------------------------+
/// |                  +---+---+---+---+---+---+---+ |
/// | glyphs:          | d | s | d | s | d | s | s | |  d = detailed
/// |                  +-|-+---+-|-+---+-|-+---+---+ |  s = simple
/// |                    |       |       |           |
/// |                    |   +---+       |           |
/// |                    |   |   +-------+           |
/// |                    |   |   |                   |
/// |                  +-V-+-V-+-V-+                 |
/// | detailed_glyphs: | d | d | d |                 |
/// |                  +---+---+---+                 |
/// +------------------------------------------------+
/// ~~~
#[derive(Clone, Deserialize, MallocSizeOf, Serialize)]
pub struct GlyphStore {
    // TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector
    // optimization.
    /// A collection of [`GlyphEntry`]s within the [`GlyphStore`]. Each [`GlyphEntry`]
    /// may be simple or detailed. When detailed, there will be a corresponding entry
    /// in [`Self::detailed_glyphs`].
    glyphs: Vec<GlyphEntry>,

    /// A vector of glyphs that cannot fit within a single [`GlyphEntry`] or that
    /// correspond to 0 or more than 1 character in the original string.
    detailed_glyphs: Vec<DetailedGlyphEntry>,

    /// A cache of the advance of the entire glyph store.
    total_advance: Au,

    /// The number of characters that correspond to the glyphs in this [`GlyphStore`].
    total_characters: usize,

    /// A cache of the number of word separators in the entire glyph store.
    /// See <https://drafts.csswg.org/css-text/#word-separator>.
    total_word_separators: usize,

    /// Whether or not this glyph store contains only glyphs for whitespace.
    is_whitespace: bool,

    /// Whether or not this glyph store ends with whitespace glyphs.
    /// Typically whitespace glyphs are placed in a separate store,
    /// but that may not be the case with `white-space: break-spaces`.
    ends_with_whitespace: bool,

    /// Whether or not this glyph store contains only a single glyph for a single
    /// preserved newline.
    is_single_preserved_newline: bool,

    /// Whether or not this [`GlyphStore`] has right-to-left text, which has implications
    /// about the order of the glyphs in the store.
    is_rtl: bool,
}
254
255impl GlyphStore {
256 /// Initializes the glyph store with the given capacity, but doesn't actually add any glyphs.
257 ///
258 /// Use the `add_*` methods to store glyph data.
259 pub(crate) fn new(text: &str, length: usize, options: &ShapingOptions) -> Self {
260 Self {
261 glyphs: Vec::with_capacity(length),
262 detailed_glyphs: Default::default(),
263 total_advance: Au::zero(),
264 total_characters: 0,
265 total_word_separators: 0,
266 is_whitespace: options
267 .flags
268 .contains(ShapingFlags::IS_WHITESPACE_SHAPING_FLAG),
269 ends_with_whitespace: options
270 .flags
271 .contains(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG),
272 is_single_preserved_newline: text.len() == 1 && text.starts_with('\n'),
273 is_rtl: options.flags.contains(ShapingFlags::RTL_FLAG),
274 }
275 }
276
/// This constructor turns shaping output from HarfBuzz into a glyph run to be
/// used by layout. The idea here is that we add each glyph to the [`GlyphStore`]
/// and track to which characters from the original string each glyph
/// corresponds. HarfBuzz will either give us glyphs that correspond to
/// characters left-to-right or right-to-left. Each character can produce
/// multiple glyphs and multiple characters can produce one glyph. HarfBuzz just
/// guarantees that the resulting character offsets are in monotone order.
///
/// If a glyph refers to a byte offset at which none of the remaining characters
/// start, an error is logged and the glyphs collected so far are returned.
pub(crate) fn with_shaped_glyph_data(
    font: &Font,
    text: &str,
    options: &ShapingOptions,
    shaped_glyph_data: &impl GlyphShapingResult,
) -> Self {
    debug!(
        "Shaped: '{text:?}: {:?}",
        shaped_glyph_data.iter().collect::<Vec<_>>()
    );

    // Note: Even if we set the `RTL_FLAG` in the options, HarfBuzz may still
    // give us shaped glyphs in left-to-right order. We need to look at the
    // actual cluster indices in the shaped run.
    let shaped_run_is_rtl = shaped_glyph_data.is_rtl();
    // Walk the characters in the same direction as the glyphs so that both
    // sequences can be consumed in lockstep.
    let mut characters = if !shaped_run_is_rtl {
        Either::Left(text.char_indices())
    } else {
        Either::Right(text.char_indices().rev())
    };

    let mut previous_character_offset = None;
    let mut glyph_store = GlyphStore::new(text, shaped_glyph_data.len(), options);
    for mut shaped_glyph in shaped_glyph_data.iter() {
        // The glyph "cluster" (HarfBuzz terminology) is the byte offset in the string that
        // this glyph corresponds to. More than one glyph can share a cluster.
        let glyph_cluster = shaped_glyph.string_byte_offset;

        // A glyph sharing the cluster of the previous glyph belongs to a character that
        // was already consumed. It is added without consuming another character and
        // without going through `adjust_for_character`.
        if let Some(previous_character_offset) = previous_character_offset {
            if previous_character_offset == glyph_cluster {
                glyph_store.add_glyph_for_current_character(&shaped_glyph, options);
                continue;
            }
        }

        previous_character_offset = Some(glyph_cluster);
        // Advance to the character that starts at this cluster, counting the characters
        // that did not produce a glyph of their own along the way.
        let mut characters_skipped = 0;
        let Some(character) = characters.find_map(|(character_offset, character)| {
            if glyph_cluster == character_offset {
                Some(character)
            } else {
                characters_skipped += 1;
                None
            }
        }) else {
            error!("HarfBuzz shaping results extended past character count");
            return glyph_store;
        };

        shaped_glyph.adjust_for_character(character, options, font);

        // If we are working from the end of the string to the start and
        // characters were skipped to produce this glyph, they belong to this
        // glyph.
        if shaped_run_is_rtl {
            glyph_store.add_glyph(character, &shaped_glyph);
        }

        // NOTE(review): `extend_previous_glyph_by_character` expects a glyph to already
        // be in the store. This looks like it would panic if the first glyph of a
        // left-to-right run skipped characters -- confirm that the shaper rules this out.
        for _ in 0..characters_skipped {
            glyph_store.extend_previous_glyph_by_character()
        }

        // If we are working from the start of the string to the end and
        // characters were skipped to produce this glyph, they belong to the
        // previous glyph.
        if !shaped_run_is_rtl {
            glyph_store.add_glyph(character, &shaped_glyph);
        }
    }

    // Consume any remaining characters that belong to the most recently added glyph.
    // NOTE(review): as above, this presumably relies on shaping never producing zero
    // glyphs for non-empty text -- confirm.
    for (_, _) in characters {
        glyph_store.extend_previous_glyph_by_character();
    }

    glyph_store
}
361
/// The cached sum of the advances that were added to this [`GlyphStore`].
#[inline]
pub fn total_advance(&self) -> Au {
    self.total_advance
}

/// Return the number of glyphs stored in this [`GlyphStore`].
#[inline]
pub fn len(&self) -> usize {
    self.glyphs.len()
}

/// Whether or not this [`GlyphStore`] has any glyphs.
#[inline]
pub fn is_empty(&self) -> bool {
    self.glyphs.is_empty()
}

/// The number of characters (`char`) from the original string that produced this
/// [`GlyphStore`]. This returns the same value as [`Self::total_characters`].
#[inline]
pub fn character_count(&self) -> usize {
    self.total_characters
}

/// Whether or not this [`GlyphStore`] is entirely whitespace.
#[inline]
pub fn is_whitespace(&self) -> bool {
    self.is_whitespace
}

/// Whether or not this [`GlyphStore`] is a single preserved newline.
#[inline]
pub fn is_single_preserved_newline(&self) -> bool {
    self.is_single_preserved_newline
}

/// Whether or not this [`GlyphStore`] ends with whitespace.
#[inline]
pub fn ends_with_whitespace(&self) -> bool {
    self.ends_with_whitespace
}

/// The number of word separators in this [`GlyphStore`].
#[inline]
pub fn total_word_separators(&self) -> usize {
    self.total_word_separators
}

/// The number of characters that were consumed to produce this [`GlyphStore`]. Some
/// characters correspond to more than one glyph and some glyphs correspond to more than
/// one character.
#[inline]
pub fn total_characters(&self) -> usize {
    self.total_characters
}
417
418 /// Adds glyph that corresponds to a single character (as far we know) in the originating string.
419 #[inline]
420 pub(crate) fn add_glyph(&mut self, character: char, glyph: &ShapedGlyph) {
421 if !glyph.can_be_simple_glyph() {
422 self.add_detailed_glyph(glyph, Some(character), 1);
423 return;
424 }
425
426 let mut simple_glyph_entry = GlyphEntry::simple(glyph.glyph_id, glyph.advance);
427 if character_is_word_separator(character) {
428 self.total_word_separators += 1;
429 simple_glyph_entry.set_char_is_word_separator();
430 }
431
432 self.total_characters += 1;
433 self.total_advance += glyph.advance;
434 self.glyphs.push(simple_glyph_entry)
435 }
436
437 fn add_detailed_glyph(
438 &mut self,
439 shaped_glyph: &ShapedGlyph,
440 character: Option<char>,
441 character_count: usize,
442 ) {
443 let is_word_separator = character.is_some_and(character_is_word_separator);
444 if is_word_separator {
445 self.total_word_separators += 1;
446 }
447
448 self.total_characters += character_count;
449 self.total_advance += shaped_glyph.advance;
450 self.detailed_glyphs.push(DetailedGlyphEntry {
451 id: shaped_glyph.glyph_id,
452 advance: shaped_glyph.advance,
453 offset: shaped_glyph.offset,
454 character_count,
455 is_word_separator,
456 });
457 self.glyphs
458 .push(GlyphEntry::complex(self.detailed_glyphs.len() - 1));
459 }
460
461 fn extend_previous_glyph_by_character(&mut self) {
462 let detailed_glyph_index = self.ensure_last_glyph_is_detailed();
463 let detailed_glyph = self
464 .detailed_glyphs
465 .get_mut(detailed_glyph_index)
466 .expect("GlyphEntry should have valid index to detailed glyph");
467 detailed_glyph.character_count += 1;
468 self.total_characters += 1;
469 }
470
471 fn add_glyph_for_current_character(
472 &mut self,
473 shaped_glyph: &ShapedGlyph,
474 options: &ShapingOptions,
475 ) {
476 // If this glyph cluster is extending to include another glyph and we applied
477 // letter spacing to the previous glyph, ensure that the letter spacing is only
478 // applied to the last glyph in the cluster. Note that this is unconditionally
479 // converting the previous glyph to a detailed one because it's quite likely that
480 // the advance will not fit into the simple bitmask due to being negative.
481 if let Some(letter_spacing) = options.letter_spacing {
482 if letter_spacing != Au::zero() {
483 let last_glyph_index = self.ensure_last_glyph_is_detailed();
484 self.detailed_glyphs[last_glyph_index].advance -= letter_spacing;
485 }
486 }
487
488 // Add a detailed glyph entry for this new glyph, but it corresponds to a character
489 // we have already started processing. It should not contribute any character count.
490 self.add_detailed_glyph(shaped_glyph, None, 0);
491 }
492
493 /// If the last glyph added to this [`GlyphStore`] was a simple glyph, convert it to a
494 /// detailed one. In either case, return the index into [`Self::detailed_glyphs`] for
495 /// the most recently added glyph.
496 fn ensure_last_glyph_is_detailed(&mut self) -> usize {
497 let last_glyph = self
498 .glyphs
499 .last_mut()
500 .expect("Should never call this before any glyphs have been added.");
501 if !last_glyph.is_simple() {
502 return last_glyph.detailed_glyph_index();
503 }
504
505 self.detailed_glyphs.push(DetailedGlyphEntry {
506 id: last_glyph.id(),
507 advance: last_glyph.advance(),
508 offset: Default::default(),
509 character_count: 1,
510 is_word_separator: last_glyph.char_is_word_separator(),
511 });
512
513 let detailed_glyph_index = self.detailed_glyphs.len() - 1;
514 *last_glyph = GlyphEntry::complex(detailed_glyph_index);
515 detailed_glyph_index
516 }
517
518 pub fn glyphs(&self) -> impl Iterator<Item = GlyphInfo<'_>> + use<'_> {
519 self.glyphs.iter().map(|entry| {
520 if entry.is_simple() {
521 GlyphInfo::Simple(entry)
522 } else {
523 GlyphInfo::Detail(&self.detailed_glyphs[entry.detailed_glyph_index()])
524 }
525 })
526 }
527}
528
529impl ShapedGlyph {
530 fn can_be_simple_glyph(&self) -> bool {
531 is_simple_glyph_id(self.glyph_id) &&
532 is_simple_advance(self.advance) &&
533 self.offset
534 .is_none_or(|offset| offset == Default::default())
535 }
536
537 /// After shaping is complete, some glyphs need their spacing adjusted to take into
538 /// account `letter-spacing`, `word-spacing` and tabs.
539 ///
540 /// TODO: This should all likely move to layout. In particular, proper tab stops
541 /// are context sensitive and be based on the size of the space character in the
542 /// inline formatting context.
543 fn adjust_for_character(
544 &mut self,
545 character: char,
546 shaping_options: &ShapingOptions,
547 font: &Font,
548 ) {
549 // Treat tabs in pre-formatted text as a fixed number of spaces. The glyph id does
550 // not matter here as Servo doesn't render any glyphs for whitespace.
551 if character == '\t' {
552 self.glyph_id = font.glyph_index(' ').unwrap_or_default();
553 self.advance = font.metrics.space_advance * 8;
554 }
555
556 if let Some(letter_spacing) = shaping_options.letter_spacing {
557 self.advance += letter_spacing;
558 };
559
560 // CSS 2.1 ยง 16.4 states that "word spacing affects each space (U+0020) and non-breaking
561 // space (U+00A0) left in the text after the white space processing rules have been
562 // applied. The effect of the property on other word-separator characters is undefined."
563 // We elect to only space the two required code points.
564 if character == ' ' || character == '\u{a0}' {
565 // https://drafts.csswg.org/css-text-3/#word-spacing-property
566 self.advance += shaping_options.word_spacing;
567 }
568 }
569}
570
/// Whether `character` is one of the word separators that are listed, non-exhaustively,
/// in the CSS Text Module Level 3 specification.
///
/// See <https://drafts.csswg.org/css-text/#word-separator>.
fn character_is_word_separator(character: char) -> bool {
    const WORD_SEPARATORS: [char; 7] = [
        ' ',         // space
        '\u{00A0}',  // non-breaking space
        '\u{1361}',  // Ethiopic word space
        '\u{10100}', // Aegean word separator
        '\u{10101}', // Aegean word separator
        '\u{1039F}', // Ugaritic word divider
        '\u{1091F}', // Phoenician word separator
    ];

    WORD_SEPARATORS.contains(&character)
}
587
588impl fmt::Debug for GlyphStore {
589 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
590 writeln!(formatter, "GlyphStore:")?;
591 for entry in self.glyphs.iter() {
592 if entry.is_simple() {
593 writeln!(
594 formatter,
595 " simple id={:?} advance={:?}",
596 entry.id(),
597 entry.advance()
598 )?;
599 continue;
600 } else {
601 let detailed_glyph = &self.detailed_glyphs[entry.detailed_glyph_index()];
602 writeln!(
603 formatter,
604 " detailed id={:?} advance={:?} characters={:?}",
605 detailed_glyph.id, detailed_glyph.advance, detailed_glyph.character_count,
606 )?;
607 }
608 }
609 Ok(())
610 }
611}