layout/flow/inline/
text_run.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::mem;
6use std::ops::Range;
7
8use app_units::Au;
9use base::text::is_bidi_control;
10use fonts::{
11    FontContext, FontRef, GlyphRun, LAST_RESORT_GLYPH_ADVANCE, ShapingFlags, ShapingOptions,
12};
13use fonts_traits::ByteIndex;
14use log::warn;
15use malloc_size_of_derive::MallocSizeOf;
16use range::Range as ServoRange;
17use servo_arc::Arc;
18use style::computed_values::text_rendering::T as TextRendering;
19use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
20use style::computed_values::word_break::T as WordBreak;
21use style::properties::ComputedValues;
22use style::str::char_is_whitespace;
23use style::values::computed::OverflowWrap;
24use unicode_bidi::{BidiInfo, Level};
25use unicode_script::Script;
26use xi_unicode::linebreak_property;
27
28use super::line_breaker::LineBreaker;
29use super::{FontKeyAndMetrics, InlineFormattingContextLayout, SharedInlineStyles};
30use crate::fragment_tree::BaseFragmentInfo;
31
// These constants are the xi-unicode line breaking classes that are defined in
// `table.rs`. Unfortunately, they are only identified by number.
/// Numeric identifier of the `CM` (Combining Mark) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_CM: u8 = 9;
/// Numeric identifier of the `GL` (Non-breaking "Glue") line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_GL: u8 = 12;
/// Numeric identifier of the `ZW` (Zero Width Space) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_ZW: u8 = 28;
/// Numeric identifier of the `WJ` (Word Joiner) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_WJ: u8 = 30;
/// Numeric identifier of the `ZWJ` (Zero Width Joiner) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_ZWJ: u8 = 42;
39
/// Whether a soft wrap opportunity should be processed before the first run of a
/// [`TextRunSegment`] when it is laid out into line items.
///
/// There are two reasons why we might want to break at the start:
///
///  1. The line breaker told us that a break was necessary between two separate
///     instances of sending text to it.
///  2. We are following replaced content ie `have_deferred_soft_wrap_opportunity`.
///
/// In both cases, we don't want to do this if the first character prevents a
/// soft wrap opportunity.
#[derive(PartialEq)]
enum SegmentStartSoftWrapPolicy {
    /// Process a soft wrap opportunity before the first run of the segment.
    Force,
    /// Process a soft wrap opportunity before the first run only when the segment's
    /// `break_at_start` flag is set.
    FollowLinebreaker,
}
53
/// A stretch of a [`TextRun`]'s text that shares one font, one [`Script`] and one bidi
/// [`Level`], together with the glyph runs produced by shaping it.
#[derive(Debug, MallocSizeOf)]
pub(crate) struct TextRunSegment {
    /// The index of this font in the parent [`super::InlineFormattingContext`]'s collection of font
    /// information.
    pub font_index: usize,

    /// The [`Script`] of this segment.
    pub script: Script,

    /// The bidi Level of this segment.
    pub bidi_level: Level,

    /// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content.
    pub range: Range<usize>,

    /// Whether or not the linebreaker said that we should allow a line break at the start of this
    /// segment. This is recomputed every time the segment is shaped.
    pub break_at_start: bool,

    /// The shaped runs within this segment. Each run's range is built from a start offset into
    /// the inline formatting context's text and a length in bytes.
    pub runs: Vec<GlyphRun>,
}
76
77impl TextRunSegment {
78    fn new(font_index: usize, script: Script, bidi_level: Level, start_offset: usize) -> Self {
79        Self {
80            font_index,
81            script,
82            bidi_level,
83            range: start_offset..start_offset,
84            runs: Vec::new(),
85            break_at_start: false,
86        }
87    }
88
89    /// Update this segment if the Font and Script are compatible. The update will only
90    /// ever make the Script specific. Returns true if the new Font and Script are
91    /// compatible with this segment or false otherwise.
92    fn update_if_compatible(
93        &mut self,
94        new_font: &FontRef,
95        script: Script,
96        bidi_level: Level,
97        fonts: &[FontKeyAndMetrics],
98        font_context: &FontContext,
99    ) -> bool {
100        fn is_specific(script: Script) -> bool {
101            script != Script::Common && script != Script::Inherited
102        }
103
104        if bidi_level != self.bidi_level {
105            return false;
106        }
107
108        let current_font_key_and_metrics = &fonts[self.font_index];
109        if new_font.key(font_context) != current_font_key_and_metrics.key ||
110            new_font.descriptor.pt_size != current_font_key_and_metrics.pt_size
111        {
112            return false;
113        }
114
115        if !is_specific(self.script) && is_specific(script) {
116            self.script = script;
117        }
118        script == self.script || !is_specific(script)
119    }
120
121    fn layout_into_line_items(
122        &self,
123        text_run: &TextRun,
124        mut soft_wrap_policy: SegmentStartSoftWrapPolicy,
125        ifc: &mut InlineFormattingContextLayout,
126    ) {
127        if self.break_at_start && soft_wrap_policy == SegmentStartSoftWrapPolicy::FollowLinebreaker
128        {
129            soft_wrap_policy = SegmentStartSoftWrapPolicy::Force;
130        }
131
132        let mut byte_processed = ByteIndex(0);
133        for (run_index, run) in self.runs.iter().enumerate() {
134            ifc.possibly_flush_deferred_forced_line_break();
135
136            // If this whitespace forces a line break, queue up a hard line break the next time we
137            // see any content. We don't line break immediately, because we'd like to finish processing
138            // any ongoing inline boxes before ending the line.
139            if run.is_single_preserved_newline() {
140                byte_processed = byte_processed + run.range.length();
141                ifc.defer_forced_line_break();
142                continue;
143            }
144            // Break before each unbreakable run in this TextRun, except the first unless the
145            // linebreaker was set to break before the first run.
146            if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force {
147                ifc.process_soft_wrap_opportunity();
148            }
149            ifc.push_glyph_store_to_unbreakable_segment(
150                run.glyph_store.clone(),
151                text_run,
152                self.font_index,
153                self.bidi_level,
154                ServoRange::<ByteIndex>::new(
155                    byte_processed + ByteIndex(self.range.start as isize),
156                    ByteIndex(self.range.len() as isize) - byte_processed,
157                ),
158            );
159            byte_processed = byte_processed + run.range.length();
160        }
161    }
162
163    fn shape_and_push_range(
164        &mut self,
165        range: &Range<usize>,
166        formatting_context_text: &str,
167        segment_font: &FontRef,
168        options: &ShapingOptions,
169    ) {
170        self.runs.push(GlyphRun {
171            glyph_store: segment_font.shape_text(&formatting_context_text[range.clone()], options),
172            range: ServoRange::new(
173                ByteIndex(range.start as isize),
174                ByteIndex(range.len() as isize),
175            ),
176        });
177    }
178
    /// Shape the text of this [`TextRunSegment`], first finding "words" for the shaper by processing
    /// the linebreaks found in the owning [`super::InlineFormattingContext`]. Linebreaks are filtered,
    /// based on the style of the parent inline box.
    ///
    /// Any previously shaped runs are discarded and `break_at_start` is recomputed. Each word is
    /// pushed as its own run, and trailing whitespace of a word is split off into one or more
    /// separate runs depending on `white-space-collapse` and on whether it ends with a newline.
    ///
    /// * `parent_style` - the style whose inherited text properties (`word-break`,
    ///   `overflow-wrap`, `white-space-collapse`) steer how the text is split.
    /// * `formatting_context_text` - the text that `self.range` and the line breaks index into.
    /// * `linebreaker` - advanced to provide the line breaks that fall within `self.range`.
    /// * `shaping_options` - the base options; a copy is adjusted per word before shaping.
    /// * `font` - the font used to shape every run of this segment.
    fn shape_text(
        &mut self,
        parent_style: &ComputedValues,
        formatting_context_text: &str,
        linebreaker: &mut LineBreaker,
        shaping_options: &ShapingOptions,
        font: FontRef,
    ) {
        // Gather the linebreaks that apply to this segment from the inline formatting context's collection
        // of line breaks. Also add a simulated break at the end of the segment in order to ensure the final
        // piece of text is processed.
        let range = self.range.clone();
        let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone());
        let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end));

        // Start from a clean slate so that this method can be called again on the same segment.
        self.runs.clear();
        self.runs.reserve(linebreaks.len());
        self.break_at_start = false;

        let text_style = parent_style.get_inherited_text().clone();
        let can_break_anywhere = text_style.word_break == WordBreak::BreakAll ||
            text_style.overflow_wrap == OverflowWrap::Anywhere ||
            text_style.overflow_wrap == OverflowWrap::BreakWord;

        // Only the end of `last_slice` is read below: it marks where the next word starts.
        let mut last_slice = self.range.start..self.range.start;
        for break_index in linebreak_iter {
            // A break exactly at the start of the segment produces no text; it is only recorded.
            if *break_index == self.range.start {
                self.break_at_start = true;
                continue;
            }

            // Work on a per-word copy so that flags set for this word do not affect later words.
            let mut options = *shaping_options;

            // Extend the slice to the next UAX#14 line break opportunity.
            let mut slice = last_slice.end..*break_index;
            let word = &formatting_context_text[slice.clone()];

            // Split off any trailing whitespace into a separate glyph run.
            let mut whitespace = slice.end..slice.end;
            let mut rev_char_indices = word.char_indices().rev().peekable();

            let mut ends_with_whitespace = false;
            // Peeking at the reversed iterator looks at the last character of the word.
            let ends_with_newline = rev_char_indices
                .peek()
                .is_some_and(|&(_, character)| character == '\n');
            // Walking backwards while the characters are whitespace and taking the last item
            // yields the first (leftmost) character of the trailing whitespace.
            if let Some((first_white_space_index, first_white_space_character)) = rev_char_indices
                .take_while(|&(_, character)| char_is_whitespace(character))
                .last()
            {
                ends_with_whitespace = true;
                whitespace.start = slice.start + first_white_space_index;

                // If line breaking for a piece of text that has `white-space-collapse: break-spaces` there
                // is a line break opportunity *after* every preserved space, but not before. This means
                // that we should not split off the first whitespace, unless that white-space is a preserved
                // newline.
                //
                // An exception to this is if the style tells us that we can break in the middle of words.
                if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces &&
                    first_white_space_character != '\n' &&
                    !can_break_anywhere
                {
                    whitespace.start += first_white_space_character.len_utf8();
                    options
                        .flags
                        .insert(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG);
                }

                slice.end = whitespace.start;
            }

            // If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice.
            // TODO: This should only happen for CJK text.
            //
            // Skipping the iteration without touching `last_slice` makes the next slice start
            // at the same position and end at the following break.
            if !ends_with_whitespace &&
                *break_index != self.range.end &&
                text_style.word_break == WordBreak::KeepAll &&
                !can_break_anywhere
            {
                continue;
            }

            // Only advance the last slice if we are not going to try to expand the slice.
            last_slice = slice.start..*break_index;

            // Push the non-whitespace part of the range.
            if !slice.is_empty() {
                self.shape_and_push_range(&slice, formatting_context_text, &font, &options);
            }

            if whitespace.is_empty() {
                continue;
            }

            // Everything pushed from here on is whitespace.
            options.flags.insert(
                ShapingFlags::IS_WHITESPACE_SHAPING_FLAG |
                    ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG,
            );

            // If `white-space-collapse: break-spaces` is active, insert a line breaking opportunity
            // between each white space character in the white space that we trimmed off.
            if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces {
                let start_index = whitespace.start;
                for (index, character) in formatting_context_text[whitespace].char_indices() {
                    let index = start_index + index;
                    self.shape_and_push_range(
                        &(index..index + character.len_utf8()),
                        formatting_context_text,
                        &font,
                        &options,
                    );
                }
                continue;
            }

            // The breaker breaks after every newline, so either there is none,
            // or there is exactly one at the very end. In the latter case,
            // split it into a different run. That's because shaping considers
            // a newline to have the same advance as a space, but during layout
            // we want to treat the newline as having no advance.
            //
            // Subtracting one is enough to step over the newline because `'\n'` is a single
            // byte in UTF-8.
            if ends_with_newline && whitespace.len() > 1 {
                self.shape_and_push_range(
                    &(whitespace.start..whitespace.end - 1),
                    formatting_context_text,
                    &font,
                    &options,
                );
                self.shape_and_push_range(
                    &(whitespace.end - 1..whitespace.end),
                    formatting_context_text,
                    &font,
                    &options,
                );
            } else {
                self.shape_and_push_range(&whitespace, formatting_context_text, &font, &options);
            }
        }
    }
319}
320
/// A single [`TextRun`] for the box tree. These are all descendants of
/// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`].  During
/// box tree construction, text is split into [`TextRun`]s based on their font, script,
/// etc. A newly constructed [`TextRun`] has no shaped text yet; `shaped_text` is filled in
/// by `segment_and_shape`.
///
/// <https://www.w3.org/TR/css-display-3/#css-text-run>
#[derive(Debug, MallocSizeOf)]
pub(crate) struct TextRun {
    /// The [`BaseFragmentInfo`] for this [`TextRun`]. Usually this comes from the
    /// original text node in the DOM for the text.
    pub base_fragment_info: BaseFragmentInfo,

    /// The [`crate::SharedStyle`] from this [`TextRun`]'s parent element. This is
    /// shared so that incremental layout can simply update the parent element and
    /// this [`TextRun`] will be updated automatically.
    pub inline_styles: SharedInlineStyles,

    /// The range of text in [`super::InlineFormattingContext::text_content`] of the
    /// [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are UTF-8 offsets.
    pub text_range: Range<usize>,

    /// The text of this [`TextRun`] with a font selected, broken into unbreakable
    /// segments, and shaped.
    pub shaped_text: Vec<TextRunSegment>,

    /// The selection range for the DOM text node that originated this [`TextRun`]. This
    /// comes directly from the DOM.
    pub selection_range: Option<ServoRange<ByteIndex>>,
}
350
351impl TextRun {
352    pub(crate) fn new(
353        base_fragment_info: BaseFragmentInfo,
354        inline_styles: SharedInlineStyles,
355        text_range: Range<usize>,
356        selection_range: Option<ServoRange<ByteIndex>>,
357    ) -> Self {
358        Self {
359            base_fragment_info,
360            inline_styles,
361            text_range,
362            shaped_text: Vec::new(),
363            selection_range,
364        }
365    }
366
367    pub(super) fn segment_and_shape(
368        &mut self,
369        formatting_context_text: &str,
370        font_context: &FontContext,
371        linebreaker: &mut LineBreaker,
372        font_cache: &mut Vec<FontKeyAndMetrics>,
373        bidi_info: &BidiInfo,
374    ) {
375        let parent_style = self.inline_styles.style.borrow().clone();
376        let inherited_text_style = parent_style.get_inherited_text().clone();
377        let letter_spacing = inherited_text_style
378            .letter_spacing
379            .0
380            .resolve(parent_style.clone_font().font_size.computed_size());
381        let letter_spacing = if letter_spacing.px() != 0. {
382            Some(app_units::Au::from(letter_spacing))
383        } else {
384            None
385        };
386
387        let mut flags = ShapingFlags::empty();
388        if letter_spacing.is_some() {
389            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
390        }
391        if inherited_text_style.text_rendering == TextRendering::Optimizespeed {
392            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
393            flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG)
394        }
395
396        let specified_word_spacing = &inherited_text_style.word_spacing;
397        let style_word_spacing: Option<Au> = specified_word_spacing.to_length().map(|l| l.into());
398
399        let segments = self
400            .segment_text_by_font(
401                formatting_context_text,
402                font_context,
403                font_cache,
404                bidi_info,
405                &parent_style,
406            )
407            .into_iter()
408            .map(|(mut segment, font)| {
409                let word_spacing = style_word_spacing.unwrap_or_else(|| {
410                    let space_width = font
411                        .glyph_index(' ')
412                        .map(|glyph_id| font.glyph_h_advance(glyph_id))
413                        .unwrap_or(LAST_RESORT_GLYPH_ADVANCE);
414                    specified_word_spacing.to_used_value(Au::from_f64_px(space_width))
415                });
416
417                let mut flags = flags;
418                if segment.bidi_level.is_rtl() {
419                    flags.insert(ShapingFlags::RTL_FLAG);
420                }
421                let shaping_options = ShapingOptions {
422                    letter_spacing,
423                    word_spacing,
424                    script: segment.script,
425                    flags,
426                };
427
428                segment.shape_text(
429                    &parent_style,
430                    formatting_context_text,
431                    linebreaker,
432                    &shaping_options,
433                    font,
434                );
435
436                segment
437            })
438            .collect();
439
440        let _ = std::mem::replace(&mut self.shaped_text, segments);
441    }
442
    /// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched
    /// font and script. Fonts may differ when glyphs are found in fallback fonts. Fonts are stored
    /// in the `font_cache` which is a cache of all font keys and metrics used in this
    /// [`super::InlineFormattingContext`].
    ///
    /// The returned segments are in text order. Each one starts where the previous one ends;
    /// the first starts at `self.text_range.start` and the last ends at `self.text_range.end`.
    /// None of them have been shaped yet. If no font can be found at all, the result is empty.
    fn segment_text_by_font(
        &mut self,
        formatting_context_text: &str,
        font_context: &FontContext,
        font_cache: &mut Vec<FontKeyAndMetrics>,
        bidi_info: &BidiInfo,
        parent_style: &Arc<ComputedValues>,
    ) -> Vec<(TextRunSegment, FontRef)> {
        let font_group = font_context.font_group(parent_style.clone_font());
        // The segment that is currently being extended, along with its font.
        let mut current: Option<(TextRunSegment, FontRef)> = None;
        let mut results = Vec::new();

        let text_run_text = &formatting_context_text[self.text_range.clone()];
        let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars());
        // Byte offsets are tracked relative to the whole formatting context text, because that
        // is what both the segment ranges and `bidi_info.levels` are indexed by.
        let mut next_byte_index = self.text_range.start;
        for (character, next_character) in char_iterator {
            let current_byte_index = next_byte_index;
            next_byte_index += character.len_utf8();

            // These characters never start a new segment; they join whichever segment
            // surrounds them.
            if char_does_not_change_font(character) {
                continue;
            }

            // If the script and BiDi level do not change, use the current font as the first fallback. This
            // can potentially speed up fallback on long font lists or with uncommon scripts which might be
            // at the bottom of the list.
            let script = Script::from(character);
            let bidi_level = bidi_info.levels[current_byte_index];
            let current_font = current.as_ref().and_then(|(text_run_segment, font)| {
                if text_run_segment.bidi_level == bidi_level && text_run_segment.script == script {
                    Some(font.clone())
                } else {
                    None
                }
            });

            // A character without any usable font does not start a new segment either.
            let Some(font) = font_group.write().find_by_codepoint(
                font_context,
                character,
                next_character,
                current_font,
            ) else {
                continue;
            };

            // If the existing segment is compatible with the character, keep going.
            if let Some(current) = current.as_mut() {
                if current.0.update_if_compatible(
                    &font,
                    script,
                    bidi_level,
                    font_cache,
                    font_context,
                ) {
                    continue;
                }
            }

            let font_index = add_or_get_font(&font, font_cache, font_context);

            // Add the new segment and finish the existing one, if we had one. If the first
            // characters in the run were control characters we may be creating the first
            // segment in the middle of the run (ie the start should be the start of this
            // text run's text).
            let start_byte_index = match current {
                Some(_) => current_byte_index,
                None => self.text_range.start,
            };
            let new = (
                TextRunSegment::new(font_index, script, bidi_level, start_byte_index),
                font,
            );
            if let Some(mut finished) = current.replace(new) {
                // The end of the previous segment is the start of the next one.
                finished.0.range.end = current_byte_index;
                results.push(finished);
            }
        }

        // Either we have a current segment or we only had control character and whitespace. In both
        // of those cases, just use the first font.
        if current.is_none() {
            current = font_group.write().first(font_context).map(|font| {
                let font_index = add_or_get_font(&font, font_cache, font_context);
                (
                    TextRunSegment::new(
                        font_index,
                        Script::Common,
                        Level::ltr(),
                        self.text_range.start,
                    ),
                    font,
                )
            })
        }

        // Extend the last segment to the end of the string and add it to the results.
        if let Some(mut last_segment) = current.take() {
            last_segment.0.range.end = self.text_range.end;
            results.push(last_segment);
        }

        results
    }
551
552    pub(super) fn layout_into_line_items(&self, ifc: &mut InlineFormattingContextLayout) {
553        if self.text_range.is_empty() {
554            return;
555        }
556
557        // If we are following replaced content, we should have a soft wrap opportunity, unless the
558        // first character of this `TextRun` prevents that soft wrap opportunity. If we see such a
559        // character it should also override the LineBreaker's indication to break at the start.
560        let have_deferred_soft_wrap_opportunity =
561            mem::replace(&mut ifc.have_deferred_soft_wrap_opportunity, false);
562        let mut soft_wrap_policy = match have_deferred_soft_wrap_opportunity {
563            true => SegmentStartSoftWrapPolicy::Force,
564            false => SegmentStartSoftWrapPolicy::FollowLinebreaker,
565        };
566
567        for segment in self.shaped_text.iter() {
568            segment.layout_into_line_items(self, soft_wrap_policy, ifc);
569            soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker;
570        }
571    }
572}
573
574/// Whether or not this character should be able to change the font during segmentation.  Certain
575/// character are not rendered at all, so it doesn't matter what font we use to render them. They
576/// should just be added to the current segment.
577fn char_does_not_change_font(character: char) -> bool {
578    if character.is_control() {
579        return true;
580    }
581    if character == '\u{00A0}' {
582        return true;
583    }
584    if is_bidi_control(character) {
585        return false;
586    }
587
588    let class = linebreak_property(character);
589    class == XI_LINE_BREAKING_CLASS_CM ||
590        class == XI_LINE_BREAKING_CLASS_GL ||
591        class == XI_LINE_BREAKING_CLASS_ZW ||
592        class == XI_LINE_BREAKING_CLASS_WJ ||
593        class == XI_LINE_BREAKING_CLASS_ZWJ
594}
595
596pub(super) fn add_or_get_font(
597    font: &FontRef,
598    ifc_fonts: &mut Vec<FontKeyAndMetrics>,
599    font_context: &FontContext,
600) -> usize {
601    let font_instance_key = font.key(font_context);
602    for (index, ifc_font_info) in ifc_fonts.iter().enumerate() {
603        if ifc_font_info.key == font_instance_key &&
604            ifc_font_info.pt_size == font.descriptor.pt_size
605        {
606            return index;
607        }
608    }
609    ifc_fonts.push(FontKeyAndMetrics {
610        metrics: font.metrics.clone(),
611        key: font_instance_key,
612        pt_size: font.descriptor.pt_size,
613    });
614    ifc_fonts.len() - 1
615}
616
617pub(super) fn get_font_for_first_font_for_style(
618    style: &ComputedValues,
619    font_context: &FontContext,
620) -> Option<FontRef> {
621    let font = font_context
622        .font_group(style.clone_font())
623        .write()
624        .first(font_context);
625    if font.is_none() {
626        warn!("Could not find font for style: {:?}", style.clone_font());
627    }
628    font
629}
/// An iterator adaptor over characters that yields every character of the input together
/// with the character that follows it, or `None` for the last character.
pub(crate) struct TwoCharsAtATimeIterator<InputIterator> {
    /// The input character iterator.
    iterator: InputIterator,
    /// The character that was read ahead during the previous step, which is the first
    /// character to produce during the next step.
    next_character: Option<char>,
}

impl<InputIterator> TwoCharsAtATimeIterator<InputIterator> {
    /// Wrap `iterator`. Nothing is read from it until the first call to `next`.
    fn new(iterator: InputIterator) -> Self {
        let next_character = None;
        Self {
            next_character,
            iterator,
        }
    }
}

impl<InputIterator> Iterator for TwoCharsAtATimeIterator<InputIterator>
where
    InputIterator: Iterator<Item = char>,
{
    type Item = (char, Option<char>);

    fn next(&mut self) -> Option<Self::Item> {
        // Use the character that was read ahead last time. Without one (on the first call, or
        // once the input has ended) read directly from the input instead.
        let character = match self.next_character.take() {
            Some(lookahead) => lookahead,
            None => self.iterator.next()?,
        };

        // Read one character ahead so that it can be reported alongside the current one.
        let following = self.iterator.next();
        self.next_character = following;
        Some((character, following))
    }
}
661}