layout/flow/inline/
text_run.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::mem;
6use std::ops::Range;
7
8use app_units::Au;
9use base::id::RenderingGroupId;
10use base::text::is_bidi_control;
11use fonts::{
12    FontContext, FontRef, GlyphRun, LAST_RESORT_GLYPH_ADVANCE, ShapingFlags, ShapingOptions,
13};
14use fonts_traits::ByteIndex;
15use log::warn;
16use malloc_size_of_derive::MallocSizeOf;
17use range::Range as ServoRange;
18use servo_arc::Arc;
19use style::computed_values::text_rendering::T as TextRendering;
20use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
21use style::computed_values::word_break::T as WordBreak;
22use style::properties::ComputedValues;
23use style::str::char_is_whitespace;
24use style::values::computed::OverflowWrap;
25use unicode_bidi::{BidiInfo, Level};
26use unicode_script::Script;
27use xi_unicode::linebreak_property;
28
29use super::line_breaker::LineBreaker;
30use super::{FontKeyAndMetrics, InlineFormattingContextLayout, SharedInlineStyles};
31use crate::fragment_tree::BaseFragmentInfo;
32
// These constants are the xi-unicode line breaking classes that are defined in
// `table.rs`. Unfortunately, they are only identified by number. They are compared
// against the result of `linebreak_property` in `char_does_not_change_font`.

/// Line breaking class `CM` (UAX #14: Combining Mark).
pub(crate) const XI_LINE_BREAKING_CLASS_CM: u8 = 9;
/// Line breaking class `GL` (UAX #14: Non-breaking "Glue").
pub(crate) const XI_LINE_BREAKING_CLASS_GL: u8 = 12;
/// Line breaking class `ZW` (UAX #14: Zero Width Space).
pub(crate) const XI_LINE_BREAKING_CLASS_ZW: u8 = 28;
/// Line breaking class `WJ` (UAX #14: Word Joiner).
pub(crate) const XI_LINE_BREAKING_CLASS_WJ: u8 = 30;
/// Line breaking class `ZWJ` (UAX #14: Zero Width Joiner).
pub(crate) const XI_LINE_BREAKING_CLASS_ZWJ: u8 = 42;
40
// There are two reasons why we might want to break at the start:
//
//  1. The line breaker told us that a break was necessary between two separate
//     instances of sending text to it.
//  2. We are following replaced content, i.e. `have_deferred_soft_wrap_opportunity`.
//
// In both cases, we don't want to do this if the first character prevents a
// soft wrap opportunity.
#[derive(PartialEq)]
enum SegmentStartSoftWrapPolicy {
    /// Process a soft wrap opportunity before the first run of the segment.
    Force,
    /// Process a soft wrap opportunity before the first run of the segment only if the
    /// segment's `break_at_start` flag is set.
    FollowLinebreaker,
}
54
/// A piece of a [`TextRun`] that uses a single font, script and bidi level, together with
/// the glyph runs produced by shaping it.
#[derive(Debug, MallocSizeOf)]
pub(crate) struct TextRunSegment {
    /// The index of this font in the parent [`super::InlineFormattingContext`]'s collection of font
    /// information.
    pub font_index: usize,

    /// The [`Script`] of this segment.
    pub script: Script,

    /// The bidi Level of this segment.
    pub bidi_level: Level,

    /// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content.
    pub range: Range<usize>,

    /// Whether or not the linebreaker said that we should allow a line break at the start of this
    /// segment. This is recomputed every time the segment is shaped.
    pub break_at_start: bool,

    /// The shaped runs within this segment. This is empty until the segment has been shaped.
    pub runs: Vec<GlyphRun>,
}
77
78impl TextRunSegment {
79    fn new(font_index: usize, script: Script, bidi_level: Level, start_offset: usize) -> Self {
80        Self {
81            font_index,
82            script,
83            bidi_level,
84            range: start_offset..start_offset,
85            runs: Vec::new(),
86            break_at_start: false,
87        }
88    }
89
90    /// Update this segment if the Font and Script are compatible. The update will only
91    /// ever make the Script specific. Returns true if the new Font and Script are
92    /// compatible with this segment or false otherwise.
93    fn update_if_compatible(
94        &mut self,
95        new_font: &FontRef,
96        script: Script,
97        bidi_level: Level,
98        fonts: &[FontKeyAndMetrics],
99        font_context: &FontContext,
100        rendering_group_id: RenderingGroupId,
101    ) -> bool {
102        fn is_specific(script: Script) -> bool {
103            script != Script::Common && script != Script::Inherited
104        }
105
106        if bidi_level != self.bidi_level {
107            return false;
108        }
109
110        let current_font_key_and_metrics = &fonts[self.font_index];
111        if new_font.key(rendering_group_id, font_context) != current_font_key_and_metrics.key ||
112            new_font.descriptor.pt_size != current_font_key_and_metrics.pt_size
113        {
114            return false;
115        }
116
117        if !is_specific(self.script) && is_specific(script) {
118            self.script = script;
119        }
120        script == self.script || !is_specific(script)
121    }
122
123    fn layout_into_line_items(
124        &self,
125        text_run: &TextRun,
126        mut soft_wrap_policy: SegmentStartSoftWrapPolicy,
127        ifc: &mut InlineFormattingContextLayout,
128    ) {
129        if self.break_at_start && soft_wrap_policy == SegmentStartSoftWrapPolicy::FollowLinebreaker
130        {
131            soft_wrap_policy = SegmentStartSoftWrapPolicy::Force;
132        }
133
134        let mut byte_processed = ByteIndex(0);
135        for (run_index, run) in self.runs.iter().enumerate() {
136            ifc.possibly_flush_deferred_forced_line_break();
137
138            // If this whitespace forces a line break, queue up a hard line break the next time we
139            // see any content. We don't line break immediately, because we'd like to finish processing
140            // any ongoing inline boxes before ending the line.
141            if run.is_single_preserved_newline() {
142                byte_processed = byte_processed + run.range.length();
143                ifc.defer_forced_line_break();
144                continue;
145            }
146            // Break before each unbreakable run in this TextRun, except the first unless the
147            // linebreaker was set to break before the first run.
148            if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force {
149                ifc.process_soft_wrap_opportunity();
150            }
151            ifc.push_glyph_store_to_unbreakable_segment(
152                run.glyph_store.clone(),
153                text_run,
154                self.font_index,
155                self.bidi_level,
156                ServoRange::<ByteIndex>::new(
157                    byte_processed + ByteIndex(self.range.start as isize),
158                    ByteIndex(self.range.len() as isize) - byte_processed,
159                ),
160            );
161            byte_processed = byte_processed + run.range.length();
162        }
163    }
164
165    fn shape_and_push_range(
166        &mut self,
167        range: &Range<usize>,
168        formatting_context_text: &str,
169        segment_font: &FontRef,
170        options: &ShapingOptions,
171    ) {
172        self.runs.push(GlyphRun {
173            glyph_store: segment_font.shape_text(&formatting_context_text[range.clone()], options),
174            range: ServoRange::new(
175                ByteIndex(range.start as isize),
176                ByteIndex(range.len() as isize),
177            ),
178        });
179    }
180
181    /// Shape the text of this [`TextRunSegment`], first finding "words" for the shaper by processing
182    /// the linebreaks found in the owning [`super::InlineFormattingContext`]. Linebreaks are filtered,
183    /// based on the style of the parent inline box.
184    fn shape_text(
185        &mut self,
186        parent_style: &ComputedValues,
187        formatting_context_text: &str,
188        linebreaker: &mut LineBreaker,
189        shaping_options: &ShapingOptions,
190        font: FontRef,
191    ) {
192        // Gather the linebreaks that apply to this segment from the inline formatting context's collection
193        // of line breaks. Also add a simulated break at the end of the segment in order to ensure the final
194        // piece of text is processed.
195        let range = self.range.clone();
196        let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone());
197        let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end));
198
199        self.runs.clear();
200        self.runs.reserve(linebreaks.len());
201        self.break_at_start = false;
202
203        let text_style = parent_style.get_inherited_text().clone();
204        let can_break_anywhere = text_style.word_break == WordBreak::BreakAll ||
205            text_style.overflow_wrap == OverflowWrap::Anywhere ||
206            text_style.overflow_wrap == OverflowWrap::BreakWord;
207
208        let mut last_slice = self.range.start..self.range.start;
209        for break_index in linebreak_iter {
210            if *break_index == self.range.start {
211                self.break_at_start = true;
212                continue;
213            }
214
215            let mut options = *shaping_options;
216
217            // Extend the slice to the next UAX#14 line break opportunity.
218            let mut slice = last_slice.end..*break_index;
219            let word = &formatting_context_text[slice.clone()];
220
221            // Split off any trailing whitespace into a separate glyph run.
222            let mut whitespace = slice.end..slice.end;
223            let mut rev_char_indices = word.char_indices().rev().peekable();
224
225            let mut ends_with_whitespace = false;
226            let ends_with_newline = rev_char_indices
227                .peek()
228                .is_some_and(|&(_, character)| character == '\n');
229            if let Some((first_white_space_index, first_white_space_character)) = rev_char_indices
230                .take_while(|&(_, character)| char_is_whitespace(character))
231                .last()
232            {
233                ends_with_whitespace = true;
234                whitespace.start = slice.start + first_white_space_index;
235
236                // If line breaking for a piece of text that has `white-space-collapse: break-spaces` there
237                // is a line break opportunity *after* every preserved space, but not before. This means
238                // that we should not split off the first whitespace, unless that white-space is a preserved
239                // newline.
240                //
241                // An exception to this is if the style tells us that we can break in the middle of words.
242                if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces &&
243                    first_white_space_character != '\n' &&
244                    !can_break_anywhere
245                {
246                    whitespace.start += first_white_space_character.len_utf8();
247                    options
248                        .flags
249                        .insert(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG);
250                }
251
252                slice.end = whitespace.start;
253            }
254
255            // If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice.
256            // TODO: This should only happen for CJK text.
257            if !ends_with_whitespace &&
258                *break_index != self.range.end &&
259                text_style.word_break == WordBreak::KeepAll &&
260                !can_break_anywhere
261            {
262                continue;
263            }
264
265            // Only advance the last slice if we are not going to try to expand the slice.
266            last_slice = slice.start..*break_index;
267
268            // Push the non-whitespace part of the range.
269            if !slice.is_empty() {
270                self.shape_and_push_range(&slice, formatting_context_text, &font, &options);
271            }
272
273            if whitespace.is_empty() {
274                continue;
275            }
276
277            options.flags.insert(
278                ShapingFlags::IS_WHITESPACE_SHAPING_FLAG |
279                    ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG,
280            );
281
282            // If `white-space-collapse: break-spaces` is active, insert a line breaking opportunity
283            // between each white space character in the white space that we trimmed off.
284            if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces {
285                let start_index = whitespace.start;
286                for (index, character) in formatting_context_text[whitespace].char_indices() {
287                    let index = start_index + index;
288                    self.shape_and_push_range(
289                        &(index..index + character.len_utf8()),
290                        formatting_context_text,
291                        &font,
292                        &options,
293                    );
294                }
295                continue;
296            }
297
298            // The breaker breaks after every newline, so either there is none,
299            // or there is exactly one at the very end. In the latter case,
300            // split it into a different run. That's because shaping considers
301            // a newline to have the same advance as a space, but during layout
302            // we want to treat the newline as having no advance.
303            if ends_with_newline && whitespace.len() > 1 {
304                self.shape_and_push_range(
305                    &(whitespace.start..whitespace.end - 1),
306                    formatting_context_text,
307                    &font,
308                    &options,
309                );
310                self.shape_and_push_range(
311                    &(whitespace.end - 1..whitespace.end),
312                    formatting_context_text,
313                    &font,
314                    &options,
315                );
316            } else {
317                self.shape_and_push_range(&whitespace, formatting_context_text, &font, &options);
318            }
319        }
320    }
321}
322
/// A single [`TextRun`] for the box tree. These are all descendants of
/// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`].  During
/// box tree construction, text is split into [`TextRun`]s based on their font, script,
/// etc. A freshly constructed [`TextRun`] has no shaped text yet;
/// [`TextRun::segment_and_shape`] fills in [`TextRun::shaped_text`].
///
/// <https://www.w3.org/TR/css-display-3/#css-text-run>
#[derive(Debug, MallocSizeOf)]
pub(crate) struct TextRun {
    /// The [`BaseFragmentInfo`] for this [`TextRun`]. Usually this comes from the
    /// original text node in the DOM for the text.
    pub base_fragment_info: BaseFragmentInfo,

    /// The [`crate::SharedStyle`] from this [`TextRun`]'s parent element. This is
    /// shared so that incremental layout can simply update the parent element and
    /// this [`TextRun`] will be updated automatically.
    pub inline_styles: SharedInlineStyles,

    /// The range of text in [`super::InlineFormattingContext::text_content`] of the
    /// [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are UTF-8 offsets.
    pub text_range: Range<usize>,

    /// The text of this [`TextRun`] with a font selected, broken into unbreakable
    /// segments, and shaped.
    pub shaped_text: Vec<TextRunSegment>,

    /// The selection range for the DOM text node that originated this [`TextRun`]. This
    /// comes directly from the DOM.
    pub selection_range: Option<ServoRange<ByteIndex>>,
}
352
353impl TextRun {
354    pub(crate) fn new(
355        base_fragment_info: BaseFragmentInfo,
356        inline_styles: SharedInlineStyles,
357        text_range: Range<usize>,
358        selection_range: Option<ServoRange<ByteIndex>>,
359    ) -> Self {
360        Self {
361            base_fragment_info,
362            inline_styles,
363            text_range,
364            shaped_text: Vec::new(),
365            selection_range,
366        }
367    }
368
369    pub(super) fn segment_and_shape(
370        &mut self,
371        formatting_context_text: &str,
372        font_context: &FontContext,
373        linebreaker: &mut LineBreaker,
374        font_cache: &mut Vec<FontKeyAndMetrics>,
375        bidi_info: &BidiInfo,
376        rendering_group_id: RenderingGroupId,
377    ) {
378        let parent_style = self.inline_styles.style.borrow().clone();
379        let inherited_text_style = parent_style.get_inherited_text().clone();
380        let letter_spacing = inherited_text_style
381            .letter_spacing
382            .0
383            .resolve(parent_style.clone_font().font_size.computed_size());
384        let letter_spacing = if letter_spacing.px() != 0. {
385            Some(app_units::Au::from(letter_spacing))
386        } else {
387            None
388        };
389
390        let mut flags = ShapingFlags::empty();
391        if letter_spacing.is_some() {
392            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
393        }
394        if inherited_text_style.text_rendering == TextRendering::Optimizespeed {
395            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
396            flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG)
397        }
398
399        let specified_word_spacing = &inherited_text_style.word_spacing;
400        let style_word_spacing: Option<Au> = specified_word_spacing.to_length().map(|l| l.into());
401
402        let segments = self
403            .segment_text_by_font(
404                formatting_context_text,
405                font_context,
406                font_cache,
407                bidi_info,
408                &parent_style,
409                rendering_group_id,
410            )
411            .into_iter()
412            .map(|(mut segment, font)| {
413                let word_spacing = style_word_spacing.unwrap_or_else(|| {
414                    let space_width = font
415                        .glyph_index(' ')
416                        .map(|glyph_id| font.glyph_h_advance(glyph_id))
417                        .unwrap_or(LAST_RESORT_GLYPH_ADVANCE);
418                    specified_word_spacing.to_used_value(Au::from_f64_px(space_width))
419                });
420
421                let mut flags = flags;
422                if segment.bidi_level.is_rtl() {
423                    flags.insert(ShapingFlags::RTL_FLAG);
424                }
425                let shaping_options = ShapingOptions {
426                    letter_spacing,
427                    word_spacing,
428                    script: segment.script,
429                    flags,
430                };
431
432                segment.shape_text(
433                    &parent_style,
434                    formatting_context_text,
435                    linebreaker,
436                    &shaping_options,
437                    font,
438                );
439
440                segment
441            })
442            .collect();
443
444        let _ = std::mem::replace(&mut self.shaped_text, segments);
445    }
446
447    /// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched
448    /// font and script. Fonts may differ when glyphs are found in fallback fonts. Fonts are stored
449    /// in the `font_cache` which is a cache of all font keys and metrics used in this
450    /// [`super::InlineFormattingContext`].
451    fn segment_text_by_font(
452        &mut self,
453        formatting_context_text: &str,
454        font_context: &FontContext,
455        font_cache: &mut Vec<FontKeyAndMetrics>,
456        bidi_info: &BidiInfo,
457        parent_style: &Arc<ComputedValues>,
458        rendering_group_id: RenderingGroupId,
459    ) -> Vec<(TextRunSegment, FontRef)> {
460        let font_group = font_context.font_group(parent_style.clone_font());
461        let mut current: Option<(TextRunSegment, FontRef)> = None;
462        let mut results = Vec::new();
463
464        let text_run_text = &formatting_context_text[self.text_range.clone()];
465        let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars());
466        let mut next_byte_index = self.text_range.start;
467        for (character, next_character) in char_iterator {
468            let current_byte_index = next_byte_index;
469            next_byte_index += character.len_utf8();
470
471            if char_does_not_change_font(character) {
472                continue;
473            }
474
475            // If the script and BiDi level do not change, use the current font as the first fallback. This
476            // can potentially speed up fallback on long font lists or with uncommon scripts which might be
477            // at the bottom of the list.
478            let script = Script::from(character);
479            let bidi_level = bidi_info.levels[current_byte_index];
480            let current_font = current.as_ref().and_then(|(text_run_segment, font)| {
481                if text_run_segment.bidi_level == bidi_level && text_run_segment.script == script {
482                    Some(font.clone())
483                } else {
484                    None
485                }
486            });
487
488            let lang = parent_style.get_font()._x_lang.clone();
489
490            let Some(font) = font_group.write().find_by_codepoint(
491                font_context,
492                character,
493                next_character,
494                current_font,
495                Some(lang.0.as_ref().to_string()),
496            ) else {
497                continue;
498            };
499
500            // If the existing segment is compatible with the character, keep going.
501            if let Some(current) = current.as_mut() {
502                if current.0.update_if_compatible(
503                    &font,
504                    script,
505                    bidi_level,
506                    font_cache,
507                    font_context,
508                    rendering_group_id,
509                ) {
510                    continue;
511                }
512            }
513
514            let font_index = add_or_get_font(&font, font_cache, font_context, rendering_group_id);
515
516            // Add the new segment and finish the existing one, if we had one. If the first
517            // characters in the run were control characters we may be creating the first
518            // segment in the middle of the run (ie the start should be the start of this
519            // text run's text).
520            let start_byte_index = match current {
521                Some(_) => current_byte_index,
522                None => self.text_range.start,
523            };
524            let new = (
525                TextRunSegment::new(font_index, script, bidi_level, start_byte_index),
526                font,
527            );
528            if let Some(mut finished) = current.replace(new) {
529                // The end of the previous segment is the start of the next one.
530                finished.0.range.end = current_byte_index;
531                results.push(finished);
532            }
533        }
534
535        // Either we have a current segment or we only had control character and whitespace. In both
536        // of those cases, just use the first font.
537        if current.is_none() {
538            current = font_group.write().first(font_context).map(|font| {
539                let font_index =
540                    add_or_get_font(&font, font_cache, font_context, rendering_group_id);
541                (
542                    TextRunSegment::new(
543                        font_index,
544                        Script::Common,
545                        Level::ltr(),
546                        self.text_range.start,
547                    ),
548                    font,
549                )
550            })
551        }
552
553        // Extend the last segment to the end of the string and add it to the results.
554        if let Some(mut last_segment) = current.take() {
555            last_segment.0.range.end = self.text_range.end;
556            results.push(last_segment);
557        }
558
559        results
560    }
561
562    pub(super) fn layout_into_line_items(&self, ifc: &mut InlineFormattingContextLayout) {
563        if self.text_range.is_empty() {
564            return;
565        }
566
567        // If we are following replaced content, we should have a soft wrap opportunity, unless the
568        // first character of this `TextRun` prevents that soft wrap opportunity. If we see such a
569        // character it should also override the LineBreaker's indication to break at the start.
570        let have_deferred_soft_wrap_opportunity =
571            mem::replace(&mut ifc.have_deferred_soft_wrap_opportunity, false);
572        let mut soft_wrap_policy = match have_deferred_soft_wrap_opportunity {
573            true => SegmentStartSoftWrapPolicy::Force,
574            false => SegmentStartSoftWrapPolicy::FollowLinebreaker,
575        };
576
577        for segment in self.shaped_text.iter() {
578            segment.layout_into_line_items(self, soft_wrap_policy, ifc);
579            soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker;
580        }
581    }
582}
583
584/// Whether or not this character should be able to change the font during segmentation.  Certain
585/// character are not rendered at all, so it doesn't matter what font we use to render them. They
586/// should just be added to the current segment.
587fn char_does_not_change_font(character: char) -> bool {
588    if character.is_control() {
589        return true;
590    }
591    if character == '\u{00A0}' {
592        return true;
593    }
594    if is_bidi_control(character) {
595        return false;
596    }
597
598    let class = linebreak_property(character);
599    class == XI_LINE_BREAKING_CLASS_CM ||
600        class == XI_LINE_BREAKING_CLASS_GL ||
601        class == XI_LINE_BREAKING_CLASS_ZW ||
602        class == XI_LINE_BREAKING_CLASS_WJ ||
603        class == XI_LINE_BREAKING_CLASS_ZWJ
604}
605
606pub(super) fn add_or_get_font(
607    font: &FontRef,
608    ifc_fonts: &mut Vec<FontKeyAndMetrics>,
609    font_context: &FontContext,
610    rendering_group_id: RenderingGroupId,
611) -> usize {
612    let font_instance_key = font.key(rendering_group_id, font_context);
613    for (index, ifc_font_info) in ifc_fonts.iter().enumerate() {
614        if ifc_font_info.key == font_instance_key &&
615            ifc_font_info.pt_size == font.descriptor.pt_size
616        {
617            return index;
618        }
619    }
620    ifc_fonts.push(FontKeyAndMetrics {
621        metrics: font.metrics.clone(),
622        key: font_instance_key,
623        pt_size: font.descriptor.pt_size,
624    });
625    ifc_fonts.len() - 1
626}
627
628pub(super) fn get_font_for_first_font_for_style(
629    style: &ComputedValues,
630    font_context: &FontContext,
631) -> Option<FontRef> {
632    let font = font_context
633        .font_group(style.clone_font())
634        .write()
635        .first(font_context);
636    if font.is_none() {
637        warn!("Could not find font for style: {:?}", style.clone_font());
638    }
639    font
640}
/// An iterator adaptor that yields every character of the wrapped iterator together with
/// the character that follows it. The final character is paired with `None`.
pub(crate) struct TwoCharsAtATimeIterator<InputIterator> {
    /// The input character iterator.
    iterator: InputIterator,
    /// The first character to produce in the next run of the iterator.
    next_character: Option<char>,
}

impl<InputIterator> TwoCharsAtATimeIterator<InputIterator> {
    /// Wrap `iterator`. Nothing is read from it until the first call to `next`.
    fn new(iterator: InputIterator) -> Self {
        Self {
            next_character: None,
            iterator,
        }
    }
}

impl<InputIterator> Iterator for TwoCharsAtATimeIterator<InputIterator>
where
    InputIterator: Iterator<Item = char>,
{
    type Item = (char, Option<char>);

    fn next(&mut self) -> Option<Self::Item> {
        // Use the character that was looked ahead last time, or pull a fresh one if there
        // is none (first call, or the input ran out).
        let character = match self.next_character.take() {
            Some(buffered) => buffered,
            None => self.iterator.next()?,
        };
        self.next_character = self.iterator.next();
        Some((character, self.next_character))
    }
}
672}