layout/flow/inline/
text_run.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::mem;
6use std::ops::Range;
7
8use app_units::Au;
9use base::text::is_bidi_control;
10use fonts::{
11    FontContext, FontRef, GlyphRun, LAST_RESORT_GLYPH_ADVANCE, ShapingFlags, ShapingOptions,
12};
13use fonts_traits::ByteIndex;
14use log::warn;
15use malloc_size_of_derive::MallocSizeOf;
16use range::Range as ServoRange;
17use servo_arc::Arc;
18use style::computed_values::text_rendering::T as TextRendering;
19use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
20use style::computed_values::word_break::T as WordBreak;
21use style::properties::ComputedValues;
22use style::str::char_is_whitespace;
23use style::values::computed::OverflowWrap;
24use unicode_bidi::{BidiInfo, Level};
25use unicode_script::Script;
26use xi_unicode::linebreak_property;
27
28use super::line_breaker::LineBreaker;
29use super::{FontKeyAndMetrics, InlineFormattingContextLayout, SharedInlineStyles};
30use crate::context::LayoutContext;
31use crate::fragment_tree::BaseFragmentInfo;
32
// The xi-unicode crate exposes its line breaking classes only as raw numbers (see its
// `table.rs`), so the values this file needs are given names here. The abbreviations are the
// UAX #14 line breaking class names.

/// Line breaking class `CM` (Combining Mark).
pub(crate) const XI_LINE_BREAKING_CLASS_CM: u8 = 9;
/// Line breaking class `GL` (Non-breaking, "Glue").
pub(crate) const XI_LINE_BREAKING_CLASS_GL: u8 = 12;
/// Line breaking class `ZW` (Zero Width Space).
pub(crate) const XI_LINE_BREAKING_CLASS_ZW: u8 = 28;
/// Line breaking class `WJ` (Word Joiner).
pub(crate) const XI_LINE_BREAKING_CLASS_WJ: u8 = 30;
/// Line breaking class `ZWJ` (Zero Width Joiner).
pub(crate) const XI_LINE_BREAKING_CLASS_ZWJ: u8 = 42;
40
/// Whether a soft wrap opportunity should be processed before the first run of a segment.
///
/// There are two reasons why we might want to break at the start:
///
///  1. The line breaker told us that a break was necessary between two separate
///     instances of sending text to it.
///  2. We are following replaced content, i.e. `have_deferred_soft_wrap_opportunity`.
///
/// In both cases, we don't want to do this if the first character prevents a
/// soft wrap opportunity.
#[derive(PartialEq)]
enum SegmentStartSoftWrapPolicy {
    /// Always process a soft wrap opportunity before the first run.
    Force,
    /// Only do so when the segment itself recorded a break at its start.
    FollowLinebreaker,
}
54
55#[derive(Debug, MallocSizeOf)]
56pub(crate) struct TextRunSegment {
57    /// The index of this font in the parent [`super::InlineFormattingContext`]'s collection of font
58    /// information.
59    pub font_index: usize,
60
61    /// The [`Script`] of this segment.
62    pub script: Script,
63
64    /// The bidi Level of this segment.
65    pub bidi_level: Level,
66
67    /// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content.
68    pub range: Range<usize>,
69
70    /// Whether or not the linebreaker said that we should allow a line break at the start of this
71    /// segment.
72    pub break_at_start: bool,
73
74    /// The shaped runs within this segment.
75    pub runs: Vec<GlyphRun>,
76}
77
78impl TextRunSegment {
79    fn new(font_index: usize, script: Script, bidi_level: Level, start_offset: usize) -> Self {
80        Self {
81            font_index,
82            script,
83            bidi_level,
84            range: start_offset..start_offset,
85            runs: Vec::new(),
86            break_at_start: false,
87        }
88    }
89
90    /// Update this segment if the Font and Script are compatible. The update will only
91    /// ever make the Script specific. Returns true if the new Font and Script are
92    /// compatible with this segment or false otherwise.
93    fn update_if_compatible(
94        &mut self,
95        layout_context: &LayoutContext,
96        new_font: &FontRef,
97        script: Script,
98        bidi_level: Level,
99        fonts: &[FontKeyAndMetrics],
100    ) -> bool {
101        fn is_specific(script: Script) -> bool {
102            script != Script::Common && script != Script::Inherited
103        }
104
105        if bidi_level != self.bidi_level {
106            return false;
107        }
108
109        let current_font_key_and_metrics = &fonts[self.font_index];
110        let painter_id = layout_context.painter_id;
111        let font_context = &layout_context.font_context;
112        if new_font.key(painter_id, font_context) != current_font_key_and_metrics.key ||
113            new_font.descriptor.pt_size != current_font_key_and_metrics.pt_size
114        {
115            return false;
116        }
117
118        if !is_specific(self.script) && is_specific(script) {
119            self.script = script;
120        }
121        script == self.script || !is_specific(script)
122    }
123
124    fn layout_into_line_items(
125        &self,
126        text_run: &TextRun,
127        mut soft_wrap_policy: SegmentStartSoftWrapPolicy,
128        ifc: &mut InlineFormattingContextLayout,
129    ) {
130        if self.break_at_start && soft_wrap_policy == SegmentStartSoftWrapPolicy::FollowLinebreaker
131        {
132            soft_wrap_policy = SegmentStartSoftWrapPolicy::Force;
133        }
134
135        let mut byte_processed = ByteIndex(0);
136        for (run_index, run) in self.runs.iter().enumerate() {
137            ifc.possibly_flush_deferred_forced_line_break();
138
139            // If this whitespace forces a line break, queue up a hard line break the next time we
140            // see any content. We don't line break immediately, because we'd like to finish processing
141            // any ongoing inline boxes before ending the line.
142            if run.is_single_preserved_newline() {
143                byte_processed = byte_processed + run.range.length();
144                ifc.defer_forced_line_break();
145                continue;
146            }
147            // Break before each unbreakable run in this TextRun, except the first unless the
148            // linebreaker was set to break before the first run.
149            if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force {
150                ifc.process_soft_wrap_opportunity();
151            }
152            ifc.push_glyph_store_to_unbreakable_segment(
153                run.glyph_store.clone(),
154                text_run,
155                self.font_index,
156                self.bidi_level,
157                ServoRange::<ByteIndex>::new(
158                    byte_processed + ByteIndex(self.range.start as isize),
159                    ByteIndex(self.range.len() as isize) - byte_processed,
160                ),
161            );
162            byte_processed = byte_processed + run.range.length();
163        }
164    }
165
166    fn shape_and_push_range(
167        &mut self,
168        range: &Range<usize>,
169        formatting_context_text: &str,
170        segment_font: &FontRef,
171        options: &ShapingOptions,
172    ) {
173        self.runs.push(GlyphRun {
174            glyph_store: segment_font.shape_text(&formatting_context_text[range.clone()], options),
175            range: ServoRange::new(
176                ByteIndex(range.start as isize),
177                ByteIndex(range.len() as isize),
178            ),
179        });
180    }
181
182    /// Shape the text of this [`TextRunSegment`], first finding "words" for the shaper by processing
183    /// the linebreaks found in the owning [`super::InlineFormattingContext`]. Linebreaks are filtered,
184    /// based on the style of the parent inline box.
185    fn shape_text(
186        &mut self,
187        parent_style: &ComputedValues,
188        formatting_context_text: &str,
189        linebreaker: &mut LineBreaker,
190        shaping_options: &ShapingOptions,
191        font: FontRef,
192    ) {
193        // Gather the linebreaks that apply to this segment from the inline formatting context's collection
194        // of line breaks. Also add a simulated break at the end of the segment in order to ensure the final
195        // piece of text is processed.
196        let range = self.range.clone();
197        let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone());
198        let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end));
199
200        self.runs.clear();
201        self.runs.reserve(linebreaks.len());
202        self.break_at_start = false;
203
204        let text_style = parent_style.get_inherited_text().clone();
205        let can_break_anywhere = text_style.word_break == WordBreak::BreakAll ||
206            text_style.overflow_wrap == OverflowWrap::Anywhere ||
207            text_style.overflow_wrap == OverflowWrap::BreakWord;
208
209        let mut last_slice = self.range.start..self.range.start;
210        for break_index in linebreak_iter {
211            if *break_index == self.range.start {
212                self.break_at_start = true;
213                continue;
214            }
215
216            let mut options = *shaping_options;
217
218            // Extend the slice to the next UAX#14 line break opportunity.
219            let mut slice = last_slice.end..*break_index;
220            let word = &formatting_context_text[slice.clone()];
221
222            // Split off any trailing whitespace into a separate glyph run.
223            let mut whitespace = slice.end..slice.end;
224            let mut rev_char_indices = word.char_indices().rev().peekable();
225
226            let mut ends_with_whitespace = false;
227            let ends_with_newline = rev_char_indices
228                .peek()
229                .is_some_and(|&(_, character)| character == '\n');
230            if let Some((first_white_space_index, first_white_space_character)) = rev_char_indices
231                .take_while(|&(_, character)| char_is_whitespace(character))
232                .last()
233            {
234                ends_with_whitespace = true;
235                whitespace.start = slice.start + first_white_space_index;
236
237                // If line breaking for a piece of text that has `white-space-collapse: break-spaces` there
238                // is a line break opportunity *after* every preserved space, but not before. This means
239                // that we should not split off the first whitespace, unless that white-space is a preserved
240                // newline.
241                //
242                // An exception to this is if the style tells us that we can break in the middle of words.
243                if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces &&
244                    first_white_space_character != '\n' &&
245                    !can_break_anywhere
246                {
247                    whitespace.start += first_white_space_character.len_utf8();
248                    options
249                        .flags
250                        .insert(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG);
251                }
252
253                slice.end = whitespace.start;
254            }
255
256            // If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice.
257            // TODO: This should only happen for CJK text.
258            if !ends_with_whitespace &&
259                *break_index != self.range.end &&
260                text_style.word_break == WordBreak::KeepAll &&
261                !can_break_anywhere
262            {
263                continue;
264            }
265
266            // Only advance the last slice if we are not going to try to expand the slice.
267            last_slice = slice.start..*break_index;
268
269            // Push the non-whitespace part of the range.
270            if !slice.is_empty() {
271                self.shape_and_push_range(&slice, formatting_context_text, &font, &options);
272            }
273
274            if whitespace.is_empty() {
275                continue;
276            }
277
278            options.flags.insert(
279                ShapingFlags::IS_WHITESPACE_SHAPING_FLAG |
280                    ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG,
281            );
282
283            // If `white-space-collapse: break-spaces` is active, insert a line breaking opportunity
284            // between each white space character in the white space that we trimmed off.
285            if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces {
286                let start_index = whitespace.start;
287                for (index, character) in formatting_context_text[whitespace].char_indices() {
288                    let index = start_index + index;
289                    self.shape_and_push_range(
290                        &(index..index + character.len_utf8()),
291                        formatting_context_text,
292                        &font,
293                        &options,
294                    );
295                }
296                continue;
297            }
298
299            // The breaker breaks after every newline, so either there is none,
300            // or there is exactly one at the very end. In the latter case,
301            // split it into a different run. That's because shaping considers
302            // a newline to have the same advance as a space, but during layout
303            // we want to treat the newline as having no advance.
304            if ends_with_newline && whitespace.len() > 1 {
305                self.shape_and_push_range(
306                    &(whitespace.start..whitespace.end - 1),
307                    formatting_context_text,
308                    &font,
309                    &options,
310                );
311                self.shape_and_push_range(
312                    &(whitespace.end - 1..whitespace.end),
313                    formatting_context_text,
314                    &font,
315                    &options,
316                );
317            } else {
318                self.shape_and_push_range(&whitespace, formatting_context_text, &font, &options);
319            }
320        }
321    }
322}
323
324/// A single [`TextRun`] for the box tree. These are all descendants of
325/// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`].  During
326/// box tree construction, text is split into [`TextRun`]s based on their font, script,
327/// etc. When these are created text is already shaped.
328///
329/// <https://www.w3.org/TR/css-display-3/#css-text-run>
330#[derive(Debug, MallocSizeOf)]
331pub(crate) struct TextRun {
332    /// The [`BaseFragmentInfo`] for this [`TextRun`]. Usually this comes from the
333    /// original text node in the DOM for the text.
334    pub base_fragment_info: BaseFragmentInfo,
335
336    /// The [`crate::SharedStyle`] from this [`TextRun`]s parent element. This is
337    /// shared so that incremental layout can simply update the parent element and
338    /// this [`TextRun`] will be updated automatically.
339    pub inline_styles: SharedInlineStyles,
340
341    /// The range of text in [`super::InlineFormattingContext::text_content`] of the
342    /// [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are UTF-8 offsets.
343    pub text_range: Range<usize>,
344
345    /// The text of this [`TextRun`] with a font selected, broken into unbreakable
346    /// segments, and shaped.
347    pub shaped_text: Vec<TextRunSegment>,
348
349    /// The selection range for the DOM text node that originated this [`TextRun`]. This
350    /// comes directly from the DOM.
351    pub selection_range: Option<ServoRange<ByteIndex>>,
352}
353
354impl TextRun {
355    pub(crate) fn new(
356        base_fragment_info: BaseFragmentInfo,
357        inline_styles: SharedInlineStyles,
358        text_range: Range<usize>,
359        selection_range: Option<ServoRange<ByteIndex>>,
360    ) -> Self {
361        Self {
362            base_fragment_info,
363            inline_styles,
364            text_range,
365            shaped_text: Vec::new(),
366            selection_range,
367        }
368    }
369
370    pub(super) fn segment_and_shape(
371        &mut self,
372        formatting_context_text: &str,
373        layout_context: &LayoutContext,
374        linebreaker: &mut LineBreaker,
375        font_cache: &mut Vec<FontKeyAndMetrics>,
376        bidi_info: &BidiInfo,
377    ) {
378        let parent_style = self.inline_styles.style.borrow().clone();
379        let inherited_text_style = parent_style.get_inherited_text().clone();
380        let letter_spacing = inherited_text_style
381            .letter_spacing
382            .0
383            .resolve(parent_style.clone_font().font_size.computed_size());
384        let letter_spacing = if letter_spacing.px() != 0. {
385            Some(app_units::Au::from(letter_spacing))
386        } else {
387            None
388        };
389
390        let mut flags = ShapingFlags::empty();
391        if letter_spacing.is_some() {
392            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
393        }
394        if inherited_text_style.text_rendering == TextRendering::Optimizespeed {
395            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
396            flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG)
397        }
398
399        let specified_word_spacing = &inherited_text_style.word_spacing;
400        let style_word_spacing: Option<Au> = specified_word_spacing.to_length().map(|l| l.into());
401
402        let segments = self
403            .segment_text_by_font(
404                layout_context,
405                formatting_context_text,
406                font_cache,
407                bidi_info,
408                &parent_style,
409            )
410            .into_iter()
411            .map(|(mut segment, font)| {
412                let word_spacing = style_word_spacing.unwrap_or_else(|| {
413                    let space_width = font
414                        .glyph_index(' ')
415                        .map(|glyph_id| font.glyph_h_advance(glyph_id))
416                        .unwrap_or(LAST_RESORT_GLYPH_ADVANCE);
417                    specified_word_spacing.to_used_value(Au::from_f64_px(space_width))
418                });
419
420                let mut flags = flags;
421                if segment.bidi_level.is_rtl() {
422                    flags.insert(ShapingFlags::RTL_FLAG);
423                }
424                let shaping_options = ShapingOptions {
425                    letter_spacing,
426                    word_spacing,
427                    script: segment.script,
428                    flags,
429                };
430
431                segment.shape_text(
432                    &parent_style,
433                    formatting_context_text,
434                    linebreaker,
435                    &shaping_options,
436                    font,
437                );
438
439                segment
440            })
441            .collect();
442
443        let _ = std::mem::replace(&mut self.shaped_text, segments);
444    }
445
446    /// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched
447    /// font and script. Fonts may differ when glyphs are found in fallback fonts. Fonts are stored
448    /// in the `font_cache` which is a cache of all font keys and metrics used in this
449    /// [`super::InlineFormattingContext`].
450    fn segment_text_by_font(
451        &mut self,
452        layout_context: &LayoutContext,
453        formatting_context_text: &str,
454        font_cache: &mut Vec<FontKeyAndMetrics>,
455        bidi_info: &BidiInfo,
456        parent_style: &Arc<ComputedValues>,
457    ) -> Vec<(TextRunSegment, FontRef)> {
458        let font_group = layout_context
459            .font_context
460            .font_group(parent_style.clone_font());
461        let mut current: Option<(TextRunSegment, FontRef)> = None;
462        let mut results = Vec::new();
463
464        let text_run_text = &formatting_context_text[self.text_range.clone()];
465        let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars());
466        let mut next_byte_index = self.text_range.start;
467        for (character, next_character) in char_iterator {
468            let current_byte_index = next_byte_index;
469            next_byte_index += character.len_utf8();
470
471            if char_does_not_change_font(character) {
472                continue;
473            }
474
475            // If the script and BiDi level do not change, use the current font as the first fallback. This
476            // can potentially speed up fallback on long font lists or with uncommon scripts which might be
477            // at the bottom of the list.
478            let script = Script::from(character);
479            let bidi_level = bidi_info.levels[current_byte_index];
480            let current_font = current.as_ref().and_then(|(text_run_segment, font)| {
481                if text_run_segment.bidi_level == bidi_level && text_run_segment.script == script {
482                    Some(font.clone())
483                } else {
484                    None
485                }
486            });
487
488            let lang = parent_style.get_font()._x_lang.clone();
489
490            let Some(font) = font_group.write().find_by_codepoint(
491                &layout_context.font_context,
492                character,
493                next_character,
494                current_font,
495                Some(lang.0.as_ref().to_string()),
496            ) else {
497                continue;
498            };
499
500            // If the existing segment is compatible with the character, keep going.
501            if let Some(current) = current.as_mut() {
502                if current.0.update_if_compatible(
503                    layout_context,
504                    &font,
505                    script,
506                    bidi_level,
507                    font_cache,
508                ) {
509                    continue;
510                }
511            }
512
513            let font_index = add_or_get_font(layout_context, &font, font_cache);
514
515            // Add the new segment and finish the existing one, if we had one. If the first
516            // characters in the run were control characters we may be creating the first
517            // segment in the middle of the run (ie the start should be the start of this
518            // text run's text).
519            let start_byte_index = match current {
520                Some(_) => current_byte_index,
521                None => self.text_range.start,
522            };
523            let new = (
524                TextRunSegment::new(font_index, script, bidi_level, start_byte_index),
525                font,
526            );
527            if let Some(mut finished) = current.replace(new) {
528                // The end of the previous segment is the start of the next one.
529                finished.0.range.end = current_byte_index;
530                results.push(finished);
531            }
532        }
533
534        // Either we have a current segment or we only had control character and whitespace. In both
535        // of those cases, just use the first font.
536        if current.is_none() {
537            current = font_group
538                .write()
539                .first(&layout_context.font_context)
540                .map(|font| {
541                    let font_index = add_or_get_font(layout_context, &font, font_cache);
542                    (
543                        TextRunSegment::new(
544                            font_index,
545                            Script::Common,
546                            Level::ltr(),
547                            self.text_range.start,
548                        ),
549                        font,
550                    )
551                })
552        }
553
554        // Extend the last segment to the end of the string and add it to the results.
555        if let Some(mut last_segment) = current.take() {
556            last_segment.0.range.end = self.text_range.end;
557            results.push(last_segment);
558        }
559
560        results
561    }
562
563    pub(super) fn layout_into_line_items(&self, ifc: &mut InlineFormattingContextLayout) {
564        if self.text_range.is_empty() {
565            return;
566        }
567
568        // If we are following replaced content, we should have a soft wrap opportunity, unless the
569        // first character of this `TextRun` prevents that soft wrap opportunity. If we see such a
570        // character it should also override the LineBreaker's indication to break at the start.
571        let have_deferred_soft_wrap_opportunity =
572            mem::replace(&mut ifc.have_deferred_soft_wrap_opportunity, false);
573        let mut soft_wrap_policy = match have_deferred_soft_wrap_opportunity {
574            true => SegmentStartSoftWrapPolicy::Force,
575            false => SegmentStartSoftWrapPolicy::FollowLinebreaker,
576        };
577
578        for segment in self.shaped_text.iter() {
579            segment.layout_into_line_items(self, soft_wrap_policy, ifc);
580            soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker;
581        }
582    }
583}
584
585/// Whether or not this character should be able to change the font during segmentation.  Certain
586/// character are not rendered at all, so it doesn't matter what font we use to render them. They
587/// should just be added to the current segment.
588fn char_does_not_change_font(character: char) -> bool {
589    if character.is_control() {
590        return true;
591    }
592    if character == '\u{00A0}' {
593        return true;
594    }
595    if is_bidi_control(character) {
596        return false;
597    }
598
599    let class = linebreak_property(character);
600    class == XI_LINE_BREAKING_CLASS_CM ||
601        class == XI_LINE_BREAKING_CLASS_GL ||
602        class == XI_LINE_BREAKING_CLASS_ZW ||
603        class == XI_LINE_BREAKING_CLASS_WJ ||
604        class == XI_LINE_BREAKING_CLASS_ZWJ
605}
606
607pub(super) fn add_or_get_font(
608    layout_context: &LayoutContext,
609    font: &FontRef,
610    ifc_fonts: &mut Vec<FontKeyAndMetrics>,
611) -> usize {
612    let font_instance_key = font.key(layout_context.painter_id, &layout_context.font_context);
613    for (index, ifc_font_info) in ifc_fonts.iter().enumerate() {
614        if ifc_font_info.key == font_instance_key &&
615            ifc_font_info.pt_size == font.descriptor.pt_size
616        {
617            return index;
618        }
619    }
620    ifc_fonts.push(FontKeyAndMetrics {
621        metrics: font.metrics.clone(),
622        key: font_instance_key,
623        pt_size: font.descriptor.pt_size,
624    });
625    ifc_fonts.len() - 1
626}
627
628pub(super) fn get_font_for_first_font_for_style(
629    style: &ComputedValues,
630    font_context: &FontContext,
631) -> Option<FontRef> {
632    let font = font_context
633        .font_group(style.clone_font())
634        .write()
635        .first(font_context);
636    if font.is_none() {
637        warn!("Could not find font for style: {:?}", style.clone_font());
638    }
639    font
640}
/// An iterator adaptor over characters that yields every character together with the
/// character that follows it, or `None` for the last character of the input.
///
/// For the input `"abc"` this produces `('a', Some('b'))`, `('b', Some('c'))` and
/// `('c', None)`.
pub(crate) struct TwoCharsAtATimeIterator<InputIterator> {
    /// The input character iterator.
    iterator: InputIterator,
    /// The first character to produce in the next run of the iterator.
    next_character: Option<char>,
    /// Set as soon as the input iterator has returned `None`. A `None` in `next_character`
    /// alone cannot tell "not started yet" apart from "input finished", so this flag is what
    /// keeps an exhausted input from being polled again.
    input_exhausted: bool,
}

impl<InputIterator> TwoCharsAtATimeIterator<InputIterator> {
    /// Wraps `iterator`. Nothing is read from it until the first call to `next`.
    fn new(iterator: InputIterator) -> Self {
        Self {
            iterator,
            next_character: None,
            input_exhausted: false,
        }
    }
}

impl<InputIterator> Iterator for TwoCharsAtATimeIterator<InputIterator>
where
    InputIterator: Iterator<Item = char>,
{
    type Item = (char, Option<char>);

    /// Returns the next character paired with its successor. Once the input has returned
    /// `None`, this returns `None` forever and never polls the input again.
    fn next(&mut self) -> Option<Self::Item> {
        if self.input_exhausted {
            return None;
        }

        // Without a stored look-ahead character this is the very first call, so the current
        // character still has to be read from the input.
        let character = match self.next_character.take() {
            Some(character) => character,
            None => match self.iterator.next() {
                Some(character) => character,
                None => {
                    self.input_exhausted = true;
                    return None;
                },
            },
        };

        self.next_character = self.iterator.next();
        if self.next_character.is_none() {
            self.input_exhausted = true;
        }
        Some((character, self.next_character))
    }
}

impl<InputIterator> std::iter::FusedIterator for TwoCharsAtATimeIterator<InputIterator> where
    InputIterator: Iterator<Item = char>
{
}