layout/flow/inline/
text_run.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::mem;
6use std::ops::Range;
7
8use app_units::Au;
9use base::id::RenderingGroupId;
10use base::text::is_bidi_control;
11use fonts::{
12    FontContext, FontRef, GlyphRun, LAST_RESORT_GLYPH_ADVANCE, ShapingFlags, ShapingOptions,
13};
14use fonts_traits::ByteIndex;
15use log::warn;
16use malloc_size_of_derive::MallocSizeOf;
17use range::Range as ServoRange;
18use servo_arc::Arc;
19use style::computed_values::text_rendering::T as TextRendering;
20use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
21use style::computed_values::word_break::T as WordBreak;
22use style::properties::ComputedValues;
23use style::str::char_is_whitespace;
24use style::values::computed::OverflowWrap;
25use unicode_bidi::{BidiInfo, Level};
26use unicode_script::Script;
27use xi_unicode::linebreak_property;
28
29use super::line_breaker::LineBreaker;
30use super::{FontKeyAndMetrics, InlineFormattingContextLayout, SharedInlineStyles};
31use crate::fragment_tree::BaseFragmentInfo;
32
// These constants are the xi-unicode line breaking classes that are defined in
// `table.rs`. Unfortunately, they are only identified by number.

/// Number that xi-unicode's `linebreak_property` reports for the UAX #14 `CM`
/// (Combining Mark) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_CM: u8 = 9;
/// Number that xi-unicode's `linebreak_property` reports for the UAX #14 `GL`
/// (Non-breaking "Glue") line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_GL: u8 = 12;
/// Number that xi-unicode's `linebreak_property` reports for the UAX #14 `ZW`
/// (Zero Width Space) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_ZW: u8 = 28;
/// Number that xi-unicode's `linebreak_property` reports for the UAX #14 `WJ`
/// (Word Joiner) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_WJ: u8 = 30;
/// Number that xi-unicode's `linebreak_property` reports for the UAX #14 `ZWJ`
/// (Zero Width Joiner) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_ZWJ: u8 = 42;
40
/// Whether a soft wrap opportunity should be processed before the first glyph run of a
/// text run segment.
///
/// There are two reasons why we might want to break at the start:
///
///  1. The line breaker told us that a break was necessary between two separate
///     instances of sending text to it.
///  2. We are following replaced content, i.e. `have_deferred_soft_wrap_opportunity`
///     was set.
///
/// In both cases, we don't want to do this if the first character prevents a
/// soft wrap opportunity.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SegmentStartSoftWrapPolicy {
    /// Always process a soft wrap opportunity before the first run of the segment.
    Force,
    /// Only process a soft wrap opportunity before the first run when the line breaker
    /// reported a break at the start of the segment.
    FollowLinebreaker,
}
54
/// A contiguous piece of a [`TextRun`] that uses a single font, [`Script`] and bidi
/// [`Level`], together with the glyph runs produced by shaping it.
#[derive(Debug, MallocSizeOf)]
pub(crate) struct TextRunSegment {
    /// The index of this font in the parent [`super::InlineFormattingContext`]'s collection of font
    /// information.
    pub font_index: usize,

    /// The [`Script`] of this segment.
    pub script: Script,

    /// The bidi Level of this segment.
    pub bidi_level: Level,

    /// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content.
    pub range: Range<usize>,

    /// Whether or not the linebreaker said that we should allow a line break at the start of this
    /// segment. This is recomputed every time the segment is shaped.
    pub break_at_start: bool,

    /// The shaped runs within this segment. Empty until the segment has been shaped.
    pub runs: Vec<GlyphRun>,
}
77
78impl TextRunSegment {
79    fn new(font_index: usize, script: Script, bidi_level: Level, start_offset: usize) -> Self {
80        Self {
81            font_index,
82            script,
83            bidi_level,
84            range: start_offset..start_offset,
85            runs: Vec::new(),
86            break_at_start: false,
87        }
88    }
89
90    /// Update this segment if the Font and Script are compatible. The update will only
91    /// ever make the Script specific. Returns true if the new Font and Script are
92    /// compatible with this segment or false otherwise.
93    fn update_if_compatible(
94        &mut self,
95        new_font: &FontRef,
96        script: Script,
97        bidi_level: Level,
98        fonts: &[FontKeyAndMetrics],
99        font_context: &FontContext,
100        rendering_group_id: RenderingGroupId,
101    ) -> bool {
102        fn is_specific(script: Script) -> bool {
103            script != Script::Common && script != Script::Inherited
104        }
105
106        if bidi_level != self.bidi_level {
107            return false;
108        }
109
110        let current_font_key_and_metrics = &fonts[self.font_index];
111        if new_font.key(rendering_group_id, font_context) != current_font_key_and_metrics.key ||
112            new_font.descriptor.pt_size != current_font_key_and_metrics.pt_size
113        {
114            return false;
115        }
116
117        if !is_specific(self.script) && is_specific(script) {
118            self.script = script;
119        }
120        script == self.script || !is_specific(script)
121    }
122
123    fn layout_into_line_items(
124        &self,
125        text_run: &TextRun,
126        mut soft_wrap_policy: SegmentStartSoftWrapPolicy,
127        ifc: &mut InlineFormattingContextLayout,
128    ) {
129        if self.break_at_start && soft_wrap_policy == SegmentStartSoftWrapPolicy::FollowLinebreaker
130        {
131            soft_wrap_policy = SegmentStartSoftWrapPolicy::Force;
132        }
133
134        let mut byte_processed = ByteIndex(0);
135        for (run_index, run) in self.runs.iter().enumerate() {
136            ifc.possibly_flush_deferred_forced_line_break();
137
138            // If this whitespace forces a line break, queue up a hard line break the next time we
139            // see any content. We don't line break immediately, because we'd like to finish processing
140            // any ongoing inline boxes before ending the line.
141            if run.is_single_preserved_newline() {
142                byte_processed = byte_processed + run.range.length();
143                ifc.defer_forced_line_break();
144                continue;
145            }
146            // Break before each unbreakable run in this TextRun, except the first unless the
147            // linebreaker was set to break before the first run.
148            if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force {
149                ifc.process_soft_wrap_opportunity();
150            }
151            ifc.push_glyph_store_to_unbreakable_segment(
152                run.glyph_store.clone(),
153                text_run,
154                self.font_index,
155                self.bidi_level,
156                ServoRange::<ByteIndex>::new(
157                    byte_processed + ByteIndex(self.range.start as isize),
158                    ByteIndex(self.range.len() as isize) - byte_processed,
159                ),
160            );
161            byte_processed = byte_processed + run.range.length();
162        }
163    }
164
165    fn shape_and_push_range(
166        &mut self,
167        range: &Range<usize>,
168        formatting_context_text: &str,
169        segment_font: &FontRef,
170        options: &ShapingOptions,
171    ) {
172        self.runs.push(GlyphRun {
173            glyph_store: segment_font.shape_text(&formatting_context_text[range.clone()], options),
174            range: ServoRange::new(
175                ByteIndex(range.start as isize),
176                ByteIndex(range.len() as isize),
177            ),
178        });
179    }
180
181    /// Shape the text of this [`TextRunSegment`], first finding "words" for the shaper by processing
182    /// the linebreaks found in the owning [`super::InlineFormattingContext`]. Linebreaks are filtered,
183    /// based on the style of the parent inline box.
184    fn shape_text(
185        &mut self,
186        parent_style: &ComputedValues,
187        formatting_context_text: &str,
188        linebreaker: &mut LineBreaker,
189        shaping_options: &ShapingOptions,
190        font: FontRef,
191    ) {
192        // Gather the linebreaks that apply to this segment from the inline formatting context's collection
193        // of line breaks. Also add a simulated break at the end of the segment in order to ensure the final
194        // piece of text is processed.
195        let range = self.range.clone();
196        let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone());
197        let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end));
198
199        self.runs.clear();
200        self.runs.reserve(linebreaks.len());
201        self.break_at_start = false;
202
203        let text_style = parent_style.get_inherited_text().clone();
204        let can_break_anywhere = text_style.word_break == WordBreak::BreakAll ||
205            text_style.overflow_wrap == OverflowWrap::Anywhere ||
206            text_style.overflow_wrap == OverflowWrap::BreakWord;
207
208        let mut last_slice = self.range.start..self.range.start;
209        for break_index in linebreak_iter {
210            if *break_index == self.range.start {
211                self.break_at_start = true;
212                continue;
213            }
214
215            let mut options = *shaping_options;
216
217            // Extend the slice to the next UAX#14 line break opportunity.
218            let mut slice = last_slice.end..*break_index;
219            let word = &formatting_context_text[slice.clone()];
220
221            // Split off any trailing whitespace into a separate glyph run.
222            let mut whitespace = slice.end..slice.end;
223            let mut rev_char_indices = word.char_indices().rev().peekable();
224
225            let mut ends_with_whitespace = false;
226            let ends_with_newline = rev_char_indices
227                .peek()
228                .is_some_and(|&(_, character)| character == '\n');
229            if let Some((first_white_space_index, first_white_space_character)) = rev_char_indices
230                .take_while(|&(_, character)| char_is_whitespace(character))
231                .last()
232            {
233                ends_with_whitespace = true;
234                whitespace.start = slice.start + first_white_space_index;
235
236                // If line breaking for a piece of text that has `white-space-collapse: break-spaces` there
237                // is a line break opportunity *after* every preserved space, but not before. This means
238                // that we should not split off the first whitespace, unless that white-space is a preserved
239                // newline.
240                //
241                // An exception to this is if the style tells us that we can break in the middle of words.
242                if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces &&
243                    first_white_space_character != '\n' &&
244                    !can_break_anywhere
245                {
246                    whitespace.start += first_white_space_character.len_utf8();
247                    options
248                        .flags
249                        .insert(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG);
250                }
251
252                slice.end = whitespace.start;
253            }
254
255            // If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice.
256            // TODO: This should only happen for CJK text.
257            if !ends_with_whitespace &&
258                *break_index != self.range.end &&
259                text_style.word_break == WordBreak::KeepAll &&
260                !can_break_anywhere
261            {
262                continue;
263            }
264
265            // Only advance the last slice if we are not going to try to expand the slice.
266            last_slice = slice.start..*break_index;
267
268            // Push the non-whitespace part of the range.
269            if !slice.is_empty() {
270                self.shape_and_push_range(&slice, formatting_context_text, &font, &options);
271            }
272
273            if whitespace.is_empty() {
274                continue;
275            }
276
277            options.flags.insert(
278                ShapingFlags::IS_WHITESPACE_SHAPING_FLAG |
279                    ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG,
280            );
281
282            // If `white-space-collapse: break-spaces` is active, insert a line breaking opportunity
283            // between each white space character in the white space that we trimmed off.
284            if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces {
285                let start_index = whitespace.start;
286                for (index, character) in formatting_context_text[whitespace].char_indices() {
287                    let index = start_index + index;
288                    self.shape_and_push_range(
289                        &(index..index + character.len_utf8()),
290                        formatting_context_text,
291                        &font,
292                        &options,
293                    );
294                }
295                continue;
296            }
297
298            // The breaker breaks after every newline, so either there is none,
299            // or there is exactly one at the very end. In the latter case,
300            // split it into a different run. That's because shaping considers
301            // a newline to have the same advance as a space, but during layout
302            // we want to treat the newline as having no advance.
303            if ends_with_newline && whitespace.len() > 1 {
304                self.shape_and_push_range(
305                    &(whitespace.start..whitespace.end - 1),
306                    formatting_context_text,
307                    &font,
308                    &options,
309                );
310                self.shape_and_push_range(
311                    &(whitespace.end - 1..whitespace.end),
312                    formatting_context_text,
313                    &font,
314                    &options,
315                );
316            } else {
317                self.shape_and_push_range(&whitespace, formatting_context_text, &font, &options);
318            }
319        }
320    }
321}
322
/// A single [`TextRun`] for the box tree. These are all descendants of
/// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`].  During
/// box tree construction, text is split into [`TextRun`]s based on their font, script,
/// etc. When these are created text is already shaped.
///
/// Note that [`TextRun::new`] leaves `shaped_text` empty; the segments are produced when
/// the run is segmented and shaped.
///
/// <https://www.w3.org/TR/css-display-3/#css-text-run>
#[derive(Debug, MallocSizeOf)]
pub(crate) struct TextRun {
    /// The [`BaseFragmentInfo`] for this [`TextRun`]. Usually this comes from the
    /// original text node in the DOM for the text.
    pub base_fragment_info: BaseFragmentInfo,

    /// The [`crate::SharedStyle`] from this [`TextRun`]'s parent element. This is
    /// shared so that incremental layout can simply update the parent element and
    /// this [`TextRun`] will be updated automatically.
    pub inline_styles: SharedInlineStyles,

    /// The range of text in [`super::InlineFormattingContext::text_content`] of the
    /// [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are UTF-8 offsets.
    pub text_range: Range<usize>,

    /// The text of this [`TextRun`] with a font selected, broken into unbreakable
    /// segments, and shaped.
    pub shaped_text: Vec<TextRunSegment>,

    /// The selection range for the DOM text node that originated this [`TextRun`]. This
    /// comes directly from the DOM.
    pub selection_range: Option<ServoRange<ByteIndex>>,
}
352
353impl TextRun {
354    pub(crate) fn new(
355        base_fragment_info: BaseFragmentInfo,
356        inline_styles: SharedInlineStyles,
357        text_range: Range<usize>,
358        selection_range: Option<ServoRange<ByteIndex>>,
359    ) -> Self {
360        Self {
361            base_fragment_info,
362            inline_styles,
363            text_range,
364            shaped_text: Vec::new(),
365            selection_range,
366        }
367    }
368
369    pub(super) fn segment_and_shape(
370        &mut self,
371        formatting_context_text: &str,
372        font_context: &FontContext,
373        linebreaker: &mut LineBreaker,
374        font_cache: &mut Vec<FontKeyAndMetrics>,
375        bidi_info: &BidiInfo,
376        rendering_group_id: RenderingGroupId,
377    ) {
378        let parent_style = self.inline_styles.style.borrow().clone();
379        let inherited_text_style = parent_style.get_inherited_text().clone();
380        let letter_spacing = inherited_text_style
381            .letter_spacing
382            .0
383            .resolve(parent_style.clone_font().font_size.computed_size());
384        let letter_spacing = if letter_spacing.px() != 0. {
385            Some(app_units::Au::from(letter_spacing))
386        } else {
387            None
388        };
389
390        let mut flags = ShapingFlags::empty();
391        if letter_spacing.is_some() {
392            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
393        }
394        if inherited_text_style.text_rendering == TextRendering::Optimizespeed {
395            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
396            flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG)
397        }
398
399        let specified_word_spacing = &inherited_text_style.word_spacing;
400        let style_word_spacing: Option<Au> = specified_word_spacing.to_length().map(|l| l.into());
401
402        let segments = self
403            .segment_text_by_font(
404                formatting_context_text,
405                font_context,
406                font_cache,
407                bidi_info,
408                &parent_style,
409                rendering_group_id,
410            )
411            .into_iter()
412            .map(|(mut segment, font)| {
413                let word_spacing = style_word_spacing.unwrap_or_else(|| {
414                    let space_width = font
415                        .glyph_index(' ')
416                        .map(|glyph_id| font.glyph_h_advance(glyph_id))
417                        .unwrap_or(LAST_RESORT_GLYPH_ADVANCE);
418                    specified_word_spacing.to_used_value(Au::from_f64_px(space_width))
419                });
420
421                let mut flags = flags;
422                if segment.bidi_level.is_rtl() {
423                    flags.insert(ShapingFlags::RTL_FLAG);
424                }
425                let shaping_options = ShapingOptions {
426                    letter_spacing,
427                    word_spacing,
428                    script: segment.script,
429                    flags,
430                };
431
432                segment.shape_text(
433                    &parent_style,
434                    formatting_context_text,
435                    linebreaker,
436                    &shaping_options,
437                    font,
438                );
439
440                segment
441            })
442            .collect();
443
444        let _ = std::mem::replace(&mut self.shaped_text, segments);
445    }
446
447    /// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched
448    /// font and script. Fonts may differ when glyphs are found in fallback fonts. Fonts are stored
449    /// in the `font_cache` which is a cache of all font keys and metrics used in this
450    /// [`super::InlineFormattingContext`].
451    fn segment_text_by_font(
452        &mut self,
453        formatting_context_text: &str,
454        font_context: &FontContext,
455        font_cache: &mut Vec<FontKeyAndMetrics>,
456        bidi_info: &BidiInfo,
457        parent_style: &Arc<ComputedValues>,
458        rendering_group_id: RenderingGroupId,
459    ) -> Vec<(TextRunSegment, FontRef)> {
460        let font_group = font_context.font_group(parent_style.clone_font());
461        let mut current: Option<(TextRunSegment, FontRef)> = None;
462        let mut results = Vec::new();
463
464        let text_run_text = &formatting_context_text[self.text_range.clone()];
465        let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars());
466        let mut next_byte_index = self.text_range.start;
467        for (character, next_character) in char_iterator {
468            let current_byte_index = next_byte_index;
469            next_byte_index += character.len_utf8();
470
471            if char_does_not_change_font(character) {
472                continue;
473            }
474
475            // If the script and BiDi level do not change, use the current font as the first fallback. This
476            // can potentially speed up fallback on long font lists or with uncommon scripts which might be
477            // at the bottom of the list.
478            let script = Script::from(character);
479            let bidi_level = bidi_info.levels[current_byte_index];
480            let current_font = current.as_ref().and_then(|(text_run_segment, font)| {
481                if text_run_segment.bidi_level == bidi_level && text_run_segment.script == script {
482                    Some(font.clone())
483                } else {
484                    None
485                }
486            });
487
488            let Some(font) = font_group.write().find_by_codepoint(
489                font_context,
490                character,
491                next_character,
492                current_font,
493            ) else {
494                continue;
495            };
496
497            // If the existing segment is compatible with the character, keep going.
498            if let Some(current) = current.as_mut() {
499                if current.0.update_if_compatible(
500                    &font,
501                    script,
502                    bidi_level,
503                    font_cache,
504                    font_context,
505                    rendering_group_id,
506                ) {
507                    continue;
508                }
509            }
510
511            let font_index = add_or_get_font(&font, font_cache, font_context, rendering_group_id);
512
513            // Add the new segment and finish the existing one, if we had one. If the first
514            // characters in the run were control characters we may be creating the first
515            // segment in the middle of the run (ie the start should be the start of this
516            // text run's text).
517            let start_byte_index = match current {
518                Some(_) => current_byte_index,
519                None => self.text_range.start,
520            };
521            let new = (
522                TextRunSegment::new(font_index, script, bidi_level, start_byte_index),
523                font,
524            );
525            if let Some(mut finished) = current.replace(new) {
526                // The end of the previous segment is the start of the next one.
527                finished.0.range.end = current_byte_index;
528                results.push(finished);
529            }
530        }
531
532        // Either we have a current segment or we only had control character and whitespace. In both
533        // of those cases, just use the first font.
534        if current.is_none() {
535            current = font_group.write().first(font_context).map(|font| {
536                let font_index =
537                    add_or_get_font(&font, font_cache, font_context, rendering_group_id);
538                (
539                    TextRunSegment::new(
540                        font_index,
541                        Script::Common,
542                        Level::ltr(),
543                        self.text_range.start,
544                    ),
545                    font,
546                )
547            })
548        }
549
550        // Extend the last segment to the end of the string and add it to the results.
551        if let Some(mut last_segment) = current.take() {
552            last_segment.0.range.end = self.text_range.end;
553            results.push(last_segment);
554        }
555
556        results
557    }
558
559    pub(super) fn layout_into_line_items(&self, ifc: &mut InlineFormattingContextLayout) {
560        if self.text_range.is_empty() {
561            return;
562        }
563
564        // If we are following replaced content, we should have a soft wrap opportunity, unless the
565        // first character of this `TextRun` prevents that soft wrap opportunity. If we see such a
566        // character it should also override the LineBreaker's indication to break at the start.
567        let have_deferred_soft_wrap_opportunity =
568            mem::replace(&mut ifc.have_deferred_soft_wrap_opportunity, false);
569        let mut soft_wrap_policy = match have_deferred_soft_wrap_opportunity {
570            true => SegmentStartSoftWrapPolicy::Force,
571            false => SegmentStartSoftWrapPolicy::FollowLinebreaker,
572        };
573
574        for segment in self.shaped_text.iter() {
575            segment.layout_into_line_items(self, soft_wrap_policy, ifc);
576            soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker;
577        }
578    }
579}
580
581/// Whether or not this character should be able to change the font during segmentation.  Certain
582/// character are not rendered at all, so it doesn't matter what font we use to render them. They
583/// should just be added to the current segment.
584fn char_does_not_change_font(character: char) -> bool {
585    if character.is_control() {
586        return true;
587    }
588    if character == '\u{00A0}' {
589        return true;
590    }
591    if is_bidi_control(character) {
592        return false;
593    }
594
595    let class = linebreak_property(character);
596    class == XI_LINE_BREAKING_CLASS_CM ||
597        class == XI_LINE_BREAKING_CLASS_GL ||
598        class == XI_LINE_BREAKING_CLASS_ZW ||
599        class == XI_LINE_BREAKING_CLASS_WJ ||
600        class == XI_LINE_BREAKING_CLASS_ZWJ
601}
602
603pub(super) fn add_or_get_font(
604    font: &FontRef,
605    ifc_fonts: &mut Vec<FontKeyAndMetrics>,
606    font_context: &FontContext,
607    rendering_group_id: RenderingGroupId,
608) -> usize {
609    let font_instance_key = font.key(rendering_group_id, font_context);
610    for (index, ifc_font_info) in ifc_fonts.iter().enumerate() {
611        if ifc_font_info.key == font_instance_key &&
612            ifc_font_info.pt_size == font.descriptor.pt_size
613        {
614            return index;
615        }
616    }
617    ifc_fonts.push(FontKeyAndMetrics {
618        metrics: font.metrics.clone(),
619        key: font_instance_key,
620        pt_size: font.descriptor.pt_size,
621    });
622    ifc_fonts.len() - 1
623}
624
625pub(super) fn get_font_for_first_font_for_style(
626    style: &ComputedValues,
627    font_context: &FontContext,
628) -> Option<FontRef> {
629    let font = font_context
630        .font_group(style.clone_font())
631        .write()
632        .first(font_context);
633    if font.is_none() {
634        warn!("Could not find font for style: {:?}", style.clone_font());
635    }
636    font
637}
/// An iterator adaptor over characters that yields every character together with the
/// character that follows it, i.e. `(character, Some(next_character))`, and
/// `(character, None)` for the final character of the input.
pub(crate) struct TwoCharsAtATimeIterator<InputIterator> {
    /// The input character iterator. It is fused so that it is never observed to resume
    /// after it has reported its end once.
    iterator: std::iter::Fuse<InputIterator>,
    /// The first character to produce in the next run of the iterator.
    next_character: Option<char>,
}

impl<InputIterator> TwoCharsAtATimeIterator<InputIterator>
where
    InputIterator: Iterator<Item = char>,
{
    /// Wrap `iterator`. Nothing is read from it until the first call to `next`.
    fn new(iterator: InputIterator) -> Self {
        Self {
            iterator: iterator.fuse(),
            next_character: None,
        }
    }
}

impl<InputIterator> Iterator for TwoCharsAtATimeIterator<InputIterator>
where
    InputIterator: Iterator<Item = char>,
{
    type Item = (char, Option<char>);

    fn next(&mut self) -> Option<Self::Item> {
        // Use the character that was looked at during the previous call. There is none on
        // the very first call or once the input has ended; since the input is fused, asking
        // it again in the latter case keeps returning `None`.
        let character = self
            .next_character
            .take()
            .or_else(|| self.iterator.next())?;
        self.next_character = self.iterator.next();
        Some((character, self.next_character))
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // One item is produced per input character: those still in the input plus the one
        // that may already be buffered.
        let buffered = usize::from(self.next_character.is_some());
        let (lower, upper) = self.iterator.size_hint();
        (
            lower.saturating_add(buffered),
            upper.and_then(|upper| upper.checked_add(buffered)),
        )
    }
}

impl<InputIterator> std::iter::FusedIterator for TwoCharsAtATimeIterator<InputIterator> where
    InputIterator: Iterator<Item = char>
{
}