layout/flow/inline/
text_run.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::mem;
6use std::ops::Range;
7use std::sync::Arc;
8
9use app_units::Au;
10use fonts::{
11    FontContext, FontRef, GlyphStore, LAST_RESORT_GLYPH_ADVANCE, ShapingFlags, ShapingOptions,
12};
13use icu_locid::subtags::Language;
14use log::warn;
15use malloc_size_of_derive::MallocSizeOf;
16use servo_arc::Arc as ServoArc;
17use servo_base::text::is_bidi_control;
18use style::computed_values::text_rendering::T as TextRendering;
19use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
20use style::computed_values::word_break::T as WordBreak;
21use style::properties::ComputedValues;
22use style::str::char_is_whitespace;
23use style::values::computed::OverflowWrap;
24use unicode_bidi::{BidiInfo, Level};
25use unicode_script::Script;
26use xi_unicode::linebreak_property;
27
28use super::line_breaker::LineBreaker;
29use super::{InlineFormattingContextLayout, SharedInlineStyles};
30use crate::context::LayoutContext;
31use crate::dom::WeakLayoutBox;
32use crate::flow::inline::line::TextRunOffsets;
33use crate::fragment_tree::BaseFragmentInfo;
34
// The `xi-unicode` crate only identifies line breaking classes by number (they are
// defined in its `table.rs`), so the classes that this module cares about are named here.

/// The `xi-unicode` number of the UAX#14 line breaking class CM (Combining Mark).
pub(crate) const XI_LINE_BREAKING_CLASS_CM: u8 = 9;
/// The `xi-unicode` number of the UAX#14 line breaking class GL (Non-breaking "Glue").
pub(crate) const XI_LINE_BREAKING_CLASS_GL: u8 = 12;
/// The `xi-unicode` number of the UAX#14 line breaking class ZW (Zero Width Space).
pub(crate) const XI_LINE_BREAKING_CLASS_ZW: u8 = 28;
/// The `xi-unicode` number of the UAX#14 line breaking class WJ (Word Joiner).
pub(crate) const XI_LINE_BREAKING_CLASS_WJ: u8 = 30;
/// The `xi-unicode` number of the UAX#14 line breaking class ZWJ (Zero Width Joiner).
pub(crate) const XI_LINE_BREAKING_CLASS_ZWJ: u8 = 42;
42
/// Whether a soft wrap opportunity should be processed before the first run of a
/// [`TextRunSegment`].
///
/// There are two reasons why we might want to break at the start:
///
///  1. The line breaker told us that a break was necessary between two separate
///     instances of sending text to it.
///  2. We are following replaced content, ie `have_deferred_soft_wrap_opportunity`.
///
/// In both cases, we don't want to do this if the first character prevents a
/// soft wrap opportunity.
#[derive(PartialEq)]
enum SegmentStartSoftWrapPolicy {
    /// Always process a soft wrap opportunity before the first run of the segment.
    Force,
    /// Only process a soft wrap opportunity before the first run when the segment
    /// recorded a break at its start (`break_at_start`).
    FollowLinebreaker,
}
56
/// A piece of a [`TextRun`] in which the font, script and bidi level do not change. A
/// segment stores the byte and character ranges that it covers in the text of the parent
/// [`super::InlineFormattingContext`] and, once shaped, the glyph runs for that text.
#[derive(Debug, MallocSizeOf)]
pub(crate) struct TextRunSegment {
    /// The font that is used to shape the text of this segment.
    pub font: FontRef,

    /// The [`Script`] of this segment.
    pub script: Script,

    /// The bidi Level of this segment.
    pub bidi_level: Level,

    /// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content.
    pub range: Range<usize>,

    /// The range of characters in the parent [`super::InlineFormattingContext`]'s text content.
    pub character_range: Range<usize>,

    /// Whether or not the linebreaker said that we should allow a line break at the start of this
    /// segment.
    pub break_at_start: bool,

    /// The shaped runs within this segment.
    #[conditional_malloc_size_of]
    pub runs: Vec<Arc<GlyphStore>>,
}
83
84impl TextRunSegment {
85    fn new(
86        font: FontRef,
87        script: Script,
88        bidi_level: Level,
89        start_offset: usize,
90        start_character_offset: usize,
91    ) -> Self {
92        Self {
93            font,
94            script,
95            bidi_level,
96            range: start_offset..start_offset,
97            character_range: start_character_offset..start_character_offset,
98            runs: Vec::new(),
99            break_at_start: false,
100        }
101    }
102
103    /// Update this segment if the Font and Script are compatible. The update will only
104    /// ever make the Script specific. Returns true if the new Font and Script are
105    /// compatible with this segment or false otherwise.
106    fn update_if_compatible(
107        &mut self,
108        layout_context: &LayoutContext,
109        new_font: &FontRef,
110        script: Script,
111        bidi_level: Level,
112    ) -> bool {
113        fn is_specific(script: Script) -> bool {
114            script != Script::Common && script != Script::Inherited
115        }
116
117        if bidi_level != self.bidi_level {
118            return false;
119        }
120
121        let painter_id = layout_context.painter_id;
122        let font_context = &layout_context.font_context;
123        if new_font.key(painter_id, font_context) !=
124            self.font
125                .key(layout_context.painter_id, &layout_context.font_context) ||
126            new_font.descriptor.pt_size != self.font.descriptor.pt_size
127        {
128            return false;
129        }
130
131        if !is_specific(self.script) && is_specific(script) {
132            self.script = script;
133        }
134        script == self.script || !is_specific(script)
135    }
136
137    fn layout_into_line_items(
138        &self,
139        text_run: &TextRun,
140        mut soft_wrap_policy: SegmentStartSoftWrapPolicy,
141        ifc: &mut InlineFormattingContextLayout,
142    ) {
143        if self.break_at_start && soft_wrap_policy == SegmentStartSoftWrapPolicy::FollowLinebreaker
144        {
145            soft_wrap_policy = SegmentStartSoftWrapPolicy::Force;
146        }
147
148        let mut character_range_start = self.character_range.start;
149        for (run_index, run) in self.runs.iter().enumerate() {
150            ifc.possibly_flush_deferred_forced_line_break();
151
152            let new_character_range_end = character_range_start + run.character_count();
153            let offsets = ifc
154                .ifc
155                .shared_selection
156                .clone()
157                .map(|shared_selection| TextRunOffsets {
158                    shared_selection,
159                    character_range: character_range_start..new_character_range_end,
160                });
161
162            // If this whitespace forces a line break, queue up a hard line break the next time we
163            // see any content. We don't line break immediately, because we'd like to finish processing
164            // any ongoing inline boxes before ending the line.
165            if run.is_single_preserved_newline() {
166                ifc.possibly_push_empty_text_run_to_unbreakable_segment(
167                    text_run,
168                    &self.font,
169                    self.bidi_level,
170                    offsets,
171                );
172                character_range_start = new_character_range_end;
173                ifc.defer_forced_line_break();
174                continue;
175            }
176
177            // Break before each unbreakable run in this TextRun, except the first unless the
178            // linebreaker was set to break before the first run.
179            if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force {
180                ifc.process_soft_wrap_opportunity();
181            }
182
183            ifc.push_glyph_store_to_unbreakable_segment(
184                run.clone(),
185                text_run,
186                &self.font,
187                self.bidi_level,
188                offsets,
189            );
190            character_range_start = new_character_range_end;
191        }
192    }
193
194    fn shape_and_push_range(
195        &mut self,
196        range: &Range<usize>,
197        formatting_context_text: &str,
198        options: &ShapingOptions,
199    ) {
200        self.runs.push(
201            self.font
202                .shape_text(&formatting_context_text[range.clone()], options),
203        );
204    }
205
    /// Shape the text of this [`TextRunSegment`], first finding "words" for the shaper by processing
    /// the linebreaks found in the owning [`super::InlineFormattingContext`]. Linebreaks are filtered,
    /// based on the style of the parent inline box.
    ///
    /// Any previously shaped runs of this segment are discarded and `break_at_start` is
    /// recomputed. Trailing whitespace of each "word" is shaped into runs of its own.
    ///
    /// * `parent_style`: the style whose inherited text properties (`word-break`,
    ///   `overflow-wrap` and `white-space-collapse`) control how the text is split up.
    /// * `formatting_context_text`: the text that `self.range` and the linebreaks index into.
    /// * `linebreaker`: the source of the linebreak offsets for this segment's range.
    /// * `shaping_options`: the base options for every run. Whitespace related shaping flags
    ///   are added per run on a copy of them.
    fn shape_text(
        &mut self,
        parent_style: &ComputedValues,
        formatting_context_text: &str,
        linebreaker: &mut LineBreaker,
        shaping_options: &ShapingOptions,
    ) {
        // Gather the linebreaks that apply to this segment from the inline formatting context's collection
        // of line breaks. Also add a simulated break at the end of the segment in order to ensure the final
        // piece of text is processed.
        let range = self.range.clone();
        let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone());
        let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end));

        // Start from a clean state, so that shaping the same segment again does not
        // accumulate runs.
        self.runs.clear();
        self.runs.reserve(linebreaks.len());
        self.break_at_start = false;

        let text_style = parent_style.get_inherited_text().clone();
        let can_break_anywhere = text_style.word_break == WordBreak::BreakAll ||
            text_style.overflow_wrap == OverflowWrap::Anywhere ||
            text_style.overflow_wrap == OverflowWrap::BreakWord;

        // The range of text handled by the last iteration that pushed runs. The next slice
        // starts where this one ends.
        let mut last_slice = self.range.start..self.range.start;
        for break_index in linebreak_iter {
            // A break right at the start of the segment does not delimit any text. It is
            // only recorded, so that layout can allow a line break before the segment.
            if *break_index == self.range.start {
                self.break_at_start = true;
                continue;
            }

            // Work on a copy of the options, because flags are added per run below.
            let mut options = *shaping_options;

            // Extend the slice to the next UAX#14 line break opportunity.
            let mut slice = last_slice.end..*break_index;
            let word = &formatting_context_text[slice.clone()];

            // Split off any trailing whitespace into a separate glyph run.
            let mut whitespace = slice.end..slice.end;
            let mut rev_char_indices = word.char_indices().rev().peekable();

            let mut ends_with_whitespace = false;
            let ends_with_newline = rev_char_indices
                .peek()
                .is_some_and(|&(_, character)| character == '\n');
            // Walking backwards over the trailing whitespace, the last item that is seen is
            // the first whitespace character of the trailing whitespace in text order.
            if let Some((first_white_space_index, first_white_space_character)) = rev_char_indices
                .take_while(|&(_, character)| char_is_whitespace(character))
                .last()
            {
                ends_with_whitespace = true;
                whitespace.start = slice.start + first_white_space_index;

                // If line breaking for a piece of text that has `white-space-collapse: break-spaces` there
                // is a line break opportunity *after* every preserved space, but not before. This means
                // that we should not split off the first whitespace, unless that white-space is a preserved
                // newline.
                //
                // An exception to this is if the style tells us that we can break in the middle of words.
                if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces &&
                    first_white_space_character != '\n' &&
                    !can_break_anywhere
                {
                    whitespace.start += first_white_space_character.len_utf8();
                    options
                        .flags
                        .insert(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG);
                }

                slice.end = whitespace.start;
            }

            // If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice.
            // TODO: This should only happen for CJK text.
            if !ends_with_whitespace &&
                *break_index != self.range.end &&
                text_style.word_break == WordBreak::KeepAll &&
                !can_break_anywhere
            {
                // `last_slice` is left untouched here, so the next iteration covers this text
                // again, extended up to the next break.
                continue;
            }

            // Only advance the last slice if we are not going to try to expand the slice.
            last_slice = slice.start..*break_index;

            // Push the non-whitespace part of the range.
            if !slice.is_empty() {
                self.shape_and_push_range(&slice, formatting_context_text, &options);
            }

            if whitespace.is_empty() {
                continue;
            }

            // Everything that is pushed from here on is whitespace only.
            options.flags.insert(
                ShapingFlags::IS_WHITESPACE_SHAPING_FLAG |
                    ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG,
            );

            // If `white-space-collapse: break-spaces` is active, insert a line breaking opportunity
            // between each white space character in the white space that we trimmed off.
            if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces {
                let start_index = whitespace.start;
                for (index, character) in formatting_context_text[whitespace].char_indices() {
                    let index = start_index + index;
                    self.shape_and_push_range(
                        &(index..index + character.len_utf8()),
                        formatting_context_text,
                        &options,
                    );
                }
                continue;
            }

            // The breaker breaks after every newline, so either there is none,
            // or there is exactly one at the very end. In the latter case,
            // split it into a different run. That's because shaping considers
            // a newline to have the same advance as a space, but during layout
            // we want to treat the newline as having no advance.
            if ends_with_newline && whitespace.len() > 1 {
                self.shape_and_push_range(
                    &(whitespace.start..whitespace.end - 1),
                    formatting_context_text,
                    &options,
                );
                self.shape_and_push_range(
                    &(whitespace.end - 1..whitespace.end),
                    formatting_context_text,
                    &options,
                );
            } else {
                self.shape_and_push_range(&whitespace, formatting_context_text, &options);
            }
        }
    }
342}
343
/// A single [`TextRun`] for the box tree. These are all descendants of
/// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`]. A
/// [`TextRun`] starts out without any shaped text. Segmenting the text by font, script
/// and bidi level and shaping it happens in [`TextRun::segment_and_shape`], which fills
/// in `shaped_text`.
///
/// <https://www.w3.org/TR/css-display-3/#css-text-run>
#[derive(Debug, MallocSizeOf)]
pub(crate) struct TextRun {
    /// The [`BaseFragmentInfo`] for this [`TextRun`]. Usually this comes from the
    /// original text node in the DOM for the text.
    pub base_fragment_info: BaseFragmentInfo,

    /// A weak reference to the parent of this layout box. This becomes valid as soon
    /// as the *parent* of this box is added to the tree.
    pub parent_box: Option<WeakLayoutBox>,

    /// The [`SharedInlineStyles`] from this [`TextRun`]'s parent element. This is
    /// shared so that incremental layout can simply update the parent element and
    /// this [`TextRun`] will be updated automatically.
    pub inline_styles: SharedInlineStyles,

    /// The range of text in [`super::InlineFormattingContext::text_content`] of the
    /// [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are UTF-8 offsets.
    pub text_range: Range<usize>,

    /// The range of characters in this text in [`super::InlineFormattingContext::text_content`]
    /// of the [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are *not*
    /// UTF-8 offsets.
    pub character_range: Range<usize>,

    /// The text of this [`TextRun`] with a font selected, broken into unbreakable
    /// segments, and shaped.
    pub shaped_text: Vec<TextRunSegment>,
}
378
379impl TextRun {
380    pub(crate) fn new(
381        base_fragment_info: BaseFragmentInfo,
382        inline_styles: SharedInlineStyles,
383        text_range: Range<usize>,
384        character_range: Range<usize>,
385    ) -> Self {
386        Self {
387            base_fragment_info,
388            parent_box: None,
389            inline_styles,
390            text_range,
391            character_range,
392            shaped_text: Vec::new(),
393        }
394    }
395
396    pub(super) fn segment_and_shape(
397        &mut self,
398        formatting_context_text: &str,
399        layout_context: &LayoutContext,
400        linebreaker: &mut LineBreaker,
401        bidi_info: &BidiInfo,
402    ) {
403        let parent_style = self.inline_styles.style.borrow().clone();
404        let inherited_text_style = parent_style.get_inherited_text().clone();
405        let letter_spacing = inherited_text_style
406            .letter_spacing
407            .0
408            .resolve(parent_style.clone_font().font_size.computed_size());
409        let letter_spacing = if letter_spacing.px() != 0. {
410            Some(app_units::Au::from(letter_spacing))
411        } else {
412            None
413        };
414        let language = parent_style
415            .get_font()
416            ._x_lang
417            .0
418            .parse()
419            .unwrap_or(Language::UND);
420
421        let mut flags = ShapingFlags::empty();
422        if inherited_text_style.text_rendering == TextRendering::Optimizespeed {
423            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
424            flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG)
425        }
426
427        let specified_word_spacing = &inherited_text_style.word_spacing;
428        let style_word_spacing: Option<Au> = specified_word_spacing.to_length().map(|l| l.into());
429
430        let segments = self
431            .segment_text_by_font(
432                layout_context,
433                formatting_context_text,
434                bidi_info,
435                &parent_style,
436            )
437            .into_iter()
438            .map(|mut segment| {
439                let word_spacing = style_word_spacing.unwrap_or_else(|| {
440                    let space_width = segment
441                        .font
442                        .glyph_index(' ')
443                        .map(|glyph_id| segment.font.glyph_h_advance(glyph_id))
444                        .unwrap_or(LAST_RESORT_GLYPH_ADVANCE);
445                    specified_word_spacing.to_used_value(Au::from_f64_px(space_width))
446                });
447
448                let mut flags = flags;
449                if segment.bidi_level.is_rtl() {
450                    flags.insert(ShapingFlags::RTL_FLAG);
451                }
452
453                // From https://www.w3.org/TR/css-text-3/#cursive-script:
454                // Cursive scripts do not admit gaps between their letters for either
455                // justification or letter-spacing.
456                let letter_spacing = if is_cursive_script(segment.script) {
457                    None
458                } else {
459                    letter_spacing
460                };
461                if letter_spacing.is_some() {
462                    flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
463                };
464
465                let shaping_options = ShapingOptions {
466                    letter_spacing,
467                    word_spacing,
468                    script: segment.script,
469                    language,
470                    flags,
471                };
472
473                segment.shape_text(
474                    &parent_style,
475                    formatting_context_text,
476                    linebreaker,
477                    &shaping_options,
478                );
479
480                segment
481            })
482            .collect();
483
484        let _ = std::mem::replace(&mut self.shaped_text, segments);
485    }
486
487    /// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched
488    /// font and script. Fonts may differ when glyphs are found in fallback fonts.
489    /// [`super::InlineFormattingContext`].
490    fn segment_text_by_font(
491        &mut self,
492        layout_context: &LayoutContext,
493        formatting_context_text: &str,
494        bidi_info: &BidiInfo,
495        parent_style: &ServoArc<ComputedValues>,
496    ) -> Vec<TextRunSegment> {
497        let font_group = layout_context
498            .font_context
499            .font_group(parent_style.clone_font());
500        let mut current: Option<TextRunSegment> = None;
501        let mut results = Vec::new();
502
503        let lang = parent_style.get_font()._x_lang.clone();
504        let text_run_text = &formatting_context_text[self.text_range.clone()];
505        let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars());
506
507        // The next current character index within the entire inline formatting context's text.
508        let mut next_character_index = self.character_range.start;
509        // The next bytes index of the charcter within the entire inline formatting context's text.
510        let mut next_byte_index = self.text_range.start;
511
512        for (character, next_character) in char_iterator {
513            let current_character_index = next_character_index;
514            next_character_index += 1;
515
516            let current_byte_index = next_byte_index;
517            next_byte_index += character.len_utf8();
518
519            if char_does_not_change_font(character) {
520                continue;
521            }
522
523            // If the script and BiDi level do not change, use the current font as the first fallback. This
524            // can potentially speed up fallback on long font lists or with uncommon scripts which might be
525            // at the bottom of the list.
526            let script = Script::from(character);
527            let bidi_level = bidi_info.levels[current_byte_index];
528
529            let Some(font) = font_group.find_by_codepoint(
530                &layout_context.font_context,
531                character,
532                next_character,
533                lang.clone(),
534            ) else {
535                continue;
536            };
537
538            // If the existing segment is compatible with the character, keep going.
539            if let Some(current) = current.as_mut() {
540                if current.update_if_compatible(layout_context, &font, script, bidi_level) {
541                    continue;
542                }
543            }
544
545            // Add the new segment and finish the existing one, if we had one. If the first
546            // characters in the run were control characters we may be creating the first
547            // segment in the middle of the run (ie the start should be the start of this
548            // text run's text).
549            let (start_byte_index, start_character_index) = match current {
550                Some(_) => (current_byte_index, current_character_index),
551                None => (self.text_range.start, self.character_range.start),
552            };
553            let new = TextRunSegment::new(
554                font,
555                script,
556                bidi_level,
557                start_byte_index,
558                start_character_index,
559            );
560            if let Some(mut finished) = current.replace(new) {
561                // The end of the previous segment is the start of the next one.
562                finished.range.end = current_byte_index;
563                finished.character_range.end = current_character_index;
564                results.push(finished);
565            }
566        }
567
568        // Either we have a current segment or we only had control characters and whitespace. In both
569        // of those cases, just use the first font.
570        if current.is_none() {
571            current = font_group.first(&layout_context.font_context).map(|font| {
572                TextRunSegment::new(
573                    font,
574                    Script::Common,
575                    Level::ltr(),
576                    self.text_range.start,
577                    self.character_range.start,
578                )
579            })
580        }
581
582        // Extend the last segment to the end of the string and add it to the results.
583        if let Some(mut last_segment) = current.take() {
584            last_segment.range.end = self.text_range.end;
585            last_segment.character_range.end = self.character_range.end;
586            results.push(last_segment);
587        }
588
589        results
590    }
591
592    pub(super) fn layout_into_line_items(&self, ifc: &mut InlineFormattingContextLayout) {
593        if self.text_range.is_empty() {
594            return;
595        }
596
597        // If we are following replaced content, we should have a soft wrap opportunity, unless the
598        // first character of this `TextRun` prevents that soft wrap opportunity. If we see such a
599        // character it should also override the LineBreaker's indication to break at the start.
600        let have_deferred_soft_wrap_opportunity =
601            mem::replace(&mut ifc.have_deferred_soft_wrap_opportunity, false);
602        let mut soft_wrap_policy = match have_deferred_soft_wrap_opportunity {
603            true => SegmentStartSoftWrapPolicy::Force,
604            false => SegmentStartSoftWrapPolicy::FollowLinebreaker,
605        };
606
607        for segment in self.shaped_text.iter() {
608            segment.layout_into_line_items(self, soft_wrap_policy, ifc);
609            soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker;
610        }
611    }
612}
613
614/// From <https://www.w3.org/TR/css-text-3/#cursive-script>:
615/// Cursive scripts do not admit gaps between their letters for either justification
616/// or letter-spacing. The following Unicode scripts are included: Arabic, Hanifi
617/// Rohingya, Mandaic, Mongolian, N’Ko, Phags Pa, Syriac
618fn is_cursive_script(script: Script) -> bool {
619    matches!(
620        script,
621        Script::Arabic |
622            Script::Hanifi_Rohingya |
623            Script::Mandaic |
624            Script::Mongolian |
625            Script::Nko |
626            Script::Phags_Pa |
627            Script::Syriac
628    )
629}
630
631/// Whether or not this character should be able to change the font during segmentation.  Certain
632/// character are not rendered at all, so it doesn't matter what font we use to render them. They
633/// should just be added to the current segment.
634fn char_does_not_change_font(character: char) -> bool {
635    if character.is_control() {
636        return true;
637    }
638    if character == '\u{00A0}' {
639        return true;
640    }
641    if is_bidi_control(character) {
642        return false;
643    }
644
645    let class = linebreak_property(character);
646    class == XI_LINE_BREAKING_CLASS_CM ||
647        class == XI_LINE_BREAKING_CLASS_GL ||
648        class == XI_LINE_BREAKING_CLASS_ZW ||
649        class == XI_LINE_BREAKING_CLASS_WJ ||
650        class == XI_LINE_BREAKING_CLASS_ZWJ
651}
652
653pub(super) fn get_font_for_first_font_for_style(
654    style: &ComputedValues,
655    font_context: &FontContext,
656) -> Option<FontRef> {
657    let font = font_context
658        .font_group(style.clone_font())
659        .first(font_context);
660    if font.is_none() {
661        warn!("Could not find font for style: {:?}", style.clone_font());
662    }
663    font
664}
/// An iterator adaptor that produces every character of the input together with the
/// character that follows it. The second item of the pair is `None` for the last
/// character of the input.
///
/// The input iterator is fused when the adaptor is created. Once the input has reported
/// that it is exhausted it is never asked for another character, so a pair ending in
/// `None` is always the last item, even for inputs that would resume after returning `None`.
pub(crate) struct TwoCharsAtATimeIterator<InputIterator> {
    /// The fused input character iterator.
    iterator: std::iter::Fuse<InputIterator>,
    /// The first character to produce in the next run of the iterator, if it was already
    /// read from the input as the lookahead of the previous run.
    next_character: Option<char>,
}

impl<InputIterator> TwoCharsAtATimeIterator<InputIterator>
where
    InputIterator: Iterator<Item = char>,
{
    /// Create the adaptor for the given character iterator. Nothing is read from the
    /// input until the first call to `next`.
    fn new(iterator: InputIterator) -> Self {
        Self {
            iterator: iterator.fuse(),
            next_character: None,
        }
    }
}

impl<InputIterator> Iterator for TwoCharsAtATimeIterator<InputIterator>
where
    InputIterator: Iterator<Item = char>,
{
    type Item = (char, Option<char>);

    fn next(&mut self) -> Option<Self::Item> {
        // Use the lookahead of the previous run. If there is none, this is either the
        // first run or the input is exhausted, and because the input is fused, reading
        // from it is harmless in both cases.
        let character = self
            .next_character
            .take()
            .or_else(|| self.iterator.next())?;
        self.next_character = self.iterator.next();
        Some((character, self.next_character))
    }
}