layout/flow/inline/
text_run.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::mem;
6use std::ops::Range;
7use std::sync::Arc;
8
9use app_units::Au;
10use base::text::is_bidi_control;
11use fonts::{
12    FontContext, FontRef, GlyphStore, LAST_RESORT_GLYPH_ADVANCE, ShapingFlags, ShapingOptions,
13};
14use log::warn;
15use malloc_size_of_derive::MallocSizeOf;
16use servo_arc::Arc as ServoArc;
17use style::computed_values::text_rendering::T as TextRendering;
18use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
19use style::computed_values::word_break::T as WordBreak;
20use style::properties::ComputedValues;
21use style::str::char_is_whitespace;
22use style::values::computed::OverflowWrap;
23use unicode_bidi::{BidiInfo, Level};
24use unicode_script::Script;
25use xi_unicode::linebreak_property;
26
27use super::line_breaker::LineBreaker;
28use super::{InlineFormattingContextLayout, SharedInlineStyles};
29use crate::context::LayoutContext;
30use crate::dom::WeakLayoutBox;
31use crate::flow::inline::line::TextRunOffsets;
32use crate::fragment_tree::BaseFragmentInfo;
33
// xi-unicode exposes its line breaking classes only as raw numbers (see its
// `table.rs`), so the classes this file cares about are given names here. The
// two-letter suffixes are the UAX #14 line breaking class abbreviations.

/// UAX #14 class `CM` (Combining Mark).
pub(crate) const XI_LINE_BREAKING_CLASS_CM: u8 = 9;
/// UAX #14 class `GL` (Non-breaking "Glue").
pub(crate) const XI_LINE_BREAKING_CLASS_GL: u8 = 12;
/// UAX #14 class `ZW` (Zero Width Space).
pub(crate) const XI_LINE_BREAKING_CLASS_ZW: u8 = 28;
/// UAX #14 class `WJ` (Word Joiner).
pub(crate) const XI_LINE_BREAKING_CLASS_WJ: u8 = 30;
/// UAX #14 class `ZWJ` (Zero Width Joiner).
pub(crate) const XI_LINE_BREAKING_CLASS_ZWJ: u8 = 42;
41
/// How a segment should treat a soft wrap opportunity before its first run.
///
/// There are two reasons why we might want to break at the start:
///
///  1. The line breaker told us that a break was necessary between two separate
///     instances of sending text to it.
///  2. We are following replaced content, i.e. `have_deferred_soft_wrap_opportunity`
///     was set.
///
/// In both cases, we don't want to do this if the first character prevents a
/// soft wrap opportunity.
#[derive(PartialEq)]
enum SegmentStartSoftWrapPolicy {
    /// Process a soft wrap opportunity before the first run of the segment.
    Force,
    /// Only do so if the segment's own `break_at_start` flag asks for it.
    FollowLinebreaker,
}
55
56#[derive(Debug, MallocSizeOf)]
57pub(crate) struct TextRunSegment {
58    /// The index of this font in the parent [`super::InlineFormattingContext`]'s collection of font
59    /// information.
60    pub font: FontRef,
61
62    /// The [`Script`] of this segment.
63    pub script: Script,
64
65    /// The bidi Level of this segment.
66    pub bidi_level: Level,
67
68    /// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content.
69    pub range: Range<usize>,
70
71    /// The range of characters in the parent [`super::InlineFormattingContext`]'s text content.
72    pub character_range: Range<usize>,
73
74    /// Whether or not the linebreaker said that we should allow a line break at the start of this
75    /// segment.
76    pub break_at_start: bool,
77
78    /// The shaped runs within this segment.
79    #[conditional_malloc_size_of]
80    pub runs: Vec<Arc<GlyphStore>>,
81}
82
83impl TextRunSegment {
84    fn new(
85        font: FontRef,
86        script: Script,
87        bidi_level: Level,
88        start_offset: usize,
89        start_character_offset: usize,
90    ) -> Self {
91        Self {
92            font,
93            script,
94            bidi_level,
95            range: start_offset..start_offset,
96            character_range: start_character_offset..start_character_offset,
97            runs: Vec::new(),
98            break_at_start: false,
99        }
100    }
101
102    /// Update this segment if the Font and Script are compatible. The update will only
103    /// ever make the Script specific. Returns true if the new Font and Script are
104    /// compatible with this segment or false otherwise.
105    fn update_if_compatible(
106        &mut self,
107        layout_context: &LayoutContext,
108        new_font: &FontRef,
109        script: Script,
110        bidi_level: Level,
111    ) -> bool {
112        fn is_specific(script: Script) -> bool {
113            script != Script::Common && script != Script::Inherited
114        }
115
116        if bidi_level != self.bidi_level {
117            return false;
118        }
119
120        let painter_id = layout_context.painter_id;
121        let font_context = &layout_context.font_context;
122        if new_font.key(painter_id, font_context) !=
123            self.font
124                .key(layout_context.painter_id, &layout_context.font_context) ||
125            new_font.descriptor.pt_size != self.font.descriptor.pt_size
126        {
127            return false;
128        }
129
130        if !is_specific(self.script) && is_specific(script) {
131            self.script = script;
132        }
133        script == self.script || !is_specific(script)
134    }
135
136    fn layout_into_line_items(
137        &self,
138        text_run: &TextRun,
139        mut soft_wrap_policy: SegmentStartSoftWrapPolicy,
140        ifc: &mut InlineFormattingContextLayout,
141    ) {
142        if self.break_at_start && soft_wrap_policy == SegmentStartSoftWrapPolicy::FollowLinebreaker
143        {
144            soft_wrap_policy = SegmentStartSoftWrapPolicy::Force;
145        }
146
147        let mut character_range_start = self.character_range.start;
148        for (run_index, run) in self.runs.iter().enumerate() {
149            ifc.possibly_flush_deferred_forced_line_break();
150
151            // If this whitespace forces a line break, queue up a hard line break the next time we
152            // see any content. We don't line break immediately, because we'd like to finish processing
153            // any ongoing inline boxes before ending the line.
154            if run.is_single_preserved_newline() {
155                character_range_start += run.character_count();
156                ifc.defer_forced_line_break();
157                continue;
158            }
159            // Break before each unbreakable run in this TextRun, except the first unless the
160            // linebreaker was set to break before the first run.
161            if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force {
162                ifc.process_soft_wrap_opportunity();
163            }
164
165            let new_character_range_end = character_range_start + run.character_count();
166            let offsets = ifc
167                .ifc
168                .shared_selection
169                .clone()
170                .map(|shared_selection| TextRunOffsets {
171                    shared_selection,
172                    character_range: character_range_start..new_character_range_end,
173                });
174
175            ifc.push_glyph_store_to_unbreakable_segment(
176                run.clone(),
177                text_run,
178                &self.font,
179                self.bidi_level,
180                offsets,
181            );
182            character_range_start = new_character_range_end;
183        }
184    }
185
186    fn shape_and_push_range(
187        &mut self,
188        range: &Range<usize>,
189        formatting_context_text: &str,
190        options: &ShapingOptions,
191    ) {
192        self.runs.push(
193            self.font
194                .shape_text(&formatting_context_text[range.clone()], options),
195        );
196    }
197
198    /// Shape the text of this [`TextRunSegment`], first finding "words" for the shaper by processing
199    /// the linebreaks found in the owning [`super::InlineFormattingContext`]. Linebreaks are filtered,
200    /// based on the style of the parent inline box.
201    fn shape_text(
202        &mut self,
203        parent_style: &ComputedValues,
204        formatting_context_text: &str,
205        linebreaker: &mut LineBreaker,
206        shaping_options: &ShapingOptions,
207    ) {
208        // Gather the linebreaks that apply to this segment from the inline formatting context's collection
209        // of line breaks. Also add a simulated break at the end of the segment in order to ensure the final
210        // piece of text is processed.
211        let range = self.range.clone();
212        let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone());
213        let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end));
214
215        self.runs.clear();
216        self.runs.reserve(linebreaks.len());
217        self.break_at_start = false;
218
219        let text_style = parent_style.get_inherited_text().clone();
220        let can_break_anywhere = text_style.word_break == WordBreak::BreakAll ||
221            text_style.overflow_wrap == OverflowWrap::Anywhere ||
222            text_style.overflow_wrap == OverflowWrap::BreakWord;
223
224        let mut last_slice = self.range.start..self.range.start;
225        for break_index in linebreak_iter {
226            if *break_index == self.range.start {
227                self.break_at_start = true;
228                continue;
229            }
230
231            let mut options = *shaping_options;
232
233            // Extend the slice to the next UAX#14 line break opportunity.
234            let mut slice = last_slice.end..*break_index;
235            let word = &formatting_context_text[slice.clone()];
236
237            // Split off any trailing whitespace into a separate glyph run.
238            let mut whitespace = slice.end..slice.end;
239            let mut rev_char_indices = word.char_indices().rev().peekable();
240
241            let mut ends_with_whitespace = false;
242            let ends_with_newline = rev_char_indices
243                .peek()
244                .is_some_and(|&(_, character)| character == '\n');
245            if let Some((first_white_space_index, first_white_space_character)) = rev_char_indices
246                .take_while(|&(_, character)| char_is_whitespace(character))
247                .last()
248            {
249                ends_with_whitespace = true;
250                whitespace.start = slice.start + first_white_space_index;
251
252                // If line breaking for a piece of text that has `white-space-collapse: break-spaces` there
253                // is a line break opportunity *after* every preserved space, but not before. This means
254                // that we should not split off the first whitespace, unless that white-space is a preserved
255                // newline.
256                //
257                // An exception to this is if the style tells us that we can break in the middle of words.
258                if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces &&
259                    first_white_space_character != '\n' &&
260                    !can_break_anywhere
261                {
262                    whitespace.start += first_white_space_character.len_utf8();
263                    options
264                        .flags
265                        .insert(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG);
266                }
267
268                slice.end = whitespace.start;
269            }
270
271            // If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice.
272            // TODO: This should only happen for CJK text.
273            if !ends_with_whitespace &&
274                *break_index != self.range.end &&
275                text_style.word_break == WordBreak::KeepAll &&
276                !can_break_anywhere
277            {
278                continue;
279            }
280
281            // Only advance the last slice if we are not going to try to expand the slice.
282            last_slice = slice.start..*break_index;
283
284            // Push the non-whitespace part of the range.
285            if !slice.is_empty() {
286                self.shape_and_push_range(&slice, formatting_context_text, &options);
287            }
288
289            if whitespace.is_empty() {
290                continue;
291            }
292
293            options.flags.insert(
294                ShapingFlags::IS_WHITESPACE_SHAPING_FLAG |
295                    ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG,
296            );
297
298            // If `white-space-collapse: break-spaces` is active, insert a line breaking opportunity
299            // between each white space character in the white space that we trimmed off.
300            if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces {
301                let start_index = whitespace.start;
302                for (index, character) in formatting_context_text[whitespace].char_indices() {
303                    let index = start_index + index;
304                    self.shape_and_push_range(
305                        &(index..index + character.len_utf8()),
306                        formatting_context_text,
307                        &options,
308                    );
309                }
310                continue;
311            }
312
313            // The breaker breaks after every newline, so either there is none,
314            // or there is exactly one at the very end. In the latter case,
315            // split it into a different run. That's because shaping considers
316            // a newline to have the same advance as a space, but during layout
317            // we want to treat the newline as having no advance.
318            if ends_with_newline && whitespace.len() > 1 {
319                self.shape_and_push_range(
320                    &(whitespace.start..whitespace.end - 1),
321                    formatting_context_text,
322                    &options,
323                );
324                self.shape_and_push_range(
325                    &(whitespace.end - 1..whitespace.end),
326                    formatting_context_text,
327                    &options,
328                );
329            } else {
330                self.shape_and_push_range(&whitespace, formatting_context_text, &options);
331            }
332        }
333    }
334}
335
336/// A single [`TextRun`] for the box tree. These are all descendants of
337/// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`].  During
338/// box tree construction, text is split into [`TextRun`]s based on their font, script,
339/// etc. When these are created text is already shaped.
340///
341/// <https://www.w3.org/TR/css-display-3/#css-text-run>
342#[derive(Debug, MallocSizeOf)]
343pub(crate) struct TextRun {
344    /// The [`BaseFragmentInfo`] for this [`TextRun`]. Usually this comes from the
345    /// original text node in the DOM for the text.
346    pub base_fragment_info: BaseFragmentInfo,
347
348    /// A weak reference to the parent of this layout box. This becomes valid as soon
349    /// as the *parent* of this box is added to the tree.
350    pub parent_box: Option<WeakLayoutBox>,
351
352    /// The [`crate::SharedStyle`] from this [`TextRun`]s parent element. This is
353    /// shared so that incremental layout can simply update the parent element and
354    /// this [`TextRun`] will be updated automatically.
355    pub inline_styles: SharedInlineStyles,
356
357    /// The range of text in [`super::InlineFormattingContext::text_content`] of the
358    /// [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are UTF-8 offsets.
359    pub text_range: Range<usize>,
360
361    /// The range of characters in this text in [`super::InlineFormattingContext::text_content`]
362    /// of the [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are *not*
363    /// UTF-8 offsets.
364    pub character_range: Range<usize>,
365
366    /// The text of this [`TextRun`] with a font selected, broken into unbreakable
367    /// segments, and shaped.
368    pub shaped_text: Vec<TextRunSegment>,
369}
370
371impl TextRun {
372    pub(crate) fn new(
373        base_fragment_info: BaseFragmentInfo,
374        inline_styles: SharedInlineStyles,
375        text_range: Range<usize>,
376        character_range: Range<usize>,
377    ) -> Self {
378        Self {
379            base_fragment_info,
380            parent_box: None,
381            inline_styles,
382            text_range,
383            character_range,
384            shaped_text: Vec::new(),
385        }
386    }
387
388    pub(super) fn segment_and_shape(
389        &mut self,
390        formatting_context_text: &str,
391        layout_context: &LayoutContext,
392        linebreaker: &mut LineBreaker,
393        bidi_info: &BidiInfo,
394    ) {
395        let parent_style = self.inline_styles.style.borrow().clone();
396        let inherited_text_style = parent_style.get_inherited_text().clone();
397        let letter_spacing = inherited_text_style
398            .letter_spacing
399            .0
400            .resolve(parent_style.clone_font().font_size.computed_size());
401        let letter_spacing = if letter_spacing.px() != 0. {
402            Some(app_units::Au::from(letter_spacing))
403        } else {
404            None
405        };
406
407        let mut flags = ShapingFlags::empty();
408        if inherited_text_style.text_rendering == TextRendering::Optimizespeed {
409            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
410            flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG)
411        }
412
413        let specified_word_spacing = &inherited_text_style.word_spacing;
414        let style_word_spacing: Option<Au> = specified_word_spacing.to_length().map(|l| l.into());
415
416        let segments = self
417            .segment_text_by_font(
418                layout_context,
419                formatting_context_text,
420                bidi_info,
421                &parent_style,
422            )
423            .into_iter()
424            .map(|mut segment| {
425                let word_spacing = style_word_spacing.unwrap_or_else(|| {
426                    let space_width = segment
427                        .font
428                        .glyph_index(' ')
429                        .map(|glyph_id| segment.font.glyph_h_advance(glyph_id))
430                        .unwrap_or(LAST_RESORT_GLYPH_ADVANCE);
431                    specified_word_spacing.to_used_value(Au::from_f64_px(space_width))
432                });
433
434                let mut flags = flags;
435                if segment.bidi_level.is_rtl() {
436                    flags.insert(ShapingFlags::RTL_FLAG);
437                }
438
439                // From https://www.w3.org/TR/css-text-3/#cursive-script:
440                // Cursive scripts do not admit gaps between their letters for either
441                // justification or letter-spacing.
442                let letter_spacing = if is_cursive_script(segment.script) {
443                    None
444                } else {
445                    letter_spacing
446                };
447                if letter_spacing.is_some() {
448                    flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
449                };
450
451                let shaping_options = ShapingOptions {
452                    letter_spacing,
453                    word_spacing,
454                    script: segment.script,
455                    flags,
456                };
457
458                segment.shape_text(
459                    &parent_style,
460                    formatting_context_text,
461                    linebreaker,
462                    &shaping_options,
463                );
464
465                segment
466            })
467            .collect();
468
469        let _ = std::mem::replace(&mut self.shaped_text, segments);
470    }
471
472    /// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched
473    /// font and script. Fonts may differ when glyphs are found in fallback fonts.
474    /// [`super::InlineFormattingContext`].
475    fn segment_text_by_font(
476        &mut self,
477        layout_context: &LayoutContext,
478        formatting_context_text: &str,
479        bidi_info: &BidiInfo,
480        parent_style: &ServoArc<ComputedValues>,
481    ) -> Vec<TextRunSegment> {
482        let font_group = layout_context
483            .font_context
484            .font_group(parent_style.clone_font());
485        let mut current: Option<TextRunSegment> = None;
486        let mut results = Vec::new();
487
488        let text_run_text = &formatting_context_text[self.text_range.clone()];
489        let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars());
490        let mut next_byte_index = self.text_range.start;
491
492        // This represents the current character index as we iterate relative to the entire inline formatting
493        // context.
494        let mut current_character_index = self.character_range.start;
495        for (character, next_character) in char_iterator {
496            current_character_index += 1;
497
498            let current_byte_index = next_byte_index;
499            next_byte_index += character.len_utf8();
500
501            if char_does_not_change_font(character) {
502                continue;
503            }
504
505            // If the script and BiDi level do not change, use the current font as the first fallback. This
506            // can potentially speed up fallback on long font lists or with uncommon scripts which might be
507            // at the bottom of the list.
508            let script = Script::from(character);
509            let bidi_level = bidi_info.levels[current_byte_index];
510            let current_font = current.as_ref().and_then(|text_run_segment| {
511                if text_run_segment.bidi_level == bidi_level && text_run_segment.script == script {
512                    Some(text_run_segment.font.clone())
513                } else {
514                    None
515                }
516            });
517
518            let lang = parent_style.get_font()._x_lang.clone();
519
520            let Some(font) = font_group.find_by_codepoint(
521                &layout_context.font_context,
522                character,
523                next_character,
524                current_font,
525                Some(lang.0.as_ref().to_string()),
526            ) else {
527                continue;
528            };
529
530            // If the existing segment is compatible with the character, keep going.
531            if let Some(current) = current.as_mut() {
532                if current.update_if_compatible(layout_context, &font, script, bidi_level) {
533                    continue;
534                }
535            }
536
537            // Add the new segment and finish the existing one, if we had one. If the first
538            // characters in the run were control characters we may be creating the first
539            // segment in the middle of the run (ie the start should be the start of this
540            // text run's text).
541            let (start_byte_index, start_character_index) = match current {
542                Some(_) => (current_byte_index, current_character_index),
543                None => (self.text_range.start, self.character_range.start),
544            };
545            let new = TextRunSegment::new(
546                font,
547                script,
548                bidi_level,
549                start_byte_index,
550                start_character_index,
551            );
552            if let Some(mut finished) = current.replace(new) {
553                // The end of the previous segment is the start of the next one.
554                finished.range.end = current_byte_index;
555                finished.character_range.end = current_character_index;
556                results.push(finished);
557            }
558        }
559
560        // Either we have a current segment or we only had control characters and whitespace. In both
561        // of those cases, just use the first font.
562        if current.is_none() {
563            current = font_group.first(&layout_context.font_context).map(|font| {
564                TextRunSegment::new(
565                    font,
566                    Script::Common,
567                    Level::ltr(),
568                    self.text_range.start,
569                    self.character_range.start,
570                )
571            })
572        }
573
574        // Extend the last segment to the end of the string and add it to the results.
575        if let Some(mut last_segment) = current.take() {
576            last_segment.range.end = self.text_range.end;
577            last_segment.character_range.end = self.character_range.end;
578            results.push(last_segment);
579        }
580
581        results
582    }
583
584    pub(super) fn layout_into_line_items(&self, ifc: &mut InlineFormattingContextLayout) {
585        if self.text_range.is_empty() {
586            return;
587        }
588
589        // If we are following replaced content, we should have a soft wrap opportunity, unless the
590        // first character of this `TextRun` prevents that soft wrap opportunity. If we see such a
591        // character it should also override the LineBreaker's indication to break at the start.
592        let have_deferred_soft_wrap_opportunity =
593            mem::replace(&mut ifc.have_deferred_soft_wrap_opportunity, false);
594        let mut soft_wrap_policy = match have_deferred_soft_wrap_opportunity {
595            true => SegmentStartSoftWrapPolicy::Force,
596            false => SegmentStartSoftWrapPolicy::FollowLinebreaker,
597        };
598
599        for segment in self.shaped_text.iter() {
600            segment.layout_into_line_items(self, soft_wrap_policy, ifc);
601            soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker;
602        }
603    }
604}
605
606/// From <https://www.w3.org/TR/css-text-3/#cursive-script>:
607/// Cursive scripts do not admit gaps between their letters for either justification
608/// or letter-spacing. The following Unicode scripts are included: Arabic, Hanifi
609/// Rohingya, Mandaic, Mongolian, N’Ko, Phags Pa, Syriac
610fn is_cursive_script(script: Script) -> bool {
611    matches!(
612        script,
613        Script::Arabic |
614            Script::Hanifi_Rohingya |
615            Script::Mandaic |
616            Script::Mongolian |
617            Script::Nko |
618            Script::Phags_Pa |
619            Script::Syriac
620    )
621}
622
623/// Whether or not this character should be able to change the font during segmentation.  Certain
624/// character are not rendered at all, so it doesn't matter what font we use to render them. They
625/// should just be added to the current segment.
626fn char_does_not_change_font(character: char) -> bool {
627    if character.is_control() {
628        return true;
629    }
630    if character == '\u{00A0}' {
631        return true;
632    }
633    if is_bidi_control(character) {
634        return false;
635    }
636
637    let class = linebreak_property(character);
638    class == XI_LINE_BREAKING_CLASS_CM ||
639        class == XI_LINE_BREAKING_CLASS_GL ||
640        class == XI_LINE_BREAKING_CLASS_ZW ||
641        class == XI_LINE_BREAKING_CLASS_WJ ||
642        class == XI_LINE_BREAKING_CLASS_ZWJ
643}
644
645pub(super) fn get_font_for_first_font_for_style(
646    style: &ComputedValues,
647    font_context: &FontContext,
648) -> Option<FontRef> {
649    let font = font_context
650        .font_group(style.clone_font())
651        .first(font_context);
652    if font.is_none() {
653        warn!("Could not find font for style: {:?}", style.clone_font());
654    }
655    font
656}
/// An iterator adaptor that yields every character of the wrapped iterator
/// together with the character that follows it, which is `None` for the last one.
pub(crate) struct TwoCharsAtATimeIterator<InputIterator> {
    /// The wrapped character iterator.
    iterator: InputIterator,
    /// The character that was read ahead by the previous call to `next` and that is
    /// the first one to be produced by the following call.
    next_character: Option<char>,
}

impl<InputIterator> TwoCharsAtATimeIterator<InputIterator> {
    /// Wrap `iterator`. Nothing is read from it before the first call to `next`.
    fn new(iterator: InputIterator) -> Self {
        let next_character = None;
        Self {
            iterator,
            next_character,
        }
    }
}

impl<InputIterator> Iterator for TwoCharsAtATimeIterator<InputIterator>
where
    InputIterator: Iterator<Item = char>,
{
    type Item = (char, Option<char>);

    fn next(&mut self) -> Option<Self::Item> {
        // Use the character that was read ahead, or read one from the input when
        // there is none, which is the case for the very first call.
        let character = match self.next_character.take() {
            Some(character) => character,
            None => self.iterator.next()?,
        };
        let following_character = self.iterator.next();
        self.next_character = following_character;
        Some((character, following_character))
    }
}
688}