layout/flow/inline/
text_run.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::mem;
6use std::ops::Range;
7use std::sync::Arc;
8
9use app_units::Au;
10use base::text::is_bidi_control;
11use fonts::{
12    FontContext, FontRef, GlyphStore, LAST_RESORT_GLYPH_ADVANCE, ShapingFlags, ShapingOptions,
13};
14use log::warn;
15use malloc_size_of_derive::MallocSizeOf;
16use servo_arc::Arc as ServoArc;
17use style::computed_values::text_rendering::T as TextRendering;
18use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
19use style::computed_values::word_break::T as WordBreak;
20use style::properties::ComputedValues;
21use style::str::char_is_whitespace;
22use style::values::computed::OverflowWrap;
23use unicode_bidi::{BidiInfo, Level};
24use unicode_script::Script;
25use xi_unicode::linebreak_property;
26
27use super::line_breaker::LineBreaker;
28use super::{InlineFormattingContextLayout, SharedInlineStyles};
29use crate::context::LayoutContext;
30use crate::dom::WeakLayoutBox;
31use crate::flow::inline::line::TextRunOffsets;
32use crate::fragment_tree::BaseFragmentInfo;
33
// xi-unicode defines its line breaking classes in `table.rs`, where they are only
// identified by number. The classes this module cares about are given names here.

/// xi-unicode's number for the UAX #14 `CM` (combining mark) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_CM: u8 = 9;
/// xi-unicode's number for the UAX #14 `GL` (non-breaking "glue") line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_GL: u8 = 12;
/// xi-unicode's number for the UAX #14 `ZW` (zero width space) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_ZW: u8 = 28;
/// xi-unicode's number for the UAX #14 `WJ` (word joiner) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_WJ: u8 = 30;
/// xi-unicode's number for the UAX #14 `ZWJ` (zero width joiner) line breaking class.
pub(crate) const XI_LINE_BREAKING_CLASS_ZWJ: u8 = 42;
41
/// Whether a soft wrap opportunity should be processed before the first run of a segment.
///
/// There are two reasons why we might want to break at the start:
///
///  1. The line breaker told us that a break was necessary between two separate
///     instances of sending text to it.
///  2. We are following replaced content ie `have_deferred_soft_wrap_opportunity`.
///
/// In both cases, we don't want to do this if the first character prevents a
/// soft wrap opportunity.
#[derive(PartialEq)]
enum SegmentStartSoftWrapPolicy {
    /// Process a soft wrap opportunity before the first run regardless of what the
    /// linebreaker reported for the segment.
    Force,
    /// Only process a soft wrap opportunity before the first run when the linebreaker
    /// reported a break at the start of the segment.
    FollowLinebreaker,
}
55
56#[derive(Debug, MallocSizeOf)]
57pub(crate) struct TextRunSegment {
58    /// The index of this font in the parent [`super::InlineFormattingContext`]'s collection of font
59    /// information.
60    pub font: FontRef,
61
62    /// The [`Script`] of this segment.
63    pub script: Script,
64
65    /// The bidi Level of this segment.
66    pub bidi_level: Level,
67
68    /// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content.
69    pub range: Range<usize>,
70
71    /// The range of characters in the parent [`super::InlineFormattingContext`]'s text content.
72    pub character_range: Range<usize>,
73
74    /// Whether or not the linebreaker said that we should allow a line break at the start of this
75    /// segment.
76    pub break_at_start: bool,
77
78    /// The shaped runs within this segment.
79    #[conditional_malloc_size_of]
80    pub runs: Vec<Arc<GlyphStore>>,
81}
82
83impl TextRunSegment {
84    fn new(
85        font: FontRef,
86        script: Script,
87        bidi_level: Level,
88        start_offset: usize,
89        start_character_offset: usize,
90    ) -> Self {
91        Self {
92            font,
93            script,
94            bidi_level,
95            range: start_offset..start_offset,
96            character_range: start_character_offset..start_character_offset,
97            runs: Vec::new(),
98            break_at_start: false,
99        }
100    }
101
102    /// Update this segment if the Font and Script are compatible. The update will only
103    /// ever make the Script specific. Returns true if the new Font and Script are
104    /// compatible with this segment or false otherwise.
105    fn update_if_compatible(
106        &mut self,
107        layout_context: &LayoutContext,
108        new_font: &FontRef,
109        script: Script,
110        bidi_level: Level,
111    ) -> bool {
112        fn is_specific(script: Script) -> bool {
113            script != Script::Common && script != Script::Inherited
114        }
115
116        if bidi_level != self.bidi_level {
117            return false;
118        }
119
120        let painter_id = layout_context.painter_id;
121        let font_context = &layout_context.font_context;
122        if new_font.key(painter_id, font_context) !=
123            self.font
124                .key(layout_context.painter_id, &layout_context.font_context) ||
125            new_font.descriptor.pt_size != self.font.descriptor.pt_size
126        {
127            return false;
128        }
129
130        if !is_specific(self.script) && is_specific(script) {
131            self.script = script;
132        }
133        script == self.script || !is_specific(script)
134    }
135
136    fn layout_into_line_items(
137        &self,
138        text_run: &TextRun,
139        mut soft_wrap_policy: SegmentStartSoftWrapPolicy,
140        ifc: &mut InlineFormattingContextLayout,
141    ) {
142        if self.break_at_start && soft_wrap_policy == SegmentStartSoftWrapPolicy::FollowLinebreaker
143        {
144            soft_wrap_policy = SegmentStartSoftWrapPolicy::Force;
145        }
146
147        let mut character_range_start = self.character_range.start;
148        for (run_index, run) in self.runs.iter().enumerate() {
149            ifc.possibly_flush_deferred_forced_line_break();
150
151            let new_character_range_end = character_range_start + run.character_count();
152            let offsets = ifc
153                .ifc
154                .shared_selection
155                .clone()
156                .map(|shared_selection| TextRunOffsets {
157                    shared_selection,
158                    character_range: character_range_start..new_character_range_end,
159                });
160
161            // If this whitespace forces a line break, queue up a hard line break the next time we
162            // see any content. We don't line break immediately, because we'd like to finish processing
163            // any ongoing inline boxes before ending the line.
164            if run.is_single_preserved_newline() {
165                ifc.possibly_push_empty_text_run_to_unbreakable_segment(
166                    text_run,
167                    &self.font,
168                    self.bidi_level,
169                    offsets,
170                );
171                character_range_start = new_character_range_end;
172                ifc.defer_forced_line_break();
173                continue;
174            }
175
176            // Break before each unbreakable run in this TextRun, except the first unless the
177            // linebreaker was set to break before the first run.
178            if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force {
179                ifc.process_soft_wrap_opportunity();
180            }
181
182            ifc.push_glyph_store_to_unbreakable_segment(
183                run.clone(),
184                text_run,
185                &self.font,
186                self.bidi_level,
187                offsets,
188            );
189            character_range_start = new_character_range_end;
190        }
191    }
192
193    fn shape_and_push_range(
194        &mut self,
195        range: &Range<usize>,
196        formatting_context_text: &str,
197        options: &ShapingOptions,
198    ) {
199        self.runs.push(
200            self.font
201                .shape_text(&formatting_context_text[range.clone()], options),
202        );
203    }
204
205    /// Shape the text of this [`TextRunSegment`], first finding "words" for the shaper by processing
206    /// the linebreaks found in the owning [`super::InlineFormattingContext`]. Linebreaks are filtered,
207    /// based on the style of the parent inline box.
208    fn shape_text(
209        &mut self,
210        parent_style: &ComputedValues,
211        formatting_context_text: &str,
212        linebreaker: &mut LineBreaker,
213        shaping_options: &ShapingOptions,
214    ) {
215        // Gather the linebreaks that apply to this segment from the inline formatting context's collection
216        // of line breaks. Also add a simulated break at the end of the segment in order to ensure the final
217        // piece of text is processed.
218        let range = self.range.clone();
219        let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone());
220        let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end));
221
222        self.runs.clear();
223        self.runs.reserve(linebreaks.len());
224        self.break_at_start = false;
225
226        let text_style = parent_style.get_inherited_text().clone();
227        let can_break_anywhere = text_style.word_break == WordBreak::BreakAll ||
228            text_style.overflow_wrap == OverflowWrap::Anywhere ||
229            text_style.overflow_wrap == OverflowWrap::BreakWord;
230
231        let mut last_slice = self.range.start..self.range.start;
232        for break_index in linebreak_iter {
233            if *break_index == self.range.start {
234                self.break_at_start = true;
235                continue;
236            }
237
238            let mut options = *shaping_options;
239
240            // Extend the slice to the next UAX#14 line break opportunity.
241            let mut slice = last_slice.end..*break_index;
242            let word = &formatting_context_text[slice.clone()];
243
244            // Split off any trailing whitespace into a separate glyph run.
245            let mut whitespace = slice.end..slice.end;
246            let mut rev_char_indices = word.char_indices().rev().peekable();
247
248            let mut ends_with_whitespace = false;
249            let ends_with_newline = rev_char_indices
250                .peek()
251                .is_some_and(|&(_, character)| character == '\n');
252            if let Some((first_white_space_index, first_white_space_character)) = rev_char_indices
253                .take_while(|&(_, character)| char_is_whitespace(character))
254                .last()
255            {
256                ends_with_whitespace = true;
257                whitespace.start = slice.start + first_white_space_index;
258
259                // If line breaking for a piece of text that has `white-space-collapse: break-spaces` there
260                // is a line break opportunity *after* every preserved space, but not before. This means
261                // that we should not split off the first whitespace, unless that white-space is a preserved
262                // newline.
263                //
264                // An exception to this is if the style tells us that we can break in the middle of words.
265                if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces &&
266                    first_white_space_character != '\n' &&
267                    !can_break_anywhere
268                {
269                    whitespace.start += first_white_space_character.len_utf8();
270                    options
271                        .flags
272                        .insert(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG);
273                }
274
275                slice.end = whitespace.start;
276            }
277
278            // If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice.
279            // TODO: This should only happen for CJK text.
280            if !ends_with_whitespace &&
281                *break_index != self.range.end &&
282                text_style.word_break == WordBreak::KeepAll &&
283                !can_break_anywhere
284            {
285                continue;
286            }
287
288            // Only advance the last slice if we are not going to try to expand the slice.
289            last_slice = slice.start..*break_index;
290
291            // Push the non-whitespace part of the range.
292            if !slice.is_empty() {
293                self.shape_and_push_range(&slice, formatting_context_text, &options);
294            }
295
296            if whitespace.is_empty() {
297                continue;
298            }
299
300            options.flags.insert(
301                ShapingFlags::IS_WHITESPACE_SHAPING_FLAG |
302                    ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG,
303            );
304
305            // If `white-space-collapse: break-spaces` is active, insert a line breaking opportunity
306            // between each white space character in the white space that we trimmed off.
307            if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces {
308                let start_index = whitespace.start;
309                for (index, character) in formatting_context_text[whitespace].char_indices() {
310                    let index = start_index + index;
311                    self.shape_and_push_range(
312                        &(index..index + character.len_utf8()),
313                        formatting_context_text,
314                        &options,
315                    );
316                }
317                continue;
318            }
319
320            // The breaker breaks after every newline, so either there is none,
321            // or there is exactly one at the very end. In the latter case,
322            // split it into a different run. That's because shaping considers
323            // a newline to have the same advance as a space, but during layout
324            // we want to treat the newline as having no advance.
325            if ends_with_newline && whitespace.len() > 1 {
326                self.shape_and_push_range(
327                    &(whitespace.start..whitespace.end - 1),
328                    formatting_context_text,
329                    &options,
330                );
331                self.shape_and_push_range(
332                    &(whitespace.end - 1..whitespace.end),
333                    formatting_context_text,
334                    &options,
335                );
336            } else {
337                self.shape_and_push_range(&whitespace, formatting_context_text, &options);
338            }
339        }
340    }
341}
342
343/// A single [`TextRun`] for the box tree. These are all descendants of
344/// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`].  During
345/// box tree construction, text is split into [`TextRun`]s based on their font, script,
346/// etc. When these are created text is already shaped.
347///
348/// <https://www.w3.org/TR/css-display-3/#css-text-run>
349#[derive(Debug, MallocSizeOf)]
350pub(crate) struct TextRun {
351    /// The [`BaseFragmentInfo`] for this [`TextRun`]. Usually this comes from the
352    /// original text node in the DOM for the text.
353    pub base_fragment_info: BaseFragmentInfo,
354
355    /// A weak reference to the parent of this layout box. This becomes valid as soon
356    /// as the *parent* of this box is added to the tree.
357    pub parent_box: Option<WeakLayoutBox>,
358
359    /// The [`crate::SharedStyle`] from this [`TextRun`]s parent element. This is
360    /// shared so that incremental layout can simply update the parent element and
361    /// this [`TextRun`] will be updated automatically.
362    pub inline_styles: SharedInlineStyles,
363
364    /// The range of text in [`super::InlineFormattingContext::text_content`] of the
365    /// [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are UTF-8 offsets.
366    pub text_range: Range<usize>,
367
368    /// The range of characters in this text in [`super::InlineFormattingContext::text_content`]
369    /// of the [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are *not*
370    /// UTF-8 offsets.
371    pub character_range: Range<usize>,
372
373    /// The text of this [`TextRun`] with a font selected, broken into unbreakable
374    /// segments, and shaped.
375    pub shaped_text: Vec<TextRunSegment>,
376}
377
378impl TextRun {
379    pub(crate) fn new(
380        base_fragment_info: BaseFragmentInfo,
381        inline_styles: SharedInlineStyles,
382        text_range: Range<usize>,
383        character_range: Range<usize>,
384    ) -> Self {
385        Self {
386            base_fragment_info,
387            parent_box: None,
388            inline_styles,
389            text_range,
390            character_range,
391            shaped_text: Vec::new(),
392        }
393    }
394
395    pub(super) fn segment_and_shape(
396        &mut self,
397        formatting_context_text: &str,
398        layout_context: &LayoutContext,
399        linebreaker: &mut LineBreaker,
400        bidi_info: &BidiInfo,
401    ) {
402        let parent_style = self.inline_styles.style.borrow().clone();
403        let inherited_text_style = parent_style.get_inherited_text().clone();
404        let letter_spacing = inherited_text_style
405            .letter_spacing
406            .0
407            .resolve(parent_style.clone_font().font_size.computed_size());
408        let letter_spacing = if letter_spacing.px() != 0. {
409            Some(app_units::Au::from(letter_spacing))
410        } else {
411            None
412        };
413
414        let mut flags = ShapingFlags::empty();
415        if inherited_text_style.text_rendering == TextRendering::Optimizespeed {
416            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
417            flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG)
418        }
419
420        let specified_word_spacing = &inherited_text_style.word_spacing;
421        let style_word_spacing: Option<Au> = specified_word_spacing.to_length().map(|l| l.into());
422
423        let segments = self
424            .segment_text_by_font(
425                layout_context,
426                formatting_context_text,
427                bidi_info,
428                &parent_style,
429            )
430            .into_iter()
431            .map(|mut segment| {
432                let word_spacing = style_word_spacing.unwrap_or_else(|| {
433                    let space_width = segment
434                        .font
435                        .glyph_index(' ')
436                        .map(|glyph_id| segment.font.glyph_h_advance(glyph_id))
437                        .unwrap_or(LAST_RESORT_GLYPH_ADVANCE);
438                    specified_word_spacing.to_used_value(Au::from_f64_px(space_width))
439                });
440
441                let mut flags = flags;
442                if segment.bidi_level.is_rtl() {
443                    flags.insert(ShapingFlags::RTL_FLAG);
444                }
445
446                // From https://www.w3.org/TR/css-text-3/#cursive-script:
447                // Cursive scripts do not admit gaps between their letters for either
448                // justification or letter-spacing.
449                let letter_spacing = if is_cursive_script(segment.script) {
450                    None
451                } else {
452                    letter_spacing
453                };
454                if letter_spacing.is_some() {
455                    flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
456                };
457
458                let shaping_options = ShapingOptions {
459                    letter_spacing,
460                    word_spacing,
461                    script: segment.script,
462                    flags,
463                };
464
465                segment.shape_text(
466                    &parent_style,
467                    formatting_context_text,
468                    linebreaker,
469                    &shaping_options,
470                );
471
472                segment
473            })
474            .collect();
475
476        let _ = std::mem::replace(&mut self.shaped_text, segments);
477    }
478
479    /// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched
480    /// font and script. Fonts may differ when glyphs are found in fallback fonts.
481    /// [`super::InlineFormattingContext`].
482    fn segment_text_by_font(
483        &mut self,
484        layout_context: &LayoutContext,
485        formatting_context_text: &str,
486        bidi_info: &BidiInfo,
487        parent_style: &ServoArc<ComputedValues>,
488    ) -> Vec<TextRunSegment> {
489        let font_group = layout_context
490            .font_context
491            .font_group(parent_style.clone_font());
492        let mut current: Option<TextRunSegment> = None;
493        let mut results = Vec::new();
494
495        let lang = parent_style.get_font()._x_lang.clone();
496        let text_run_text = &formatting_context_text[self.text_range.clone()];
497        let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars());
498
499        // The next current character index within the entire inline formatting context's text.
500        let mut next_character_index = self.character_range.start;
501        // The next bytes index of the charcter within the entire inline formatting context's text.
502        let mut next_byte_index = self.text_range.start;
503
504        for (character, next_character) in char_iterator {
505            let current_character_index = next_character_index;
506            next_character_index += 1;
507
508            let current_byte_index = next_byte_index;
509            next_byte_index += character.len_utf8();
510
511            if char_does_not_change_font(character) {
512                continue;
513            }
514
515            // If the script and BiDi level do not change, use the current font as the first fallback. This
516            // can potentially speed up fallback on long font lists or with uncommon scripts which might be
517            // at the bottom of the list.
518            let script = Script::from(character);
519            let bidi_level = bidi_info.levels[current_byte_index];
520
521            let Some(font) = font_group.find_by_codepoint(
522                &layout_context.font_context,
523                character,
524                next_character,
525                lang.clone(),
526            ) else {
527                continue;
528            };
529
530            // If the existing segment is compatible with the character, keep going.
531            if let Some(current) = current.as_mut() {
532                if current.update_if_compatible(layout_context, &font, script, bidi_level) {
533                    continue;
534                }
535            }
536
537            // Add the new segment and finish the existing one, if we had one. If the first
538            // characters in the run were control characters we may be creating the first
539            // segment in the middle of the run (ie the start should be the start of this
540            // text run's text).
541            let (start_byte_index, start_character_index) = match current {
542                Some(_) => (current_byte_index, current_character_index),
543                None => (self.text_range.start, self.character_range.start),
544            };
545            let new = TextRunSegment::new(
546                font,
547                script,
548                bidi_level,
549                start_byte_index,
550                start_character_index,
551            );
552            if let Some(mut finished) = current.replace(new) {
553                // The end of the previous segment is the start of the next one.
554                finished.range.end = current_byte_index;
555                finished.character_range.end = current_character_index;
556                results.push(finished);
557            }
558        }
559
560        // Either we have a current segment or we only had control characters and whitespace. In both
561        // of those cases, just use the first font.
562        if current.is_none() {
563            current = font_group.first(&layout_context.font_context).map(|font| {
564                TextRunSegment::new(
565                    font,
566                    Script::Common,
567                    Level::ltr(),
568                    self.text_range.start,
569                    self.character_range.start,
570                )
571            })
572        }
573
574        // Extend the last segment to the end of the string and add it to the results.
575        if let Some(mut last_segment) = current.take() {
576            last_segment.range.end = self.text_range.end;
577            last_segment.character_range.end = self.character_range.end;
578            results.push(last_segment);
579        }
580
581        results
582    }
583
584    pub(super) fn layout_into_line_items(&self, ifc: &mut InlineFormattingContextLayout) {
585        if self.text_range.is_empty() {
586            return;
587        }
588
589        // If we are following replaced content, we should have a soft wrap opportunity, unless the
590        // first character of this `TextRun` prevents that soft wrap opportunity. If we see such a
591        // character it should also override the LineBreaker's indication to break at the start.
592        let have_deferred_soft_wrap_opportunity =
593            mem::replace(&mut ifc.have_deferred_soft_wrap_opportunity, false);
594        let mut soft_wrap_policy = match have_deferred_soft_wrap_opportunity {
595            true => SegmentStartSoftWrapPolicy::Force,
596            false => SegmentStartSoftWrapPolicy::FollowLinebreaker,
597        };
598
599        for segment in self.shaped_text.iter() {
600            segment.layout_into_line_items(self, soft_wrap_policy, ifc);
601            soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker;
602        }
603    }
604}
605
606/// From <https://www.w3.org/TR/css-text-3/#cursive-script>:
607/// Cursive scripts do not admit gaps between their letters for either justification
608/// or letter-spacing. The following Unicode scripts are included: Arabic, Hanifi
609/// Rohingya, Mandaic, Mongolian, N’Ko, Phags Pa, Syriac
610fn is_cursive_script(script: Script) -> bool {
611    matches!(
612        script,
613        Script::Arabic |
614            Script::Hanifi_Rohingya |
615            Script::Mandaic |
616            Script::Mongolian |
617            Script::Nko |
618            Script::Phags_Pa |
619            Script::Syriac
620    )
621}
622
623/// Whether or not this character should be able to change the font during segmentation.  Certain
624/// character are not rendered at all, so it doesn't matter what font we use to render them. They
625/// should just be added to the current segment.
626fn char_does_not_change_font(character: char) -> bool {
627    if character.is_control() {
628        return true;
629    }
630    if character == '\u{00A0}' {
631        return true;
632    }
633    if is_bidi_control(character) {
634        return false;
635    }
636
637    let class = linebreak_property(character);
638    class == XI_LINE_BREAKING_CLASS_CM ||
639        class == XI_LINE_BREAKING_CLASS_GL ||
640        class == XI_LINE_BREAKING_CLASS_ZW ||
641        class == XI_LINE_BREAKING_CLASS_WJ ||
642        class == XI_LINE_BREAKING_CLASS_ZWJ
643}
644
645pub(super) fn get_font_for_first_font_for_style(
646    style: &ComputedValues,
647    font_context: &FontContext,
648) -> Option<FontRef> {
649    let font = font_context
650        .font_group(style.clone_font())
651        .first(font_context);
652    if font.is_none() {
653        warn!("Could not find font for style: {:?}", style.clone_font());
654    }
655    font
656}
/// An iterator adaptor over characters that yields each character together with the
/// character that follows it. The final character is paired with `None`.
///
/// Once the adaptor has paired a character with `None`, or the input turned out to be
/// empty, it keeps returning `None` and never polls the input iterator again.
pub(crate) struct TwoCharsAtATimeIterator<InputIterator> {
    /// The input character iterator.
    iterator: InputIterator,
    /// The first character to produce in the next run of the iterator. This is `None`
    /// before the first call to `next` and after the input has run out.
    next_character: Option<char>,
    /// Whether the end of the input has been observed. This distinguishes "exhausted"
    /// from "not started yet", which both leave `next_character` empty.
    finished: bool,
}

impl<InputIterator> TwoCharsAtATimeIterator<InputIterator> {
    /// Wrap `iterator`. The input is not polled until the first call to `next`.
    fn new(iterator: InputIterator) -> Self {
        Self {
            iterator,
            next_character: None,
            finished: false,
        }
    }
}

impl<InputIterator> Iterator for TwoCharsAtATimeIterator<InputIterator>
where
    InputIterator: Iterator<Item = char>,
{
    type Item = (char, Option<char>);

    fn next(&mut self) -> Option<Self::Item> {
        if self.finished {
            return None;
        }

        // The lookahead slot is only empty here on the very first call, in which case the
        // current character has to come straight from the input.
        let pending = self.next_character.take().or_else(|| self.iterator.next());
        let Some(character) = pending else {
            self.finished = true;
            return None;
        };

        self.next_character = self.iterator.next();
        // A missing successor means `character` was the last one; remember that so the
        // input is not polled again after its end.
        self.finished = self.next_character.is_none();
        Some((character, self.next_character))
    }
}

impl<InputIterator> std::iter::FusedIterator for TwoCharsAtATimeIterator<InputIterator> where
    InputIterator: Iterator<Item = char>
{
}
688}