layout/flow/inline/
text_run.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::mem;
6use std::ops::Range;
7use std::sync::Arc;
8
9use app_units::Au;
10use fonts::{FontContext, FontRef, GlyphStore, ShapingFlags, ShapingOptions};
11use icu_locid::subtags::Language;
12use icu_properties::{self, LineBreak};
13use log::warn;
14use malloc_size_of_derive::MallocSizeOf;
15use servo_arc::Arc as ServoArc;
16use servo_base::text::is_bidi_control;
17use style::Zero;
18use style::computed_values::text_rendering::T as TextRendering;
19use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
20use style::computed_values::word_break::T as WordBreak;
21use style::properties::ComputedValues;
22use style::str::char_is_whitespace;
23use style::values::computed::OverflowWrap;
24use unicode_bidi::{BidiInfo, Level};
25use unicode_script::Script;
26
27use super::line_breaker::LineBreaker;
28use super::{InlineFormattingContextLayout, SharedInlineStyles};
29use crate::context::LayoutContext;
30use crate::dom::WeakLayoutBox;
31use crate::flow::inline::line::TextRunOffsets;
32use crate::fragment_tree::BaseFragmentInfo;
33
/// Decides whether a soft wrap opportunity is processed before the first run of a
/// [`TextRunSegment`] when it is laid out.
///
/// There are two reasons why we might want to break at the start:
///
///  1. The line breaker told us that a break was necessary between two separate
///     instances of sending text to it.
///  2. We are following replaced content, ie `have_deferred_soft_wrap_opportunity`.
///
/// In both cases, we don't want to do this if the first character prevents a
/// soft wrap opportunity.
#[derive(PartialEq)]
enum SegmentStartSoftWrapPolicy {
    /// Process a soft wrap opportunity before the first run of the segment.
    Force,
    /// Only do so when the segment itself recorded a break at its start.
    FollowLinebreaker,
}
47
48/// A data structure which contains information used when shaping a [`TextRunSegment`].
49#[derive(Clone, Debug, MallocSizeOf)]
50pub(crate) struct FontAndScriptInfo {
51    /// The font used when shaping a [`TextRunSegment`].
52    pub font: FontRef,
53    /// The script used when shaping a [`TextRunSegment`].
54    pub script: Script,
55    /// The BiDi [`Level`] used when shaping a [`TextRunSegment`].
56    pub bidi_level: Level,
57    /// The [`Language`] used when shaping a [`TextRunSegment`].
58    pub language: Language,
59    /// Spacing to add between each letter. Corresponds to the CSS 2.1 `letter-spacing` property.
60    /// NB: You will probably want to set the `IGNORE_LIGATURES_SHAPING_FLAG` if this is non-null.
61    ///
62    /// Letter spacing is not applied to all characters. Use [Self::letter_spacing_for_character] to
63    /// determine the amount of spacing to apply.
64    pub letter_spacing: Option<Au>,
65    /// Spacing to add between each word. Corresponds to the CSS 2.1 `word-spacing` property.
66    pub word_spacing: Option<Au>,
67    /// The [`TextRendering`] value from the original style.
68    pub text_rendering: TextRendering,
69}
70
71impl From<&FontAndScriptInfo> for ShapingOptions {
72    fn from(info: &FontAndScriptInfo) -> Self {
73        let mut flags = ShapingFlags::empty();
74        if info.bidi_level.is_rtl() {
75            flags.insert(ShapingFlags::RTL_FLAG);
76        }
77        if info.letter_spacing.is_some() {
78            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
79        };
80        if info.text_rendering == TextRendering::Optimizespeed {
81            flags.insert(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG);
82            flags.insert(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG)
83        }
84        Self {
85            letter_spacing: info.letter_spacing,
86            word_spacing: info.word_spacing,
87            script: info.script,
88            language: info.language,
89            flags,
90        }
91    }
92}
93
94#[derive(Debug, MallocSizeOf)]
95pub(crate) struct TextRunSegment {
96    /// Information about the font and language used in this text run. This is produced by
97    /// segmenting the inline formatting context's text content by font, script, and bidi level.
98    #[conditional_malloc_size_of]
99    pub info: Arc<FontAndScriptInfo>,
100
101    /// The range of bytes in the parent [`super::InlineFormattingContext`]'s text content.
102    pub range: Range<usize>,
103
104    /// The range of characters in the parent [`super::InlineFormattingContext`]'s text content.
105    pub character_range: Range<usize>,
106
107    /// Whether or not the linebreaker said that we should allow a line break at the start of this
108    /// segment.
109    pub break_at_start: bool,
110
111    /// The shaped runs within this segment.
112    #[conditional_malloc_size_of]
113    pub runs: Vec<Arc<GlyphStore>>,
114}
115
116impl TextRunSegment {
117    fn new(
118        info: Arc<FontAndScriptInfo>,
119        start_offset: usize,
120        start_character_offset: usize,
121    ) -> Self {
122        Self {
123            info,
124            range: start_offset..start_offset,
125            character_range: start_character_offset..start_character_offset,
126            runs: Vec::new(),
127            break_at_start: false,
128        }
129    }
130
131    /// Update this segment if the Font and Script are compatible. The update will only
132    /// ever make the Script specific. Returns true if the new Font and Script are
133    /// compatible with this segment or false otherwise.
134    fn update_if_compatible(
135        &mut self,
136        new_font: &FontRef,
137        new_script: Script,
138        new_bidi_level: Level,
139    ) -> bool {
140        if self.info.bidi_level != new_bidi_level || !Arc::ptr_eq(&self.info.font, new_font) {
141            return false;
142        }
143
144        fn is_specific(script: Script) -> bool {
145            script != Script::Common && script != Script::Inherited
146        }
147        if !is_specific(self.info.script) && is_specific(new_script) {
148            self.info = Arc::new(FontAndScriptInfo {
149                script: new_script,
150                ..(*self.info).clone()
151            });
152        }
153        new_script == self.info.script || !is_specific(new_script)
154    }
155
156    fn layout_into_line_items(
157        &self,
158        text_run: &TextRun,
159        mut soft_wrap_policy: SegmentStartSoftWrapPolicy,
160        ifc: &mut InlineFormattingContextLayout,
161    ) {
162        if self.break_at_start && soft_wrap_policy == SegmentStartSoftWrapPolicy::FollowLinebreaker
163        {
164            soft_wrap_policy = SegmentStartSoftWrapPolicy::Force;
165        }
166
167        let mut character_range_start = self.character_range.start;
168        for (run_index, run) in self.runs.iter().enumerate() {
169            ifc.possibly_flush_deferred_forced_line_break();
170
171            let new_character_range_end = character_range_start + run.character_count();
172            let offsets = ifc
173                .ifc
174                .shared_selection
175                .clone()
176                .map(|shared_selection| TextRunOffsets {
177                    shared_selection,
178                    character_range: character_range_start..new_character_range_end,
179                });
180
181            // If this whitespace forces a line break, queue up a hard line break the next time we
182            // see any content. We don't line break immediately, because we'd like to finish processing
183            // any ongoing inline boxes before ending the line.
184            if run.is_single_preserved_newline() {
185                ifc.possibly_push_empty_text_run_to_unbreakable_segment(
186                    text_run, &self.info, offsets,
187                );
188                character_range_start = new_character_range_end;
189                ifc.defer_forced_line_break();
190                continue;
191            }
192
193            // Break before each unbreakable run in this TextRun, except the first unless the
194            // linebreaker was set to break before the first run.
195            if run_index != 0 || soft_wrap_policy == SegmentStartSoftWrapPolicy::Force {
196                ifc.process_soft_wrap_opportunity();
197            }
198
199            ifc.push_glyph_store_to_unbreakable_segment(run.clone(), text_run, &self.info, offsets);
200            character_range_start = new_character_range_end;
201        }
202    }
203
204    fn shape_and_push_range(
205        &mut self,
206        range: &Range<usize>,
207        formatting_context_text: &str,
208        options: &ShapingOptions,
209    ) {
210        self.runs.push(
211            self.info
212                .font
213                .shape_text(&formatting_context_text[range.clone()], options),
214        );
215    }
216
217    /// Shape the text of this [`TextRunSegment`], first finding "words" for the shaper by processing
218    /// the linebreaks found in the owning [`super::InlineFormattingContext`]. Linebreaks are filtered,
219    /// based on the style of the parent inline box.
220    fn shape_text(
221        &mut self,
222        parent_style: &ComputedValues,
223        formatting_context_text: &str,
224        linebreaker: &mut LineBreaker,
225    ) {
226        let options: ShapingOptions = (&*self.info).into();
227
228        // Gather the linebreaks that apply to this segment from the inline formatting context's collection
229        // of line breaks. Also add a simulated break at the end of the segment in order to ensure the final
230        // piece of text is processed.
231        let range = self.range.clone();
232        let linebreaks = linebreaker.advance_to_linebreaks_in_range(self.range.clone());
233        let linebreak_iter = linebreaks.iter().chain(std::iter::once(&range.end));
234
235        self.runs.clear();
236        self.runs.reserve(linebreaks.len());
237        self.break_at_start = false;
238
239        let text_style = parent_style.get_inherited_text().clone();
240        let can_break_anywhere = text_style.word_break == WordBreak::BreakAll ||
241            text_style.overflow_wrap == OverflowWrap::Anywhere ||
242            text_style.overflow_wrap == OverflowWrap::BreakWord;
243
244        let mut last_slice = self.range.start..self.range.start;
245        for break_index in linebreak_iter {
246            let mut options = options;
247            if *break_index == self.range.start {
248                self.break_at_start = true;
249                continue;
250            }
251
252            // Extend the slice to the next UAX#14 line break opportunity.
253            let mut slice = last_slice.end..*break_index;
254            let word = &formatting_context_text[slice.clone()];
255
256            // Split off any trailing whitespace into a separate glyph run.
257            let mut whitespace = slice.end..slice.end;
258            let mut rev_char_indices = word.char_indices().rev().peekable();
259
260            let mut ends_with_whitespace = false;
261            let ends_with_newline = rev_char_indices
262                .peek()
263                .is_some_and(|&(_, character)| character == '\n');
264            if let Some((first_white_space_index, first_white_space_character)) = rev_char_indices
265                .take_while(|&(_, character)| char_is_whitespace(character))
266                .last()
267            {
268                ends_with_whitespace = true;
269                whitespace.start = slice.start + first_white_space_index;
270
271                // If line breaking for a piece of text that has `white-space-collapse: break-spaces` there
272                // is a line break opportunity *after* every preserved space, but not before. This means
273                // that we should not split off the first whitespace, unless that white-space is a preserved
274                // newline.
275                //
276                // An exception to this is if the style tells us that we can break in the middle of words.
277                if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces &&
278                    first_white_space_character != '\n' &&
279                    !can_break_anywhere
280                {
281                    whitespace.start += first_white_space_character.len_utf8();
282                    options
283                        .flags
284                        .insert(ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG);
285                }
286
287                slice.end = whitespace.start;
288            }
289
290            // If there's no whitespace and `word-break` is set to `keep-all`, try increasing the slice.
291            // TODO: This should only happen for CJK text.
292            if !ends_with_whitespace &&
293                *break_index != self.range.end &&
294                text_style.word_break == WordBreak::KeepAll &&
295                !can_break_anywhere
296            {
297                continue;
298            }
299
300            // Only advance the last slice if we are not going to try to expand the slice.
301            last_slice = slice.start..*break_index;
302
303            // Push the non-whitespace part of the range.
304            if !slice.is_empty() {
305                self.shape_and_push_range(&slice, formatting_context_text, &options);
306            }
307
308            if whitespace.is_empty() {
309                continue;
310            }
311
312            options.flags.insert(
313                ShapingFlags::IS_WHITESPACE_SHAPING_FLAG |
314                    ShapingFlags::ENDS_WITH_WHITESPACE_SHAPING_FLAG,
315            );
316
317            // If `white-space-collapse: break-spaces` is active, insert a line breaking opportunity
318            // between each white space character in the white space that we trimmed off.
319            if text_style.white_space_collapse == WhiteSpaceCollapse::BreakSpaces {
320                let start_index = whitespace.start;
321                for (index, character) in formatting_context_text[whitespace].char_indices() {
322                    let index = start_index + index;
323                    self.shape_and_push_range(
324                        &(index..index + character.len_utf8()),
325                        formatting_context_text,
326                        &options,
327                    );
328                }
329                continue;
330            }
331
332            // The breaker breaks after every newline, so either there is none,
333            // or there is exactly one at the very end. In the latter case,
334            // split it into a different run. That's because shaping considers
335            // a newline to have the same advance as a space, but during layout
336            // we want to treat the newline as having no advance.
337            if ends_with_newline && whitespace.len() > 1 {
338                self.shape_and_push_range(
339                    &(whitespace.start..whitespace.end - 1),
340                    formatting_context_text,
341                    &options,
342                );
343                self.shape_and_push_range(
344                    &(whitespace.end - 1..whitespace.end),
345                    formatting_context_text,
346                    &options,
347                );
348            } else {
349                self.shape_and_push_range(&whitespace, formatting_context_text, &options);
350            }
351        }
352    }
353}
354
355/// A single [`TextRun`] for the box tree. These are all descendants of
356/// [`super::InlineBox`] or the root of the [`super::InlineFormattingContext`].  During
357/// box tree construction, text is split into [`TextRun`]s based on their font, script,
358/// etc. When these are created text is already shaped.
359///
360/// <https://www.w3.org/TR/css-display-3/#css-text-run>
361#[derive(Debug, MallocSizeOf)]
362pub(crate) struct TextRun {
363    /// The [`BaseFragmentInfo`] for this [`TextRun`]. Usually this comes from the
364    /// original text node in the DOM for the text.
365    pub base_fragment_info: BaseFragmentInfo,
366
367    /// A weak reference to the parent of this layout box. This becomes valid as soon
368    /// as the *parent* of this box is added to the tree.
369    pub parent_box: Option<WeakLayoutBox>,
370
371    /// The [`crate::SharedStyle`] from this [`TextRun`]s parent element. This is
372    /// shared so that incremental layout can simply update the parent element and
373    /// this [`TextRun`] will be updated automatically.
374    pub inline_styles: SharedInlineStyles,
375
376    /// The range of text in [`super::InlineFormattingContext::text_content`] of the
377    /// [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are UTF-8 offsets.
378    pub text_range: Range<usize>,
379
380    /// The range of characters in this text in [`super::InlineFormattingContext::text_content`]
381    /// of the [`super::InlineFormattingContext`] that owns this [`TextRun`]. These are *not*
382    /// UTF-8 offsets.
383    pub character_range: Range<usize>,
384
385    /// The text of this [`TextRun`] with a font selected, broken into unbreakable
386    /// segments, and shaped.
387    pub shaped_text: Vec<TextRunSegment>,
388}
389
390impl TextRun {
391    pub(crate) fn new(
392        base_fragment_info: BaseFragmentInfo,
393        inline_styles: SharedInlineStyles,
394        text_range: Range<usize>,
395        character_range: Range<usize>,
396    ) -> Self {
397        Self {
398            base_fragment_info,
399            parent_box: None,
400            inline_styles,
401            text_range,
402            character_range,
403            shaped_text: Vec::new(),
404        }
405    }
406
407    pub(super) fn segment_and_shape(
408        &mut self,
409        formatting_context_text: &str,
410        layout_context: &LayoutContext,
411        linebreaker: &mut LineBreaker,
412        bidi_info: &BidiInfo,
413    ) {
414        let parent_style = self.inline_styles.style.borrow().clone();
415        let mut segments = self.segment_text_by_font(
416            layout_context,
417            formatting_context_text,
418            bidi_info,
419            &parent_style,
420        );
421        for segment in segments.iter_mut() {
422            segment.shape_text(&parent_style, formatting_context_text, linebreaker);
423        }
424        let _ = std::mem::replace(&mut self.shaped_text, segments);
425    }
426
427    /// Take the [`TextRun`]'s text and turn it into [`TextRunSegment`]s. Each segment has a matched
428    /// font and script. Fonts may differ when glyphs are found in fallback fonts.
429    /// [`super::InlineFormattingContext`].
430    fn segment_text_by_font(
431        &mut self,
432        layout_context: &LayoutContext,
433        formatting_context_text: &str,
434        bidi_info: &BidiInfo,
435        parent_style: &ServoArc<ComputedValues>,
436    ) -> Vec<TextRunSegment> {
437        let font_style = parent_style.clone_font();
438        let language = font_style._x_lang.0.parse().unwrap_or(Language::UND);
439        let font_size = font_style.font_size.computed_size().into();
440        let font_group = layout_context.font_context.font_group(font_style);
441        let mut current: Option<TextRunSegment> = None;
442        let mut results = Vec::new();
443
444        let text_run_text = &formatting_context_text[self.text_range.clone()];
445        let char_iterator = TwoCharsAtATimeIterator::new(text_run_text.chars());
446
447        let inherited_text_style = parent_style.get_inherited_text().clone();
448        let word_spacing = Some(inherited_text_style.word_spacing.to_used_value(font_size));
449        let letter_spacing = inherited_text_style
450            .letter_spacing
451            .0
452            .to_used_value(font_size);
453        let letter_spacing = if !letter_spacing.is_zero() {
454            Some(letter_spacing)
455        } else {
456            None
457        };
458        let text_rendering = inherited_text_style.text_rendering;
459
460        // The next bytes index of the charcter within the entire inline formatting context's text.
461        let mut next_byte_index = self.text_range.start;
462
463        // next_character_index: The next current character index within the entire inline formatting context's text.
464        for (next_character_index, (character, next_character)) in
465            (self.character_range.start..).zip(char_iterator)
466        {
467            let current_character_index = next_character_index;
468
469            let current_byte_index = next_byte_index;
470            next_byte_index += character.len_utf8();
471
472            if char_does_not_change_font(character) {
473                continue;
474            }
475
476            let Some(font) = font_group.find_by_codepoint(
477                &layout_context.font_context,
478                character,
479                next_character,
480                language,
481            ) else {
482                continue;
483            };
484
485            let script = Script::from(character);
486            let bidi_level = bidi_info.levels[current_byte_index];
487
488            // If the existing segment is compatible with the character, keep going.
489            if let Some(current) = current.as_mut() {
490                if current.update_if_compatible(&font, script, bidi_level) {
491                    continue;
492                }
493            }
494
495            // From https://www.w3.org/TR/css-text-3/#cursive-script:
496            // Cursive scripts do not admit gaps between their letters for either
497            // justification or letter-spacing.
498            let letter_spacing = if is_cursive_script(script) {
499                None
500            } else {
501                letter_spacing
502            };
503
504            let info = FontAndScriptInfo {
505                font,
506                script,
507                bidi_level,
508                language,
509                word_spacing,
510                letter_spacing,
511                text_rendering,
512            };
513
514            // Add the new segment and finish the existing one, if we had one. If the first
515            // characters in the run were control characters we may be creating the first
516            // segment in the middle of the run (ie the start should be the start of this
517            // text run's text).
518            let (start_byte_index, start_character_index) = match current {
519                Some(_) => (current_byte_index, current_character_index),
520                None => (self.text_range.start, self.character_range.start),
521            };
522            let new = TextRunSegment::new(Arc::new(info), start_byte_index, start_character_index);
523            if let Some(mut finished) = current.replace(new) {
524                // The end of the previous segment is the start of the next one.
525                finished.range.end = current_byte_index;
526                finished.character_range.end = current_character_index;
527                results.push(finished);
528            }
529        }
530
531        // Either we have a current segment or we only had control characters and whitespace. In both
532        // of those cases, just use the first font.
533        if current.is_none() {
534            current = font_group.first(&layout_context.font_context).map(|font| {
535                TextRunSegment::new(
536                    Arc::new(FontAndScriptInfo {
537                        font,
538                        script: Script::Common,
539                        language,
540                        bidi_level: Level::ltr(),
541                        letter_spacing,
542                        word_spacing,
543                        text_rendering,
544                    }),
545                    self.text_range.start,
546                    self.character_range.start,
547                )
548            })
549        }
550
551        // Extend the last segment to the end of the string and add it to the results.
552        if let Some(mut last_segment) = current.take() {
553            last_segment.range.end = self.text_range.end;
554            last_segment.character_range.end = self.character_range.end;
555            results.push(last_segment);
556        }
557
558        results
559    }
560
561    pub(super) fn layout_into_line_items(&self, ifc: &mut InlineFormattingContextLayout) {
562        if self.text_range.is_empty() {
563            return;
564        }
565
566        // If we are following replaced content, we should have a soft wrap opportunity, unless the
567        // first character of this `TextRun` prevents that soft wrap opportunity. If we see such a
568        // character it should also override the LineBreaker's indication to break at the start.
569        let have_deferred_soft_wrap_opportunity =
570            mem::replace(&mut ifc.have_deferred_soft_wrap_opportunity, false);
571        let mut soft_wrap_policy = match have_deferred_soft_wrap_opportunity {
572            true => SegmentStartSoftWrapPolicy::Force,
573            false => SegmentStartSoftWrapPolicy::FollowLinebreaker,
574        };
575
576        for segment in self.shaped_text.iter() {
577            segment.layout_into_line_items(self, soft_wrap_policy, ifc);
578            soft_wrap_policy = SegmentStartSoftWrapPolicy::FollowLinebreaker;
579        }
580    }
581}
582
583/// From <https://www.w3.org/TR/css-text-3/#cursive-script>:
584/// Cursive scripts do not admit gaps between their letters for either justification
585/// or letter-spacing. The following Unicode scripts are included: Arabic, Hanifi
586/// Rohingya, Mandaic, Mongolian, N’Ko, Phags Pa, Syriac
587fn is_cursive_script(script: Script) -> bool {
588    matches!(
589        script,
590        Script::Arabic |
591            Script::Hanifi_Rohingya |
592            Script::Mandaic |
593            Script::Mongolian |
594            Script::Nko |
595            Script::Phags_Pa |
596            Script::Syriac
597    )
598}
599
600/// Whether or not this character should be able to change the font during segmentation.  Certain
601/// character are not rendered at all, so it doesn't matter what font we use to render them. They
602/// should just be added to the current segment.
603fn char_does_not_change_font(character: char) -> bool {
604    if character.is_control() {
605        return true;
606    }
607    if character == '\u{00A0}' {
608        return true;
609    }
610    if is_bidi_control(character) {
611        return false;
612    }
613
614    matches!(
615        icu_properties::maps::line_break().get(character),
616        LineBreak::CombiningMark |
617            LineBreak::Glue |
618            LineBreak::ZWSpace |
619            LineBreak::WordJoiner |
620            LineBreak::ZWJ
621    )
622}
623
624pub(super) fn get_font_for_first_font_for_style(
625    style: &ComputedValues,
626    font_context: &FontContext,
627) -> Option<FontRef> {
628    let font = font_context
629        .font_group(style.clone_font())
630        .first(font_context);
631    if font.is_none() {
632        warn!("Could not find font for style: {:?}", style.clone_font());
633    }
634    font
635}
/// An iterator adapter over characters that yields every character together with the
/// character that follows it, or `None` for the final character.
pub(crate) struct TwoCharsAtATimeIterator<InputIterator> {
    /// The wrapped character iterator.
    iterator: InputIterator,
    /// The lookahead character: it was reported as the "next" character by the previous
    /// call and will be the main character of the upcoming one.
    next_character: Option<char>,
}

impl<InputIterator> TwoCharsAtATimeIterator<InputIterator> {
    /// Wrap `iterator`. Nothing is pulled from it until the first call to `next`.
    fn new(iterator: InputIterator) -> Self {
        let next_character = None;
        Self {
            iterator,
            next_character,
        }
    }
}

impl<InputIterator> Iterator for TwoCharsAtATimeIterator<InputIterator>
where
    InputIterator: Iterator<Item = char>,
{
    type Item = (char, Option<char>);

    fn next(&mut self) -> Option<Self::Item> {
        // Use the buffered lookahead if there is one; otherwise (first call, or input
        // exhausted) ask the wrapped iterator directly.
        let current = match self.next_character.take() {
            Some(buffered) => buffered,
            None => self.iterator.next()?,
        };
        let lookahead = self.iterator.next();
        self.next_character = lookahead;
        Some((current, lookahead))
    }
}