layout/flow/inline/
construct.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::char::{ToLowercase, ToUppercase};
7
8use icu_segmenter::WordSegmenter;
9use itertools::izip;
10use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
11use style::values::specified::text::TextTransformCase;
12use unicode_bidi::Level;
13
14use super::text_run::TextRun;
15use super::{
16    InlineBox, InlineBoxIdentifier, InlineBoxes, InlineFormattingContext, InlineItem,
17    SharedInlineStyles,
18};
19use crate::cell::ArcRefCell;
20use crate::context::LayoutContext;
21use crate::dom::LayoutBox;
22use crate::dom_traversal::NodeAndStyleInfo;
23use crate::flow::float::FloatBox;
24use crate::formatting_contexts::IndependentFormattingContext;
25use crate::positioned::AbsolutelyPositionedBox;
26use crate::style_ext::ComputedValuesExt;
27
/// Incrementally accumulates the text, [`InlineItem`]s, and [`InlineBox`] tree that make up
/// an [`InlineFormattingContext`] while the box tree is being constructed.
#[derive(Default)]
pub(crate) struct InlineFormattingContextBuilder {
    /// A stack of [`SharedInlineStyles`] including one for the root, one for each inline box on the
    /// inline box stack, and importantly, one for every `display: contents` element that we are
    /// currently processing. Normally `display: contents` elements don't affect the structure of
    /// the [`InlineFormattingContext`], but the styles they provide do style their children.
    pub shared_inline_styles_stack: Vec<SharedInlineStyles>,

    /// The collection of text strings that make up this [`InlineFormattingContext`] under
    /// construction.
    pub text_segments: Vec<String>,

    /// The current offset in the final text string of this [`InlineFormattingContext`],
    /// used to properly set the text range of new [`InlineItem::TextRun`]s.
    current_text_offset: usize,

    /// Whether the last processed node ended with whitespace. This is used to
    /// implement rule 4 of <https://www.w3.org/TR/css-text-3/#collapse>:
    ///
    /// > Any collapsible space immediately following another collapsible space—even one
    /// > outside the boundary of the inline containing that space, provided both spaces are
    /// > within the same inline formatting context—is collapsed to have zero advance width.
    /// > (It is invisible, but retains its soft wrap opportunity, if any.)
    last_inline_box_ended_with_collapsible_white_space: bool,

    /// Whether or not the current state of the inline formatting context is on a word boundary
    /// for the purposes of `text-transform: capitalize`.
    on_word_boundary: bool,

    /// Whether or not this inline formatting context will contain floats.
    pub contains_floats: bool,

    /// The current list of [`InlineItem`]s in this [`InlineFormattingContext`] under
    /// construction. This is stored in a flat list to make it easy to access the last
    /// item.
    pub inline_items: Vec<ArcRefCell<InlineItem>>,

    /// The current [`InlineBox`] tree of this [`InlineFormattingContext`] under construction.
    pub inline_boxes: InlineBoxes,

    /// The stack of inline boxes that are currently open in this builder.
    ///
    /// Contains all the currently ongoing inline boxes we entered so far.
    /// The traversal is at all times as deep in the tree as this stack is,
    /// which is why the code doesn't need to keep track of the actual
    /// container root (see `handle_inline_level_element`).
    ///
    /// When an inline box ends, it's removed from this stack.
    inline_box_stack: Vec<InlineBoxIdentifier>,

    /// Normally, an inline box produces a single box tree [`InlineItem`]. When a block
    /// element causes an inline box [to be split], it can produce multiple
    /// [`InlineItem`]s, all inserted into different [`InlineFormattingContext`]s.
    /// [`Self::block_in_inline_splits`] is responsible for tracking all of these split
    /// inline box results, so that they can be inserted into the [`crate::dom::BoxSlot`]
    /// for the DOM element once it has been processed for BoxTree construction.
    ///
    /// [to be split]: https://www.w3.org/TR/CSS2/visuren.html#anonymous-block-level
    block_in_inline_splits: Vec<Vec<ArcRefCell<InlineItem>>>,

    /// If the [`InlineBox`] of an inline-level element is not damaged, it can be reused
    /// to support incremental layout. An [`InlineBox`] can be split by block elements
    /// into multiple [`InlineBox`]es, all inserted into different
    /// [`InlineFormattingContext`]s. Therefore, [`Self::old_block_in_inline_splits`] is
    /// used to hold all these split inline boxes from the previous box tree construction
    /// that are about to be reused, ensuring they can be sequentially inserted into each
    /// newly built [`InlineFormattingContext`].
    old_block_in_inline_splits: Vec<Vec<ArcRefCell<InlineBox>>>,

    /// Whether this [`InlineFormattingContextBuilder`] is empty for the purposes of ignoring
    /// during box tree construction. An IFC is empty if it only contains TextRuns with
    /// completely collapsible whitespace. When that happens it can be ignored completely.
    pub is_empty: bool,
}
102
103impl InlineFormattingContextBuilder {
104    pub(crate) fn new(info: &NodeAndStyleInfo) -> Self {
105        Self::new_for_shared_styles(vec![info.into()])
106    }
107
108    pub(crate) fn new_for_shared_styles(
109        shared_inline_styles_stack: Vec<SharedInlineStyles>,
110    ) -> Self {
111        Self {
112            // For the purposes of `text-transform: capitalize` the start of the IFC is a word boundary.
113            on_word_boundary: true,
114            is_empty: true,
115            shared_inline_styles_stack,
116            ..Default::default()
117        }
118    }
119
120    pub(crate) fn currently_processing_inline_box(&self) -> bool {
121        !self.inline_box_stack.is_empty()
122    }
123
124    fn push_control_character_string(&mut self, string_to_push: &str) {
125        self.text_segments.push(string_to_push.to_owned());
126        self.current_text_offset += string_to_push.len();
127    }
128
129    fn shared_inline_styles(&self) -> SharedInlineStyles {
130        self.shared_inline_styles_stack
131            .last()
132            .expect("Should always have at least one SharedInlineStyles")
133            .clone()
134    }
135
136    pub(crate) fn push_atomic(
137        &mut self,
138        independent_formatting_context_creator: impl FnOnce()
139            -> ArcRefCell<IndependentFormattingContext>,
140        old_layout_box: Option<LayoutBox>,
141    ) -> ArcRefCell<InlineItem> {
142        // If there is an existing undamaged layout box that's compatible, use that.
143        let independent_formatting_context = old_layout_box
144            .and_then(LayoutBox::unsplit_inline_level_layout_box)
145            .and_then(|inline_item| match &*inline_item.borrow() {
146                InlineItem::Atomic(atomic, ..) => Some(atomic.clone()),
147                _ => None,
148            })
149            .unwrap_or_else(independent_formatting_context_creator);
150
151        let inline_level_box = ArcRefCell::new(InlineItem::Atomic(
152            independent_formatting_context,
153            self.current_text_offset,
154            Level::ltr(), /* This will be assigned later if necessary. */
155        ));
156        self.inline_items.push(inline_level_box.clone());
157        self.is_empty = false;
158
159        // Push an object replacement character for this atomic, which will ensure that the line breaker
160        // inserts a line breaking opportunity here.
161        self.push_control_character_string("\u{fffc}");
162
163        self.last_inline_box_ended_with_collapsible_white_space = false;
164        self.on_word_boundary = true;
165
166        inline_level_box
167    }
168
169    pub(crate) fn push_absolutely_positioned_box(
170        &mut self,
171        absolutely_positioned_box_creator: impl FnOnce() -> ArcRefCell<AbsolutelyPositionedBox>,
172        old_layout_box: Option<LayoutBox>,
173    ) -> ArcRefCell<InlineItem> {
174        let absolutely_positioned_box = old_layout_box
175            .and_then(LayoutBox::unsplit_inline_level_layout_box)
176            .and_then(|inline_item| match &*inline_item.borrow() {
177                InlineItem::OutOfFlowAbsolutelyPositionedBox(positioned_box, ..) => {
178                    Some(positioned_box.clone())
179                },
180                _ => None,
181            })
182            .unwrap_or_else(absolutely_positioned_box_creator);
183
184        // We cannot just reuse the old inline item, because the `current_text_offset` may have changed.
185        let inline_level_box = ArcRefCell::new(InlineItem::OutOfFlowAbsolutelyPositionedBox(
186            absolutely_positioned_box,
187            self.current_text_offset,
188        ));
189
190        self.inline_items.push(inline_level_box.clone());
191        self.is_empty = false;
192        inline_level_box
193    }
194
195    pub(crate) fn push_float_box(
196        &mut self,
197        float_box_creator: impl FnOnce() -> ArcRefCell<FloatBox>,
198        old_layout_box: Option<LayoutBox>,
199    ) -> ArcRefCell<InlineItem> {
200        let inline_level_box = old_layout_box
201            .and_then(LayoutBox::unsplit_inline_level_layout_box)
202            .unwrap_or_else(|| ArcRefCell::new(InlineItem::OutOfFlowFloatBox(float_box_creator())));
203
204        debug_assert!(
205            matches!(
206                &*inline_level_box.borrow(),
207                InlineItem::OutOfFlowFloatBox(..),
208            ),
209            "Created float box with incompatible `old_layout_box`"
210        );
211
212        self.inline_items.push(inline_level_box.clone());
213        self.is_empty = false;
214        self.contains_floats = true;
215        inline_level_box
216    }
217
218    pub(crate) fn start_inline_box(
219        &mut self,
220        inline_box_creator: impl FnOnce() -> ArcRefCell<InlineBox>,
221        block_in_inline_splits: Option<Vec<ArcRefCell<InlineItem>>>,
222        old_layout_box: Option<LayoutBox>,
223    ) {
224        // If there is an existing undamaged layout box that's compatible, use the `InlineBox` within it.
225        if let Some(LayoutBox::InlineLevel(inline_level_box)) = old_layout_box {
226            let old_block_in_inline_splits: Vec<ArcRefCell<InlineBox>> = inline_level_box
227                .iter()
228                .rev() // reverse to facilate the `Vec::pop` operation
229                .filter_map(|inline_item| match &*inline_item.borrow() {
230                    InlineItem::StartInlineBox(inline_box) => Some(inline_box.clone()),
231                    _ => None,
232                })
233                .collect();
234
235            debug_assert!(
236                old_block_in_inline_splits.is_empty() ||
237                    old_block_in_inline_splits.len() == inline_level_box.len(),
238                "Create inline box with incompatible `old_layout_box`"
239            );
240
241            self.start_inline_box_internal(
242                inline_box_creator,
243                block_in_inline_splits,
244                old_block_in_inline_splits,
245            );
246        } else {
247            self.start_inline_box_internal(inline_box_creator, block_in_inline_splits, vec![]);
248        }
249    }
250
251    pub fn start_inline_box_internal(
252        &mut self,
253        inline_box_creator: impl FnOnce() -> ArcRefCell<InlineBox>,
254        block_in_inline_splits: Option<Vec<ArcRefCell<InlineItem>>>,
255        mut old_block_in_inline_splits: Vec<ArcRefCell<InlineBox>>,
256    ) {
257        let inline_box = old_block_in_inline_splits
258            .pop()
259            .unwrap_or_else(inline_box_creator);
260
261        let borrowed_inline_box = inline_box.borrow();
262        self.push_control_character_string(borrowed_inline_box.base.style.bidi_control_chars().0);
263
264        // Don't push a `SharedInlineStyles` if we are pushing this box when splitting
265        // an IFC for a block-in-inline split. Shared styles are pushed as part of setting
266        // up the second split of the IFC.
267        if borrowed_inline_box.is_first_split {
268            self.shared_inline_styles_stack
269                .push(borrowed_inline_box.shared_inline_styles.clone());
270        }
271        std::mem::drop(borrowed_inline_box);
272
273        let identifier = self.inline_boxes.start_inline_box(inline_box.clone());
274        let inline_level_box = ArcRefCell::new(InlineItem::StartInlineBox(inline_box));
275        self.inline_items.push(inline_level_box.clone());
276        self.inline_box_stack.push(identifier);
277        self.is_empty = false;
278
279        let mut block_in_inline_splits = block_in_inline_splits.unwrap_or_default();
280        block_in_inline_splits.push(inline_level_box);
281        self.block_in_inline_splits.push(block_in_inline_splits);
282
283        self.old_block_in_inline_splits
284            .push(old_block_in_inline_splits);
285    }
286
287    /// End the ongoing inline box in this [`InlineFormattingContextBuilder`], returning
288    /// shared references to all of the box tree items that were created for it. More than
289    /// a single box tree items may be produced for a single inline box when that inline
290    /// box is split around a block-level element.
291    pub(crate) fn end_inline_box(&mut self) -> Vec<ArcRefCell<InlineItem>> {
292        self.shared_inline_styles_stack.pop();
293
294        let (identifier, block_in_inline_splits) = self.end_inline_box_internal();
295        let inline_level_box = self.inline_boxes.get(&identifier);
296        {
297            let mut inline_level_box = inline_level_box.borrow_mut();
298            inline_level_box.is_last_split = true;
299            self.push_control_character_string(inline_level_box.base.style.bidi_control_chars().1);
300        }
301
302        debug_assert!(
303            self.old_block_in_inline_splits
304                .last()
305                .is_some_and(|inline_boxes| inline_boxes.is_empty()),
306            "Reuse incompatible `old_block_in_inline_splits` for inline boxes",
307        );
308        let _ = self.old_block_in_inline_splits.pop();
309
310        block_in_inline_splits.unwrap_or_default()
311    }
312
313    fn end_inline_box_internal(
314        &mut self,
315    ) -> (InlineBoxIdentifier, Option<Vec<ArcRefCell<InlineItem>>>) {
316        let identifier = self
317            .inline_box_stack
318            .pop()
319            .expect("Ended non-existent inline box");
320        self.inline_items
321            .push(ArcRefCell::new(InlineItem::EndInlineBox));
322        self.is_empty = false;
323
324        self.inline_boxes.end_inline_box(identifier);
325
326        // This might be `None` if this builder has already drained its block-in-inline-splits
327        // into the new builder on the other side of a new block-in-inline split.
328        let block_in_inline_splits = self.block_in_inline_splits.pop();
329
330        (identifier, block_in_inline_splits)
331    }
332
333    pub(crate) fn push_text<'dom>(&mut self, text: Cow<'dom, str>, info: &NodeAndStyleInfo<'dom>) {
334        let white_space_collapse = info.style.clone_white_space_collapse();
335        let collapsed = WhitespaceCollapse::new(
336            text.chars(),
337            white_space_collapse,
338            self.last_inline_box_ended_with_collapsible_white_space,
339        );
340
341        // TODO: Not all text transforms are about case, this logic should stop ignoring
342        // TextTransform::FULL_WIDTH and TextTransform::FULL_SIZE_KANA.
343        let text_transform = info.style.clone_text_transform().case();
344        let capitalized_text: String;
345        let char_iterator: Box<dyn Iterator<Item = char>> = match text_transform {
346            TextTransformCase::None => Box::new(collapsed),
347            TextTransformCase::Capitalize => {
348                // `TextTransformation` doesn't support capitalization, so we must capitalize the whole
349                // string at once and make a copy. Here `on_word_boundary` indicates whether or not the
350                // inline formatting context as a whole is on a word boundary. This is different from
351                // `last_inline_box_ended_with_collapsible_white_space` because the word boundaries are
352                // between atomic inlines and at the start of the IFC, and because preserved spaces
353                // are a word boundary.
354                let collapsed_string: String = collapsed.collect();
355                capitalized_text = capitalize_string(&collapsed_string, self.on_word_boundary);
356                Box::new(capitalized_text.chars())
357            },
358            _ => {
359                // If `text-transform` is active, wrap the `WhitespaceCollapse` iterator in
360                // a `TextTransformation` iterator.
361                Box::new(TextTransformation::new(collapsed, text_transform))
362            },
363        };
364
365        let white_space_collapse = info.style.clone_white_space_collapse();
366        let new_text: String = char_iterator
367            .inspect(|&character| {
368                self.is_empty = self.is_empty &&
369                    match white_space_collapse {
370                        WhiteSpaceCollapse::Collapse => character.is_ascii_whitespace(),
371                        WhiteSpaceCollapse::PreserveBreaks => {
372                            character.is_ascii_whitespace() && character != '\n'
373                        },
374                        WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces => false,
375                    };
376            })
377            .collect();
378
379        if new_text.is_empty() {
380            return;
381        }
382
383        let selection_range = info.get_selection_range();
384        if let Some(last_character) = new_text.chars().next_back() {
385            self.on_word_boundary = last_character.is_whitespace();
386            self.last_inline_box_ended_with_collapsible_white_space =
387                self.on_word_boundary && white_space_collapse != WhiteSpaceCollapse::Preserve;
388        }
389
390        let new_range = self.current_text_offset..self.current_text_offset + new_text.len();
391        self.current_text_offset = new_range.end;
392        self.text_segments.push(new_text);
393
394        if let Some(inline_item) = self.inline_items.last() {
395            if let InlineItem::TextRun(text_run) = &mut *inline_item.borrow_mut() {
396                text_run.borrow_mut().text_range.end = new_range.end;
397                return;
398            }
399        }
400
401        self.inline_items
402            .push(ArcRefCell::new(InlineItem::TextRun(ArcRefCell::new(
403                TextRun::new(
404                    info.into(),
405                    self.shared_inline_styles(),
406                    new_range,
407                    selection_range,
408                ),
409            ))));
410    }
411
    /// Push `shared_inline_styles` onto the style stack, so that they become the styles
    /// returned by [`Self::shared_inline_styles`] until [`Self::leave_display_contents`]
    /// is called.
    pub(crate) fn enter_display_contents(&mut self, shared_inline_styles: SharedInlineStyles) {
        self.shared_inline_styles_stack.push(shared_inline_styles);
    }
415
    /// Pop the top entry off the style stack, undoing a previous call to
    /// [`Self::enter_display_contents`].
    pub(crate) fn leave_display_contents(&mut self) {
        self.shared_inline_styles_stack.pop();
    }
419
420    pub(crate) fn split_around_block_and_finish(
421        &mut self,
422        layout_context: &LayoutContext,
423        has_first_formatted_line: bool,
424        default_bidi_level: Level,
425    ) -> Option<InlineFormattingContext> {
426        if self.is_empty {
427            return None;
428        }
429
430        // Create a new inline builder which will be active after the block splits this inline formatting
431        // context. It has the same inline box structure as this builder, except the boxes are
432        // marked as not being the first fragment. No inline content is carried over to this new
433        // builder.
434        let mut new_builder = Self::new_for_shared_styles(self.shared_inline_styles_stack.clone());
435
436        let block_in_inline_splits = std::mem::take(&mut self.block_in_inline_splits);
437        let old_block_in_inline_splits = std::mem::take(&mut self.old_block_in_inline_splits);
438        for (identifier, already_collected_inline_boxes, being_recollected_inline_boxes) in izip!(
439            self.inline_box_stack.iter(),
440            block_in_inline_splits,
441            old_block_in_inline_splits
442        ) {
443            // Start a new inline box for every ongoing inline box in this
444            // InlineFormattingContext once we are done processing this block element,
445            // being sure to give the block-in-inline-split to the new
446            // InlineFormattingContext. These will finally be inserted into the DOM's
447            // BoxSlot once the inline box has been fully processed. Meanwhile, being
448            // sure to give the old-block-in-inline-split to new InlineFormattingContext,
449            // so that them will be inserted into each following InlineFormattingContext.
450            let split_inline_box_callback = || {
451                ArcRefCell::new(
452                    self.inline_boxes
453                        .get(identifier)
454                        .borrow()
455                        .split_around_block(),
456                )
457            };
458            new_builder.start_inline_box_internal(
459                split_inline_box_callback,
460                Some(already_collected_inline_boxes),
461                being_recollected_inline_boxes,
462            );
463        }
464        let mut inline_builder_from_before_split = std::mem::replace(self, new_builder);
465
466        // End all ongoing inline boxes in the first builder, but ensure that they are not
467        // marked as the final fragments, so that they do not get inline end margin, borders,
468        // and padding.
469        while !inline_builder_from_before_split.inline_box_stack.is_empty() {
470            inline_builder_from_before_split.end_inline_box_internal();
471        }
472
473        inline_builder_from_before_split.finish(
474            layout_context,
475            has_first_formatted_line,
476            /* is_single_line_text_input = */ false,
477            default_bidi_level,
478        )
479    }
480
481    /// Finish the current inline formatting context, returning [`None`] if the context was empty.
482    pub(crate) fn finish(
483        self,
484        layout_context: &LayoutContext,
485        has_first_formatted_line: bool,
486        is_single_line_text_input: bool,
487        default_bidi_level: Level,
488    ) -> Option<InlineFormattingContext> {
489        if self.is_empty {
490            return None;
491        }
492
493        assert!(self.inline_box_stack.is_empty());
494        debug_assert!(self.old_block_in_inline_splits.is_empty());
495        Some(InlineFormattingContext::new_with_builder(
496            self,
497            layout_context,
498            has_first_formatted_line,
499            is_single_line_text_input,
500            default_bidi_level,
501        ))
502    }
503}
504
/// Whether a collapsible segment break that does not directly follow another segment break
/// is kept and turned into a space rather than removed.
///
/// This always answers `true`.
fn preserve_segment_break() -> bool {
    true
}
508
/// An iterator adaptor that consumes a character iterator and yields those characters with
/// white space processed according to a `white-space-collapse` value.
pub struct WhitespaceCollapse<InputIterator> {
    /// The input character iterator.
    char_iterator: InputIterator,
    /// The `white-space-collapse` value that determines how white space is processed.
    white_space_collapse: WhiteSpaceCollapse,

    /// Whether or not we should collapse white space completely at the start of the string.
    /// This is true when the last character handled in our owning [`super::InlineFormattingContext`]
    /// was collapsible white space.
    remove_collapsible_white_space_at_start: bool,

    /// Whether or not the last character produced was newline. There is special behavior
    /// we do after each newline.
    following_newline: bool,

    /// Whether or not we have seen any non-white space characters, indicating that we are not
    /// in a collapsible white space section at the beginning of the string.
    have_seen_non_white_space_characters: bool,

    /// Whether the last character that we processed was a non-newline white space character. When
    /// collapsing white space we need to wait until the next non-white space character or the end
    /// of the string to push a single white space.
    inside_white_space: bool,

    /// When we enter a collapsible white space region, we may need to wait to produce a single
    /// white space character as soon as we encounter a non-white space character. When that
    /// happens we queue up the non-white space character for the next iterator call.
    character_pending_to_return: Option<char>,
}
536
537impl<InputIterator> WhitespaceCollapse<InputIterator> {
538    pub fn new(
539        char_iterator: InputIterator,
540        white_space_collapse: WhiteSpaceCollapse,
541        trim_beginning_white_space: bool,
542    ) -> Self {
543        Self {
544            char_iterator,
545            white_space_collapse,
546            remove_collapsible_white_space_at_start: trim_beginning_white_space,
547            inside_white_space: false,
548            following_newline: false,
549            have_seen_non_white_space_characters: false,
550            character_pending_to_return: None,
551        }
552    }
553
554    fn is_leading_trimmed_white_space(&self) -> bool {
555        !self.have_seen_non_white_space_characters && self.remove_collapsible_white_space_at_start
556    }
557
558    /// Whether or not we need to produce a space character if the next character is not a newline
559    /// and not white space. This happens when we are exiting a section of white space and we
560    /// waited to produce a single space character for the entire section of white space (but
561    /// not following or preceding a newline).
562    fn need_to_produce_space_character_after_white_space(&self) -> bool {
563        self.inside_white_space && !self.following_newline && !self.is_leading_trimmed_white_space()
564    }
565}
566
567impl<InputIterator> Iterator for WhitespaceCollapse<InputIterator>
568where
569    InputIterator: Iterator<Item = char>,
570{
571    type Item = char;
572
573    fn next(&mut self) -> Option<Self::Item> {
574        // Point 4.1.1 first bullet:
575        // > If white-space is set to normal, nowrap, or pre-line, whitespace
576        // > characters are considered collapsible
577        // If whitespace is not considered collapsible, it is preserved entirely, which
578        // means that we can simply return the input string exactly.
579        if self.white_space_collapse == WhiteSpaceCollapse::Preserve ||
580            self.white_space_collapse == WhiteSpaceCollapse::BreakSpaces
581        {
582            // From <https://drafts.csswg.org/css-text-3/#white-space-processing>:
583            // > Carriage returns (U+000D) are treated identically to spaces (U+0020) in all respects.
584            //
585            // In the non-preserved case these are converted to space below.
586            return match self.char_iterator.next() {
587                Some('\r') => Some(' '),
588                next => next,
589            };
590        }
591
592        if let Some(character) = self.character_pending_to_return.take() {
593            self.inside_white_space = false;
594            self.have_seen_non_white_space_characters = true;
595            self.following_newline = false;
596            return Some(character);
597        }
598
599        while let Some(character) = self.char_iterator.next() {
600            // Don't push non-newline whitespace immediately. Instead wait to push it until we
601            // know that it isn't followed by a newline. See `push_pending_whitespace_if_needed`
602            // above.
603            if character.is_ascii_whitespace() && character != '\n' {
604                self.inside_white_space = true;
605                continue;
606            }
607
608            // Point 4.1.1:
609            // > 2. Collapsible segment breaks are transformed for rendering according to the
610            // >    segment break transformation rules.
611            if character == '\n' {
612                // From <https://drafts.csswg.org/css-text-3/#line-break-transform>
613                // (4.1.3 -- the segment break transformation rules):
614                //
615                // > When white-space is pre, pre-wrap, or pre-line, segment breaks are not
616                // > collapsible and are instead transformed into a preserved line feed"
617                if self.white_space_collapse != WhiteSpaceCollapse::Collapse {
618                    self.inside_white_space = false;
619                    self.following_newline = true;
620                    return Some(character);
621
622                // Point 4.1.3:
623                // > 1. First, any collapsible segment break immediately following another
624                // >    collapsible segment break is removed.
625                // > 2. Then any remaining segment break is either transformed into a space (U+0020)
626                // >    or removed depending on the context before and after the break.
627                } else if !self.following_newline &&
628                    preserve_segment_break() &&
629                    !self.is_leading_trimmed_white_space()
630                {
631                    self.inside_white_space = false;
632                    self.following_newline = true;
633                    return Some(' ');
634                } else {
635                    self.following_newline = true;
636                    continue;
637                }
638            }
639
640            // Point 4.1.1:
641            // > 2. Any sequence of collapsible spaces and tabs immediately preceding or
642            // >    following a segment break is removed.
643            // > 3. Every collapsible tab is converted to a collapsible space (U+0020).
644            // > 4. Any collapsible space immediately following another collapsible space—even
645            // >    one outside the boundary of the inline containing that space, provided both
646            // >    spaces are within the same inline formatting context—is collapsed to have zero
647            // >    advance width.
648            if self.need_to_produce_space_character_after_white_space() {
649                self.inside_white_space = false;
650                self.character_pending_to_return = Some(character);
651                return Some(' ');
652            }
653
654            self.inside_white_space = false;
655            self.have_seen_non_white_space_characters = true;
656            self.following_newline = false;
657            return Some(character);
658        }
659
660        if self.need_to_produce_space_character_after_white_space() {
661            self.inside_white_space = false;
662            return Some(' ');
663        }
664
665        None
666    }
667
668    fn size_hint(&self) -> (usize, Option<usize>) {
669        self.char_iterator.size_hint()
670    }
671
672    fn count(self) -> usize
673    where
674        Self: Sized,
675    {
676        self.char_iterator.count()
677    }
678}
679
/// The characters that remain to be returned from the case conversion of a single character.
enum PendingCaseConversionResult {
    /// The remainder of an uppercase conversion.
    Uppercase(ToUppercase),
    /// The remainder of a lowercase conversion.
    Lowercase(ToLowercase),
}

impl PendingCaseConversionResult {
    /// Return the next character of the wrapped conversion, or [`None`] once it is exhausted.
    fn next(&mut self) -> Option<char> {
        match self {
            Self::Uppercase(remaining_uppercase) => remaining_uppercase.next(),
            Self::Lowercase(remaining_lowercase) => remaining_lowercase.next(),
        }
    }
}
693
694/// This is an interator that consumes a char iterator and produces character transformed
695/// by the given CSS `text-transform` value. It currently does not support
696/// `text-transform: capitalize` because Unicode segmentation libraries do not support
697/// streaming input one character at a time.
698pub struct TextTransformation<InputIterator> {
699    /// The input character iterator.
700    char_iterator: InputIterator,
701    /// The `text-transform` value to use.
702    text_transform: TextTransformCase,
703    /// If an uppercasing or lowercasing produces more than one character, this
704    /// caches them so that they can be returned in subsequent iterator calls.
705    pending_case_conversion_result: Option<PendingCaseConversionResult>,
706}
707
708impl<InputIterator> TextTransformation<InputIterator> {
709    pub fn new(char_iterator: InputIterator, text_transform: TextTransformCase) -> Self {
710        Self {
711            char_iterator,
712            text_transform,
713            pending_case_conversion_result: None,
714        }
715    }
716}
717
718impl<InputIterator> Iterator for TextTransformation<InputIterator>
719where
720    InputIterator: Iterator<Item = char>,
721{
722    type Item = char;
723
724    fn next(&mut self) -> Option<Self::Item> {
725        if let Some(character) = self
726            .pending_case_conversion_result
727            .as_mut()
728            .and_then(|result| result.next())
729        {
730            return Some(character);
731        }
732        self.pending_case_conversion_result = None;
733
734        for character in self.char_iterator.by_ref() {
735            match self.text_transform {
736                TextTransformCase::None => return Some(character),
737                TextTransformCase::Uppercase => {
738                    let mut pending_result =
739                        PendingCaseConversionResult::Uppercase(character.to_uppercase());
740                    if let Some(character) = pending_result.next() {
741                        self.pending_case_conversion_result = Some(pending_result);
742                        return Some(character);
743                    }
744                },
745                TextTransformCase::Lowercase => {
746                    let mut pending_result =
747                        PendingCaseConversionResult::Lowercase(character.to_lowercase());
748                    if let Some(character) = pending_result.next() {
749                        self.pending_case_conversion_result = Some(pending_result);
750                        return Some(character);
751                    }
752                },
753                // `text-transform: capitalize` currently cannot work on a per-character basis,
754                // so must be handled outside of this iterator.
755                TextTransformCase::Capitalize => return Some(character),
756            }
757        }
758        None
759    }
760}
761
762/// Given a string and whether the start of the string represents a word boundary, create a copy of
763/// the string with letters after word boundaries capitalized.
764pub(crate) fn capitalize_string(string: &str, allow_word_at_start: bool) -> String {
765    let mut output_string = String::new();
766    output_string.reserve(string.len());
767
768    let word_segmenter = WordSegmenter::new_auto();
769    let mut bounds = word_segmenter.segment_str(string).peekable();
770    let mut byte_index = 0;
771    for character in string.chars() {
772        let current_byte_index = byte_index;
773        byte_index += character.len_utf8();
774
775        if let Some(next_index) = bounds.peek() {
776            if *next_index == current_byte_index {
777                bounds.next();
778
779                if current_byte_index != 0 || allow_word_at_start {
780                    output_string.extend(character.to_uppercase());
781                    continue;
782                }
783            }
784        }
785
786        output_string.push(character);
787    }
788
789    output_string
790}