layout/flow/inline/
construct.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::char::{ToLowercase, ToUppercase};
7
8use base::id::RenderingGroupId;
9use icu_segmenter::WordSegmenter;
10use itertools::izip;
11use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
12use style::values::specified::text::TextTransformCase;
13use unicode_bidi::Level;
14
15use super::text_run::TextRun;
16use super::{
17    InlineBox, InlineBoxIdentifier, InlineBoxes, InlineFormattingContext, InlineItem,
18    SharedInlineStyles,
19};
20use crate::cell::ArcRefCell;
21use crate::context::LayoutContext;
22use crate::dom::LayoutBox;
23use crate::dom_traversal::NodeAndStyleInfo;
24use crate::flow::float::FloatBox;
25use crate::formatting_contexts::IndependentFormattingContext;
26use crate::positioned::AbsolutelyPositionedBox;
27use crate::style_ext::ComputedValuesExt;
28
29#[derive(Default)]
30pub(crate) struct InlineFormattingContextBuilder {
31    /// A stack of [`SharedInlineStyles`] including one for the root, one for each inline box on the
32    /// inline box stack, and importantly, one for every `display: contents` element that we are
33    /// currently processing. Normally `display: contents` elements don't affect the structure of
34    /// the [`InlineFormattingContext`], but the styles they provide do style their children.
35    pub shared_inline_styles_stack: Vec<SharedInlineStyles>,
36
37    /// The collection of text strings that make up this [`InlineFormattingContext`] under
38    /// construction.
39    pub text_segments: Vec<String>,
40
41    /// The current offset in the final text string of this [`InlineFormattingContext`],
42    /// used to properly set the text range of new [`InlineItem::TextRun`]s.
43    current_text_offset: usize,
44
45    /// Whether the last processed node ended with whitespace. This is used to
46    /// implement rule 4 of <https://www.w3.org/TR/css-text-3/#collapse>:
47    ///
48    /// > Any collapsible space immediately following another collapsible space—even one
49    /// > outside the boundary of the inline containing that space, provided both spaces are
50    /// > within the same inline formatting context—is collapsed to have zero advance width.
51    /// > (It is invisible, but retains its soft wrap opportunity, if any.)
52    last_inline_box_ended_with_collapsible_white_space: bool,
53
54    /// Whether or not the current state of the inline formatting context is on a word boundary
55    /// for the purposes of `text-transform: capitalize`.
56    on_word_boundary: bool,
57
58    /// Whether or not this inline formatting context will contain floats.
59    pub contains_floats: bool,
60
61    /// The current list of [`InlineItem`]s in this [`InlineFormattingContext`] under
62    /// construction. This is stored in a flat list to make it easy to access the last
63    /// item.
64    pub inline_items: Vec<ArcRefCell<InlineItem>>,
65
66    /// The current [`InlineBox`] tree of this [`InlineFormattingContext`] under construction.
67    pub inline_boxes: InlineBoxes,
68
69    /// The ongoing stack of inline boxes stack of the builder.
70    ///
71    /// Contains all the currently ongoing inline boxes we entered so far.
72    /// The traversal is at all times as deep in the tree as this stack is,
73    /// which is why the code doesn't need to keep track of the actual
74    /// container root (see `handle_inline_level_element`).
75    ///
76    /// When an inline box ends, it's removed from this stack.
77    inline_box_stack: Vec<InlineBoxIdentifier>,
78
79    /// Normally, an inline box produces a single box tree [`InlineItem`]. When a block
80    /// element causes an inline box [to be split], it can produce multiple
81    /// [`InlineItem`]s, all inserted into different [`InlineFormattingContext`]s.
82    /// [`Self::block_in_inline_splits`] is responsible for tracking all of these split
83    /// inline box results, so that they can be inserted into the [`crate::dom::BoxSlot`]
84    /// for the DOM element once it has been processed for BoxTree construction.
85    ///
86    /// [to be split]: https://www.w3.org/TR/CSS2/visuren.html#anonymous-block-level
87    block_in_inline_splits: Vec<Vec<ArcRefCell<InlineItem>>>,
88
89    /// If the [`InlineBox`] of an inline-level element is not damaged, it can be reused
90    /// to support incremental layout. An [`InlineBox`] can be split by block elements
91    /// into multiple [`InlineBox`]es, all inserted into different
92    /// [`InlineFormattingContext`]s. Therefore, [`Self::old_block_in_inline_splits`] is
93    /// used to hold all these split inline boxes from the previous box tree construction
94    /// that are about to be reused, ensuring they can be sequentially inserted into each
95    /// newly built [`InlineFormattingContext`].
96    old_block_in_inline_splits: Vec<Vec<ArcRefCell<InlineBox>>>,
97
98    /// Whether this [`InlineFormattingContextBuilder`] is empty for the purposes of ignoring
99    /// during box tree construction. An IFC is empty if it only contains TextRuns with
100    /// completely collapsible whitespace. When that happens it can be ignored completely.
101    pub is_empty: bool,
102}
103
104impl InlineFormattingContextBuilder {
105    pub(crate) fn new(info: &NodeAndStyleInfo) -> Self {
106        Self::new_for_shared_styles(vec![info.into()])
107    }
108
109    pub(crate) fn new_for_shared_styles(
110        shared_inline_styles_stack: Vec<SharedInlineStyles>,
111    ) -> Self {
112        Self {
113            // For the purposes of `text-transform: capitalize` the start of the IFC is a word boundary.
114            on_word_boundary: true,
115            is_empty: true,
116            shared_inline_styles_stack,
117            ..Default::default()
118        }
119    }
120
121    pub(crate) fn currently_processing_inline_box(&self) -> bool {
122        !self.inline_box_stack.is_empty()
123    }
124
125    fn push_control_character_string(&mut self, string_to_push: &str) {
126        self.text_segments.push(string_to_push.to_owned());
127        self.current_text_offset += string_to_push.len();
128    }
129
130    fn shared_inline_styles(&self) -> SharedInlineStyles {
131        self.shared_inline_styles_stack
132            .last()
133            .expect("Should always have at least one SharedInlineStyles")
134            .clone()
135    }
136
137    pub(crate) fn push_atomic(
138        &mut self,
139        independent_formatting_context_creator: impl FnOnce()
140            -> ArcRefCell<IndependentFormattingContext>,
141        old_layout_box: Option<LayoutBox>,
142    ) -> ArcRefCell<InlineItem> {
143        // If there is an existing undamaged layout box that's compatible, use that.
144        let independent_formatting_context = old_layout_box
145            .and_then(LayoutBox::unsplit_inline_level_layout_box)
146            .and_then(|inline_item| match &*inline_item.borrow() {
147                InlineItem::Atomic(atomic, ..) => Some(atomic.clone()),
148                _ => None,
149            })
150            .unwrap_or_else(independent_formatting_context_creator);
151
152        let inline_level_box = ArcRefCell::new(InlineItem::Atomic(
153            independent_formatting_context,
154            self.current_text_offset,
155            Level::ltr(), /* This will be assigned later if necessary. */
156        ));
157        self.inline_items.push(inline_level_box.clone());
158        self.is_empty = false;
159
160        // Push an object replacement character for this atomic, which will ensure that the line breaker
161        // inserts a line breaking opportunity here.
162        self.push_control_character_string("\u{fffc}");
163
164        self.last_inline_box_ended_with_collapsible_white_space = false;
165        self.on_word_boundary = true;
166
167        inline_level_box
168    }
169
170    pub(crate) fn push_absolutely_positioned_box(
171        &mut self,
172        absolutely_positioned_box_creator: impl FnOnce() -> ArcRefCell<AbsolutelyPositionedBox>,
173        old_layout_box: Option<LayoutBox>,
174    ) -> ArcRefCell<InlineItem> {
175        let absolutely_positioned_box = old_layout_box
176            .and_then(LayoutBox::unsplit_inline_level_layout_box)
177            .and_then(|inline_item| match &*inline_item.borrow() {
178                InlineItem::OutOfFlowAbsolutelyPositionedBox(positioned_box, ..) => {
179                    Some(positioned_box.clone())
180                },
181                _ => None,
182            })
183            .unwrap_or_else(absolutely_positioned_box_creator);
184
185        // We cannot just reuse the old inline item, because the `current_text_offset` may have changed.
186        let inline_level_box = ArcRefCell::new(InlineItem::OutOfFlowAbsolutelyPositionedBox(
187            absolutely_positioned_box,
188            self.current_text_offset,
189        ));
190
191        self.inline_items.push(inline_level_box.clone());
192        self.is_empty = false;
193        inline_level_box
194    }
195
196    pub(crate) fn push_float_box(
197        &mut self,
198        float_box_creator: impl FnOnce() -> ArcRefCell<FloatBox>,
199        old_layout_box: Option<LayoutBox>,
200    ) -> ArcRefCell<InlineItem> {
201        let inline_level_box = old_layout_box
202            .and_then(LayoutBox::unsplit_inline_level_layout_box)
203            .unwrap_or_else(|| ArcRefCell::new(InlineItem::OutOfFlowFloatBox(float_box_creator())));
204
205        debug_assert!(
206            matches!(
207                &*inline_level_box.borrow(),
208                InlineItem::OutOfFlowFloatBox(..),
209            ),
210            "Created float box with incompatible `old_layout_box`"
211        );
212
213        self.inline_items.push(inline_level_box.clone());
214        self.is_empty = false;
215        self.contains_floats = true;
216        inline_level_box
217    }
218
219    pub(crate) fn start_inline_box(
220        &mut self,
221        inline_box_creator: impl FnOnce() -> ArcRefCell<InlineBox>,
222        block_in_inline_splits: Option<Vec<ArcRefCell<InlineItem>>>,
223        old_layout_box: Option<LayoutBox>,
224    ) {
225        // If there is an existing undamaged layout box that's compatible, use the `InlineBox` within it.
226        if let Some(LayoutBox::InlineLevel(inline_level_box)) = old_layout_box {
227            let old_block_in_inline_splits: Vec<ArcRefCell<InlineBox>> = inline_level_box
228                .iter()
229                .rev() // reverse to facilate the `Vec::pop` operation
230                .filter_map(|inline_item| match &*inline_item.borrow() {
231                    InlineItem::StartInlineBox(inline_box) => Some(inline_box.clone()),
232                    _ => None,
233                })
234                .collect();
235
236            debug_assert!(
237                old_block_in_inline_splits.is_empty() ||
238                    old_block_in_inline_splits.len() == inline_level_box.len(),
239                "Create inline box with incompatible `old_layout_box`"
240            );
241
242            self.start_inline_box_internal(
243                inline_box_creator,
244                block_in_inline_splits,
245                old_block_in_inline_splits,
246            );
247        } else {
248            self.start_inline_box_internal(inline_box_creator, block_in_inline_splits, vec![]);
249        }
250    }
251
252    pub fn start_inline_box_internal(
253        &mut self,
254        inline_box_creator: impl FnOnce() -> ArcRefCell<InlineBox>,
255        block_in_inline_splits: Option<Vec<ArcRefCell<InlineItem>>>,
256        mut old_block_in_inline_splits: Vec<ArcRefCell<InlineBox>>,
257    ) {
258        let inline_box = old_block_in_inline_splits
259            .pop()
260            .unwrap_or_else(inline_box_creator);
261
262        let borrowed_inline_box = inline_box.borrow();
263        self.push_control_character_string(borrowed_inline_box.base.style.bidi_control_chars().0);
264
265        // Don't push a `SharedInlineStyles` if we are pushing this box when splitting
266        // an IFC for a block-in-inline split. Shared styles are pushed as part of setting
267        // up the second split of the IFC.
268        if borrowed_inline_box.is_first_split {
269            self.shared_inline_styles_stack
270                .push(borrowed_inline_box.shared_inline_styles.clone());
271        }
272        std::mem::drop(borrowed_inline_box);
273
274        let identifier = self.inline_boxes.start_inline_box(inline_box.clone());
275        let inline_level_box = ArcRefCell::new(InlineItem::StartInlineBox(inline_box));
276        self.inline_items.push(inline_level_box.clone());
277        self.inline_box_stack.push(identifier);
278        self.is_empty = false;
279
280        let mut block_in_inline_splits = block_in_inline_splits.unwrap_or_default();
281        block_in_inline_splits.push(inline_level_box);
282        self.block_in_inline_splits.push(block_in_inline_splits);
283
284        self.old_block_in_inline_splits
285            .push(old_block_in_inline_splits);
286    }
287
288    /// End the ongoing inline box in this [`InlineFormattingContextBuilder`], returning
289    /// shared references to all of the box tree items that were created for it. More than
290    /// a single box tree items may be produced for a single inline box when that inline
291    /// box is split around a block-level element.
292    pub(crate) fn end_inline_box(&mut self) -> Vec<ArcRefCell<InlineItem>> {
293        self.shared_inline_styles_stack.pop();
294
295        let (identifier, block_in_inline_splits) = self.end_inline_box_internal();
296        let inline_level_box = self.inline_boxes.get(&identifier);
297        {
298            let mut inline_level_box = inline_level_box.borrow_mut();
299            inline_level_box.is_last_split = true;
300            self.push_control_character_string(inline_level_box.base.style.bidi_control_chars().1);
301        }
302
303        debug_assert!(
304            self.old_block_in_inline_splits
305                .last()
306                .is_some_and(|inline_boxes| inline_boxes.is_empty()),
307            "Reuse incompatible `old_block_in_inline_splits` for inline boxes",
308        );
309        let _ = self.old_block_in_inline_splits.pop();
310
311        block_in_inline_splits.unwrap_or_default()
312    }
313
314    fn end_inline_box_internal(
315        &mut self,
316    ) -> (InlineBoxIdentifier, Option<Vec<ArcRefCell<InlineItem>>>) {
317        let identifier = self
318            .inline_box_stack
319            .pop()
320            .expect("Ended non-existent inline box");
321        self.inline_items
322            .push(ArcRefCell::new(InlineItem::EndInlineBox));
323        self.is_empty = false;
324
325        self.inline_boxes.end_inline_box(identifier);
326
327        // This might be `None` if this builder has already drained its block-in-inline-splits
328        // into the new builder on the other side of a new block-in-inline split.
329        let block_in_inline_splits = self.block_in_inline_splits.pop();
330
331        (identifier, block_in_inline_splits)
332    }
333
334    pub(crate) fn push_text<'dom>(&mut self, text: Cow<'dom, str>, info: &NodeAndStyleInfo<'dom>) {
335        let white_space_collapse = info.style.clone_white_space_collapse();
336        let collapsed = WhitespaceCollapse::new(
337            text.chars(),
338            white_space_collapse,
339            self.last_inline_box_ended_with_collapsible_white_space,
340        );
341
342        // TODO: Not all text transforms are about case, this logic should stop ignoring
343        // TextTransform::FULL_WIDTH and TextTransform::FULL_SIZE_KANA.
344        let text_transform = info.style.clone_text_transform().case();
345        let capitalized_text: String;
346        let char_iterator: Box<dyn Iterator<Item = char>> = match text_transform {
347            TextTransformCase::None => Box::new(collapsed),
348            TextTransformCase::Capitalize => {
349                // `TextTransformation` doesn't support capitalization, so we must capitalize the whole
350                // string at once and make a copy. Here `on_word_boundary` indicates whether or not the
351                // inline formatting context as a whole is on a word boundary. This is different from
352                // `last_inline_box_ended_with_collapsible_white_space` because the word boundaries are
353                // between atomic inlines and at the start of the IFC, and because preserved spaces
354                // are a word boundary.
355                let collapsed_string: String = collapsed.collect();
356                capitalized_text = capitalize_string(&collapsed_string, self.on_word_boundary);
357                Box::new(capitalized_text.chars())
358            },
359            _ => {
360                // If `text-transform` is active, wrap the `WhitespaceCollapse` iterator in
361                // a `TextTransformation` iterator.
362                Box::new(TextTransformation::new(collapsed, text_transform))
363            },
364        };
365
366        let white_space_collapse = info.style.clone_white_space_collapse();
367        let new_text: String = char_iterator
368            .inspect(|&character| {
369                self.is_empty = self.is_empty &&
370                    match white_space_collapse {
371                        WhiteSpaceCollapse::Collapse => character.is_ascii_whitespace(),
372                        WhiteSpaceCollapse::PreserveBreaks => {
373                            character.is_ascii_whitespace() && character != '\n'
374                        },
375                        WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces => false,
376                    };
377            })
378            .collect();
379
380        if new_text.is_empty() {
381            return;
382        }
383
384        let selection_range = info.get_selection_range();
385        if let Some(last_character) = new_text.chars().next_back() {
386            self.on_word_boundary = last_character.is_whitespace();
387            self.last_inline_box_ended_with_collapsible_white_space =
388                self.on_word_boundary && white_space_collapse != WhiteSpaceCollapse::Preserve;
389        }
390
391        let new_range = self.current_text_offset..self.current_text_offset + new_text.len();
392        self.current_text_offset = new_range.end;
393        self.text_segments.push(new_text);
394
395        if let Some(inline_item) = self.inline_items.last() {
396            if let InlineItem::TextRun(text_run) = &mut *inline_item.borrow_mut() {
397                text_run.borrow_mut().text_range.end = new_range.end;
398                return;
399            }
400        }
401
402        self.inline_items
403            .push(ArcRefCell::new(InlineItem::TextRun(ArcRefCell::new(
404                TextRun::new(
405                    info.into(),
406                    self.shared_inline_styles(),
407                    new_range,
408                    selection_range,
409                ),
410            ))));
411    }
412
413    pub(crate) fn enter_display_contents(&mut self, shared_inline_styles: SharedInlineStyles) {
414        self.shared_inline_styles_stack.push(shared_inline_styles);
415    }
416
417    pub(crate) fn leave_display_contents(&mut self) {
418        self.shared_inline_styles_stack.pop();
419    }
420
421    pub(crate) fn split_around_block_and_finish(
422        &mut self,
423        layout_context: &LayoutContext,
424        has_first_formatted_line: bool,
425        default_bidi_level: Level,
426    ) -> Option<InlineFormattingContext> {
427        if self.is_empty {
428            return None;
429        }
430
431        // Create a new inline builder which will be active after the block splits this inline formatting
432        // context. It has the same inline box structure as this builder, except the boxes are
433        // marked as not being the first fragment. No inline content is carried over to this new
434        // builder.
435        let mut new_builder = Self::new_for_shared_styles(self.shared_inline_styles_stack.clone());
436
437        let block_in_inline_splits = std::mem::take(&mut self.block_in_inline_splits);
438        let old_block_in_inline_splits = std::mem::take(&mut self.old_block_in_inline_splits);
439        for (identifier, already_collected_inline_boxes, being_recollected_inline_boxes) in izip!(
440            self.inline_box_stack.iter(),
441            block_in_inline_splits,
442            old_block_in_inline_splits
443        ) {
444            // Start a new inline box for every ongoing inline box in this
445            // InlineFormattingContext once we are done processing this block element,
446            // being sure to give the block-in-inline-split to the new
447            // InlineFormattingContext. These will finally be inserted into the DOM's
448            // BoxSlot once the inline box has been fully processed. Meanwhile, being
449            // sure to give the old-block-in-inline-split to new InlineFormattingContext,
450            // so that them will be inserted into each following InlineFormattingContext.
451            let split_inline_box_callback = || {
452                ArcRefCell::new(
453                    self.inline_boxes
454                        .get(identifier)
455                        .borrow()
456                        .split_around_block(),
457                )
458            };
459            new_builder.start_inline_box_internal(
460                split_inline_box_callback,
461                Some(already_collected_inline_boxes),
462                being_recollected_inline_boxes,
463            );
464        }
465        let mut inline_builder_from_before_split = std::mem::replace(self, new_builder);
466
467        // End all ongoing inline boxes in the first builder, but ensure that they are not
468        // marked as the final fragments, so that they do not get inline end margin, borders,
469        // and padding.
470        while !inline_builder_from_before_split.inline_box_stack.is_empty() {
471            inline_builder_from_before_split.end_inline_box_internal();
472        }
473
474        inline_builder_from_before_split.finish(
475            layout_context,
476            has_first_formatted_line,
477            /* is_single_line_text_input = */ false,
478            default_bidi_level,
479            layout_context.rendering_group_id,
480        )
481    }
482
483    /// Finish the current inline formatting context, returning [`None`] if the context was empty.
484    pub(crate) fn finish(
485        self,
486        layout_context: &LayoutContext,
487        has_first_formatted_line: bool,
488        is_single_line_text_input: bool,
489        default_bidi_level: Level,
490        rendering_group_id: RenderingGroupId,
491    ) -> Option<InlineFormattingContext> {
492        if self.is_empty {
493            return None;
494        }
495
496        assert!(self.inline_box_stack.is_empty());
497        debug_assert!(self.old_block_in_inline_splits.is_empty());
498        Some(InlineFormattingContext::new_with_builder(
499            self,
500            layout_context,
501            has_first_formatted_line,
502            is_single_line_text_input,
503            default_bidi_level,
504            rendering_group_id,
505        ))
506    }
507}
508
/// Whether a collapsible segment break is kept (as a space) rather than removed.
///
/// This currently always returns `true`. It is consulted by [`WhitespaceCollapse`] when
/// deciding whether a collapsible segment break should produce a space.
fn preserve_segment_break() -> bool {
    true
}
512
513pub struct WhitespaceCollapse<InputIterator> {
514    char_iterator: InputIterator,
515    white_space_collapse: WhiteSpaceCollapse,
516
517    /// Whether or not we should collapse white space completely at the start of the string.
518    /// This is true when the last character handled in our owning [`super::InlineFormattingContext`]
519    /// was collapsible white space.
520    remove_collapsible_white_space_at_start: bool,
521
522    /// Whether or not the last character produced was newline. There is special behavior
523    /// we do after each newline.
524    following_newline: bool,
525
526    /// Whether or not we have seen any non-white space characters, indicating that we are not
527    /// in a collapsible white space section at the beginning of the string.
528    have_seen_non_white_space_characters: bool,
529
530    /// Whether the last character that we processed was a non-newline white space character. When
531    /// collapsing white space we need to wait until the next non-white space character or the end
532    /// of the string to push a single white space.
533    inside_white_space: bool,
534
535    /// When we enter a collapsible white space region, we may need to wait to produce a single
536    /// white space character as soon as we encounter a non-white space character. When that
537    /// happens we queue up the non-white space character for the next iterator call.
538    character_pending_to_return: Option<char>,
539}
540
541impl<InputIterator> WhitespaceCollapse<InputIterator> {
542    pub fn new(
543        char_iterator: InputIterator,
544        white_space_collapse: WhiteSpaceCollapse,
545        trim_beginning_white_space: bool,
546    ) -> Self {
547        Self {
548            char_iterator,
549            white_space_collapse,
550            remove_collapsible_white_space_at_start: trim_beginning_white_space,
551            inside_white_space: false,
552            following_newline: false,
553            have_seen_non_white_space_characters: false,
554            character_pending_to_return: None,
555        }
556    }
557
558    fn is_leading_trimmed_white_space(&self) -> bool {
559        !self.have_seen_non_white_space_characters && self.remove_collapsible_white_space_at_start
560    }
561
562    /// Whether or not we need to produce a space character if the next character is not a newline
563    /// and not white space. This happens when we are exiting a section of white space and we
564    /// waited to produce a single space character for the entire section of white space (but
565    /// not following or preceding a newline).
566    fn need_to_produce_space_character_after_white_space(&self) -> bool {
567        self.inside_white_space && !self.following_newline && !self.is_leading_trimmed_white_space()
568    }
569}
570
571impl<InputIterator> Iterator for WhitespaceCollapse<InputIterator>
572where
573    InputIterator: Iterator<Item = char>,
574{
575    type Item = char;
576
577    fn next(&mut self) -> Option<Self::Item> {
578        // Point 4.1.1 first bullet:
579        // > If white-space is set to normal, nowrap, or pre-line, whitespace
580        // > characters are considered collapsible
581        // If whitespace is not considered collapsible, it is preserved entirely, which
582        // means that we can simply return the input string exactly.
583        if self.white_space_collapse == WhiteSpaceCollapse::Preserve ||
584            self.white_space_collapse == WhiteSpaceCollapse::BreakSpaces
585        {
586            // From <https://drafts.csswg.org/css-text-3/#white-space-processing>:
587            // > Carriage returns (U+000D) are treated identically to spaces (U+0020) in all respects.
588            //
589            // In the non-preserved case these are converted to space below.
590            return match self.char_iterator.next() {
591                Some('\r') => Some(' '),
592                next => next,
593            };
594        }
595
596        if let Some(character) = self.character_pending_to_return.take() {
597            self.inside_white_space = false;
598            self.have_seen_non_white_space_characters = true;
599            self.following_newline = false;
600            return Some(character);
601        }
602
603        while let Some(character) = self.char_iterator.next() {
604            // Don't push non-newline whitespace immediately. Instead wait to push it until we
605            // know that it isn't followed by a newline. See `push_pending_whitespace_if_needed`
606            // above.
607            if character.is_ascii_whitespace() && character != '\n' {
608                self.inside_white_space = true;
609                continue;
610            }
611
612            // Point 4.1.1:
613            // > 2. Collapsible segment breaks are transformed for rendering according to the
614            // >    segment break transformation rules.
615            if character == '\n' {
616                // From <https://drafts.csswg.org/css-text-3/#line-break-transform>
617                // (4.1.3 -- the segment break transformation rules):
618                //
619                // > When white-space is pre, pre-wrap, or pre-line, segment breaks are not
620                // > collapsible and are instead transformed into a preserved line feed"
621                if self.white_space_collapse != WhiteSpaceCollapse::Collapse {
622                    self.inside_white_space = false;
623                    self.following_newline = true;
624                    return Some(character);
625
626                // Point 4.1.3:
627                // > 1. First, any collapsible segment break immediately following another
628                // >    collapsible segment break is removed.
629                // > 2. Then any remaining segment break is either transformed into a space (U+0020)
630                // >    or removed depending on the context before and after the break.
631                } else if !self.following_newline &&
632                    preserve_segment_break() &&
633                    !self.is_leading_trimmed_white_space()
634                {
635                    self.inside_white_space = false;
636                    self.following_newline = true;
637                    return Some(' ');
638                } else {
639                    self.following_newline = true;
640                    continue;
641                }
642            }
643
644            // Point 4.1.1:
645            // > 2. Any sequence of collapsible spaces and tabs immediately preceding or
646            // >    following a segment break is removed.
647            // > 3. Every collapsible tab is converted to a collapsible space (U+0020).
648            // > 4. Any collapsible space immediately following another collapsible space—even
649            // >    one outside the boundary of the inline containing that space, provided both
650            // >    spaces are within the same inline formatting context—is collapsed to have zero
651            // >    advance width.
652            if self.need_to_produce_space_character_after_white_space() {
653                self.inside_white_space = false;
654                self.character_pending_to_return = Some(character);
655                return Some(' ');
656            }
657
658            self.inside_white_space = false;
659            self.have_seen_non_white_space_characters = true;
660            self.following_newline = false;
661            return Some(character);
662        }
663
664        if self.need_to_produce_space_character_after_white_space() {
665            self.inside_white_space = false;
666            return Some(' ');
667        }
668
669        None
670    }
671
672    fn size_hint(&self) -> (usize, Option<usize>) {
673        self.char_iterator.size_hint()
674    }
675
676    fn count(self) -> usize
677    where
678        Self: Sized,
679    {
680        self.char_iterator.count()
681    }
682}
683
/// The characters which a single uppercase or lowercase conversion has yet to yield.
enum PendingCaseConversionResult {
    /// The remainder of an uppercase conversion.
    Uppercase(ToUppercase),
    /// The remainder of a lowercase conversion.
    Lowercase(ToLowercase),
}

impl PendingCaseConversionResult {
    /// Yield the next converted character, or `None` once the conversion is exhausted.
    fn next(&mut self) -> Option<char> {
        match self {
            Self::Uppercase(remaining_uppercase) => remaining_uppercase.next(),
            Self::Lowercase(remaining_lowercase) => remaining_lowercase.next(),
        }
    }
}
697
/// An iterator adaptor that consumes a `char` iterator and produces the characters
/// transformed according to the given CSS `text-transform` value.
///
/// It currently does not support `text-transform: capitalize`, because Unicode
/// segmentation libraries do not support streaming input one character at a time;
/// characters are passed through unchanged for that value.
pub struct TextTransformation<InputIterator> {
    /// The input character iterator.
    char_iterator: InputIterator,
    /// The `text-transform` value to use.
    text_transform: TextTransformCase,
    /// If uppercasing or lowercasing a character produces more than one character,
    /// the remaining ones are cached here so that they can be returned by
    /// subsequent calls to `next`.
    pending_case_conversion_result: Option<PendingCaseConversionResult>,
}
711
712impl<InputIterator> TextTransformation<InputIterator> {
713    pub fn new(char_iterator: InputIterator, text_transform: TextTransformCase) -> Self {
714        Self {
715            char_iterator,
716            text_transform,
717            pending_case_conversion_result: None,
718        }
719    }
720}
721
722impl<InputIterator> Iterator for TextTransformation<InputIterator>
723where
724    InputIterator: Iterator<Item = char>,
725{
726    type Item = char;
727
728    fn next(&mut self) -> Option<Self::Item> {
729        if let Some(character) = self
730            .pending_case_conversion_result
731            .as_mut()
732            .and_then(|result| result.next())
733        {
734            return Some(character);
735        }
736        self.pending_case_conversion_result = None;
737
738        for character in self.char_iterator.by_ref() {
739            match self.text_transform {
740                TextTransformCase::None => return Some(character),
741                TextTransformCase::Uppercase => {
742                    let mut pending_result =
743                        PendingCaseConversionResult::Uppercase(character.to_uppercase());
744                    if let Some(character) = pending_result.next() {
745                        self.pending_case_conversion_result = Some(pending_result);
746                        return Some(character);
747                    }
748                },
749                TextTransformCase::Lowercase => {
750                    let mut pending_result =
751                        PendingCaseConversionResult::Lowercase(character.to_lowercase());
752                    if let Some(character) = pending_result.next() {
753                        self.pending_case_conversion_result = Some(pending_result);
754                        return Some(character);
755                    }
756                },
757                // `text-transform: capitalize` currently cannot work on a per-character basis,
758                // so must be handled outside of this iterator.
759                TextTransformCase::Capitalize => return Some(character),
760            }
761        }
762        None
763    }
764}
765
766/// Given a string and whether the start of the string represents a word boundary, create a copy of
767/// the string with letters after word boundaries capitalized.
768pub(crate) fn capitalize_string(string: &str, allow_word_at_start: bool) -> String {
769    let mut output_string = String::new();
770    output_string.reserve(string.len());
771
772    let word_segmenter = WordSegmenter::new_auto();
773    let mut bounds = word_segmenter.segment_str(string).peekable();
774    let mut byte_index = 0;
775    for character in string.chars() {
776        let current_byte_index = byte_index;
777        byte_index += character.len_utf8();
778
779        if let Some(next_index) = bounds.peek() {
780            if *next_index == current_byte_index {
781                bounds.next();
782
783                if current_byte_index != 0 || allow_word_at_start {
784                    output_string.extend(character.to_uppercase());
785                    continue;
786                }
787            }
788        }
789
790        output_string.push(character);
791    }
792
793    output_string
794}