Skip to main content

layout/flow/inline/
construct.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::char::{ToLowercase, ToUppercase};
7use std::ops::Range;
8
9use icu_properties::BidiClass;
10use icu_segmenter::WordSegmenter;
11use layout_api::{LayoutNode, SharedSelection};
12use style::computed_values::_webkit_text_security::T as WebKitTextSecurity;
13use style::computed_values::direction::T as Direction;
14use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
15use style::dom::NodeInfo;
16use style::selector_parser::PseudoElement;
17use style::values::specified::text::TextTransformCase;
18use unicode_bidi::Level;
19use unicode_categories::UnicodeCategories;
20
21use super::text_run::TextRun;
22use super::{
23    InlineBox, InlineBoxIdentifier, InlineBoxes, InlineFormattingContext, InlineItem,
24    SharedInlineStyles,
25};
26use crate::cell::ArcRefCell;
27use crate::context::LayoutContext;
28use crate::dom::{LayoutBox, NodeExt};
29use crate::dom_traversal::NodeAndStyleInfo;
30use crate::flow::BlockLevelBox;
31use crate::flow::float::FloatBox;
32use crate::formatting_contexts::IndependentFormattingContext;
33use crate::positioned::AbsolutelyPositionedBox;
34use crate::style_ext::ComputedValuesExt;
35
/// Accumulates the text segments, [`InlineItem`]s and [`InlineBox`] tree that are handed to
/// [`InlineFormattingContext::new_with_builder`] when [`Self::finish`] is called.
#[derive(Default)]
pub(crate) struct InlineFormattingContextBuilder {
    /// A stack of [`SharedInlineStyles`] including one for the root, one for each inline box on the
    /// inline box stack, and importantly, one for every `display: contents` element that we are
    /// currently processing. Normally `display: contents` elements don't affect the structure of
    /// the [`InlineFormattingContext`], but the styles they provide do style their children.
    pub shared_inline_styles_stack: Vec<SharedInlineStyles>,

    /// The collection of text strings that make up this [`InlineFormattingContext`] under
    /// construction.
    pub text_segments: Vec<String>,

    /// The current byte offset in the final text string of this [`InlineFormattingContext`],
    /// used to properly set the text range of new [`InlineItem::TextRun`]s. This advances by
    /// the UTF-8 length of every string that is pushed.
    current_text_offset: usize,

    /// The current character offset in the final text string of this [`InlineFormattingContext`],
    /// used to properly set the character range of new [`InlineItem::TextRun`]s. Note that this
    /// counts `char`s and is therefore different from the UTF-8 byte offset stored in
    /// `current_text_offset`.
    current_character_offset: usize,

    /// If the [`InlineFormattingContext`] that we are building has a selection shared with its
    /// originating node in the DOM, this will not be `None`.
    pub shared_selection: Option<SharedSelection>,

    /// Whether the last processed node ended with whitespace. This is used to
    /// implement rule 4 of <https://www.w3.org/TR/css-text-3/#collapse>:
    ///
    /// > Any collapsible space immediately following another collapsible space—even one
    /// > outside the boundary of the inline containing that space, provided both spaces are
    /// > within the same inline formatting context—is collapsed to have zero advance width.
    /// > (It is invisible, but retains its soft wrap opportunity, if any.)
    last_inline_box_ended_with_collapsible_white_space: bool,

    /// Whether or not the current state of the inline formatting context is on a word boundary
    /// for the purposes of `text-transform: capitalize`.
    on_word_boundary: bool,

    /// Whether or not this inline formatting context will contain floats.
    pub contains_floats: bool,

    /// The current list of [`InlineItem`]s in this [`InlineFormattingContext`] under
    /// construction. This is stored in a flat list to make it easy to access the last
    /// item.
    pub inline_items: Vec<InlineItem>,

    /// The current [`InlineBox`] tree of this [`InlineFormattingContext`] under construction.
    pub inline_boxes: InlineBoxes,

    /// The stack of ongoing inline boxes of the builder.
    ///
    /// Contains all the currently ongoing inline boxes we entered so far.
    /// The traversal is at all times as deep in the tree as this stack is,
    /// which is why the code doesn't need to keep track of the actual
    /// container root (see `handle_inline_level_element`).
    ///
    /// When an inline box ends, it's removed from this stack.
    inline_box_stack: Vec<InlineBoxIdentifier>,

    /// Whether this [`InlineFormattingContextBuilder`] is empty for the purposes of ignoring
    /// during box tree construction. An IFC is empty if it only contains TextRuns with
    /// completely collapsible whitespace. When that happens it can be ignored completely.
    pub is_empty: bool,

    /// Whether or not the `::first-letter` pseudo-element of this inline formatting context
    /// has been processed yet.
    has_processed_first_letter: bool,

    /// Whether or not the inline formatting context under construction has any kind of
    /// right-to-left content such as a character with an RTL character class or a `dir`
    /// attribute specifying right-to-left content.
    pub(crate) has_right_to_left_content: bool,
}
109
110impl InlineFormattingContextBuilder {
111    /// <https://drafts.csswg.org/css-text/#white-space>:
112    /// > Except where specified otherwise, white space processing in CSS affects only the document
113    /// > white space characters: spaces (U+0020), tabs (U+0009), and segment breaks.
114    ///
115    /// From <https://github.com/w3c/csswg-drafts/issues/5147#issuecomment-637816669>:
116    /// > HTML clearly treats CR, LF, and CRLF as segment breaks.
117    ///
118    /// Other browsers also consider the form feed character (0x0c) to be document white space, it
119    /// seems.
120    ///
121    /// Taken all together, this is equivalent to the WhatWG Infra Standard's definition of ASCII
122    /// white space.
123    pub(crate) fn is_document_white_space(character: char) -> bool {
124        character.is_ascii_whitespace()
125    }
126
127    pub(crate) fn new(info: &NodeAndStyleInfo, context: &LayoutContext) -> Self {
128        let has_right_to_left_content = info.style.get_inherited_box().direction == Direction::Rtl;
129        Self {
130            // For the purposes of `text-transform: capitalize` the start of the IFC is a word boundary.
131            on_word_boundary: true,
132            is_empty: true,
133            shared_inline_styles_stack: vec![SharedInlineStyles::from_info_and_context(
134                info, context,
135            )],
136            shared_selection: info.node.selection(),
137            has_right_to_left_content,
138            ..Default::default()
139        }
140    }
141
142    pub(crate) fn currently_processing_inline_box(&self) -> bool {
143        !self.inline_box_stack.is_empty()
144    }
145
146    fn push_control_character_string(&mut self, string_to_push: &str) {
147        self.text_segments.push(string_to_push.to_owned());
148        self.current_text_offset += string_to_push.len();
149        self.current_character_offset += string_to_push.chars().count();
150    }
151
152    fn shared_inline_styles(&self) -> SharedInlineStyles {
153        self.shared_inline_styles_stack
154            .last()
155            .expect("Should always have at least one SharedInlineStyles")
156            .clone()
157    }
158
159    pub(crate) fn push_atomic(
160        &mut self,
161        independent_formatting_context_creator: impl FnOnce()
162            -> ArcRefCell<IndependentFormattingContext>,
163        old_layout_box: Option<LayoutBox>,
164    ) -> InlineItem {
165        // If there is an existing undamaged layout box that's compatible, use that.
166        let independent_formatting_context = old_layout_box
167            .and_then(|layout_box| match layout_box {
168                LayoutBox::InlineLevel(InlineItem::Atomic(atomic, ..)) => Some(atomic),
169                _ => None,
170            })
171            .unwrap_or_else(independent_formatting_context_creator);
172
173        let inline_level_box = InlineItem::Atomic(
174            independent_formatting_context,
175            self.current_text_offset,
176            Level::ltr(), /* This will be assigned later if necessary. */
177        );
178        self.inline_items.push(inline_level_box.clone());
179        self.is_empty = false;
180
181        // Push an object replacement character for this atomic, which will ensure that the line breaker
182        // inserts a line breaking opportunity here.
183        self.push_control_character_string("\u{fffc}");
184
185        self.last_inline_box_ended_with_collapsible_white_space = false;
186        self.on_word_boundary = true;
187
188        // Atomics such as images should prevent any following text as being interpreted as the first letter.
189        self.has_processed_first_letter = true;
190
191        inline_level_box
192    }
193
194    pub(crate) fn push_absolutely_positioned_box(
195        &mut self,
196        absolutely_positioned_box_creator: impl FnOnce() -> ArcRefCell<AbsolutelyPositionedBox>,
197        old_layout_box: Option<LayoutBox>,
198    ) -> InlineItem {
199        let absolutely_positioned_box = old_layout_box
200            .and_then(|layout_box| match layout_box {
201                LayoutBox::InlineLevel(InlineItem::OutOfFlowAbsolutelyPositionedBox(
202                    positioned_box,
203                    ..,
204                )) => Some(positioned_box),
205                _ => None,
206            })
207            .unwrap_or_else(absolutely_positioned_box_creator);
208
209        // We cannot just reuse the old inline item, because the `current_text_offset` may have changed.
210        let inline_level_box = InlineItem::OutOfFlowAbsolutelyPositionedBox(
211            absolutely_positioned_box,
212            self.current_text_offset,
213        );
214
215        self.inline_items.push(inline_level_box.clone());
216        self.is_empty = false;
217        inline_level_box
218    }
219
220    pub(crate) fn push_float_box(
221        &mut self,
222        float_box_creator: impl FnOnce() -> ArcRefCell<FloatBox>,
223        old_layout_box: Option<LayoutBox>,
224    ) -> InlineItem {
225        let inline_level_box = old_layout_box
226            .and_then(|layout_box| match layout_box {
227                LayoutBox::InlineLevel(inline_item) => Some(inline_item),
228                _ => None,
229            })
230            .unwrap_or_else(|| InlineItem::OutOfFlowFloatBox(float_box_creator()));
231
232        debug_assert!(
233            matches!(inline_level_box, InlineItem::OutOfFlowFloatBox(..),),
234            "Created float box with incompatible `old_layout_box`"
235        );
236
237        self.inline_items.push(inline_level_box.clone());
238        self.is_empty = false;
239        self.contains_floats = true;
240        inline_level_box
241    }
242
243    pub(crate) fn push_block_level_box(&mut self, block_level: ArcRefCell<BlockLevelBox>) {
244        assert!(self.currently_processing_inline_box());
245        self.contains_floats = self.contains_floats || block_level.borrow().contains_floats();
246        self.inline_items.push(InlineItem::BlockLevel(block_level));
247    }
248
249    pub(crate) fn start_inline_box(
250        &mut self,
251        inline_box_creator: impl FnOnce() -> ArcRefCell<InlineBox>,
252        old_layout_box: Option<LayoutBox>,
253    ) -> InlineItem {
254        // If there is an existing undamaged layout box that's compatible, use the `InlineBox` within it.
255        let inline_box = old_layout_box
256            .and_then(|layout_box| match layout_box {
257                LayoutBox::InlineLevel(InlineItem::StartInlineBox(inline_box)) => Some(inline_box),
258                _ => None,
259            })
260            .unwrap_or_else(inline_box_creator);
261
262        let borrowed_inline_box = inline_box.borrow();
263
264        let style = &borrowed_inline_box.base.style;
265        self.push_control_character_string(style.bidi_control_chars().0);
266        self.has_right_to_left_content =
267            self.has_right_to_left_content || style.get_inherited_box().direction == Direction::Rtl;
268
269        self.shared_inline_styles_stack
270            .push(borrowed_inline_box.shared_inline_styles.clone());
271        std::mem::drop(borrowed_inline_box);
272
273        let identifier = self.inline_boxes.start_inline_box(inline_box.clone());
274        let inline_item = InlineItem::StartInlineBox(inline_box);
275        self.inline_items.push(inline_item.clone());
276        self.inline_box_stack.push(identifier);
277        self.is_empty = false;
278        inline_item
279    }
280
281    /// End the ongoing inline box in this [`InlineFormattingContextBuilder`], returning
282    /// shared references to all of the box tree items that were created for it. More than
283    /// a single box tree items may be produced for a single inline box when that inline
284    /// box is split around a block-level element.
285    pub(crate) fn end_inline_box(&mut self) {
286        self.shared_inline_styles_stack.pop();
287        self.inline_items.push(InlineItem::EndInlineBox);
288        let identifier = self
289            .inline_box_stack
290            .pop()
291            .expect("Ended non-existent inline box");
292        self.inline_boxes.end_inline_box(identifier);
293        let inline_level_box = self.inline_boxes.get(&identifier);
294        let bidi_control_chars = inline_level_box.borrow().base.style.bidi_control_chars();
295        self.push_control_character_string(bidi_control_chars.1);
296    }
297
298    /// This is like [`Self::push_text`], except that it might possibly add an anonymous box if
299    ///
300    ///  - This inline formatting context has a `::first-letter` style.
301    ///  - No anonymous box for `::first-letter` has been added yet.
302    ///  - First letter content is detected in this text.
303    ///
304    /// Note that this should only be used when processing text in block containers.
305    pub(crate) fn push_text_with_possible_first_letter<'dom>(
306        &mut self,
307        text: Cow<'dom, str>,
308        info: &NodeAndStyleInfo<'dom>,
309        container_info: &NodeAndStyleInfo<'dom>,
310        layout_context: &LayoutContext,
311    ) -> bool {
312        if self.has_processed_first_letter || !container_info.pseudo_element_chain().is_empty() {
313            self.push_text(text, info);
314            return false;
315        }
316
317        let Some(first_letter_info) =
318            container_info.with_pseudo_element(layout_context, PseudoElement::FirstLetter)
319        else {
320            self.push_text(text, info);
321            return false;
322        };
323
324        let first_letter_range = first_letter_range(&text[..]);
325        if first_letter_range.is_empty() {
326            return false;
327        }
328
329        // Push any leading white space first.
330        if first_letter_range.start != 0 {
331            self.push_text(Cow::Borrowed(&text[0..first_letter_range.start]), info);
332        }
333
334        // Push the first-letter text into an anonymous box with the `::first-letter` style.
335        let box_slot = first_letter_info.node.box_slot();
336        let inline_item = self.start_inline_box(
337            || ArcRefCell::new(InlineBox::new(&first_letter_info, layout_context)),
338            None,
339        );
340        box_slot.set(LayoutBox::InlineLevel(inline_item));
341
342        let first_letter_text = Cow::Borrowed(&text[first_letter_range.clone()]);
343        self.push_text(first_letter_text, &first_letter_info);
344        self.end_inline_box();
345        self.has_processed_first_letter = true;
346
347        // Now push the non-first-letter text.
348        self.push_text(Cow::Borrowed(&text[first_letter_range.end..]), info);
349
350        true
351    }
352
    /// Append `text`, styled by `info`, to this inline formatting context.
    ///
    /// The text first goes through white space collapsing, then the case part of
    /// `text-transform`, then `-webkit-text-security`. If nothing is left afterwards, this
    /// returns without recording anything. Otherwise the resulting string becomes a new text
    /// segment and is either merged into the previous [`InlineItem::TextRun`] (when that is
    /// the last inline item and it uses the very same [`SharedInlineStyles`]) or placed in a
    /// newly created [`TextRun`].
    ///
    /// While processing, this also updates `has_right_to_left_content`, `is_empty`,
    /// `on_word_boundary` and `last_inline_box_ended_with_collapsible_white_space`.
    pub(crate) fn push_text<'dom>(&mut self, text: Cow<'dom, str>, info: &NodeAndStyleInfo<'dom>) {
        let white_space_collapse = info.style.clone_white_space_collapse();
        let collapsed = WhitespaceCollapse::new(
            text.chars(),
            white_space_collapse,
            self.last_inline_box_ended_with_collapsible_white_space,
        );

        // TODO: Not all text transforms are about case, this logic should stop ignoring
        // TextTransform::FULL_WIDTH and TextTransform::FULL_SIZE_KANA.
        let text_transform = info.style.clone_text_transform().case();
        // Declared out here so that the capitalized copy outlives the iterator borrowing it.
        let capitalized_text: String;
        let char_iterator: Box<dyn Iterator<Item = char>> = match text_transform {
            TextTransformCase::None => Box::new(collapsed),
            TextTransformCase::Capitalize => {
                // `TextTransformation` doesn't support capitalization, so we must capitalize the whole
                // string at once and make a copy. Here `on_word_boundary` indicates whether or not the
                // inline formatting context as a whole is on a word boundary. This is different from
                // `last_inline_box_ended_with_collapsible_white_space` because the word boundaries are
                // between atomic inlines and at the start of the IFC, and because preserved spaces
                // are a word boundary.
                let collapsed_string: String = collapsed.collect();
                capitalized_text = capitalize_string(&collapsed_string, self.on_word_boundary);
                Box::new(capitalized_text.chars())
            },
            _ => {
                // If `text-transform` is active, wrap the `WhitespaceCollapse` iterator in
                // a `TextTransformation` iterator.
                Box::new(TextTransformation::new(collapsed, text_transform))
            },
        };

        let char_iterator = if info.style.clone__webkit_text_security() != WebKitTextSecurity::None
        {
            Box::new(TextSecurityTransform::new(
                char_iterator,
                info.style.clone__webkit_text_security(),
            ))
        } else {
            char_iterator
        };

        let bidi_class_map = icu_properties::maps::bidi_class();
        let white_space_collapse = info.style.clone_white_space_collapse();
        // Counted separately from the byte length of `new_text`, as the two differ for
        // non-ASCII text.
        let mut character_count = 0;
        let new_text: String = char_iterator
            .inspect(|&character| {
                character_count += 1;

                // If this character has a strong right-to-left class the new inline formatting context will
                // need to be BiDi-aware. This match is derived from the list of strong right-to-left classes
                // at https://www.unicode.org/reports/tr44/#Bidi_Class_Values.
                self.has_right_to_left_content = self.has_right_to_left_content ||
                    matches!(
                        bidi_class_map.get(character),
                        BidiClass::RightToLeft |
                            BidiClass::ArabicLetter |
                            BidiClass::RightToLeftEmbedding |
                            BidiClass::RightToLeftIsolate |
                            BidiClass::RightToLeftOverride
                    );

                // The formatting context only stays empty while every character seen is
                // white space that is not preserved under the current `white-space-collapse`
                // value.
                self.is_empty = self.is_empty &&
                    match white_space_collapse {
                        WhiteSpaceCollapse::Collapse => Self::is_document_white_space(character),
                        WhiteSpaceCollapse::PreserveBreaks => {
                            Self::is_document_white_space(character) && character != '\n'
                        },
                        WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces => false,
                    };
            })
            .collect();

        // Everything was collapsed away, so there is nothing to record.
        if new_text.is_empty() {
            return;
        }

        if let Some(last_character) = new_text.chars().next_back() {
            self.on_word_boundary = last_character.is_whitespace();
            self.last_inline_box_ended_with_collapsible_white_space =
                self.on_word_boundary && white_space_collapse != WhiteSpaceCollapse::Preserve;
        }

        // Byte range of the new text within the final string.
        let new_range = self.current_text_offset..self.current_text_offset + new_text.len();
        self.current_text_offset = new_range.end;

        // Character range of the new text within the final string.
        let new_character_range =
            self.current_character_offset..self.current_character_offset + character_count;
        self.current_character_offset = new_character_range.end;

        self.text_segments.push(new_text);

        let current_inline_styles = self.shared_inline_styles();

        // If the previous inline item is a `TextRun` sharing the exact same styles, extend
        // it to cover the new text instead of creating another run.
        if let Some(InlineItem::TextRun(text_run)) = self.inline_items.last() &&
            text_run
                .borrow()
                .inline_styles
                .ptr_eq(&current_inline_styles)
        {
            let box_slot = info.node.box_slot();
            let old_text_run = box_slot.take_layout_box_as_text_run();

            {
                let mut text_run = text_run.borrow_mut();
                text_run.text_range.end = new_range.end;
                text_run.character_range.end = new_character_range.end;

                // If this text node does not have a `TextRun` in the box slot, this means that
                // it is either new or dirty, which means that the entire `TextRun` just extended
                // is dirty as well. In this case, never reuse existing shaping results. Clear
                // all old items to ensure this.
                if old_text_run.is_none() {
                    text_run.items.clear();
                }
            }

            box_slot.set(LayoutBox::Text(text_run.clone()));
            return;
        }

        // Otherwise start a new `TextRun`. Only text nodes get the run stored in their box
        // slot; the run previously stored there, if any, is handed to `TextRun::new`.
        let box_slot = info.node.is_text_node().then(|| info.node.box_slot());
        let text_run = ArcRefCell::new(TextRun::new(
            info.into(),
            current_inline_styles,
            new_range,
            new_character_range,
            box_slot
                .as_ref()
                .and_then(|box_slot| box_slot.take_layout_box_as_text_run()),
        ));
        self.inline_items
            .push(InlineItem::TextRun(text_run.clone()));

        if let Some(box_slot) = box_slot {
            box_slot.set(LayoutBox::Text(text_run));
        }
    }
491
492    pub(crate) fn enter_display_contents(&mut self, shared_inline_styles: SharedInlineStyles) {
493        self.shared_inline_styles_stack.push(shared_inline_styles);
494    }
495
496    pub(crate) fn leave_display_contents(&mut self) {
497        self.shared_inline_styles_stack.pop();
498    }
499
500    /// Finish the current inline formatting context, returning [`None`] if the context was empty.
501    pub(crate) fn finish(
502        self,
503        layout_context: &LayoutContext,
504        has_first_formatted_line: bool,
505        is_single_line_text_input: bool,
506        default_bidi_level: Level,
507    ) -> Option<InlineFormattingContext> {
508        if self.is_empty {
509            return None;
510        }
511
512        assert!(self.inline_box_stack.is_empty());
513        Some(InlineFormattingContext::new_with_builder(
514            self,
515            layout_context,
516            has_first_formatted_line,
517            is_single_line_text_input,
518            default_bidi_level,
519        ))
520    }
521}
522
/// Whether a collapsible segment break should be kept (as a space) rather than removed.
///
/// This currently always answers `true`.
fn preserve_segment_break() -> bool {
    let keep_segment_break = true;
    keep_segment_break
}
526
/// An iterator adaptor that wraps a `char` iterator and applies CSS white space collapsing
/// to it according to the given `white-space-collapse` value.
pub struct WhitespaceCollapse<InputIterator> {
    /// The wrapped iterator producing the uncollapsed characters.
    char_iterator: InputIterator,
    /// The `white-space-collapse` value that decides how white space is treated.
    white_space_collapse: WhiteSpaceCollapse,

    /// Whether or not we should collapse white space completely at the start of the string.
    /// This is true when the last character handled in our owning [`super::InlineFormattingContext`]
    /// was collapsible white space.
    remove_collapsible_white_space_at_start: bool,

    /// Whether or not the last character produced was newline. There is special behavior
    /// we do after each newline.
    following_newline: bool,

    /// Whether or not we have seen any non-white space characters, indicating that we are not
    /// in a collapsible white space section at the beginning of the string.
    have_seen_non_white_space_characters: bool,

    /// Whether the last character that we processed was a non-newline white space character. When
    /// collapsing white space we need to wait until the next non-white space character or the end
    /// of the string to push a single white space.
    inside_white_space: bool,

    /// When we enter a collapsible white space region, we may need to wait to produce a single
    /// white space character as soon as we encounter a non-white space character. When that
    /// happens we queue up the non-white space character for the next iterator call.
    character_pending_to_return: Option<char>,
}
554
555impl<InputIterator> WhitespaceCollapse<InputIterator> {
556    pub fn new(
557        char_iterator: InputIterator,
558        white_space_collapse: WhiteSpaceCollapse,
559        trim_beginning_white_space: bool,
560    ) -> Self {
561        Self {
562            char_iterator,
563            white_space_collapse,
564            remove_collapsible_white_space_at_start: trim_beginning_white_space,
565            inside_white_space: false,
566            following_newline: false,
567            have_seen_non_white_space_characters: false,
568            character_pending_to_return: None,
569        }
570    }
571
572    fn is_leading_trimmed_white_space(&self) -> bool {
573        !self.have_seen_non_white_space_characters && self.remove_collapsible_white_space_at_start
574    }
575
576    /// Whether or not we need to produce a space character if the next character is not a newline
577    /// and not white space. This happens when we are exiting a section of white space and we
578    /// waited to produce a single space character for the entire section of white space (but
579    /// not following or preceding a newline).
580    fn need_to_produce_space_character_after_white_space(&self) -> bool {
581        self.inside_white_space && !self.following_newline && !self.is_leading_trimmed_white_space()
582    }
583}
584
585impl<InputIterator> Iterator for WhitespaceCollapse<InputIterator>
586where
587    InputIterator: Iterator<Item = char>,
588{
589    type Item = char;
590
591    fn next(&mut self) -> Option<Self::Item> {
592        // Point 4.1.1 first bullet:
593        // > If white-space is set to normal, nowrap, or pre-line, whitespace
594        // > characters are considered collapsible
595        // If whitespace is not considered collapsible, it is preserved entirely, which
596        // means that we can simply return the input string exactly.
597        if self.white_space_collapse == WhiteSpaceCollapse::Preserve ||
598            self.white_space_collapse == WhiteSpaceCollapse::BreakSpaces
599        {
600            // From <https://drafts.csswg.org/css-text-3/#white-space-processing>:
601            // > Carriage returns (U+000D) are treated identically to spaces (U+0020) in all respects.
602            //
603            // In the non-preserved case these are converted to space below.
604            return match self.char_iterator.next() {
605                Some('\r') => Some(' '),
606                next => next,
607            };
608        }
609
610        if let Some(character) = self.character_pending_to_return.take() {
611            self.inside_white_space = false;
612            self.have_seen_non_white_space_characters = true;
613            self.following_newline = false;
614            return Some(character);
615        }
616
617        while let Some(character) = self.char_iterator.next() {
618            // Don't push non-newline whitespace immediately. Instead wait to push it until we
619            // know that it isn't followed by a newline. See `push_pending_whitespace_if_needed`
620            // above.
621            if InlineFormattingContextBuilder::is_document_white_space(character) &&
622                character != '\n'
623            {
624                self.inside_white_space = true;
625                continue;
626            }
627
628            // Point 4.1.1:
629            // > 2. Collapsible segment breaks are transformed for rendering according to the
630            // >    segment break transformation rules.
631            if character == '\n' {
632                // From <https://drafts.csswg.org/css-text-3/#line-break-transform>
633                // (4.1.3 -- the segment break transformation rules):
634                //
635                // > When white-space is pre, pre-wrap, or pre-line, segment breaks are not
636                // > collapsible and are instead transformed into a preserved line feed"
637                if self.white_space_collapse != WhiteSpaceCollapse::Collapse {
638                    self.inside_white_space = false;
639                    self.following_newline = true;
640                    return Some(character);
641
642                // Point 4.1.3:
643                // > 1. First, any collapsible segment break immediately following another
644                // >    collapsible segment break is removed.
645                // > 2. Then any remaining segment break is either transformed into a space (U+0020)
646                // >    or removed depending on the context before and after the break.
647                } else if !self.following_newline &&
648                    preserve_segment_break() &&
649                    !self.is_leading_trimmed_white_space()
650                {
651                    self.inside_white_space = false;
652                    self.following_newline = true;
653                    return Some(' ');
654                } else {
655                    self.following_newline = true;
656                    continue;
657                }
658            }
659
660            // Point 4.1.1:
661            // > 2. Any sequence of collapsible spaces and tabs immediately preceding or
662            // >    following a segment break is removed.
663            // > 3. Every collapsible tab is converted to a collapsible space (U+0020).
664            // > 4. Any collapsible space immediately following another collapsible space—even
665            // >    one outside the boundary of the inline containing that space, provided both
666            // >    spaces are within the same inline formatting context—is collapsed to have zero
667            // >    advance width.
668            if self.need_to_produce_space_character_after_white_space() {
669                self.inside_white_space = false;
670                self.character_pending_to_return = Some(character);
671                return Some(' ');
672            }
673
674            self.inside_white_space = false;
675            self.have_seen_non_white_space_characters = true;
676            self.following_newline = false;
677            return Some(character);
678        }
679
680        if self.need_to_produce_space_character_after_white_space() {
681            self.inside_white_space = false;
682            return Some(' ');
683        }
684
685        None
686    }
687
688    fn size_hint(&self) -> (usize, Option<usize>) {
689        self.char_iterator.size_hint()
690    }
691
    /// Consumes this iterator and returns the number of characters remaining in the wrapped
    /// `char_iterator`.
    ///
    /// NOTE(review): this counts input characters instead of repeatedly calling `next`, so
    /// the result presumably differs from the number of characters `next` would yield
    /// whenever input characters are skipped. Confirm that this is intended.
    fn count(self) -> usize
    where
        Self: Sized,
    {
        self.char_iterator.count()
    }
698}
699
/// The not-yet-returned remainder of a single character's case conversion. A conversion
/// may expand to more than one character (for example, `ß` uppercases to `SS`).
enum PendingCaseConversionResult {
    /// Remaining output of a [`char::to_uppercase`] conversion.
    Uppercase(ToUppercase),
    /// Remaining output of a [`char::to_lowercase`] conversion.
    Lowercase(ToLowercase),
}

impl PendingCaseConversionResult {
    /// Returns the next converted character, or `None` once the conversion is exhausted.
    fn next(&mut self) -> Option<char> {
        match self {
            Self::Uppercase(remaining) => remaining.next(),
            Self::Lowercase(remaining) => remaining.next(),
        }
    }
}
713
/// This is an iterator that consumes a char iterator and produces characters transformed
/// by the given CSS `text-transform` value. It currently does not support
/// `text-transform: capitalize` because Unicode segmentation libraries do not support
/// streaming input one character at a time; characters are passed through unchanged in
/// that case.
pub struct TextTransformation<InputIterator> {
    /// The input character iterator.
    char_iterator: InputIterator,
    /// The `text-transform` value to use.
    text_transform: TextTransformCase,
    /// If an uppercasing or lowercasing produces more than one character, this
    /// caches the remaining ones so that they can be returned in subsequent iterator calls.
    pending_case_conversion_result: Option<PendingCaseConversionResult>,
}
727
impl<InputIterator> TextTransformation<InputIterator> {
    /// Creates a transformation over `char_iterator` that applies `text_transform`.
    ///
    /// The new iterator starts without any cached case conversion result.
    pub fn new(char_iterator: InputIterator, text_transform: TextTransformCase) -> Self {
        Self {
            char_iterator,
            text_transform,
            pending_case_conversion_result: None,
        }
    }
}
737
738impl<InputIterator> Iterator for TextTransformation<InputIterator>
739where
740    InputIterator: Iterator<Item = char>,
741{
742    type Item = char;
743
744    fn next(&mut self) -> Option<Self::Item> {
745        if let Some(character) = self
746            .pending_case_conversion_result
747            .as_mut()
748            .and_then(|result| result.next())
749        {
750            return Some(character);
751        }
752        self.pending_case_conversion_result = None;
753
754        for character in self.char_iterator.by_ref() {
755            match self.text_transform {
756                TextTransformCase::None => return Some(character),
757                TextTransformCase::Uppercase => {
758                    let mut pending_result =
759                        PendingCaseConversionResult::Uppercase(character.to_uppercase());
760                    if let Some(character) = pending_result.next() {
761                        self.pending_case_conversion_result = Some(pending_result);
762                        return Some(character);
763                    }
764                },
765                TextTransformCase::Lowercase => {
766                    let mut pending_result =
767                        PendingCaseConversionResult::Lowercase(character.to_lowercase());
768                    if let Some(character) = pending_result.next() {
769                        self.pending_case_conversion_result = Some(pending_result);
770                        return Some(character);
771                    }
772                },
773                // `text-transform: capitalize` currently cannot work on a per-character basis,
774                // so must be handled outside of this iterator.
775                TextTransformCase::Capitalize => return Some(character),
776            }
777        }
778        None
779    }
780}
781
/// An iterator that consumes a char iterator and replaces the characters it produces
/// according to the given `-webkit-text-security` value.
pub struct TextSecurityTransform<InputIterator> {
    /// The input character iterator.
    char_iterator: InputIterator,
    /// The `-webkit-text-security` value to use.
    text_security: WebKitTextSecurity,
}
788
impl<InputIterator> TextSecurityTransform<InputIterator> {
    /// Creates a transformation over `char_iterator` that applies `text_security`.
    pub fn new(char_iterator: InputIterator, text_security: WebKitTextSecurity) -> Self {
        Self {
            char_iterator,
            text_security,
        }
    }
}
797
798impl<InputIterator> Iterator for TextSecurityTransform<InputIterator>
799where
800    InputIterator: Iterator<Item = char>,
801{
802    type Item = char;
803
804    fn next(&mut self) -> Option<Self::Item> {
805        // The behavior of `-webkit-text-security` isn't specified, so we have some
806        // flexibility in the implementation. We just need to maintain a rough
807        // compatability with other browsers.
808        Some(match self.char_iterator.next()? {
809            // This is not ideal, but zero width space is used for some special reasons in
810            // `<input>` fields, so these remain untransformed, otherwise they would show up
811            // in empty text fields.
812            '\u{200B}' => '\u{200B}',
813            // Newlines are preserved, so that `<br>` keeps working as expected.
814            '\n' => '\n',
815            character => match self.text_security {
816                WebKitTextSecurity::None => character,
817                WebKitTextSecurity::Circle => '○',
818                WebKitTextSecurity::Disc => '●',
819                WebKitTextSecurity::Square => '■',
820            },
821        })
822    }
823}
824
825/// Given a string and whether the start of the string represents a word boundary, create a copy of
826/// the string with letters after word boundaries capitalized.
827pub(crate) fn capitalize_string(string: &str, allow_word_at_start: bool) -> String {
828    let mut output_string = String::new();
829    output_string.reserve(string.len());
830
831    let word_segmenter = WordSegmenter::new_auto();
832    let mut bounds = word_segmenter.segment_str(string).peekable();
833    let mut byte_index = 0;
834    for character in string.chars() {
835        let current_byte_index = byte_index;
836        byte_index += character.len_utf8();
837
838        if let Some(next_index) = bounds.peek() &&
839            *next_index == current_byte_index
840        {
841            bounds.next();
842
843            if current_byte_index != 0 || allow_word_at_start {
844                output_string.extend(character.to_uppercase());
845                continue;
846            }
847        }
848
849        output_string.push(character);
850    }
851
852    output_string
853}
854
855/// Computes the range of the first letter.
856///
857/// The range includes any preceding punctuation and white space, and any trailing punctuation. Any
858/// non-punctuation following the letter/number/symbol of first-letter ends the range. Intervening
859/// spaces within trailing punctuation are not supported yet.
860///
861/// If the resulting range is empty, no compatible first-letter text was found.
862///
863/// <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
864fn first_letter_range(text: &str) -> Range<usize> {
865    enum State {
866        /// All characters that precede the `PrecedingWhitespaceAndPunctuation` state.
867        Start,
868        /// All preceding punctuation and intervening whitepace that precedes the `Lns` state.
869        PrecedingPunctuation,
870        /// Unicode general category L: letter, N: number and S: symbol
871        Lns,
872        /// All punctuation (but no whitespace or other characters), that
873        /// come after the `Lns` state.
874        TrailingPunctuation,
875    }
876
877    let mut start = 0;
878    let mut state = State::Start;
879    for (index, character) in text.char_indices() {
880        match &mut state {
881            State::Start => {
882                if character.is_letter() || character.is_number() || character.is_symbol() {
883                    start = index;
884                    state = State::Lns;
885                } else if character.is_punctuation() {
886                    start = index;
887                    state = State::PrecedingPunctuation
888                }
889            },
890            State::PrecedingPunctuation => {
891                if character.is_letter() || character.is_number() || character.is_symbol() {
892                    state = State::Lns;
893                } else if !character.is_separator_space() && !character.is_punctuation() {
894                    return 0..0;
895                }
896            },
897            State::Lns => {
898                // TODO: Implement support for intervening spaces
899                // <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
900                if character.is_punctuation() &&
901                    !character.is_punctuation_open() &&
902                    !character.is_punctuation_dash()
903                {
904                    state = State::TrailingPunctuation;
905                } else {
906                    return start..index;
907                }
908            },
909            State::TrailingPunctuation => {
910                // TODO: Implement support for intervening spaces
911                // <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
912                if character.is_punctuation() &&
913                    !character.is_punctuation_open() &&
914                    !character.is_punctuation_dash()
915                {
916                    continue;
917                } else {
918                    return start..index;
919                }
920            },
921        }
922    }
923
924    match state {
925        State::Start | State::PrecedingPunctuation => 0..0,
926        State::Lns | State::TrailingPunctuation => start..text.len(),
927    }
928}
929
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the slice of `text` selected by `first_letter_range` equals `expected`.
    fn assert_first_letter_eq(text: &str, expected: &str) {
        let range = first_letter_range(text);
        assert_eq!(&text[range], expected);
    }

    #[test]
    fn test_first_letter_range() {
        // Empty string and all spaces
        assert_first_letter_eq("", "");
        assert_first_letter_eq("  ", "");

        // Spaces and punctuation only
        assert_first_letter_eq("(", "");
        assert_first_letter_eq(" (", "");
        assert_first_letter_eq("( ", "");
        assert_first_letter_eq("()", "");

        // Invalid chars
        assert_first_letter_eq("\u{0903}", "");

        // First letter only
        assert_first_letter_eq("A", "A");
        assert_first_letter_eq(" A", "A");
        assert_first_letter_eq("A ", "A");
        assert_first_letter_eq(" A ", "A");

        // Word
        assert_first_letter_eq("App", "A");
        assert_first_letter_eq(" App", "A");
        assert_first_letter_eq("App ", "A");

        // Preceding punctuation, intervening spaces and first letter
        assert_first_letter_eq(r#""A"#, r#""A"#);
        assert_first_letter_eq(r#" "A"#, r#""A"#);
        assert_first_letter_eq(r#""A "#, r#""A"#);
        assert_first_letter_eq(r#"" A"#, r#"" A"#);
        assert_first_letter_eq(r#" "A "#, r#""A"#);
        assert_first_letter_eq(r#"("A"#, r#"("A"#);
        assert_first_letter_eq(r#" ("A"#, r#"("A"#);
        assert_first_letter_eq(r#"( "A"#, r#"( "A"#);
        assert_first_letter_eq(r#"[ ( "A"#, r#"[ ( "A"#);

        // First letter and succeeding punctuation
        // TODO: modify test cases when intervening spaces in succeeding punctuation are
        // supported
        assert_first_letter_eq(r#"A""#, r#"A""#);
        assert_first_letter_eq(r#"A" "#, r#"A""#);
        assert_first_letter_eq(r#"A)]"#, r#"A)]"#);
        assert_first_letter_eq(r#"A" )]"#, r#"A""#);
        assert_first_letter_eq(r#"A)] >"#, r#"A)]"#);

        // Preceding punctuation, first letter and succeeding punctuation together
        assert_first_letter_eq(r#" ("A" )]"#, r#"("A""#);
        assert_first_letter_eq(r#" ("A")] >"#, r#"("A")]"#);

        // Non ASCII chars
        assert_first_letter_eq("一", "一");
        assert_first_letter_eq(" 一 ", "一");
        assert_first_letter_eq("一二三", "一");
        assert_first_letter_eq(" 一二三 ", "一");
        assert_first_letter_eq("(一二三)", "(一");
        assert_first_letter_eq(" (一二三) ", "(一");
        assert_first_letter_eq("((一", "((一");
        assert_first_letter_eq(" ( (一", "( (一");
        assert_first_letter_eq("一)", "一)");
        assert_first_letter_eq("一))", "一))");
        assert_first_letter_eq("一) )", "一)");
    }
}