layout/flow/inline/
construct.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::char::{ToLowercase, ToUppercase};
7use std::ops::Range;
8
9use icu_segmenter::WordSegmenter;
10use layout_api::{LayoutNode, SharedSelection};
11use style::computed_values::_webkit_text_security::T as WebKitTextSecurity;
12use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
13use style::selector_parser::PseudoElement;
14use style::values::specified::text::TextTransformCase;
15use unicode_bidi::Level;
16use unicode_categories::UnicodeCategories;
17
18use super::text_run::TextRun;
19use super::{
20    InlineBox, InlineBoxIdentifier, InlineBoxes, InlineFormattingContext, InlineItem,
21    SharedInlineStyles,
22};
23use crate::cell::ArcRefCell;
24use crate::context::LayoutContext;
25use crate::dom::{LayoutBox, NodeExt};
26use crate::dom_traversal::NodeAndStyleInfo;
27use crate::flow::BlockLevelBox;
28use crate::flow::float::FloatBox;
29use crate::formatting_contexts::IndependentFormattingContext;
30use crate::positioned::AbsolutelyPositionedBox;
31use crate::style_ext::ComputedValuesExt;
32
33#[derive(Default)]
34pub(crate) struct InlineFormattingContextBuilder {
35    /// A stack of [`SharedInlineStyles`] including one for the root, one for each inline box on the
36    /// inline box stack, and importantly, one for every `display: contents` element that we are
37    /// currently processing. Normally `display: contents` elements don't affect the structure of
38    /// the [`InlineFormattingContext`], but the styles they provide do style their children.
39    pub shared_inline_styles_stack: Vec<SharedInlineStyles>,
40
41    /// The collection of text strings that make up this [`InlineFormattingContext`] under
42    /// construction.
43    pub text_segments: Vec<String>,
44
45    /// The current offset in the final text string of this [`InlineFormattingContext`],
46    /// used to properly set the text range of new [`InlineItem::TextRun`]s.
47    current_text_offset: usize,
48
49    /// The current character offset in the final text string of this [`InlineFormattingContext`],
50    /// used to properly set the text range of new [`InlineItem::TextRun`]s. Note that this is
51    /// different from the UTF-8 code point offset.
52    current_character_offset: usize,
53
54    /// If the [`InlineFormattingContext`] that we are building has a selection shared with its
55    /// originating node in the DOM, this will not be `None`.
56    pub shared_selection: Option<SharedSelection>,
57
58    /// Whether the last processed node ended with whitespace. This is used to
59    /// implement rule 4 of <https://www.w3.org/TR/css-text-3/#collapse>:
60    ///
61    /// > Any collapsible space immediately following another collapsible space—even one
62    /// > outside the boundary of the inline containing that space, provided both spaces are
63    /// > within the same inline formatting context—is collapsed to have zero advance width.
64    /// > (It is invisible, but retains its soft wrap opportunity, if any.)
65    last_inline_box_ended_with_collapsible_white_space: bool,
66
67    /// Whether or not the current state of the inline formatting context is on a word boundary
68    /// for the purposes of `text-transform: capitalize`.
69    on_word_boundary: bool,
70
71    /// Whether or not this inline formatting context will contain floats.
72    pub contains_floats: bool,
73
74    /// The current list of [`InlineItem`]s in this [`InlineFormattingContext`] under
75    /// construction. This is stored in a flat list to make it easy to access the last
76    /// item.
77    pub inline_items: Vec<InlineItem>,
78
79    /// The current [`InlineBox`] tree of this [`InlineFormattingContext`] under construction.
80    pub inline_boxes: InlineBoxes,
81
82    /// The ongoing stack of inline boxes stack of the builder.
83    ///
84    /// Contains all the currently ongoing inline boxes we entered so far.
85    /// The traversal is at all times as deep in the tree as this stack is,
86    /// which is why the code doesn't need to keep track of the actual
87    /// container root (see `handle_inline_level_element`).
88    ///
89    /// When an inline box ends, it's removed from this stack.
90    inline_box_stack: Vec<InlineBoxIdentifier>,
91
92    /// Whether this [`InlineFormattingContextBuilder`] is empty for the purposes of ignoring
93    /// during box tree construction. An IFC is empty if it only contains TextRuns with
94    /// completely collapsible whitespace. When that happens it can be ignored completely.
95    pub is_empty: bool,
96
97    /// Whether or not the `::first-letter` pseudo-element of this inline formatting context
98    /// has been processed yet.
99    has_processed_first_letter: bool,
100}
101
102impl InlineFormattingContextBuilder {
103    pub(crate) fn new(info: &NodeAndStyleInfo, context: &LayoutContext) -> Self {
104        Self {
105            // For the purposes of `text-transform: capitalize` the start of the IFC is a word boundary.
106            on_word_boundary: true,
107            is_empty: true,
108            shared_inline_styles_stack: vec![SharedInlineStyles::from_info_and_context(
109                info, context,
110            )],
111            shared_selection: info.node.selection(),
112            ..Default::default()
113        }
114    }
115
116    pub(crate) fn currently_processing_inline_box(&self) -> bool {
117        !self.inline_box_stack.is_empty()
118    }
119
120    fn push_control_character_string(&mut self, string_to_push: &str) {
121        self.text_segments.push(string_to_push.to_owned());
122        self.current_text_offset += string_to_push.len();
123        self.current_character_offset += string_to_push.chars().count();
124    }
125
126    fn shared_inline_styles(&self) -> SharedInlineStyles {
127        self.shared_inline_styles_stack
128            .last()
129            .expect("Should always have at least one SharedInlineStyles")
130            .clone()
131    }
132
133    pub(crate) fn push_atomic(
134        &mut self,
135        independent_formatting_context_creator: impl FnOnce()
136            -> ArcRefCell<IndependentFormattingContext>,
137        old_layout_box: Option<LayoutBox>,
138    ) -> InlineItem {
139        // If there is an existing undamaged layout box that's compatible, use that.
140        let independent_formatting_context = old_layout_box
141            .and_then(|layout_box| match layout_box {
142                LayoutBox::InlineLevel(InlineItem::Atomic(atomic, ..)) => Some(atomic),
143                _ => None,
144            })
145            .unwrap_or_else(independent_formatting_context_creator);
146
147        let inline_level_box = InlineItem::Atomic(
148            independent_formatting_context,
149            self.current_text_offset,
150            Level::ltr(), /* This will be assigned later if necessary. */
151        );
152        self.inline_items.push(inline_level_box.clone());
153        self.is_empty = false;
154
155        // Push an object replacement character for this atomic, which will ensure that the line breaker
156        // inserts a line breaking opportunity here.
157        self.push_control_character_string("\u{fffc}");
158
159        self.last_inline_box_ended_with_collapsible_white_space = false;
160        self.on_word_boundary = true;
161
162        // Atomics such as images should prevent any following text as being interpreted as the first letter.
163        self.has_processed_first_letter = true;
164
165        inline_level_box
166    }
167
168    pub(crate) fn push_absolutely_positioned_box(
169        &mut self,
170        absolutely_positioned_box_creator: impl FnOnce() -> ArcRefCell<AbsolutelyPositionedBox>,
171        old_layout_box: Option<LayoutBox>,
172    ) -> InlineItem {
173        let absolutely_positioned_box = old_layout_box
174            .and_then(|layout_box| match layout_box {
175                LayoutBox::InlineLevel(InlineItem::OutOfFlowAbsolutelyPositionedBox(
176                    positioned_box,
177                    ..,
178                )) => Some(positioned_box),
179                _ => None,
180            })
181            .unwrap_or_else(absolutely_positioned_box_creator);
182
183        // We cannot just reuse the old inline item, because the `current_text_offset` may have changed.
184        let inline_level_box = InlineItem::OutOfFlowAbsolutelyPositionedBox(
185            absolutely_positioned_box,
186            self.current_text_offset,
187        );
188
189        self.inline_items.push(inline_level_box.clone());
190        self.is_empty = false;
191        inline_level_box
192    }
193
194    pub(crate) fn push_float_box(
195        &mut self,
196        float_box_creator: impl FnOnce() -> ArcRefCell<FloatBox>,
197        old_layout_box: Option<LayoutBox>,
198    ) -> InlineItem {
199        let inline_level_box = old_layout_box
200            .and_then(|layout_box| match layout_box {
201                LayoutBox::InlineLevel(inline_item) => Some(inline_item),
202                _ => None,
203            })
204            .unwrap_or_else(|| InlineItem::OutOfFlowFloatBox(float_box_creator()));
205
206        debug_assert!(
207            matches!(inline_level_box, InlineItem::OutOfFlowFloatBox(..),),
208            "Created float box with incompatible `old_layout_box`"
209        );
210
211        self.inline_items.push(inline_level_box.clone());
212        self.is_empty = false;
213        self.contains_floats = true;
214        inline_level_box
215    }
216
217    pub(crate) fn push_block_level_box(&mut self, block_level: ArcRefCell<BlockLevelBox>) {
218        assert!(self.currently_processing_inline_box());
219        self.contains_floats = self.contains_floats || block_level.borrow().contains_floats();
220        self.inline_items.push(InlineItem::BlockLevel(block_level));
221    }
222
223    pub(crate) fn start_inline_box(
224        &mut self,
225        inline_box_creator: impl FnOnce() -> ArcRefCell<InlineBox>,
226        old_layout_box: Option<LayoutBox>,
227    ) -> InlineItem {
228        // If there is an existing undamaged layout box that's compatible, use the `InlineBox` within it.
229        let inline_box = old_layout_box
230            .and_then(|layout_box| match layout_box {
231                LayoutBox::InlineLevel(InlineItem::StartInlineBox(inline_box)) => Some(inline_box),
232                _ => None,
233            })
234            .unwrap_or_else(inline_box_creator);
235
236        let borrowed_inline_box = inline_box.borrow();
237        self.push_control_character_string(borrowed_inline_box.base.style.bidi_control_chars().0);
238
239        self.shared_inline_styles_stack
240            .push(borrowed_inline_box.shared_inline_styles.clone());
241        std::mem::drop(borrowed_inline_box);
242
243        let identifier = self.inline_boxes.start_inline_box(inline_box.clone());
244        let inline_item = InlineItem::StartInlineBox(inline_box);
245        self.inline_items.push(inline_item.clone());
246        self.inline_box_stack.push(identifier);
247        self.is_empty = false;
248        inline_item
249    }
250
251    /// End the ongoing inline box in this [`InlineFormattingContextBuilder`], returning
252    /// shared references to all of the box tree items that were created for it. More than
253    /// a single box tree items may be produced for a single inline box when that inline
254    /// box is split around a block-level element.
255    pub(crate) fn end_inline_box(&mut self) {
256        self.shared_inline_styles_stack.pop();
257        self.inline_items.push(InlineItem::EndInlineBox);
258        let identifier = self
259            .inline_box_stack
260            .pop()
261            .expect("Ended non-existent inline box");
262        self.inline_boxes.end_inline_box(identifier);
263        let inline_level_box = self.inline_boxes.get(&identifier);
264        let bidi_control_chars = inline_level_box.borrow().base.style.bidi_control_chars();
265        self.push_control_character_string(bidi_control_chars.1);
266    }
267
268    /// This is like [`Self::push_text`], except that it might possibly add an anonymous box if
269    ///
270    ///  - This inline formatting context has a `::first-letter` style.
271    ///  - No anonymous box for `::first-letter` has been added yet.
272    ///  - First letter content is detected in this text.
273    ///
274    /// Note that this should only be used when processing text in block containers.
275    pub(crate) fn push_text_with_possible_first_letter<'dom>(
276        &mut self,
277        text: Cow<'dom, str>,
278        info: &NodeAndStyleInfo<'dom>,
279        container_info: &NodeAndStyleInfo<'dom>,
280        layout_context: &LayoutContext,
281    ) -> bool {
282        if self.has_processed_first_letter || !container_info.pseudo_element_chain().is_empty() {
283            self.push_text(text, info);
284            return false;
285        }
286
287        let Some(first_letter_info) =
288            container_info.with_pseudo_element(layout_context, PseudoElement::FirstLetter)
289        else {
290            self.push_text(text, info);
291            return false;
292        };
293
294        let first_letter_range = first_letter_range(&text[..]);
295        if first_letter_range.is_empty() {
296            return false;
297        }
298
299        // Push any leading white space first.
300        if first_letter_range.start != 0 {
301            self.push_text(Cow::Borrowed(&text[0..first_letter_range.start]), info);
302        }
303
304        // Push the first-letter text into an anonymous box with the `::first-letter` style.
305        let box_slot = first_letter_info.node.box_slot();
306        let inline_item = self.start_inline_box(
307            || ArcRefCell::new(InlineBox::new(&first_letter_info, layout_context)),
308            None,
309        );
310        box_slot.set(LayoutBox::InlineLevel(inline_item));
311
312        let first_letter_text = Cow::Borrowed(&text[first_letter_range.clone()]);
313        self.push_text(first_letter_text, &first_letter_info);
314        self.end_inline_box();
315        self.has_processed_first_letter = true;
316
317        // Now push the non-first-letter text.
318        self.push_text(Cow::Borrowed(&text[first_letter_range.end..]), info);
319
320        true
321    }
322
323    pub(crate) fn push_text<'dom>(&mut self, text: Cow<'dom, str>, info: &NodeAndStyleInfo<'dom>) {
324        let white_space_collapse = info.style.clone_white_space_collapse();
325        let collapsed = WhitespaceCollapse::new(
326            text.chars(),
327            white_space_collapse,
328            self.last_inline_box_ended_with_collapsible_white_space,
329        );
330
331        // TODO: Not all text transforms are about case, this logic should stop ignoring
332        // TextTransform::FULL_WIDTH and TextTransform::FULL_SIZE_KANA.
333        let text_transform = info.style.clone_text_transform().case();
334        let capitalized_text: String;
335        let char_iterator: Box<dyn Iterator<Item = char>> = match text_transform {
336            TextTransformCase::None => Box::new(collapsed),
337            TextTransformCase::Capitalize => {
338                // `TextTransformation` doesn't support capitalization, so we must capitalize the whole
339                // string at once and make a copy. Here `on_word_boundary` indicates whether or not the
340                // inline formatting context as a whole is on a word boundary. This is different from
341                // `last_inline_box_ended_with_collapsible_white_space` because the word boundaries are
342                // between atomic inlines and at the start of the IFC, and because preserved spaces
343                // are a word boundary.
344                let collapsed_string: String = collapsed.collect();
345                capitalized_text = capitalize_string(&collapsed_string, self.on_word_boundary);
346                Box::new(capitalized_text.chars())
347            },
348            _ => {
349                // If `text-transform` is active, wrap the `WhitespaceCollapse` iterator in
350                // a `TextTransformation` iterator.
351                Box::new(TextTransformation::new(collapsed, text_transform))
352            },
353        };
354
355        let char_iterator = if info.style.clone__webkit_text_security() != WebKitTextSecurity::None
356        {
357            Box::new(TextSecurityTransform::new(
358                char_iterator,
359                info.style.clone__webkit_text_security(),
360            ))
361        } else {
362            char_iterator
363        };
364
365        let white_space_collapse = info.style.clone_white_space_collapse();
366        let mut character_count = 0;
367        let new_text: String = char_iterator
368            .inspect(|&character| {
369                character_count += 1;
370
371                self.is_empty = self.is_empty &&
372                    match white_space_collapse {
373                        WhiteSpaceCollapse::Collapse => character.is_ascii_whitespace(),
374                        WhiteSpaceCollapse::PreserveBreaks => {
375                            character.is_ascii_whitespace() && character != '\n'
376                        },
377                        WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces => false,
378                    };
379            })
380            .collect();
381
382        if new_text.is_empty() {
383            return;
384        }
385
386        if let Some(last_character) = new_text.chars().next_back() {
387            self.on_word_boundary = last_character.is_whitespace();
388            self.last_inline_box_ended_with_collapsible_white_space =
389                self.on_word_boundary && white_space_collapse != WhiteSpaceCollapse::Preserve;
390        }
391
392        let new_range = self.current_text_offset..self.current_text_offset + new_text.len();
393        self.current_text_offset = new_range.end;
394
395        let new_character_range =
396            self.current_character_offset..self.current_character_offset + character_count;
397        self.current_character_offset = new_character_range.end;
398
399        self.text_segments.push(new_text);
400
401        let current_inline_styles = self.shared_inline_styles();
402
403        if let Some(InlineItem::TextRun(text_run)) = self.inline_items.last() {
404            if text_run
405                .borrow()
406                .inline_styles
407                .ptr_eq(&current_inline_styles)
408            {
409                text_run.borrow_mut().text_range.end = new_range.end;
410                text_run.borrow_mut().character_range.end = new_character_range.end;
411                return;
412            }
413        }
414
415        self.inline_items
416            .push(InlineItem::TextRun(ArcRefCell::new(TextRun::new(
417                info.into(),
418                current_inline_styles,
419                new_range,
420                new_character_range,
421            ))));
422    }
423
424    pub(crate) fn enter_display_contents(&mut self, shared_inline_styles: SharedInlineStyles) {
425        self.shared_inline_styles_stack.push(shared_inline_styles);
426    }
427
428    pub(crate) fn leave_display_contents(&mut self) {
429        self.shared_inline_styles_stack.pop();
430    }
431
432    /// Finish the current inline formatting context, returning [`None`] if the context was empty.
433    pub(crate) fn finish(
434        self,
435        layout_context: &LayoutContext,
436        has_first_formatted_line: bool,
437        is_single_line_text_input: bool,
438        default_bidi_level: Level,
439    ) -> Option<InlineFormattingContext> {
440        if self.is_empty {
441            return None;
442        }
443
444        assert!(self.inline_box_stack.is_empty());
445        Some(InlineFormattingContext::new_with_builder(
446            self,
447            layout_context,
448            has_first_formatted_line,
449            is_single_line_text_input,
450            default_bidi_level,
451        ))
452    }
453}
454
/// Whether a collapsible segment break may be turned into a space instead of being removed.
///
/// This currently answers `true` unconditionally.
fn preserve_segment_break() -> bool {
    true
}
458
459pub struct WhitespaceCollapse<InputIterator> {
460    char_iterator: InputIterator,
461    white_space_collapse: WhiteSpaceCollapse,
462
463    /// Whether or not we should collapse white space completely at the start of the string.
464    /// This is true when the last character handled in our owning [`super::InlineFormattingContext`]
465    /// was collapsible white space.
466    remove_collapsible_white_space_at_start: bool,
467
468    /// Whether or not the last character produced was newline. There is special behavior
469    /// we do after each newline.
470    following_newline: bool,
471
472    /// Whether or not we have seen any non-white space characters, indicating that we are not
473    /// in a collapsible white space section at the beginning of the string.
474    have_seen_non_white_space_characters: bool,
475
476    /// Whether the last character that we processed was a non-newline white space character. When
477    /// collapsing white space we need to wait until the next non-white space character or the end
478    /// of the string to push a single white space.
479    inside_white_space: bool,
480
481    /// When we enter a collapsible white space region, we may need to wait to produce a single
482    /// white space character as soon as we encounter a non-white space character. When that
483    /// happens we queue up the non-white space character for the next iterator call.
484    character_pending_to_return: Option<char>,
485}
486
487impl<InputIterator> WhitespaceCollapse<InputIterator> {
488    pub fn new(
489        char_iterator: InputIterator,
490        white_space_collapse: WhiteSpaceCollapse,
491        trim_beginning_white_space: bool,
492    ) -> Self {
493        Self {
494            char_iterator,
495            white_space_collapse,
496            remove_collapsible_white_space_at_start: trim_beginning_white_space,
497            inside_white_space: false,
498            following_newline: false,
499            have_seen_non_white_space_characters: false,
500            character_pending_to_return: None,
501        }
502    }
503
504    fn is_leading_trimmed_white_space(&self) -> bool {
505        !self.have_seen_non_white_space_characters && self.remove_collapsible_white_space_at_start
506    }
507
508    /// Whether or not we need to produce a space character if the next character is not a newline
509    /// and not white space. This happens when we are exiting a section of white space and we
510    /// waited to produce a single space character for the entire section of white space (but
511    /// not following or preceding a newline).
512    fn need_to_produce_space_character_after_white_space(&self) -> bool {
513        self.inside_white_space && !self.following_newline && !self.is_leading_trimmed_white_space()
514    }
515}
516
517impl<InputIterator> Iterator for WhitespaceCollapse<InputIterator>
518where
519    InputIterator: Iterator<Item = char>,
520{
521    type Item = char;
522
523    fn next(&mut self) -> Option<Self::Item> {
524        // Point 4.1.1 first bullet:
525        // > If white-space is set to normal, nowrap, or pre-line, whitespace
526        // > characters are considered collapsible
527        // If whitespace is not considered collapsible, it is preserved entirely, which
528        // means that we can simply return the input string exactly.
529        if self.white_space_collapse == WhiteSpaceCollapse::Preserve ||
530            self.white_space_collapse == WhiteSpaceCollapse::BreakSpaces
531        {
532            // From <https://drafts.csswg.org/css-text-3/#white-space-processing>:
533            // > Carriage returns (U+000D) are treated identically to spaces (U+0020) in all respects.
534            //
535            // In the non-preserved case these are converted to space below.
536            return match self.char_iterator.next() {
537                Some('\r') => Some(' '),
538                next => next,
539            };
540        }
541
542        if let Some(character) = self.character_pending_to_return.take() {
543            self.inside_white_space = false;
544            self.have_seen_non_white_space_characters = true;
545            self.following_newline = false;
546            return Some(character);
547        }
548
549        while let Some(character) = self.char_iterator.next() {
550            // Don't push non-newline whitespace immediately. Instead wait to push it until we
551            // know that it isn't followed by a newline. See `push_pending_whitespace_if_needed`
552            // above.
553            if character.is_ascii_whitespace() && character != '\n' {
554                self.inside_white_space = true;
555                continue;
556            }
557
558            // Point 4.1.1:
559            // > 2. Collapsible segment breaks are transformed for rendering according to the
560            // >    segment break transformation rules.
561            if character == '\n' {
562                // From <https://drafts.csswg.org/css-text-3/#line-break-transform>
563                // (4.1.3 -- the segment break transformation rules):
564                //
565                // > When white-space is pre, pre-wrap, or pre-line, segment breaks are not
566                // > collapsible and are instead transformed into a preserved line feed"
567                if self.white_space_collapse != WhiteSpaceCollapse::Collapse {
568                    self.inside_white_space = false;
569                    self.following_newline = true;
570                    return Some(character);
571
572                // Point 4.1.3:
573                // > 1. First, any collapsible segment break immediately following another
574                // >    collapsible segment break is removed.
575                // > 2. Then any remaining segment break is either transformed into a space (U+0020)
576                // >    or removed depending on the context before and after the break.
577                } else if !self.following_newline &&
578                    preserve_segment_break() &&
579                    !self.is_leading_trimmed_white_space()
580                {
581                    self.inside_white_space = false;
582                    self.following_newline = true;
583                    return Some(' ');
584                } else {
585                    self.following_newline = true;
586                    continue;
587                }
588            }
589
590            // Point 4.1.1:
591            // > 2. Any sequence of collapsible spaces and tabs immediately preceding or
592            // >    following a segment break is removed.
593            // > 3. Every collapsible tab is converted to a collapsible space (U+0020).
594            // > 4. Any collapsible space immediately following another collapsible space—even
595            // >    one outside the boundary of the inline containing that space, provided both
596            // >    spaces are within the same inline formatting context—is collapsed to have zero
597            // >    advance width.
598            if self.need_to_produce_space_character_after_white_space() {
599                self.inside_white_space = false;
600                self.character_pending_to_return = Some(character);
601                return Some(' ');
602            }
603
604            self.inside_white_space = false;
605            self.have_seen_non_white_space_characters = true;
606            self.following_newline = false;
607            return Some(character);
608        }
609
610        if self.need_to_produce_space_character_after_white_space() {
611            self.inside_white_space = false;
612            return Some(' ');
613        }
614
615        None
616    }
617
618    fn size_hint(&self) -> (usize, Option<usize>) {
619        self.char_iterator.size_hint()
620    }
621
622    fn count(self) -> usize
623    where
624        Self: Sized,
625    {
626        self.char_iterator.count()
627    }
628}
629
/// The remaining output of the case conversion of a single character, which may consist of
/// more than one character.
enum PendingCaseConversionResult {
    Uppercase(ToUppercase),
    Lowercase(ToLowercase),
}

impl PendingCaseConversionResult {
    /// Produce the next character of the conversion, or `None` once it is exhausted.
    fn next(&mut self) -> Option<char> {
        let remaining: &mut dyn Iterator<Item = char> = match self {
            Self::Uppercase(uppercase) => uppercase,
            Self::Lowercase(lowercase) => lowercase,
        };
        remaining.next()
    }
}
643
644/// This is an iterator that consumes a char iterator and produces character transformed
645/// by the given CSS `text-transform` value. It currently does not support
646/// `text-transform: capitalize` because Unicode segmentation libraries do not support
647/// streaming input one character at a time.
648pub struct TextTransformation<InputIterator> {
649    /// The input character iterator.
650    char_iterator: InputIterator,
651    /// The `text-transform` value to use.
652    text_transform: TextTransformCase,
653    /// If an uppercasing or lowercasing produces more than one character, this
654    /// caches them so that they can be returned in subsequent iterator calls.
655    pending_case_conversion_result: Option<PendingCaseConversionResult>,
656}
657
658impl<InputIterator> TextTransformation<InputIterator> {
659    pub fn new(char_iterator: InputIterator, text_transform: TextTransformCase) -> Self {
660        Self {
661            char_iterator,
662            text_transform,
663            pending_case_conversion_result: None,
664        }
665    }
666}
667
668impl<InputIterator> Iterator for TextTransformation<InputIterator>
669where
670    InputIterator: Iterator<Item = char>,
671{
672    type Item = char;
673
674    fn next(&mut self) -> Option<Self::Item> {
675        if let Some(character) = self
676            .pending_case_conversion_result
677            .as_mut()
678            .and_then(|result| result.next())
679        {
680            return Some(character);
681        }
682        self.pending_case_conversion_result = None;
683
684        for character in self.char_iterator.by_ref() {
685            match self.text_transform {
686                TextTransformCase::None => return Some(character),
687                TextTransformCase::Uppercase => {
688                    let mut pending_result =
689                        PendingCaseConversionResult::Uppercase(character.to_uppercase());
690                    if let Some(character) = pending_result.next() {
691                        self.pending_case_conversion_result = Some(pending_result);
692                        return Some(character);
693                    }
694                },
695                TextTransformCase::Lowercase => {
696                    let mut pending_result =
697                        PendingCaseConversionResult::Lowercase(character.to_lowercase());
698                    if let Some(character) = pending_result.next() {
699                        self.pending_case_conversion_result = Some(pending_result);
700                        return Some(character);
701                    }
702                },
703                // `text-transform: capitalize` currently cannot work on a per-character basis,
704                // so must be handled outside of this iterator.
705                TextTransformCase::Capitalize => return Some(character),
706            }
707        }
708        None
709    }
710}
711
712pub struct TextSecurityTransform<InputIterator> {
713    /// The input character iterator.
714    char_iterator: InputIterator,
715    /// The `-webkit-text-security` value to use.
716    text_security: WebKitTextSecurity,
717}
718
719impl<InputIterator> TextSecurityTransform<InputIterator> {
720    pub fn new(char_iterator: InputIterator, text_security: WebKitTextSecurity) -> Self {
721        Self {
722            char_iterator,
723            text_security,
724        }
725    }
726}
727
728impl<InputIterator> Iterator for TextSecurityTransform<InputIterator>
729where
730    InputIterator: Iterator<Item = char>,
731{
732    type Item = char;
733
734    fn next(&mut self) -> Option<Self::Item> {
735        // The behavior of `-webkit-text-security` isn't specified, so we have some
736        // flexibility in the implementation. We just need to maintain a rough
737        // compatability with other browsers.
738        Some(match self.char_iterator.next()? {
739            // This is not ideal, but zero width space is used for some special reasons in
740            // `<input>` fields, so these remain untransformed, otherwise they would show up
741            // in empty text fields.
742            '\u{200B}' => '\u{200B}',
743            // Newlines are preserved, so that `<br>` keeps working as expected.
744            '\n' => '\n',
745            character => match self.text_security {
746                WebKitTextSecurity::None => character,
747                WebKitTextSecurity::Circle => '○',
748                WebKitTextSecurity::Disc => '●',
749                WebKitTextSecurity::Square => '■',
750            },
751        })
752    }
753}
754
755/// Given a string and whether the start of the string represents a word boundary, create a copy of
756/// the string with letters after word boundaries capitalized.
757pub(crate) fn capitalize_string(string: &str, allow_word_at_start: bool) -> String {
758    let mut output_string = String::new();
759    output_string.reserve(string.len());
760
761    let word_segmenter = WordSegmenter::new_auto();
762    let mut bounds = word_segmenter.segment_str(string).peekable();
763    let mut byte_index = 0;
764    for character in string.chars() {
765        let current_byte_index = byte_index;
766        byte_index += character.len_utf8();
767
768        if let Some(next_index) = bounds.peek() {
769            if *next_index == current_byte_index {
770                bounds.next();
771
772                if current_byte_index != 0 || allow_word_at_start {
773                    output_string.extend(character.to_uppercase());
774                    continue;
775                }
776            }
777        }
778
779        output_string.push(character);
780    }
781
782    output_string
783}
784
785/// Computes the range of the first letter.
786///
787/// The range includes any preceding punctuation and white space, and any trailing punctuation. Any
788/// non-punctuation following the letter/number/symbol of first-letter ends the range. Intervening
789/// spaces within trailing punctuation are not supported yet.
790///
791/// If the resulting range is empty, no compatible first-letter text was found.
792///
793/// <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
794fn first_letter_range(text: &str) -> Range<usize> {
795    enum State {
796        /// All characters that precede the `PrecedingWhitespaceAndPunctuation` state.
797        Start,
798        /// All preceding punctuation and intervening whitepace that precedes the `Lns` state.
799        PrecedingPunctuation,
800        /// Unicode general category L: letter, N: number and S: symbol
801        Lns,
802        /// All punctuation (but no whitespace or other characters), that
803        /// come after the `Lns` state.
804        TrailingPunctuation,
805    }
806
807    let mut start = 0;
808    let mut state = State::Start;
809    for (index, character) in text.char_indices() {
810        match &mut state {
811            State::Start => {
812                if character.is_letter() || character.is_number() || character.is_symbol() {
813                    start = index;
814                    state = State::Lns;
815                } else if character.is_punctuation() {
816                    start = index;
817                    state = State::PrecedingPunctuation
818                }
819            },
820            State::PrecedingPunctuation => {
821                if character.is_letter() || character.is_number() || character.is_symbol() {
822                    state = State::Lns;
823                } else if !character.is_separator_space() && !character.is_punctuation() {
824                    return 0..0;
825                }
826            },
827            State::Lns => {
828                // TODO: Implement support for intervening spaces
829                // <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
830                if character.is_punctuation() &&
831                    !character.is_punctuation_open() &&
832                    !character.is_punctuation_dash()
833                {
834                    state = State::TrailingPunctuation;
835                } else {
836                    return start..index;
837                }
838            },
839            State::TrailingPunctuation => {
840                // TODO: Implement support for intervening spaces
841                // <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
842                if character.is_punctuation() &&
843                    !character.is_punctuation_open() &&
844                    !character.is_punctuation_dash()
845                {
846                    continue;
847                } else {
848                    return start..index;
849                }
850            },
851        }
852    }
853
854    match state {
855        State::Start | State::PrecedingPunctuation => 0..0,
856        State::Lns | State::TrailingPunctuation => start..text.len(),
857    }
858}
859
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that the first-letter range computed for `text` selects exactly `expected`.
    fn assert_first_letter_eq(text: &str, expected: &str) {
        let first_letter = &text[first_letter_range(text)];
        assert_eq!(first_letter, expected);
    }

    #[test]
    fn test_first_letter_range() {
        // Each entry is `(input text, expected first-letter text)`.
        let cases: &[(&str, &str)] = &[
            // All spaces
            ("", ""),
            ("  ", ""),
            // Spaces and punctuation only
            ("(", ""),
            (" (", ""),
            ("( ", ""),
            ("()", ""),
            // Invalid chars
            ("\u{0903}", ""),
            // First letter only
            ("A", "A"),
            (" A", "A"),
            ("A ", "A"),
            (" A ", "A"),
            // Word
            ("App", "A"),
            (" App", "A"),
            ("App ", "A"),
            // Preceding punctuation(s), intervening spaces and first letter
            (r#""A"#, r#""A"#),
            (r#" "A"#, r#""A"#),
            (r#""A "#, r#""A"#),
            (r#"" A"#, r#"" A"#),
            (r#" "A "#, r#""A"#),
            (r#"("A"#, r#"("A"#),
            (r#" ("A"#, r#"("A"#),
            (r#"( "A"#, r#"( "A"#),
            (r#"[ ( "A"#, r#"[ ( "A"#),
            // First letter and succeeding punctuation(s)
            // TODO: modify these cases when intervening spaces in succeeding punctuation
            // are supported
            (r#"A""#, r#"A""#),
            (r#"A" "#, r#"A""#),
            (r#"A)]"#, r#"A)]"#),
            (r#"A" )]"#, r#"A""#),
            (r#"A)] >"#, r#"A)]"#),
            // All
            (r#" ("A" )]"#, r#"("A""#),
            (r#" ("A")] >"#, r#"("A")]"#),
            // Non ASCII chars
            ("一", "一"),
            (" 一 ", "一"),
            ("一二三", "一"),
            (" 一二三 ", "一"),
            ("(一二三)", "(一"),
            (" (一二三) ", "(一"),
            ("((一", "((一"),
            (" ( (一", "( (一"),
            ("一)", "一)"),
            ("一))", "一))"),
            ("一) )", "一)"),
        ];

        for &(text, expected) in cases {
            assert_first_letter_eq(text, expected);
        }
    }
}