Skip to main content

layout/flow/inline/
construct.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::char::{ToLowercase, ToUppercase};
7use std::ops::Range;
8
9use icu_segmenter::WordSegmenter;
10use layout_api::{LayoutNode, SharedSelection};
11use style::computed_values::_webkit_text_security::T as WebKitTextSecurity;
12use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
13use style::selector_parser::PseudoElement;
14use style::values::specified::text::TextTransformCase;
15use unicode_bidi::Level;
16use unicode_categories::UnicodeCategories;
17
18use super::text_run::TextRun;
19use super::{
20    InlineBox, InlineBoxIdentifier, InlineBoxes, InlineFormattingContext, InlineItem,
21    SharedInlineStyles,
22};
23use crate::cell::ArcRefCell;
24use crate::context::LayoutContext;
25use crate::dom::{LayoutBox, NodeExt};
26use crate::dom_traversal::NodeAndStyleInfo;
27use crate::flow::BlockLevelBox;
28use crate::flow::float::FloatBox;
29use crate::formatting_contexts::IndependentFormattingContext;
30use crate::positioned::AbsolutelyPositionedBox;
31use crate::style_ext::ComputedValuesExt;
32
/// Accumulates the text, the flat list of [`InlineItem`]s and the [`InlineBox`] tree of an
/// [`InlineFormattingContext`] while it is under construction.
#[derive(Default)]
pub(crate) struct InlineFormattingContextBuilder {
    /// A stack of [`SharedInlineStyles`] including one for the root, one for each inline box on the
    /// inline box stack, and importantly, one for every `display: contents` element that we are
    /// currently processing. Normally `display: contents` elements don't affect the structure of
    /// the [`InlineFormattingContext`], but the styles they provide do style their children.
    pub shared_inline_styles_stack: Vec<SharedInlineStyles>,

    /// The collection of text strings that make up this [`InlineFormattingContext`] under
    /// construction.
    pub text_segments: Vec<String>,

    /// The current byte offset in the final text string of this [`InlineFormattingContext`],
    /// used to properly set the text range of new [`InlineItem::TextRun`]s. It is advanced by
    /// the UTF-8 length of every string that is pushed.
    current_text_offset: usize,

    /// The current character offset in the final text string of this [`InlineFormattingContext`],
    /// used to properly set the character range of new [`InlineItem::TextRun`]s. It is advanced
    /// by the number of `char`s of every string that is pushed, so it differs from
    /// `current_text_offset` (a UTF-8 byte offset) as soon as the text is not pure ASCII.
    current_character_offset: usize,

    /// If the [`InlineFormattingContext`] that we are building has a selection shared with its
    /// originating node in the DOM, this will not be `None`.
    pub shared_selection: Option<SharedSelection>,

    /// Whether the last processed node ended with whitespace. This is used to
    /// implement rule 4 of <https://www.w3.org/TR/css-text-3/#collapse>:
    ///
    /// > Any collapsible space immediately following another collapsible space—even one
    /// > outside the boundary of the inline containing that space, provided both spaces are
    /// > within the same inline formatting context—is collapsed to have zero advance width.
    /// > (It is invisible, but retains its soft wrap opportunity, if any.)
    last_inline_box_ended_with_collapsible_white_space: bool,

    /// Whether or not the current state of the inline formatting context is on a word boundary
    /// for the purposes of `text-transform: capitalize`.
    on_word_boundary: bool,

    /// Whether or not this inline formatting context will contain floats.
    pub contains_floats: bool,

    /// The current list of [`InlineItem`]s in this [`InlineFormattingContext`] under
    /// construction. This is stored in a flat list to make it easy to access the last
    /// item.
    pub inline_items: Vec<InlineItem>,

    /// The current [`InlineBox`] tree of this [`InlineFormattingContext`] under construction.
    pub inline_boxes: InlineBoxes,

    /// The stack of inline boxes that the builder is currently inside of.
    ///
    /// Contains all the currently ongoing inline boxes we entered so far.
    /// The traversal is at all times as deep in the tree as this stack is,
    /// which is why the code doesn't need to keep track of the actual
    /// container root (see `handle_inline_level_element`).
    ///
    /// When an inline box ends, it's removed from this stack.
    inline_box_stack: Vec<InlineBoxIdentifier>,

    /// Whether this [`InlineFormattingContextBuilder`] is empty for the purposes of ignoring
    /// during box tree construction. An IFC is empty if it only contains TextRuns with
    /// completely collapsible whitespace. When that happens it can be ignored completely.
    pub is_empty: bool,

    /// Whether or not the `::first-letter` pseudo-element of this inline formatting context
    /// has been processed yet.
    has_processed_first_letter: bool,
}
101
102impl InlineFormattingContextBuilder {
103    /// <https://drafts.csswg.org/css-text/#white-space>:
104    /// > Except where specified otherwise, white space processing in CSS affects only the document
105    /// > white space characters: spaces (U+0020), tabs (U+0009), and segment breaks.
106    ///
107    /// From <https://github.com/w3c/csswg-drafts/issues/5147#issuecomment-637816669>:
108    /// > HTML clearly treats CR, LF, and CRLF as segment breaks.
109    ///
110    /// Other browsers also consider the form feed character (0x0c) to be document white space, it
111    /// seems.
112    ///
113    /// Taken all together, this is equivalent to the WhatWG Infra Standard's definition of ASCII
114    /// white space.
115    pub(crate) fn is_document_white_space(character: char) -> bool {
116        character.is_ascii_whitespace()
117    }
118
119    pub(crate) fn new(info: &NodeAndStyleInfo, context: &LayoutContext) -> Self {
120        Self {
121            // For the purposes of `text-transform: capitalize` the start of the IFC is a word boundary.
122            on_word_boundary: true,
123            is_empty: true,
124            shared_inline_styles_stack: vec![SharedInlineStyles::from_info_and_context(
125                info, context,
126            )],
127            shared_selection: info.node.selection(),
128            ..Default::default()
129        }
130    }
131
132    pub(crate) fn currently_processing_inline_box(&self) -> bool {
133        !self.inline_box_stack.is_empty()
134    }
135
136    fn push_control_character_string(&mut self, string_to_push: &str) {
137        self.text_segments.push(string_to_push.to_owned());
138        self.current_text_offset += string_to_push.len();
139        self.current_character_offset += string_to_push.chars().count();
140    }
141
142    fn shared_inline_styles(&self) -> SharedInlineStyles {
143        self.shared_inline_styles_stack
144            .last()
145            .expect("Should always have at least one SharedInlineStyles")
146            .clone()
147    }
148
149    pub(crate) fn push_atomic(
150        &mut self,
151        independent_formatting_context_creator: impl FnOnce()
152            -> ArcRefCell<IndependentFormattingContext>,
153        old_layout_box: Option<LayoutBox>,
154    ) -> InlineItem {
155        // If there is an existing undamaged layout box that's compatible, use that.
156        let independent_formatting_context = old_layout_box
157            .and_then(|layout_box| match layout_box {
158                LayoutBox::InlineLevel(InlineItem::Atomic(atomic, ..)) => Some(atomic),
159                _ => None,
160            })
161            .unwrap_or_else(independent_formatting_context_creator);
162
163        let inline_level_box = InlineItem::Atomic(
164            independent_formatting_context,
165            self.current_text_offset,
166            Level::ltr(), /* This will be assigned later if necessary. */
167        );
168        self.inline_items.push(inline_level_box.clone());
169        self.is_empty = false;
170
171        // Push an object replacement character for this atomic, which will ensure that the line breaker
172        // inserts a line breaking opportunity here.
173        self.push_control_character_string("\u{fffc}");
174
175        self.last_inline_box_ended_with_collapsible_white_space = false;
176        self.on_word_boundary = true;
177
178        // Atomics such as images should prevent any following text as being interpreted as the first letter.
179        self.has_processed_first_letter = true;
180
181        inline_level_box
182    }
183
184    pub(crate) fn push_absolutely_positioned_box(
185        &mut self,
186        absolutely_positioned_box_creator: impl FnOnce() -> ArcRefCell<AbsolutelyPositionedBox>,
187        old_layout_box: Option<LayoutBox>,
188    ) -> InlineItem {
189        let absolutely_positioned_box = old_layout_box
190            .and_then(|layout_box| match layout_box {
191                LayoutBox::InlineLevel(InlineItem::OutOfFlowAbsolutelyPositionedBox(
192                    positioned_box,
193                    ..,
194                )) => Some(positioned_box),
195                _ => None,
196            })
197            .unwrap_or_else(absolutely_positioned_box_creator);
198
199        // We cannot just reuse the old inline item, because the `current_text_offset` may have changed.
200        let inline_level_box = InlineItem::OutOfFlowAbsolutelyPositionedBox(
201            absolutely_positioned_box,
202            self.current_text_offset,
203        );
204
205        self.inline_items.push(inline_level_box.clone());
206        self.is_empty = false;
207        inline_level_box
208    }
209
210    pub(crate) fn push_float_box(
211        &mut self,
212        float_box_creator: impl FnOnce() -> ArcRefCell<FloatBox>,
213        old_layout_box: Option<LayoutBox>,
214    ) -> InlineItem {
215        let inline_level_box = old_layout_box
216            .and_then(|layout_box| match layout_box {
217                LayoutBox::InlineLevel(inline_item) => Some(inline_item),
218                _ => None,
219            })
220            .unwrap_or_else(|| InlineItem::OutOfFlowFloatBox(float_box_creator()));
221
222        debug_assert!(
223            matches!(inline_level_box, InlineItem::OutOfFlowFloatBox(..),),
224            "Created float box with incompatible `old_layout_box`"
225        );
226
227        self.inline_items.push(inline_level_box.clone());
228        self.is_empty = false;
229        self.contains_floats = true;
230        inline_level_box
231    }
232
233    pub(crate) fn push_block_level_box(&mut self, block_level: ArcRefCell<BlockLevelBox>) {
234        assert!(self.currently_processing_inline_box());
235        self.contains_floats = self.contains_floats || block_level.borrow().contains_floats();
236        self.inline_items.push(InlineItem::BlockLevel(block_level));
237    }
238
239    pub(crate) fn start_inline_box(
240        &mut self,
241        inline_box_creator: impl FnOnce() -> ArcRefCell<InlineBox>,
242        old_layout_box: Option<LayoutBox>,
243    ) -> InlineItem {
244        // If there is an existing undamaged layout box that's compatible, use the `InlineBox` within it.
245        let inline_box = old_layout_box
246            .and_then(|layout_box| match layout_box {
247                LayoutBox::InlineLevel(InlineItem::StartInlineBox(inline_box)) => Some(inline_box),
248                _ => None,
249            })
250            .unwrap_or_else(inline_box_creator);
251
252        let borrowed_inline_box = inline_box.borrow();
253        self.push_control_character_string(borrowed_inline_box.base.style.bidi_control_chars().0);
254
255        self.shared_inline_styles_stack
256            .push(borrowed_inline_box.shared_inline_styles.clone());
257        std::mem::drop(borrowed_inline_box);
258
259        let identifier = self.inline_boxes.start_inline_box(inline_box.clone());
260        let inline_item = InlineItem::StartInlineBox(inline_box);
261        self.inline_items.push(inline_item.clone());
262        self.inline_box_stack.push(identifier);
263        self.is_empty = false;
264        inline_item
265    }
266
267    /// End the ongoing inline box in this [`InlineFormattingContextBuilder`], returning
268    /// shared references to all of the box tree items that were created for it. More than
269    /// a single box tree items may be produced for a single inline box when that inline
270    /// box is split around a block-level element.
271    pub(crate) fn end_inline_box(&mut self) {
272        self.shared_inline_styles_stack.pop();
273        self.inline_items.push(InlineItem::EndInlineBox);
274        let identifier = self
275            .inline_box_stack
276            .pop()
277            .expect("Ended non-existent inline box");
278        self.inline_boxes.end_inline_box(identifier);
279        let inline_level_box = self.inline_boxes.get(&identifier);
280        let bidi_control_chars = inline_level_box.borrow().base.style.bidi_control_chars();
281        self.push_control_character_string(bidi_control_chars.1);
282    }
283
284    /// This is like [`Self::push_text`], except that it might possibly add an anonymous box if
285    ///
286    ///  - This inline formatting context has a `::first-letter` style.
287    ///  - No anonymous box for `::first-letter` has been added yet.
288    ///  - First letter content is detected in this text.
289    ///
290    /// Note that this should only be used when processing text in block containers.
291    pub(crate) fn push_text_with_possible_first_letter<'dom>(
292        &mut self,
293        text: Cow<'dom, str>,
294        info: &NodeAndStyleInfo<'dom>,
295        container_info: &NodeAndStyleInfo<'dom>,
296        layout_context: &LayoutContext,
297    ) -> bool {
298        if self.has_processed_first_letter || !container_info.pseudo_element_chain().is_empty() {
299            self.push_text(text, info);
300            return false;
301        }
302
303        let Some(first_letter_info) =
304            container_info.with_pseudo_element(layout_context, PseudoElement::FirstLetter)
305        else {
306            self.push_text(text, info);
307            return false;
308        };
309
310        let first_letter_range = first_letter_range(&text[..]);
311        if first_letter_range.is_empty() {
312            return false;
313        }
314
315        // Push any leading white space first.
316        if first_letter_range.start != 0 {
317            self.push_text(Cow::Borrowed(&text[0..first_letter_range.start]), info);
318        }
319
320        // Push the first-letter text into an anonymous box with the `::first-letter` style.
321        let box_slot = first_letter_info.node.box_slot();
322        let inline_item = self.start_inline_box(
323            || ArcRefCell::new(InlineBox::new(&first_letter_info, layout_context)),
324            None,
325        );
326        box_slot.set(LayoutBox::InlineLevel(inline_item));
327
328        let first_letter_text = Cow::Borrowed(&text[first_letter_range.clone()]);
329        self.push_text(first_letter_text, &first_letter_info);
330        self.end_inline_box();
331        self.has_processed_first_letter = true;
332
333        // Now push the non-first-letter text.
334        self.push_text(Cow::Borrowed(&text[first_letter_range.end..]), info);
335
336        true
337    }
338
339    pub(crate) fn push_text<'dom>(&mut self, text: Cow<'dom, str>, info: &NodeAndStyleInfo<'dom>) {
340        let white_space_collapse = info.style.clone_white_space_collapse();
341        let collapsed = WhitespaceCollapse::new(
342            text.chars(),
343            white_space_collapse,
344            self.last_inline_box_ended_with_collapsible_white_space,
345        );
346
347        // TODO: Not all text transforms are about case, this logic should stop ignoring
348        // TextTransform::FULL_WIDTH and TextTransform::FULL_SIZE_KANA.
349        let text_transform = info.style.clone_text_transform().case();
350        let capitalized_text: String;
351        let char_iterator: Box<dyn Iterator<Item = char>> = match text_transform {
352            TextTransformCase::None => Box::new(collapsed),
353            TextTransformCase::Capitalize => {
354                // `TextTransformation` doesn't support capitalization, so we must capitalize the whole
355                // string at once and make a copy. Here `on_word_boundary` indicates whether or not the
356                // inline formatting context as a whole is on a word boundary. This is different from
357                // `last_inline_box_ended_with_collapsible_white_space` because the word boundaries are
358                // between atomic inlines and at the start of the IFC, and because preserved spaces
359                // are a word boundary.
360                let collapsed_string: String = collapsed.collect();
361                capitalized_text = capitalize_string(&collapsed_string, self.on_word_boundary);
362                Box::new(capitalized_text.chars())
363            },
364            _ => {
365                // If `text-transform` is active, wrap the `WhitespaceCollapse` iterator in
366                // a `TextTransformation` iterator.
367                Box::new(TextTransformation::new(collapsed, text_transform))
368            },
369        };
370
371        let char_iterator = if info.style.clone__webkit_text_security() != WebKitTextSecurity::None
372        {
373            Box::new(TextSecurityTransform::new(
374                char_iterator,
375                info.style.clone__webkit_text_security(),
376            ))
377        } else {
378            char_iterator
379        };
380
381        let white_space_collapse = info.style.clone_white_space_collapse();
382        let mut character_count = 0;
383        let new_text: String = char_iterator
384            .inspect(|&character| {
385                character_count += 1;
386
387                self.is_empty = self.is_empty &&
388                    match white_space_collapse {
389                        WhiteSpaceCollapse::Collapse => Self::is_document_white_space(character),
390                        WhiteSpaceCollapse::PreserveBreaks => {
391                            Self::is_document_white_space(character) && character != '\n'
392                        },
393                        WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces => false,
394                    };
395            })
396            .collect();
397
398        if new_text.is_empty() {
399            return;
400        }
401
402        if let Some(last_character) = new_text.chars().next_back() {
403            self.on_word_boundary = last_character.is_whitespace();
404            self.last_inline_box_ended_with_collapsible_white_space =
405                self.on_word_boundary && white_space_collapse != WhiteSpaceCollapse::Preserve;
406        }
407
408        let new_range = self.current_text_offset..self.current_text_offset + new_text.len();
409        self.current_text_offset = new_range.end;
410
411        let new_character_range =
412            self.current_character_offset..self.current_character_offset + character_count;
413        self.current_character_offset = new_character_range.end;
414
415        self.text_segments.push(new_text);
416
417        let current_inline_styles = self.shared_inline_styles();
418
419        if let Some(InlineItem::TextRun(text_run)) = self.inline_items.last() {
420            if text_run
421                .borrow()
422                .inline_styles
423                .ptr_eq(&current_inline_styles)
424            {
425                let box_slot = info.node.box_slot();
426                let old_text_run = box_slot.take_layout_box_as_text_run();
427
428                {
429                    let mut text_run = text_run.borrow_mut();
430                    text_run.text_range.end = new_range.end;
431                    text_run.character_range.end = new_character_range.end;
432
433                    // If this text node does not have a `TextRun` in the box slot, this means that
434                    // it is either new or dirty, which means that the entire `TextRun` just extended
435                    // is dirty as well. In this case, never reuse existing shaping results. Clear
436                    // all old items to ensure this.
437                    if old_text_run.is_none() {
438                        text_run.items.clear();
439                    }
440                }
441
442                box_slot.set(LayoutBox::Text(text_run.clone()));
443                return;
444            }
445        }
446
447        let box_slot = info.node.box_slot();
448        let text_run = ArcRefCell::new(TextRun::new(
449            info.into(),
450            current_inline_styles,
451            new_range,
452            new_character_range,
453            box_slot.take_layout_box_as_text_run(),
454        ));
455        self.inline_items
456            .push(InlineItem::TextRun(text_run.clone()));
457        box_slot.set(LayoutBox::Text(text_run));
458    }
459
    /// Makes `shared_inline_styles` the current styles by pushing them onto the style stack.
    /// Unlike [`Self::start_inline_box`], no inline item is created and the inline box stack is
    /// left untouched. Must be balanced by a call to [`Self::leave_display_contents`].
    pub(crate) fn enter_display_contents(&mut self, shared_inline_styles: SharedInlineStyles) {
        self.shared_inline_styles_stack.push(shared_inline_styles);
    }
463
    /// Pops the styles on top of the style stack, undoing a previous call to
    /// [`Self::enter_display_contents`].
    pub(crate) fn leave_display_contents(&mut self) {
        self.shared_inline_styles_stack.pop();
    }
467
468    /// Finish the current inline formatting context, returning [`None`] if the context was empty.
469    pub(crate) fn finish(
470        self,
471        layout_context: &LayoutContext,
472        has_first_formatted_line: bool,
473        is_single_line_text_input: bool,
474        default_bidi_level: Level,
475    ) -> Option<InlineFormattingContext> {
476        if self.is_empty {
477            return None;
478        }
479
480        assert!(self.inline_box_stack.is_empty());
481        Some(InlineFormattingContext::new_with_builder(
482            self,
483            layout_context,
484            has_first_formatted_line,
485            is_single_line_text_input,
486            default_bidi_level,
487        ))
488    }
489}
490
/// Whether a collapsible segment break should be transformed into a space instead of being
/// removed. This currently always returns `true`, so the context before and after the break
/// is not taken into account.
fn preserve_segment_break() -> bool {
    true
}
494
/// An iterator adaptor that consumes a character iterator and produces the characters that
/// remain after CSS white space processing for a given `white-space-collapse` value.
pub struct WhitespaceCollapse<InputIterator> {
    /// The input character iterator.
    char_iterator: InputIterator,
    /// The `white-space-collapse` value that controls how white space is processed.
    white_space_collapse: WhiteSpaceCollapse,

    /// Whether or not we should collapse white space completely at the start of the string.
    /// This is true when the last character handled in our owning [`super::InlineFormattingContext`]
    /// was collapsible white space.
    remove_collapsible_white_space_at_start: bool,

    /// Whether or not the last character produced was newline. There is special behavior
    /// we do after each newline.
    following_newline: bool,

    /// Whether or not we have seen any non-white space characters, indicating that we are not
    /// in a collapsible white space section at the beginning of the string.
    have_seen_non_white_space_characters: bool,

    /// Whether the last character that we processed was a non-newline white space character. When
    /// collapsing white space we need to wait until the next non-white space character or the end
    /// of the string to push a single white space.
    inside_white_space: bool,

    /// When we enter a collapsible white space region, we may need to wait to produce a single
    /// white space character as soon as we encounter a non-white space character. When that
    /// happens we queue up the non-white space character for the next iterator call.
    character_pending_to_return: Option<char>,
}
522
523impl<InputIterator> WhitespaceCollapse<InputIterator> {
524    pub fn new(
525        char_iterator: InputIterator,
526        white_space_collapse: WhiteSpaceCollapse,
527        trim_beginning_white_space: bool,
528    ) -> Self {
529        Self {
530            char_iterator,
531            white_space_collapse,
532            remove_collapsible_white_space_at_start: trim_beginning_white_space,
533            inside_white_space: false,
534            following_newline: false,
535            have_seen_non_white_space_characters: false,
536            character_pending_to_return: None,
537        }
538    }
539
540    fn is_leading_trimmed_white_space(&self) -> bool {
541        !self.have_seen_non_white_space_characters && self.remove_collapsible_white_space_at_start
542    }
543
544    /// Whether or not we need to produce a space character if the next character is not a newline
545    /// and not white space. This happens when we are exiting a section of white space and we
546    /// waited to produce a single space character for the entire section of white space (but
547    /// not following or preceding a newline).
548    fn need_to_produce_space_character_after_white_space(&self) -> bool {
549        self.inside_white_space && !self.following_newline && !self.is_leading_trimmed_white_space()
550    }
551}
552
553impl<InputIterator> Iterator for WhitespaceCollapse<InputIterator>
554where
555    InputIterator: Iterator<Item = char>,
556{
557    type Item = char;
558
559    fn next(&mut self) -> Option<Self::Item> {
560        // Point 4.1.1 first bullet:
561        // > If white-space is set to normal, nowrap, or pre-line, whitespace
562        // > characters are considered collapsible
563        // If whitespace is not considered collapsible, it is preserved entirely, which
564        // means that we can simply return the input string exactly.
565        if self.white_space_collapse == WhiteSpaceCollapse::Preserve ||
566            self.white_space_collapse == WhiteSpaceCollapse::BreakSpaces
567        {
568            // From <https://drafts.csswg.org/css-text-3/#white-space-processing>:
569            // > Carriage returns (U+000D) are treated identically to spaces (U+0020) in all respects.
570            //
571            // In the non-preserved case these are converted to space below.
572            return match self.char_iterator.next() {
573                Some('\r') => Some(' '),
574                next => next,
575            };
576        }
577
578        if let Some(character) = self.character_pending_to_return.take() {
579            self.inside_white_space = false;
580            self.have_seen_non_white_space_characters = true;
581            self.following_newline = false;
582            return Some(character);
583        }
584
585        while let Some(character) = self.char_iterator.next() {
586            // Don't push non-newline whitespace immediately. Instead wait to push it until we
587            // know that it isn't followed by a newline. See `push_pending_whitespace_if_needed`
588            // above.
589            if InlineFormattingContextBuilder::is_document_white_space(character) &&
590                character != '\n'
591            {
592                self.inside_white_space = true;
593                continue;
594            }
595
596            // Point 4.1.1:
597            // > 2. Collapsible segment breaks are transformed for rendering according to the
598            // >    segment break transformation rules.
599            if character == '\n' {
600                // From <https://drafts.csswg.org/css-text-3/#line-break-transform>
601                // (4.1.3 -- the segment break transformation rules):
602                //
603                // > When white-space is pre, pre-wrap, or pre-line, segment breaks are not
604                // > collapsible and are instead transformed into a preserved line feed"
605                if self.white_space_collapse != WhiteSpaceCollapse::Collapse {
606                    self.inside_white_space = false;
607                    self.following_newline = true;
608                    return Some(character);
609
610                // Point 4.1.3:
611                // > 1. First, any collapsible segment break immediately following another
612                // >    collapsible segment break is removed.
613                // > 2. Then any remaining segment break is either transformed into a space (U+0020)
614                // >    or removed depending on the context before and after the break.
615                } else if !self.following_newline &&
616                    preserve_segment_break() &&
617                    !self.is_leading_trimmed_white_space()
618                {
619                    self.inside_white_space = false;
620                    self.following_newline = true;
621                    return Some(' ');
622                } else {
623                    self.following_newline = true;
624                    continue;
625                }
626            }
627
628            // Point 4.1.1:
629            // > 2. Any sequence of collapsible spaces and tabs immediately preceding or
630            // >    following a segment break is removed.
631            // > 3. Every collapsible tab is converted to a collapsible space (U+0020).
632            // > 4. Any collapsible space immediately following another collapsible space—even
633            // >    one outside the boundary of the inline containing that space, provided both
634            // >    spaces are within the same inline formatting context—is collapsed to have zero
635            // >    advance width.
636            if self.need_to_produce_space_character_after_white_space() {
637                self.inside_white_space = false;
638                self.character_pending_to_return = Some(character);
639                return Some(' ');
640            }
641
642            self.inside_white_space = false;
643            self.have_seen_non_white_space_characters = true;
644            self.following_newline = false;
645            return Some(character);
646        }
647
648        if self.need_to_produce_space_character_after_white_space() {
649            self.inside_white_space = false;
650            return Some(' ');
651        }
652
653        None
654    }
655
656    fn size_hint(&self) -> (usize, Option<usize>) {
657        self.char_iterator.size_hint()
658    }
659
660    fn count(self) -> usize
661    where
662        Self: Sized,
663    {
664        self.char_iterator.count()
665    }
666}
667
/// The characters that are still left to return from the case conversion of a single input
/// character, which may map to more than one output character.
enum PendingCaseConversionResult {
    /// The remainder of an uppercase conversion.
    Uppercase(ToUppercase),
    /// The remainder of a lowercase conversion.
    Lowercase(ToLowercase),
}

impl PendingCaseConversionResult {
    /// Returns the next converted character, or `None` once the conversion is exhausted.
    fn next(&mut self) -> Option<char> {
        match self {
            Self::Uppercase(remaining_characters) => remaining_characters.next(),
            Self::Lowercase(remaining_characters) => remaining_characters.next(),
        }
    }
}
681
/// This is an iterator that consumes a char iterator and produces characters transformed
/// by the given CSS `text-transform` value. It currently does not support
/// `text-transform: capitalize` because Unicode segmentation libraries do not support
/// streaming input one character at a time; with that value the input characters are
/// passed through unchanged.
pub struct TextTransformation<InputIterator> {
    /// The input character iterator.
    char_iterator: InputIterator,
    /// The `text-transform` value to use.
    text_transform: TextTransformCase,
    /// If an uppercasing or lowercasing produces more than one character, this
    /// caches them so that they can be returned in subsequent iterator calls.
    pending_case_conversion_result: Option<PendingCaseConversionResult>,
}
695
696impl<InputIterator> TextTransformation<InputIterator> {
697    pub fn new(char_iterator: InputIterator, text_transform: TextTransformCase) -> Self {
698        Self {
699            char_iterator,
700            text_transform,
701            pending_case_conversion_result: None,
702        }
703    }
704}
705
706impl<InputIterator> Iterator for TextTransformation<InputIterator>
707where
708    InputIterator: Iterator<Item = char>,
709{
710    type Item = char;
711
712    fn next(&mut self) -> Option<Self::Item> {
713        if let Some(character) = self
714            .pending_case_conversion_result
715            .as_mut()
716            .and_then(|result| result.next())
717        {
718            return Some(character);
719        }
720        self.pending_case_conversion_result = None;
721
722        for character in self.char_iterator.by_ref() {
723            match self.text_transform {
724                TextTransformCase::None => return Some(character),
725                TextTransformCase::Uppercase => {
726                    let mut pending_result =
727                        PendingCaseConversionResult::Uppercase(character.to_uppercase());
728                    if let Some(character) = pending_result.next() {
729                        self.pending_case_conversion_result = Some(pending_result);
730                        return Some(character);
731                    }
732                },
733                TextTransformCase::Lowercase => {
734                    let mut pending_result =
735                        PendingCaseConversionResult::Lowercase(character.to_lowercase());
736                    if let Some(character) = pending_result.next() {
737                        self.pending_case_conversion_result = Some(pending_result);
738                        return Some(character);
739                    }
740                },
741                // `text-transform: capitalize` currently cannot work on a per-character basis,
742                // so must be handled outside of this iterator.
743                TextTransformCase::Capitalize => return Some(character),
744            }
745        }
746        None
747    }
748}
749
750pub struct TextSecurityTransform<InputIterator> {
751    /// The input character iterator.
752    char_iterator: InputIterator,
753    /// The `-webkit-text-security` value to use.
754    text_security: WebKitTextSecurity,
755}
756
757impl<InputIterator> TextSecurityTransform<InputIterator> {
758    pub fn new(char_iterator: InputIterator, text_security: WebKitTextSecurity) -> Self {
759        Self {
760            char_iterator,
761            text_security,
762        }
763    }
764}
765
766impl<InputIterator> Iterator for TextSecurityTransform<InputIterator>
767where
768    InputIterator: Iterator<Item = char>,
769{
770    type Item = char;
771
772    fn next(&mut self) -> Option<Self::Item> {
773        // The behavior of `-webkit-text-security` isn't specified, so we have some
774        // flexibility in the implementation. We just need to maintain a rough
775        // compatability with other browsers.
776        Some(match self.char_iterator.next()? {
777            // This is not ideal, but zero width space is used for some special reasons in
778            // `<input>` fields, so these remain untransformed, otherwise they would show up
779            // in empty text fields.
780            '\u{200B}' => '\u{200B}',
781            // Newlines are preserved, so that `<br>` keeps working as expected.
782            '\n' => '\n',
783            character => match self.text_security {
784                WebKitTextSecurity::None => character,
785                WebKitTextSecurity::Circle => '○',
786                WebKitTextSecurity::Disc => '●',
787                WebKitTextSecurity::Square => '■',
788            },
789        })
790    }
791}
792
793/// Given a string and whether the start of the string represents a word boundary, create a copy of
794/// the string with letters after word boundaries capitalized.
795pub(crate) fn capitalize_string(string: &str, allow_word_at_start: bool) -> String {
796    let mut output_string = String::new();
797    output_string.reserve(string.len());
798
799    let word_segmenter = WordSegmenter::new_auto();
800    let mut bounds = word_segmenter.segment_str(string).peekable();
801    let mut byte_index = 0;
802    for character in string.chars() {
803        let current_byte_index = byte_index;
804        byte_index += character.len_utf8();
805
806        if let Some(next_index) = bounds.peek() {
807            if *next_index == current_byte_index {
808                bounds.next();
809
810                if current_byte_index != 0 || allow_word_at_start {
811                    output_string.extend(character.to_uppercase());
812                    continue;
813                }
814            }
815        }
816
817        output_string.push(character);
818    }
819
820    output_string
821}
822
823/// Computes the range of the first letter.
824///
825/// The range includes any preceding punctuation and white space, and any trailing punctuation. Any
826/// non-punctuation following the letter/number/symbol of first-letter ends the range. Intervening
827/// spaces within trailing punctuation are not supported yet.
828///
829/// If the resulting range is empty, no compatible first-letter text was found.
830///
831/// <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
832fn first_letter_range(text: &str) -> Range<usize> {
833    enum State {
834        /// All characters that precede the `PrecedingWhitespaceAndPunctuation` state.
835        Start,
836        /// All preceding punctuation and intervening whitepace that precedes the `Lns` state.
837        PrecedingPunctuation,
838        /// Unicode general category L: letter, N: number and S: symbol
839        Lns,
840        /// All punctuation (but no whitespace or other characters), that
841        /// come after the `Lns` state.
842        TrailingPunctuation,
843    }
844
845    let mut start = 0;
846    let mut state = State::Start;
847    for (index, character) in text.char_indices() {
848        match &mut state {
849            State::Start => {
850                if character.is_letter() || character.is_number() || character.is_symbol() {
851                    start = index;
852                    state = State::Lns;
853                } else if character.is_punctuation() {
854                    start = index;
855                    state = State::PrecedingPunctuation
856                }
857            },
858            State::PrecedingPunctuation => {
859                if character.is_letter() || character.is_number() || character.is_symbol() {
860                    state = State::Lns;
861                } else if !character.is_separator_space() && !character.is_punctuation() {
862                    return 0..0;
863                }
864            },
865            State::Lns => {
866                // TODO: Implement support for intervening spaces
867                // <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
868                if character.is_punctuation() &&
869                    !character.is_punctuation_open() &&
870                    !character.is_punctuation_dash()
871                {
872                    state = State::TrailingPunctuation;
873                } else {
874                    return start..index;
875                }
876            },
877            State::TrailingPunctuation => {
878                // TODO: Implement support for intervening spaces
879                // <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
880                if character.is_punctuation() &&
881                    !character.is_punctuation_open() &&
882                    !character.is_punctuation_dash()
883                {
884                    continue;
885                } else {
886                    return start..index;
887                }
888            },
889        }
890    }
891
892    match state {
893        State::Start | State::PrecedingPunctuation => 0..0,
894        State::Lns | State::TrailingPunctuation => start..text.len(),
895    }
896}
897
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the part of `text` selected by `first_letter_range` equals `expected`.
    fn assert_first_letter_eq(text: &str, expected: &str) {
        assert_eq!(&text[first_letter_range(text)], expected);
    }

    #[test]
    fn test_first_letter_range() {
        // Each entry is `(input text, expected first-letter text)`.
        let cases: &[(&str, &str)] = &[
            // All spaces
            ("", ""),
            ("  ", ""),
            // Spaces and punctuation only
            ("(", ""),
            (" (", ""),
            ("( ", ""),
            ("()", ""),
            // Invalid chars
            ("\u{0903}", ""),
            // First letter only
            ("A", "A"),
            (" A", "A"),
            ("A ", "A"),
            (" A ", "A"),
            // Word
            ("App", "A"),
            (" App", "A"),
            ("App ", "A"),
            // Preceding punctuation(s), intervening spaces and first letter
            (r#""A"#, r#""A"#),
            (r#" "A"#, r#""A"#),
            (r#""A "#, r#""A"#),
            (r#"" A"#, r#"" A"#),
            (r#" "A "#, r#""A"#),
            (r#"("A"#, r#"("A"#),
            (r#" ("A"#, r#"("A"#),
            (r#"( "A"#, r#"( "A"#),
            (r#"[ ( "A"#, r#"[ ( "A"#),
            // First letter and succeeding punctuation(s)
            // TODO: modify test cases when intervening spaces in succeeding punctuation is
            // supported
            (r#"A""#, r#"A""#),
            (r#"A" "#, r#"A""#),
            (r#"A)]"#, r#"A)]"#),
            (r#"A" )]"#, r#"A""#),
            (r#"A)] >"#, r#"A)]"#),
            // All
            (r#" ("A" )]"#, r#"("A""#),
            (r#" ("A")] >"#, r#"("A")]"#),
            // Non ASCII chars
            ("一", "一"),
            (" 一 ", "一"),
            ("一二三", "一"),
            (" 一二三 ", "一"),
            ("(一二三)", "(一"),
            (" (一二三) ", "(一"),
            ("((一", "((一"),
            (" ( (一", "( (一"),
            ("一)", "一)"),
            ("一))", "一))"),
            ("一) )", "一)"),
        ];

        for &(text, expected) in cases {
            assert_first_letter_eq(text, expected);
        }
    }
}