Skip to main content

layout/flow/inline/
construct.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::char::{ToLowercase, ToUppercase};
7use std::ops::Range;
8
9use icu_segmenter::WordSegmenter;
10use layout_api::{LayoutNode, SharedSelection};
11use style::computed_values::_webkit_text_security::T as WebKitTextSecurity;
12use style::computed_values::white_space_collapse::T as WhiteSpaceCollapse;
13use style::dom::NodeInfo;
14use style::selector_parser::PseudoElement;
15use style::values::specified::text::TextTransformCase;
16use unicode_bidi::Level;
17use unicode_categories::UnicodeCategories;
18
19use super::text_run::TextRun;
20use super::{
21    InlineBox, InlineBoxIdentifier, InlineBoxes, InlineFormattingContext, InlineItem,
22    SharedInlineStyles,
23};
24use crate::cell::ArcRefCell;
25use crate::context::LayoutContext;
26use crate::dom::{LayoutBox, NodeExt};
27use crate::dom_traversal::NodeAndStyleInfo;
28use crate::flow::BlockLevelBox;
29use crate::flow::float::FloatBox;
30use crate::formatting_contexts::IndependentFormattingContext;
31use crate::positioned::AbsolutelyPositionedBox;
32use crate::style_ext::ComputedValuesExt;
33
/// The mutable state accumulated while walking inline-level content in order to build an
/// [`InlineFormattingContext`]. Items, text and inline boxes are appended through the `push_*`,
/// `start_inline_box` and `end_inline_box` methods and the result is produced by `finish`.
#[derive(Default)]
pub(crate) struct InlineFormattingContextBuilder {
    /// A stack of [`SharedInlineStyles`] including one for the root, one for each inline box on the
    /// inline box stack, and importantly, one for every `display: contents` element that we are
    /// currently processing. Normally `display: contents` elements don't affect the structure of
    /// the [`InlineFormattingContext`], but the styles they provide do style their children.
    pub shared_inline_styles_stack: Vec<SharedInlineStyles>,

    /// The collection of text strings that make up this [`InlineFormattingContext`] under
    /// construction.
    pub text_segments: Vec<String>,

    /// The current byte offset in the final (UTF-8) text string of this
    /// [`InlineFormattingContext`], used to properly set the text range of new
    /// [`InlineItem::TextRun`]s.
    current_text_offset: usize,

    /// The current character offset in the final text string of this [`InlineFormattingContext`],
    /// used to properly set the character range of new [`InlineItem::TextRun`]s. Note that this
    /// counts `char`s rather than UTF-8 bytes, so it differs from `current_text_offset` as soon
    /// as the text contains a multi-byte character.
    current_character_offset: usize,

    /// If the [`InlineFormattingContext`] that we are building has a selection shared with its
    /// originating node in the DOM, this will not be `None`.
    pub shared_selection: Option<SharedSelection>,

    /// Whether the last processed node ended with whitespace. This is used to
    /// implement rule 4 of <https://www.w3.org/TR/css-text-3/#collapse>:
    ///
    /// > Any collapsible space immediately following another collapsible space—even one
    /// > outside the boundary of the inline containing that space, provided both spaces are
    /// > within the same inline formatting context—is collapsed to have zero advance width.
    /// > (It is invisible, but retains its soft wrap opportunity, if any.)
    last_inline_box_ended_with_collapsible_white_space: bool,

    /// Whether or not the current state of the inline formatting context is on a word boundary
    /// for the purposes of `text-transform: capitalize`.
    on_word_boundary: bool,

    /// Whether or not this inline formatting context will contain floats.
    pub contains_floats: bool,

    /// The current list of [`InlineItem`]s in this [`InlineFormattingContext`] under
    /// construction. This is stored in a flat list to make it easy to access the last
    /// item.
    pub inline_items: Vec<InlineItem>,

    /// The current [`InlineBox`] tree of this [`InlineFormattingContext`] under construction.
    pub inline_boxes: InlineBoxes,

    /// The stack of ongoing inline boxes of the builder.
    ///
    /// Contains all the currently ongoing inline boxes we entered so far.
    /// The traversal is at all times as deep in the tree as this stack is,
    /// which is why the code doesn't need to keep track of the actual
    /// container root (see `handle_inline_level_element`).
    ///
    /// When an inline box ends, it's removed from this stack.
    inline_box_stack: Vec<InlineBoxIdentifier>,

    /// Whether this [`InlineFormattingContextBuilder`] is empty for the purposes of ignoring
    /// during box tree construction. An IFC is empty if it only contains TextRuns with
    /// completely collapsible whitespace. When that happens it can be ignored completely.
    pub is_empty: bool,

    /// Whether or not the `::first-letter` pseudo-element of this inline formatting context
    /// has been processed yet. This is also set when an atomic inline is pushed, so that text
    /// following the atomic is never treated as the first letter.
    has_processed_first_letter: bool,
}
102
103impl InlineFormattingContextBuilder {
104    /// <https://drafts.csswg.org/css-text/#white-space>:
105    /// > Except where specified otherwise, white space processing in CSS affects only the document
106    /// > white space characters: spaces (U+0020), tabs (U+0009), and segment breaks.
107    ///
108    /// From <https://github.com/w3c/csswg-drafts/issues/5147#issuecomment-637816669>:
109    /// > HTML clearly treats CR, LF, and CRLF as segment breaks.
110    ///
111    /// Other browsers also consider the form feed character (0x0c) to be document white space, it
112    /// seems.
113    ///
114    /// Taken all together, this is equivalent to the WhatWG Infra Standard's definition of ASCII
115    /// white space.
116    pub(crate) fn is_document_white_space(character: char) -> bool {
117        character.is_ascii_whitespace()
118    }
119
120    pub(crate) fn new(info: &NodeAndStyleInfo, context: &LayoutContext) -> Self {
121        Self {
122            // For the purposes of `text-transform: capitalize` the start of the IFC is a word boundary.
123            on_word_boundary: true,
124            is_empty: true,
125            shared_inline_styles_stack: vec![SharedInlineStyles::from_info_and_context(
126                info, context,
127            )],
128            shared_selection: info.node.selection(),
129            ..Default::default()
130        }
131    }
132
133    pub(crate) fn currently_processing_inline_box(&self) -> bool {
134        !self.inline_box_stack.is_empty()
135    }
136
137    fn push_control_character_string(&mut self, string_to_push: &str) {
138        self.text_segments.push(string_to_push.to_owned());
139        self.current_text_offset += string_to_push.len();
140        self.current_character_offset += string_to_push.chars().count();
141    }
142
143    fn shared_inline_styles(&self) -> SharedInlineStyles {
144        self.shared_inline_styles_stack
145            .last()
146            .expect("Should always have at least one SharedInlineStyles")
147            .clone()
148    }
149
150    pub(crate) fn push_atomic(
151        &mut self,
152        independent_formatting_context_creator: impl FnOnce()
153            -> ArcRefCell<IndependentFormattingContext>,
154        old_layout_box: Option<LayoutBox>,
155    ) -> InlineItem {
156        // If there is an existing undamaged layout box that's compatible, use that.
157        let independent_formatting_context = old_layout_box
158            .and_then(|layout_box| match layout_box {
159                LayoutBox::InlineLevel(InlineItem::Atomic(atomic, ..)) => Some(atomic),
160                _ => None,
161            })
162            .unwrap_or_else(independent_formatting_context_creator);
163
164        let inline_level_box = InlineItem::Atomic(
165            independent_formatting_context,
166            self.current_text_offset,
167            Level::ltr(), /* This will be assigned later if necessary. */
168        );
169        self.inline_items.push(inline_level_box.clone());
170        self.is_empty = false;
171
172        // Push an object replacement character for this atomic, which will ensure that the line breaker
173        // inserts a line breaking opportunity here.
174        self.push_control_character_string("\u{fffc}");
175
176        self.last_inline_box_ended_with_collapsible_white_space = false;
177        self.on_word_boundary = true;
178
179        // Atomics such as images should prevent any following text as being interpreted as the first letter.
180        self.has_processed_first_letter = true;
181
182        inline_level_box
183    }
184
185    pub(crate) fn push_absolutely_positioned_box(
186        &mut self,
187        absolutely_positioned_box_creator: impl FnOnce() -> ArcRefCell<AbsolutelyPositionedBox>,
188        old_layout_box: Option<LayoutBox>,
189    ) -> InlineItem {
190        let absolutely_positioned_box = old_layout_box
191            .and_then(|layout_box| match layout_box {
192                LayoutBox::InlineLevel(InlineItem::OutOfFlowAbsolutelyPositionedBox(
193                    positioned_box,
194                    ..,
195                )) => Some(positioned_box),
196                _ => None,
197            })
198            .unwrap_or_else(absolutely_positioned_box_creator);
199
200        // We cannot just reuse the old inline item, because the `current_text_offset` may have changed.
201        let inline_level_box = InlineItem::OutOfFlowAbsolutelyPositionedBox(
202            absolutely_positioned_box,
203            self.current_text_offset,
204        );
205
206        self.inline_items.push(inline_level_box.clone());
207        self.is_empty = false;
208        inline_level_box
209    }
210
211    pub(crate) fn push_float_box(
212        &mut self,
213        float_box_creator: impl FnOnce() -> ArcRefCell<FloatBox>,
214        old_layout_box: Option<LayoutBox>,
215    ) -> InlineItem {
216        let inline_level_box = old_layout_box
217            .and_then(|layout_box| match layout_box {
218                LayoutBox::InlineLevel(inline_item) => Some(inline_item),
219                _ => None,
220            })
221            .unwrap_or_else(|| InlineItem::OutOfFlowFloatBox(float_box_creator()));
222
223        debug_assert!(
224            matches!(inline_level_box, InlineItem::OutOfFlowFloatBox(..),),
225            "Created float box with incompatible `old_layout_box`"
226        );
227
228        self.inline_items.push(inline_level_box.clone());
229        self.is_empty = false;
230        self.contains_floats = true;
231        inline_level_box
232    }
233
234    pub(crate) fn push_block_level_box(&mut self, block_level: ArcRefCell<BlockLevelBox>) {
235        assert!(self.currently_processing_inline_box());
236        self.contains_floats = self.contains_floats || block_level.borrow().contains_floats();
237        self.inline_items.push(InlineItem::BlockLevel(block_level));
238    }
239
240    pub(crate) fn start_inline_box(
241        &mut self,
242        inline_box_creator: impl FnOnce() -> ArcRefCell<InlineBox>,
243        old_layout_box: Option<LayoutBox>,
244    ) -> InlineItem {
245        // If there is an existing undamaged layout box that's compatible, use the `InlineBox` within it.
246        let inline_box = old_layout_box
247            .and_then(|layout_box| match layout_box {
248                LayoutBox::InlineLevel(InlineItem::StartInlineBox(inline_box)) => Some(inline_box),
249                _ => None,
250            })
251            .unwrap_or_else(inline_box_creator);
252
253        let borrowed_inline_box = inline_box.borrow();
254        self.push_control_character_string(borrowed_inline_box.base.style.bidi_control_chars().0);
255
256        self.shared_inline_styles_stack
257            .push(borrowed_inline_box.shared_inline_styles.clone());
258        std::mem::drop(borrowed_inline_box);
259
260        let identifier = self.inline_boxes.start_inline_box(inline_box.clone());
261        let inline_item = InlineItem::StartInlineBox(inline_box);
262        self.inline_items.push(inline_item.clone());
263        self.inline_box_stack.push(identifier);
264        self.is_empty = false;
265        inline_item
266    }
267
268    /// End the ongoing inline box in this [`InlineFormattingContextBuilder`], returning
269    /// shared references to all of the box tree items that were created for it. More than
270    /// a single box tree items may be produced for a single inline box when that inline
271    /// box is split around a block-level element.
272    pub(crate) fn end_inline_box(&mut self) {
273        self.shared_inline_styles_stack.pop();
274        self.inline_items.push(InlineItem::EndInlineBox);
275        let identifier = self
276            .inline_box_stack
277            .pop()
278            .expect("Ended non-existent inline box");
279        self.inline_boxes.end_inline_box(identifier);
280        let inline_level_box = self.inline_boxes.get(&identifier);
281        let bidi_control_chars = inline_level_box.borrow().base.style.bidi_control_chars();
282        self.push_control_character_string(bidi_control_chars.1);
283    }
284
285    /// This is like [`Self::push_text`], except that it might possibly add an anonymous box if
286    ///
287    ///  - This inline formatting context has a `::first-letter` style.
288    ///  - No anonymous box for `::first-letter` has been added yet.
289    ///  - First letter content is detected in this text.
290    ///
291    /// Note that this should only be used when processing text in block containers.
292    pub(crate) fn push_text_with_possible_first_letter<'dom>(
293        &mut self,
294        text: Cow<'dom, str>,
295        info: &NodeAndStyleInfo<'dom>,
296        container_info: &NodeAndStyleInfo<'dom>,
297        layout_context: &LayoutContext,
298    ) -> bool {
299        if self.has_processed_first_letter || !container_info.pseudo_element_chain().is_empty() {
300            self.push_text(text, info);
301            return false;
302        }
303
304        let Some(first_letter_info) =
305            container_info.with_pseudo_element(layout_context, PseudoElement::FirstLetter)
306        else {
307            self.push_text(text, info);
308            return false;
309        };
310
311        let first_letter_range = first_letter_range(&text[..]);
312        if first_letter_range.is_empty() {
313            return false;
314        }
315
316        // Push any leading white space first.
317        if first_letter_range.start != 0 {
318            self.push_text(Cow::Borrowed(&text[0..first_letter_range.start]), info);
319        }
320
321        // Push the first-letter text into an anonymous box with the `::first-letter` style.
322        let box_slot = first_letter_info.node.box_slot();
323        let inline_item = self.start_inline_box(
324            || ArcRefCell::new(InlineBox::new(&first_letter_info, layout_context)),
325            None,
326        );
327        box_slot.set(LayoutBox::InlineLevel(inline_item));
328
329        let first_letter_text = Cow::Borrowed(&text[first_letter_range.clone()]);
330        self.push_text(first_letter_text, &first_letter_info);
331        self.end_inline_box();
332        self.has_processed_first_letter = true;
333
334        // Now push the non-first-letter text.
335        self.push_text(Cow::Borrowed(&text[first_letter_range.end..]), info);
336
337        true
338    }
339
    /// Append `text`, styled by `info`, to this inline formatting context.
    ///
    /// The text first goes through white space collapsing, then the case part of
    /// `text-transform`, then `-webkit-text-security`. If nothing is left afterwards this
    /// returns without adding anything. Otherwise the processed text becomes a new text
    /// segment, and it either extends the last [`InlineItem::TextRun`] (when that run uses the
    /// very same [`SharedInlineStyles`] as the current top of the style stack) or gets a new
    /// `TextRun` of its own.
    pub(crate) fn push_text<'dom>(&mut self, text: Cow<'dom, str>, info: &NodeAndStyleInfo<'dom>) {
        let white_space_collapse = info.style.clone_white_space_collapse();
        let collapsed = WhitespaceCollapse::new(
            text.chars(),
            white_space_collapse,
            self.last_inline_box_ended_with_collapsible_white_space,
        );

        // TODO: Not all text transforms are about case, this logic should stop ignoring
        // TextTransform::FULL_WIDTH and TextTransform::FULL_SIZE_KANA.
        let text_transform = info.style.clone_text_transform().case();
        // Declared outside of the `match` so that the capitalized copy outlives the boxed
        // iterator that borrows from it.
        let capitalized_text: String;
        let char_iterator: Box<dyn Iterator<Item = char>> = match text_transform {
            TextTransformCase::None => Box::new(collapsed),
            TextTransformCase::Capitalize => {
                // `TextTransformation` doesn't support capitalization, so we must capitalize the whole
                // string at once and make a copy. Here `on_word_boundary` indicates whether or not the
                // inline formatting context as a whole is on a word boundary. This is different from
                // `last_inline_box_ended_with_collapsible_white_space` because the word boundaries are
                // between atomic inlines and at the start of the IFC, and because preserved spaces
                // are a word boundary.
                let collapsed_string: String = collapsed.collect();
                capitalized_text = capitalize_string(&collapsed_string, self.on_word_boundary);
                Box::new(capitalized_text.chars())
            },
            _ => {
                // If `text-transform` is active, wrap the `WhitespaceCollapse` iterator in
                // a `TextTransformation` iterator.
                Box::new(TextTransformation::new(collapsed, text_transform))
            },
        };

        // Apply `-webkit-text-security` last, on top of the collapsed and case-transformed text.
        let char_iterator = if info.style.clone__webkit_text_security() != WebKitTextSecurity::None
        {
            Box::new(TextSecurityTransform::new(
                char_iterator,
                info.style.clone__webkit_text_security(),
            ))
        } else {
            char_iterator
        };

        let white_space_collapse = info.style.clone_white_space_collapse();
        // Counted while collecting, so that the character range can be computed without a
        // second pass over the new text.
        let mut character_count = 0;
        let new_text: String = char_iterator
            .inspect(|&character| {
                character_count += 1;

                // The builder stays empty only as long as every character seen so far is white
                // space that this `white-space-collapse` mode treats as collapsible.
                self.is_empty = self.is_empty &&
                    match white_space_collapse {
                        WhiteSpaceCollapse::Collapse => Self::is_document_white_space(character),
                        WhiteSpaceCollapse::PreserveBreaks => {
                            Self::is_document_white_space(character) && character != '\n'
                        },
                        WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces => false,
                    };
            })
            .collect();

        if new_text.is_empty() {
            return;
        }

        if let Some(last_character) = new_text.chars().next_back() {
            self.on_word_boundary = last_character.is_whitespace();
            self.last_inline_box_ended_with_collapsible_white_space =
                self.on_word_boundary && white_space_collapse != WhiteSpaceCollapse::Preserve;
        }

        // Byte range of the new text within the final text string.
        let new_range = self.current_text_offset..self.current_text_offset + new_text.len();
        self.current_text_offset = new_range.end;

        // Character (`char`) range of the new text within the final text string.
        let new_character_range =
            self.current_character_offset..self.current_character_offset + character_count;
        self.current_character_offset = new_character_range.end;

        self.text_segments.push(new_text);

        let current_inline_styles = self.shared_inline_styles();

        // If the previous item is a `TextRun` with the very same shared styles, extend it
        // instead of creating a new run.
        if let Some(InlineItem::TextRun(text_run)) = self.inline_items.last() &&
            text_run
                .borrow()
                .inline_styles
                .ptr_eq(&current_inline_styles)
        {
            let box_slot = info.node.box_slot();
            let old_text_run = box_slot.take_layout_box_as_text_run();

            {
                let mut text_run = text_run.borrow_mut();
                text_run.text_range.end = new_range.end;
                text_run.character_range.end = new_character_range.end;

                // If this text node does not have a `TextRun` in the box slot, this means that
                // it is either new or dirty, which means that the entire `TextRun` just extended
                // is dirty as well. In this case, never reuse existing shaping results. Clear
                // all old items to ensure this.
                if old_text_run.is_none() {
                    text_run.items.clear();
                }
            }

            box_slot.set(LayoutBox::Text(text_run.clone()));
            return;
        }

        // Otherwise start a new `TextRun`. Only text nodes get the run stored in their box slot.
        let box_slot = info.node.is_text_node().then(|| info.node.box_slot());
        let text_run = ArcRefCell::new(TextRun::new(
            info.into(),
            current_inline_styles,
            new_range,
            new_character_range,
            box_slot
                .as_ref()
                .and_then(|box_slot| box_slot.take_layout_box_as_text_run()),
        ));
        self.inline_items
            .push(InlineItem::TextRun(text_run.clone()));

        if let Some(box_slot) = box_slot {
            box_slot.set(LayoutBox::Text(text_run));
        }
    }
464
    /// Start processing a `display: contents` element. Such an element creates no inline box,
    /// but its `shared_inline_styles` are pushed onto the style stack so that they become the
    /// current styles until the matching [`Self::leave_display_contents`] call.
    pub(crate) fn enter_display_contents(&mut self, shared_inline_styles: SharedInlineStyles) {
        self.shared_inline_styles_stack.push(shared_inline_styles);
    }
468
    /// Finish processing a `display: contents` element by popping the top entry of the style
    /// stack, which is expected to be the one pushed by [`Self::enter_display_contents`].
    pub(crate) fn leave_display_contents(&mut self) {
        self.shared_inline_styles_stack.pop();
    }
472
473    /// Finish the current inline formatting context, returning [`None`] if the context was empty.
474    pub(crate) fn finish(
475        self,
476        layout_context: &LayoutContext,
477        has_first_formatted_line: bool,
478        is_single_line_text_input: bool,
479        default_bidi_level: Level,
480    ) -> Option<InlineFormattingContext> {
481        if self.is_empty {
482            return None;
483        }
484
485        assert!(self.inline_box_stack.is_empty());
486        Some(InlineFormattingContext::new_with_builder(
487            self,
488            layout_context,
489            has_first_formatted_line,
490            is_single_line_text_input,
491            default_bidi_level,
492        ))
493    }
494}
495
/// Whether a collapsible segment break should be turned into a space instead of being removed
/// by [`WhitespaceCollapse`]. This currently always answers `true`.
// NOTE(review): the segment break transformation rules quoted in `WhitespaceCollapse::next`
// make this depend on the context around the break; presumably this is a placeholder for that.
fn preserve_segment_break() -> bool {
    true
}
499
/// An iterator adaptor that applies CSS white space collapsing, according to the given
/// `white-space-collapse` value, to the characters produced by another iterator.
pub struct WhitespaceCollapse<InputIterator> {
    /// The input character iterator.
    char_iterator: InputIterator,

    /// The `white-space-collapse` value that decides how white space is processed.
    white_space_collapse: WhiteSpaceCollapse,

    /// Whether or not we should collapse white space completely at the start of the string.
    /// This is true when the last character handled in our owning [`super::InlineFormattingContext`]
    /// was collapsible white space.
    remove_collapsible_white_space_at_start: bool,

    /// Whether or not the last character produced was newline. There is special behavior
    /// we do after each newline.
    following_newline: bool,

    /// Whether or not we have seen any non-white space characters, indicating that we are not
    /// in a collapsible white space section at the beginning of the string.
    have_seen_non_white_space_characters: bool,

    /// Whether the last character that we processed was a non-newline white space character. When
    /// collapsing white space we need to wait until the next non-white space character or the end
    /// of the string to push a single white space.
    inside_white_space: bool,

    /// When we enter a collapsible white space region, we may need to wait to produce a single
    /// white space character as soon as we encounter a non-white space character. When that
    /// happens we queue up the non-white space character for the next iterator call.
    character_pending_to_return: Option<char>,
}
527
528impl<InputIterator> WhitespaceCollapse<InputIterator> {
529    pub fn new(
530        char_iterator: InputIterator,
531        white_space_collapse: WhiteSpaceCollapse,
532        trim_beginning_white_space: bool,
533    ) -> Self {
534        Self {
535            char_iterator,
536            white_space_collapse,
537            remove_collapsible_white_space_at_start: trim_beginning_white_space,
538            inside_white_space: false,
539            following_newline: false,
540            have_seen_non_white_space_characters: false,
541            character_pending_to_return: None,
542        }
543    }
544
545    fn is_leading_trimmed_white_space(&self) -> bool {
546        !self.have_seen_non_white_space_characters && self.remove_collapsible_white_space_at_start
547    }
548
549    /// Whether or not we need to produce a space character if the next character is not a newline
550    /// and not white space. This happens when we are exiting a section of white space and we
551    /// waited to produce a single space character for the entire section of white space (but
552    /// not following or preceding a newline).
553    fn need_to_produce_space_character_after_white_space(&self) -> bool {
554        self.inside_white_space && !self.following_newline && !self.is_leading_trimmed_white_space()
555    }
556}
557
558impl<InputIterator> Iterator for WhitespaceCollapse<InputIterator>
559where
560    InputIterator: Iterator<Item = char>,
561{
562    type Item = char;
563
    /// Produce the next character of the white-space-processed text, or `None` once both the
    /// input and any deferred output have been exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // Point 4.1.1 first bullet:
        // > If white-space is set to normal, nowrap, or pre-line, whitespace
        // > characters are considered collapsible
        // If whitespace is not considered collapsible, it is preserved entirely, which
        // means that we can simply return the input string exactly.
        if self.white_space_collapse == WhiteSpaceCollapse::Preserve ||
            self.white_space_collapse == WhiteSpaceCollapse::BreakSpaces
        {
            // From <https://drafts.csswg.org/css-text-3/#white-space-processing>:
            // > Carriage returns (U+000D) are treated identically to spaces (U+0020) in all respects.
            //
            // In the non-preserved case these are converted to space below.
            return match self.char_iterator.next() {
                Some('\r') => Some(' '),
                next => next,
            };
        }

        // A non-white space character that was queued while we returned the single space
        // standing in for the white space in front of it.
        if let Some(character) = self.character_pending_to_return.take() {
            self.inside_white_space = false;
            self.have_seen_non_white_space_characters = true;
            self.following_newline = false;
            return Some(character);
        }

        while let Some(character) = self.char_iterator.next() {
            // Don't push non-newline whitespace immediately. Instead wait to push it until we
            // know that it isn't followed by a newline. See
            // `need_to_produce_space_character_after_white_space`.
            if InlineFormattingContextBuilder::is_document_white_space(character) &&
                character != '\n'
            {
                self.inside_white_space = true;
                continue;
            }

            // Point 4.1.1:
            // > 2. Collapsible segment breaks are transformed for rendering according to the
            // >    segment break transformation rules.
            if character == '\n' {
                // From <https://drafts.csswg.org/css-text-3/#line-break-transform>
                // (4.1.3 -- the segment break transformation rules):
                //
                // > When white-space is pre, pre-wrap, or pre-line, segment breaks are not
                // > collapsible and are instead transformed into a preserved line feed
                if self.white_space_collapse != WhiteSpaceCollapse::Collapse {
                    self.inside_white_space = false;
                    self.following_newline = true;
                    return Some(character);

                // Point 4.1.3:
                // > 1. First, any collapsible segment break immediately following another
                // >    collapsible segment break is removed.
                // > 2. Then any remaining segment break is either transformed into a space (U+0020)
                // >    or removed depending on the context before and after the break.
                } else if !self.following_newline &&
                    preserve_segment_break() &&
                    !self.is_leading_trimmed_white_space()
                {
                    self.inside_white_space = false;
                    self.following_newline = true;
                    return Some(' ');
                } else {
                    // This segment break is removed: it either follows another segment break
                    // or is part of the trimmed leading white space.
                    self.following_newline = true;
                    continue;
                }
            }

            // Point 4.1.1:
            // > 2. Any sequence of collapsible spaces and tabs immediately preceding or
            // >    following a segment break is removed.
            // > 3. Every collapsible tab is converted to a collapsible space (U+0020).
            // > 4. Any collapsible space immediately following another collapsible space—even
            // >    one outside the boundary of the inline containing that space, provided both
            // >    spaces are within the same inline formatting context—is collapsed to have zero
            // >    advance width.
            if self.need_to_produce_space_character_after_white_space() {
                // Return the single space now and keep the character for the next call.
                self.inside_white_space = false;
                self.character_pending_to_return = Some(character);
                return Some(' ');
            }

            self.inside_white_space = false;
            self.have_seen_non_white_space_characters = true;
            self.following_newline = false;
            return Some(character);
        }

        // The input ended inside a run of white space that still needs its single space.
        if self.need_to_produce_space_character_after_white_space() {
            self.inside_white_space = false;
            return Some(' ');
        }

        None
    }
660
661    fn size_hint(&self) -> (usize, Option<usize>) {
662        self.char_iterator.size_hint()
663    }
664
665    fn count(self) -> usize
666    where
667        Self: Sized,
668    {
669        self.char_iterator.count()
670    }
671}
672
/// The not yet returned remainder of a single character's case conversion, which can expand
/// to more than one character.
enum PendingCaseConversionResult {
    /// The remainder of an uppercase conversion.
    Uppercase(ToUppercase),
    /// The remainder of a lowercase conversion.
    Lowercase(ToLowercase),
}

impl PendingCaseConversionResult {
    /// Return the next character of the conversion, or `None` once it is used up.
    fn next(&mut self) -> Option<char> {
        let remaining: &mut dyn Iterator<Item = char> = match self {
            Self::Uppercase(uppercased) => uppercased,
            Self::Lowercase(lowercased) => lowercased,
        };
        remaining.next()
    }
}
686
/// This is an iterator that consumes a char iterator and produces characters transformed
/// by the given CSS `text-transform` value. It currently does not support
/// `text-transform: capitalize` because Unicode segmentation libraries do not support
/// streaming input one character at a time; with that value, as with `none`, characters
/// are passed through unchanged.
pub struct TextTransformation<InputIterator> {
    /// The input character iterator.
    char_iterator: InputIterator,
    /// The `text-transform` value to use.
    text_transform: TextTransformCase,
    /// If an uppercasing or lowercasing produces more than one character, this
    /// caches them so that they can be returned in subsequent iterator calls.
    pending_case_conversion_result: Option<PendingCaseConversionResult>,
}
700
701impl<InputIterator> TextTransformation<InputIterator> {
702    pub fn new(char_iterator: InputIterator, text_transform: TextTransformCase) -> Self {
703        Self {
704            char_iterator,
705            text_transform,
706            pending_case_conversion_result: None,
707        }
708    }
709}
710
711impl<InputIterator> Iterator for TextTransformation<InputIterator>
712where
713    InputIterator: Iterator<Item = char>,
714{
715    type Item = char;
716
717    fn next(&mut self) -> Option<Self::Item> {
718        if let Some(character) = self
719            .pending_case_conversion_result
720            .as_mut()
721            .and_then(|result| result.next())
722        {
723            return Some(character);
724        }
725        self.pending_case_conversion_result = None;
726
727        for character in self.char_iterator.by_ref() {
728            match self.text_transform {
729                TextTransformCase::None => return Some(character),
730                TextTransformCase::Uppercase => {
731                    let mut pending_result =
732                        PendingCaseConversionResult::Uppercase(character.to_uppercase());
733                    if let Some(character) = pending_result.next() {
734                        self.pending_case_conversion_result = Some(pending_result);
735                        return Some(character);
736                    }
737                },
738                TextTransformCase::Lowercase => {
739                    let mut pending_result =
740                        PendingCaseConversionResult::Lowercase(character.to_lowercase());
741                    if let Some(character) = pending_result.next() {
742                        self.pending_case_conversion_result = Some(pending_result);
743                        return Some(character);
744                    }
745                },
746                // `text-transform: capitalize` currently cannot work on a per-character basis,
747                // so must be handled outside of this iterator.
748                TextTransformCase::Capitalize => return Some(character),
749            }
750        }
751        None
752    }
753}
754
755pub struct TextSecurityTransform<InputIterator> {
756    /// The input character iterator.
757    char_iterator: InputIterator,
758    /// The `-webkit-text-security` value to use.
759    text_security: WebKitTextSecurity,
760}
761
762impl<InputIterator> TextSecurityTransform<InputIterator> {
763    pub fn new(char_iterator: InputIterator, text_security: WebKitTextSecurity) -> Self {
764        Self {
765            char_iterator,
766            text_security,
767        }
768    }
769}
770
771impl<InputIterator> Iterator for TextSecurityTransform<InputIterator>
772where
773    InputIterator: Iterator<Item = char>,
774{
775    type Item = char;
776
777    fn next(&mut self) -> Option<Self::Item> {
778        // The behavior of `-webkit-text-security` isn't specified, so we have some
779        // flexibility in the implementation. We just need to maintain a rough
780        // compatability with other browsers.
781        Some(match self.char_iterator.next()? {
782            // This is not ideal, but zero width space is used for some special reasons in
783            // `<input>` fields, so these remain untransformed, otherwise they would show up
784            // in empty text fields.
785            '\u{200B}' => '\u{200B}',
786            // Newlines are preserved, so that `<br>` keeps working as expected.
787            '\n' => '\n',
788            character => match self.text_security {
789                WebKitTextSecurity::None => character,
790                WebKitTextSecurity::Circle => '○',
791                WebKitTextSecurity::Disc => '●',
792                WebKitTextSecurity::Square => '■',
793            },
794        })
795    }
796}
797
798/// Given a string and whether the start of the string represents a word boundary, create a copy of
799/// the string with letters after word boundaries capitalized.
800pub(crate) fn capitalize_string(string: &str, allow_word_at_start: bool) -> String {
801    let mut output_string = String::new();
802    output_string.reserve(string.len());
803
804    let word_segmenter = WordSegmenter::new_auto();
805    let mut bounds = word_segmenter.segment_str(string).peekable();
806    let mut byte_index = 0;
807    for character in string.chars() {
808        let current_byte_index = byte_index;
809        byte_index += character.len_utf8();
810
811        if let Some(next_index) = bounds.peek() &&
812            *next_index == current_byte_index
813        {
814            bounds.next();
815
816            if current_byte_index != 0 || allow_word_at_start {
817                output_string.extend(character.to_uppercase());
818                continue;
819            }
820        }
821
822        output_string.push(character);
823    }
824
825    output_string
826}
827
828/// Computes the range of the first letter.
829///
830/// The range includes any preceding punctuation and white space, and any trailing punctuation. Any
831/// non-punctuation following the letter/number/symbol of first-letter ends the range. Intervening
832/// spaces within trailing punctuation are not supported yet.
833///
834/// If the resulting range is empty, no compatible first-letter text was found.
835///
836/// <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
837fn first_letter_range(text: &str) -> Range<usize> {
838    enum State {
839        /// All characters that precede the `PrecedingWhitespaceAndPunctuation` state.
840        Start,
841        /// All preceding punctuation and intervening whitepace that precedes the `Lns` state.
842        PrecedingPunctuation,
843        /// Unicode general category L: letter, N: number and S: symbol
844        Lns,
845        /// All punctuation (but no whitespace or other characters), that
846        /// come after the `Lns` state.
847        TrailingPunctuation,
848    }
849
850    let mut start = 0;
851    let mut state = State::Start;
852    for (index, character) in text.char_indices() {
853        match &mut state {
854            State::Start => {
855                if character.is_letter() || character.is_number() || character.is_symbol() {
856                    start = index;
857                    state = State::Lns;
858                } else if character.is_punctuation() {
859                    start = index;
860                    state = State::PrecedingPunctuation
861                }
862            },
863            State::PrecedingPunctuation => {
864                if character.is_letter() || character.is_number() || character.is_symbol() {
865                    state = State::Lns;
866                } else if !character.is_separator_space() && !character.is_punctuation() {
867                    return 0..0;
868                }
869            },
870            State::Lns => {
871                // TODO: Implement support for intervening spaces
872                // <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
873                if character.is_punctuation() &&
874                    !character.is_punctuation_open() &&
875                    !character.is_punctuation_dash()
876                {
877                    state = State::TrailingPunctuation;
878                } else {
879                    return start..index;
880                }
881            },
882            State::TrailingPunctuation => {
883                // TODO: Implement support for intervening spaces
884                // <https://drafts.csswg.org/css-pseudo/#first-letter-pattern>
885                if character.is_punctuation() &&
886                    !character.is_punctuation_open() &&
887                    !character.is_punctuation_dash()
888                {
889                    continue;
890                } else {
891                    return start..index;
892                }
893            },
894        }
895    }
896
897    match state {
898        State::Start | State::PrecedingPunctuation => 0..0,
899        State::Lns | State::TrailingPunctuation => start..text.len(),
900    }
901}
902
#[cfg(test)]
mod tests {
    use super::*;

    /// Pairs of an input text and the slice of it that `first_letter_range` is expected
    /// to select.
    const FIRST_LETTER_CASES: &[(&str, &str)] = &[
        // All spaces
        ("", ""),
        ("  ", ""),
        // Spaces and punctuation only
        ("(", ""),
        (" (", ""),
        ("( ", ""),
        ("()", ""),
        // Invalid chars
        ("\u{0903}", ""),
        // First letter only
        ("A", "A"),
        (" A", "A"),
        ("A ", "A"),
        (" A ", "A"),
        // Word
        ("App", "A"),
        (" App", "A"),
        ("App ", "A"),
        // Preceding punctuation(s), intervening spaces and first letter
        (r#""A"#, r#""A"#),
        (r#" "A"#, r#""A"#),
        (r#""A "#, r#""A"#),
        (r#"" A"#, r#"" A"#),
        (r#" "A "#, r#""A"#),
        (r#"("A"#, r#"("A"#),
        (r#" ("A"#, r#"("A"#),
        (r#"( "A"#, r#"( "A"#),
        (r#"[ ( "A"#, r#"[ ( "A"#),
        // First letter and succeeding punctuation(s)
        // TODO: modify test cases when intervening spaces in succeeding punctuation are
        // supported
        (r#"A""#, r#"A""#),
        (r#"A" "#, r#"A""#),
        (r#"A)]"#, r#"A)]"#),
        (r#"A" )]"#, r#"A""#),
        (r#"A)] >"#, r#"A)]"#),
        // All
        (r#" ("A" )]"#, r#"("A""#),
        (r#" ("A")] >"#, r#"("A")]"#),
        // Non ASCII chars
        ("一", "一"),
        (" 一 ", "一"),
        ("一二三", "一"),
        (" 一二三 ", "一"),
        ("(一二三)", "(一"),
        (" (一二三) ", "(一"),
        ("((一", "((一"),
        (" ( (一", "( (一"),
        ("一)", "一)"),
        ("一))", "一))"),
        ("一) )", "一)"),
    ];

    #[test]
    fn test_first_letter_range() {
        for &(text, expected) in FIRST_LETTER_CASES {
            let range = first_letter_range(text);
            assert_eq!(&text[range], expected, "input: {text:?}");
        }
    }
}