script/dom/
characterdata.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5//! DOM bindings for `CharacterData`.
6use std::cell::LazyCell;
7
8use dom_struct::dom_struct;
9use script_bindings::codegen::InheritTypes::{CharacterDataTypeId, NodeTypeId, TextTypeId};
10
11use crate::dom::bindings::cell::{DomRefCell, Ref};
12use crate::dom::bindings::codegen::Bindings::CharacterDataBinding::CharacterDataMethods;
13use crate::dom::bindings::codegen::Bindings::NodeBinding::Node_Binding::NodeMethods;
14use crate::dom::bindings::codegen::Bindings::ProcessingInstructionBinding::ProcessingInstructionMethods;
15use crate::dom::bindings::codegen::UnionTypes::NodeOrString;
16use crate::dom::bindings::error::{Error, ErrorResult, Fallible};
17use crate::dom::bindings::inheritance::Castable;
18use crate::dom::bindings::root::{DomRoot, LayoutDom};
19use crate::dom::bindings::str::DOMString;
20use crate::dom::cdatasection::CDATASection;
21use crate::dom::comment::Comment;
22use crate::dom::document::Document;
23use crate::dom::element::Element;
24use crate::dom::mutationobserver::{Mutation, MutationObserver};
25use crate::dom::node::{ChildrenMutation, Node, NodeDamage};
26use crate::dom::processinginstruction::ProcessingInstruction;
27use crate::dom::text::Text;
28use crate::dom::virtualmethods::vtable_for;
29use crate::script_runtime::CanGc;
30
31// https://dom.spec.whatwg.org/#characterdata
32#[dom_struct]
33pub(crate) struct CharacterData {
34    node: Node,
35    data: DomRefCell<String>,
36}
37
38impl CharacterData {
39    pub(crate) fn new_inherited(data: DOMString, document: &Document) -> CharacterData {
40        CharacterData {
41            node: Node::new_inherited(document),
42            data: DomRefCell::new(String::from(data.str())),
43        }
44    }
45
46    pub(crate) fn clone_with_data(
47        &self,
48        data: DOMString,
49        document: &Document,
50        can_gc: CanGc,
51    ) -> DomRoot<Node> {
52        match self.upcast::<Node>().type_id() {
53            NodeTypeId::CharacterData(CharacterDataTypeId::Comment) => {
54                DomRoot::upcast(Comment::new(data, document, None, can_gc))
55            },
56            NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) => {
57                let pi = self.downcast::<ProcessingInstruction>().unwrap();
58                DomRoot::upcast(ProcessingInstruction::new(
59                    pi.Target(),
60                    data,
61                    document,
62                    can_gc,
63                ))
64            },
65            NodeTypeId::CharacterData(CharacterDataTypeId::Text(TextTypeId::CDATASection)) => {
66                DomRoot::upcast(CDATASection::new(data, document, can_gc))
67            },
68            NodeTypeId::CharacterData(CharacterDataTypeId::Text(TextTypeId::Text)) => {
69                DomRoot::upcast(Text::new(data, document, can_gc))
70            },
71            _ => unreachable!(),
72        }
73    }
74
75    #[inline]
76    pub(crate) fn data(&self) -> Ref<'_, String> {
77        self.data.borrow()
78    }
79
80    #[inline]
81    pub(crate) fn append_data(&self, data: &str) {
82        self.queue_mutation_record();
83        self.data.borrow_mut().push_str(data);
84        self.content_changed();
85    }
86
87    fn content_changed(&self) {
88        let node = self.upcast::<Node>();
89        node.dirty(NodeDamage::Other);
90
91        // If this is a Text node, we might need to re-parse (say, if our parent
92        // is a <style> element.) We don't need to if this is a Comment or
93        // ProcessingInstruction.
94        if self.is::<Text>() {
95            if let Some(parent_node) = node.GetParentNode() {
96                let mutation = ChildrenMutation::ChangeText;
97                vtable_for(&parent_node).children_changed(&mutation, CanGc::note());
98            }
99        }
100    }
101
102    // Queue a MutationObserver record before changing the content.
103    fn queue_mutation_record(&self) {
104        let mutation = LazyCell::new(|| Mutation::CharacterData {
105            old_value: self.data.borrow().clone(),
106        });
107        MutationObserver::queue_a_mutation_record(self.upcast::<Node>(), mutation);
108    }
109}
110
111impl CharacterDataMethods<crate::DomTypeHolder> for CharacterData {
112    /// <https://dom.spec.whatwg.org/#dom-characterdata-data>
113    fn Data(&self) -> DOMString {
114        DOMString::from(self.data.borrow().clone())
115    }
116
117    /// <https://dom.spec.whatwg.org/#dom-characterdata-data>
118    fn SetData(&self, data: DOMString) {
119        self.queue_mutation_record();
120        let old_length = self.Length();
121        let new_length = data.str().encode_utf16().count() as u32;
122        *self.data.borrow_mut() = String::from(data.str());
123        self.content_changed();
124        let node = self.upcast::<Node>();
125        node.ranges()
126            .replace_code_units(node, 0, old_length, new_length);
127    }
128
129    /// <https://dom.spec.whatwg.org/#dom-characterdata-length>
130    fn Length(&self) -> u32 {
131        self.data.borrow().encode_utf16().count() as u32
132    }
133
134    /// <https://dom.spec.whatwg.org/#dom-characterdata-substringdata>
135    fn SubstringData(&self, offset: u32, count: u32) -> Fallible<DOMString> {
136        let data = self.data.borrow();
137        // Step 1.
138        let mut substring = String::new();
139        let remaining = match split_at_utf16_code_unit_offset(&data, offset) {
140            Ok((_, astral, s)) => {
141                // As if we had split the UTF-16 surrogate pair in half
142                // and then transcoded that to UTF-8 lossily,
143                // since our DOMString is currently strict UTF-8.
144                if astral.is_some() {
145                    substring += "\u{FFFD}";
146                }
147                s
148            },
149            // Step 2.
150            Err(()) => return Err(Error::IndexSize(None)),
151        };
152        match split_at_utf16_code_unit_offset(remaining, count) {
153            // Steps 3.
154            Err(()) => substring += remaining,
155            // Steps 4.
156            Ok((s, astral, _)) => {
157                substring += s;
158                // As if we had split the UTF-16 surrogate pair in half
159                // and then transcoded that to UTF-8 lossily,
160                // since our DOMString is currently strict UTF-8.
161                if astral.is_some() {
162                    substring += "\u{FFFD}";
163                }
164            },
165        };
166        Ok(DOMString::from(substring))
167    }
168
169    /// <https://dom.spec.whatwg.org/#dom-characterdata-appenddata>
170    fn AppendData(&self, data: DOMString) {
171        // > The appendData(data) method steps are to replace data of this with this’s length, 0, and data.
172        //
173        // FIXME(ajeffrey): Efficient append on DOMStrings?
174        self.append_data(&data.str());
175    }
176
177    /// <https://dom.spec.whatwg.org/#dom-characterdata-insertdata>
178    fn InsertData(&self, offset: u32, arg: DOMString) -> ErrorResult {
179        // > The insertData(offset, data) method steps are to replace data of this with offset, 0, and data.
180        self.ReplaceData(offset, 0, arg)
181    }
182
183    /// <https://dom.spec.whatwg.org/#dom-characterdata-deletedata>
184    fn DeleteData(&self, offset: u32, count: u32) -> ErrorResult {
185        // > The deleteData(offset, count) method steps are to replace data of this with offset, count, and the empty string.
186        self.ReplaceData(offset, count, DOMString::new())
187    }
188
189    /// <https://dom.spec.whatwg.org/#dom-characterdata-replacedata>
190    fn ReplaceData(&self, offset: u32, count: u32, arg: DOMString) -> ErrorResult {
191        let mut new_data;
192        {
193            let data = self.data.borrow();
194            let prefix;
195            let replacement_before;
196            let remaining;
197            match split_at_utf16_code_unit_offset(&data, offset) {
198                Ok((p, astral, r)) => {
199                    prefix = p;
200                    // As if we had split the UTF-16 surrogate pair in half
201                    // and then transcoded that to UTF-8 lossily,
202                    // since our DOMString is currently strict UTF-8.
203                    replacement_before = if astral.is_some() { "\u{FFFD}" } else { "" };
204                    remaining = r;
205                },
206                // Step 2.
207                Err(()) => return Err(Error::IndexSize(None)),
208            };
209            let replacement_after;
210            let suffix;
211            match split_at_utf16_code_unit_offset(remaining, count) {
212                // Steps 3.
213                Err(()) => {
214                    replacement_after = "";
215                    suffix = "";
216                },
217                Ok((_, astral, s)) => {
218                    // As if we had split the UTF-16 surrogate pair in half
219                    // and then transcoded that to UTF-8 lossily,
220                    // since our DOMString is currently strict UTF-8.
221                    replacement_after = if astral.is_some() { "\u{FFFD}" } else { "" };
222                    suffix = s;
223                },
224            };
225            // Step 4: Mutation observers.
226            self.queue_mutation_record();
227
228            // Step 5 to 7.
229            new_data = String::with_capacity(
230                prefix.len() +
231                    replacement_before.len() +
232                    arg.len() +
233                    replacement_after.len() +
234                    suffix.len(),
235            );
236            new_data.push_str(prefix);
237            new_data.push_str(replacement_before);
238            new_data.push_str(&arg.str());
239            new_data.push_str(replacement_after);
240            new_data.push_str(suffix);
241        }
242        *self.data.borrow_mut() = new_data;
243        self.content_changed();
244        // Steps 8-11.
245        let node = self.upcast::<Node>();
246        node.ranges().replace_code_units(
247            node,
248            offset,
249            count,
250            arg.str().encode_utf16().count() as u32,
251        );
252        Ok(())
253    }
254
255    /// <https://dom.spec.whatwg.org/#dom-childnode-before>
256    fn Before(&self, nodes: Vec<NodeOrString>, can_gc: CanGc) -> ErrorResult {
257        self.upcast::<Node>().before(nodes, can_gc)
258    }
259
260    /// <https://dom.spec.whatwg.org/#dom-childnode-after>
261    fn After(&self, nodes: Vec<NodeOrString>, can_gc: CanGc) -> ErrorResult {
262        self.upcast::<Node>().after(nodes, can_gc)
263    }
264
265    /// <https://dom.spec.whatwg.org/#dom-childnode-replacewith>
266    fn ReplaceWith(&self, nodes: Vec<NodeOrString>, can_gc: CanGc) -> ErrorResult {
267        self.upcast::<Node>().replace_with(nodes, can_gc)
268    }
269
270    /// <https://dom.spec.whatwg.org/#dom-childnode-remove>
271    fn Remove(&self, can_gc: CanGc) {
272        let node = self.upcast::<Node>();
273        node.remove_self(can_gc);
274    }
275
276    /// <https://dom.spec.whatwg.org/#dom-nondocumenttypechildnode-previouselementsibling>
277    fn GetPreviousElementSibling(&self) -> Option<DomRoot<Element>> {
278        self.upcast::<Node>()
279            .preceding_siblings()
280            .find_map(DomRoot::downcast)
281    }
282
283    /// <https://dom.spec.whatwg.org/#dom-nondocumenttypechildnode-nextelementsibling>
284    fn GetNextElementSibling(&self) -> Option<DomRoot<Element>> {
285        self.upcast::<Node>()
286            .following_siblings()
287            .find_map(DomRoot::downcast)
288    }
289}
290
/// Access to the text of a `CharacterData` node through a layout handle.
pub(crate) trait LayoutCharacterDataHelpers<'dom> {
    /// Consumes the handle and returns the node's text as a string slice valid for `'dom`.
    fn data_for_layout(self) -> &'dom str;
}
294
295impl<'dom> LayoutCharacterDataHelpers<'dom> for LayoutDom<'dom, CharacterData> {
296    #[expect(unsafe_code)]
297    #[inline]
298    fn data_for_layout(self) -> &'dom str {
299        unsafe { self.unsafe_get().data.borrow_for_layout() }
300    }
301}
302
303/// Split the given string at the given position measured in UTF-16 code units from the start.
304///
305/// * `Err(())` indicates that `offset` if after the end of the string
306/// * `Ok((before, None, after))` indicates that `offset` is between Unicode code points.
307///   The two string slices are such that:
308///   `before == s.to_utf16()[..offset].to_utf8()` and
309///   `after == s.to_utf16()[offset..].to_utf8()`
310/// * `Ok((before, Some(ch), after))` indicates that `offset` is "in the middle"
311///   of a single Unicode code point that would be represented in UTF-16 by a surrogate pair
312///   of two 16-bit code units.
313///   `ch` is that code point.
314///   The two string slices are such that:
315///   `before == s.to_utf16()[..offset - 1].to_utf8()` and
316///   `after == s.to_utf16()[offset + 1..].to_utf8()`
317fn split_at_utf16_code_unit_offset(s: &str, offset: u32) -> Result<(&str, Option<char>, &str), ()> {
318    let mut code_units = 0;
319    for (i, c) in s.char_indices() {
320        if code_units == offset {
321            let (a, b) = s.split_at(i);
322            return Ok((a, None, b));
323        }
324        code_units += 1;
325        if c > '\u{FFFF}' {
326            if code_units == offset {
327                debug_assert_eq!(c.len_utf8(), 4);
328                warn!("Splitting a surrogate pair in CharacterData API.");
329                return Ok((&s[..i], Some(c), &s[i + c.len_utf8()..]));
330            }
331            code_units += 1;
332        }
333    }
334    if code_units == offset {
335        Ok((s, None, ""))
336    } else {
337        Err(())
338    }
339}