script/dom/
characterdata.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5//! DOM bindings for `CharacterData`.
6use std::cell::LazyCell;
7
8use dom_struct::dom_struct;
9use js::context::JSContext;
10use script_bindings::codegen::InheritTypes::{CharacterDataTypeId, NodeTypeId, TextTypeId};
11
12use crate::dom::bindings::cell::{DomRefCell, Ref};
13use crate::dom::bindings::codegen::Bindings::CharacterDataBinding::CharacterDataMethods;
14use crate::dom::bindings::codegen::Bindings::NodeBinding::Node_Binding::NodeMethods;
15use crate::dom::bindings::codegen::Bindings::ProcessingInstructionBinding::ProcessingInstructionMethods;
16use crate::dom::bindings::codegen::UnionTypes::NodeOrString;
17use crate::dom::bindings::error::{Error, ErrorResult, Fallible};
18use crate::dom::bindings::inheritance::Castable;
19use crate::dom::bindings::root::{DomRoot, LayoutDom};
20use crate::dom::bindings::str::DOMString;
21use crate::dom::cdatasection::CDATASection;
22use crate::dom::comment::Comment;
23use crate::dom::document::Document;
24use crate::dom::element::Element;
25use crate::dom::mutationobserver::{Mutation, MutationObserver};
26use crate::dom::node::{ChildrenMutation, Node, NodeDamage};
27use crate::dom::processinginstruction::ProcessingInstruction;
28use crate::dom::text::Text;
29use crate::dom::virtualmethods::vtable_for;
30use crate::script_runtime::CanGc;
31
/// <https://dom.spec.whatwg.org/#characterdata>
///
/// State shared by the character-data node kinds handled in this file
/// (`Text`, `CDATASection`, `Comment` and `ProcessingInstruction`).
#[dom_struct]
pub(crate) struct CharacterData {
    /// The underlying `Node`, created with `Node::new_inherited`.
    node: Node,
    /// The node's data, held as a Rust (UTF-8) `String`. The DOM-facing
    /// methods in this file measure offsets and lengths in UTF-16 code units
    /// and convert on the fly.
    data: DomRefCell<String>,
}
38
39impl CharacterData {
40    pub(crate) fn new_inherited(data: DOMString, document: &Document) -> CharacterData {
41        CharacterData {
42            node: Node::new_inherited(document),
43            data: DomRefCell::new(String::from(data.str())),
44        }
45    }
46
47    pub(crate) fn clone_with_data(
48        &self,
49        data: DOMString,
50        document: &Document,
51        can_gc: CanGc,
52    ) -> DomRoot<Node> {
53        match self.upcast::<Node>().type_id() {
54            NodeTypeId::CharacterData(CharacterDataTypeId::Comment) => {
55                DomRoot::upcast(Comment::new(data, document, None, can_gc))
56            },
57            NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) => {
58                let pi = self.downcast::<ProcessingInstruction>().unwrap();
59                DomRoot::upcast(ProcessingInstruction::new(
60                    pi.Target(),
61                    data,
62                    document,
63                    can_gc,
64                ))
65            },
66            NodeTypeId::CharacterData(CharacterDataTypeId::Text(TextTypeId::CDATASection)) => {
67                DomRoot::upcast(CDATASection::new(data, document, can_gc))
68            },
69            NodeTypeId::CharacterData(CharacterDataTypeId::Text(TextTypeId::Text)) => {
70                DomRoot::upcast(Text::new(data, document, can_gc))
71            },
72            _ => unreachable!(),
73        }
74    }
75
76    #[inline]
77    pub(crate) fn data(&self) -> Ref<'_, String> {
78        self.data.borrow()
79    }
80
81    #[inline]
82    pub(crate) fn append_data(&self, data: &str) {
83        self.queue_mutation_record();
84        self.data.borrow_mut().push_str(data);
85        self.content_changed();
86    }
87
88    #[expect(unsafe_code)]
89    fn content_changed(&self) {
90        // TODO https://github.com/servo/servo/issues/43234
91        let mut cx = unsafe { script_bindings::script_runtime::temp_cx() };
92        let cx = &mut cx;
93
94        let node = self.upcast::<Node>();
95        node.dirty(NodeDamage::Other);
96
97        // If this is a Text node, we might need to re-parse (say, if our parent
98        // is a <style> element.) We don't need to if this is a Comment or
99        // ProcessingInstruction.
100        if self.is::<Text>() {
101            if let Some(parent_node) = node.GetParentNode() {
102                let mutation = ChildrenMutation::ChangeText;
103                vtable_for(&parent_node).children_changed(cx, &mutation);
104            }
105        }
106    }
107
108    // Queue a MutationObserver record before changing the content.
109    fn queue_mutation_record(&self) {
110        let mutation = LazyCell::new(|| Mutation::CharacterData {
111            old_value: self.data.borrow().clone(),
112        });
113        MutationObserver::queue_a_mutation_record(self.upcast::<Node>(), mutation);
114    }
115}
116
117impl CharacterDataMethods<crate::DomTypeHolder> for CharacterData {
118    /// <https://dom.spec.whatwg.org/#dom-characterdata-data>
119    fn Data(&self) -> DOMString {
120        DOMString::from(self.data.borrow().clone())
121    }
122
123    /// <https://dom.spec.whatwg.org/#dom-characterdata-data>
124    fn SetData(&self, data: DOMString) {
125        self.queue_mutation_record();
126        let old_length = self.Length();
127        let new_length = data.str().encode_utf16().count() as u32;
128        *self.data.borrow_mut() = String::from(data.str());
129        self.content_changed();
130        let node = self.upcast::<Node>();
131        node.ranges()
132            .replace_code_units(node, 0, old_length, new_length);
133    }
134
135    /// <https://dom.spec.whatwg.org/#dom-characterdata-length>
136    fn Length(&self) -> u32 {
137        self.data.borrow().encode_utf16().count() as u32
138    }
139
140    /// <https://dom.spec.whatwg.org/#dom-characterdata-substringdata>
141    fn SubstringData(&self, offset: u32, count: u32) -> Fallible<DOMString> {
142        let data = self.data.borrow();
143        // Step 1.
144        let mut substring = String::new();
145        let remaining = match split_at_utf16_code_unit_offset(&data, offset) {
146            Ok((_, astral, s)) => {
147                // As if we had split the UTF-16 surrogate pair in half
148                // and then transcoded that to UTF-8 lossily,
149                // since our DOMString is currently strict UTF-8.
150                if astral.is_some() {
151                    substring += "\u{FFFD}";
152                }
153                s
154            },
155            // Step 2.
156            Err(()) => return Err(Error::IndexSize(None)),
157        };
158        match split_at_utf16_code_unit_offset(remaining, count) {
159            // Steps 3.
160            Err(()) => substring += remaining,
161            // Steps 4.
162            Ok((s, astral, _)) => {
163                substring += s;
164                // As if we had split the UTF-16 surrogate pair in half
165                // and then transcoded that to UTF-8 lossily,
166                // since our DOMString is currently strict UTF-8.
167                if astral.is_some() {
168                    substring += "\u{FFFD}";
169                }
170            },
171        };
172        Ok(DOMString::from(substring))
173    }
174
175    /// <https://dom.spec.whatwg.org/#dom-characterdata-appenddata>
176    fn AppendData(&self, data: DOMString) {
177        // > The appendData(data) method steps are to replace data of this with this’s length, 0, and data.
178        //
179        // FIXME(ajeffrey): Efficient append on DOMStrings?
180        self.append_data(&data.str());
181    }
182
183    /// <https://dom.spec.whatwg.org/#dom-characterdata-insertdata>
184    fn InsertData(&self, offset: u32, arg: DOMString) -> ErrorResult {
185        // > The insertData(offset, data) method steps are to replace data of this with offset, 0, and data.
186        self.ReplaceData(offset, 0, arg)
187    }
188
189    /// <https://dom.spec.whatwg.org/#dom-characterdata-deletedata>
190    fn DeleteData(&self, offset: u32, count: u32) -> ErrorResult {
191        // > The deleteData(offset, count) method steps are to replace data of this with offset, count, and the empty string.
192        self.ReplaceData(offset, count, DOMString::new())
193    }
194
195    /// <https://dom.spec.whatwg.org/#dom-characterdata-replacedata>
196    fn ReplaceData(&self, offset: u32, count: u32, arg: DOMString) -> ErrorResult {
197        let mut new_data;
198        {
199            let data = self.data.borrow();
200            let prefix;
201            let replacement_before;
202            let remaining;
203            match split_at_utf16_code_unit_offset(&data, offset) {
204                Ok((p, astral, r)) => {
205                    prefix = p;
206                    // As if we had split the UTF-16 surrogate pair in half
207                    // and then transcoded that to UTF-8 lossily,
208                    // since our DOMString is currently strict UTF-8.
209                    replacement_before = if astral.is_some() { "\u{FFFD}" } else { "" };
210                    remaining = r;
211                },
212                // Step 2.
213                Err(()) => return Err(Error::IndexSize(None)),
214            };
215            let replacement_after;
216            let suffix;
217            match split_at_utf16_code_unit_offset(remaining, count) {
218                // Steps 3.
219                Err(()) => {
220                    replacement_after = "";
221                    suffix = "";
222                },
223                Ok((_, astral, s)) => {
224                    // As if we had split the UTF-16 surrogate pair in half
225                    // and then transcoded that to UTF-8 lossily,
226                    // since our DOMString is currently strict UTF-8.
227                    replacement_after = if astral.is_some() { "\u{FFFD}" } else { "" };
228                    suffix = s;
229                },
230            };
231            // Step 4: Mutation observers.
232            self.queue_mutation_record();
233
234            // Step 5 to 7.
235            new_data = String::with_capacity(
236                prefix.len() +
237                    replacement_before.len() +
238                    arg.len() +
239                    replacement_after.len() +
240                    suffix.len(),
241            );
242            new_data.push_str(prefix);
243            new_data.push_str(replacement_before);
244            new_data.push_str(&arg.str());
245            new_data.push_str(replacement_after);
246            new_data.push_str(suffix);
247        }
248        *self.data.borrow_mut() = new_data;
249        self.content_changed();
250        // Steps 8-11.
251        let node = self.upcast::<Node>();
252        node.ranges().replace_code_units(
253            node,
254            offset,
255            count,
256            arg.str().encode_utf16().count() as u32,
257        );
258        Ok(())
259    }
260
261    /// <https://dom.spec.whatwg.org/#dom-childnode-before>
262    fn Before(&self, cx: &mut JSContext, nodes: Vec<NodeOrString>) -> ErrorResult {
263        self.upcast::<Node>().before(cx, nodes)
264    }
265
266    /// <https://dom.spec.whatwg.org/#dom-childnode-after>
267    fn After(&self, cx: &mut JSContext, nodes: Vec<NodeOrString>) -> ErrorResult {
268        self.upcast::<Node>().after(cx, nodes)
269    }
270
271    /// <https://dom.spec.whatwg.org/#dom-childnode-replacewith>
272    fn ReplaceWith(&self, cx: &mut JSContext, nodes: Vec<NodeOrString>) -> ErrorResult {
273        self.upcast::<Node>().replace_with(cx, nodes)
274    }
275
276    /// <https://dom.spec.whatwg.org/#dom-childnode-remove>
277    fn Remove(&self, cx: &mut JSContext) {
278        self.upcast::<Node>().remove_self(cx);
279    }
280
281    /// <https://dom.spec.whatwg.org/#dom-nondocumenttypechildnode-previouselementsibling>
282    fn GetPreviousElementSibling(&self) -> Option<DomRoot<Element>> {
283        self.upcast::<Node>()
284            .preceding_siblings()
285            .find_map(DomRoot::downcast)
286    }
287
288    /// <https://dom.spec.whatwg.org/#dom-nondocumenttypechildnode-nextelementsibling>
289    fn GetNextElementSibling(&self) -> Option<DomRoot<Element>> {
290        self.upcast::<Node>()
291            .following_siblings()
292            .find_map(DomRoot::downcast)
293    }
294}
295
/// Accessor for character data through a `LayoutDom` handle; implemented in
/// this file for `LayoutDom<'dom, CharacterData>`.
pub(crate) trait LayoutCharacterDataHelpers<'dom> {
    /// Returns the node's data as a string slice living for `'dom`.
    fn data_for_layout(self) -> &'dom str;
}
299
impl<'dom> LayoutCharacterDataHelpers<'dom> for LayoutDom<'dom, CharacterData> {
    /// Returns the node's data via `borrow_for_layout` rather than the regular
    /// `DomRefCell::borrow` used elsewhere in this file.
    #[expect(unsafe_code)]
    #[inline]
    fn data_for_layout(self) -> &'dom str {
        // NOTE(review): presumably sound because layout only reads the DOM
        // while script is not mutating it — confirm against the contracts of
        // `unsafe_get` and `borrow_for_layout`.
        unsafe { self.unsafe_get().data.borrow_for_layout() }
    }
}
307
/// Splits `s` at a position given in UTF-16 code units counted from the start.
///
/// * `Err(())` means that `offset` is after the end of the string.
/// * `Ok((before, None, after))` means that `offset` falls between two Unicode
///   code points (or at either end of the string). The slices satisfy
///   `before == s.to_utf16()[..offset].to_utf8()` and
///   `after == s.to_utf16()[offset..].to_utf8()`.
/// * `Ok((before, Some(ch), after))` means that `offset` falls "in the middle"
///   of a single code point `ch` that UTF-16 represents as a surrogate pair of
///   two 16-bit code units. `ch` belongs to neither slice:
///   `before == s.to_utf16()[..offset - 1].to_utf8()` and
///   `after == s.to_utf16()[offset + 1..].to_utf8()`.
fn split_at_utf16_code_unit_offset(s: &str, offset: u32) -> Result<(&str, Option<char>, &str), ()> {
    // UTF-16 code units occupied by the code points visited so far.
    let mut consumed: u32 = 0;
    for (start, ch) in s.char_indices() {
        if consumed == offset {
            return Ok((&s[..start], None, &s[start..]));
        }
        match ch.len_utf16() {
            1 => consumed += 1,
            _ => {
                // `ch` takes two code units; `offset` may land between them.
                if consumed + 1 == offset {
                    debug_assert_eq!(ch.len_utf8(), 4);
                    warn!("Splitting a surrogate pair in CharacterData API.");
                    let end = start + ch.len_utf8();
                    return Ok((&s[..start], Some(ch), &s[end..]));
                }
                consumed += 2;
            },
        }
    }
    // Only an offset exactly at the end of the string is still valid here.
    if consumed == offset {
        Ok((s, None, ""))
    } else {
        Err(())
    }
}