script_bindings/
str.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5//! The `ByteString` struct.
6use std::borrow::{Borrow, Cow, ToOwned};
7use std::default::Default;
8use std::hash::{Hash, Hasher};
9use std::marker::PhantomData;
10use std::ops::{Deref, DerefMut};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, ops, slice, str};
14
15use cssparser::CowRcStr;
16use html5ever::{LocalName, Namespace};
17use js::rust::wrappers::ToJSON;
18use js::rust::{HandleObject, HandleValue};
19use num_traits::Zero;
20use regex::Regex;
21use stylo_atoms::Atom;
22
23use crate::error::Error;
24use crate::script_runtime::JSContext as SafeJSContext;
25
/// Encapsulates the IDL `ByteString` type.
///
/// The value is held as a raw `Vec<u8>`. Nothing in this type requires the
/// bytes to be valid UTF-8; [`ByteString::as_str`] returns `None` when they
/// are not.
#[derive(Clone, Debug, Default, Eq, JSTraceable, MallocSizeOf, PartialEq)]
pub struct ByteString(Vec<u8>);
29
30impl ByteString {
31    /// Creates a new `ByteString`.
32    pub fn new(value: Vec<u8>) -> ByteString {
33        ByteString(value)
34    }
35
36    /// Returns `self` as a string, if it encodes valid UTF-8, and `None`
37    /// otherwise.
38    pub fn as_str(&self) -> Option<&str> {
39        str::from_utf8(&self.0).ok()
40    }
41
42    /// Returns the length.
43    pub fn len(&self) -> usize {
44        self.0.len()
45    }
46
47    /// Checks if the ByteString is empty.
48    pub fn is_empty(&self) -> bool {
49        self.0.is_empty()
50    }
51
52    /// Returns `self` with A–Z replaced by a–z.
53    pub fn to_lower(&self) -> ByteString {
54        ByteString::new(self.0.to_ascii_lowercase())
55    }
56}
57
58impl From<ByteString> for Vec<u8> {
59    fn from(byte_string: ByteString) -> Vec<u8> {
60        byte_string.0
61    }
62}
63
64impl Hash for ByteString {
65    fn hash<H: Hasher>(&self, state: &mut H) {
66        self.0.hash(state);
67    }
68}
69
70impl FromStr for ByteString {
71    type Err = ();
72    fn from_str(s: &str) -> Result<ByteString, ()> {
73        Ok(ByteString::new(s.to_owned().into_bytes()))
74    }
75}
76
77impl ops::Deref for ByteString {
78    type Target = [u8];
79    fn deref(&self) -> &[u8] {
80        &self.0
81    }
82}
83
/// A string that is constructed from a UCS-2 buffer by replacing invalid code
/// points with the replacement character.
///
/// This is a newtype over a Rust `String`; the inner value is public.
#[derive(Clone, Debug, Default, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)]
pub struct USVString(pub String);
88
89impl Borrow<str> for USVString {
90    #[inline]
91    fn borrow(&self) -> &str {
92        &self.0
93    }
94}
95
96impl Deref for USVString {
97    type Target = str;
98
99    #[inline]
100    fn deref(&self) -> &str {
101        &self.0
102    }
103}
104
105impl DerefMut for USVString {
106    #[inline]
107    fn deref_mut(&mut self) -> &mut str {
108        &mut self.0
109    }
110}
111
112impl AsRef<str> for USVString {
113    fn as_ref(&self) -> &str {
114        &self.0
115    }
116}
117
118impl fmt::Display for USVString {
119    #[inline]
120    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
121        fmt::Display::fmt(&**self, f)
122    }
123}
124
125impl PartialEq<str> for USVString {
126    fn eq(&self, other: &str) -> bool {
127        &**self == other
128    }
129}
130
131impl<'a> PartialEq<&'a str> for USVString {
132    fn eq(&self, other: &&'a str) -> bool {
133        &**self == *other
134    }
135}
136
137impl From<String> for USVString {
138    fn from(contents: String) -> USVString {
139        USVString(contents)
140    }
141}
142
/// Reports whether `s` is a `token` as defined by
/// [RFC 2616](http://tools.ietf.org/html/rfc2616#page-17).
///
/// A token is a non-empty run of US-ASCII bytes that contains neither control
/// characters (0–31 and 127) nor any of the RFC 2616 section 2.2 separators.
/// An empty slice is never a token.
pub fn is_token(s: &[u8]) -> bool {
    // http://tools.ietf.org/html/rfc2616#section-2.2
    // The separators; horizontal tab is already excluded as a control byte.
    const SEPARATORS: &[u8] = b"()<>@,;:\\\"/[]?={} ";

    !s.is_empty()
        && s.iter().all(|byte| {
            // Must be a CHAR (<= 127), not a CTL, and not a separator.
            byte.is_ascii() && !byte.is_ascii_control() && !SEPARATORS.contains(byte)
        })
}
160
/// A DOMString.
///
/// This type corresponds to the [`DOMString`] type in WebIDL.
///
/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString
///
/// Conceptually, a DOMString has the same value space as a JavaScript String,
/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with
/// unpaired surrogates present (also sometimes called WTF-16).
///
/// Currently, this type stores a Rust `String`, in order to avoid issues when
/// integrating with the rest of the Rust ecosystem and even the rest of the
/// browser itself.
///
/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have
/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values
/// can not be represented as a Rust `String`). This introduces the question of
/// what to do with values being passed from JavaScript to Rust that contain
/// unpaired surrogates.
///
/// The hypothesis is that it does not matter much how exactly those values are
/// transformed, because passing unpaired surrogates into the DOM is very rare.
/// Instead Servo will replace the unpaired surrogate by a U+FFFD replacement
/// character.
///
/// Currently, the lack of crash reports about this issue provides some
/// evidence to support the hypothesis. This evidence will hopefully be used to
/// convince other browser vendors that it would be safe to replace unpaired
/// surrogates at the boundary between JavaScript and native code. (This would
/// unify the `DOMString` and `USVString` types, both in the WebIDL standard
/// and in Servo.)
///
/// This type is currently `!Send`, in order to help with an independent
/// experiment to store `JSString`s rather than Rust `String`s. The
/// zero-sized `PhantomData<*const ()>` field is what opts the type out of the
/// automatic `Send` implementation; it carries no data.
#[derive(Clone, Debug, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)]
pub struct DOMString(String, PhantomData<*const ()>);
197
198impl DOMString {
199    /// Creates a new `DOMString`.
200    pub fn new() -> DOMString {
201        DOMString(String::new(), PhantomData)
202    }
203
204    /// Creates a new `DOMString` from a `String`.
205    pub fn from_string(s: String) -> DOMString {
206        DOMString(s, PhantomData)
207    }
208
209    /// Get the internal `&str` value of this [`DOMString`].
210    pub fn str(&self) -> &str {
211        &self.0
212    }
213
214    /// Appends a given string slice onto the end of this String.
215    pub fn push_str(&mut self, string: &str) {
216        self.0.push_str(string)
217    }
218
219    /// Clears this `DOMString`, removing all contents.
220    pub fn clear(&mut self) {
221        self.0.clear()
222    }
223
224    /// Shortens this String to the specified length.
225    pub fn truncate(&mut self, new_len: usize) {
226        self.0.truncate(new_len);
227    }
228
229    /// Removes newline characters according to <https://infra.spec.whatwg.org/#strip-newlines>.
230    pub fn strip_newlines(&mut self) {
231        self.0.retain(|c| c != '\r' && c != '\n');
232    }
233
234    /// Removes leading and trailing ASCII whitespaces according to
235    /// <https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace>.
236    pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
237        if self.0.is_empty() {
238            return;
239        }
240
241        let trailing_whitespace_len = self
242            .0
243            .trim_end_matches(|ref c| char::is_ascii_whitespace(c))
244            .len();
245        self.0.truncate(trailing_whitespace_len);
246        if self.0.is_empty() {
247            return;
248        }
249
250        let first_non_whitespace = self.0.find(|ref c| !char::is_ascii_whitespace(c)).unwrap();
251        self.0.replace_range(0..first_non_whitespace, "");
252    }
253
254    /// <https://html.spec.whatwg.org/multipage/#valid-floating-point-number>
255    pub fn is_valid_floating_point_number_string(&self) -> bool {
256        static RE: LazyLock<Regex> = LazyLock::new(|| {
257            Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
258        });
259
260        RE.is_match(&self.0) && self.parse_floating_point_number().is_some()
261    }
262
263    /// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
264    pub fn parse_floating_point_number(&self) -> Option<f64> {
265        // Steps 15-16 are telling us things about IEEE rounding modes
266        // for floating-point significands; this code assumes the Rust
267        // compiler already matches them in any cases where
268        // that actually matters. They are not
269        // related to f64::round(), which is for rounding to integers.
270        let input = &self.0;
271        if let Ok(val) = input.trim().parse::<f64>() {
272            if !(
273                // A valid number is the same as what rust considers to be valid,
274                // except for +1., NaN, and Infinity.
275                val.is_infinite() || val.is_nan() || input.ends_with('.') || input.starts_with('+')
276            ) {
277                return Some(val);
278            }
279        }
280        None
281    }
282
283    /// Applies the same processing as `parse_floating_point_number` with some additional handling
284    /// according to ECMA's string conversion steps.
285    ///
286    /// Used for specific elements when handling floating point values, namely the `number` and
287    /// `range` inputs, as well as `meter` and `progress` elements.
288    ///
289    /// <https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number>
290    /// <https://tc39.es/ecma262/#sec-numeric-types-number-tostring>
291    pub fn set_best_representation_of_the_floating_point_number(&mut self) {
292        if let Some(val) = self.parse_floating_point_number() {
293            // [tc39] Step 2: If x is either +0 or -0, return "0".
294            let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
295
296            self.0 = parsed_value.to_string()
297        }
298    }
299}
300
301/// Because this converts to a DOMString it becomes UTF-8 encoded which is closer to
302/// the spec definition of <https://infra.spec.whatwg.org/#serialize-a-javascript-value-to-json-bytes>
303/// but we generally do not operate on anything that is truly a WTF-16 string.
304///
305/// <https://infra.spec.whatwg.org/#serialize-a-javascript-value-to-a-json-string>
306pub fn serialize_jsval_to_json_utf8(
307    cx: SafeJSContext,
308    data: HandleValue,
309) -> Result<DOMString, Error> {
310    #[repr(C)]
311    struct ToJSONCallbackData {
312        string: Option<String>,
313    }
314
315    let mut out_str = ToJSONCallbackData { string: None };
316
317    #[allow(unsafe_code)]
318    unsafe extern "C" fn write_callback(
319        string: *const u16,
320        len: u32,
321        data: *mut std::ffi::c_void,
322    ) -> bool {
323        let data = data as *mut ToJSONCallbackData;
324        let string_chars = unsafe { slice::from_raw_parts(string, len as usize) };
325        unsafe { &mut *data }
326            .string
327            .get_or_insert_with(Default::default)
328            .push_str(&String::from_utf16_lossy(string_chars));
329        true
330    }
331
332    // 1. Let result be ? Call(%JSON.stringify%, undefined, « value »).
333    unsafe {
334        let stringify_result = ToJSON(
335            *cx,
336            data,
337            HandleObject::null(),
338            HandleValue::null(),
339            Some(write_callback),
340            &mut out_str as *mut ToJSONCallbackData as *mut _,
341        );
342        // Note: ToJSON returns false when a JS error is thrown, so we need to return
343        // JSFailed to propagate the raised exception
344        if !stringify_result {
345            return Err(Error::JSFailed);
346        }
347    }
348
349    // 2. If result is undefined, then throw a TypeError.
350    // Note: ToJSON will not call the callback if the data cannot be serialized.
351    // 3. Assert: result is a string.
352    // 4. Return result.
353    out_str
354        .string
355        .map(Into::into)
356        .ok_or_else(|| Error::Type("unable to serialize JSON".to_owned()))
357}
358
359impl Borrow<str> for DOMString {
360    #[inline]
361    fn borrow(&self) -> &str {
362        &self.0
363    }
364}
365
366impl Default for DOMString {
367    fn default() -> Self {
368        DOMString(String::new(), PhantomData)
369    }
370}
371
372impl Deref for DOMString {
373    type Target = str;
374
375    #[inline]
376    fn deref(&self) -> &str {
377        &self.0
378    }
379}
380
381impl DerefMut for DOMString {
382    #[inline]
383    fn deref_mut(&mut self) -> &mut str {
384        &mut self.0
385    }
386}
387
388impl AsRef<str> for DOMString {
389    fn as_ref(&self) -> &str {
390        &self.0
391    }
392}
393
394impl fmt::Display for DOMString {
395    #[inline]
396    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
397        fmt::Display::fmt(&**self, f)
398    }
399}
400
401impl PartialEq<str> for DOMString {
402    fn eq(&self, other: &str) -> bool {
403        &**self == other
404    }
405}
406
407impl<'a> PartialEq<&'a str> for DOMString {
408    fn eq(&self, other: &&'a str) -> bool {
409        &**self == *other
410    }
411}
412
413impl From<String> for DOMString {
414    fn from(contents: String) -> DOMString {
415        DOMString(contents, PhantomData)
416    }
417}
418
419impl From<&str> for DOMString {
420    fn from(contents: &str) -> DOMString {
421        DOMString::from(String::from(contents))
422    }
423}
424
425impl<'a> From<Cow<'a, str>> for DOMString {
426    fn from(contents: Cow<'a, str>) -> DOMString {
427        match contents {
428            Cow::Owned(s) => DOMString::from(s),
429            Cow::Borrowed(s) => DOMString::from(s),
430        }
431    }
432}
433
434impl From<DOMString> for LocalName {
435    fn from(contents: DOMString) -> LocalName {
436        LocalName::from(contents.0)
437    }
438}
439
440impl From<DOMString> for Namespace {
441    fn from(contents: DOMString) -> Namespace {
442        Namespace::from(contents.0)
443    }
444}
445
446impl From<DOMString> for Atom {
447    fn from(contents: DOMString) -> Atom {
448        Atom::from(contents.0)
449    }
450}
451
452impl From<DOMString> for String {
453    fn from(contents: DOMString) -> String {
454        contents.0
455    }
456}
457
458impl From<DOMString> for Vec<u8> {
459    fn from(contents: DOMString) -> Vec<u8> {
460        contents.0.into()
461    }
462}
463
464impl<'a> From<DOMString> for Cow<'a, str> {
465    fn from(contents: DOMString) -> Cow<'a, str> {
466        contents.0.into()
467    }
468}
469
470impl<'a> From<DOMString> for CowRcStr<'a> {
471    fn from(contents: DOMString) -> CowRcStr<'a> {
472        contents.0.into()
473    }
474}
475
476impl Extend<char> for DOMString {
477    fn extend<I>(&mut self, iterable: I)
478    where
479        I: IntoIterator<Item = char>,
480    {
481        self.0.extend(iterable)
482    }
483}