Skip to main content

script_bindings/
domstring.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::context::{JSContext, RawJSContext};
17use js::conversions::{ToJSValConvertible, jsstr_to_string};
18use js::gc::{HandleValue, MutableHandleValue};
19use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSString};
20use js::jsval::StringValue;
21use js::rust::{Runtime, Trace};
22use malloc_size_of::MallocSizeOfOps;
23use num_traits::{ToPrimitive, Zero};
24use regex::Regex;
25use servo_base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
26use style::Atom;
27use style::str::HTML_SPACE_CHARACTERS;
28use zeroize::Zeroize;
29
30use crate::trace::RootedTraceableBox;
31
// Byte values used by the byte-level fast paths in this file, written as byte literals so
// that the character each constant stands for is visible at a glance.

/// `~` (0x7E). Note that DEL (0x7F) lies above this value.
const ASCII_END: u8 = b'~';
const ASCII_CAPITAL_A: u8 = b'A';
const ASCII_CAPITAL_Z: u8 = b'Z';
const ASCII_LOWERCASE_A: u8 = b'a';
const ASCII_LOWERCASE_Z: u8 = b'z';
const ASCII_TAB: u8 = b'\t';
const ASCII_NEWLINE: u8 = b'\n';
const ASCII_FORMFEED: u8 = b'\x0C';
const ASCII_CR: u8 = b'\r';
const ASCII_SPACE: u8 = b' ';
42
43/// Gets the latin1 bytes from the js engine.
44/// Safety: Make sure the *mut JSString is not null.
45unsafe fn get_latin1_string_bytes(
46    rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
47) -> &[u8] {
48    debug_assert!(!rooted_traceable_box.get().is_null());
49    let mut length = 0;
50    unsafe {
51        let chars = JS_GetLatin1StringCharsAndLength(
52            Runtime::get().expect("JS runtime has shut down").as_ptr(),
53            ptr::null(),
54            rooted_traceable_box.get(),
55            &mut length,
56        );
57        assert!(!chars.is_null());
58        slice::from_raw_parts(chars, length)
59    }
60}
61
/// A borrowed view of the bytes backing a [`DOMString`], tagged with the encoding those
/// bytes are stored in.
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// These bytes are Latin1 encoded.
    Latin1(Ref<'a, [u8]>),
    /// These bytes are UTF-8 encoded.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Return a reference to the raw bytes of this [`EncodedBytes`] without any information about
    /// the underlying encoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// Return the number of bytes this string occupies once it is encoded as UTF-8.
    ///
    /// For UTF-8 data this is the number of stored bytes. For Latin1 data every ASCII byte
    /// (`0x00..=0x7F`, which includes DEL) takes one byte in UTF-8 and every other byte
    /// (`0x80..=0xFF`) takes two.
    pub fn len(&self) -> usize {
        match self {
            Self::Latin1(bytes) => bytes
                .iter()
                .map(|byte| if byte.is_ascii() { 1 } else { 2 })
                .sum(),
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Return whether or not there is any data in this collection of bytes.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
96
/// The internal storage of a [`DOMString`]: either an owned Rust string or a reference to a
/// string that still lives in the JS engine.
#[derive(Zeroize)]
enum DOMStringType {
    /// A simple rust string
    Rust(String),
    /// A JS String stored in mozjs. `DOMString::from_js_string` only stores strings here
    /// that the engine reports as having Latin1 characters.
    // This variant is excluded from zeroizing.
    #[zeroize(skip)]
    JSString(RootedTraceableBox<Heap<*mut JSString>>),
    #[cfg(test)]
    /// This is used for testing of the bindings to give
    /// a raw u8 Latin1 encoded string without having a js engine.
    Latin1Vec(Vec<u8>),
}
109
110impl Default for DOMStringType {
111    fn default() -> Self {
112        Self::Rust(Default::default())
113    }
114}
115
116impl DOMStringType {
117    /// Warning:
118    /// This function does not checking and just returns the raw bytes of the string,
119    /// independently if they are  utf8 or latin1.
120    /// The caller needs to take care that these make sense in context.
121    fn as_raw_bytes(&self) -> &[u8] {
122        match self {
123            DOMStringType::Rust(s) => s.as_bytes(),
124            DOMStringType::JSString(rooted_traceable_box) => unsafe {
125                get_latin1_string_bytes(rooted_traceable_box)
126            },
127            #[cfg(test)]
128            DOMStringType::Latin1Vec(items) => items,
129        }
130    }
131
132    fn ensure_rust_string(&mut self) -> &mut String {
133        let new_string = match self {
134            DOMStringType::Rust(string) => return string,
135            DOMStringType::JSString(rooted_traceable_box) => {
136                let cx = unsafe { JSContext::get_from_thread() };
137                let cx = cx.as_ref().expect("JS runtime has shut down");
138                unsafe { jsstr_to_string(cx, NonNull::new(rooted_traceable_box.get()).unwrap()) }
139            },
140            #[cfg(test)]
141            DOMStringType::Latin1Vec(items) => {
142                let mut v = vec![0; items.len() * 2];
143                let real_size =
144                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
145                v.truncate(real_size);
146
147                // Safety: convert_latin1_to_utf8 converts the raw bytes to utf8 and the
148                // buffer is the size specified in the documentation, so this should be safe.
149                unsafe { String::from_utf8_unchecked(v) }
150            },
151        };
152        *self = DOMStringType::Rust(new_string);
153        self.ensure_rust_string()
154    }
155}
156
157/// A reference to a Rust `str` of UTF-8 encoded bytes, used to get a Rust
158/// string from a [`DOMString`].
159#[derive(Debug)]
160pub struct StringView<'a>(Ref<'a, str>);
161
162impl StringView<'_> {
163    pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
164        self.split(HTML_SPACE_CHARACTERS)
165            .filter(|string| !string.is_empty())
166    }
167}
168
169impl From<StringView<'_>> for String {
170    fn from(string_view: StringView<'_>) -> Self {
171        string_view.0.to_string()
172    }
173}
174
175impl Deref for StringView<'_> {
176    type Target = str;
177    fn deref(&self) -> &str {
178        &(self.0)
179    }
180}
181
182impl AsRef<str> for StringView<'_> {
183    fn as_ref(&self) -> &str {
184        &(self.0)
185    }
186}
187
188impl PartialEq for StringView<'_> {
189    fn eq(&self, other: &Self) -> bool {
190        self.0.eq(&*(other.0))
191    }
192}
193
194impl PartialEq<&str> for StringView<'_> {
195    fn eq(&self, other: &&str) -> bool {
196        self.0.eq(*other)
197    }
198}
199
200impl Eq for StringView<'_> {}
201
202impl PartialOrd for StringView<'_> {
203    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
204        self.0.partial_cmp(&**other)
205    }
206}
207
208impl Ord for StringView<'_> {
209    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
210        self.0.cmp(other)
211    }
212}
213
214/// Safety comment:
215///
216/// This method will _not_ trace the pointer if the rust string exists.
217/// The js string could be garbage collected and, hence, violating this
218/// could lead to undefined behavior
219unsafe impl Trace for DOMStringType {
220    unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
221        unsafe {
222            match self {
223                DOMStringType::Rust(_s) => {},
224                DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
225                #[cfg(test)]
226                DOMStringType::Latin1Vec(_s) => {},
227            }
228        }
229    }
230}
231
232impl malloc_size_of::MallocSizeOf for DOMStringType {
233    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
234        match self {
235            DOMStringType::Rust(s) => s.size_of(ops),
236            DOMStringType::JSString(_rooted_traceable_box) => {
237                // Managed by JS Engine
238                0
239            },
240            #[cfg(test)]
241            DOMStringType::Latin1Vec(s) => s.size_of(ops),
242        }
243    }
244}
245
246impl std::fmt::Debug for DOMStringType {
247    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248        match self {
249            DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
250            DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
251            #[cfg(test)]
252            DOMStringType::Latin1Vec(s) => f
253                .debug_struct("DOMString")
254                .field("latin1_string", s)
255                .finish(),
256        }
257    }
258}
259
/// A DOMString.
///
/// This type corresponds to the [`DOMString`] type in WebIDL.
///
/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString
///
/// Conceptually, a DOMString has the same value space as a JavaScript String,
/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with
/// unpaired surrogates present (also sometimes called WTF-16).
///
/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have
/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values
/// can not be represented as a Rust `String`). This introduces the question of
/// what to do with values being passed from JavaScript to Rust that contain
/// unpaired surrogates.
///
/// The hypothesis is that it does not matter much how exactly those values are
/// transformed, because passing unpaired surrogates into the DOM is very rare.
/// Instead, Servo will replace the unpaired surrogate by a U+FFFD replacement
/// character.
///
/// Currently, the lack of crash reports about this issue provides some
/// evidence to support the hypothesis. This evidence will hopefully be used to
/// convince other browser vendors that it would be safe to replace unpaired
/// surrogates at the boundary between JavaScript and native code. (This would
/// unify the `DOMString` and `USVString` types, both in the WebIDL standard
/// and in Servo.)
///
/// This string class will either keep the reference to the mozjs object alive
/// or will have an internal Rust string.
/// We currently default to doing most of the string operations on the Rust side.
/// You should use `str()` to get the Rust string (represented by a `StringView`
/// which you can deref to a `&str`). You should assume that this conversion is
/// expensive. For now, you should assume that all the functions incur this
/// conversion cost.
#[repr(transparent)]
#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
298
299impl Clone for DOMString {
300    fn clone(&self) -> Self {
301        self.ensure_rust_string().clone().into()
302    }
303}
304
/// The ways in which creating a [`DOMString`] can fail.
///
/// The derived traits allow a `Result` carrying this error to be unwrapped, logged and
/// compared like any other error value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DOMStringErrorType {
    /// The JS value could not be converted to a JS string.
    JSConversionError,
}
308
309impl DOMString {
310    /// Creates a new `DOMString`.
311    pub fn new() -> DOMString {
312        Default::default()
313    }
314
315    /// Creates the string from js. If the string can be encoded in latin1, just take the reference
316    /// to the JSString. Otherwise do the conversion to utf8 now.
317    pub fn from_js_string(
318        cx: &mut JSContext,
319        value: HandleValue,
320    ) -> Result<DOMString, DOMStringErrorType> {
321        let string_ptr = unsafe { js::rust::ToString(cx, value) };
322        if string_ptr.is_null() {
323            debug!("ToString failed");
324            Err(DOMStringErrorType::JSConversionError)
325        } else {
326            let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
327            let inner = if latin1 {
328                let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
329                DOMStringType::JSString(h)
330            } else {
331                // We need to convert the string anyway as it is not just latin1
332                DOMStringType::Rust(unsafe {
333                    jsstr_to_string(cx, NonNull::new(string_ptr).unwrap())
334                })
335            };
336            Ok(DOMString(RefCell::new(inner)))
337        }
338    }
339
340    /// Transforms the internal storage of this [`DOMString`] into a Rust string if it is not
341    /// yet one. This will make a copy of the underlying string data.
342    fn ensure_rust_string(&self) -> RefMut<'_, String> {
343        let inner = self.0.borrow_mut();
344        RefMut::map(inner, |inner| inner.ensure_rust_string())
345    }
346
347    /// Debug the current  state of the string without modifying it.
348    #[expect(unused)]
349    fn debug_js(&self, cx: &JSContext) {
350        match *self.0.borrow() {
351            DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
352            DOMStringType::JSString(ref rooted_traceable_box) => {
353                let s = unsafe {
354                    jsstr_to_string(cx, NonNull::new(rooted_traceable_box.get()).unwrap())
355                };
356                info!("JSString ({})", s);
357            },
358            #[cfg(test)]
359            DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
360        }
361    }
362
363    /// Returns the underlying rust string.
364    pub fn str(&self) -> StringView<'_> {
365        {
366            let inner = self.0.borrow();
367            if matches!(&*inner, DOMStringType::Rust(..)) {
368                return StringView(Ref::map(inner, |inner| match inner {
369                    DOMStringType::Rust(string) => string.as_str(),
370                    _ => unreachable!("Guaranteed by condition above"),
371                }));
372            }
373        }
374
375        self.ensure_rust_string();
376        self.str()
377    }
378
379    /// Return the [`EncodedBytes`] of this [`DOMString`]. This returns the original encoded
380    /// bytes of the string without doing any conversions.
381    pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
382        let inner = self.0.borrow();
383        match &*inner {
384            DOMStringType::Rust(..) => {
385                EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
386            },
387            _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
388        }
389    }
390
391    pub fn clear(&mut self) {
392        let mut inner = self.0.borrow_mut();
393        let DOMStringType::Rust(string) = &mut *inner else {
394            *inner = DOMStringType::Rust(String::new());
395            return;
396        };
397        string.clear();
398    }
399
400    pub fn is_empty(&self) -> bool {
401        self.encoded_bytes().is_empty()
402    }
403
404    /// The length of this string in UTF-8 code units, each one being one byte in size.
405    ///
406    /// Note: This is different than the number of Unicode characters (or code points). A
407    /// character may require multiple UTF-8 code units.
408    pub fn len(&self) -> usize {
409        self.encoded_bytes().len()
410    }
411
412    /// The length of this string in UTF-8 code units, each one being one byte in size.
413    /// This method is the same as [`DOMString::len`], but the result is wrapped in a
414    /// `Utf8CodeUnitLength` to be used in code that mixes different kinds of offsets.
415    ///
416    /// Note: This is different than the number of Unicode characters (or code points). A
417    /// character may require multiple UTF-8 code units.
418    pub fn len_utf8(&self) -> Utf8CodeUnitLength {
419        Utf8CodeUnitLength(self.len())
420    }
421
422    /// The length of this string in UTF-16 code units, each one being one two bytes in size.
423    ///
424    /// Note: This is different than the number of Unicode characters (or code points). A
425    /// character may require multiple UTF-16 code units.
426    pub fn len_utf16(&self) -> Utf16CodeUnitLength {
427        Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
428    }
429
430    pub fn make_ascii_lowercase(&mut self) {
431        self.0
432            .borrow_mut()
433            .ensure_rust_string()
434            .make_ascii_lowercase();
435    }
436
437    pub fn push_str(&mut self, string_to_push: &str) {
438        self.0
439            .borrow_mut()
440            .ensure_rust_string()
441            .push_str(string_to_push);
442    }
443
444    /// <https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace>
445    pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
446        if self.is_empty() {
447            return;
448        }
449
450        let mut inner = self.0.borrow_mut();
451        let string = inner.ensure_rust_string();
452        let trailing_whitespace_len = string
453            .trim_end_matches(|character: char| character.is_ascii_whitespace())
454            .len();
455        string.truncate(trailing_whitespace_len);
456        if string.is_empty() {
457            return;
458        }
459
460        let first_non_whitespace = string
461            .find(|character: char| !character.is_ascii_whitespace())
462            .unwrap();
463        string.replace_range(0..first_non_whitespace, "");
464    }
465
466    /// <https://html.spec.whatwg.org/multipage/#valid-floating-point-number>
467    pub fn is_valid_floating_point_number_string(&self) -> bool {
468        static RE: LazyLock<Regex> = LazyLock::new(|| {
469            Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
470        });
471
472        RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
473            self.parse_floating_point_number().is_some()
474    }
475
476    pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
477        self.str().parse::<T>()
478    }
479
480    /// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
481    pub fn parse_floating_point_number(&self) -> Option<f64> {
482        parse_floating_point_number(&self.str())
483    }
484
485    /// <https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number>
486    pub fn set_best_representation_of_the_floating_point_number(&mut self) {
487        if let Some(val) = self.parse_floating_point_number() {
488            // [tc39] Step 2: If x is either +0 or -0, return "0".
489            let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
490
491            *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
492        }
493    }
494
495    pub fn to_lowercase(&self) -> String {
496        self.str().to_lowercase()
497    }
498
499    pub fn to_uppercase(&self) -> String {
500        self.str().to_uppercase()
501    }
502
503    pub fn strip_newlines(&mut self) {
504        // > To strip newlines from a string, remove any U+000A LF and U+000D CR code
505        // > points from the string.
506        self.0
507            .borrow_mut()
508            .ensure_rust_string()
509            .retain(|character| character != '\r' && character != '\n');
510    }
511
512    /// Normalize newlines according to <https://infra.spec.whatwg.org/#normalize-newlines>.
513    pub fn normalize_newlines(&mut self) {
514        // > To normalize newlines in a string, replace every U+000D CR U+000A LF code point
515        // > pair with a single U+000A LF code point, and then replace every remaining
516        // > U+000D CR code point with a U+000A LF code point.
517        let mut inner = self.0.borrow_mut();
518        let string = inner.ensure_rust_string();
519        *string = string.replace("\r\n", "\n").replace("\r", "\n")
520    }
521
522    pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
523        let new_string = self.str().to_owned();
524        DOMString(RefCell::new(DOMStringType::Rust(
525            new_string.replace(needle, replace_char),
526        )))
527    }
528
529    /// Pattern is not yet stable in rust, hence, we need different methods for str and char
530    pub fn starts_with(&self, c: char) -> bool {
531        if !c.is_ascii() {
532            self.str().starts_with(c)
533        } else {
534            // As this is an ASCII character, it is guaranteed to be a single byte, no matter if the
535            // underlying encoding is UTF-8 or Latin1.
536            self.encoded_bytes().bytes().starts_with(&[c as u8])
537        }
538    }
539
540    pub fn starts_with_str(&self, needle: &str) -> bool {
541        self.str().starts_with(needle)
542    }
543
544    pub fn ends_with_str(&self, needle: &str) -> bool {
545        self.str().ends_with(needle)
546    }
547
548    pub fn contains(&self, needle: &str) -> bool {
549        self.str().contains(needle)
550    }
551
552    pub fn to_ascii_lowercase(&self) -> String {
553        let conversion = match self.encoded_bytes() {
554            EncodedBytes::Latin1(bytes) => {
555                if bytes.iter().all(|c| *c <= ASCII_END) {
556                    // We are just simple ascii
557                    Some(unsafe {
558                        String::from_utf8_unchecked(
559                            bytes
560                                .iter()
561                                .map(|c| {
562                                    if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
563                                        c + 32
564                                    } else {
565                                        *c
566                                    }
567                                })
568                                .collect(),
569                        )
570                    })
571                } else {
572                    None
573                }
574            },
575            EncodedBytes::Utf8(bytes) => unsafe {
576                // Save because we know it was a utf8 string
577                Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
578            },
579        };
580        // We otherwise would double borrow the refcell
581        if let Some(conversion) = conversion {
582            conversion
583        } else {
584            self.str().to_ascii_lowercase()
585        }
586    }
587
588    fn contains_space_characters(
589        &self,
590        latin1_characters: &'static [u8],
591        utf8_characters: &'static [char],
592    ) -> bool {
593        match self.encoded_bytes() {
594            EncodedBytes::Latin1(items) => {
595                latin1_characters.iter().any(|byte| items.contains(byte))
596            },
597            EncodedBytes::Utf8(bytes) => {
598                // Save because we know it was a utf8 string
599                let s = unsafe { str::from_utf8_unchecked(&bytes) };
600                s.contains(utf8_characters)
601            },
602        }
603    }
604
605    /// <https://infra.spec.whatwg.org/#ascii-tab-or-newline>
606    pub fn contains_tab_or_newline(&self) -> bool {
607        const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
608        const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
609
610        self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
611    }
612
613    /// <https://infra.spec.whatwg.org/#ascii-whitespace>
614    pub fn contains_html_space_characters(&self) -> bool {
615        const SPACE_BYTES: [u8; 5] = [
616            ASCII_TAB,
617            ASCII_NEWLINE,
618            ASCII_FORMFEED,
619            ASCII_CR,
620            ASCII_SPACE,
621        ];
622        self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
623    }
624
625    /// This returns the string in utf8 bytes, i.e., `[u8]` encoded with utf8.
626    pub fn as_bytes(&self) -> BytesView<'_> {
627        // BytesView will just give the raw bytes on dereference.
628        // If we are ascii this is the same for latin1 and utf8.
629        // Otherwise we convert to rust.
630        if self.is_ascii() {
631            BytesView(self.0.borrow())
632        } else {
633            self.ensure_rust_string();
634            BytesView(self.0.borrow())
635        }
636    }
637
638    /// Tests if there are only ascii lowercase characters. Does not include special characters.
639    pub fn is_ascii_lowercase(&self) -> bool {
640        match self.encoded_bytes() {
641            EncodedBytes::Latin1(items) => items
642                .iter()
643                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
644            EncodedBytes::Utf8(s) => s
645                .iter()
646                .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
647                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
648        }
649    }
650
651    /// Is the string only ascii characters
652    pub fn is_ascii(&self) -> bool {
653        self.encoded_bytes().bytes().is_ascii()
654    }
655
656    /// Returns true if the slice only contains bytes that are safe to use in cookie strings.
657    /// <https://www.ietf.org/archive/id/draft-ietf-httpbis-rfc6265bis-15.html#section-5.6-6>
658    /// Not using ServoCookie::is_valid_name_or_value to prevent dependency on the net crate.
659    pub fn is_valid_for_cookie(&self) -> bool {
660        match self.encoded_bytes() {
661            EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
662                .iter()
663                .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
664        }
665    }
666
667    /// Call the callback with a `&str` reference of the string stored in this [`DOMString`]. Note
668    /// that if the [`DOMString`] cannot be interpreted as a Rust string a conversion will be done.
669    fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
670        match self.encoded_bytes() {
671            // If the Latin1 string is all ASCII bytes, then it is safe to interpret it as UTF-8.
672            EncodedBytes::Latin1(latin1_bytes) => {
673                if latin1_bytes.iter().all(|character| character.is_ascii()) {
674                    // SAFETY: All characters are ASCII, so it is safe to interpret this string as
675                    // UTF-8.
676                    return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
677                }
678            },
679            EncodedBytes::Utf8(utf8_bytes) => {
680                // SAFETY: These are the bytes of a UTF-8 string already, so they can be interpreted
681                // as UTF-8.
682                return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
683            },
684        };
685        callback(self.str().deref())
686    }
687
688    /// Newline replacement routine as described in step 1 of the multipart/form-data
689    /// encoding algorithm and many steps of application/x-www-form-urlencoded.
690    /// e.g. <https://html.spec.whatwg.org/multipage/#convert-to-a-list-of-name-value-pairs>
691    ///
692    /// Replace every occurrence of U+000D (CR) not followed by U+000A (LF),
693    /// and every occurrence of U+000A (LF) not preceded by U+000D (CR), in entry's name,
694    /// by a string consisting of a U+000D (CR) and U+000A (LF).
695    pub fn normalize_crlf(&self) -> String {
696        let s = self.str();
697        let mut buf = String::new();
698        let mut prev = ' ';
699        for ch in s.chars() {
700            match ch {
701                '\n' if prev != '\r' => {
702                    buf.push('\r');
703                    buf.push('\n');
704                },
705                '\n' => {
706                    buf.push('\n');
707                },
708                // This character isn't LF but is
709                // preceded by CR
710                _ if prev == '\r' => {
711                    buf.push('\n');
712                    buf.push(ch);
713                },
714                _ => buf.push(ch),
715            };
716            prev = ch;
717        }
718        // In case the last character was CR
719        if prev == '\r' {
720            buf.push('\n');
721        }
722        buf
723    }
724}
725
/// Parses `input` as a floating-point number.
///
/// Leading and trailing whitespace is ignored. Returns `None` if what remains is not a
/// number that Rust can parse, if it starts with `+`, if it ends with `.`, or if the parsed
/// value is infinite or NaN.
///
/// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    // Steps 15-16 are telling us things about IEEE rounding modes
    // for floating-point significands; this code assumes the Rust
    // compiler already matches them in any cases where
    // that actually matters. They are not
    // related to f64::round(), which is for rounding to integers.
    let trimmed = input.trim();

    // A valid number is the same as what rust considers to be valid,
    // except for +1., NaN, and Infinity. The sign and the trailing dot are checked on the
    // trimmed text, which is the text that actually gets parsed, so that surrounding
    // whitespace cannot hide them.
    if trimmed.starts_with('+') || trimmed.ends_with('.') {
        return None;
    }

    trimmed
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite())
}
739
740pub struct BytesView<'a>(Ref<'a, DOMStringType>);
741
742impl Deref for BytesView<'_> {
743    type Target = [u8];
744
745    fn deref(&self) -> &Self::Target {
746        // This does the correct thing by the construction of BytesView in `DOMString::as_bytes`.
747        self.0.as_raw_bytes()
748    }
749}
750
751impl Ord for DOMString {
752    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
753        self.str().cmp(&other.str())
754    }
755}
756
757impl PartialOrd for DOMString {
758    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
759        self.str().partial_cmp(&other.str())
760    }
761}
762
763impl Extend<char> for DOMString {
764    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
765        self.0.borrow_mut().ensure_rust_string().extend(iter)
766    }
767}
768
769impl ToJSValConvertible for DOMString {
770    unsafe fn to_jsval(&self, cx: *mut RawJSContext, mut rval: MutableHandleValue) {
771        let val = self.0.borrow();
772        match *val {
773            DOMStringType::Rust(ref s) => unsafe {
774                s.to_jsval(cx, rval);
775            },
776            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
777                rval.set(StringValue(&*rooted_traceable_box.get()));
778            },
779            #[cfg(test)]
780            DOMStringType::Latin1Vec(ref items) => {
781                let mut v = vec![0; items.len() * 2];
782                let real_size =
783                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
784                v.truncate(real_size);
785
786                String::from_utf8(v)
787                    .expect("Error in constructin test string")
788                    .to_jsval(cx, rval);
789            },
790        };
791    }
792}
793
794impl std::hash::Hash for DOMString {
795    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
796        self.str().hash(state);
797    }
798}
799
800impl std::fmt::Display for DOMString {
801    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
802        fmt::Display::fmt(self.str().deref(), f)
803    }
804}
805
806impl std::cmp::PartialEq<str> for DOMString {
807    fn eq(&self, other: &str) -> bool {
808        if other.is_ascii() {
809            *other.as_bytes() == *self.encoded_bytes().bytes()
810        } else {
811            self.str().deref() == other
812        }
813    }
814}
815
816impl std::cmp::PartialEq<&str> for DOMString {
817    fn eq(&self, other: &&str) -> bool {
818        self.eq(*other)
819    }
820}
821
822impl std::cmp::PartialEq<String> for DOMString {
823    fn eq(&self, other: &String) -> bool {
824        self.eq(other.as_str())
825    }
826}
827
828impl std::cmp::PartialEq<DOMString> for String {
829    fn eq(&self, other: &DOMString) -> bool {
830        other.eq(self)
831    }
832}
833
834impl std::cmp::PartialEq<DOMString> for str {
835    fn eq(&self, other: &DOMString) -> bool {
836        other.eq(self)
837    }
838}
839
840impl std::cmp::PartialEq for DOMString {
841    fn eq(&self, other: &DOMString) -> bool {
842        let result = match (self.encoded_bytes(), other.encoded_bytes()) {
843            (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
844                Some(*bytes == *other_bytes)
845            },
846            (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
847                if other_bytes.is_ascii() =>
848            {
849                Some(*bytes == *other_bytes)
850            },
851            (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
852                Some(*bytes == *other_bytes)
853            },
854            (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
855                Some(*bytes == *other_bytes)
856            },
857            _ => None,
858        };
859
860        if let Some(eq_result) = result {
861            return eq_result;
862        }
863
864        *self.str() == *other.str()
865    }
866}
867
// Marker impl only; the comparison itself lives in `PartialEq for DOMString`.
impl std::cmp::Eq for DOMString {}
869
870impl From<std::string::String> for DOMString {
871    fn from(string: String) -> Self {
872        DOMString(RefCell::new(DOMStringType::Rust(string)))
873    }
874}
875
876impl From<&str> for DOMString {
877    fn from(string: &str) -> Self {
878        String::from(string).into()
879    }
880}
881
882impl From<DOMString> for LocalName {
883    fn from(dom_string: DOMString) -> LocalName {
884        dom_string.with_str_reference(|string| LocalName::from(string))
885    }
886}
887
888impl From<&DOMString> for LocalName {
889    fn from(dom_string: &DOMString) -> LocalName {
890        dom_string.with_str_reference(|string| LocalName::from(string))
891    }
892}
893
894impl From<DOMString> for Namespace {
895    fn from(dom_string: DOMString) -> Namespace {
896        dom_string.with_str_reference(|string| Namespace::from(string))
897    }
898}
899
900impl From<DOMString> for Atom {
901    fn from(dom_string: DOMString) -> Atom {
902        dom_string.with_str_reference(|string| Atom::from(string))
903    }
904}
905
906impl From<DOMString> for String {
907    fn from(val: DOMString) -> Self {
908        val.ensure_rust_string();
909        let inner = val.0.take();
910        match inner {
911            DOMStringType::Rust(s) => s,
912            DOMStringType::JSString(_) => unreachable!(),
913            #[cfg(test)]
914            DOMStringType::Latin1Vec(items) => String::from_utf8(items).expect("Not valid latin1"),
915        }
916    }
917}
918
919impl From<DOMString> for Vec<u8> {
920    fn from(value: DOMString) -> Self {
921        value.ensure_rust_string();
922        let inner = value.0.take();
923        match inner {
924            DOMStringType::Rust(s) => s.into_bytes(),
925            DOMStringType::JSString(_) => unreachable!(),
926            #[cfg(test)]
927            DOMStringType::Latin1Vec(items) => items,
928        }
929    }
930}
931
932impl From<Cow<'_, str>> for DOMString {
933    fn from(value: Cow<'_, str>) -> Self {
934        DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
935    }
936}
937
938impl Zeroize for DOMString {
939    fn zeroize(&mut self) {
940        self.0.get_mut().zeroize();
941    }
942}
943
/// Helper for `match_domstring_ascii!` that turns a list of match arms into an
/// `if`/`else` chain.
///
/// * `$variant` is accepted by both rules but is not referenced in either expansion.
/// * `$input` must provide a `bytes()` method; it is called once per literal arm.
/// * A literal arm compares `literal.as_bytes()` with `$input.bytes()`. A `debug_assert!`
///   checks that the literal is ASCII, so a non-ASCII literal panics in debug builds.
/// * The last arm is an ordinary pattern (typically `_`) and must be followed by a
///   trailing comma; it is expanded into a plain `match` on `$input`.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // Literal arm followed by further arms: test this literal, otherwise recurse on the rest.
    ($variant: expr, $input: expr, $ascii_literal: literal => $then: expr, $($rest:tt)*) => {
        if {
            debug_assert!(($ascii_literal).is_ascii());
            $ascii_literal.as_bytes()
        } == $input.bytes() {
          $then
        } else {
            $crate::match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // Terminal arm: a single pattern that ends the chain.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
963
/// Use this to match a `DOMString` against ASCII string literals efficiently.
/// You are only allowed to match ASCII literals, otherwise this macro will
/// lead to wrong results (a non-ASCII literal trips a `debug_assert!` in debug builds).
/// The last arm must be a catch-all pattern followed by a trailing comma.
/// ```ignore
/// let s = DOMString::from("test");
/// let value = match_domstring_ascii!(s,
/// "test1" => 1,
/// "test2" => 2,
/// "test" => 3,
/// _ => 4,
/// );
/// assert_eq!(value, 3);
/// ```
///
/// The `RefCell` inside `DOMString` is borrowed for the duration of the `match`,
/// so the string cannot be accessed again inside a `match` arm.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::domstring::EncodedBytes;

            // Fetch the encoded bytes once; both encodings expand the same arm chain.
            let encoded_bytes = $input.encoded_bytes();
            match encoded_bytes {
                EncodedBytes::Latin1(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, encoded_bytes, $($tail)*)
                }
                EncodedBytes::Utf8(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, encoded_bytes, $($tail)*)
                }

            }
        }
    };
}
999
1000#[cfg(test)]
1001mod tests {
1002    use super::*;
1003
1004    const LATIN1_PILLCROW: u8 = 0xB6;
1005    const UTF8_PILLCROW: [u8; 2] = [194, 182];
1006    const LATIN1_POWER2: u8 = 0xB2;
1007
1008    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
1009        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
1010    }
1011
1012    #[test]
1013    fn string_functions() {
1014        let s = DOMString::from("AbBcC❤&%$#");
1015        let s_copy = s.clone();
1016        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
1017        assert_eq!(s, s_copy);
1018        assert_eq!(s.len(), 12);
1019        assert_eq!(s_copy.len(), 12);
1020        assert!(s.starts_with('A'));
1021        let s2 = DOMString::from("");
1022        assert!(s2.is_empty());
1023    }
1024
1025    #[test]
1026    fn string_functions_latin1() {
1027        {
1028            let s = from_latin1(vec![
1029                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
1030            ]);
1031            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
1032        }
1033        {
1034            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
1035            assert_eq!(s.to_ascii_lowercase(), "abbcc");
1036        }
1037        {
1038            let s = from_latin1(vec![
1039                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
1040            ]);
1041            assert_eq!(s.len(), 11);
1042            assert!(s.starts_with('A'));
1043        }
1044        {
1045            let s = from_latin1(vec![]);
1046            assert!(s.is_empty());
1047        }
1048    }
1049
1050    #[test]
1051    fn test_length() {
1052        let s1 = from_latin1(vec![
1053            0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
1054            0xAE, 0xAF,
1055        ]);
1056        let s2 = from_latin1(vec![
1057            0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
1058            0xBE, 0xBF,
1059        ]);
1060        let s3 = from_latin1(vec![
1061            0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
1062            0xCE, 0xCF,
1063        ]);
1064        let s4 = from_latin1(vec![
1065            0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
1066            0xDE, 0xDF,
1067        ]);
1068        let s5 = from_latin1(vec![
1069            0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
1070            0xEE, 0xEF,
1071        ]);
1072        let s6 = from_latin1(vec![
1073            0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
1074            0xFE, 0xFF,
1075        ]);
1076
1077        let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
1078        let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
1079        let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
1080        let s4_utf8 = String::from("ÐÑÒÓÔÕÖרÙÚÛÜÝÞß");
1081        let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
1082        let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");
1083
1084        assert_eq!(s1.len(), s1_utf8.len());
1085        assert_eq!(s2.len(), s2_utf8.len());
1086        assert_eq!(s3.len(), s3_utf8.len());
1087        assert_eq!(s4.len(), s4_utf8.len());
1088        assert_eq!(s5.len(), s5_utf8.len());
1089        assert_eq!(s6.len(), s6_utf8.len());
1090
1091        s1.ensure_rust_string();
1092        s2.ensure_rust_string();
1093        s3.ensure_rust_string();
1094        s4.ensure_rust_string();
1095        s5.ensure_rust_string();
1096        s6.ensure_rust_string();
1097        assert_eq!(s1.len(), s1_utf8.len());
1098        assert_eq!(s2.len(), s2_utf8.len());
1099        assert_eq!(s3.len(), s3_utf8.len());
1100        assert_eq!(s4.len(), s4_utf8.len());
1101        assert_eq!(s5.len(), s5_utf8.len());
1102        assert_eq!(s6.len(), s6_utf8.len());
1103    }
1104
1105    #[test]
1106    fn test_convert() {
1107        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1108        s.ensure_rust_string();
1109        assert_eq!(&*s.str(), "abc%$");
1110    }
1111
1112    #[test]
1113    fn partial_eq() {
1114        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1115        let string = String::from("abc%$");
1116        let s2 = DOMString::from(string.clone());
1117        assert_eq!(s, s2);
1118        assert_eq!(s, string);
1119    }
1120
1121    #[test]
1122    fn encoded_latin1_bytes() {
1123        let original_latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
1124        let dom_string = from_latin1(original_latin1_bytes.clone());
1125        let string_latin1_bytes = match dom_string.encoded_bytes() {
1126            EncodedBytes::Latin1(bytes) => bytes,
1127            _ => unreachable!("Expected Latin1 encoded bytes"),
1128        };
1129        assert_eq!(*original_latin1_bytes, *string_latin1_bytes);
1130    }
1131
1132    #[test]
1133    fn testing_stringview() {
1134        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1135
1136        assert_eq!(
1137            s.str().chars().collect::<Vec<char>>(),
1138            vec!['a', 'b', 'c', '%', '$', '²']
1139        );
1140        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
1141    }
1142
1143    // We need to be extra careful here as two strings that have different
1144    // representation need to have the same hash.
1145    // Additionally, the interior mutability is only used for the conversion
1146    // which is forced by Hash. Hence, it is safe to have this interior mutability.
1147    #[test]
1148    fn test_hash() {
1149        use std::hash::{DefaultHasher, Hash, Hasher};
1150        fn hash_value(d: &DOMString) -> u64 {
1151            let mut hasher = DefaultHasher::new();
1152            d.hash(&mut hasher);
1153            hasher.finish()
1154        }
1155
1156        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1157        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1158        s_converted.ensure_rust_string();
1159        let s2 = DOMString::from("abc%$²");
1160
1161        let hash_s = hash_value(&s);
1162        let hash_s_converted = hash_value(&s_converted);
1163        let hash_s2 = hash_value(&s2);
1164
1165        assert_eq!(hash_s, hash_s2);
1166        assert_eq!(hash_s, hash_s_converted);
1167    }
1168
1169    // Testing match_lazydomstring if it executes the statements in the match correctly
1170    #[test]
1171    fn test_match_executing() {
1172        // executing
1173        {
1174            let s = from_latin1(vec![b'a', b'b', b'c']);
1175            match_domstring_ascii!( s,
1176                "abc" => assert!(true),
1177                "bcd" => assert!(false),
1178                _ =>  (),
1179            );
1180        }
1181
1182        {
1183            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1184            match_domstring_ascii!( s,
1185                "abc/" => assert!(true),
1186                "bcd" => assert!(false),
1187                _ =>  (),
1188            );
1189        }
1190
1191        {
1192            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1193            match_domstring_ascii!( s,
1194                "bcd" => assert!(false),
1195                "abc%$" => assert!(true),
1196                _ => (),
1197            );
1198        }
1199
1200        {
1201            let s = DOMString::from("abcde");
1202            match_domstring_ascii!( s,
1203                "abc" => assert!(false),
1204                "bcd" => assert!(false),
1205                _ => assert!(true),
1206            );
1207        }
1208        {
1209            let s = DOMString::from("abc%$");
1210            match_domstring_ascii!( s,
1211                "bcd" => assert!(false),
1212                "abc%$" => assert!(true),
1213                _ =>  (),
1214            );
1215        }
1216        {
1217            let s = from_latin1(vec![b'a', b'b', b'c']);
1218            match_domstring_ascii!( s,
1219                "abcdd" => assert!(false),
1220                "bcd" => assert!(false),
1221                _ => (),
1222            );
1223        }
1224    }
1225
1226    // Testing match_lazydomstring if it evaluates to the correct expression
1227    #[test]
1228    fn test_match_returning_result() {
1229        {
1230            let s = from_latin1(vec![b'a', b'b', b'c']);
1231            let res = match_domstring_ascii!( s,
1232                "abc" => true,
1233                "bcd" => false,
1234                _ => false,
1235            );
1236            assert_eq!(res, true);
1237        }
1238        {
1239            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1240            let res = match_domstring_ascii!( s,
1241                "abc/" => true,
1242                "bcd" => false,
1243                _ => false,
1244            );
1245            assert_eq!(res, true);
1246        }
1247        {
1248            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1249            let res = match_domstring_ascii!( s,
1250                "bcd" => false,
1251                "abc%$" => true,
1252                _ => false,
1253            );
1254            assert_eq!(res, true);
1255        }
1256
1257        {
1258            let s = DOMString::from("abcde");
1259            let res = match_domstring_ascii!( s,
1260                "abc" => false,
1261                "bcd" => false,
1262                _ => true,
1263            );
1264            assert_eq!(res, true);
1265        }
1266        {
1267            let s = DOMString::from("abc%$");
1268            let res = match_domstring_ascii!( s,
1269                "bcd" => false,
1270                "abc%$" => true,
1271                _ => false,
1272            );
1273            assert_eq!(res, true);
1274        }
1275        {
1276            let s = from_latin1(vec![b'a', b'b', b'c']);
1277            let res = match_domstring_ascii!( s,
1278                "abcdd" => false,
1279                "bcd" => false,
1280                _ => true,
1281            );
1282            assert_eq!(res, true);
1283        }
1284    }
1285
1286    #[test]
1287    #[cfg(debug_assertions)]
1288    #[should_panic]
1289    fn test_match_panic() {
1290        let s = DOMString::from("abcd");
1291        let _res = match_domstring_ascii!(s,
1292            "❤" => true,
1293            _ => false,);
1294    }
1295
1296    #[test]
1297    #[cfg(debug_assertions)]
1298    #[should_panic]
1299    fn test_match_panic2() {
1300        let s = DOMString::from("abcd");
1301        let _res = match_domstring_ascii!(s,
1302            "abc" => false,
1303            "❤" => true,
1304            _ => false,
1305        );
1306    }
1307
1308    #[test]
1309    fn test_strip_whitespace() {
1310        {
1311            let mut s = from_latin1(vec![
1312                b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
1313            ]);
1314
1315            s.strip_leading_and_trailing_ascii_whitespace();
1316            s.ensure_rust_string();
1317            assert_eq!(&*s.str(), "abc%$²");
1318        }
1319        {
1320            let mut s = DOMString::from("   \n  abc%$ ");
1321
1322            s.strip_leading_and_trailing_ascii_whitespace();
1323            s.ensure_rust_string();
1324            assert_eq!(&*s.str(), "abc%$");
1325        }
1326    }
1327
1328    // https://infra.spec.whatwg.org/#ascii-whitespace
1329    #[test]
1330    fn contains_html_space_characters() {
1331        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_TAB, b'a', b'a']); // TAB
1332        assert!(s.contains_html_space_characters());
1333        s.ensure_rust_string();
1334        assert!(s.contains_html_space_characters());
1335
1336        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_NEWLINE, b'a', b'a']); // NEWLINE
1337        assert!(s.contains_html_space_characters());
1338        s.ensure_rust_string();
1339        assert!(s.contains_html_space_characters());
1340
1341        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_FORMFEED, b'a', b'a']); // FF
1342        assert!(s.contains_html_space_characters());
1343        s.ensure_rust_string();
1344        assert!(s.contains_html_space_characters());
1345
1346        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_CR, b'a', b'a']); // Carriage Return
1347        assert!(s.contains_html_space_characters());
1348        s.ensure_rust_string();
1349        assert!(s.contains_html_space_characters());
1350
1351        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']); // SPACE
1352        assert!(s.contains_html_space_characters());
1353        s.ensure_rust_string();
1354        assert!(s.contains_html_space_characters());
1355
1356        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
1357        assert!(!s.contains_html_space_characters());
1358        s.ensure_rust_string();
1359        assert!(!s.contains_html_space_characters());
1360    }
1361
1362    #[test]
1363    fn atom() {
1364        let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
1365        let atom1 = Atom::from(s);
1366        let s2 = DOMString::from("aaa aa");
1367        let atom2 = Atom::from(s2);
1368        assert_eq!(atom1, atom2);
1369        let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
1370        let atom3 = Atom::from(s3);
1371        assert_ne!(atom1, atom3);
1372    }
1373
1374    #[test]
1375    fn namespace() {
1376        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1377        let atom1 = Namespace::from(s);
1378        let s2 = DOMString::from("aaa aa");
1379        let atom2 = Namespace::from(s2);
1380        assert_eq!(atom1, atom2);
1381        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1382        let atom3 = Namespace::from(s3);
1383        assert_ne!(atom1, atom3);
1384    }
1385
1386    #[test]
1387    fn localname() {
1388        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1389        let atom1 = LocalName::from(s);
1390        let s2 = DOMString::from("aaa aa");
1391        let atom2 = LocalName::from(s2);
1392        assert_eq!(atom1, atom2);
1393        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1394        let atom3 = LocalName::from(s3);
1395        assert_ne!(atom1, atom3);
1396    }
1397
1398    #[test]
1399    fn is_ascii_lowercase() {
1400        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1401        assert!(!s.is_ascii_lowercase());
1402        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
1403        assert!(!s.is_ascii_lowercase());
1404        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
1405        assert!(s.is_ascii_lowercase());
1406        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
1407        assert!(!s.is_ascii_lowercase());
1408        let s = DOMString::from("`aaaz");
1409        assert!(!s.is_ascii_lowercase());
1410        let s = DOMString::from("aaaz");
1411        assert!(s.is_ascii_lowercase());
1412    }
1413
1414    #[test]
1415    fn test_as_bytes() {
1416        const ASCII_SMALL_A: u8 = b'a';
1417        const ASCII_SMALL_Z: u8 = b'z';
1418
1419        let v1 = vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'];
1420        let s = from_latin1(v1.clone());
1421        assert_eq!(
1422            *s.as_bytes(),
1423            [
1424                ASCII_SMALL_A,
1425                ASCII_SMALL_A,
1426                ASCII_SMALL_A,
1427                UTF8_PILLCROW[0],
1428                UTF8_PILLCROW[1],
1429                ASCII_SMALL_A,
1430                ASCII_SMALL_A
1431            ]
1432        );
1433
1434        let v2 = vec![b'a', b'a', b'a', b'a', b'z'];
1435        let s = from_latin1(v2.clone());
1436        assert_eq!(
1437            *s.as_bytes(),
1438            [
1439                ASCII_SMALL_A,
1440                ASCII_SMALL_A,
1441                ASCII_SMALL_A,
1442                ASCII_SMALL_A,
1443                ASCII_SMALL_Z
1444            ]
1445        );
1446
1447        let str = "abc%$²".to_owned();
1448        let s = DOMString::from(str.clone());
1449        assert_eq!(&*s.as_bytes(), str.as_bytes());
1450        let str = "AbBcC❤&%$#".to_owned();
1451        let s = DOMString::from(str.clone());
1452        assert_eq!(&*s.as_bytes(), str.as_bytes());
1453    }
1454}