script_bindings/
domstring.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::{Chars, FromStr};
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
16use html5ever::{LocalName, Namespace};
17use js::conversions::{ToJSValConvertible, jsstr_to_string};
18use js::gc::MutableHandleValue;
19use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
20use js::jsval::StringValue;
21use js::rust::{Runtime, Trace};
22use malloc_size_of::MallocSizeOfOps;
23use num_traits::{ToPrimitive, Zero};
24use regex::Regex;
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27
28use crate::script_runtime::JSContext as SafeJSContext;
29use crate::trace::RootedTraceableBox;
30
/// Highest byte value in the ASCII range (U+007F DELETE). Every byte at or below this
/// value is one character that takes a single byte in both Latin1 and UTF-8.
const ASCII_END: u8 = 0x7F;
/// `A`
const ASCII_CAPITAL_A: u8 = 0x41;
/// `Z`
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `a`
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `z`
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// U+0009 TAB
const ASCII_TAB: u8 = 0x09;
/// U+000A LF
const ASCII_NEWLINE: u8 = 0x0A;
/// U+000C FF
const ASCII_FORMFEED: u8 = 0x0C;
/// U+000D CR
const ASCII_CR: u8 = 0x0D;
/// U+0020 SPACE
const ASCII_SPACE: u8 = 0x20;
41
42/// Gets the latin1 bytes from the js engine.
43/// Safety: Make sure the *mut JSString is not null.
44unsafe fn get_latin1_string_bytes(
45    rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
46) -> &[u8] {
47    debug_assert!(!rooted_traceable_box.get().is_null());
48    let mut length = 0;
49    unsafe {
50        let chars = JS_GetLatin1StringCharsAndLength(
51            Runtime::get().expect("JS runtime has shut down").as_ptr(),
52            ptr::null(),
53            rooted_traceable_box.get(),
54            &mut length,
55        );
56        assert!(!chars.is_null());
57        slice::from_raw_parts(chars, length)
58    }
59}
60
/// The bytes backing a string, tagged with the encoding they are stored in:
/// either Latin1 or UTF-8.
#[derive(Debug, PartialEq, Eq)]
pub enum EncodedBytes<'a> {
    /// Bytes in Latin1 encoding.
    Latin1Bytes(&'a [u8]),
    /// The bytes of an ordinary UTF-8 string.
    Utf8Bytes(&'a [u8]),
}
69
70enum DOMStringType {
71    /// A simple rust string
72    Rust(String),
73    /// A JS String stored in mozjs.
74    JSString(RootedTraceableBox<Heap<*mut JSString>>),
75    #[cfg(test)]
76    /// This is used for testing of the bindings to give
77    /// a raw u8 Latin1 encoded string without having a js engine.
78    Latin1Vec(Vec<u8>),
79}
80
81impl DOMStringType {
82    /// Returns the str if Rust and otherwise panic. You need to call `make_rust`.
83    fn str(&self) -> &str {
84        match self {
85            DOMStringType::Rust(s) => s,
86            DOMStringType::JSString(_rooted_traceable_box) => {
87                panic!("Cannot do a string")
88            },
89            #[cfg(test)]
90            &DOMStringType::Latin1Vec(_) => panic!("Cannot do a string"),
91        }
92    }
93
94    /// Warning:
95    /// This function does not checking and just returns the raw bytes of teh string,
96    /// independently if they are  utf8 or latin1.
97    /// The caller needs to take care that these make sense in context.
98    fn as_raw_bytes(&self) -> &[u8] {
99        match self {
100            DOMStringType::Rust(s) => s.as_bytes(),
101            DOMStringType::JSString(rooted_traceable_box) => unsafe {
102                get_latin1_string_bytes(rooted_traceable_box)
103            },
104            #[cfg(test)]
105            DOMStringType::Latin1Vec(items) => items,
106        }
107    }
108}
109
110#[derive(Debug)]
111/// A view of the underlying string. This is always converted to Utf8.
112pub struct StringView<'a>(Ref<'a, DOMStringType>);
113
114impl<'a> StringView<'a> {
115    pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
116        self.0
117            .str()
118            .split(HTML_SPACE_CHARACTERS)
119            .filter(|s| !s.is_empty())
120    }
121
122    pub fn strip_prefix(&self, needle: &str) -> Option<&str> {
123        self.0.str().strip_prefix(needle)
124    }
125
126    pub fn chars(&self) -> Chars<'_> {
127        self.0.str().chars()
128    }
129
130    pub fn as_bytes(&self) -> &[u8] {
131        self.0.str().as_bytes()
132    }
133}
134
135impl Deref for StringView<'_> {
136    type Target = str;
137    fn deref(&self) -> &str {
138        self.0.str()
139    }
140}
141
142impl AsRef<str> for StringView<'_> {
143    fn as_ref(&self) -> &str {
144        self.deref()
145    }
146}
147
148impl PartialEq for StringView<'_> {
149    fn eq(&self, other: &Self) -> bool {
150        self.0.str() == other.0.str()
151    }
152}
153
154impl PartialEq<&str> for StringView<'_> {
155    fn eq(&self, other: &&str) -> bool {
156        self.0.str() == *other
157    }
158}
159
// Marker impl: view equality is `str` equality, which is a full equivalence relation.
impl Eq for StringView<'_> {}
161
162impl PartialOrd for StringView<'_> {
163    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
164        self.0.str().partial_cmp(other.0.str())
165    }
166}
167
168impl Ord for StringView<'_> {
169    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
170        self.0.str().cmp(other.0.str())
171    }
172}
173
174impl From<StringView<'_>> for String {
175    fn from(value: StringView<'_>) -> Self {
176        String::from(value.0.str())
177    }
178}
179
180/// Safety comment:
181///
182/// This method will _not_ trace the pointer if the rust string exists.
183/// The js string could be garbage collected and, hence, violating this
184/// could lead to undefined behavior
185unsafe impl Trace for DOMStringType {
186    unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
187        unsafe {
188            match self {
189                DOMStringType::Rust(_s) => {},
190                DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
191                #[cfg(test)]
192                DOMStringType::Latin1Vec(_s) => {},
193            }
194        }
195    }
196}
197
198impl malloc_size_of::MallocSizeOf for DOMStringType {
199    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
200        match self {
201            DOMStringType::Rust(s) => s.size_of(ops),
202            DOMStringType::JSString(_rooted_traceable_box) => {
203                // Managed by JS Engine
204                0
205            },
206            #[cfg(test)]
207            DOMStringType::Latin1Vec(s) => s.size_of(ops),
208        }
209    }
210}
211
212impl std::fmt::Debug for DOMStringType {
213    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
214        match self {
215            DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
216            DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
217            #[cfg(test)]
218            DOMStringType::Latin1Vec(s) => f
219                .debug_struct("DOMString")
220                .field("latin1_string", s)
221                .finish(),
222        }
223    }
224}
225
226#[derive(Debug)]
227/// A view of the underlying string. This is never converted to Utf8
228pub struct EncodedBytesView<'a>(Ref<'a, DOMStringType>);
229
230impl EncodedBytesView<'_> {
231    /// Get the bytes of the string in either latin1 or utf8 without costly conversion.
232    pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
233        match *self.0 {
234            DOMStringType::Rust(ref s) => EncodedBytes::Utf8Bytes(s.as_bytes()),
235            DOMStringType::JSString(ref rooted_traceable_box) => {
236                EncodedBytes::Latin1Bytes(unsafe { get_latin1_string_bytes(rooted_traceable_box) })
237            },
238            #[cfg(test)]
239            DOMStringType::Latin1Vec(ref s) => EncodedBytes::Latin1Bytes(s),
240        }
241    }
242
243    fn is_empty(&self) -> bool {
244        match self.encoded_bytes() {
245            EncodedBytes::Latin1Bytes(items) => items.is_empty(),
246            EncodedBytes::Utf8Bytes(s) => s.is_empty(),
247        }
248    }
249
250    fn len(&self) -> usize {
251        match self.encoded_bytes() {
252            EncodedBytes::Latin1Bytes(items) => items
253                .iter()
254                .map(|b| if *b <= ASCII_END { 1 } else { 2 })
255                .sum(),
256            EncodedBytes::Utf8Bytes(s) => s.len(),
257        }
258    }
259}
260
/// A DOMString.
///
/// This type corresponds to the [`DOMString`] type in WebIDL.
///
/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString
///
/// Conceptually, a DOMString has the same value space as a JavaScript String,
/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with
/// unpaired surrogates present (also sometimes called WTF-16).
///
/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have
/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values
/// can not be represented as a Rust `String`). This introduces the question of
/// what to do with values being passed from JavaScript to Rust that contain
/// unpaired surrogates.
///
/// The hypothesis is that it does not matter much how exactly those values are
/// transformed, because passing unpaired surrogates into the DOM is very rare.
/// Instead Servo will replace the unpaired surrogate by a U+FFFD replacement
/// character.
///
/// Currently, the lack of crash reports about this issue provides some
/// evidence to support the hypothesis. This evidence will hopefully be used to
/// convince other browser vendors that it would be safe to replace unpaired
/// surrogates at the boundary between JavaScript and native code. (This would
/// unify the `DOMString` and `USVString` types, both in the WebIDL standard
/// and in Servo.)
///
/// This string class will either keep the reference to the mozjs object alive
/// or hold an internal Rust string.
/// We currently default to doing most of the string operations on the Rust side.
/// You should use `str()` to get the Rust string (represented by a `StringView`
/// which you can deref to a `str`). You should assume that this conversion has a
/// cost, and that all the other functions incur the same conversion cost.
///
#[repr(transparent)]
#[derive(Debug, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
299
300impl Clone for DOMString {
301    fn clone(&self) -> Self {
302        self.make_rust();
303        if let DOMStringType::Rust(ref s) = *self.0.borrow() {
304            DOMString::from_string(s.to_owned())
305        } else {
306            unreachable!()
307        }
308    }
309}
310
/// Errors that can occur while creating a `DOMString`.
///
/// Derives `Debug` so that a `Result` carrying this error can be used with
/// `unwrap`/`expect` and printed in diagnostics.
#[derive(Debug)]
pub enum DOMStringErrorType {
    /// Converting the JS value to a JS string failed.
    JSConversionError,
}
314
315impl DOMString {
316    /// Creates a new `DOMString`.
317    pub fn new() -> DOMString {
318        DOMString(RefCell::new(DOMStringType::Rust(String::new())))
319    }
320
321    /// Creates the string from js. If the string can be encoded in latin1, just take the reference
322    /// to the JSString. Otherwise do the conversion to utf8 now.
323    pub fn from_js_string(
324        cx: SafeJSContext,
325        value: js::gc::HandleValue,
326    ) -> Result<DOMString, DOMStringErrorType> {
327        let string_ptr = unsafe { js::rust::ToString(*cx, value) };
328        if string_ptr.is_null() {
329            debug!("ToString failed");
330            Err(DOMStringErrorType::JSConversionError)
331        } else {
332            let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
333            let inner = if latin1 {
334                let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
335                DOMStringType::JSString(h)
336            } else {
337                // We need to convert the string anyway as it is not just latin1
338                DOMStringType::Rust(unsafe {
339                    jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
340                })
341            };
342            Ok(DOMString(RefCell::new(inner)))
343        }
344    }
345
346    pub fn from_string(s: String) -> DOMString {
347        DOMString(RefCell::new(DOMStringType::Rust(s)))
348    }
349
350    /// Transforms the string into rust string if not yet a rust string.
351    fn make_rust(&self) {
352        let string = {
353            let inner = self.0.borrow();
354            match *inner {
355                DOMStringType::Rust(_) => return,
356                DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
357                    jsstr_to_string(
358                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
359                        NonNull::new(rooted_traceable_box.get()).unwrap(),
360                    )
361                },
362                #[cfg(test)]
363                DOMStringType::Latin1Vec(ref items) => {
364                    let mut v = vec![0; items.len() * 2];
365                    let real_size = encoding_rs::mem::convert_latin1_to_utf8(
366                        items.as_slice(),
367                        v.as_mut_slice(),
368                    );
369                    v.truncate(real_size);
370
371                    // Safety: convert_latin1_to_utf8 converts the raw bytes to utf8 and the
372                    // buffer is the size specified in the documentation, so this should be safe.
373                    unsafe { String::from_utf8_unchecked(v) }
374                },
375            }
376        };
377        *self.0.borrow_mut() = DOMStringType::Rust(string);
378    }
379
380    /// Debug the current  state of the string without modifying it.
381    #[expect(unused)]
382    fn debug_js(&self) {
383        match *self.0.borrow() {
384            DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
385            DOMStringType::JSString(ref rooted_traceable_box) => {
386                let s = unsafe {
387                    jsstr_to_string(
388                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
389                        ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
390                    )
391                };
392                info!("JSString ({})", s);
393            },
394            #[cfg(test)]
395            DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
396        }
397    }
398
399    /// Returns the underlying rust string.
400    pub fn str(&self) -> StringView<'_> {
401        self.make_rust();
402        StringView(self.0.borrow())
403    }
404
405    /// Use this if you want to work on the `EncodedBytes` directly.
406    /// This will not do any conversions for you.
407    pub fn view(&self) -> EncodedBytesView<'_> {
408        EncodedBytesView(self.0.borrow())
409    }
410
411    pub fn clear(&mut self) {
412        *self.0.borrow_mut() = DOMStringType::Rust(String::new())
413    }
414
415    pub fn is_empty(&self) -> bool {
416        self.view().is_empty()
417    }
418
419    /// The length of this string in UTF-8 code units, each one being one byte in size.
420    ///
421    /// Note: This is different than the number of Unicode characters (or code points). A
422    /// character may require multiple UTF-8 code units.
423    pub fn len(&self) -> usize {
424        self.view().len()
425    }
426
427    /// The length of this string in UTF-8 code units, each one being one byte in size.
428    /// This method is the same as [`DOMString::len`], but the result is wrapped in a
429    /// `Utf8CodeUnitLength` to be used in code that mixes different kinds of offsets.
430    ///
431    /// Note: This is different than the number of Unicode characters (or code points). A
432    /// character may require multiple UTF-8 code units.
433    pub fn len_utf8(&self) -> Utf8CodeUnitLength {
434        Utf8CodeUnitLength(self.len())
435    }
436
437    /// The length of this string in UTF-16 code units, each one being one two bytes in size.
438    ///
439    /// Note: This is different than the number of Unicode characters (or code points). A
440    /// character may require multiple UTF-16 code units.
441    pub fn len_utf16(&self) -> Utf16CodeUnitLength {
442        Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
443    }
444
445    pub fn make_ascii_lowercase(&mut self) {
446        self.make_rust();
447        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
448            s.make_ascii_lowercase();
449        }
450    }
451
452    pub fn push_str(&mut self, s: &str) {
453        self.make_rust();
454        if let DOMStringType::Rust(ref mut string) = *self.0.borrow_mut() {
455            string.push_str(s)
456        }
457    }
458
459    pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
460        if self.is_empty() {
461            return;
462        }
463
464        self.make_rust();
465        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
466            let trailing_whitespace_len = s
467                .trim_end_matches(|ref c| char::is_ascii_whitespace(c))
468                .len();
469            s.truncate(trailing_whitespace_len);
470            if s.is_empty() {
471                return;
472            }
473
474            let first_non_whitespace = s.find(|ref c| !char::is_ascii_whitespace(c)).unwrap();
475            s.replace_range(0..first_non_whitespace, "");
476        }
477    }
478
479    /// This is a dom spec
480    pub fn is_valid_floating_point_number_string(&self) -> bool {
481        static RE: LazyLock<Regex> = LazyLock::new(|| {
482            Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
483        });
484        self.make_rust();
485
486        if let DOMStringType::Rust(ref s) = *self.0.borrow() {
487            RE.is_match(s) && self.parse_floating_point_number().is_some()
488        } else {
489            unreachable!()
490        }
491    }
492
493    pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
494        self.make_rust();
495        self.str().parse::<T>()
496    }
497
498    /// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
499    pub fn parse_floating_point_number(&self) -> Option<f64> {
500        self.make_rust();
501        parse_floating_point_number(&self.str())
502    }
503
504    /// <https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number>
505    pub fn set_best_representation_of_the_floating_point_number(&mut self) {
506        if let Some(val) = self.parse_floating_point_number() {
507            // [tc39] Step 2: If x is either +0 or -0, return "0".
508            let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
509
510            *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
511        }
512    }
513
514    pub fn to_lowercase(&self) -> String {
515        self.make_rust();
516        self.str().to_lowercase()
517    }
518
519    pub fn to_uppercase(&self) -> String {
520        self.make_rust();
521        self.str().to_uppercase()
522    }
523
524    pub fn strip_newlines(&mut self) {
525        // > To strip newlines from a string, remove any U+000A LF and U+000D CR code
526        // > points from the string.
527        self.make_rust();
528        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
529            s.retain(|c| c != '\r' && c != '\n');
530        }
531    }
532
533    /// Normalize newlines according to <https://infra.spec.whatwg.org/#normalize-newlines>.
534    pub fn normalize_newlines(&mut self) {
535        self.make_rust();
536        // > To normalize newlines in a string, replace every U+000D CR U+000A LF code point
537        // > pair with a single U+000A LF code point, and then replace every remaining
538        // > U+000D CR code point with a U+000A LF code point.
539        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
540            *s = s.replace("\r\n", "\n").replace("\r", "\n")
541        }
542    }
543
544    pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
545        self.make_rust();
546        let new_string = self.str().to_owned();
547        DOMString(RefCell::new(DOMStringType::Rust(
548            new_string.replace(needle, replace_char),
549        )))
550    }
551
552    /// Pattern is not yet stable in rust, hence, we need different methods for str and char
553    pub fn starts_with(&self, c: char) -> bool {
554        if !c.is_ascii() {
555            self.make_rust();
556            self.str().starts_with(c)
557        } else {
558            match self.view().encoded_bytes() {
559                EncodedBytes::Latin1Bytes(items) => items,
560                EncodedBytes::Utf8Bytes(s) => s,
561            }
562            // For both cases as we tested the char being ascii we can safely convert to a single u8.
563            .starts_with(&[c as u8])
564        }
565    }
566
567    pub fn starts_with_str(&self, needle: &str) -> bool {
568        self.make_rust();
569        self.str().starts_with(needle)
570    }
571
572    pub fn contains(&self, needle: &str) -> bool {
573        self.make_rust();
574        self.str().contains(needle)
575    }
576
577    pub fn to_ascii_lowercase(&self) -> String {
578        let conversion = match self.view().encoded_bytes() {
579            EncodedBytes::Latin1Bytes(items) => {
580                if items.iter().all(|c| *c <= ASCII_END) {
581                    // We are just simple ascii
582                    Some(unsafe {
583                        String::from_utf8_unchecked(
584                            items
585                                .iter()
586                                .map(|c| {
587                                    if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
588                                        c + 32
589                                    } else {
590                                        *c
591                                    }
592                                })
593                                .collect(),
594                        )
595                    })
596                } else {
597                    None
598                }
599            },
600            EncodedBytes::Utf8Bytes(s) => unsafe {
601                // Save because we know it was a utf8 string
602                Some(str::from_utf8_unchecked(s).to_ascii_lowercase())
603            },
604        };
605        // We otherwise would double borrow the refcell
606        if let Some(conversion) = conversion {
607            conversion
608        } else {
609            self.make_rust();
610            self.str().to_ascii_lowercase()
611        }
612    }
613
614    fn contains_space_characters(
615        &self,
616        latin1_characters: &'static [u8],
617        utf8_characters: &'static [char],
618    ) -> bool {
619        match self.view().encoded_bytes() {
620            EncodedBytes::Latin1Bytes(items) => {
621                latin1_characters.iter().any(|byte| items.contains(byte))
622            },
623            EncodedBytes::Utf8Bytes(s) => {
624                // Save because we know it was a utf8 string
625                let s = unsafe { str::from_utf8_unchecked(s) };
626                s.contains(utf8_characters)
627            },
628        }
629    }
630
631    /// <https://infra.spec.whatwg.org/#ascii-tab-or-newline>
632    pub fn contains_tab_or_newline(&self) -> bool {
633        const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
634        const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
635
636        self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
637    }
638
639    /// <https://infra.spec.whatwg.org/#ascii-whitespace>
640    pub fn contains_html_space_characters(&self) -> bool {
641        const SPACE_BYTES: [u8; 5] = [
642            ASCII_TAB,
643            ASCII_NEWLINE,
644            ASCII_FORMFEED,
645            ASCII_CR,
646            ASCII_SPACE,
647        ];
648        self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
649    }
650
651    /// This returns the string in utf8 bytes, i.e., `[u8]` encoded with utf8.
652    pub fn as_bytes(&self) -> BytesView<'_> {
653        // BytesView will just give the raw bytes on dereference.
654        // If we are ascii this is the same for latin1 and utf8.
655        // Otherwise we convert to rust.
656        if self.is_ascii() {
657            BytesView(self.0.borrow())
658        } else {
659            self.make_rust();
660            BytesView(self.0.borrow())
661        }
662    }
663
664    /// Tests if there are only ascii lowercase characters. Does not include special characters.
665    pub fn is_ascii_lowercase(&self) -> bool {
666        match self.view().encoded_bytes() {
667            EncodedBytes::Latin1Bytes(items) => items
668                .iter()
669                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
670            EncodedBytes::Utf8Bytes(s) => s
671                .iter()
672                .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
673                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
674        }
675    }
676
677    /// Is the string only ascii characters
678    pub fn is_ascii(&self) -> bool {
679        match self.view().encoded_bytes() {
680            EncodedBytes::Latin1Bytes(items) => items,
681            EncodedBytes::Utf8Bytes(items) => items,
682        }
683        .is_ascii()
684    }
685
686    /// Returns true if the slice only contains bytes that are safe to use in cookie strings.
687    /// <https://www.ietf.org/archive/id/draft-ietf-httpbis-rfc6265bis-15.html#section-5.6-6>
688    /// Not using ServoCookie::is_valid_name_or_value to prevent dependency on the net crate.
689    pub fn is_valid_for_cookie(&self) -> bool {
690        match self.view().encoded_bytes() {
691            EncodedBytes::Latin1Bytes(items) | EncodedBytes::Utf8Bytes(items) => !items
692                .iter()
693                .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
694        }
695    }
696}
697
/// Parses `input` as a floating-point number, ignoring surrounding whitespace.
///
/// Returns `None` when the trimmed input is not accepted by Rust's `f64` parser, when
/// the result is infinite or NaN, or when the trimmed input starts with `+` or ends
/// with `.`.
///
/// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    // Steps 15-16 are telling us things about IEEE rounding modes
    // for floating-point significands; this code assumes the Rust
    // compiler already matches them in any cases where
    // that actually matters. They are not
    // related to f64::round(), which is for rounding to integers.
    let trimmed = input.trim();

    // A valid number is the same as what rust considers to be valid,
    // except for +1., NaN, and Infinity. The sign and trailing-dot checks look at the
    // same trimmed text that gets parsed, so surrounding whitespace cannot hide them.
    if trimmed.starts_with('+') || trimmed.ends_with('.') {
        return None;
    }

    trimmed
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite())
}
711
/// A borrowed view of a `DOMString` that dereferences to its raw bytes.
/// It is handed out by `DOMString::as_bytes`.
pub struct BytesView<'a>(Ref<'a, DOMStringType>);
713
714impl Deref for BytesView<'_> {
715    type Target = [u8];
716
717    fn deref(&self) -> &Self::Target {
718        // This does the correct thing by the construction of BytesView in `DOMString::as_bytes`.
719        self.0.as_raw_bytes()
720    }
721}
722
723impl Ord for DOMString {
724    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
725        self.make_rust();
726        other.make_rust();
727        self.str().cmp(&other.str())
728    }
729}
730
731impl PartialOrd for DOMString {
732    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
733        self.make_rust();
734        other.make_rust();
735        self.str().partial_cmp(&other.str())
736    }
737}
738
739impl Extend<char> for DOMString {
740    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
741        self.make_rust();
742        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
743            s.extend(iter)
744        }
745    }
746}
747
748impl ToJSValConvertible for DOMString {
749    unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
750        let val = self.0.borrow();
751        match *val {
752            DOMStringType::Rust(ref s) => unsafe {
753                s.to_jsval(cx, rval);
754            },
755            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
756                rval.set(StringValue(&*rooted_traceable_box.get()));
757            },
758            #[cfg(test)]
759            DOMStringType::Latin1Vec(ref items) => {
760                let mut v = vec![0; items.len() * 2];
761                let real_size =
762                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
763                v.truncate(real_size);
764
765                String::from_utf8(v)
766                    .expect("Error in constructin test string")
767                    .to_jsval(cx, rval);
768            },
769        };
770    }
771}
772
773impl std::hash::Hash for DOMString {
774    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
775        self.make_rust();
776        self.str().hash(state);
777    }
778}
779
780impl std::fmt::Display for DOMString {
781    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
782        self.make_rust();
783        fmt::Display::fmt(self.str().deref(), f)
784    }
785}
786
787impl Default for DOMString {
788    fn default() -> Self {
789        DOMString::new()
790    }
791}
792
793impl std::cmp::PartialEq<str> for DOMString {
794    fn eq(&self, other: &str) -> bool {
795        if other.is_ascii() {
796            other.as_bytes() ==
797                match self.view().encoded_bytes() {
798                    EncodedBytes::Latin1Bytes(items) => items,
799                    EncodedBytes::Utf8Bytes(s) => s,
800                }
801        } else {
802            self.make_rust();
803            self.str().deref() == other
804        }
805    }
806}
807
808impl std::cmp::PartialEq<&str> for DOMString {
809    fn eq(&self, other: &&str) -> bool {
810        if other.is_ascii() {
811            other.as_bytes() ==
812                match self.view().encoded_bytes() {
813                    EncodedBytes::Latin1Bytes(items) => items,
814                    EncodedBytes::Utf8Bytes(s) => s,
815                }
816        } else {
817            self.make_rust();
818            self.str().deref() == *other
819        }
820    }
821}
822
823impl std::cmp::PartialEq<String> for DOMString {
824    fn eq(&self, other: &String) -> bool {
825        if other.is_ascii() {
826            other.as_bytes() ==
827                match self.view().encoded_bytes() {
828                    EncodedBytes::Latin1Bytes(items) => items,
829                    EncodedBytes::Utf8Bytes(s) => s,
830                }
831        } else {
832            self.make_rust();
833            self.str().deref() == other
834        }
835    }
836}
837
838impl std::cmp::PartialEq<DOMString> for String {
839    fn eq(&self, other: &DOMString) -> bool {
840        other.eq(self)
841    }
842}
843
844impl std::cmp::PartialEq<DOMString> for str {
845    fn eq(&self, other: &DOMString) -> bool {
846        other.eq(self)
847    }
848}
849
850impl std::cmp::PartialEq for DOMString {
851    fn eq(&self, other: &DOMString) -> bool {
852        let result = match (self.view().encoded_bytes(), other.view().encoded_bytes()) {
853            (EncodedBytes::Latin1Bytes(items), EncodedBytes::Latin1Bytes(other_items)) => {
854                Some(items == other_items)
855            },
856            (EncodedBytes::Latin1Bytes(items), EncodedBytes::Utf8Bytes(other_s))
857                if other_s.is_ascii() =>
858            {
859                Some(items == other_s)
860            },
861            (EncodedBytes::Utf8Bytes(s), EncodedBytes::Latin1Bytes(other_items))
862                if s.is_ascii() =>
863            {
864                Some(s == other_items)
865            },
866            (EncodedBytes::Utf8Bytes(s), EncodedBytes::Utf8Bytes(other_s)) => Some(s == other_s),
867            _ => None,
868        };
869
870        if let Some(eq_result) = result {
871            eq_result
872        } else {
873            self.make_rust();
874            other.make_rust();
875            self.str() == other.str()
876        }
877    }
878}
879
880impl std::cmp::Eq for DOMString {}
881
882impl From<std::string::String> for DOMString {
883    fn from(value: String) -> Self {
884        DOMString::from_string(value)
885    }
886}
887
888impl From<DOMString> for LocalName {
889    fn from(contents: DOMString) -> LocalName {
890        {
891            let view = contents.view();
892            let bytes = view.encoded_bytes();
893            let str = match bytes {
894                EncodedBytes::Latin1Bytes(items) => {
895                    if items.iter().all(|c| c.is_ascii()) {
896                        unsafe { Some(str::from_utf8_unchecked(items)) }
897                    } else {
898                        None
899                    }
900                },
901                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
902            };
903            if let Some(s) = str {
904                return LocalName::from(s);
905            }
906        }
907        contents.make_rust();
908        LocalName::from(contents.str().deref())
909    }
910}
911
912impl From<&DOMString> for LocalName {
913    fn from(contents: &DOMString) -> LocalName {
914        {
915            let view = contents.view();
916            let bytes = view.encoded_bytes();
917            let str = match bytes {
918                EncodedBytes::Latin1Bytes(items) => {
919                    if items.iter().all(|c| c.is_ascii()) {
920                        // This is safe as the string is ascii and it comes from a DOMString
921                        unsafe { Some(str::from_utf8_unchecked(items)) }
922                    } else {
923                        None
924                    }
925                },
926                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
927            };
928            if let Some(s) = str {
929                return LocalName::from(s);
930            }
931        }
932        contents.make_rust();
933        LocalName::from(contents.str().deref())
934    }
935}
936
937impl From<DOMString> for Namespace {
938    fn from(contents: DOMString) -> Namespace {
939        {
940            let view = contents.view();
941            let bytes = view.encoded_bytes();
942            let str = match bytes {
943                EncodedBytes::Latin1Bytes(items) => {
944                    if items.iter().all(|c| c.is_ascii()) {
945                        // This is safe as the string is ascii and it comes from a DOMString
946                        unsafe { Some(str::from_utf8_unchecked(items)) }
947                    } else {
948                        None
949                    }
950                },
951                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
952            };
953            if let Some(s) = str {
954                return Namespace::from(s);
955            }
956        }
957        contents.make_rust();
958        Namespace::from(contents.str().deref())
959    }
960}
961
962impl From<DOMString> for Atom {
963    fn from(contents: DOMString) -> Atom {
964        {
965            let view = contents.view();
966            let bytes = view.encoded_bytes();
967            let str = match bytes {
968                EncodedBytes::Latin1Bytes(items) => {
969                    if items.iter().all(|c| c.is_ascii()) {
970                        // Safety: The string only has ascii chars, hence this is ok.
971                        unsafe { Some(str::from_utf8_unchecked(items)) }
972                    } else {
973                        None
974                    }
975                },
976                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
977            };
978            if let Some(s) = str {
979                return Atom::from(s);
980            }
981        }
982        contents.make_rust();
983        Atom::from(contents.str().deref())
984    }
985}
986
987impl From<&str> for DOMString {
988    fn from(contents: &str) -> DOMString {
989        DOMString(RefCell::new(DOMStringType::Rust(String::from(contents))))
990    }
991}
992
993impl From<DOMString> for String {
994    fn from(val: DOMString) -> Self {
995        val.make_rust();
996        val.str().to_owned()
997    }
998}
999
1000impl From<DOMString> for Vec<u8> {
1001    fn from(value: DOMString) -> Self {
1002        value.make_rust();
1003        value.str().as_bytes().to_vec()
1004    }
1005}
1006
1007impl From<Cow<'_, str>> for DOMString {
1008    fn from(value: Cow<'_, str>) -> Self {
1009        DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
1010    }
1011}
1012
/// Implementation detail of `match_domstring_ascii!`; not meant to be invoked directly.
///
/// Expands a list of `literal => expr,` arms, terminated by one `pattern => expr,`
/// arm, into an `if`/`else` chain:
/// * each string literal is wrapped as `$variant(literal.as_bytes())` and compared
///   with `$input` using `==`; the first literal that compares equal selects its arm;
/// * the final arm is expanded as a plain `match $input { pattern => expr }`, so it
///   is normally the catch-all `_`.
///
/// Literals must be ASCII. This is only checked with `debug_assert!`, i.e. a
/// non-ASCII literal panics when its arm is reached in builds with debug assertions
/// and goes unchecked otherwise.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // Literal arm followed by more arms: test it, otherwise recurse on the rest.
    ($variant: expr, $input: expr, $p: literal => $then: expr, $($rest:tt)*) => {{
        debug_assert!(($p).is_ascii());
        if $variant($p.as_bytes()) == $input {
            $then
        } else {
            match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }
    }};
    // Terminal arm: an ordinary pattern (`$variant` is not needed here).
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
1032
/// Matches a `DOMString` against ASCII string literals without converting it first.
///
/// The arms are `literal => expr,` pairs followed by one final pattern arm
/// (usually `_ => expr,`). The literals are compared with the bytes the string
/// currently exposes, in the order they are written.
///
/// Only ASCII literals are allowed; anything else leads to wrong results (a
/// `debug_assert!` catches it in builds with debug assertions).
/// ```ignore
/// let s = DOMString::from_string(String::from("test"));
/// let value = match_domstring_ascii!(s,
/// "test1" => 1,
/// "test2" => 2,
/// "test" => 3,
/// _ => 4,
/// );
/// assert_eq!(value, 3);
/// ```
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::domstring::EncodedBytes;
            use $crate::match_domstring_ascii_inner;

            let bytes_view = $input.view();
            let encoded = bytes_view.encoded_bytes();
            // Wrap the literals in the same variant as the input so that the
            // `==` in the inner macro compares like with like.
            match encoded {
                EncodedBytes::Latin1Bytes(_) => {
                    match_domstring_ascii_inner!(EncodedBytes::Latin1Bytes, encoded, $($tail)*)
                },
                EncodedBytes::Utf8Bytes(_) => {
                    match_domstring_ascii_inner!(EncodedBytes::Utf8Bytes, encoded, $($tail)*)
                },
            }
        }
    };
}
1063
#[cfg(test)]
mod tests {
    use super::*;

    /// Latin-1 byte for '¶' (U+00B6).
    const LATIN1_PILCROW: u8 = 0xB6;
    /// UTF-8 encoding of '¶' (U+00B6).
    const UTF8_PILCROW: [u8; 2] = [194, 182];
    /// Latin-1 byte for '²' (U+00B2).
    const LATIN1_POWER2: u8 = 0xB2;

    /// Builds a `DOMString` that is backed directly by the given Latin-1 bytes.
    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
    }

    #[test]
    fn string_functions() {
        let s = DOMString::from("AbBcC❤&%$#");
        let s_copy = s.clone();
        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
        assert_eq!(s, s_copy);
        assert_eq!(s.len(), 12);
        assert_eq!(s_copy.len(), 12);
        assert!(s.starts_with('A'));
        let s2 = DOMString::from("");
        assert!(s2.is_empty());
    }

    #[test]
    fn string_functions_latin1() {
        {
            let s = from_latin1(vec![
                b'A',
                b'b',
                b'B',
                b'c',
                b'C',
                b'&',
                b'%',
                b'$',
                b'#',
                LATIN1_POWER2,
            ]);
            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
        }
        {
            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
            assert_eq!(s.to_ascii_lowercase(), "abbcc");
        }
        {
            let s = from_latin1(vec![
                b'A',
                b'b',
                b'B',
                b'c',
                b'C',
                b'&',
                b'%',
                b'$',
                b'#',
                LATIN1_POWER2,
            ]);
            // '²' takes two bytes in UTF-8, hence 11 and not 10.
            assert_eq!(s.len(), 11);
            assert!(s.starts_with('A'));
        }
        {
            let s = from_latin1(vec![]);
            assert!(s.is_empty());
        }
    }

    #[test]
    fn test_length() {
        // Each row pairs the first byte of a run of 16 consecutive Latin-1 bytes
        // with the same 16 characters written out as UTF-8 text. Characters that
        // are invisible or easily mangled by tooling are spelled as escapes.
        let rows = [
            (0xA0_u8, "\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯"),
            (0xB0, "°±²³´µ¶·¸¹º»¼½¾¿"),
            (0xC0, "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ"),
            (0xD0, "ÐÑÒÓÔÕÖ\u{00D7}\u{00D8}ÙÚÛÜÝÞß"),
            (0xE0, "àáâãäåæçèéêëìíîï"),
            (0xF0, "ðñòóôõö÷øùúûüýþÿ"),
        ];

        for (first, utf8) in rows {
            let s = from_latin1((first..=first + 0x0F).collect());
            assert_eq!(s.len(), utf8.len(), "row starting at {first:#04x}");

            // The length must not change once the string has been converted.
            s.make_rust();
            assert_eq!(
                s.len(),
                utf8.len(),
                "row starting at {first:#04x} after make_rust"
            );
        }
    }

    #[test]
    fn test_convert() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        s.make_rust();
        assert_eq!(&*s.str(), "abc%$");
    }

    #[test]
    fn partial_eq() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        let string = String::from("abc%$");
        let s2 = DOMString::from_string(string.clone());
        assert_eq!(s, s2);
        assert_eq!(s, string);
    }

    #[test]
    fn encoded_bytes() {
        let bytes = vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2];
        let s = from_latin1(bytes.clone());
        let view = s.view();
        // Fail loudly instead of passing vacuously if the variant is unexpected.
        match view.encoded_bytes() {
            EncodedBytes::Latin1Bytes(raw) => assert_eq!(raw, bytes),
            EncodedBytes::Utf8Bytes(_) => {
                panic!("a Latin-1 backed DOMString should expose Latin1Bytes")
            },
        };
    }

    #[test]
    fn testing_stringview() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);

        assert_eq!(
            s.str().chars().collect::<Vec<char>>(),
            vec!['a', 'b', 'c', '%', '$', '²']
        );
        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
    }

    // We need to be extra careful here as two strings that have different
    // representation need to have the same hash.
    // Additionally, the interior mutability is only used for the conversion
    // which is forced by Hash. Hence, it is safe to have this interior mutability.
    #[test]
    fn test_hash() {
        use std::hash::{DefaultHasher, Hash, Hasher};
        fn hash_value(d: &DOMString) -> u64 {
            let mut hasher = DefaultHasher::new();
            d.hash(&mut hasher);
            hasher.finish()
        }

        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);
        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);
        s_converted.make_rust();
        let s2 = DOMString::from_string(String::from("abc%$²"));

        let hash_s = hash_value(&s);
        let hash_s_converted = hash_value(&s_converted);
        let hash_s2 = hash_value(&s2);

        assert_eq!(hash_s, hash_s2);
        assert_eq!(hash_s, hash_s_converted);
    }

    // Testing that match_domstring_ascii executes the statement of the arm that
    // is supposed to match, and none of the others.
    #[test]
    fn test_match_executing() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let mut executed = false;
            match_domstring_ascii!( s,
                "abc" => executed = true,
                "bcd" => panic!("matched the wrong literal"),
                _ => panic!("fell through to the default arm"),
            );
            assert!(executed);
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            let mut executed = false;
            match_domstring_ascii!( s,
                "abc/" => executed = true,
                "bcd" => panic!("matched the wrong literal"),
                _ => panic!("fell through to the default arm"),
            );
            assert!(executed);
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            let mut executed = false;
            match_domstring_ascii!( s,
                "bcd" => panic!("matched the wrong literal"),
                "abc%$" => executed = true,
                _ => panic!("fell through to the default arm"),
            );
            assert!(executed);
        }

        {
            let s = DOMString::from_string(String::from("abcde"));
            let mut executed = false;
            match_domstring_ascii!( s,
                "abc" => panic!("matched a literal that is only a prefix"),
                "bcd" => panic!("matched the wrong literal"),
                _ => executed = true,
            );
            assert!(executed);
        }
        {
            let s = DOMString::from_string(String::from("abc%$"));
            let mut executed = false;
            match_domstring_ascii!( s,
                "bcd" => panic!("matched the wrong literal"),
                "abc%$" => executed = true,
                _ => panic!("fell through to the default arm"),
            );
            assert!(executed);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let mut executed = false;
            match_domstring_ascii!( s,
                "abcdd" => panic!("matched a longer literal"),
                "bcd" => panic!("matched the wrong literal"),
                _ => executed = true,
            );
            assert!(executed);
        }
    }

    // Testing that match_domstring_ascii evaluates to the expression of the
    // matching arm.
    #[test]
    fn test_match_returning_result() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abc" => true,
                "bcd" => false,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            let res = match_domstring_ascii!( s,
                "abc/" => true,
                "bcd" => false,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert!(res);
        }

        {
            let s = DOMString::from_string(String::from("abcde"));
            let res = match_domstring_ascii!( s,
                "abc" => false,
                "bcd" => false,
                _ => true,
            );
            assert!(res);
        }
        {
            let s = DOMString::from_string(String::from("abc%$"));
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abcdd" => false,
                "bcd" => false,
                _ => true,
            );
            assert!(res);
        }
    }

    // A non-ASCII literal trips the `debug_assert!` inside the macro.
    #[test]
    #[should_panic]
    fn test_match_panic() {
        let s = DOMString::from_string(String::from("abcd"));
        let _res = match_domstring_ascii!(s,
            "❤" => true,
            _ => false,);
    }

    // Same as above, with the offending literal reached after a non-matching arm.
    #[test]
    #[should_panic]
    fn test_match_panic2() {
        let s = DOMString::from_string(String::from("abcd"));
        let _res = match_domstring_ascii!(s,
            "abc" => false,
            "❤" => true,
            _ => false,
        );
    }

    #[test]
    fn test_strip_whitespace() {
        {
            let mut s = from_latin1(vec![
                b' ',
                b' ',
                b' ',
                b'\n',
                b' ',
                b'a',
                b'b',
                b'c',
                b'%',
                b'$',
                LATIN1_POWER2,
                b' ',
            ]);

            s.strip_leading_and_trailing_ascii_whitespace();
            s.make_rust();
            assert_eq!(&*s.str(), "abc%$²");
        }
        {
            let mut s = DOMString::from_string(String::from("   \n  abc%$ "));

            s.strip_leading_and_trailing_ascii_whitespace();
            s.make_rust();
            assert_eq!(&*s.str(), "abc%$");
        }
    }

    // https://infra.spec.whatwg.org/#ascii-whitespace
    #[test]
    fn contains_html_space_characters() {
        // TAB, NEWLINE, FF, Carriage Return, SPACE
        let whitespace = [
            ASCII_TAB,
            ASCII_NEWLINE,
            ASCII_FORMFEED,
            ASCII_CR,
            ASCII_SPACE,
        ];
        for space in whitespace {
            let s = from_latin1(vec![b'a', b'a', b'a', space, b'a', b'a']);
            assert!(
                s.contains_html_space_characters(),
                "Latin-1 string with byte {space:#04x}"
            );
            s.make_rust();
            assert!(
                s.contains_html_space_characters(),
                "converted string with byte {space:#04x}"
            );
        }

        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
        assert!(!s.contains_html_space_characters());
        s.make_rust();
        assert!(!s.contains_html_space_characters());
    }

    #[test]
    fn atom() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = Atom::from(s);
        let s2 = DOMString::from_string(String::from("aaa aa"));
        let atom2 = Atom::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = Atom::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn namespace() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = Namespace::from(s);
        let s2 = DOMString::from_string(String::from("aaa aa"));
        let atom2 = Namespace::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = Namespace::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn localname() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = LocalName::from(s);
        let s2 = DOMString::from_string(String::from("aaa aa"));
        let atom2 = LocalName::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = LocalName::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn is_ascii_lowercase() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILCROW, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert!(s.is_ascii_lowercase());
        // '`' is the character right before 'a'.
        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from_string(String::from("`aaaz"));
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from_string(String::from("aaaz"));
        assert!(s.is_ascii_lowercase());
    }

    #[test]
    fn test_as_bytes() {
        // A non-ASCII Latin-1 byte comes back as its two-byte UTF-8 encoding.
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILCROW, b'a', b'a']);
        assert_eq!(
            *s.as_bytes(),
            [
                b'a',
                b'a',
                b'a',
                UTF8_PILCROW[0],
                UTF8_PILCROW[1],
                b'a',
                b'a'
            ]
        );

        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert_eq!(*s.as_bytes(), [b'a', b'a', b'a', b'a', b'z']);

        let text = "abc%$²".to_owned();
        let s = DOMString::from(text.clone());
        assert_eq!(&*s.as_bytes(), text.as_bytes());
        let text = "AbBcC❤&%$#".to_owned();
        let s = DOMString::from(text.clone());
        assert_eq!(&*s.as_bytes(), text.as_bytes());
    }
}