script_bindings/
domstring.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::{Chars, FromStr};
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
16use html5ever::{LocalName, Namespace};
17use js::conversions::{ToJSValConvertible, jsstr_to_string};
18use js::gc::MutableHandleValue;
19use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
20use js::jsval::StringValue;
21use js::rust::{Runtime, Trace};
22use malloc_size_of::MallocSizeOfOps;
23use num_traits::{ToPrimitive, Zero};
24use regex::Regex;
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27
28use crate::script_runtime::JSContext as SafeJSContext;
29use crate::trace::RootedTraceableBox;
30
/// The highest byte value that is still ASCII (U+007F, DELETE).
///
/// A Latin1 byte at or below this value occupies exactly one byte in UTF-8;
/// every byte above it needs two. The previous value `0x7E` excluded DELETE and
/// made such a byte count as two UTF-8 code units.
const ASCII_END: u8 = 0x7F;
/// `A`, the first ASCII uppercase letter.
const ASCII_CAPITAL_A: u8 = 0x41;
/// `Z`, the last ASCII uppercase letter.
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `a`, the first ASCII lowercase letter.
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `z`, the last ASCII lowercase letter.
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// U+0009 CHARACTER TABULATION.
const ASCII_TAB: u8 = 0x09;
/// U+000A LINE FEED.
const ASCII_NEWLINE: u8 = 0x0A;
/// U+000C FORM FEED.
const ASCII_FORMFEED: u8 = 0x0C;
/// U+000D CARRIAGE RETURN.
const ASCII_CR: u8 = 0x0D;
/// U+0020 SPACE.
const ASCII_SPACE: u8 = 0x20;
41
42/// Gets the latin1 bytes from the js engine.
43/// Safety: Make sure the *mut JSString is not null.
44unsafe fn get_latin1_string_bytes(
45    rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
46) -> &[u8] {
47    debug_assert!(!rooted_traceable_box.get().is_null());
48    let mut length = 0;
49    unsafe {
50        let chars = JS_GetLatin1StringCharsAndLength(
51            Runtime::get().expect("JS runtime has shut down").as_ptr(),
52            ptr::null(),
53            rooted_traceable_box.get(),
54            &mut length,
55        );
56        assert!(!chars.is_null());
57        slice::from_raw_parts(chars, length)
58    }
59}
60
/// The bytes backing a string, tagged with the encoding they are stored in:
/// either Latin1 or UTF-8.
#[derive(Debug, Eq, PartialEq)]
pub enum EncodedBytes<'a> {
    /// Bytes in Latin1 encoding.
    Latin1Bytes(&'a [u8]),
    /// The bytes of an ordinary UTF-8 string.
    Utf8Bytes(&'a [u8]),
}
69
70enum DOMStringType {
71    /// A simple rust string
72    Rust(String),
73    /// A JS String stored in mozjs.
74    JSString(RootedTraceableBox<Heap<*mut JSString>>),
75    #[cfg(test)]
76    /// This is used for testing of the bindings to give
77    /// a raw u8 Latin1 encoded string without having a js engine.
78    Latin1Vec(Vec<u8>),
79}
80
81impl DOMStringType {
82    /// Returns the str if Rust and otherwise panic. You need to call `make_rust`.
83    fn str(&self) -> &str {
84        match self {
85            DOMStringType::Rust(s) => s,
86            DOMStringType::JSString(_rooted_traceable_box) => {
87                panic!("Cannot do a string")
88            },
89            #[cfg(test)]
90            &DOMStringType::Latin1Vec(_) => panic!("Cannot do a string"),
91        }
92    }
93
94    /// Warning:
95    /// This function does not checking and just returns the raw bytes of teh string,
96    /// independently if they are  utf8 or latin1.
97    /// The caller needs to take care that these make sense in context.
98    fn as_raw_bytes(&self) -> &[u8] {
99        match self {
100            DOMStringType::Rust(s) => s.as_bytes(),
101            DOMStringType::JSString(rooted_traceable_box) => unsafe {
102                get_latin1_string_bytes(rooted_traceable_box)
103            },
104            #[cfg(test)]
105            DOMStringType::Latin1Vec(items) => items,
106        }
107    }
108}
109
110#[derive(Debug)]
111/// A view of the underlying string. This is always converted to Utf8.
112pub struct StringView<'a>(Ref<'a, DOMStringType>);
113
114impl<'a> StringView<'a> {
115    pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
116        self.0
117            .str()
118            .split(HTML_SPACE_CHARACTERS)
119            .filter(|s| !s.is_empty())
120    }
121
122    pub fn strip_prefix(&self, needle: &str) -> Option<&str> {
123        self.0.str().strip_prefix(needle)
124    }
125
126    pub fn chars(&self) -> Chars<'_> {
127        self.0.str().chars()
128    }
129
130    pub fn as_bytes(&self) -> &[u8] {
131        self.0.str().as_bytes()
132    }
133}
134
135impl Deref for StringView<'_> {
136    type Target = str;
137    fn deref(&self) -> &str {
138        self.0.str()
139    }
140}
141
142impl AsRef<str> for StringView<'_> {
143    fn as_ref(&self) -> &str {
144        self.deref()
145    }
146}
147
148impl PartialEq for StringView<'_> {
149    fn eq(&self, other: &Self) -> bool {
150        self.0.str() == other.0.str()
151    }
152}
153
154impl PartialEq<&str> for StringView<'_> {
155    fn eq(&self, other: &&str) -> bool {
156        self.0.str() == *other
157    }
158}
159
160impl Eq for StringView<'_> {}
161
162impl PartialOrd for StringView<'_> {
163    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
164        self.0.str().partial_cmp(other.0.str())
165    }
166}
167
168impl Ord for StringView<'_> {
169    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
170        self.0.str().cmp(other.0.str())
171    }
172}
173
174impl From<StringView<'_>> for String {
175    fn from(value: StringView<'_>) -> Self {
176        String::from(value.0.str())
177    }
178}
179
180/// Safety comment:
181///
182/// This method will _not_ trace the pointer if the rust string exists.
183/// The js string could be garbage collected and, hence, violating this
184/// could lead to undefined behavior
185unsafe impl Trace for DOMStringType {
186    unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
187        unsafe {
188            match self {
189                DOMStringType::Rust(_s) => {},
190                DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
191                #[cfg(test)]
192                DOMStringType::Latin1Vec(_s) => {},
193            }
194        }
195    }
196}
197
198impl malloc_size_of::MallocSizeOf for DOMStringType {
199    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
200        match self {
201            DOMStringType::Rust(s) => s.size_of(ops),
202            DOMStringType::JSString(_rooted_traceable_box) => {
203                // Managed by JS Engine
204                0
205            },
206            #[cfg(test)]
207            DOMStringType::Latin1Vec(s) => s.size_of(ops),
208        }
209    }
210}
211
212impl std::fmt::Debug for DOMStringType {
213    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
214        match self {
215            DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
216            DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
217            #[cfg(test)]
218            DOMStringType::Latin1Vec(s) => f
219                .debug_struct("DOMString")
220                .field("latin1_string", s)
221                .finish(),
222        }
223    }
224}
225
226#[derive(Debug)]
227/// A view of the underlying string. This is never converted to Utf8
228pub struct EncodedBytesView<'a>(Ref<'a, DOMStringType>);
229
230impl EncodedBytesView<'_> {
231    /// Get the bytes of the string in either latin1 or utf8 without costly conversion.
232    pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
233        match *self.0 {
234            DOMStringType::Rust(ref s) => EncodedBytes::Utf8Bytes(s.as_bytes()),
235            DOMStringType::JSString(ref rooted_traceable_box) => {
236                EncodedBytes::Latin1Bytes(unsafe { get_latin1_string_bytes(rooted_traceable_box) })
237            },
238            #[cfg(test)]
239            DOMStringType::Latin1Vec(ref s) => EncodedBytes::Latin1Bytes(s),
240        }
241    }
242
243    fn is_empty(&self) -> bool {
244        match self.encoded_bytes() {
245            EncodedBytes::Latin1Bytes(items) => items.is_empty(),
246            EncodedBytes::Utf8Bytes(s) => s.is_empty(),
247        }
248    }
249
250    fn len(&self) -> usize {
251        match self.encoded_bytes() {
252            EncodedBytes::Latin1Bytes(items) => items
253                .iter()
254                .map(|b| if *b <= ASCII_END { 1 } else { 2 })
255                .sum(),
256            EncodedBytes::Utf8Bytes(s) => s.len(),
257        }
258    }
259}
260
261////// A DOMString.
262///
263/// This type corresponds to the [`DOMString`] type in WebIDL.
264///
265/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString
266///
267/// Conceptually, a DOMString has the same value space as a JavaScript String,
268/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with
269/// unpaired surrogates present (also sometimes called WTF-16).
270///
271/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have
272/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values
273/// can not be represented as a Rust `String`). This introduces the question of
274/// what to do with values being passed from JavaScript to Rust that contain
275/// unpaired surrogates.
276///
277/// The hypothesis is that it does not matter much how exactly those values are
278/// transformed, because  passing unpaired surrogates into the DOM is very rare.
279/// Instead Servo withh replace the unpaired surrogate by a U+FFFD replacement
280/// character.
281///
282/// Currently, the lack of crash reports about this issue provides some
283/// evidence to support the hypothesis. This evidence will hopefully be used to
284/// convince other browser vendors that it would be safe to replace unpaired
285/// surrogates at the boundary between JavaScript and native code. (This would
286/// unify the `DOMString` and `USVString` types, both in the WebIDL standard
287/// and in Servo.)
288///
289/// This string class will keep either the Reference to the mozjs object alive
290/// or will have an internal rust string.
291/// We currently default to doing most of the string operation on the rust side.
292/// You should use `str()` to get the Rust string (represented by a `StringView`
293/// which you can deref to a string). You should assume that this conversion costs.
294/// You should assume that all the functions incur the conversion cost.
295///
296#[repr(transparent)]
297#[derive(Debug, MallocSizeOf, JSTraceable)]
298pub struct DOMString(RefCell<DOMStringType>);
299
300impl Clone for DOMString {
301    fn clone(&self) -> Self {
302        self.make_rust();
303        if let DOMStringType::Rust(ref s) = *self.0.borrow() {
304            DOMString::from_string(s.to_owned())
305        } else {
306            unreachable!()
307        }
308    }
309}
310
/// Errors that can occur while creating a `DOMString`.
pub enum DOMStringErrorType {
    /// Converting a JS value to a JS string failed.
    JSConversionError,
}
314
315impl DOMString {
316    /// Creates a new `DOMString`.
317    pub fn new() -> DOMString {
318        DOMString(RefCell::new(DOMStringType::Rust(String::new())))
319    }
320
321    /// Creates the string from js. If the string can be encoded in latin1, just take the reference
322    /// to the JSString. Otherwise do the conversion to utf8 now.
323    pub fn from_js_string(
324        cx: SafeJSContext,
325        value: js::gc::HandleValue,
326    ) -> Result<DOMString, DOMStringErrorType> {
327        let string_ptr = unsafe { js::rust::ToString(*cx, value) };
328        if string_ptr.is_null() {
329            debug!("ToString failed");
330            Err(DOMStringErrorType::JSConversionError)
331        } else {
332            let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
333            let inner = if latin1 {
334                let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
335                DOMStringType::JSString(h)
336            } else {
337                // We need to convert the string anyway as it is not just latin1
338                DOMStringType::Rust(unsafe {
339                    jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
340                })
341            };
342            Ok(DOMString(RefCell::new(inner)))
343        }
344    }
345
346    pub fn from_string(s: String) -> DOMString {
347        DOMString(RefCell::new(DOMStringType::Rust(s)))
348    }
349
350    /// Transforms the string into rust string if not yet a rust string.
351    fn make_rust(&self) {
352        let string = {
353            let inner = self.0.borrow();
354            match *inner {
355                DOMStringType::Rust(_) => return,
356                DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
357                    jsstr_to_string(
358                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
359                        NonNull::new(rooted_traceable_box.get()).unwrap(),
360                    )
361                },
362                #[cfg(test)]
363                DOMStringType::Latin1Vec(ref items) => {
364                    let mut v = vec![0; items.len() * 2];
365                    let real_size = encoding_rs::mem::convert_latin1_to_utf8(
366                        items.as_slice(),
367                        v.as_mut_slice(),
368                    );
369                    v.truncate(real_size);
370
371                    // Safety: convert_latin1_to_utf8 converts the raw bytes to utf8 and the
372                    // buffer is the size specified in the documentation, so this should be safe.
373                    unsafe { String::from_utf8_unchecked(v) }
374                },
375            }
376        };
377        *self.0.borrow_mut() = DOMStringType::Rust(string);
378    }
379
380    /// Debug the current  state of the string without modifying it.
381    #[expect(unused)]
382    fn debug_js(&self) {
383        match *self.0.borrow() {
384            DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
385            DOMStringType::JSString(ref rooted_traceable_box) => {
386                let s = unsafe {
387                    jsstr_to_string(
388                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
389                        ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
390                    )
391                };
392                info!("JSString ({})", s);
393            },
394            #[cfg(test)]
395            DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
396        }
397    }
398
399    /// Returns the underlying rust string.
400    pub fn str(&self) -> StringView<'_> {
401        self.make_rust();
402        StringView(self.0.borrow())
403    }
404
405    /// Use this if you want to work on the `EncodedBytes` directly.
406    /// This will not do any conversions for you.
407    pub fn view(&self) -> EncodedBytesView<'_> {
408        EncodedBytesView(self.0.borrow())
409    }
410
411    pub fn clear(&mut self) {
412        *self.0.borrow_mut() = DOMStringType::Rust(String::new())
413    }
414
415    pub fn is_empty(&self) -> bool {
416        self.view().is_empty()
417    }
418
419    /// The length of this string in UTF-8 code units, each one being one byte in size.
420    ///
421    /// Note: This is different than the number of Unicode characters (or code points). A
422    /// character may require multiple UTF-8 code units.
423    pub fn len(&self) -> usize {
424        self.view().len()
425    }
426
427    /// The length of this string in UTF-8 code units, each one being one byte in size.
428    /// This method is the same as [`DOMString::len`], but the result is wrapped in a
429    /// `Utf8CodeUnitLength` to be used in code that mixes different kinds of offsets.
430    ///
431    /// Note: This is different than the number of Unicode characters (or code points). A
432    /// character may require multiple UTF-8 code units.
433    pub fn len_utf8(&self) -> Utf8CodeUnitLength {
434        Utf8CodeUnitLength(self.len())
435    }
436
437    /// The length of this string in UTF-16 code units, each one being one two bytes in size.
438    ///
439    /// Note: This is different than the number of Unicode characters (or code points). A
440    /// character may require multiple UTF-16 code units.
441    pub fn len_utf16(&self) -> Utf16CodeUnitLength {
442        Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
443    }
444
445    pub fn make_ascii_lowercase(&mut self) {
446        self.make_rust();
447        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
448            s.make_ascii_lowercase();
449        }
450    }
451
452    pub fn push_str(&mut self, s: &str) {
453        self.make_rust();
454        if let DOMStringType::Rust(ref mut string) = *self.0.borrow_mut() {
455            string.push_str(s)
456        }
457    }
458
459    pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
460        if self.is_empty() {
461            return;
462        }
463
464        self.make_rust();
465        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
466            let trailing_whitespace_len = s
467                .trim_end_matches(|ref c| char::is_ascii_whitespace(c))
468                .len();
469            s.truncate(trailing_whitespace_len);
470            if s.is_empty() {
471                return;
472            }
473
474            let first_non_whitespace = s.find(|ref c| !char::is_ascii_whitespace(c)).unwrap();
475            s.replace_range(0..first_non_whitespace, "");
476        }
477    }
478
479    /// This is a dom spec
480    pub fn is_valid_floating_point_number_string(&self) -> bool {
481        static RE: LazyLock<Regex> = LazyLock::new(|| {
482            Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
483        });
484        self.make_rust();
485
486        if let DOMStringType::Rust(ref s) = *self.0.borrow() {
487            RE.is_match(s) && self.parse_floating_point_number().is_some()
488        } else {
489            unreachable!()
490        }
491    }
492
493    pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
494        self.make_rust();
495        self.str().parse::<T>()
496    }
497
498    /// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
499    pub fn parse_floating_point_number(&self) -> Option<f64> {
500        self.make_rust();
501        parse_floating_point_number(&self.str())
502    }
503
504    /// <https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number>
505    pub fn set_best_representation_of_the_floating_point_number(&mut self) {
506        if let Some(val) = self.parse_floating_point_number() {
507            // [tc39] Step 2: If x is either +0 or -0, return "0".
508            let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
509
510            *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
511        }
512    }
513
514    pub fn to_lowercase(&self) -> String {
515        self.make_rust();
516        self.str().to_lowercase()
517    }
518
519    pub fn to_uppercase(&self) -> String {
520        self.make_rust();
521        self.str().to_uppercase()
522    }
523
524    pub fn strip_newlines(&mut self) {
525        // > To strip newlines from a string, remove any U+000A LF and U+000D CR code
526        // > points from the string.
527        self.make_rust();
528        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
529            s.retain(|c| c != '\r' && c != '\n');
530        }
531    }
532
533    /// Normalize newlines according to <https://infra.spec.whatwg.org/#normalize-newlines>.
534    pub fn normalize_newlines(&mut self) {
535        self.make_rust();
536        // > To normalize newlines in a string, replace every U+000D CR U+000A LF code point
537        // > pair with a single U+000A LF code point, and then replace every remaining
538        // > U+000D CR code point with a U+000A LF code point.
539        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
540            *s = s.replace("\r\n", "\n").replace("\r", "\n")
541        }
542    }
543
544    pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
545        self.make_rust();
546        let new_string = self.str().to_owned();
547        DOMString(RefCell::new(DOMStringType::Rust(
548            new_string.replace(needle, replace_char),
549        )))
550    }
551
552    /// Pattern is not yet stable in rust, hence, we need different methods for str and char
553    pub fn starts_with(&self, c: char) -> bool {
554        if !c.is_ascii() {
555            self.make_rust();
556            self.str().starts_with(c)
557        } else {
558            match self.view().encoded_bytes() {
559                EncodedBytes::Latin1Bytes(items) => items,
560                EncodedBytes::Utf8Bytes(s) => s,
561            }
562            // For both cases as we tested the char being ascii we can safely convert to a single u8.
563            .starts_with(&[c as u8])
564        }
565    }
566
567    pub fn starts_with_str(&self, needle: &str) -> bool {
568        self.make_rust();
569        self.str().starts_with(needle)
570    }
571
572    pub fn contains(&self, needle: &str) -> bool {
573        self.make_rust();
574        self.str().contains(needle)
575    }
576
577    pub fn to_ascii_lowercase(&self) -> String {
578        let conversion = match self.view().encoded_bytes() {
579            EncodedBytes::Latin1Bytes(items) => {
580                if items.iter().all(|c| *c <= ASCII_END) {
581                    // We are just simple ascii
582                    Some(unsafe {
583                        String::from_utf8_unchecked(
584                            items
585                                .iter()
586                                .map(|c| {
587                                    if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
588                                        c + 32
589                                    } else {
590                                        *c
591                                    }
592                                })
593                                .collect(),
594                        )
595                    })
596                } else {
597                    None
598                }
599            },
600            EncodedBytes::Utf8Bytes(s) => unsafe {
601                // Save because we know it was a utf8 string
602                Some(str::from_utf8_unchecked(s).to_ascii_lowercase())
603            },
604        };
605        // We otherwise would double borrow the refcell
606        if let Some(conversion) = conversion {
607            conversion
608        } else {
609            self.make_rust();
610            self.str().to_ascii_lowercase()
611        }
612    }
613
614    pub fn contains_html_space_characters(&self) -> bool {
615        const SPACE_BYTES: [u8; 5] = [
616            ASCII_TAB,
617            ASCII_NEWLINE,
618            ASCII_FORMFEED,
619            ASCII_CR,
620            ASCII_SPACE,
621        ];
622        match self.view().encoded_bytes() {
623            EncodedBytes::Latin1Bytes(items) => SPACE_BYTES.iter().any(|byte| items.contains(byte)),
624            EncodedBytes::Utf8Bytes(s) => {
625                // Save because we know it was a utf8 string
626                let s = unsafe { str::from_utf8_unchecked(s) };
627                s.contains(HTML_SPACE_CHARACTERS)
628            },
629        }
630    }
631
632    /// This returns the string in utf8 bytes, i.e., `[u8]` encoded with utf8.
633    pub fn as_bytes(&self) -> BytesView<'_> {
634        // BytesView will just give the raw bytes on dereference.
635        // If we are ascii this is the same for latin1 and utf8.
636        // Otherwise we convert to rust.
637        if self.is_ascii() {
638            BytesView(self.0.borrow())
639        } else {
640            self.make_rust();
641            BytesView(self.0.borrow())
642        }
643    }
644
645    /// Tests if there are only ascii lowercase characters. Does not include special characters.
646    pub fn is_ascii_lowercase(&self) -> bool {
647        match self.view().encoded_bytes() {
648            EncodedBytes::Latin1Bytes(items) => items
649                .iter()
650                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
651            EncodedBytes::Utf8Bytes(s) => s
652                .iter()
653                .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
654                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
655        }
656    }
657
658    /// Is the string only ascii characters
659    pub fn is_ascii(&self) -> bool {
660        match self.view().encoded_bytes() {
661            EncodedBytes::Latin1Bytes(items) => items,
662            EncodedBytes::Utf8Bytes(items) => items,
663        }
664        .is_ascii()
665    }
666
667    /// Returns true if the slice only contains bytes that are safe to use in cookie strings.
668    /// <https://www.ietf.org/archive/id/draft-ietf-httpbis-rfc6265bis-15.html#section-5.6-6>
669    /// Not using ServoCookie::is_valid_name_or_value to prevent dependency on the net crate.
670    pub fn is_valid_for_cookie(&self) -> bool {
671        match self.view().encoded_bytes() {
672            EncodedBytes::Latin1Bytes(items) | EncodedBytes::Utf8Bytes(items) => !items
673                .iter()
674                .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
675        }
676    }
677}
678
/// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
///
/// Parses the trimmed `input` as an `f64`. Returns `None` when parsing fails,
/// when the result is infinite or NaN, or when `input` ends with `.` or starts
/// with `+`.
///
/// NOTE(review): the `.`/`+` checks look at the untrimmed `input`, so `" +1"`
/// is accepted while `"+1"` is rejected — confirm whether this is intended.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    // Steps 15-16 are telling us things about IEEE rounding modes
    // for floating-point significands; this code assumes the Rust
    // compiler already matches them in any cases where
    // that actually matters. They are not
    // related to f64::round(), which is for rounding to integers.
    let value = input.trim().parse::<f64>().ok()?;

    // A valid number is the same as what rust considers to be valid,
    // except for +1., NaN, and Infinity.
    let acceptable = value.is_finite() && !input.ends_with('.') && !input.starts_with('+');
    acceptable.then_some(value)
}
692
693pub struct BytesView<'a>(Ref<'a, DOMStringType>);
694
695impl Deref for BytesView<'_> {
696    type Target = [u8];
697
698    fn deref(&self) -> &Self::Target {
699        // This does the correct thing by the construction of BytesView in `DOMString::as_bytes`.
700        self.0.as_raw_bytes()
701    }
702}
703
704impl Ord for DOMString {
705    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
706        self.make_rust();
707        other.make_rust();
708        self.str().cmp(&other.str())
709    }
710}
711
712impl PartialOrd for DOMString {
713    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
714        self.make_rust();
715        other.make_rust();
716        self.str().partial_cmp(&other.str())
717    }
718}
719
720impl Extend<char> for DOMString {
721    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
722        self.make_rust();
723        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
724            s.extend(iter)
725        }
726    }
727}
728
729impl ToJSValConvertible for DOMString {
730    unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
731        let val = self.0.borrow();
732        match *val {
733            DOMStringType::Rust(ref s) => unsafe {
734                s.to_jsval(cx, rval);
735            },
736            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
737                rval.set(StringValue(&*rooted_traceable_box.get()));
738            },
739            #[cfg(test)]
740            DOMStringType::Latin1Vec(ref items) => {
741                let mut v = vec![0; items.len() * 2];
742                let real_size =
743                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
744                v.truncate(real_size);
745
746                String::from_utf8(v)
747                    .expect("Error in constructin test string")
748                    .to_jsval(cx, rval);
749            },
750        };
751    }
752}
753
754impl std::hash::Hash for DOMString {
755    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
756        self.make_rust();
757        self.str().hash(state);
758    }
759}
760
761impl std::fmt::Display for DOMString {
762    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
763        self.make_rust();
764        fmt::Display::fmt(self.str().deref(), f)
765    }
766}
767
768impl Default for DOMString {
769    fn default() -> Self {
770        DOMString::new()
771    }
772}
773
774impl std::cmp::PartialEq<str> for DOMString {
775    fn eq(&self, other: &str) -> bool {
776        if other.is_ascii() {
777            other.as_bytes() ==
778                match self.view().encoded_bytes() {
779                    EncodedBytes::Latin1Bytes(items) => items,
780                    EncodedBytes::Utf8Bytes(s) => s,
781                }
782        } else {
783            self.make_rust();
784            self.str().deref() == other
785        }
786    }
787}
788
789impl std::cmp::PartialEq<&str> for DOMString {
790    fn eq(&self, other: &&str) -> bool {
791        if other.is_ascii() {
792            other.as_bytes() ==
793                match self.view().encoded_bytes() {
794                    EncodedBytes::Latin1Bytes(items) => items,
795                    EncodedBytes::Utf8Bytes(s) => s,
796                }
797        } else {
798            self.make_rust();
799            self.str().deref() == *other
800        }
801    }
802}
803
804impl std::cmp::PartialEq<String> for DOMString {
805    fn eq(&self, other: &String) -> bool {
806        if other.is_ascii() {
807            other.as_bytes() ==
808                match self.view().encoded_bytes() {
809                    EncodedBytes::Latin1Bytes(items) => items,
810                    EncodedBytes::Utf8Bytes(s) => s,
811                }
812        } else {
813            self.make_rust();
814            self.str().deref() == other
815        }
816    }
817}
818
819impl std::cmp::PartialEq<DOMString> for String {
820    fn eq(&self, other: &DOMString) -> bool {
821        other.eq(self)
822    }
823}
824
825impl std::cmp::PartialEq<DOMString> for str {
826    fn eq(&self, other: &DOMString) -> bool {
827        other.eq(self)
828    }
829}
830
831impl std::cmp::PartialEq for DOMString {
832    fn eq(&self, other: &DOMString) -> bool {
833        let result = match (self.view().encoded_bytes(), other.view().encoded_bytes()) {
834            (EncodedBytes::Latin1Bytes(items), EncodedBytes::Latin1Bytes(other_items)) => {
835                Some(items == other_items)
836            },
837            (EncodedBytes::Latin1Bytes(items), EncodedBytes::Utf8Bytes(other_s))
838                if other_s.is_ascii() =>
839            {
840                Some(items == other_s)
841            },
842            (EncodedBytes::Utf8Bytes(s), EncodedBytes::Latin1Bytes(other_items))
843                if s.is_ascii() =>
844            {
845                Some(s == other_items)
846            },
847            (EncodedBytes::Utf8Bytes(s), EncodedBytes::Utf8Bytes(other_s)) => Some(s == other_s),
848            _ => None,
849        };
850
851        if let Some(eq_result) = result {
852            eq_result
853        } else {
854            self.make_rust();
855            other.make_rust();
856            self.str() == other.str()
857        }
858    }
859}
860
// `Eq` is a marker trait with no methods: it declares that the
// `PartialEq<DOMString>` comparison of `DOMString` is a full equivalence relation.
impl std::cmp::Eq for DOMString {}
862
863impl From<std::string::String> for DOMString {
864    fn from(value: String) -> Self {
865        DOMString::from_string(value)
866    }
867}
868
869impl From<DOMString> for LocalName {
870    fn from(contents: DOMString) -> LocalName {
871        {
872            let view = contents.view();
873            let bytes = view.encoded_bytes();
874            let str = match bytes {
875                EncodedBytes::Latin1Bytes(items) => {
876                    if items.iter().all(|c| c.is_ascii()) {
877                        unsafe { Some(str::from_utf8_unchecked(items)) }
878                    } else {
879                        None
880                    }
881                },
882                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
883            };
884            if let Some(s) = str {
885                return LocalName::from(s);
886            }
887        }
888        contents.make_rust();
889        LocalName::from(contents.str().deref())
890    }
891}
892
893impl From<&DOMString> for LocalName {
894    fn from(contents: &DOMString) -> LocalName {
895        {
896            let view = contents.view();
897            let bytes = view.encoded_bytes();
898            let str = match bytes {
899                EncodedBytes::Latin1Bytes(items) => {
900                    if items.iter().all(|c| c.is_ascii()) {
901                        // This is safe as the string is ascii and it comes from a DOMString
902                        unsafe { Some(str::from_utf8_unchecked(items)) }
903                    } else {
904                        None
905                    }
906                },
907                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
908            };
909            if let Some(s) = str {
910                return LocalName::from(s);
911            }
912        }
913        contents.make_rust();
914        LocalName::from(contents.str().deref())
915    }
916}
917
918impl From<DOMString> for Namespace {
919    fn from(contents: DOMString) -> Namespace {
920        {
921            let view = contents.view();
922            let bytes = view.encoded_bytes();
923            let str = match bytes {
924                EncodedBytes::Latin1Bytes(items) => {
925                    if items.iter().all(|c| c.is_ascii()) {
926                        // This is safe as the string is ascii and it comes from a DOMString
927                        unsafe { Some(str::from_utf8_unchecked(items)) }
928                    } else {
929                        None
930                    }
931                },
932                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
933            };
934            if let Some(s) = str {
935                return Namespace::from(s);
936            }
937        }
938        contents.make_rust();
939        Namespace::from(contents.str().deref())
940    }
941}
942
943impl From<DOMString> for Atom {
944    fn from(contents: DOMString) -> Atom {
945        {
946            let view = contents.view();
947            let bytes = view.encoded_bytes();
948            let str = match bytes {
949                EncodedBytes::Latin1Bytes(items) => {
950                    if items.iter().all(|c| c.is_ascii()) {
951                        // Safety: The string only has ascii chars, hence this is ok.
952                        unsafe { Some(str::from_utf8_unchecked(items)) }
953                    } else {
954                        None
955                    }
956                },
957                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
958            };
959            if let Some(s) = str {
960                return Atom::from(s);
961            }
962        }
963        contents.make_rust();
964        Atom::from(contents.str().deref())
965    }
966}
967
968impl From<&str> for DOMString {
969    fn from(contents: &str) -> DOMString {
970        DOMString(RefCell::new(DOMStringType::Rust(String::from(contents))))
971    }
972}
973
974impl From<DOMString> for String {
975    fn from(val: DOMString) -> Self {
976        val.make_rust();
977        val.str().to_owned()
978    }
979}
980
981impl From<DOMString> for Vec<u8> {
982    fn from(value: DOMString) -> Self {
983        value.make_rust();
984        value.str().as_bytes().to_vec()
985    }
986}
987
988impl From<Cow<'_, str>> for DOMString {
989    fn from(value: Cow<'_, str>) -> Self {
990        DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
991    }
992}
993
/// Implementation detail of `match_domstring_ascii!`; not meant to be invoked directly.
///
/// Expands a list of `literal => expr,` arms into an `if`/`else` chain that
/// compares `$variant(<literal bytes>)` against `$input`. The list must end
/// with a single `pattern => expr,` arm (typically `_`), including the
/// trailing comma, which becomes a one-arm `match` on `$input`.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // Recursive case: one string-literal arm followed by the remaining arms.
    ($variant: expr, $input: expr, $p: literal => $then: expr, $($rest:tt)*) => {
        if {
            // Only ASCII literals are supported; this is checked when debug assertions are enabled.
            debug_assert!(($p).is_ascii());
            $variant($p.as_bytes())
        } == $input {
          $then
        } else {
            match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // Base case: the final arm is an arbitrary pattern and needs the trailing comma.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
1013
/// Matches a `DOMString` against string literals without converting its representation.
///
/// Only ASCII literals are allowed. A non-ASCII literal trips a
/// `debug_assert!` when debug assertions are enabled; without them the
/// macro may give wrong results. The final arm must be a pattern
/// (typically `_`) followed by a trailing comma.
/// ```ignore
/// let s = DOMString::from_string(String::from("test"));
/// let value = match_domstring_ascii!(s,
/// "test1" => 1,
/// "test2" => 2,
/// "test" => 3,
/// _ => 4,
/// );
/// assert_eq!(value, 3);
/// ```
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::match_domstring_ascii_inner;
            use $crate::domstring::EncodedBytes;

            let view = $input.view();
            let s = view.encoded_bytes();
            // Wrap each literal in the same variant as the input so the
            // comparison is made between values of the same variant.
            if matches!(s, EncodedBytes::Latin1Bytes(_)) {
                match_domstring_ascii_inner!(EncodedBytes::Latin1Bytes, s, $($tail)*)
            } else {
                match_domstring_ascii_inner!(EncodedBytes::Utf8Bytes, s, $($tail)*)
            }
        }
    };
}
1044
1045#[cfg(test)]
1046mod tests {
1047    use super::*;
1048
    // Latin-1 code unit of the pilcrow sign '¶' (U+00B6).
    const LATIN1_PILLCROW: u8 = 0xB6;
    // UTF-8 encoding of U+00B6 (0xC2 0xB6).
    const UTF8_PILLCROW: [u8; 2] = [194, 182];
    // Latin-1 code unit of superscript two '²' (U+00B2).
    const LATIN1_POWER2: u8 = 0xB2;
1052
1053    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
1054        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
1055    }
1056
1057    #[test]
1058    fn string_functions() {
1059        let s = DOMString::from("AbBcC❤&%$#");
1060        let s_copy = s.clone();
1061        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
1062        assert_eq!(s, s_copy);
1063        assert_eq!(s.len(), 12);
1064        assert_eq!(s_copy.len(), 12);
1065        assert!(s.starts_with('A'));
1066        let s2 = DOMString::from("");
1067        assert!(s2.is_empty());
1068    }
1069
1070    #[test]
1071    fn string_functions_latin1() {
1072        {
1073            let s = from_latin1(vec![
1074                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
1075            ]);
1076            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
1077        }
1078        {
1079            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
1080            assert_eq!(s.to_ascii_lowercase(), "abbcc");
1081        }
1082        {
1083            let s = from_latin1(vec![
1084                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
1085            ]);
1086            assert_eq!(s.len(), 11);
1087            assert!(s.starts_with('A'));
1088        }
1089        {
1090            let s = from_latin1(vec![]);
1091            assert!(s.is_empty());
1092        }
1093    }
1094
1095    #[test]
1096    fn test_length() {
1097        let s1 = from_latin1(vec![
1098            0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
1099            0xAE, 0xAF,
1100        ]);
1101        let s2 = from_latin1(vec![
1102            0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
1103            0xBE, 0xBF,
1104        ]);
1105        let s3 = from_latin1(vec![
1106            0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
1107            0xCE, 0xCF,
1108        ]);
1109        let s4 = from_latin1(vec![
1110            0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
1111            0xDE, 0xDF,
1112        ]);
1113        let s5 = from_latin1(vec![
1114            0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
1115            0xEE, 0xEF,
1116        ]);
1117        let s6 = from_latin1(vec![
1118            0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
1119            0xFE, 0xFF,
1120        ]);
1121
1122        let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
1123        let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
1124        let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
1125        let s4_utf8 = String::from("ÐÑÒÓÔÕÖרÙÚÛÜÝÞß");
1126        let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
1127        let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");
1128
1129        assert_eq!(s1.len(), s1_utf8.len());
1130        assert_eq!(s2.len(), s2_utf8.len());
1131        assert_eq!(s3.len(), s3_utf8.len());
1132        assert_eq!(s4.len(), s4_utf8.len());
1133        assert_eq!(s5.len(), s5_utf8.len());
1134        assert_eq!(s6.len(), s6_utf8.len());
1135
1136        s1.make_rust();
1137        s2.make_rust();
1138        s3.make_rust();
1139        s4.make_rust();
1140        s5.make_rust();
1141        s6.make_rust();
1142        assert_eq!(s1.len(), s1_utf8.len());
1143        assert_eq!(s2.len(), s2_utf8.len());
1144        assert_eq!(s3.len(), s3_utf8.len());
1145        assert_eq!(s4.len(), s4_utf8.len());
1146        assert_eq!(s5.len(), s5_utf8.len());
1147        assert_eq!(s6.len(), s6_utf8.len());
1148    }
1149
1150    #[test]
1151    fn test_convert() {
1152        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1153        s.make_rust();
1154        assert_eq!(&*s.str(), "abc%$");
1155    }
1156
1157    #[test]
1158    fn partial_eq() {
1159        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1160        let string = String::from("abc%$");
1161        let s2 = DOMString::from_string(string.clone());
1162        assert_eq!(s, s2);
1163        assert_eq!(s, string);
1164    }
1165
1166    #[test]
1167    fn encoded_bytes() {
1168        let bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
1169        let s = from_latin1(bytes.clone());
1170        if let EncodedBytes::Latin1Bytes(s) = s.view().encoded_bytes() {
1171            assert_eq!(s, bytes)
1172        }
1173    }
1174
1175    #[test]
1176    fn testing_stringview() {
1177        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1178
1179        assert_eq!(
1180            s.str().chars().collect::<Vec<char>>(),
1181            vec!['a', 'b', 'c', '%', '$', '²']
1182        );
1183        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
1184    }
1185
1186    // We need to be extra careful here as two strings that have different
1187    // representation need to have the same hash.
1188    // Additionally, the interior mutability is only used for the conversion
1189    // which is forced by Hash. Hence, it is safe to have this interior mutability.
1190    #[test]
1191    fn test_hash() {
1192        use std::hash::{DefaultHasher, Hash, Hasher};
1193        fn hash_value(d: &DOMString) -> u64 {
1194            let mut hasher = DefaultHasher::new();
1195            d.hash(&mut hasher);
1196            hasher.finish()
1197        }
1198
1199        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1200        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1201        s_converted.make_rust();
1202        let s2 = DOMString::from_string(String::from("abc%$²"));
1203
1204        let hash_s = hash_value(&s);
1205        let hash_s_converted = hash_value(&s_converted);
1206        let hash_s2 = hash_value(&s2);
1207
1208        assert_eq!(hash_s, hash_s2);
1209        assert_eq!(hash_s, hash_s_converted);
1210    }
1211
1212    // Testing match_lazydomstring if it executes the statements in the match correctly
1213    #[test]
1214    fn test_match_executing() {
1215        // executing
1216        {
1217            let s = from_latin1(vec![b'a', b'b', b'c']);
1218            match_domstring_ascii!( s,
1219                "abc" => assert!(true),
1220                "bcd" => assert!(false),
1221                _ =>  (),
1222            );
1223        }
1224
1225        {
1226            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1227            match_domstring_ascii!( s,
1228                "abc/" => assert!(true),
1229                "bcd" => assert!(false),
1230                _ =>  (),
1231            );
1232        }
1233
1234        {
1235            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1236            match_domstring_ascii!( s,
1237                "bcd" => assert!(false),
1238                "abc%$" => assert!(true),
1239                _ => (),
1240            );
1241        }
1242
1243        {
1244            let s = DOMString::from_string(String::from("abcde"));
1245            match_domstring_ascii!( s,
1246                "abc" => assert!(false),
1247                "bcd" => assert!(false),
1248                _ => assert!(true),
1249            );
1250        }
1251        {
1252            let s = DOMString::from_string(String::from("abc%$"));
1253            match_domstring_ascii!( s,
1254                "bcd" => assert!(false),
1255                "abc%$" => assert!(true),
1256                _ =>  (),
1257            );
1258        }
1259        {
1260            let s = from_latin1(vec![b'a', b'b', b'c']);
1261            match_domstring_ascii!( s,
1262                "abcdd" => assert!(false),
1263                "bcd" => assert!(false),
1264                _ => (),
1265            );
1266        }
1267    }
1268
1269    // Testing match_lazydomstring if it evaluates to the correct expression
1270    #[test]
1271    fn test_match_returning_result() {
1272        {
1273            let s = from_latin1(vec![b'a', b'b', b'c']);
1274            let res = match_domstring_ascii!( s,
1275                "abc" => true,
1276                "bcd" => false,
1277                _ => false,
1278            );
1279            assert_eq!(res, true);
1280        }
1281        {
1282            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1283            let res = match_domstring_ascii!( s,
1284                "abc/" => true,
1285                "bcd" => false,
1286                _ => false,
1287            );
1288            assert_eq!(res, true);
1289        }
1290        {
1291            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1292            let res = match_domstring_ascii!( s,
1293                "bcd" => false,
1294                "abc%$" => true,
1295                _ => false,
1296            );
1297            assert_eq!(res, true);
1298        }
1299
1300        {
1301            let s = DOMString::from_string(String::from("abcde"));
1302            let res = match_domstring_ascii!( s,
1303                "abc" => false,
1304                "bcd" => false,
1305                _ => true,
1306            );
1307            assert_eq!(res, true);
1308        }
1309        {
1310            let s = DOMString::from_string(String::from("abc%$"));
1311            let res = match_domstring_ascii!( s,
1312                "bcd" => false,
1313                "abc%$" => true,
1314                _ => false,
1315            );
1316            assert_eq!(res, true);
1317        }
1318        {
1319            let s = from_latin1(vec![b'a', b'b', b'c']);
1320            let res = match_domstring_ascii!( s,
1321                "abcdd" => false,
1322                "bcd" => false,
1323                _ => true,
1324            );
1325            assert_eq!(res, true);
1326        }
1327    }
1328
1329    #[test]
1330    #[should_panic]
1331    fn test_match_panic() {
1332        let s = DOMString::from_string(String::from("abcd"));
1333        let _res = match_domstring_ascii!(s,
1334            "❤" => true,
1335            _ => false,);
1336    }
1337
1338    #[test]
1339    #[should_panic]
1340    fn test_match_panic2() {
1341        let s = DOMString::from_string(String::from("abcd"));
1342        let _res = match_domstring_ascii!(s,
1343            "abc" => false,
1344            "❤" => true,
1345            _ => false,
1346        );
1347    }
1348
1349    #[test]
1350    fn test_strip_whitespace() {
1351        {
1352            let mut s = from_latin1(vec![
1353                b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
1354            ]);
1355
1356            s.strip_leading_and_trailing_ascii_whitespace();
1357            s.make_rust();
1358            assert_eq!(&*s.str(), "abc%$²");
1359        }
1360        {
1361            let mut s = DOMString::from_string(String::from("   \n  abc%$ "));
1362
1363            s.strip_leading_and_trailing_ascii_whitespace();
1364            s.make_rust();
1365            assert_eq!(&*s.str(), "abc%$");
1366        }
1367    }
1368
1369    // https://infra.spec.whatwg.org/#ascii-whitespace
1370    #[test]
1371    fn contains_html_space_characters() {
1372        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_TAB, b'a', b'a']); // TAB
1373        assert!(s.contains_html_space_characters());
1374        s.make_rust();
1375        assert!(s.contains_html_space_characters());
1376
1377        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_NEWLINE, b'a', b'a']); // NEWLINE
1378        assert!(s.contains_html_space_characters());
1379        s.make_rust();
1380        assert!(s.contains_html_space_characters());
1381
1382        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_FORMFEED, b'a', b'a']); // FF
1383        assert!(s.contains_html_space_characters());
1384        s.make_rust();
1385        assert!(s.contains_html_space_characters());
1386
1387        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_CR, b'a', b'a']); // Carriage Return
1388        assert!(s.contains_html_space_characters());
1389        s.make_rust();
1390        assert!(s.contains_html_space_characters());
1391
1392        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']); // SPACE
1393        assert!(s.contains_html_space_characters());
1394        s.make_rust();
1395        assert!(s.contains_html_space_characters());
1396
1397        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
1398        assert!(!s.contains_html_space_characters());
1399        s.make_rust();
1400        assert!(!s.contains_html_space_characters());
1401    }
1402
1403    #[test]
1404    fn atom() {
1405        let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
1406        let atom1 = Atom::from(s);
1407        let s2 = DOMString::from_string(String::from("aaa aa"));
1408        let atom2 = Atom::from(s2);
1409        assert_eq!(atom1, atom2);
1410        let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
1411        let atom3 = Atom::from(s3);
1412        assert_ne!(atom1, atom3);
1413    }
1414
1415    #[test]
1416    fn namespace() {
1417        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1418        let atom1 = Namespace::from(s);
1419        let s2 = DOMString::from_string(String::from("aaa aa"));
1420        let atom2 = Namespace::from(s2);
1421        assert_eq!(atom1, atom2);
1422        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1423        let atom3 = Namespace::from(s3);
1424        assert_ne!(atom1, atom3);
1425    }
1426
1427    #[test]
1428    fn localname() {
1429        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1430        let atom1 = LocalName::from(s);
1431        let s2 = DOMString::from_string(String::from("aaa aa"));
1432        let atom2 = LocalName::from(s2);
1433        assert_eq!(atom1, atom2);
1434        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1435        let atom3 = LocalName::from(s3);
1436        assert_ne!(atom1, atom3);
1437    }
1438
1439    #[test]
1440    fn is_ascii_lowercase() {
1441        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1442        assert!(!s.is_ascii_lowercase());
1443        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
1444        assert!(!s.is_ascii_lowercase());
1445        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
1446        assert!(s.is_ascii_lowercase());
1447        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
1448        assert!(!s.is_ascii_lowercase());
1449        let s = DOMString::from_string(String::from("`aaaz"));
1450        assert!(!s.is_ascii_lowercase());
1451        let s = DOMString::from_string(String::from("aaaz"));
1452        assert!(s.is_ascii_lowercase());
1453    }
1454
1455    #[test]
1456    fn test_as_bytes() {
1457        const ASCII_SMALL_A: u8 = b'a';
1458        const ASCII_SMALL_Z: u8 = b'z';
1459
1460        let v1 = vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'];
1461        let s = from_latin1(v1.clone());
1462        assert_eq!(
1463            *s.as_bytes(),
1464            [
1465                ASCII_SMALL_A,
1466                ASCII_SMALL_A,
1467                ASCII_SMALL_A,
1468                UTF8_PILLCROW[0],
1469                UTF8_PILLCROW[1],
1470                ASCII_SMALL_A,
1471                ASCII_SMALL_A
1472            ]
1473        );
1474
1475        let v2 = vec![b'a', b'a', b'a', b'a', b'z'];
1476        let s = from_latin1(v2.clone());
1477        assert_eq!(
1478            *s.as_bytes(),
1479            [
1480                ASCII_SMALL_A,
1481                ASCII_SMALL_A,
1482                ASCII_SMALL_A,
1483                ASCII_SMALL_A,
1484                ASCII_SMALL_Z
1485            ]
1486        );
1487
1488        let str = "abc%$²".to_owned();
1489        let s = DOMString::from(str.clone());
1490        assert_eq!(&*s.as_bytes(), str.as_bytes());
1491        let str = "AbBcC❤&%$#".to_owned();
1492        let s = DOMString::from(str.clone());
1493        assert_eq!(&*s.as_bytes(), str.as_bytes());
1494    }
1495}