script_bindings/
domstring.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::{Chars, FromStr};
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::conversions::{ToJSValConvertible, jsstr_to_string};
17use js::gc::MutableHandleValue;
18use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
19use js::jsval::StringValue;
20use js::rust::{Runtime, Trace};
21use malloc_size_of::MallocSizeOfOps;
22use num_traits::{ToPrimitive, Zero};
23use regex::Regex;
24use style::Atom;
25use style::str::HTML_SPACE_CHARACTERS;
26
27use crate::script_runtime::JSContext as SafeJSContext;
28use crate::trace::RootedTraceableBox;
29
/// Largest byte value that is still ASCII (U+007F DELETE). Every Latin1 byte above it
/// needs two bytes once encoded as UTF-8.
const ASCII_END: u8 = 0x7F;
/// `A`
const ASCII_CAPITAL_A: u8 = 0x41;
/// `Z`
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `a`
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `z`
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// U+0009 CHARACTER TABULATION
const ASCII_TAB: u8 = 0x09;
/// U+000A LINE FEED
const ASCII_NEWLINE: u8 = 0x0A;
/// U+000C FORM FEED
const ASCII_FORMFEED: u8 = 0x0C;
/// U+000D CARRIAGE RETURN
const ASCII_CR: u8 = 0x0D;
/// U+0020 SPACE
const ASCII_SPACE: u8 = 0x20;
40
41/// Gets the latin1 bytes from the js engine.
42/// Safety: Make sure the *mut JSString is not null.
43unsafe fn get_latin1_string_bytes(
44    rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
45) -> &[u8] {
46    debug_assert!(!rooted_traceable_box.get().is_null());
47    let mut length = 0;
48    unsafe {
49        let chars = JS_GetLatin1StringCharsAndLength(
50            Runtime::get().expect("JS runtime has shut down").as_ptr(),
51            ptr::null(),
52            rooted_traceable_box.get(),
53            &mut length,
54        );
55        assert!(!chars.is_null());
56        slice::from_raw_parts(chars, length)
57    }
58}
59
/// The raw bytes backing a string, tagged with the encoding they are stored in
/// (either Latin1 or UTF-8).
#[derive(Debug, Eq, PartialEq)]
pub enum EncodedBytes<'a> {
    /// Bytes in Latin1 encoding.
    Latin1Bytes(&'a [u8]),
    /// Bytes of an ordinary UTF-8 string.
    Utf8Bytes(&'a [u8]),
}
68
69enum DOMStringType {
70    /// A simple rust string
71    Rust(String),
72    /// A JS String stored in mozjs.
73    JSString(RootedTraceableBox<Heap<*mut JSString>>),
74    #[cfg(test)]
75    /// This is used for testing of the bindings to give
76    /// a raw u8 Latin1 encoded string without having a js engine.
77    Latin1Vec(Vec<u8>),
78}
79
80impl DOMStringType {
81    /// Returns the str if Rust and otherwise panic. You need to call `make_rust`.
82    fn str(&self) -> &str {
83        match self {
84            DOMStringType::Rust(s) => s,
85            DOMStringType::JSString(_rooted_traceable_box) => {
86                panic!("Cannot do a string")
87            },
88            #[cfg(test)]
89            &DOMStringType::Latin1Vec(_) => panic!("Cannot do a string"),
90        }
91    }
92
93    /// Warning:
94    /// This function does not checking and just returns the raw bytes of teh string,
95    /// independently if they are  utf8 or latin1.
96    /// The caller needs to take care that these make sense in context.
97    fn as_raw_bytes(&self) -> &[u8] {
98        match self {
99            DOMStringType::Rust(s) => s.as_bytes(),
100            DOMStringType::JSString(rooted_traceable_box) => unsafe {
101                get_latin1_string_bytes(rooted_traceable_box)
102            },
103            #[cfg(test)]
104            DOMStringType::Latin1Vec(items) => items,
105        }
106    }
107}
108
109#[derive(Debug)]
110/// A view of the underlying string. This is always converted to Utf8.
111pub struct StringView<'a>(Ref<'a, DOMStringType>);
112
113impl<'a> StringView<'a> {
114    pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
115        self.0
116            .str()
117            .split(HTML_SPACE_CHARACTERS)
118            .filter(|s| !s.is_empty())
119    }
120
121    pub fn strip_prefix(&self, needle: &str) -> Option<&str> {
122        self.0.str().strip_prefix(needle)
123    }
124
125    pub fn chars(&self) -> Chars<'_> {
126        self.0.str().chars()
127    }
128
129    pub fn as_bytes(&self) -> &[u8] {
130        self.0.str().as_bytes()
131    }
132}
133
134impl Deref for StringView<'_> {
135    type Target = str;
136    fn deref(&self) -> &str {
137        self.0.str()
138    }
139}
140
141impl AsRef<str> for StringView<'_> {
142    fn as_ref(&self) -> &str {
143        self.deref()
144    }
145}
146
147impl PartialEq for StringView<'_> {
148    fn eq(&self, other: &Self) -> bool {
149        self.0.str() == other.0.str()
150    }
151}
152
153impl PartialEq<&str> for StringView<'_> {
154    fn eq(&self, other: &&str) -> bool {
155        self.0.str() == *other
156    }
157}
158
159impl Eq for StringView<'_> {}
160
161impl PartialOrd for StringView<'_> {
162    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
163        self.0.str().partial_cmp(other.0.str())
164    }
165}
166
167impl Ord for StringView<'_> {
168    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
169        self.0.str().cmp(other.0.str())
170    }
171}
172
173impl From<StringView<'_>> for String {
174    fn from(value: StringView<'_>) -> Self {
175        String::from(value.0.str())
176    }
177}
178
179/// Safety comment:
180///
181/// This method will _not_ trace the pointer if the rust string exists.
182/// The js string could be garbage collected and, hence, violating this
183/// could lead to undefined behavior
184unsafe impl Trace for DOMStringType {
185    unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
186        unsafe {
187            match self {
188                DOMStringType::Rust(_s) => {},
189                DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
190                #[cfg(test)]
191                DOMStringType::Latin1Vec(_s) => {},
192            }
193        }
194    }
195}
196
197impl malloc_size_of::MallocSizeOf for DOMStringType {
198    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
199        match self {
200            DOMStringType::Rust(s) => s.size_of(ops),
201            DOMStringType::JSString(_rooted_traceable_box) => {
202                // Managed by JS Engine
203                0
204            },
205            #[cfg(test)]
206            DOMStringType::Latin1Vec(s) => s.size_of(ops),
207        }
208    }
209}
210
211impl std::fmt::Debug for DOMStringType {
212    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213        match self {
214            DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
215            DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
216            #[cfg(test)]
217            DOMStringType::Latin1Vec(s) => f
218                .debug_struct("DOMString")
219                .field("latin1_string", s)
220                .finish(),
221        }
222    }
223}
224
225#[derive(Debug)]
226/// A view of the underlying string. This is never converted to Utf8
227pub struct EncodedBytesView<'a>(Ref<'a, DOMStringType>);
228
229impl EncodedBytesView<'_> {
230    /// Get the bytes of the string in either latin1 or utf8 without costly conversion.
231    pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
232        match *self.0 {
233            DOMStringType::Rust(ref s) => EncodedBytes::Utf8Bytes(s.as_bytes()),
234            DOMStringType::JSString(ref rooted_traceable_box) => {
235                EncodedBytes::Latin1Bytes(unsafe { get_latin1_string_bytes(rooted_traceable_box) })
236            },
237            #[cfg(test)]
238            DOMStringType::Latin1Vec(ref s) => EncodedBytes::Latin1Bytes(s),
239        }
240    }
241
242    fn is_empty(&self) -> bool {
243        match self.encoded_bytes() {
244            EncodedBytes::Latin1Bytes(items) => items.is_empty(),
245            EncodedBytes::Utf8Bytes(s) => s.is_empty(),
246        }
247    }
248
249    fn len(&self) -> usize {
250        match self.encoded_bytes() {
251            EncodedBytes::Latin1Bytes(items) => items
252                .iter()
253                .map(|b| if *b <= ASCII_END { 1 } else { 2 })
254                .sum(),
255            EncodedBytes::Utf8Bytes(s) => s.len(),
256        }
257    }
258}
259
260////// A DOMString.
261///
262/// This type corresponds to the [`DOMString`] type in WebIDL.
263///
264/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString
265///
266/// Conceptually, a DOMString has the same value space as a JavaScript String,
267/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with
268/// unpaired surrogates present (also sometimes called WTF-16).
269///
270/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have
271/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values
272/// can not be represented as a Rust `String`). This introduces the question of
273/// what to do with values being passed from JavaScript to Rust that contain
274/// unpaired surrogates.
275///
276/// The hypothesis is that it does not matter much how exactly those values are
277/// transformed, because  passing unpaired surrogates into the DOM is very rare.
278/// Instead Servo withh replace the unpaired surrogate by a U+FFFD replacement
279/// character.
280///
281/// Currently, the lack of crash reports about this issue provides some
282/// evidence to support the hypothesis. This evidence will hopefully be used to
283/// convince other browser vendors that it would be safe to replace unpaired
284/// surrogates at the boundary between JavaScript and native code. (This would
285/// unify the `DOMString` and `USVString` types, both in the WebIDL standard
286/// and in Servo.)
287///
288/// This string class will keep either the Reference to the mozjs object alive
289/// or will have an internal rust string.
290/// We currently default to doing most of the string operation on the rust side.
291/// You should use `str()` to get the Rust string (represented by a `StringView`
292/// which you can deref to a string). You should assume that this conversion costs.
293/// You should assume that all the functions incur the conversion cost.
294///
295#[repr(transparent)]
296#[derive(Debug, MallocSizeOf, JSTraceable)]
297pub struct DOMString(RefCell<DOMStringType>);
298
299impl Clone for DOMString {
300    fn clone(&self) -> Self {
301        self.make_rust();
302        if let DOMStringType::Rust(ref s) = *self.0.borrow() {
303            DOMString::from_string(s.to_owned())
304        } else {
305            unreachable!()
306        }
307    }
308}
309
/// Errors that can occur while creating a `DOMString`.
pub enum DOMStringErrorType {
    /// Converting the JS value to a JS string failed.
    JSConversionError,
}
313
314impl DOMString {
315    /// Creates a new `DOMString`.
316    pub fn new() -> DOMString {
317        DOMString(RefCell::new(DOMStringType::Rust(String::new())))
318    }
319
320    /// Creates the string from js. If the string can be encoded in latin1, just take the reference
321    /// to the JSString. Otherwise do the conversion to utf8 now.
322    pub fn from_js_string(
323        cx: SafeJSContext,
324        value: js::gc::HandleValue,
325    ) -> Result<DOMString, DOMStringErrorType> {
326        let string_ptr = unsafe { js::rust::ToString(*cx, value) };
327        if string_ptr.is_null() {
328            debug!("ToString failed");
329            Err(DOMStringErrorType::JSConversionError)
330        } else {
331            let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
332            let inner = if latin1 {
333                let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
334                DOMStringType::JSString(h)
335            } else {
336                // We need to convert the string anyway as it is not just latin1
337                DOMStringType::Rust(unsafe {
338                    jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
339                })
340            };
341            Ok(DOMString(RefCell::new(inner)))
342        }
343    }
344
345    pub fn from_string(s: String) -> DOMString {
346        DOMString(RefCell::new(DOMStringType::Rust(s)))
347    }
348
349    /// Transforms the string into rust string if not yet a rust string.
350    fn make_rust(&self) {
351        let string = {
352            let inner = self.0.borrow();
353            match *inner {
354                DOMStringType::Rust(_) => return,
355                DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
356                    jsstr_to_string(
357                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
358                        NonNull::new(rooted_traceable_box.get()).unwrap(),
359                    )
360                },
361                #[cfg(test)]
362                DOMStringType::Latin1Vec(ref items) => {
363                    let mut v = vec![0; items.len() * 2];
364                    let real_size = tendril::encoding_rs::mem::convert_latin1_to_utf8(
365                        items.as_slice(),
366                        v.as_mut_slice(),
367                    );
368                    v.truncate(real_size);
369
370                    // Safety: convert_latin1_to_utf8 converts the raw bytes to utf8 and the
371                    // buffer is the size specified in the documentation, so this should be safe.
372                    unsafe { String::from_utf8_unchecked(v) }
373                },
374            }
375        };
376        *self.0.borrow_mut() = DOMStringType::Rust(string);
377    }
378
379    /// Debug the current  state of the string without modifying it.
380    #[expect(unused)]
381    fn debug_js(&self) {
382        match *self.0.borrow() {
383            DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
384            DOMStringType::JSString(ref rooted_traceable_box) => {
385                let s = unsafe {
386                    jsstr_to_string(
387                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
388                        ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
389                    )
390                };
391                info!("JSString ({})", s);
392            },
393            #[cfg(test)]
394            DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
395        }
396    }
397
398    /// Returns the underlying rust string.
399    pub fn str(&self) -> StringView<'_> {
400        self.make_rust();
401        StringView(self.0.borrow())
402    }
403
404    /// Use this if you want to work on the `EncodedBytes` directly.
405    /// This will not do any conversions for you.
406    pub fn view(&self) -> EncodedBytesView<'_> {
407        EncodedBytesView(self.0.borrow())
408    }
409
410    pub fn clear(&mut self) {
411        *self.0.borrow_mut() = DOMStringType::Rust(String::new())
412    }
413
414    pub fn is_empty(&self) -> bool {
415        self.view().is_empty()
416    }
417
418    /// This length (as rust spec) is in bytes if the string would be utf8 not chars.
419    pub fn len(&self) -> usize {
420        self.view().len()
421    }
422
423    pub fn make_ascii_lowercase(&mut self) {
424        self.make_rust();
425        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
426            s.make_ascii_lowercase();
427        }
428    }
429
430    pub fn push_str(&mut self, s: &str) {
431        self.make_rust();
432        if let DOMStringType::Rust(ref mut string) = *self.0.borrow_mut() {
433            string.push_str(s)
434        }
435    }
436
437    pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
438        if self.is_empty() {
439            return;
440        }
441
442        self.make_rust();
443        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
444            let trailing_whitespace_len = s
445                .trim_end_matches(|ref c| char::is_ascii_whitespace(c))
446                .len();
447            s.truncate(trailing_whitespace_len);
448            if s.is_empty() {
449                return;
450            }
451
452            let first_non_whitespace = s.find(|ref c| !char::is_ascii_whitespace(c)).unwrap();
453            s.replace_range(0..first_non_whitespace, "");
454        }
455    }
456
457    /// This is a dom spec
458    pub fn is_valid_floating_point_number_string(&self) -> bool {
459        static RE: LazyLock<Regex> = LazyLock::new(|| {
460            Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
461        });
462        self.make_rust();
463
464        if let DOMStringType::Rust(ref s) = *self.0.borrow() {
465            RE.is_match(s) && self.parse_floating_point_number().is_some()
466        } else {
467            unreachable!()
468        }
469    }
470
471    pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
472        self.make_rust();
473        self.str().parse::<T>()
474    }
475
476    /// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
477    pub fn parse_floating_point_number(&self) -> Option<f64> {
478        self.make_rust();
479        parse_floating_point_number(&self.str())
480    }
481
482    /// <https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number>
483    pub fn set_best_representation_of_the_floating_point_number(&mut self) {
484        if let Some(val) = self.parse_floating_point_number() {
485            // [tc39] Step 2: If x is either +0 or -0, return "0".
486            let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
487
488            *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
489        }
490    }
491
492    pub fn to_lowercase(&self) -> String {
493        self.make_rust();
494        self.str().to_lowercase()
495    }
496
497    pub fn to_uppercase(&self) -> String {
498        self.make_rust();
499        self.str().to_uppercase()
500    }
501
502    pub fn strip_newlines(&mut self) {
503        // > To strip newlines from a string, remove any U+000A LF and U+000D CR code
504        // > points from the string.
505        self.make_rust();
506        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
507            s.retain(|c| c != '\r' && c != '\n');
508        }
509    }
510
511    /// Normalize newlines according to <https://infra.spec.whatwg.org/#normalize-newlines>.
512    pub fn normalize_newlines(&mut self) {
513        self.make_rust();
514        // > To normalize newlines in a string, replace every U+000D CR U+000A LF code point
515        // > pair with a single U+000A LF code point, and then replace every remaining
516        // > U+000D CR code point with a U+000A LF code point.
517        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
518            *s = s.replace("\r\n", "\n").replace("\r", "\n")
519        }
520    }
521
522    pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
523        self.make_rust();
524        let new_string = self.str().to_owned();
525        DOMString(RefCell::new(DOMStringType::Rust(
526            new_string.replace(needle, replace_char),
527        )))
528    }
529
530    /// Pattern is not yet stable in rust, hence, we need different methods for str and char
531    pub fn starts_with(&self, c: char) -> bool {
532        if !c.is_ascii() {
533            self.make_rust();
534            self.str().starts_with(c)
535        } else {
536            match self.view().encoded_bytes() {
537                EncodedBytes::Latin1Bytes(items) => items,
538                EncodedBytes::Utf8Bytes(s) => s,
539            }
540            // For both cases as we tested the char being ascii we can safely convert to a single u8.
541            .starts_with(&[c as u8])
542        }
543    }
544
545    pub fn starts_with_str(&self, needle: &str) -> bool {
546        self.make_rust();
547        self.str().starts_with(needle)
548    }
549
550    pub fn contains(&self, needle: &str) -> bool {
551        self.make_rust();
552        self.str().contains(needle)
553    }
554
555    pub fn to_ascii_lowercase(&self) -> String {
556        let conversion = match self.view().encoded_bytes() {
557            EncodedBytes::Latin1Bytes(items) => {
558                if items.iter().all(|c| *c <= ASCII_END) {
559                    // We are just simple ascii
560                    Some(unsafe {
561                        String::from_utf8_unchecked(
562                            items
563                                .iter()
564                                .map(|c| {
565                                    if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
566                                        c + 32
567                                    } else {
568                                        *c
569                                    }
570                                })
571                                .collect(),
572                        )
573                    })
574                } else {
575                    None
576                }
577            },
578            EncodedBytes::Utf8Bytes(s) => unsafe {
579                // Save because we know it was a utf8 string
580                Some(str::from_utf8_unchecked(s).to_ascii_lowercase())
581            },
582        };
583        // We otherwise would double borrow the refcell
584        if let Some(conversion) = conversion {
585            conversion
586        } else {
587            self.make_rust();
588            self.str().to_ascii_lowercase()
589        }
590    }
591
592    pub fn contains_html_space_characters(&self) -> bool {
593        const SPACE_BYTES: [u8; 5] = [
594            ASCII_TAB,
595            ASCII_NEWLINE,
596            ASCII_FORMFEED,
597            ASCII_CR,
598            ASCII_SPACE,
599        ];
600        match self.view().encoded_bytes() {
601            EncodedBytes::Latin1Bytes(items) => SPACE_BYTES.iter().any(|byte| items.contains(byte)),
602            EncodedBytes::Utf8Bytes(s) => {
603                // Save because we know it was a utf8 string
604                let s = unsafe { str::from_utf8_unchecked(s) };
605                s.contains(HTML_SPACE_CHARACTERS)
606            },
607        }
608    }
609
610    /// This returns the string in utf8 bytes, i.e., `[u8]` encoded with utf8.
611    pub fn as_bytes(&self) -> BytesView<'_> {
612        // BytesView will just give the raw bytes on dereference.
613        // If we are ascii this is the same for latin1 and utf8.
614        // Otherwise we convert to rust.
615        if self.is_ascii() {
616            BytesView(self.0.borrow())
617        } else {
618            self.make_rust();
619            BytesView(self.0.borrow())
620        }
621    }
622
623    /// Tests if there are only ascii lowercase characters. Does not include special characters.
624    pub fn is_ascii_lowercase(&self) -> bool {
625        match self.view().encoded_bytes() {
626            EncodedBytes::Latin1Bytes(items) => items
627                .iter()
628                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
629            EncodedBytes::Utf8Bytes(s) => s
630                .iter()
631                .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
632                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
633        }
634    }
635
636    /// Is the string only ascii characters
637    pub fn is_ascii(&self) -> bool {
638        match self.view().encoded_bytes() {
639            EncodedBytes::Latin1Bytes(items) => items,
640            EncodedBytes::Utf8Bytes(items) => items,
641        }
642        .is_ascii()
643    }
644
645    /// Returns true if the slice only contains bytes that are safe to use in cookie strings.
646    /// <https://www.ietf.org/archive/id/draft-ietf-httpbis-rfc6265bis-15.html#section-5.6-6>
647    /// Not using ServoCookie::is_valid_name_or_value to prevent dependency on the net crate.
648    pub fn is_valid_for_cookie(&self) -> bool {
649        match self.view().encoded_bytes() {
650            EncodedBytes::Latin1Bytes(items) | EncodedBytes::Utf8Bytes(items) => !items
651                .iter()
652                .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
653        }
654    }
655}
656
/// Parses `input` as a floating-point number, ignoring surrounding whitespace.
///
/// Returns `None` if the text does not parse as an `f64`, if the value is infinite or NaN,
/// or if the number is written with a leading `+` or a trailing `.`.
///
/// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    // Steps 15-16 are telling us things about IEEE rounding modes
    // for floating-point significands; this code assumes the Rust
    // compiler already matches them in any cases where
    // that actually matters. They are not
    // related to f64::round(), which is for rounding to integers.
    let trimmed = input.trim();

    // A valid number is the same as what rust considers to be valid,
    // except for "+1", "1.", NaN, and Infinity. The sign and the trailing dot are checked
    // on the same trimmed text that gets parsed, so that surrounding whitespace cannot
    // hide them.
    if trimmed.starts_with('+') || trimmed.ends_with('.') {
        return None;
    }

    trimmed
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite())
}
670
671pub struct BytesView<'a>(Ref<'a, DOMStringType>);
672
673impl Deref for BytesView<'_> {
674    type Target = [u8];
675
676    fn deref(&self) -> &Self::Target {
677        // This does the correct thing by the construction of BytesView in `DOMString::as_bytes`.
678        self.0.as_raw_bytes()
679    }
680}
681
682impl Ord for DOMString {
683    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
684        self.make_rust();
685        other.make_rust();
686        self.str().cmp(&other.str())
687    }
688}
689
690impl PartialOrd for DOMString {
691    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
692        self.make_rust();
693        other.make_rust();
694        self.str().partial_cmp(&other.str())
695    }
696}
697
698impl Extend<char> for DOMString {
699    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
700        self.make_rust();
701        if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
702            s.extend(iter)
703        }
704    }
705}
706
707impl ToJSValConvertible for DOMString {
708    unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
709        let val = self.0.borrow();
710        match *val {
711            DOMStringType::Rust(ref s) => unsafe {
712                s.to_jsval(cx, rval);
713            },
714            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
715                rval.set(StringValue(&*rooted_traceable_box.get()));
716            },
717            #[cfg(test)]
718            DOMStringType::Latin1Vec(ref items) => {
719                let mut v = vec![0; items.len() * 2];
720                let real_size = tendril::encoding_rs::mem::convert_latin1_to_utf8(
721                    items.as_slice(),
722                    v.as_mut_slice(),
723                );
724                v.truncate(real_size);
725
726                String::from_utf8(v)
727                    .expect("Error in constructin test string")
728                    .to_jsval(cx, rval);
729            },
730        };
731    }
732}
733
734impl std::hash::Hash for DOMString {
735    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
736        self.make_rust();
737        self.str().hash(state);
738    }
739}
740
741impl std::fmt::Display for DOMString {
742    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
743        self.make_rust();
744        fmt::Display::fmt(self.str().deref(), f)
745    }
746}
747
748impl Default for DOMString {
749    fn default() -> Self {
750        DOMString::new()
751    }
752}
753
754impl std::cmp::PartialEq<str> for DOMString {
755    fn eq(&self, other: &str) -> bool {
756        if other.is_ascii() {
757            other.as_bytes() ==
758                match self.view().encoded_bytes() {
759                    EncodedBytes::Latin1Bytes(items) => items,
760                    EncodedBytes::Utf8Bytes(s) => s,
761                }
762        } else {
763            self.make_rust();
764            self.str().deref() == other
765        }
766    }
767}
768
769impl std::cmp::PartialEq<&str> for DOMString {
770    fn eq(&self, other: &&str) -> bool {
771        if other.is_ascii() {
772            other.as_bytes() ==
773                match self.view().encoded_bytes() {
774                    EncodedBytes::Latin1Bytes(items) => items,
775                    EncodedBytes::Utf8Bytes(s) => s,
776                }
777        } else {
778            self.make_rust();
779            self.str().deref() == *other
780        }
781    }
782}
783
784impl std::cmp::PartialEq<String> for DOMString {
785    fn eq(&self, other: &String) -> bool {
786        if other.is_ascii() {
787            other.as_bytes() ==
788                match self.view().encoded_bytes() {
789                    EncodedBytes::Latin1Bytes(items) => items,
790                    EncodedBytes::Utf8Bytes(s) => s,
791                }
792        } else {
793            self.make_rust();
794            self.str().deref() == other
795        }
796    }
797}
798
799impl std::cmp::PartialEq<DOMString> for String {
800    fn eq(&self, other: &DOMString) -> bool {
801        other.eq(self)
802    }
803}
804
805impl std::cmp::PartialEq<DOMString> for str {
806    fn eq(&self, other: &DOMString) -> bool {
807        other.eq(self)
808    }
809}
810
811impl std::cmp::PartialEq for DOMString {
812    fn eq(&self, other: &DOMString) -> bool {
813        let result = match (self.view().encoded_bytes(), other.view().encoded_bytes()) {
814            (EncodedBytes::Latin1Bytes(items), EncodedBytes::Latin1Bytes(other_items)) => {
815                Some(items == other_items)
816            },
817            (EncodedBytes::Latin1Bytes(items), EncodedBytes::Utf8Bytes(other_s))
818                if other_s.is_ascii() =>
819            {
820                Some(items == other_s)
821            },
822            (EncodedBytes::Utf8Bytes(s), EncodedBytes::Latin1Bytes(other_items))
823                if s.is_ascii() =>
824            {
825                Some(s == other_items)
826            },
827            (EncodedBytes::Utf8Bytes(s), EncodedBytes::Utf8Bytes(other_s)) => Some(s == other_s),
828            _ => None,
829        };
830
831        if let Some(eq_result) = result {
832            eq_result
833        } else {
834            self.make_rust();
835            other.make_rust();
836            self.str() == other.str()
837        }
838    }
839}
840
841impl std::cmp::Eq for DOMString {}
842
843impl From<std::string::String> for DOMString {
844    fn from(value: String) -> Self {
845        DOMString::from_string(value)
846    }
847}
848
849impl From<DOMString> for LocalName {
850    fn from(contents: DOMString) -> LocalName {
851        {
852            let view = contents.view();
853            let bytes = view.encoded_bytes();
854            let str = match bytes {
855                EncodedBytes::Latin1Bytes(items) => {
856                    if items.iter().all(|c| c.is_ascii()) {
857                        unsafe { Some(str::from_utf8_unchecked(items)) }
858                    } else {
859                        None
860                    }
861                },
862                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
863            };
864            if let Some(s) = str {
865                return LocalName::from(s);
866            }
867        }
868        contents.make_rust();
869        LocalName::from(contents.str().deref())
870    }
871}
872
873impl From<&DOMString> for LocalName {
874    fn from(contents: &DOMString) -> LocalName {
875        {
876            let view = contents.view();
877            let bytes = view.encoded_bytes();
878            let str = match bytes {
879                EncodedBytes::Latin1Bytes(items) => {
880                    if items.iter().all(|c| c.is_ascii()) {
881                        // This is safe as the string is ascii and it comes from a DOMString
882                        unsafe { Some(str::from_utf8_unchecked(items)) }
883                    } else {
884                        None
885                    }
886                },
887                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
888            };
889            if let Some(s) = str {
890                return LocalName::from(s);
891            }
892        }
893        contents.make_rust();
894        LocalName::from(contents.str().deref())
895    }
896}
897
898impl From<DOMString> for Namespace {
899    fn from(contents: DOMString) -> Namespace {
900        {
901            let view = contents.view();
902            let bytes = view.encoded_bytes();
903            let str = match bytes {
904                EncodedBytes::Latin1Bytes(items) => {
905                    if items.iter().all(|c| c.is_ascii()) {
906                        // This is safe as the string is ascii and it comes from a DOMString
907                        unsafe { Some(str::from_utf8_unchecked(items)) }
908                    } else {
909                        None
910                    }
911                },
912                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
913            };
914            if let Some(s) = str {
915                return Namespace::from(s);
916            }
917        }
918        contents.make_rust();
919        Namespace::from(contents.str().deref())
920    }
921}
922
923impl From<DOMString> for Atom {
924    fn from(contents: DOMString) -> Atom {
925        {
926            let view = contents.view();
927            let bytes = view.encoded_bytes();
928            let str = match bytes {
929                EncodedBytes::Latin1Bytes(items) => {
930                    if items.iter().all(|c| c.is_ascii()) {
931                        // Safety: The string only has ascii chars, hence this is ok.
932                        unsafe { Some(str::from_utf8_unchecked(items)) }
933                    } else {
934                        None
935                    }
936                },
937                EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
938            };
939            if let Some(s) = str {
940                return Atom::from(s);
941            }
942        }
943        contents.make_rust();
944        Atom::from(contents.str().deref())
945    }
946}
947
948impl From<&str> for DOMString {
949    fn from(contents: &str) -> DOMString {
950        DOMString(RefCell::new(DOMStringType::Rust(String::from(contents))))
951    }
952}
953
954impl From<DOMString> for String {
955    fn from(val: DOMString) -> Self {
956        val.make_rust();
957        val.str().to_owned()
958    }
959}
960
961impl From<DOMString> for Vec<u8> {
962    fn from(value: DOMString) -> Self {
963        value.make_rust();
964        value.str().as_bytes().to_vec()
965    }
966}
967
968impl From<Cow<'_, str>> for DOMString {
969    fn from(value: Cow<'_, str>) -> Self {
970        DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
971    }
972}
973
/// Implementation detail of `match_domstring_ascii!`.
///
/// Expands a list of `"literal" => expr,` arms followed by one final
/// `pattern => expr,` arm (trailing comma required) into an `if`/`else` chain:
///
/// * `$wrap` is applied to the bytes of each literal (`$wrap(lit.as_bytes())`)
///   and the result is compared with `==` against `$subject`.
/// * `$subject` is the value being matched. It appears once per arm in the
///   expansion, so it should be a plain binding.
/// * Each literal is checked to be ASCII with `debug_assert!`, i.e. only when
///   debug assertions are enabled.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // Literal arm: compare, then either take the arm or continue with the rest.
    ($wrap:expr, $subject:expr, $lit:literal => $body:expr, $($tail:tt)*) => {{
        debug_assert!(($lit).is_ascii());
        if $wrap($lit.as_bytes()) == $subject {
            $body
        } else {
            match_domstring_ascii_inner!($wrap, $subject, $($tail)*)
        }
    }};
    // Final arm: an ordinary pattern (typically `_`) closing the chain.
    ($wrap:expr, $subject:expr, $fallback:pat => $body:expr,) => {
        match $subject {
            $fallback => $body
        }
    }
}
993
/// Matches a `DOMString` against ASCII string literals by comparing its
/// encoded bytes, without converting the string first.
///
/// Every arm except the last must be an ASCII string literal; a non-ASCII
/// literal leads to wrong results (it is only caught by a `debug_assert!`).
/// The last arm must be a pattern such as `_`, and every arm, including the
/// last one, must end with a comma.
///
/// ```ignore
/// let s = DOMString::from_string(String::from("test"));
/// let value = match_domstring_ascii!(s,
///     "test1" => 1,
///     "test2" => 2,
///     "test" => 3,
///     _ => 4,
/// );
/// assert_eq!(value, 3);
/// ```
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::match_domstring_ascii_inner;
            use $crate::domstring::EncodedBytes;

            let view = $input.view();
            let encoded = view.encoded_bytes();
            // Each literal is wrapped in the same variant as the input so that
            // the `==` in the inner macro compares like with like.
            if matches!(encoded, EncodedBytes::Latin1Bytes(_)) {
                match_domstring_ascii_inner!(EncodedBytes::Latin1Bytes, encoded, $($tail)*)
            } else {
                match_domstring_ascii_inner!(EncodedBytes::Utf8Bytes, encoded, $($tail)*)
            }
        }
    };
}
1024
1025#[cfg(test)]
1026mod tests {
1027    use super::*;
1028
1029    const LATIN1_PILLCROW: u8 = 0xB6;
1030    const UTF8_PILLCROW: [u8; 2] = [194, 182];
1031    const LATIN1_POWER2: u8 = 0xB2;
1032
1033    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
1034        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
1035    }
1036
1037    #[test]
1038    fn string_functions() {
1039        let s = DOMString::from("AbBcC❤&%$#");
1040        let s_copy = s.clone();
1041        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
1042        assert_eq!(s, s_copy);
1043        assert_eq!(s.len(), 12);
1044        assert_eq!(s_copy.len(), 12);
1045        assert!(s.starts_with('A'));
1046        let s2 = DOMString::from("");
1047        assert!(s2.is_empty());
1048    }
1049
1050    #[test]
1051    fn string_functions_latin1() {
1052        {
1053            let s = from_latin1(vec![
1054                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
1055            ]);
1056            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
1057        }
1058        {
1059            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
1060            assert_eq!(s.to_ascii_lowercase(), "abbcc");
1061        }
1062        {
1063            let s = from_latin1(vec![
1064                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
1065            ]);
1066            assert_eq!(s.len(), 11);
1067            assert!(s.starts_with('A'));
1068        }
1069        {
1070            let s = from_latin1(vec![]);
1071            assert!(s.is_empty());
1072        }
1073    }
1074
1075    #[test]
1076    fn test_length() {
1077        let s1 = from_latin1(vec![
1078            0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
1079            0xAE, 0xAF,
1080        ]);
1081        let s2 = from_latin1(vec![
1082            0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
1083            0xBE, 0xBF,
1084        ]);
1085        let s3 = from_latin1(vec![
1086            0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
1087            0xCE, 0xCF,
1088        ]);
1089        let s4 = from_latin1(vec![
1090            0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
1091            0xDE, 0xDF,
1092        ]);
1093        let s5 = from_latin1(vec![
1094            0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
1095            0xEE, 0xEF,
1096        ]);
1097        let s6 = from_latin1(vec![
1098            0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
1099            0xFE, 0xFF,
1100        ]);
1101
1102        let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
1103        let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
1104        let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
1105        let s4_utf8 = String::from("ÐÑÒÓÔÕÖרÙÚÛÜÝÞß");
1106        let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
1107        let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");
1108
1109        assert_eq!(s1.len(), s1_utf8.len());
1110        assert_eq!(s2.len(), s2_utf8.len());
1111        assert_eq!(s3.len(), s3_utf8.len());
1112        assert_eq!(s4.len(), s4_utf8.len());
1113        assert_eq!(s5.len(), s5_utf8.len());
1114        assert_eq!(s6.len(), s6_utf8.len());
1115
1116        s1.make_rust();
1117        s2.make_rust();
1118        s3.make_rust();
1119        s4.make_rust();
1120        s5.make_rust();
1121        s6.make_rust();
1122        assert_eq!(s1.len(), s1_utf8.len());
1123        assert_eq!(s2.len(), s2_utf8.len());
1124        assert_eq!(s3.len(), s3_utf8.len());
1125        assert_eq!(s4.len(), s4_utf8.len());
1126        assert_eq!(s5.len(), s5_utf8.len());
1127        assert_eq!(s6.len(), s6_utf8.len());
1128    }
1129
1130    #[test]
1131    fn test_convert() {
1132        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1133        s.make_rust();
1134        assert_eq!(&*s.str(), "abc%$");
1135    }
1136
1137    #[test]
1138    fn partial_eq() {
1139        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1140        let string = String::from("abc%$");
1141        let s2 = DOMString::from_string(string.clone());
1142        assert_eq!(s, s2);
1143        assert_eq!(s, string);
1144    }
1145
1146    #[test]
1147    fn encoded_bytes() {
1148        let bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
1149        let s = from_latin1(bytes.clone());
1150        if let EncodedBytes::Latin1Bytes(s) = s.view().encoded_bytes() {
1151            assert_eq!(s, bytes)
1152        }
1153    }
1154
1155    #[test]
1156    fn testing_stringview() {
1157        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1158
1159        assert_eq!(
1160            s.str().chars().collect::<Vec<char>>(),
1161            vec!['a', 'b', 'c', '%', '$', '²']
1162        );
1163        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
1164    }
1165
1166    // We need to be extra careful here as two strings that have different
1167    // representation need to have the same hash.
1168    // Additionally, the interior mutability is only used for the conversion
1169    // which is forced by Hash. Hence, it is safe to have this interior mutability.
1170    #[test]
1171    fn test_hash() {
1172        use std::hash::{DefaultHasher, Hash, Hasher};
1173        fn hash_value(d: &DOMString) -> u64 {
1174            let mut hasher = DefaultHasher::new();
1175            d.hash(&mut hasher);
1176            hasher.finish()
1177        }
1178
1179        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1180        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1181        s_converted.make_rust();
1182        let s2 = DOMString::from_string(String::from("abc%$²"));
1183
1184        let hash_s = hash_value(&s);
1185        let hash_s_converted = hash_value(&s_converted);
1186        let hash_s2 = hash_value(&s2);
1187
1188        assert_eq!(hash_s, hash_s2);
1189        assert_eq!(hash_s, hash_s_converted);
1190    }
1191
1192    // Testing match_lazydomstring if it executes the statements in the match correctly
1193    #[test]
1194    fn test_match_executing() {
1195        // executing
1196        {
1197            let s = from_latin1(vec![b'a', b'b', b'c']);
1198            match_domstring_ascii!( s,
1199                "abc" => assert!(true),
1200                "bcd" => assert!(false),
1201                _ =>  (),
1202            );
1203        }
1204
1205        {
1206            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1207            match_domstring_ascii!( s,
1208                "abc/" => assert!(true),
1209                "bcd" => assert!(false),
1210                _ =>  (),
1211            );
1212        }
1213
1214        {
1215            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1216            match_domstring_ascii!( s,
1217                "bcd" => assert!(false),
1218                "abc%$" => assert!(true),
1219                _ => (),
1220            );
1221        }
1222
1223        {
1224            let s = DOMString::from_string(String::from("abcde"));
1225            match_domstring_ascii!( s,
1226                "abc" => assert!(false),
1227                "bcd" => assert!(false),
1228                _ => assert!(true),
1229            );
1230        }
1231        {
1232            let s = DOMString::from_string(String::from("abc%$"));
1233            match_domstring_ascii!( s,
1234                "bcd" => assert!(false),
1235                "abc%$" => assert!(true),
1236                _ =>  (),
1237            );
1238        }
1239        {
1240            let s = from_latin1(vec![b'a', b'b', b'c']);
1241            match_domstring_ascii!( s,
1242                "abcdd" => assert!(false),
1243                "bcd" => assert!(false),
1244                _ => (),
1245            );
1246        }
1247    }
1248
1249    // Testing match_lazydomstring if it evaluates to the correct expression
1250    #[test]
1251    fn test_match_returning_result() {
1252        {
1253            let s = from_latin1(vec![b'a', b'b', b'c']);
1254            let res = match_domstring_ascii!( s,
1255                "abc" => true,
1256                "bcd" => false,
1257                _ => false,
1258            );
1259            assert_eq!(res, true);
1260        }
1261        {
1262            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1263            let res = match_domstring_ascii!( s,
1264                "abc/" => true,
1265                "bcd" => false,
1266                _ => false,
1267            );
1268            assert_eq!(res, true);
1269        }
1270        {
1271            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1272            let res = match_domstring_ascii!( s,
1273                "bcd" => false,
1274                "abc%$" => true,
1275                _ => false,
1276            );
1277            assert_eq!(res, true);
1278        }
1279
1280        {
1281            let s = DOMString::from_string(String::from("abcde"));
1282            let res = match_domstring_ascii!( s,
1283                "abc" => false,
1284                "bcd" => false,
1285                _ => true,
1286            );
1287            assert_eq!(res, true);
1288        }
1289        {
1290            let s = DOMString::from_string(String::from("abc%$"));
1291            let res = match_domstring_ascii!( s,
1292                "bcd" => false,
1293                "abc%$" => true,
1294                _ => false,
1295            );
1296            assert_eq!(res, true);
1297        }
1298        {
1299            let s = from_latin1(vec![b'a', b'b', b'c']);
1300            let res = match_domstring_ascii!( s,
1301                "abcdd" => false,
1302                "bcd" => false,
1303                _ => true,
1304            );
1305            assert_eq!(res, true);
1306        }
1307    }
1308
1309    #[test]
1310    #[should_panic]
1311    fn test_match_panic() {
1312        let s = DOMString::from_string(String::from("abcd"));
1313        let _res = match_domstring_ascii!(s,
1314            "❤" => true,
1315            _ => false,);
1316    }
1317
1318    #[test]
1319    #[should_panic]
1320    fn test_match_panic2() {
1321        let s = DOMString::from_string(String::from("abcd"));
1322        let _res = match_domstring_ascii!(s,
1323            "abc" => false,
1324            "❤" => true,
1325            _ => false,
1326        );
1327    }
1328
1329    #[test]
1330    fn test_strip_whitespace() {
1331        {
1332            let mut s = from_latin1(vec![
1333                b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
1334            ]);
1335
1336            s.strip_leading_and_trailing_ascii_whitespace();
1337            s.make_rust();
1338            assert_eq!(&*s.str(), "abc%$²");
1339        }
1340        {
1341            let mut s = DOMString::from_string(String::from("   \n  abc%$ "));
1342
1343            s.strip_leading_and_trailing_ascii_whitespace();
1344            s.make_rust();
1345            assert_eq!(&*s.str(), "abc%$");
1346        }
1347    }
1348
1349    // https://infra.spec.whatwg.org/#ascii-whitespace
1350    #[test]
1351    fn contains_html_space_characters() {
1352        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_TAB, b'a', b'a']); // TAB
1353        assert!(s.contains_html_space_characters());
1354        s.make_rust();
1355        assert!(s.contains_html_space_characters());
1356
1357        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_NEWLINE, b'a', b'a']); // NEWLINE
1358        assert!(s.contains_html_space_characters());
1359        s.make_rust();
1360        assert!(s.contains_html_space_characters());
1361
1362        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_FORMFEED, b'a', b'a']); // FF
1363        assert!(s.contains_html_space_characters());
1364        s.make_rust();
1365        assert!(s.contains_html_space_characters());
1366
1367        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_CR, b'a', b'a']); // Carriage Return
1368        assert!(s.contains_html_space_characters());
1369        s.make_rust();
1370        assert!(s.contains_html_space_characters());
1371
1372        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']); // SPACE
1373        assert!(s.contains_html_space_characters());
1374        s.make_rust();
1375        assert!(s.contains_html_space_characters());
1376
1377        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
1378        assert!(!s.contains_html_space_characters());
1379        s.make_rust();
1380        assert!(!s.contains_html_space_characters());
1381    }
1382
1383    #[test]
1384    fn atom() {
1385        let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
1386        let atom1 = Atom::from(s);
1387        let s2 = DOMString::from_string(String::from("aaa aa"));
1388        let atom2 = Atom::from(s2);
1389        assert_eq!(atom1, atom2);
1390        let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
1391        let atom3 = Atom::from(s3);
1392        assert_ne!(atom1, atom3);
1393    }
1394
1395    #[test]
1396    fn namespace() {
1397        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1398        let atom1 = Namespace::from(s);
1399        let s2 = DOMString::from_string(String::from("aaa aa"));
1400        let atom2 = Namespace::from(s2);
1401        assert_eq!(atom1, atom2);
1402        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1403        let atom3 = Namespace::from(s3);
1404        assert_ne!(atom1, atom3);
1405    }
1406
1407    #[test]
1408    fn localname() {
1409        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1410        let atom1 = LocalName::from(s);
1411        let s2 = DOMString::from_string(String::from("aaa aa"));
1412        let atom2 = LocalName::from(s2);
1413        assert_eq!(atom1, atom2);
1414        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1415        let atom3 = LocalName::from(s3);
1416        assert_ne!(atom1, atom3);
1417    }
1418
1419    #[test]
1420    fn is_ascii_lowercase() {
1421        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1422        assert!(!s.is_ascii_lowercase());
1423        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
1424        assert!(!s.is_ascii_lowercase());
1425        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
1426        assert!(s.is_ascii_lowercase());
1427        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
1428        assert!(!s.is_ascii_lowercase());
1429        let s = DOMString::from_string(String::from("`aaaz"));
1430        assert!(!s.is_ascii_lowercase());
1431        let s = DOMString::from_string(String::from("aaaz"));
1432        assert!(s.is_ascii_lowercase());
1433    }
1434
1435    #[test]
1436    fn test_as_bytes() {
1437        const ASCII_SMALL_A: u8 = b'a';
1438        const ASCII_SMALL_Z: u8 = b'z';
1439
1440        let v1 = vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'];
1441        let s = from_latin1(v1.clone());
1442        assert_eq!(
1443            *s.as_bytes(),
1444            [
1445                ASCII_SMALL_A,
1446                ASCII_SMALL_A,
1447                ASCII_SMALL_A,
1448                UTF8_PILLCROW[0],
1449                UTF8_PILLCROW[1],
1450                ASCII_SMALL_A,
1451                ASCII_SMALL_A
1452            ]
1453        );
1454
1455        let v2 = vec![b'a', b'a', b'a', b'a', b'z'];
1456        let s = from_latin1(v2.clone());
1457        assert_eq!(
1458            *s.as_bytes(),
1459            [
1460                ASCII_SMALL_A,
1461                ASCII_SMALL_A,
1462                ASCII_SMALL_A,
1463                ASCII_SMALL_A,
1464                ASCII_SMALL_Z
1465            ]
1466        );
1467
1468        let str = "abc%$²".to_owned();
1469        let s = DOMString::from(str.clone());
1470        assert_eq!(&*s.as_bytes(), str.as_bytes());
1471        let str = "AbBcC❤&%$#".to_owned();
1472        let s = DOMString::from(str.clone());
1473        assert_eq!(&*s.as_bytes(), str.as_bytes());
1474    }
1475}