Skip to main content

script_bindings/
domstring.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::conversions::{ToJSValConvertible, jsstr_to_string};
17use js::gc::MutableHandleValue;
18use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
19use js::jsval::StringValue;
20use js::rust::{Runtime, Trace};
21use malloc_size_of::MallocSizeOfOps;
22use num_traits::{ToPrimitive, Zero};
23use regex::Regex;
24use servo_base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27use zeroize::Zeroize;
28
29use crate::script_runtime::JSContext as SafeJSContext;
30use crate::trace::RootedTraceableBox;
31
/// The highest byte value that is still ASCII (U+007F DELETE). A Latin1 byte at or below this
/// value is encoded as the very same single byte in UTF-8; every byte above it needs two
/// UTF-8 bytes.
const ASCII_END: u8 = 0x7F;
/// `A`, the first ASCII uppercase letter.
const ASCII_CAPITAL_A: u8 = 0x41;
/// `Z`, the last ASCII uppercase letter.
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `a`, the first ASCII lowercase letter.
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `z`, the last ASCII lowercase letter.
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// U+0009 CHARACTER TABULATION.
const ASCII_TAB: u8 = 0x09;
/// U+000A LINE FEED.
const ASCII_NEWLINE: u8 = 0x0A;
/// U+000C FORM FEED.
const ASCII_FORMFEED: u8 = 0x0C;
/// U+000D CARRIAGE RETURN.
const ASCII_CR: u8 = 0x0D;
/// U+0020 SPACE.
const ASCII_SPACE: u8 = 0x20;
42
43/// Gets the latin1 bytes from the js engine.
44/// Safety: Make sure the *mut JSString is not null.
45unsafe fn get_latin1_string_bytes(
46    rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
47) -> &[u8] {
48    debug_assert!(!rooted_traceable_box.get().is_null());
49    let mut length = 0;
50    unsafe {
51        let chars = JS_GetLatin1StringCharsAndLength(
52            Runtime::get().expect("JS runtime has shut down").as_ptr(),
53            ptr::null(),
54            rooted_traceable_box.get(),
55            &mut length,
56        );
57        assert!(!chars.is_null());
58        slice::from_raw_parts(chars, length)
59    }
60}
61
/// A borrowed view of the bytes backing a [`DOMString`], tagged with the encoding those bytes
/// are stored in.
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// These bytes are Latin1 encoded.
    Latin1(Ref<'a, [u8]>),
    /// These bytes are UTF-8 encoded.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Return a reference to the raw bytes of this [`EncodedBytes`] without any information about
    /// the underlying encoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// The number of bytes this data occupies once encoded as UTF-8.
    ///
    /// UTF-8 data is already in its final form. For Latin1 data every ASCII byte
    /// (`0x00..=0x7F`, which includes U+007F DELETE) stays a single byte, while every byte
    /// above that range expands to two UTF-8 bytes.
    pub fn len(&self) -> usize {
        match self {
            Self::Latin1(bytes) => {
                let two_byte_characters = bytes.iter().filter(|byte| !byte.is_ascii()).count();
                bytes.len() + two_byte_characters
            },
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Return whether or not there is any data in this collection of bytes.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
96
97enum DOMStringType {
98    /// A simple rust string
99    Rust(String),
100    /// A JS String stored in mozjs.
101    JSString(RootedTraceableBox<Heap<*mut JSString>>),
102    #[cfg(test)]
103    /// This is used for testing of the bindings to give
104    /// a raw u8 Latin1 encoded string without having a js engine.
105    Latin1Vec(Vec<u8>),
106}
107
108impl Default for DOMStringType {
109    fn default() -> Self {
110        Self::Rust(Default::default())
111    }
112}
113
114impl Zeroize for DOMStringType {
115    fn zeroize(&mut self) {
116        self.ensure_rust_string().zeroize()
117    }
118}
119
120impl DOMStringType {
121    /// Warning:
122    /// This function does not checking and just returns the raw bytes of the string,
123    /// independently if they are  utf8 or latin1.
124    /// The caller needs to take care that these make sense in context.
125    fn as_raw_bytes(&self) -> &[u8] {
126        match self {
127            DOMStringType::Rust(s) => s.as_bytes(),
128            DOMStringType::JSString(rooted_traceable_box) => unsafe {
129                get_latin1_string_bytes(rooted_traceable_box)
130            },
131            #[cfg(test)]
132            DOMStringType::Latin1Vec(items) => items,
133        }
134    }
135
136    fn ensure_rust_string(&mut self) -> &mut String {
137        let new_string = match self {
138            DOMStringType::Rust(string) => return string,
139            DOMStringType::JSString(rooted_traceable_box) => unsafe {
140                jsstr_to_string(
141                    Runtime::get().expect("JS runtime has shut down").as_ptr(),
142                    NonNull::new(rooted_traceable_box.get()).unwrap(),
143                )
144            },
145            #[cfg(test)]
146            DOMStringType::Latin1Vec(items) => {
147                let mut v = vec![0; items.len() * 2];
148                let real_size =
149                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
150                v.truncate(real_size);
151
152                // Safety: convert_latin1_to_utf8 converts the raw bytes to utf8 and the
153                // buffer is the size specified in the documentation, so this should be safe.
154                unsafe { String::from_utf8_unchecked(v) }
155            },
156        };
157        *self = DOMStringType::Rust(new_string);
158        self.ensure_rust_string()
159    }
160}
161
162/// A reference to a Rust `str` of UTF-8 encoded bytes, used to get a Rust
163/// string from a [`DOMString`].
164#[derive(Debug)]
165pub struct StringView<'a>(Ref<'a, str>);
166
167impl StringView<'_> {
168    pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
169        self.split(HTML_SPACE_CHARACTERS)
170            .filter(|string| !string.is_empty())
171    }
172}
173
174impl From<StringView<'_>> for String {
175    fn from(string_view: StringView<'_>) -> Self {
176        string_view.0.to_string()
177    }
178}
179
180impl Deref for StringView<'_> {
181    type Target = str;
182    fn deref(&self) -> &str {
183        &(self.0)
184    }
185}
186
187impl AsRef<str> for StringView<'_> {
188    fn as_ref(&self) -> &str {
189        &(self.0)
190    }
191}
192
193impl PartialEq for StringView<'_> {
194    fn eq(&self, other: &Self) -> bool {
195        self.0.eq(&*(other.0))
196    }
197}
198
199impl PartialEq<&str> for StringView<'_> {
200    fn eq(&self, other: &&str) -> bool {
201        self.0.eq(*other)
202    }
203}
204
205impl Eq for StringView<'_> {}
206
207impl PartialOrd for StringView<'_> {
208    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
209        self.0.partial_cmp(&**other)
210    }
211}
212
213impl Ord for StringView<'_> {
214    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
215        self.0.cmp(other)
216    }
217}
218
219/// Safety comment:
220///
221/// This method will _not_ trace the pointer if the rust string exists.
222/// The js string could be garbage collected and, hence, violating this
223/// could lead to undefined behavior
224unsafe impl Trace for DOMStringType {
225    unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
226        unsafe {
227            match self {
228                DOMStringType::Rust(_s) => {},
229                DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
230                #[cfg(test)]
231                DOMStringType::Latin1Vec(_s) => {},
232            }
233        }
234    }
235}
236
237impl malloc_size_of::MallocSizeOf for DOMStringType {
238    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
239        match self {
240            DOMStringType::Rust(s) => s.size_of(ops),
241            DOMStringType::JSString(_rooted_traceable_box) => {
242                // Managed by JS Engine
243                0
244            },
245            #[cfg(test)]
246            DOMStringType::Latin1Vec(s) => s.size_of(ops),
247        }
248    }
249}
250
251impl std::fmt::Debug for DOMStringType {
252    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
253        match self {
254            DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
255            DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
256            #[cfg(test)]
257            DOMStringType::Latin1Vec(s) => f
258                .debug_struct("DOMString")
259                .field("latin1_string", s)
260                .finish(),
261        }
262    }
263}
264
265////// A DOMString.
266///
267/// This type corresponds to the [`DOMString`] type in WebIDL.
268///
269/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString
270///
271/// Conceptually, a DOMString has the same value space as a JavaScript String,
272/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with
273/// unpaired surrogates present (also sometimes called WTF-16).
274///
275/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have
276/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values
277/// can not be represented as a Rust `String`). This introduces the question of
278/// what to do with values being passed from JavaScript to Rust that contain
279/// unpaired surrogates.
280///
281/// The hypothesis is that it does not matter much how exactly those values are
282/// transformed, because  passing unpaired surrogates into the DOM is very rare.
283/// Instead Servo withh replace the unpaired surrogate by a U+FFFD replacement
284/// character.
285///
286/// Currently, the lack of crash reports about this issue provides some
287/// evidence to support the hypothesis. This evidence will hopefully be used to
288/// convince other browser vendors that it would be safe to replace unpaired
289/// surrogates at the boundary between JavaScript and native code. (This would
290/// unify the `DOMString` and `USVString` types, both in the WebIDL standard
291/// and in Servo.)
292///
293/// This string class will keep either the Reference to the mozjs object alive
294/// or will have an internal rust string.
295/// We currently default to doing most of the string operation on the rust side.
296/// You should use `str()` to get the Rust string (represented by a `StringView`
297/// which you can deref to a `&str`). You should assume that this conversion is
298/// expensive. For now, you should assume that all the functions incur this
299/// conversion cost.
300#[repr(transparent)]
301#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
302pub struct DOMString(RefCell<DOMStringType>);
303
304impl Clone for DOMString {
305    fn clone(&self) -> Self {
306        self.ensure_rust_string().clone().into()
307    }
308}
309
/// The ways in which creating a [`DOMString`] can fail.
///
/// `Debug` is derived so that results carrying this error can be used with
/// `Result::unwrap`/`Result::expect` and can be logged with `{:?}`.
#[derive(Debug)]
pub enum DOMStringErrorType {
    /// The JavaScript value could not be converted into a string.
    JSConversionError,
}
313
314impl DOMString {
315    /// Creates a new `DOMString`.
316    pub fn new() -> DOMString {
317        Default::default()
318    }
319
320    /// Creates the string from js. If the string can be encoded in latin1, just take the reference
321    /// to the JSString. Otherwise do the conversion to utf8 now.
322    pub fn from_js_string(
323        cx: SafeJSContext,
324        value: js::gc::HandleValue,
325    ) -> Result<DOMString, DOMStringErrorType> {
326        let string_ptr = unsafe { js::rust::ToString(*cx, value) };
327        if string_ptr.is_null() {
328            debug!("ToString failed");
329            Err(DOMStringErrorType::JSConversionError)
330        } else {
331            let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
332            let inner = if latin1 {
333                let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
334                DOMStringType::JSString(h)
335            } else {
336                // We need to convert the string anyway as it is not just latin1
337                DOMStringType::Rust(unsafe {
338                    jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
339                })
340            };
341            Ok(DOMString(RefCell::new(inner)))
342        }
343    }
344
345    /// Transforms the internal storage of this [`DOMString`] into a Rust string if it is not
346    /// yet one. This will make a copy of the underlying string data.
347    fn ensure_rust_string(&self) -> RefMut<'_, String> {
348        let inner = self.0.borrow_mut();
349        RefMut::map(inner, |inner| inner.ensure_rust_string())
350    }
351
352    /// Debug the current  state of the string without modifying it.
353    #[expect(unused)]
354    fn debug_js(&self) {
355        match *self.0.borrow() {
356            DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
357            DOMStringType::JSString(ref rooted_traceable_box) => {
358                let s = unsafe {
359                    jsstr_to_string(
360                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
361                        ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
362                    )
363                };
364                info!("JSString ({})", s);
365            },
366            #[cfg(test)]
367            DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
368        }
369    }
370
371    /// Returns the underlying rust string.
372    pub fn str(&self) -> StringView<'_> {
373        {
374            let inner = self.0.borrow();
375            if matches!(&*inner, DOMStringType::Rust(..)) {
376                return StringView(Ref::map(inner, |inner| match inner {
377                    DOMStringType::Rust(string) => string.as_str(),
378                    _ => unreachable!("Guaranteed by condition above"),
379                }));
380            }
381        }
382
383        self.ensure_rust_string();
384        self.str()
385    }
386
387    /// Return the [`EncodedBytes`] of this [`DOMString`]. This returns the original encoded
388    /// bytes of the string without doing any conversions.
389    pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
390        let inner = self.0.borrow();
391        match &*inner {
392            DOMStringType::Rust(..) => {
393                EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
394            },
395            _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
396        }
397    }
398
399    pub fn clear(&mut self) {
400        let mut inner = self.0.borrow_mut();
401        let DOMStringType::Rust(string) = &mut *inner else {
402            *inner = DOMStringType::Rust(String::new());
403            return;
404        };
405        string.clear();
406    }
407
408    pub fn is_empty(&self) -> bool {
409        self.encoded_bytes().is_empty()
410    }
411
412    /// The length of this string in UTF-8 code units, each one being one byte in size.
413    ///
414    /// Note: This is different than the number of Unicode characters (or code points). A
415    /// character may require multiple UTF-8 code units.
416    pub fn len(&self) -> usize {
417        self.encoded_bytes().len()
418    }
419
420    /// The length of this string in UTF-8 code units, each one being one byte in size.
421    /// This method is the same as [`DOMString::len`], but the result is wrapped in a
422    /// `Utf8CodeUnitLength` to be used in code that mixes different kinds of offsets.
423    ///
424    /// Note: This is different than the number of Unicode characters (or code points). A
425    /// character may require multiple UTF-8 code units.
426    pub fn len_utf8(&self) -> Utf8CodeUnitLength {
427        Utf8CodeUnitLength(self.len())
428    }
429
430    /// The length of this string in UTF-16 code units, each one being one two bytes in size.
431    ///
432    /// Note: This is different than the number of Unicode characters (or code points). A
433    /// character may require multiple UTF-16 code units.
434    pub fn len_utf16(&self) -> Utf16CodeUnitLength {
435        Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
436    }
437
438    pub fn make_ascii_lowercase(&mut self) {
439        self.0
440            .borrow_mut()
441            .ensure_rust_string()
442            .make_ascii_lowercase();
443    }
444
445    pub fn push_str(&mut self, string_to_push: &str) {
446        self.0
447            .borrow_mut()
448            .ensure_rust_string()
449            .push_str(string_to_push);
450    }
451
452    /// <https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace>
453    pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
454        if self.is_empty() {
455            return;
456        }
457
458        let mut inner = self.0.borrow_mut();
459        let string = inner.ensure_rust_string();
460        let trailing_whitespace_len = string
461            .trim_end_matches(|character: char| character.is_ascii_whitespace())
462            .len();
463        string.truncate(trailing_whitespace_len);
464        if string.is_empty() {
465            return;
466        }
467
468        let first_non_whitespace = string
469            .find(|character: char| !character.is_ascii_whitespace())
470            .unwrap();
471        string.replace_range(0..first_non_whitespace, "");
472    }
473
474    /// <https://html.spec.whatwg.org/multipage/#valid-floating-point-number>
475    pub fn is_valid_floating_point_number_string(&self) -> bool {
476        static RE: LazyLock<Regex> = LazyLock::new(|| {
477            Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
478        });
479
480        RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
481            self.parse_floating_point_number().is_some()
482    }
483
484    pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
485        self.str().parse::<T>()
486    }
487
488    /// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
489    pub fn parse_floating_point_number(&self) -> Option<f64> {
490        parse_floating_point_number(&self.str())
491    }
492
493    /// <https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number>
494    pub fn set_best_representation_of_the_floating_point_number(&mut self) {
495        if let Some(val) = self.parse_floating_point_number() {
496            // [tc39] Step 2: If x is either +0 or -0, return "0".
497            let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
498
499            *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
500        }
501    }
502
503    pub fn to_lowercase(&self) -> String {
504        self.str().to_lowercase()
505    }
506
507    pub fn to_uppercase(&self) -> String {
508        self.str().to_uppercase()
509    }
510
511    pub fn strip_newlines(&mut self) {
512        // > To strip newlines from a string, remove any U+000A LF and U+000D CR code
513        // > points from the string.
514        self.0
515            .borrow_mut()
516            .ensure_rust_string()
517            .retain(|character| character != '\r' && character != '\n');
518    }
519
520    /// Normalize newlines according to <https://infra.spec.whatwg.org/#normalize-newlines>.
521    pub fn normalize_newlines(&mut self) {
522        // > To normalize newlines in a string, replace every U+000D CR U+000A LF code point
523        // > pair with a single U+000A LF code point, and then replace every remaining
524        // > U+000D CR code point with a U+000A LF code point.
525        let mut inner = self.0.borrow_mut();
526        let string = inner.ensure_rust_string();
527        *string = string.replace("\r\n", "\n").replace("\r", "\n")
528    }
529
530    pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
531        let new_string = self.str().to_owned();
532        DOMString(RefCell::new(DOMStringType::Rust(
533            new_string.replace(needle, replace_char),
534        )))
535    }
536
537    /// Pattern is not yet stable in rust, hence, we need different methods for str and char
538    pub fn starts_with(&self, c: char) -> bool {
539        if !c.is_ascii() {
540            self.str().starts_with(c)
541        } else {
542            // As this is an ASCII character, it is guaranteed to be a single byte, no matter if the
543            // underlying encoding is UTF-8 or Latin1.
544            self.encoded_bytes().bytes().starts_with(&[c as u8])
545        }
546    }
547
548    pub fn starts_with_str(&self, needle: &str) -> bool {
549        self.str().starts_with(needle)
550    }
551
552    pub fn ends_with_str(&self, needle: &str) -> bool {
553        self.str().ends_with(needle)
554    }
555
556    pub fn contains(&self, needle: &str) -> bool {
557        self.str().contains(needle)
558    }
559
560    pub fn to_ascii_lowercase(&self) -> String {
561        let conversion = match self.encoded_bytes() {
562            EncodedBytes::Latin1(bytes) => {
563                if bytes.iter().all(|c| *c <= ASCII_END) {
564                    // We are just simple ascii
565                    Some(unsafe {
566                        String::from_utf8_unchecked(
567                            bytes
568                                .iter()
569                                .map(|c| {
570                                    if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
571                                        c + 32
572                                    } else {
573                                        *c
574                                    }
575                                })
576                                .collect(),
577                        )
578                    })
579                } else {
580                    None
581                }
582            },
583            EncodedBytes::Utf8(bytes) => unsafe {
584                // Save because we know it was a utf8 string
585                Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
586            },
587        };
588        // We otherwise would double borrow the refcell
589        if let Some(conversion) = conversion {
590            conversion
591        } else {
592            self.str().to_ascii_lowercase()
593        }
594    }
595
596    fn contains_space_characters(
597        &self,
598        latin1_characters: &'static [u8],
599        utf8_characters: &'static [char],
600    ) -> bool {
601        match self.encoded_bytes() {
602            EncodedBytes::Latin1(items) => {
603                latin1_characters.iter().any(|byte| items.contains(byte))
604            },
605            EncodedBytes::Utf8(bytes) => {
606                // Save because we know it was a utf8 string
607                let s = unsafe { str::from_utf8_unchecked(&bytes) };
608                s.contains(utf8_characters)
609            },
610        }
611    }
612
613    /// <https://infra.spec.whatwg.org/#ascii-tab-or-newline>
614    pub fn contains_tab_or_newline(&self) -> bool {
615        const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
616        const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
617
618        self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
619    }
620
621    /// <https://infra.spec.whatwg.org/#ascii-whitespace>
622    pub fn contains_html_space_characters(&self) -> bool {
623        const SPACE_BYTES: [u8; 5] = [
624            ASCII_TAB,
625            ASCII_NEWLINE,
626            ASCII_FORMFEED,
627            ASCII_CR,
628            ASCII_SPACE,
629        ];
630        self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
631    }
632
633    /// This returns the string in utf8 bytes, i.e., `[u8]` encoded with utf8.
634    pub fn as_bytes(&self) -> BytesView<'_> {
635        // BytesView will just give the raw bytes on dereference.
636        // If we are ascii this is the same for latin1 and utf8.
637        // Otherwise we convert to rust.
638        if self.is_ascii() {
639            BytesView(self.0.borrow())
640        } else {
641            self.ensure_rust_string();
642            BytesView(self.0.borrow())
643        }
644    }
645
646    /// Tests if there are only ascii lowercase characters. Does not include special characters.
647    pub fn is_ascii_lowercase(&self) -> bool {
648        match self.encoded_bytes() {
649            EncodedBytes::Latin1(items) => items
650                .iter()
651                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
652            EncodedBytes::Utf8(s) => s
653                .iter()
654                .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
655                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
656        }
657    }
658
659    /// Is the string only ascii characters
660    pub fn is_ascii(&self) -> bool {
661        self.encoded_bytes().bytes().is_ascii()
662    }
663
664    /// Returns true if the slice only contains bytes that are safe to use in cookie strings.
665    /// <https://www.ietf.org/archive/id/draft-ietf-httpbis-rfc6265bis-15.html#section-5.6-6>
666    /// Not using ServoCookie::is_valid_name_or_value to prevent dependency on the net crate.
667    pub fn is_valid_for_cookie(&self) -> bool {
668        match self.encoded_bytes() {
669            EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
670                .iter()
671                .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
672        }
673    }
674
675    /// Call the callback with a `&str` reference of the string stored in this [`DOMString`]. Note
676    /// that if the [`DOMString`] cannot be interpreted as a Rust string a conversion will be done.
677    fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
678        match self.encoded_bytes() {
679            // If the Latin1 string is all ASCII bytes, then it is safe to interpret it as UTF-8.
680            EncodedBytes::Latin1(latin1_bytes) => {
681                if latin1_bytes.iter().all(|character| character.is_ascii()) {
682                    // SAFETY: All characters are ASCII, so it is safe to interpret this string as
683                    // UTF-8.
684                    return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
685                }
686            },
687            EncodedBytes::Utf8(utf8_bytes) => {
688                // SAFETY: These are the bytes of a UTF-8 string already, so they can be interpreted
689                // as UTF-8.
690                return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
691            },
692        };
693        callback(self.str().deref())
694    }
695
696    /// Newline replacement routine as described in step 1 of the multipart/form-data
697    /// encoding algorithm and many steps of application/x-www-form-urlencoded.
698    /// e.g. <https://html.spec.whatwg.org/multipage/#convert-to-a-list-of-name-value-pairs>
699    ///
700    /// Replace every occurrence of U+000D (CR) not followed by U+000A (LF),
701    /// and every occurrence of U+000A (LF) not preceded by U+000D (CR), in entry's name,
702    /// by a string consisting of a U+000D (CR) and U+000A (LF).
703    pub fn normalize_crlf(&self) -> String {
704        let s = self.str();
705        let mut buf = String::new();
706        let mut prev = ' ';
707        for ch in s.chars() {
708            match ch {
709                '\n' if prev != '\r' => {
710                    buf.push('\r');
711                    buf.push('\n');
712                },
713                '\n' => {
714                    buf.push('\n');
715                },
716                // This character isn't LF but is
717                // preceded by CR
718                _ if prev == '\r' => {
719                    buf.push('\n');
720                    buf.push(ch);
721                },
722                _ => buf.push(ch),
723            };
724            prev = ch;
725        }
726        // In case the last character was CR
727        if prev == '\r' {
728            buf.push('\n');
729        }
730        buf
731    }
732}
733
/// Parses `input` as a floating-point number after trimming surrounding whitespace.
///
/// A valid number is the same as what Rust considers to be valid, except that infinite
/// values, NaN, an input ending in `.` and an input starting with `+` are rejected with `None`.
///
/// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    // Steps 15-16 are telling us things about IEEE rounding modes
    // for floating-point significands; this code assumes the Rust
    // compiler already matches them in any cases where
    // that actually matters. They are not
    // related to f64::round(), which is for rounding to integers.
    let value: f64 = input.trim().parse().ok()?;

    // NOTE(review): the `.` and `+` checks look at the untrimmed input, so surrounding
    // whitespace hides a trailing `.` or a leading `+` from them.
    let is_rejected = value.is_infinite() ||
        value.is_nan() ||
        input.ends_with('.') ||
        input.starts_with('+');
    if is_rejected { None } else { Some(value) }
}
747
748pub struct BytesView<'a>(Ref<'a, DOMStringType>);
749
750impl Deref for BytesView<'_> {
751    type Target = [u8];
752
753    fn deref(&self) -> &Self::Target {
754        // This does the correct thing by the construction of BytesView in `DOMString::as_bytes`.
755        self.0.as_raw_bytes()
756    }
757}
758
759impl Ord for DOMString {
760    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
761        self.str().cmp(&other.str())
762    }
763}
764
765impl PartialOrd for DOMString {
766    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
767        self.str().partial_cmp(&other.str())
768    }
769}
770
771impl Extend<char> for DOMString {
772    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
773        self.0.borrow_mut().ensure_rust_string().extend(iter)
774    }
775}
776
777impl ToJSValConvertible for DOMString {
778    unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
779        let val = self.0.borrow();
780        match *val {
781            DOMStringType::Rust(ref s) => unsafe {
782                s.to_jsval(cx, rval);
783            },
784            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
785                rval.set(StringValue(&*rooted_traceable_box.get()));
786            },
787            #[cfg(test)]
788            DOMStringType::Latin1Vec(ref items) => {
789                let mut v = vec![0; items.len() * 2];
790                let real_size =
791                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
792                v.truncate(real_size);
793
794                String::from_utf8(v)
795                    .expect("Error in constructin test string")
796                    .to_jsval(cx, rval);
797            },
798        };
799    }
800}
801
802impl std::hash::Hash for DOMString {
803    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
804        self.str().hash(state);
805    }
806}
807
808impl std::fmt::Display for DOMString {
809    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
810        fmt::Display::fmt(self.str().deref(), f)
811    }
812}
813
814impl std::cmp::PartialEq<str> for DOMString {
815    fn eq(&self, other: &str) -> bool {
816        if other.is_ascii() {
817            *other.as_bytes() == *self.encoded_bytes().bytes()
818        } else {
819            self.str().deref() == other
820        }
821    }
822}
823
824impl std::cmp::PartialEq<&str> for DOMString {
825    fn eq(&self, other: &&str) -> bool {
826        self.eq(*other)
827    }
828}
829
830impl std::cmp::PartialEq<String> for DOMString {
831    fn eq(&self, other: &String) -> bool {
832        self.eq(other.as_str())
833    }
834}
835
836impl std::cmp::PartialEq<DOMString> for String {
837    fn eq(&self, other: &DOMString) -> bool {
838        other.eq(self)
839    }
840}
841
842impl std::cmp::PartialEq<DOMString> for str {
843    fn eq(&self, other: &DOMString) -> bool {
844        other.eq(self)
845    }
846}
847
848impl std::cmp::PartialEq for DOMString {
849    fn eq(&self, other: &DOMString) -> bool {
850        let result = match (self.encoded_bytes(), other.encoded_bytes()) {
851            (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
852                Some(*bytes == *other_bytes)
853            },
854            (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
855                if other_bytes.is_ascii() =>
856            {
857                Some(*bytes == *other_bytes)
858            },
859            (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
860                Some(*bytes == *other_bytes)
861            },
862            (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
863                Some(*bytes == *other_bytes)
864            },
865            _ => None,
866        };
867
868        if let Some(eq_result) = result {
869            return eq_result;
870        }
871
872        *self.str() == *other.str()
873    }
874}
875
// Marker impl: declares that the `PartialEq` relation on `DOMString` is a full
// equivalence relation.
impl std::cmp::Eq for DOMString {}
877
878impl From<std::string::String> for DOMString {
879    fn from(string: String) -> Self {
880        DOMString(RefCell::new(DOMStringType::Rust(string)))
881    }
882}
883
884impl From<&str> for DOMString {
885    fn from(string: &str) -> Self {
886        String::from(string).into()
887    }
888}
889
890impl From<DOMString> for LocalName {
891    fn from(dom_string: DOMString) -> LocalName {
892        dom_string.with_str_reference(|string| LocalName::from(string))
893    }
894}
895
896impl From<&DOMString> for LocalName {
897    fn from(dom_string: &DOMString) -> LocalName {
898        dom_string.with_str_reference(|string| LocalName::from(string))
899    }
900}
901
902impl From<DOMString> for Namespace {
903    fn from(dom_string: DOMString) -> Namespace {
904        dom_string.with_str_reference(|string| Namespace::from(string))
905    }
906}
907
908impl From<DOMString> for Atom {
909    fn from(dom_string: DOMString) -> Atom {
910        dom_string.with_str_reference(|string| Atom::from(string))
911    }
912}
913
914impl From<DOMString> for String {
915    fn from(val: DOMString) -> Self {
916        val.str().to_owned()
917    }
918}
919
920impl From<DOMString> for Vec<u8> {
921    fn from(value: DOMString) -> Self {
922        value.str().as_bytes().to_vec()
923    }
924}
925
926impl From<Cow<'_, str>> for DOMString {
927    fn from(value: Cow<'_, str>) -> Self {
928        DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
929    }
930}
931
932impl Zeroize for DOMString {
933    fn zeroize(&mut self) {
934        self.0.borrow_mut().zeroize()
935    }
936}
937
/// Recursive helper behind `match_domstring_ascii!`; it turns the list of arms
/// into a chain of `if`/`else` byte comparisons that ends in a one-arm `match`.
///
/// Arguments, in order:
/// * `$variant` — forwarded on every recursion step but not referenced by the
///   expansion of either rule.
/// * `$input` — an expression that offers a `bytes()` method; it is expanded
///   once per literal arm.
/// * the arms: any number of `"literal" => expr,` arms followed by exactly one
///   pattern arm with a trailing comma (e.g. `_ => expr,`).
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // Literal arm: compare the literal's bytes with the input's bytes and either
    // evaluate `$then` or recurse into the remaining arms. In builds with debug
    // assertions, a non-ASCII literal panics when its arm is reached.
    ($variant: expr, $input: expr, $ascii_literal: literal => $then: expr, $($rest:tt)*) => {
        if {
            debug_assert!(($ascii_literal).is_ascii());
            $ascii_literal.as_bytes()
        } == $input.bytes() {
          $then
        } else {
            $crate::match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // Final arm: a single pattern (typically `_`) matched against `$input` itself.
    // The trailing comma is mandatory.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
957
/// Matches a `DOMString` against ASCII string literals by comparing its encoded
/// bytes directly, without first converting it to a Rust string.
///
/// Only ASCII literals may be used as arms; a non-ASCII literal would give wrong
/// results. In builds with debug assertions, reaching an arm whose literal is
/// not ASCII panics.
///
/// The arms are tried in order. The last arm must be a pattern (such as `_`)
/// and must be followed by a trailing comma.
/// ```ignore
/// let s = DOMString::from("test");
/// let value = match_domstring_ascii!(s,
/// "test1" => 1,
/// "test2" => 2,
/// "test" => 3,
/// _ => 4,
/// );
/// assert_eq!(value, 3);
/// ```
///
/// The `RefCell` inside `DOMString` is borrowed for the duration of the `match`,
/// so the string cannot be accessed again inside a `match` arm.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::domstring::EncodedBytes;

            // `$input` is evaluated exactly once; the arms only see `encoded_bytes`.
            let encoded_bytes = $input.encoded_bytes();
            match encoded_bytes {
                EncodedBytes::Latin1(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, encoded_bytes, $($tail)*)
                }
                EncodedBytes::Utf8(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, encoded_bytes, $($tail)*)
                }

            }
        }
    };
}
993
/// Unit tests for `DOMString`, focused on the Latin-1 backed representation and
/// on keeping it consistent with the Rust-string representation.
#[cfg(test)]
mod tests {
    use super::*;

    /// Latin-1 encoding of the pilcrow sign `¶` (U+00B6).
    const LATIN1_PILCROW: u8 = 0xB6;
    /// UTF-8 encoding of the pilcrow sign `¶` (U+00B6).
    const UTF8_PILCROW: [u8; 2] = [194, 182];
    /// Latin-1 encoding of superscript two `²` (U+00B2).
    const LATIN1_POWER2: u8 = 0xB2;

    /// Builds a `DOMString` that stores the given bytes as Latin-1.
    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
    }

    #[test]
    fn string_functions() {
        let s = DOMString::from("AbBcC❤&%$#");
        let s_copy = s.clone();
        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
        assert_eq!(s, s_copy);
        assert_eq!(s.len(), 12);
        assert_eq!(s_copy.len(), 12);
        assert!(s.starts_with('A'));
        let s2 = DOMString::from("");
        assert!(s2.is_empty());
    }

    #[test]
    fn string_functions_latin1() {
        {
            let s = from_latin1(vec![
                b'A',
                b'b',
                b'B',
                b'c',
                b'C',
                b'&',
                b'%',
                b'$',
                b'#',
                LATIN1_POWER2,
            ]);
            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
        }
        {
            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
            assert_eq!(s.to_ascii_lowercase(), "abbcc");
        }
        {
            let s = from_latin1(vec![
                b'A',
                b'b',
                b'B',
                b'c',
                b'C',
                b'&',
                b'%',
                b'$',
                b'#',
                LATIN1_POWER2,
            ]);
            // The single non-ASCII byte takes two bytes once expressed as UTF-8.
            assert_eq!(s.len(), 11);
            assert!(s.starts_with('A'));
        }
        {
            let s = from_latin1(vec![]);
            assert!(s.is_empty());
        }
    }

    /// `len()` must report the UTF-8 length both before and after the Latin-1
    /// bytes have been converted into a Rust string.
    #[test]
    fn test_length() {
        // Each case pairs one 16-byte row of the upper Latin-1 range with the same
        // 16 characters written as a UTF-8 literal.
        let cases: [(std::ops::RangeInclusive<u8>, &str); 6] = [
            (0xA0..=0xAF, "\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯"),
            (0xB0..=0xBF, "°±²³´µ¶·¸¹º»¼½¾¿"),
            (0xC0..=0xCF, "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ"),
            (0xD0..=0xDF, "ÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß"),
            (0xE0..=0xEF, "àáâãäåæçèéêëìíîï"),
            (0xF0..=0xFF, "ðñòóôõö÷øùúûüýþÿ"),
        ];

        for (latin1_row, utf8) in cases {
            let row_start = *latin1_row.start();
            let s = from_latin1(latin1_row.collect());
            assert_eq!(
                s.len(),
                utf8.len(),
                "Latin-1 backed, row starting at {row_start:#04X}"
            );

            s.ensure_rust_string();
            assert_eq!(
                s.len(),
                utf8.len(),
                "Rust-string backed, row starting at {row_start:#04X}"
            );
        }
    }

    #[test]
    fn test_convert() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        s.ensure_rust_string();
        assert_eq!(&*s.str(), "abc%$");
    }

    #[test]
    fn partial_eq() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        let string = String::from("abc%$");
        let s2 = DOMString::from(string.clone());
        assert_eq!(s, s2);
        assert_eq!(s, string);

        // Non-ASCII content stored in different representations cannot be compared
        // byte-for-byte and has to go through the string view instead.
        let latin1 = from_latin1(vec![b'a', LATIN1_POWER2]);
        let utf8 = DOMString::from("a²");
        assert_eq!(latin1, utf8);
    }

    #[test]
    fn encoded_latin1_bytes() {
        let original_latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2];
        let dom_string = from_latin1(original_latin1_bytes.clone());
        let string_latin1_bytes = match dom_string.encoded_bytes() {
            EncodedBytes::Latin1(bytes) => bytes,
            _ => unreachable!("Expected Latin1 encoded bytes"),
        };
        assert_eq!(*original_latin1_bytes, *string_latin1_bytes);
    }

    #[test]
    fn testing_stringview() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);

        assert_eq!(
            s.str().chars().collect::<Vec<char>>(),
            vec!['a', 'b', 'c', '%', '$', '²']
        );
        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
    }

    // We need to be extra careful here as two strings that have different
    // representation need to have the same hash.
    // Additionally, the interior mutability is only used for the conversion
    // which is forced by Hash. Hence, it is safe to have this interior mutability.
    #[test]
    fn test_hash() {
        use std::hash::{DefaultHasher, Hash, Hasher};
        fn hash_value(d: &DOMString) -> u64 {
            let mut hasher = DefaultHasher::new();
            d.hash(&mut hasher);
            hasher.finish()
        }

        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);
        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);
        s_converted.ensure_rust_string();
        let s2 = DOMString::from("abc%$²");

        let hash_s = hash_value(&s);
        let hash_s_converted = hash_value(&s_converted);
        let hash_s2 = hash_value(&s2);

        assert_eq!(hash_s, hash_s2);
        assert_eq!(hash_s, hash_s_converted);
    }

    // Checks that `match_domstring_ascii!` executes the statement of the matching
    // arm. Every arm that must not be taken panics, so a wrong or missing match
    // cannot pass silently.
    #[test]
    fn test_match_executing() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abc" => (),
                "bcd" => panic!("matched the wrong literal"),
                _ => panic!("expected the \"abc\" arm to match"),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            match_domstring_ascii!( s,
                "abc/" => (),
                "bcd" => panic!("matched the wrong literal"),
                _ => panic!("expected the \"abc/\" arm to match"),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            match_domstring_ascii!( s,
                "bcd" => panic!("matched the wrong literal"),
                "abc%$" => (),
                _ => panic!("expected the \"abc%$\" arm to match"),
            );
        }

        {
            let s = DOMString::from("abcde");
            match_domstring_ascii!( s,
                "abc" => panic!("a prefix must not match"),
                "bcd" => panic!("a substring must not match"),
                _ => (),
            );
        }
        {
            let s = DOMString::from("abc%$");
            match_domstring_ascii!( s,
                "bcd" => panic!("matched the wrong literal"),
                "abc%$" => (),
                _ => panic!("expected the \"abc%$\" arm to match"),
            );
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abcdd" => panic!("a longer literal must not match"),
                "bcd" => panic!("matched the wrong literal"),
                _ => (),
            );
        }
    }

    // Checks that `match_domstring_ascii!` evaluates to the value of the matching arm.
    #[test]
    fn test_match_returning_result() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abc" => true,
                "bcd" => false,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            let res = match_domstring_ascii!( s,
                "abc/" => true,
                "bcd" => false,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert!(res);
        }

        {
            let s = DOMString::from("abcde");
            let res = match_domstring_ascii!( s,
                "abc" => false,
                "bcd" => false,
                _ => true,
            );
            assert!(res);
        }
        {
            let s = DOMString::from("abc%$");
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abcdd" => false,
                "bcd" => false,
                _ => true,
            );
            assert!(res);
        }
    }

    // A non-ASCII literal trips the macro's debug assertion.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_match_panic() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "❤" => true,
            _ => false,);
    }

    // The debug assertion also fires when the non-ASCII literal is not the first arm.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_match_panic2() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "abc" => false,
            "❤" => true,
            _ => false,
        );
    }

    #[test]
    fn test_strip_whitespace() {
        {
            let mut s = from_latin1(vec![
                b' ',
                b' ',
                b' ',
                b'\n',
                b' ',
                b'a',
                b'b',
                b'c',
                b'%',
                b'$',
                LATIN1_POWER2,
                b' ',
            ]);

            s.strip_leading_and_trailing_ascii_whitespace();
            s.ensure_rust_string();
            assert_eq!(&*s.str(), "abc%$²");
        }
        {
            let mut s = DOMString::from("   \n  abc%$ ");

            s.strip_leading_and_trailing_ascii_whitespace();
            s.ensure_rust_string();
            assert_eq!(&*s.str(), "abc%$");
        }
    }

    // https://infra.spec.whatwg.org/#ascii-whitespace
    #[test]
    fn contains_html_space_characters() {
        // Every ASCII whitespace byte must be detected in both representations.
        for whitespace in [
            ASCII_TAB,
            ASCII_NEWLINE,
            ASCII_FORMFEED,
            ASCII_CR,
            ASCII_SPACE,
        ] {
            let s = from_latin1(vec![b'a', b'a', b'a', whitespace, b'a', b'a']);
            assert!(
                s.contains_html_space_characters(),
                "Latin-1 backed, byte {whitespace:#04X}"
            );
            s.ensure_rust_string();
            assert!(
                s.contains_html_space_characters(),
                "Rust-string backed, byte {whitespace:#04X}"
            );
        }

        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
        assert!(!s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(!s.contains_html_space_characters());
    }

    #[test]
    fn atom() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = Atom::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = Atom::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = Atom::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn namespace() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let namespace1 = Namespace::from(s);
        let s2 = DOMString::from("aaa aa");
        let namespace2 = Namespace::from(s2);
        assert_eq!(namespace1, namespace2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let namespace3 = Namespace::from(s3);
        assert_ne!(namespace1, namespace3);
    }

    #[test]
    fn localname() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let name1 = LocalName::from(s);
        let s2 = DOMString::from("aaa aa");
        let name2 = LocalName::from(s2);
        assert_eq!(name1, name2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let name3 = LocalName::from(s3);
        assert_ne!(name1, name3);
    }

    #[test]
    fn is_ascii_lowercase() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILCROW, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert!(s.is_ascii_lowercase());
        // '`' is the byte directly below 'a'.
        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from("`aaaz");
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from("aaaz");
        assert!(s.is_ascii_lowercase());
    }

    #[test]
    fn test_as_bytes() {
        // A non-ASCII Latin-1 byte is exposed as its two-byte UTF-8 encoding.
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILCROW, b'a', b'a']);
        assert_eq!(
            *s.as_bytes(),
            [
                b'a',
                b'a',
                b'a',
                UTF8_PILCROW[0],
                UTF8_PILCROW[1],
                b'a',
                b'a'
            ]
        );

        // Pure ASCII content is exposed unchanged.
        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert_eq!(*s.as_bytes(), [b'a', b'a', b'a', b'a', b'z']);

        // Rust-backed strings expose exactly the bytes of the source text.
        for text in ["abc%$²", "AbBcC❤&%$#"] {
            let s = DOMString::from(text);
            assert_eq!(&*s.as_bytes(), text.as_bytes());
        }
    }
}