1#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::{Chars, FromStr};
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
16use html5ever::{LocalName, Namespace};
17use js::conversions::{ToJSValConvertible, jsstr_to_string};
18use js::gc::MutableHandleValue;
19use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
20use js::jsval::StringValue;
21use js::rust::{Runtime, Trace};
22use malloc_size_of::MallocSizeOfOps;
23use num_traits::{ToPrimitive, Zero};
24use regex::Regex;
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27
28use crate::script_runtime::JSContext as SafeJSContext;
29use crate::trace::RootedTraceableBox;
30
// Byte values used by the Latin-1 / UTF-8 fast paths in this file.

/// Highest ASCII byte (`0x7F`, DEL). Every byte up to and including this value is identical in
/// Latin-1 and UTF-8 and takes exactly one UTF-8 code unit.
const ASCII_END: u8 = 0x7F;
/// `'A'`
const ASCII_CAPITAL_A: u8 = 0x41;
/// `'Z'`
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `'a'`
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `'z'`
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// Horizontal tab.
const ASCII_TAB: u8 = 0x09;
/// Line feed.
const ASCII_NEWLINE: u8 = 0x0A;
/// Form feed.
const ASCII_FORMFEED: u8 = 0x0C;
/// Carriage return.
const ASCII_CR: u8 = 0x0D;
/// Space.
const ASCII_SPACE: u8 = 0x20;
41
/// Returns the Latin-1 code units of the JS string stored in `rooted_traceable_box`.
///
/// The slice is built from the pointer and length reported by
/// `JS_GetLatin1StringCharsAndLength`.
///
/// # Safety
///
/// NOTE(review): the stored pointer must refer to a live JS string that uses Latin-1 storage,
/// and the returned slice presumably must not outlive that string's character buffer — confirm
/// against the SpiderMonkey API contract.
///
/// # Panics
///
/// Panics if the JS runtime is no longer available or if the engine returns a null pointer.
unsafe fn get_latin1_string_bytes(
    rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
) -> &[u8] {
    debug_assert!(!rooted_traceable_box.get().is_null());
    // Filled in by the engine call below.
    let mut length = 0;
    unsafe {
        let chars = JS_GetLatin1StringCharsAndLength(
            Runtime::get().expect("JS runtime has shut down").as_ptr(),
            ptr::null(),
            rooted_traceable_box.get(),
            &mut length,
        );
        assert!(!chars.is_null());
        slice::from_raw_parts(chars, length)
    }
}
60
/// A borrowed view of a string's bytes, tagged with the encoding they are stored in.
#[derive(Debug, PartialEq, Eq)]
pub enum EncodedBytes<'a> {
    /// Latin-1 bytes, as stored by a JS string.
    Latin1Bytes(&'a [u8]),
    /// UTF-8 bytes, as stored by a Rust `String`.
    Utf8Bytes(&'a [u8]),
}
69
/// The internal storage of a `DOMString`.
enum DOMStringType {
    /// An owned Rust (UTF-8) string.
    Rust(String),
    /// A pointer to a JS engine string, kept alive through a rooted, traceable box.
    JSString(RootedTraceableBox<Heap<*mut JSString>>),
    /// Test-only stand-in for a Latin-1 JS string that does not need a JS runtime.
    #[cfg(test)]
    Latin1Vec(Vec<u8>),
}
80
81impl DOMStringType {
82 fn str(&self) -> &str {
84 match self {
85 DOMStringType::Rust(s) => s,
86 DOMStringType::JSString(_rooted_traceable_box) => {
87 panic!("Cannot do a string")
88 },
89 #[cfg(test)]
90 &DOMStringType::Latin1Vec(_) => panic!("Cannot do a string"),
91 }
92 }
93
94 fn as_raw_bytes(&self) -> &[u8] {
99 match self {
100 DOMStringType::Rust(s) => s.as_bytes(),
101 DOMStringType::JSString(rooted_traceable_box) => unsafe {
102 get_latin1_string_bytes(rooted_traceable_box)
103 },
104 #[cfg(test)]
105 DOMStringType::Latin1Vec(items) => items,
106 }
107 }
108}
109
/// A `str`-like view into a `DOMString`, backed by a shared `RefCell` borrow.
///
/// Its accessors call `DOMStringType::str`, which panics unless the storage is the `Rust`
/// variant.
#[derive(Debug)]
pub struct StringView<'a>(Ref<'a, DOMStringType>);
113
114impl<'a> StringView<'a> {
115 pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
116 self.0
117 .str()
118 .split(HTML_SPACE_CHARACTERS)
119 .filter(|s| !s.is_empty())
120 }
121
122 pub fn strip_prefix(&self, needle: &str) -> Option<&str> {
123 self.0.str().strip_prefix(needle)
124 }
125
126 pub fn chars(&self) -> Chars<'_> {
127 self.0.str().chars()
128 }
129
130 pub fn as_bytes(&self) -> &[u8] {
131 self.0.str().as_bytes()
132 }
133}
134
135impl Deref for StringView<'_> {
136 type Target = str;
137 fn deref(&self) -> &str {
138 self.0.str()
139 }
140}
141
142impl AsRef<str> for StringView<'_> {
143 fn as_ref(&self) -> &str {
144 self.deref()
145 }
146}
147
148impl PartialEq for StringView<'_> {
149 fn eq(&self, other: &Self) -> bool {
150 self.0.str() == other.0.str()
151 }
152}
153
154impl PartialEq<&str> for StringView<'_> {
155 fn eq(&self, other: &&str) -> bool {
156 self.0.str() == *other
157 }
158}
159
// Equality is plain `str` equality (see the `PartialEq` impl), which is a total equivalence.
impl Eq for StringView<'_> {}
161
162impl PartialOrd for StringView<'_> {
163 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
164 self.0.str().partial_cmp(other.0.str())
165 }
166}
167
168impl Ord for StringView<'_> {
169 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
170 self.0.str().cmp(other.0.str())
171 }
172}
173
174impl From<StringView<'_>> for String {
175 fn from(value: StringView<'_>) -> Self {
176 String::from(value.0.str())
177 }
178}
179
// Only the `JSString` variant has anything to trace; its rooted box is handed the tracer.
// The other variants are no-ops.
unsafe impl Trace for DOMStringType {
    unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
        unsafe {
            match self {
                DOMStringType::Rust(_s) => {},
                DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
                #[cfg(test)]
                DOMStringType::Latin1Vec(_s) => {},
            }
        }
    }
}
197
198impl malloc_size_of::MallocSizeOf for DOMStringType {
199 fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
200 match self {
201 DOMStringType::Rust(s) => s.size_of(ops),
202 DOMStringType::JSString(_rooted_traceable_box) => {
203 0
205 },
206 #[cfg(test)]
207 DOMStringType::Latin1Vec(s) => s.size_of(ops),
208 }
209 }
210}
211
212impl std::fmt::Debug for DOMStringType {
213 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
214 match self {
215 DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
216 DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
217 #[cfg(test)]
218 DOMStringType::Latin1Vec(s) => f
219 .debug_struct("DOMString")
220 .field("latin1_string", s)
221 .finish(),
222 }
223 }
224}
225
/// A view into a `DOMString` that exposes the stored bytes without converting them.
///
/// Holds a shared `RefCell` borrow for as long as it is alive.
#[derive(Debug)]
pub struct EncodedBytesView<'a>(Ref<'a, DOMStringType>);
229
230impl EncodedBytesView<'_> {
231 pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
233 match *self.0 {
234 DOMStringType::Rust(ref s) => EncodedBytes::Utf8Bytes(s.as_bytes()),
235 DOMStringType::JSString(ref rooted_traceable_box) => {
236 EncodedBytes::Latin1Bytes(unsafe { get_latin1_string_bytes(rooted_traceable_box) })
237 },
238 #[cfg(test)]
239 DOMStringType::Latin1Vec(ref s) => EncodedBytes::Latin1Bytes(s),
240 }
241 }
242
243 fn is_empty(&self) -> bool {
244 match self.encoded_bytes() {
245 EncodedBytes::Latin1Bytes(items) => items.is_empty(),
246 EncodedBytes::Utf8Bytes(s) => s.is_empty(),
247 }
248 }
249
250 fn len(&self) -> usize {
251 match self.encoded_bytes() {
252 EncodedBytes::Latin1Bytes(items) => items
253 .iter()
254 .map(|b| if *b <= ASCII_END { 1 } else { 2 })
255 .sum(),
256 EncodedBytes::Utf8Bytes(s) => s.len(),
257 }
258 }
259}
260
/// A DOM string whose storage (`DOMStringType`) can be swapped through a shared reference.
///
/// The `RefCell` is what allows `&self` methods to convert the storage to a Rust `String`
/// in place (see `make_rust`).
#[repr(transparent)]
#[derive(Debug, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
299
300impl Clone for DOMString {
301 fn clone(&self) -> Self {
302 self.make_rust();
303 if let DOMStringType::Rust(ref s) = *self.0.borrow() {
304 DOMString::from_string(s.to_owned())
305 } else {
306 unreachable!()
307 }
308 }
309}
310
/// Errors that can occur while constructing a `DOMString`.
pub enum DOMStringErrorType {
    /// Returned by `DOMString::from_js_string` when converting the JS value to a string failed.
    JSConversionError,
}
314
315impl DOMString {
316 pub fn new() -> DOMString {
318 DOMString(RefCell::new(DOMStringType::Rust(String::new())))
319 }
320
321 pub fn from_js_string(
324 cx: SafeJSContext,
325 value: js::gc::HandleValue,
326 ) -> Result<DOMString, DOMStringErrorType> {
327 let string_ptr = unsafe { js::rust::ToString(*cx, value) };
328 if string_ptr.is_null() {
329 debug!("ToString failed");
330 Err(DOMStringErrorType::JSConversionError)
331 } else {
332 let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
333 let inner = if latin1 {
334 let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
335 DOMStringType::JSString(h)
336 } else {
337 DOMStringType::Rust(unsafe {
339 jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
340 })
341 };
342 Ok(DOMString(RefCell::new(inner)))
343 }
344 }
345
    /// Wraps an owned Rust `String` as a `DOMString`, taking ownership of `s`.
    pub fn from_string(s: String) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Rust(s)))
    }
349
    /// Converts the storage to `DOMStringType::Rust` in place; does nothing if it already is.
    ///
    /// # Panics
    ///
    /// Panics if the JS runtime is gone, if the stored JS string pointer is null, or if the
    /// `RefCell` is still borrowed when the new value is written.
    fn make_rust(&self) {
        // Build the converted string in its own scope so the shared borrow is released
        // before `borrow_mut` below.
        let string = {
            let inner = self.0.borrow();
            match *inner {
                DOMStringType::Rust(_) => return,
                DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
                    jsstr_to_string(
                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
                        NonNull::new(rooted_traceable_box.get()).unwrap(),
                    )
                },
                #[cfg(test)]
                DOMStringType::Latin1Vec(ref items) => {
                    // Worst case every Latin-1 byte becomes two UTF-8 bytes.
                    let mut v = vec![0; items.len() * 2];
                    let real_size = encoding_rs::mem::convert_latin1_to_utf8(
                        items.as_slice(),
                        v.as_mut_slice(),
                    );
                    v.truncate(real_size);

                    // The buffer was just produced by a Latin-1 to UTF-8 conversion.
                    unsafe { String::from_utf8_unchecked(v) }
                },
            }
        };
        *self.0.borrow_mut() = DOMStringType::Rust(string);
    }
379
    /// Debugging aid: logs which representation is stored and, for Rust and JS strings, the
    /// contents. The storage itself is left unchanged.
    #[expect(unused)]
    fn debug_js(&self) {
        match *self.0.borrow() {
            DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
            DOMStringType::JSString(ref rooted_traceable_box) => {
                // Converts a copy for logging only.
                let s = unsafe {
                    jsstr_to_string(
                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
                        ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
                    )
                };
                info!("JSString ({})", s);
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
        }
    }
398
    /// Converts the storage to a Rust string if needed and returns a `str`-like view of it.
    ///
    /// The view keeps a shared `RefCell` borrow for as long as it is alive.
    pub fn str(&self) -> StringView<'_> {
        self.make_rust();
        StringView(self.0.borrow())
    }
404
    /// Returns a view of the stored bytes in their current encoding, without converting them.
    ///
    /// The view keeps a shared `RefCell` borrow for as long as it is alive.
    pub fn view(&self) -> EncodedBytesView<'_> {
        EncodedBytesView(self.0.borrow())
    }
410
411 pub fn clear(&mut self) {
412 *self.0.borrow_mut() = DOMStringType::Rust(String::new())
413 }
414
    /// Whether the string has no content; checked on the stored bytes without converting them.
    pub fn is_empty(&self) -> bool {
        self.view().is_empty()
    }
418
    /// Length of the string in UTF-8 bytes, computed from the stored bytes without
    /// converting them (see `EncodedBytesView::len`).
    pub fn len(&self) -> usize {
        self.view().len()
    }
426
    /// Same value as `len`, wrapped in the `Utf8CodeUnitLength` unit type.
    pub fn len_utf8(&self) -> Utf8CodeUnitLength {
        Utf8CodeUnitLength(self.len())
    }
436
    /// Length of the string in UTF-16 code units.
    ///
    /// Converts the storage to a Rust string and sums `char::len_utf16` over its characters.
    pub fn len_utf16(&self) -> Utf16CodeUnitLength {
        Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
    }
444
445 pub fn make_ascii_lowercase(&mut self) {
446 self.make_rust();
447 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
448 s.make_ascii_lowercase();
449 }
450 }
451
452 pub fn push_str(&mut self, s: &str) {
453 self.make_rust();
454 if let DOMStringType::Rust(ref mut string) = *self.0.borrow_mut() {
455 string.push_str(s)
456 }
457 }
458
459 pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
460 if self.is_empty() {
461 return;
462 }
463
464 self.make_rust();
465 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
466 let trailing_whitespace_len = s
467 .trim_end_matches(|ref c| char::is_ascii_whitespace(c))
468 .len();
469 s.truncate(trailing_whitespace_len);
470 if s.is_empty() {
471 return;
472 }
473
474 let first_non_whitespace = s.find(|ref c| !char::is_ascii_whitespace(c)).unwrap();
475 s.replace_range(0..first_non_whitespace, "");
476 }
477 }
478
479 pub fn is_valid_floating_point_number_string(&self) -> bool {
481 static RE: LazyLock<Regex> = LazyLock::new(|| {
482 Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
483 });
484 self.make_rust();
485
486 if let DOMStringType::Rust(ref s) = *self.0.borrow() {
487 RE.is_match(s) && self.parse_floating_point_number().is_some()
488 } else {
489 unreachable!()
490 }
491 }
492
493 pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
494 self.make_rust();
495 self.str().parse::<T>()
496 }
497
498 pub fn parse_floating_point_number(&self) -> Option<f64> {
500 self.make_rust();
501 parse_floating_point_number(&self.str())
502 }
503
504 pub fn set_best_representation_of_the_floating_point_number(&mut self) {
506 if let Some(val) = self.parse_floating_point_number() {
507 let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
509
510 *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
511 }
512 }
513
514 pub fn to_lowercase(&self) -> String {
515 self.make_rust();
516 self.str().to_lowercase()
517 }
518
519 pub fn to_uppercase(&self) -> String {
520 self.make_rust();
521 self.str().to_uppercase()
522 }
523
524 pub fn strip_newlines(&mut self) {
525 self.make_rust();
528 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
529 s.retain(|c| c != '\r' && c != '\n');
530 }
531 }
532
533 pub fn normalize_newlines(&mut self) {
535 self.make_rust();
536 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
540 *s = s.replace("\r\n", "\n").replace("\r", "\n")
541 }
542 }
543
544 pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
545 self.make_rust();
546 let new_string = self.str().to_owned();
547 DOMString(RefCell::new(DOMStringType::Rust(
548 new_string.replace(needle, replace_char),
549 )))
550 }
551
552 pub fn starts_with(&self, c: char) -> bool {
554 if !c.is_ascii() {
555 self.make_rust();
556 self.str().starts_with(c)
557 } else {
558 match self.view().encoded_bytes() {
559 EncodedBytes::Latin1Bytes(items) => items,
560 EncodedBytes::Utf8Bytes(s) => s,
561 }
562 .starts_with(&[c as u8])
564 }
565 }
566
567 pub fn starts_with_str(&self, needle: &str) -> bool {
568 self.make_rust();
569 self.str().starts_with(needle)
570 }
571
572 pub fn contains(&self, needle: &str) -> bool {
573 self.make_rust();
574 self.str().contains(needle)
575 }
576
577 pub fn to_ascii_lowercase(&self) -> String {
578 let conversion = match self.view().encoded_bytes() {
579 EncodedBytes::Latin1Bytes(items) => {
580 if items.iter().all(|c| *c <= ASCII_END) {
581 Some(unsafe {
583 String::from_utf8_unchecked(
584 items
585 .iter()
586 .map(|c| {
587 if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
588 c + 32
589 } else {
590 *c
591 }
592 })
593 .collect(),
594 )
595 })
596 } else {
597 None
598 }
599 },
600 EncodedBytes::Utf8Bytes(s) => unsafe {
601 Some(str::from_utf8_unchecked(s).to_ascii_lowercase())
603 },
604 };
605 if let Some(conversion) = conversion {
607 conversion
608 } else {
609 self.make_rust();
610 self.str().to_ascii_lowercase()
611 }
612 }
613
614 fn contains_space_characters(
615 &self,
616 latin1_characters: &'static [u8],
617 utf8_characters: &'static [char],
618 ) -> bool {
619 match self.view().encoded_bytes() {
620 EncodedBytes::Latin1Bytes(items) => {
621 latin1_characters.iter().any(|byte| items.contains(byte))
622 },
623 EncodedBytes::Utf8Bytes(s) => {
624 let s = unsafe { str::from_utf8_unchecked(s) };
626 s.contains(utf8_characters)
627 },
628 }
629 }
630
    /// Whether the string contains a tab, a line feed or a carriage return.
    pub fn contains_tab_or_newline(&self) -> bool {
        // The same three characters, once as Latin-1 bytes and once as `char`s.
        const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
        const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];

        self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
    }
638
    /// Whether the string contains a tab, line feed, form feed, carriage return or space.
    pub fn contains_html_space_characters(&self) -> bool {
        // Byte form used for Latin-1 storage; UTF-8 storage uses `HTML_SPACE_CHARACTERS`.
        const SPACE_BYTES: [u8; 5] = [
            ASCII_TAB,
            ASCII_NEWLINE,
            ASCII_FORMFEED,
            ASCII_CR,
            ASCII_SPACE,
        ];
        self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
    }
650
651 pub fn as_bytes(&self) -> BytesView<'_> {
653 if self.is_ascii() {
657 BytesView(self.0.borrow())
658 } else {
659 self.make_rust();
660 BytesView(self.0.borrow())
661 }
662 }
663
664 pub fn is_ascii_lowercase(&self) -> bool {
666 match self.view().encoded_bytes() {
667 EncodedBytes::Latin1Bytes(items) => items
668 .iter()
669 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
670 EncodedBytes::Utf8Bytes(s) => s
671 .iter()
672 .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
673 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
674 }
675 }
676
677 pub fn is_ascii(&self) -> bool {
679 match self.view().encoded_bytes() {
680 EncodedBytes::Latin1Bytes(items) => items,
681 EncodedBytes::Utf8Bytes(items) => items,
682 }
683 .is_ascii()
684 }
685
686 pub fn is_valid_for_cookie(&self) -> bool {
690 match self.view().encoded_bytes() {
691 EncodedBytes::Latin1Bytes(items) | EncodedBytes::Utf8Bytes(items) => !items
692 .iter()
693 .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
694 }
695 }
696}
697
/// Parses `input` as a finite floating-point number.
///
/// Surrounding whitespace is ignored. The accepted syntax is what `f64::from_str` accepts,
/// except that a leading `+`, a trailing `.`, infinities and NaN are rejected. The sign and
/// dot checks are made on the trimmed text, so surrounding whitespace cannot hide them.
///
/// Returns `None` when the input is rejected or does not parse.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    let trimmed = input.trim();
    // `f64::from_str` accepts "+1" and "1.", which are not wanted here.
    if trimmed.starts_with('+') || trimmed.ends_with('.') {
        return None;
    }
    trimmed
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite())
}
711
/// A view into a `DOMString` that dereferences to its stored bytes.
///
/// Holds a shared `RefCell` borrow for as long as it is alive.
pub struct BytesView<'a>(Ref<'a, DOMStringType>);
713
impl Deref for BytesView<'_> {
    type Target = [u8];

    /// Borrows the stored bytes as they are, without re-encoding.
    fn deref(&self) -> &Self::Target {
        self.0.as_raw_bytes()
    }
}
722
723impl Ord for DOMString {
724 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
725 self.make_rust();
726 other.make_rust();
727 self.str().cmp(&other.str())
728 }
729}
730
731impl PartialOrd for DOMString {
732 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
733 self.make_rust();
734 other.make_rust();
735 self.str().partial_cmp(&other.str())
736 }
737}
738
739impl Extend<char> for DOMString {
740 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
741 self.make_rust();
742 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
743 s.extend(iter)
744 }
745 }
746}
747
impl ToJSValConvertible for DOMString {
    /// Writes the string into `rval` as a JS value.
    ///
    /// A Rust string goes through `String::to_jsval`; a stored JS string is set on `rval`
    /// directly.
    unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
        let val = self.0.borrow();
        match *val {
            DOMStringType::Rust(ref s) => unsafe {
                s.to_jsval(cx, rval);
            },
            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
                rval.set(StringValue(&*rooted_traceable_box.get()));
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(ref items) => {
                // Worst case every Latin-1 byte becomes two UTF-8 bytes.
                let mut v = vec![0; items.len() * 2];
                let real_size =
                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
                v.truncate(real_size);

                String::from_utf8(v)
                    .expect("Error in constructin test string")
                    .to_jsval(cx, rval);
            },
        };
    }
}
772
773impl std::hash::Hash for DOMString {
774 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
775 self.make_rust();
776 self.str().hash(state);
777 }
778}
779
780impl std::fmt::Display for DOMString {
781 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
782 self.make_rust();
783 fmt::Display::fmt(self.str().deref(), f)
784 }
785}
786
787impl Default for DOMString {
788 fn default() -> Self {
789 DOMString::new()
790 }
791}
792
793impl std::cmp::PartialEq<str> for DOMString {
794 fn eq(&self, other: &str) -> bool {
795 if other.is_ascii() {
796 other.as_bytes() ==
797 match self.view().encoded_bytes() {
798 EncodedBytes::Latin1Bytes(items) => items,
799 EncodedBytes::Utf8Bytes(s) => s,
800 }
801 } else {
802 self.make_rust();
803 self.str().deref() == other
804 }
805 }
806}
807
808impl std::cmp::PartialEq<&str> for DOMString {
809 fn eq(&self, other: &&str) -> bool {
810 if other.is_ascii() {
811 other.as_bytes() ==
812 match self.view().encoded_bytes() {
813 EncodedBytes::Latin1Bytes(items) => items,
814 EncodedBytes::Utf8Bytes(s) => s,
815 }
816 } else {
817 self.make_rust();
818 self.str().deref() == *other
819 }
820 }
821}
822
823impl std::cmp::PartialEq<String> for DOMString {
824 fn eq(&self, other: &String) -> bool {
825 if other.is_ascii() {
826 other.as_bytes() ==
827 match self.view().encoded_bytes() {
828 EncodedBytes::Latin1Bytes(items) => items,
829 EncodedBytes::Utf8Bytes(s) => s,
830 }
831 } else {
832 self.make_rust();
833 self.str().deref() == other
834 }
835 }
836}
837
838impl std::cmp::PartialEq<DOMString> for String {
839 fn eq(&self, other: &DOMString) -> bool {
840 other.eq(self)
841 }
842}
843
844impl std::cmp::PartialEq<DOMString> for str {
845 fn eq(&self, other: &DOMString) -> bool {
846 other.eq(self)
847 }
848}
849
850impl std::cmp::PartialEq for DOMString {
851 fn eq(&self, other: &DOMString) -> bool {
852 let result = match (self.view().encoded_bytes(), other.view().encoded_bytes()) {
853 (EncodedBytes::Latin1Bytes(items), EncodedBytes::Latin1Bytes(other_items)) => {
854 Some(items == other_items)
855 },
856 (EncodedBytes::Latin1Bytes(items), EncodedBytes::Utf8Bytes(other_s))
857 if other_s.is_ascii() =>
858 {
859 Some(items == other_s)
860 },
861 (EncodedBytes::Utf8Bytes(s), EncodedBytes::Latin1Bytes(other_items))
862 if s.is_ascii() =>
863 {
864 Some(s == other_items)
865 },
866 (EncodedBytes::Utf8Bytes(s), EncodedBytes::Utf8Bytes(other_s)) => Some(s == other_s),
867 _ => None,
868 };
869
870 if let Some(eq_result) = result {
871 eq_result
872 } else {
873 self.make_rust();
874 other.make_rust();
875 self.str() == other.str()
876 }
877 }
878}
879
// `DOMString` equality compares string contents (see the `PartialEq` impl).
impl std::cmp::Eq for DOMString {}
881
882impl From<std::string::String> for DOMString {
883 fn from(value: String) -> Self {
884 DOMString::from_string(value)
885 }
886}
887
888impl From<DOMString> for LocalName {
889 fn from(contents: DOMString) -> LocalName {
890 {
891 let view = contents.view();
892 let bytes = view.encoded_bytes();
893 let str = match bytes {
894 EncodedBytes::Latin1Bytes(items) => {
895 if items.iter().all(|c| c.is_ascii()) {
896 unsafe { Some(str::from_utf8_unchecked(items)) }
897 } else {
898 None
899 }
900 },
901 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
902 };
903 if let Some(s) = str {
904 return LocalName::from(s);
905 }
906 }
907 contents.make_rust();
908 LocalName::from(contents.str().deref())
909 }
910}
911
912impl From<&DOMString> for LocalName {
913 fn from(contents: &DOMString) -> LocalName {
914 {
915 let view = contents.view();
916 let bytes = view.encoded_bytes();
917 let str = match bytes {
918 EncodedBytes::Latin1Bytes(items) => {
919 if items.iter().all(|c| c.is_ascii()) {
920 unsafe { Some(str::from_utf8_unchecked(items)) }
922 } else {
923 None
924 }
925 },
926 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
927 };
928 if let Some(s) = str {
929 return LocalName::from(s);
930 }
931 }
932 contents.make_rust();
933 LocalName::from(contents.str().deref())
934 }
935}
936
937impl From<DOMString> for Namespace {
938 fn from(contents: DOMString) -> Namespace {
939 {
940 let view = contents.view();
941 let bytes = view.encoded_bytes();
942 let str = match bytes {
943 EncodedBytes::Latin1Bytes(items) => {
944 if items.iter().all(|c| c.is_ascii()) {
945 unsafe { Some(str::from_utf8_unchecked(items)) }
947 } else {
948 None
949 }
950 },
951 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
952 };
953 if let Some(s) = str {
954 return Namespace::from(s);
955 }
956 }
957 contents.make_rust();
958 Namespace::from(contents.str().deref())
959 }
960}
961
962impl From<DOMString> for Atom {
963 fn from(contents: DOMString) -> Atom {
964 {
965 let view = contents.view();
966 let bytes = view.encoded_bytes();
967 let str = match bytes {
968 EncodedBytes::Latin1Bytes(items) => {
969 if items.iter().all(|c| c.is_ascii()) {
970 unsafe { Some(str::from_utf8_unchecked(items)) }
972 } else {
973 None
974 }
975 },
976 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
977 };
978 if let Some(s) = str {
979 return Atom::from(s);
980 }
981 }
982 contents.make_rust();
983 Atom::from(contents.str().deref())
984 }
985}
986
987impl From<&str> for DOMString {
988 fn from(contents: &str) -> DOMString {
989 DOMString(RefCell::new(DOMStringType::Rust(String::from(contents))))
990 }
991}
992
993impl From<DOMString> for String {
994 fn from(val: DOMString) -> Self {
995 val.make_rust();
996 val.str().to_owned()
997 }
998}
999
1000impl From<DOMString> for Vec<u8> {
1001 fn from(value: DOMString) -> Self {
1002 value.make_rust();
1003 value.str().as_bytes().to_vec()
1004 }
1005}
1006
1007impl From<Cow<'_, str>> for DOMString {
1008 fn from(value: Cow<'_, str>) -> Self {
1009 DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
1010 }
1011}
1012
/// Implementation detail of `match_domstring_ascii!`.
///
/// Expands a list of `literal => expr,` arms into an `if`/`else` chain. Each literal is
/// wrapped as `$variant(literal.as_bytes())` and compared with `$input`. The final arm must
/// be a pattern (typically `_`), which is matched against `$input` directly.
///
/// In debug builds every literal is asserted to be ASCII.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // A literal arm followed by more arms: compare, then recurse on the remaining arms.
    ($variant: expr, $input: expr, $p: literal => $then: expr, $($rest:tt)*) => {
        if {
            debug_assert!(($p).is_ascii());
            $variant($p.as_bytes())
        } == $input {
          $then
        } else {
            match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // The last arm: a plain pattern.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
1032
/// Matches a `DOMString` against ASCII string literals without converting it.
///
/// Arms are written as `"literal" => expr,` and the last arm must be a pattern such as
/// `_ => expr,`. The stored bytes are compared in whichever encoding the string currently
/// uses.
///
/// In debug builds a non-ASCII literal triggers a `debug_assert!` failure when its arm is
/// reached.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::match_domstring_ascii_inner;
            use $crate::domstring::EncodedBytes;

            let view = $input.view();
            let s = view.encoded_bytes();
            // Wrap the literals in the same variant as the input so the comparison is
            // between bytes of the same encoding tag.
            if matches!(s, EncodedBytes::Latin1Bytes(_)) {
                match_domstring_ascii_inner!(EncodedBytes::Latin1Bytes, s, $($tail)*)
            } else {
                match_domstring_ascii_inner!(EncodedBytes::Utf8Bytes, s, $($tail)*)
            }
        }
    };
}
1063
1064#[cfg(test)]
1065mod tests {
1066 use super::*;
1067
    /// The pilcrow sign (U+00B6) as a Latin-1 byte.
    const LATIN1_PILLCROW: u8 = 0xB6;
    /// The pilcrow sign (U+00B6) as UTF-8 bytes.
    const UTF8_PILLCROW: [u8; 2] = [194, 182];
    /// Superscript two (U+00B2) as a Latin-1 byte.
    const LATIN1_POWER2: u8 = 0xB2;
1071
    /// Builds a `DOMString` backed by the test-only `Latin1Vec` variant.
    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
    }
1075
    /// Basic operations on a Rust-backed string that contains a multi-byte character.
    #[test]
    fn string_functions() {
        let s = DOMString::from("AbBcC❤&%$#");
        let s_copy = s.clone();
        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
        assert_eq!(s, s_copy);
        // 9 ASCII bytes plus 3 bytes for the heart.
        assert_eq!(s.len(), 12);
        assert_eq!(s_copy.len(), 12);
        assert!(s.starts_with('A'));
        let s2 = DOMString::from("");
        assert!(s2.is_empty());
    }
1088
    /// The same basic operations on Latin-1 backed strings.
    #[test]
    fn string_functions_latin1() {
        {
            // Contains a non-ASCII byte.
            let s = from_latin1(vec![
                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
            ]);
            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
        }
        {
            // Pure ASCII.
            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
            assert_eq!(s.to_ascii_lowercase(), "abbcc");
        }
        {
            let s = from_latin1(vec![
                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
            ]);
            // 9 ASCII bytes plus 2 UTF-8 bytes for 0xB2.
            assert_eq!(s.len(), 11);
            assert!(s.starts_with('A'));
        }
        {
            let s = from_latin1(vec![]);
            assert!(s.is_empty());
        }
    }
1113
1114 #[test]
1115 fn test_length() {
1116 let s1 = from_latin1(vec![
1117 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
1118 0xAE, 0xAF,
1119 ]);
1120 let s2 = from_latin1(vec![
1121 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
1122 0xBE, 0xBF,
1123 ]);
1124 let s3 = from_latin1(vec![
1125 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
1126 0xCE, 0xCF,
1127 ]);
1128 let s4 = from_latin1(vec![
1129 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
1130 0xDE, 0xDF,
1131 ]);
1132 let s5 = from_latin1(vec![
1133 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
1134 0xEE, 0xEF,
1135 ]);
1136 let s6 = from_latin1(vec![
1137 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
1138 0xFE, 0xFF,
1139 ]);
1140
1141 let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
1142 let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
1143 let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
1144 let s4_utf8 = String::from("ÐÑÒÓÔÕÖרÙÚÛÜÝÞß");
1145 let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
1146 let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");
1147
1148 assert_eq!(s1.len(), s1_utf8.len());
1149 assert_eq!(s2.len(), s2_utf8.len());
1150 assert_eq!(s3.len(), s3_utf8.len());
1151 assert_eq!(s4.len(), s4_utf8.len());
1152 assert_eq!(s5.len(), s5_utf8.len());
1153 assert_eq!(s6.len(), s6_utf8.len());
1154
1155 s1.make_rust();
1156 s2.make_rust();
1157 s3.make_rust();
1158 s4.make_rust();
1159 s5.make_rust();
1160 s6.make_rust();
1161 assert_eq!(s1.len(), s1_utf8.len());
1162 assert_eq!(s2.len(), s2_utf8.len());
1163 assert_eq!(s3.len(), s3_utf8.len());
1164 assert_eq!(s4.len(), s4_utf8.len());
1165 assert_eq!(s5.len(), s5_utf8.len());
1166 assert_eq!(s6.len(), s6_utf8.len());
1167 }
1168
    /// Converting an ASCII-only Latin-1 string yields the same text.
    #[test]
    fn test_convert() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        s.make_rust();
        assert_eq!(&*s.str(), "abc%$");
    }
1175
    /// A Latin-1 string equals a Rust-backed `DOMString` and a `String` with the same text.
    #[test]
    fn partial_eq() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        let string = String::from("abc%$");
        let s2 = DOMString::from_string(string.clone());
        assert_eq!(s, s2);
        assert_eq!(s, string);
    }
1184
1185 #[test]
1186 fn encoded_bytes() {
1187 let bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
1188 let s = from_latin1(bytes.clone());
1189 if let EncodedBytes::Latin1Bytes(s) = s.view().encoded_bytes() {
1190 assert_eq!(s, bytes)
1191 }
1192 }
1193
    /// `str()` on a Latin-1 string yields the converted characters and UTF-8 bytes.
    #[test]
    fn testing_stringview() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);

        assert_eq!(
            s.str().chars().collect::<Vec<char>>(),
            vec!['a', 'b', 'c', '%', '$', '²']
        );
        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
    }
1204
    /// The hash must be the same for equal text whether the string starts as Latin-1, was
    /// converted beforehand, or was created from a Rust `String`.
    #[test]
    fn test_hash() {
        use std::hash::{DefaultHasher, Hash, Hasher};
        fn hash_value(d: &DOMString) -> u64 {
            let mut hasher = DefaultHasher::new();
            d.hash(&mut hasher);
            hasher.finish()
        }

        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
        s_converted.make_rust();
        let s2 = DOMString::from_string(String::from("abc%$²"));

        let hash_s = hash_value(&s);
        let hash_s_converted = hash_value(&s_converted);
        let hash_s2 = hash_value(&s2);

        assert_eq!(hash_s, hash_s2);
        assert_eq!(hash_s, hash_s_converted);
    }
1230
    /// `match_domstring_ascii!` with statement-like arms, for Latin-1 and Rust-backed inputs.
    #[test]
    fn test_match_executing() {
        // Latin-1 input, first arm matches.
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abc" => assert!(true),
                "bcd" => assert!(false),
                _ => (),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            match_domstring_ascii!( s,
                "abc/" => assert!(true),
                "bcd" => assert!(false),
                _ => (),
            );
        }

        // Latin-1 input, second arm matches.
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            match_domstring_ascii!( s,
                "bcd" => assert!(false),
                "abc%$" => assert!(true),
                _ => (),
            );
        }

        // Rust-backed input, fallback arm.
        {
            let s = DOMString::from_string(String::from("abcde"));
            match_domstring_ascii!( s,
                "abc" => assert!(false),
                "bcd" => assert!(false),
                _ => assert!(true),
            );
        }
        {
            let s = DOMString::from_string(String::from("abc%$"));
            match_domstring_ascii!( s,
                "bcd" => assert!(false),
                "abc%$" => assert!(true),
                _ => (),
            );
        }
        // A literal that merely has the input as a prefix must not match.
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abcdd" => assert!(false),
                "bcd" => assert!(false),
                _ => (),
            );
        }
    }
1287
1288 #[test]
1290 fn test_match_returning_result() {
1291 {
1292 let s = from_latin1(vec![b'a', b'b', b'c']);
1293 let res = match_domstring_ascii!( s,
1294 "abc" => true,
1295 "bcd" => false,
1296 _ => false,
1297 );
1298 assert_eq!(res, true);
1299 }
1300 {
1301 let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1302 let res = match_domstring_ascii!( s,
1303 "abc/" => true,
1304 "bcd" => false,
1305 _ => false,
1306 );
1307 assert_eq!(res, true);
1308 }
1309 {
1310 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1311 let res = match_domstring_ascii!( s,
1312 "bcd" => false,
1313 "abc%$" => true,
1314 _ => false,
1315 );
1316 assert_eq!(res, true);
1317 }
1318
1319 {
1320 let s = DOMString::from_string(String::from("abcde"));
1321 let res = match_domstring_ascii!( s,
1322 "abc" => false,
1323 "bcd" => false,
1324 _ => true,
1325 );
1326 assert_eq!(res, true);
1327 }
1328 {
1329 let s = DOMString::from_string(String::from("abc%$"));
1330 let res = match_domstring_ascii!( s,
1331 "bcd" => false,
1332 "abc%$" => true,
1333 _ => false,
1334 );
1335 assert_eq!(res, true);
1336 }
1337 {
1338 let s = from_latin1(vec![b'a', b'b', b'c']);
1339 let res = match_domstring_ascii!( s,
1340 "abcdd" => false,
1341 "bcd" => false,
1342 _ => true,
1343 );
1344 assert_eq!(res, true);
1345 }
1346 }
1347
    /// A non-ASCII literal in the first arm trips the macro's `debug_assert!`.
    #[test]
    #[should_panic]
    fn test_match_panic() {
        let s = DOMString::from_string(String::from("abcd"));
        let _res = match_domstring_ascii!(s,
            "❤" => true,
            _ => false,);
    }
1356
    /// A non-ASCII literal in a later arm also trips the `debug_assert!` once it is reached.
    #[test]
    #[should_panic]
    fn test_match_panic2() {
        let s = DOMString::from_string(String::from("abcd"));
        let _res = match_domstring_ascii!(s,
            "abc" => false,
            "❤" => true,
            _ => false,
        );
    }
1367
    /// Stripping removes ASCII whitespace from both ends, for Latin-1 and Rust-backed input.
    #[test]
    fn test_strip_whitespace() {
        {
            let mut s = from_latin1(vec![
                b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
            ]);

            s.strip_leading_and_trailing_ascii_whitespace();
            s.make_rust();
            assert_eq!(&*s.str(), "abc%$²");
        }
        {
            let mut s = DOMString::from_string(String::from("   \n  abc%$ "));

            s.strip_leading_and_trailing_ascii_whitespace();
            s.make_rust();
            assert_eq!(&*s.str(), "abc%$");
        }
    }
1387
1388 #[test]
1390 fn contains_html_space_characters() {
1391 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_TAB, b'a', b'a']); assert!(s.contains_html_space_characters());
1393 s.make_rust();
1394 assert!(s.contains_html_space_characters());
1395
1396 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_NEWLINE, b'a', b'a']); assert!(s.contains_html_space_characters());
1398 s.make_rust();
1399 assert!(s.contains_html_space_characters());
1400
1401 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_FORMFEED, b'a', b'a']); assert!(s.contains_html_space_characters());
1403 s.make_rust();
1404 assert!(s.contains_html_space_characters());
1405
1406 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_CR, b'a', b'a']); assert!(s.contains_html_space_characters());
1408 s.make_rust();
1409 assert!(s.contains_html_space_characters());
1410
1411 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']); assert!(s.contains_html_space_characters());
1413 s.make_rust();
1414 assert!(s.contains_html_space_characters());
1415
1416 let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
1417 assert!(!s.contains_html_space_characters());
1418 s.make_rust();
1419 assert!(!s.contains_html_space_characters());
1420 }
1421
1422 #[test]
1423 fn atom() {
1424 let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
1425 let atom1 = Atom::from(s);
1426 let s2 = DOMString::from_string(String::from("aaa aa"));
1427 let atom2 = Atom::from(s2);
1428 assert_eq!(atom1, atom2);
1429 let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
1430 let atom3 = Atom::from(s3);
1431 assert_ne!(atom1, atom3);
1432 }
1433
1434 #[test]
1435 fn namespace() {
1436 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1437 let atom1 = Namespace::from(s);
1438 let s2 = DOMString::from_string(String::from("aaa aa"));
1439 let atom2 = Namespace::from(s2);
1440 assert_eq!(atom1, atom2);
1441 let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1442 let atom3 = Namespace::from(s3);
1443 assert_ne!(atom1, atom3);
1444 }
1445
1446 #[test]
1447 fn localname() {
1448 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1449 let atom1 = LocalName::from(s);
1450 let s2 = DOMString::from_string(String::from("aaa aa"));
1451 let atom2 = LocalName::from(s2);
1452 assert_eq!(atom1, atom2);
1453 let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1454 let atom3 = LocalName::from(s3);
1455 assert_ne!(atom1, atom3);
1456 }
1457
1458 #[test]
1459 fn is_ascii_lowercase() {
1460 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1461 assert!(!s.is_ascii_lowercase());
1462 let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
1463 assert!(!s.is_ascii_lowercase());
1464 let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
1465 assert!(s.is_ascii_lowercase());
1466 let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
1467 assert!(!s.is_ascii_lowercase());
1468 let s = DOMString::from_string(String::from("`aaaz"));
1469 assert!(!s.is_ascii_lowercase());
1470 let s = DOMString::from_string(String::from("aaaz"));
1471 assert!(s.is_ascii_lowercase());
1472 }
1473
1474 #[test]
1475 fn test_as_bytes() {
1476 const ASCII_SMALL_A: u8 = b'a';
1477 const ASCII_SMALL_Z: u8 = b'z';
1478
1479 let v1 = vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'];
1480 let s = from_latin1(v1.clone());
1481 assert_eq!(
1482 *s.as_bytes(),
1483 [
1484 ASCII_SMALL_A,
1485 ASCII_SMALL_A,
1486 ASCII_SMALL_A,
1487 UTF8_PILLCROW[0],
1488 UTF8_PILLCROW[1],
1489 ASCII_SMALL_A,
1490 ASCII_SMALL_A
1491 ]
1492 );
1493
1494 let v2 = vec![b'a', b'a', b'a', b'a', b'z'];
1495 let s = from_latin1(v2.clone());
1496 assert_eq!(
1497 *s.as_bytes(),
1498 [
1499 ASCII_SMALL_A,
1500 ASCII_SMALL_A,
1501 ASCII_SMALL_A,
1502 ASCII_SMALL_A,
1503 ASCII_SMALL_Z
1504 ]
1505 );
1506
1507 let str = "abc%$²".to_owned();
1508 let s = DOMString::from(str.clone());
1509 assert_eq!(&*s.as_bytes(), str.as_bytes());
1510 let str = "AbBcC❤&%$#".to_owned();
1511 let s = DOMString::from(str.clone());
1512 assert_eq!(&*s.as_bytes(), str.as_bytes());
1513 }
1514}