1#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::{Chars, FromStr};
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
16use html5ever::{LocalName, Namespace};
17use js::conversions::{ToJSValConvertible, jsstr_to_string};
18use js::gc::MutableHandleValue;
19use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
20use js::jsval::StringValue;
21use js::rust::{Runtime, Trace};
22use malloc_size_of::MallocSizeOfOps;
23use num_traits::{ToPrimitive, Zero};
24use regex::Regex;
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27
28use crate::script_runtime::JSContext as SafeJSContext;
29use crate::trace::RootedTraceableBox;
30
// Byte values used by the Latin-1 / ASCII fast paths in this file.

/// The last ASCII byte (DEL). Every byte up to and including this value is a
/// single UTF-8 code unit; Latin-1 bytes above it take two bytes in UTF-8.
const ASCII_END: u8 = 0x7F;
/// `'A'`
const ASCII_CAPITAL_A: u8 = 0x41;
/// `'Z'`
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `'a'`
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `'z'`
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// Horizontal tab.
const ASCII_TAB: u8 = 0x09;
/// Line feed.
const ASCII_NEWLINE: u8 = 0x0A;
/// Form feed.
const ASCII_FORMFEED: u8 = 0x0C;
/// Carriage return.
const ASCII_CR: u8 = 0x0D;
/// Space.
const ASCII_SPACE: u8 = 0x20;
41
42unsafe fn get_latin1_string_bytes(
45 rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
46) -> &[u8] {
47 debug_assert!(!rooted_traceable_box.get().is_null());
48 let mut length = 0;
49 unsafe {
50 let chars = JS_GetLatin1StringCharsAndLength(
51 Runtime::get().expect("JS runtime has shut down").as_ptr(),
52 ptr::null(),
53 rooted_traceable_box.get(),
54 &mut length,
55 );
56 assert!(!chars.is_null());
57 slice::from_raw_parts(chars, length)
58 }
59}
60
/// Raw string bytes tagged with the encoding they are stored in.
#[derive(Debug, PartialEq, Eq)]
pub enum EncodedBytes<'a> {
    /// Bytes in Latin-1 encoding (one byte per character).
    Latin1Bytes(&'a [u8]),
    /// Bytes in UTF-8 encoding.
    Utf8Bytes(&'a [u8]),
}
69
70enum DOMStringType {
71 Rust(String),
73 JSString(RootedTraceableBox<Heap<*mut JSString>>),
75 #[cfg(test)]
76 Latin1Vec(Vec<u8>),
79}
80
81impl DOMStringType {
82 fn str(&self) -> &str {
84 match self {
85 DOMStringType::Rust(s) => s,
86 DOMStringType::JSString(_rooted_traceable_box) => {
87 panic!("Cannot do a string")
88 },
89 #[cfg(test)]
90 &DOMStringType::Latin1Vec(_) => panic!("Cannot do a string"),
91 }
92 }
93
94 fn as_raw_bytes(&self) -> &[u8] {
99 match self {
100 DOMStringType::Rust(s) => s.as_bytes(),
101 DOMStringType::JSString(rooted_traceable_box) => unsafe {
102 get_latin1_string_bytes(rooted_traceable_box)
103 },
104 #[cfg(test)]
105 DOMStringType::Latin1Vec(items) => items,
106 }
107 }
108}
109
110#[derive(Debug)]
111pub struct StringView<'a>(Ref<'a, DOMStringType>);
113
114impl<'a> StringView<'a> {
115 pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
116 self.0
117 .str()
118 .split(HTML_SPACE_CHARACTERS)
119 .filter(|s| !s.is_empty())
120 }
121
122 pub fn strip_prefix(&self, needle: &str) -> Option<&str> {
123 self.0.str().strip_prefix(needle)
124 }
125
126 pub fn chars(&self) -> Chars<'_> {
127 self.0.str().chars()
128 }
129
130 pub fn as_bytes(&self) -> &[u8] {
131 self.0.str().as_bytes()
132 }
133}
134
135impl Deref for StringView<'_> {
136 type Target = str;
137 fn deref(&self) -> &str {
138 self.0.str()
139 }
140}
141
142impl AsRef<str> for StringView<'_> {
143 fn as_ref(&self) -> &str {
144 self.deref()
145 }
146}
147
148impl PartialEq for StringView<'_> {
149 fn eq(&self, other: &Self) -> bool {
150 self.0.str() == other.0.str()
151 }
152}
153
154impl PartialEq<&str> for StringView<'_> {
155 fn eq(&self, other: &&str) -> bool {
156 self.0.str() == *other
157 }
158}
159
// Marker impl: equality of views is plain `str` equality, which is a full equivalence relation.
impl Eq for StringView<'_> {}
161
162impl PartialOrd for StringView<'_> {
163 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
164 self.0.str().partial_cmp(other.0.str())
165 }
166}
167
168impl Ord for StringView<'_> {
169 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
170 self.0.str().cmp(other.0.str())
171 }
172}
173
174impl From<StringView<'_>> for String {
175 fn from(value: StringView<'_>) -> Self {
176 String::from(value.0.str())
177 }
178}
179
180unsafe impl Trace for DOMStringType {
186 unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
187 unsafe {
188 match self {
189 DOMStringType::Rust(_s) => {},
190 DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
191 #[cfg(test)]
192 DOMStringType::Latin1Vec(_s) => {},
193 }
194 }
195 }
196}
197
198impl malloc_size_of::MallocSizeOf for DOMStringType {
199 fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
200 match self {
201 DOMStringType::Rust(s) => s.size_of(ops),
202 DOMStringType::JSString(_rooted_traceable_box) => {
203 0
205 },
206 #[cfg(test)]
207 DOMStringType::Latin1Vec(s) => s.size_of(ops),
208 }
209 }
210}
211
212impl std::fmt::Debug for DOMStringType {
213 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
214 match self {
215 DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
216 DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
217 #[cfg(test)]
218 DOMStringType::Latin1Vec(s) => f
219 .debug_struct("DOMString")
220 .field("latin1_string", s)
221 .finish(),
222 }
223 }
224}
225
226#[derive(Debug)]
227pub struct EncodedBytesView<'a>(Ref<'a, DOMStringType>);
229
230impl EncodedBytesView<'_> {
231 pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
233 match *self.0 {
234 DOMStringType::Rust(ref s) => EncodedBytes::Utf8Bytes(s.as_bytes()),
235 DOMStringType::JSString(ref rooted_traceable_box) => {
236 EncodedBytes::Latin1Bytes(unsafe { get_latin1_string_bytes(rooted_traceable_box) })
237 },
238 #[cfg(test)]
239 DOMStringType::Latin1Vec(ref s) => EncodedBytes::Latin1Bytes(s),
240 }
241 }
242
243 fn is_empty(&self) -> bool {
244 match self.encoded_bytes() {
245 EncodedBytes::Latin1Bytes(items) => items.is_empty(),
246 EncodedBytes::Utf8Bytes(s) => s.is_empty(),
247 }
248 }
249
250 fn len(&self) -> usize {
251 match self.encoded_bytes() {
252 EncodedBytes::Latin1Bytes(items) => items
253 .iter()
254 .map(|b| if *b <= ASCII_END { 1 } else { 2 })
255 .sum(),
256 EncodedBytes::Utf8Bytes(s) => s.len(),
257 }
258 }
259}
260
/// A DOM string whose backing storage (`DOMStringType`) sits behind a `RefCell`, so
/// that methods taking `&self` can replace a JS-held string with an owned Rust string
/// (see `make_rust`).
#[repr(transparent)]
#[derive(Debug, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
299
300impl Clone for DOMString {
301 fn clone(&self) -> Self {
302 self.make_rust();
303 if let DOMStringType::Rust(ref s) = *self.0.borrow() {
304 DOMString::from_string(s.to_owned())
305 } else {
306 unreachable!()
307 }
308 }
309}
310
/// Errors produced when constructing a `DOMString`.
pub enum DOMStringErrorType {
    /// Returned by `DOMString::from_js_string` when the JS `ToString` conversion
    /// yields a null string pointer.
    JSConversionError,
}
314
315impl DOMString {
316 pub fn new() -> DOMString {
318 DOMString(RefCell::new(DOMStringType::Rust(String::new())))
319 }
320
321 pub fn from_js_string(
324 cx: SafeJSContext,
325 value: js::gc::HandleValue,
326 ) -> Result<DOMString, DOMStringErrorType> {
327 let string_ptr = unsafe { js::rust::ToString(*cx, value) };
328 if string_ptr.is_null() {
329 debug!("ToString failed");
330 Err(DOMStringErrorType::JSConversionError)
331 } else {
332 let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
333 let inner = if latin1 {
334 let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
335 DOMStringType::JSString(h)
336 } else {
337 DOMStringType::Rust(unsafe {
339 jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
340 })
341 };
342 Ok(DOMString(RefCell::new(inner)))
343 }
344 }
345
346 pub fn from_string(s: String) -> DOMString {
347 DOMString(RefCell::new(DOMStringType::Rust(s)))
348 }
349
350 fn make_rust(&self) {
352 let string = {
353 let inner = self.0.borrow();
354 match *inner {
355 DOMStringType::Rust(_) => return,
356 DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
357 jsstr_to_string(
358 Runtime::get().expect("JS runtime has shut down").as_ptr(),
359 NonNull::new(rooted_traceable_box.get()).unwrap(),
360 )
361 },
362 #[cfg(test)]
363 DOMStringType::Latin1Vec(ref items) => {
364 let mut v = vec![0; items.len() * 2];
365 let real_size = encoding_rs::mem::convert_latin1_to_utf8(
366 items.as_slice(),
367 v.as_mut_slice(),
368 );
369 v.truncate(real_size);
370
371 unsafe { String::from_utf8_unchecked(v) }
374 },
375 }
376 };
377 *self.0.borrow_mut() = DOMStringType::Rust(string);
378 }
379
380 #[expect(unused)]
382 fn debug_js(&self) {
383 match *self.0.borrow() {
384 DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
385 DOMStringType::JSString(ref rooted_traceable_box) => {
386 let s = unsafe {
387 jsstr_to_string(
388 Runtime::get().expect("JS runtime has shut down").as_ptr(),
389 ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
390 )
391 };
392 info!("JSString ({})", s);
393 },
394 #[cfg(test)]
395 DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
396 }
397 }
398
399 pub fn str(&self) -> StringView<'_> {
401 self.make_rust();
402 StringView(self.0.borrow())
403 }
404
405 pub fn view(&self) -> EncodedBytesView<'_> {
408 EncodedBytesView(self.0.borrow())
409 }
410
411 pub fn clear(&mut self) {
412 *self.0.borrow_mut() = DOMStringType::Rust(String::new())
413 }
414
415 pub fn is_empty(&self) -> bool {
416 self.view().is_empty()
417 }
418
419 pub fn len(&self) -> usize {
424 self.view().len()
425 }
426
427 pub fn len_utf8(&self) -> Utf8CodeUnitLength {
434 Utf8CodeUnitLength(self.len())
435 }
436
437 pub fn len_utf16(&self) -> Utf16CodeUnitLength {
442 Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
443 }
444
445 pub fn make_ascii_lowercase(&mut self) {
446 self.make_rust();
447 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
448 s.make_ascii_lowercase();
449 }
450 }
451
452 pub fn push_str(&mut self, s: &str) {
453 self.make_rust();
454 if let DOMStringType::Rust(ref mut string) = *self.0.borrow_mut() {
455 string.push_str(s)
456 }
457 }
458
459 pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
460 if self.is_empty() {
461 return;
462 }
463
464 self.make_rust();
465 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
466 let trailing_whitespace_len = s
467 .trim_end_matches(|ref c| char::is_ascii_whitespace(c))
468 .len();
469 s.truncate(trailing_whitespace_len);
470 if s.is_empty() {
471 return;
472 }
473
474 let first_non_whitespace = s.find(|ref c| !char::is_ascii_whitespace(c)).unwrap();
475 s.replace_range(0..first_non_whitespace, "");
476 }
477 }
478
479 pub fn is_valid_floating_point_number_string(&self) -> bool {
481 static RE: LazyLock<Regex> = LazyLock::new(|| {
482 Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
483 });
484 self.make_rust();
485
486 if let DOMStringType::Rust(ref s) = *self.0.borrow() {
487 RE.is_match(s) && self.parse_floating_point_number().is_some()
488 } else {
489 unreachable!()
490 }
491 }
492
493 pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
494 self.make_rust();
495 self.str().parse::<T>()
496 }
497
498 pub fn parse_floating_point_number(&self) -> Option<f64> {
500 self.make_rust();
501 parse_floating_point_number(&self.str())
502 }
503
504 pub fn set_best_representation_of_the_floating_point_number(&mut self) {
506 if let Some(val) = self.parse_floating_point_number() {
507 let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
509
510 *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
511 }
512 }
513
514 pub fn to_lowercase(&self) -> String {
515 self.make_rust();
516 self.str().to_lowercase()
517 }
518
519 pub fn to_uppercase(&self) -> String {
520 self.make_rust();
521 self.str().to_uppercase()
522 }
523
524 pub fn strip_newlines(&mut self) {
525 self.make_rust();
528 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
529 s.retain(|c| c != '\r' && c != '\n');
530 }
531 }
532
533 pub fn normalize_newlines(&mut self) {
535 self.make_rust();
536 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
540 *s = s.replace("\r\n", "\n").replace("\r", "\n")
541 }
542 }
543
544 pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
545 self.make_rust();
546 let new_string = self.str().to_owned();
547 DOMString(RefCell::new(DOMStringType::Rust(
548 new_string.replace(needle, replace_char),
549 )))
550 }
551
552 pub fn starts_with(&self, c: char) -> bool {
554 if !c.is_ascii() {
555 self.make_rust();
556 self.str().starts_with(c)
557 } else {
558 match self.view().encoded_bytes() {
559 EncodedBytes::Latin1Bytes(items) => items,
560 EncodedBytes::Utf8Bytes(s) => s,
561 }
562 .starts_with(&[c as u8])
564 }
565 }
566
567 pub fn starts_with_str(&self, needle: &str) -> bool {
568 self.make_rust();
569 self.str().starts_with(needle)
570 }
571
572 pub fn contains(&self, needle: &str) -> bool {
573 self.make_rust();
574 self.str().contains(needle)
575 }
576
577 pub fn to_ascii_lowercase(&self) -> String {
578 let conversion = match self.view().encoded_bytes() {
579 EncodedBytes::Latin1Bytes(items) => {
580 if items.iter().all(|c| *c <= ASCII_END) {
581 Some(unsafe {
583 String::from_utf8_unchecked(
584 items
585 .iter()
586 .map(|c| {
587 if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
588 c + 32
589 } else {
590 *c
591 }
592 })
593 .collect(),
594 )
595 })
596 } else {
597 None
598 }
599 },
600 EncodedBytes::Utf8Bytes(s) => unsafe {
601 Some(str::from_utf8_unchecked(s).to_ascii_lowercase())
603 },
604 };
605 if let Some(conversion) = conversion {
607 conversion
608 } else {
609 self.make_rust();
610 self.str().to_ascii_lowercase()
611 }
612 }
613
614 pub fn contains_html_space_characters(&self) -> bool {
615 const SPACE_BYTES: [u8; 5] = [
616 ASCII_TAB,
617 ASCII_NEWLINE,
618 ASCII_FORMFEED,
619 ASCII_CR,
620 ASCII_SPACE,
621 ];
622 match self.view().encoded_bytes() {
623 EncodedBytes::Latin1Bytes(items) => SPACE_BYTES.iter().any(|byte| items.contains(byte)),
624 EncodedBytes::Utf8Bytes(s) => {
625 let s = unsafe { str::from_utf8_unchecked(s) };
627 s.contains(HTML_SPACE_CHARACTERS)
628 },
629 }
630 }
631
632 pub fn as_bytes(&self) -> BytesView<'_> {
634 if self.is_ascii() {
638 BytesView(self.0.borrow())
639 } else {
640 self.make_rust();
641 BytesView(self.0.borrow())
642 }
643 }
644
645 pub fn is_ascii_lowercase(&self) -> bool {
647 match self.view().encoded_bytes() {
648 EncodedBytes::Latin1Bytes(items) => items
649 .iter()
650 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
651 EncodedBytes::Utf8Bytes(s) => s
652 .iter()
653 .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
654 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
655 }
656 }
657
658 pub fn is_ascii(&self) -> bool {
660 match self.view().encoded_bytes() {
661 EncodedBytes::Latin1Bytes(items) => items,
662 EncodedBytes::Utf8Bytes(items) => items,
663 }
664 .is_ascii()
665 }
666
667 pub fn is_valid_for_cookie(&self) -> bool {
671 match self.view().encoded_bytes() {
672 EncodedBytes::Latin1Bytes(items) | EncodedBytes::Utf8Bytes(items) => !items
673 .iter()
674 .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
675 }
676 }
677}
678
/// Parses `input` as a finite `f64`, ignoring surrounding whitespace.
///
/// Accepts what Rust's `f64` parser accepts, except that a leading `+`, a trailing
/// `.`, infinities and NaN are rejected. These checks are applied to the trimmed
/// text, i.e. to exactly the text that gets parsed, so `" +1"` and `"1. "` are
/// rejected just like `"+1"` and `"1."`.
///
/// Returns `None` for anything that is rejected or does not parse.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    let trimmed = input.trim();
    if trimmed.starts_with('+') || trimmed.ends_with('.') {
        return None;
    }
    trimmed
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite())
}
692
693pub struct BytesView<'a>(Ref<'a, DOMStringType>);
694
695impl Deref for BytesView<'_> {
696 type Target = [u8];
697
698 fn deref(&self) -> &Self::Target {
699 self.0.as_raw_bytes()
701 }
702}
703
704impl Ord for DOMString {
705 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
706 self.make_rust();
707 other.make_rust();
708 self.str().cmp(&other.str())
709 }
710}
711
712impl PartialOrd for DOMString {
713 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
714 self.make_rust();
715 other.make_rust();
716 self.str().partial_cmp(&other.str())
717 }
718}
719
720impl Extend<char> for DOMString {
721 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
722 self.make_rust();
723 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
724 s.extend(iter)
725 }
726 }
727}
728
729impl ToJSValConvertible for DOMString {
730 unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
731 let val = self.0.borrow();
732 match *val {
733 DOMStringType::Rust(ref s) => unsafe {
734 s.to_jsval(cx, rval);
735 },
736 DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
737 rval.set(StringValue(&*rooted_traceable_box.get()));
738 },
739 #[cfg(test)]
740 DOMStringType::Latin1Vec(ref items) => {
741 let mut v = vec![0; items.len() * 2];
742 let real_size =
743 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
744 v.truncate(real_size);
745
746 String::from_utf8(v)
747 .expect("Error in constructin test string")
748 .to_jsval(cx, rval);
749 },
750 };
751 }
752}
753
754impl std::hash::Hash for DOMString {
755 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
756 self.make_rust();
757 self.str().hash(state);
758 }
759}
760
761impl std::fmt::Display for DOMString {
762 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
763 self.make_rust();
764 fmt::Display::fmt(self.str().deref(), f)
765 }
766}
767
768impl Default for DOMString {
769 fn default() -> Self {
770 DOMString::new()
771 }
772}
773
774impl std::cmp::PartialEq<str> for DOMString {
775 fn eq(&self, other: &str) -> bool {
776 if other.is_ascii() {
777 other.as_bytes() ==
778 match self.view().encoded_bytes() {
779 EncodedBytes::Latin1Bytes(items) => items,
780 EncodedBytes::Utf8Bytes(s) => s,
781 }
782 } else {
783 self.make_rust();
784 self.str().deref() == other
785 }
786 }
787}
788
789impl std::cmp::PartialEq<&str> for DOMString {
790 fn eq(&self, other: &&str) -> bool {
791 if other.is_ascii() {
792 other.as_bytes() ==
793 match self.view().encoded_bytes() {
794 EncodedBytes::Latin1Bytes(items) => items,
795 EncodedBytes::Utf8Bytes(s) => s,
796 }
797 } else {
798 self.make_rust();
799 self.str().deref() == *other
800 }
801 }
802}
803
804impl std::cmp::PartialEq<String> for DOMString {
805 fn eq(&self, other: &String) -> bool {
806 if other.is_ascii() {
807 other.as_bytes() ==
808 match self.view().encoded_bytes() {
809 EncodedBytes::Latin1Bytes(items) => items,
810 EncodedBytes::Utf8Bytes(s) => s,
811 }
812 } else {
813 self.make_rust();
814 self.str().deref() == other
815 }
816 }
817}
818
819impl std::cmp::PartialEq<DOMString> for String {
820 fn eq(&self, other: &DOMString) -> bool {
821 other.eq(self)
822 }
823}
824
825impl std::cmp::PartialEq<DOMString> for str {
826 fn eq(&self, other: &DOMString) -> bool {
827 other.eq(self)
828 }
829}
830
831impl std::cmp::PartialEq for DOMString {
832 fn eq(&self, other: &DOMString) -> bool {
833 let result = match (self.view().encoded_bytes(), other.view().encoded_bytes()) {
834 (EncodedBytes::Latin1Bytes(items), EncodedBytes::Latin1Bytes(other_items)) => {
835 Some(items == other_items)
836 },
837 (EncodedBytes::Latin1Bytes(items), EncodedBytes::Utf8Bytes(other_s))
838 if other_s.is_ascii() =>
839 {
840 Some(items == other_s)
841 },
842 (EncodedBytes::Utf8Bytes(s), EncodedBytes::Latin1Bytes(other_items))
843 if s.is_ascii() =>
844 {
845 Some(s == other_items)
846 },
847 (EncodedBytes::Utf8Bytes(s), EncodedBytes::Utf8Bytes(other_s)) => Some(s == other_s),
848 _ => None,
849 };
850
851 if let Some(eq_result) = result {
852 eq_result
853 } else {
854 self.make_rust();
855 other.make_rust();
856 self.str() == other.str()
857 }
858 }
859}
860
// Marker impl: `DOMString` equality compares string contents and is a full equivalence relation.
impl std::cmp::Eq for DOMString {}
862
863impl From<std::string::String> for DOMString {
864 fn from(value: String) -> Self {
865 DOMString::from_string(value)
866 }
867}
868
869impl From<DOMString> for LocalName {
870 fn from(contents: DOMString) -> LocalName {
871 {
872 let view = contents.view();
873 let bytes = view.encoded_bytes();
874 let str = match bytes {
875 EncodedBytes::Latin1Bytes(items) => {
876 if items.iter().all(|c| c.is_ascii()) {
877 unsafe { Some(str::from_utf8_unchecked(items)) }
878 } else {
879 None
880 }
881 },
882 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
883 };
884 if let Some(s) = str {
885 return LocalName::from(s);
886 }
887 }
888 contents.make_rust();
889 LocalName::from(contents.str().deref())
890 }
891}
892
893impl From<&DOMString> for LocalName {
894 fn from(contents: &DOMString) -> LocalName {
895 {
896 let view = contents.view();
897 let bytes = view.encoded_bytes();
898 let str = match bytes {
899 EncodedBytes::Latin1Bytes(items) => {
900 if items.iter().all(|c| c.is_ascii()) {
901 unsafe { Some(str::from_utf8_unchecked(items)) }
903 } else {
904 None
905 }
906 },
907 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
908 };
909 if let Some(s) = str {
910 return LocalName::from(s);
911 }
912 }
913 contents.make_rust();
914 LocalName::from(contents.str().deref())
915 }
916}
917
918impl From<DOMString> for Namespace {
919 fn from(contents: DOMString) -> Namespace {
920 {
921 let view = contents.view();
922 let bytes = view.encoded_bytes();
923 let str = match bytes {
924 EncodedBytes::Latin1Bytes(items) => {
925 if items.iter().all(|c| c.is_ascii()) {
926 unsafe { Some(str::from_utf8_unchecked(items)) }
928 } else {
929 None
930 }
931 },
932 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
933 };
934 if let Some(s) = str {
935 return Namespace::from(s);
936 }
937 }
938 contents.make_rust();
939 Namespace::from(contents.str().deref())
940 }
941}
942
943impl From<DOMString> for Atom {
944 fn from(contents: DOMString) -> Atom {
945 {
946 let view = contents.view();
947 let bytes = view.encoded_bytes();
948 let str = match bytes {
949 EncodedBytes::Latin1Bytes(items) => {
950 if items.iter().all(|c| c.is_ascii()) {
951 unsafe { Some(str::from_utf8_unchecked(items)) }
953 } else {
954 None
955 }
956 },
957 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
958 };
959 if let Some(s) = str {
960 return Atom::from(s);
961 }
962 }
963 contents.make_rust();
964 Atom::from(contents.str().deref())
965 }
966}
967
968impl From<&str> for DOMString {
969 fn from(contents: &str) -> DOMString {
970 DOMString(RefCell::new(DOMStringType::Rust(String::from(contents))))
971 }
972}
973
974impl From<DOMString> for String {
975 fn from(val: DOMString) -> Self {
976 val.make_rust();
977 val.str().to_owned()
978 }
979}
980
981impl From<DOMString> for Vec<u8> {
982 fn from(value: DOMString) -> Self {
983 value.make_rust();
984 value.str().as_bytes().to_vec()
985 }
986}
987
988impl From<Cow<'_, str>> for DOMString {
989 fn from(value: Cow<'_, str>) -> Self {
990 DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
991 }
992}
993
/// Implementation detail of `match_domstring_ascii!`.
///
/// Expands a list of `"literal" => expr,` arms followed by one final `pattern => expr,`
/// arm into an `if` / `else` chain. Each literal is wrapped with `$wrap` (an
/// `EncodedBytes` variant constructor) and compared with `$subject`; the final arm
/// becomes a one-arm `match` on `$subject`. Debug builds assert that every literal is
/// ASCII.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    ($wrap:expr, $subject:expr, $lit:literal => $body:expr, $($tail:tt)*) => {{
        debug_assert!(($lit).is_ascii());
        if $wrap($lit.as_bytes()) == $subject {
            $body
        } else {
            match_domstring_ascii_inner!($wrap, $subject, $($tail)*)
        }
    }};
    ($wrap:expr, $subject:expr, $fallback:pat => $body:expr,) => {
        match $subject {
            $fallback => $body
        }
    };
}
1013
/// Matches a `DOMString`-like value against ASCII string literals without converting
/// its storage.
///
/// The arms are `"literal" => expr,` entries followed by one final `pattern => expr,`
/// arm. Each literal is compared with the stored bytes in the encoding they currently
/// have. Debug builds panic when a literal is not ASCII.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {{
        use $crate::domstring::EncodedBytes;
        use $crate::match_domstring_ascii_inner;

        let encoded_view = $input.view();
        let encoded = encoded_view.encoded_bytes();
        if matches!(encoded, EncodedBytes::Utf8Bytes(_)) {
            match_domstring_ascii_inner!(EncodedBytes::Utf8Bytes, encoded, $($tail)*)
        } else {
            match_domstring_ascii_inner!(EncodedBytes::Latin1Bytes, encoded, $($tail)*)
        }
    }};
}
1044
1045#[cfg(test)]
1046mod tests {
1047 use super::*;
1048
    // Latin-1 byte 0xB6 (the pilcrow sign).
    const LATIN1_PILLCROW: u8 = 0xB6;
    // The two bytes 194, 182 (0xC2 0xB6): the UTF-8 encoding of U+00B6.
    const UTF8_PILLCROW: [u8; 2] = [194, 182];
    // Latin-1 byte 0xB2 (superscript two).
    const LATIN1_POWER2: u8 = 0xB2;
1052
1053 fn from_latin1(l1vec: Vec<u8>) -> DOMString {
1054 DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
1055 }
1056
1057 #[test]
1058 fn string_functions() {
1059 let s = DOMString::from("AbBcC❤&%$#");
1060 let s_copy = s.clone();
1061 assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
1062 assert_eq!(s, s_copy);
1063 assert_eq!(s.len(), 12);
1064 assert_eq!(s_copy.len(), 12);
1065 assert!(s.starts_with('A'));
1066 let s2 = DOMString::from("");
1067 assert!(s2.is_empty());
1068 }
1069
1070 #[test]
1071 fn string_functions_latin1() {
1072 {
1073 let s = from_latin1(vec![
1074 b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
1075 ]);
1076 assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
1077 }
1078 {
1079 let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
1080 assert_eq!(s.to_ascii_lowercase(), "abbcc");
1081 }
1082 {
1083 let s = from_latin1(vec![
1084 b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
1085 ]);
1086 assert_eq!(s.len(), 11);
1087 assert!(s.starts_with('A'));
1088 }
1089 {
1090 let s = from_latin1(vec![]);
1091 assert!(s.is_empty());
1092 }
1093 }
1094
1095 #[test]
1096 fn test_length() {
1097 let s1 = from_latin1(vec![
1098 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
1099 0xAE, 0xAF,
1100 ]);
1101 let s2 = from_latin1(vec![
1102 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
1103 0xBE, 0xBF,
1104 ]);
1105 let s3 = from_latin1(vec![
1106 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
1107 0xCE, 0xCF,
1108 ]);
1109 let s4 = from_latin1(vec![
1110 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
1111 0xDE, 0xDF,
1112 ]);
1113 let s5 = from_latin1(vec![
1114 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
1115 0xEE, 0xEF,
1116 ]);
1117 let s6 = from_latin1(vec![
1118 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
1119 0xFE, 0xFF,
1120 ]);
1121
1122 let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
1123 let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
1124 let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
1125 let s4_utf8 = String::from("ÐÑÒÓÔÕÖרÙÚÛÜÝÞß");
1126 let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
1127 let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");
1128
1129 assert_eq!(s1.len(), s1_utf8.len());
1130 assert_eq!(s2.len(), s2_utf8.len());
1131 assert_eq!(s3.len(), s3_utf8.len());
1132 assert_eq!(s4.len(), s4_utf8.len());
1133 assert_eq!(s5.len(), s5_utf8.len());
1134 assert_eq!(s6.len(), s6_utf8.len());
1135
1136 s1.make_rust();
1137 s2.make_rust();
1138 s3.make_rust();
1139 s4.make_rust();
1140 s5.make_rust();
1141 s6.make_rust();
1142 assert_eq!(s1.len(), s1_utf8.len());
1143 assert_eq!(s2.len(), s2_utf8.len());
1144 assert_eq!(s3.len(), s3_utf8.len());
1145 assert_eq!(s4.len(), s4_utf8.len());
1146 assert_eq!(s5.len(), s5_utf8.len());
1147 assert_eq!(s6.len(), s6_utf8.len());
1148 }
1149
1150 #[test]
1151 fn test_convert() {
1152 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1153 s.make_rust();
1154 assert_eq!(&*s.str(), "abc%$");
1155 }
1156
1157 #[test]
1158 fn partial_eq() {
1159 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1160 let string = String::from("abc%$");
1161 let s2 = DOMString::from_string(string.clone());
1162 assert_eq!(s, s2);
1163 assert_eq!(s, string);
1164 }
1165
1166 #[test]
1167 fn encoded_bytes() {
1168 let bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
1169 let s = from_latin1(bytes.clone());
1170 if let EncodedBytes::Latin1Bytes(s) = s.view().encoded_bytes() {
1171 assert_eq!(s, bytes)
1172 }
1173 }
1174
1175 #[test]
1176 fn testing_stringview() {
1177 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1178
1179 assert_eq!(
1180 s.str().chars().collect::<Vec<char>>(),
1181 vec!['a', 'b', 'c', '%', '$', '²']
1182 );
1183 assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
1184 }
1185
1186 #[test]
1191 fn test_hash() {
1192 use std::hash::{DefaultHasher, Hash, Hasher};
1193 fn hash_value(d: &DOMString) -> u64 {
1194 let mut hasher = DefaultHasher::new();
1195 d.hash(&mut hasher);
1196 hasher.finish()
1197 }
1198
1199 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1200 let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1201 s_converted.make_rust();
1202 let s2 = DOMString::from_string(String::from("abc%$²"));
1203
1204 let hash_s = hash_value(&s);
1205 let hash_s_converted = hash_value(&s_converted);
1206 let hash_s2 = hash_value(&s2);
1207
1208 assert_eq!(hash_s, hash_s2);
1209 assert_eq!(hash_s, hash_s_converted);
1210 }
1211
1212 #[test]
1214 fn test_match_executing() {
1215 {
1217 let s = from_latin1(vec![b'a', b'b', b'c']);
1218 match_domstring_ascii!( s,
1219 "abc" => assert!(true),
1220 "bcd" => assert!(false),
1221 _ => (),
1222 );
1223 }
1224
1225 {
1226 let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1227 match_domstring_ascii!( s,
1228 "abc/" => assert!(true),
1229 "bcd" => assert!(false),
1230 _ => (),
1231 );
1232 }
1233
1234 {
1235 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1236 match_domstring_ascii!( s,
1237 "bcd" => assert!(false),
1238 "abc%$" => assert!(true),
1239 _ => (),
1240 );
1241 }
1242
1243 {
1244 let s = DOMString::from_string(String::from("abcde"));
1245 match_domstring_ascii!( s,
1246 "abc" => assert!(false),
1247 "bcd" => assert!(false),
1248 _ => assert!(true),
1249 );
1250 }
1251 {
1252 let s = DOMString::from_string(String::from("abc%$"));
1253 match_domstring_ascii!( s,
1254 "bcd" => assert!(false),
1255 "abc%$" => assert!(true),
1256 _ => (),
1257 );
1258 }
1259 {
1260 let s = from_latin1(vec![b'a', b'b', b'c']);
1261 match_domstring_ascii!( s,
1262 "abcdd" => assert!(false),
1263 "bcd" => assert!(false),
1264 _ => (),
1265 );
1266 }
1267 }
1268
1269 #[test]
1271 fn test_match_returning_result() {
1272 {
1273 let s = from_latin1(vec![b'a', b'b', b'c']);
1274 let res = match_domstring_ascii!( s,
1275 "abc" => true,
1276 "bcd" => false,
1277 _ => false,
1278 );
1279 assert_eq!(res, true);
1280 }
1281 {
1282 let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1283 let res = match_domstring_ascii!( s,
1284 "abc/" => true,
1285 "bcd" => false,
1286 _ => false,
1287 );
1288 assert_eq!(res, true);
1289 }
1290 {
1291 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1292 let res = match_domstring_ascii!( s,
1293 "bcd" => false,
1294 "abc%$" => true,
1295 _ => false,
1296 );
1297 assert_eq!(res, true);
1298 }
1299
1300 {
1301 let s = DOMString::from_string(String::from("abcde"));
1302 let res = match_domstring_ascii!( s,
1303 "abc" => false,
1304 "bcd" => false,
1305 _ => true,
1306 );
1307 assert_eq!(res, true);
1308 }
1309 {
1310 let s = DOMString::from_string(String::from("abc%$"));
1311 let res = match_domstring_ascii!( s,
1312 "bcd" => false,
1313 "abc%$" => true,
1314 _ => false,
1315 );
1316 assert_eq!(res, true);
1317 }
1318 {
1319 let s = from_latin1(vec![b'a', b'b', b'c']);
1320 let res = match_domstring_ascii!( s,
1321 "abcdd" => false,
1322 "bcd" => false,
1323 _ => true,
1324 );
1325 assert_eq!(res, true);
1326 }
1327 }
1328
1329 #[test]
1330 #[should_panic]
1331 fn test_match_panic() {
1332 let s = DOMString::from_string(String::from("abcd"));
1333 let _res = match_domstring_ascii!(s,
1334 "❤" => true,
1335 _ => false,);
1336 }
1337
1338 #[test]
1339 #[should_panic]
1340 fn test_match_panic2() {
1341 let s = DOMString::from_string(String::from("abcd"));
1342 let _res = match_domstring_ascii!(s,
1343 "abc" => false,
1344 "❤" => true,
1345 _ => false,
1346 );
1347 }
1348
1349 #[test]
1350 fn test_strip_whitespace() {
1351 {
1352 let mut s = from_latin1(vec![
1353 b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
1354 ]);
1355
1356 s.strip_leading_and_trailing_ascii_whitespace();
1357 s.make_rust();
1358 assert_eq!(&*s.str(), "abc%$²");
1359 }
1360 {
1361 let mut s = DOMString::from_string(String::from(" \n abc%$ "));
1362
1363 s.strip_leading_and_trailing_ascii_whitespace();
1364 s.make_rust();
1365 assert_eq!(&*s.str(), "abc%$");
1366 }
1367 }
1368
/// `contains_html_space_characters` must report `true` for a string holding
/// any one of tab, newline, form feed, carriage return or space, and `false`
/// for a string with none of them. Each case is checked both right after
/// `from_latin1` and again after `make_rust()`.
#[test]
fn contains_html_space_characters() {
    // One table instead of five copy-pasted cases; the assertion messages
    // identify which byte failed, since the line number alone no longer does.
    let whitespace_bytes = [
        ASCII_TAB,
        ASCII_NEWLINE,
        ASCII_FORMFEED,
        ASCII_CR,
        ASCII_SPACE,
    ];
    for byte in whitespace_bytes {
        let s = from_latin1(vec![b'a', b'a', b'a', byte, b'a', b'a']);
        assert!(
            s.contains_html_space_characters(),
            "byte {byte:#04x} not detected before make_rust()"
        );
        s.make_rust();
        assert!(
            s.contains_html_space_characters(),
            "byte {byte:#04x} not detected after make_rust()"
        );
    }

    // Negative case: no whitespace at all.
    let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
    assert!(!s.contains_html_space_characters());
    s.make_rust();
    assert!(!s.contains_html_space_characters());
}
1402
/// Converting a `DOMString` into an `Atom` must give equal atoms for the
/// same text regardless of how the string was built (`from_latin1` vs.
/// `from_string`), and a different atom for different content.
#[test]
fn atom() {
    // Named byte constants instead of raw 0x20 / 0xB2, matching the sibling
    // `namespace` and `localname` tests.
    let latin1 = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
    let atom1 = Atom::from(latin1);

    let rust = DOMString::from_string(String::from("aaa aa"));
    let atom2 = Atom::from(rust);
    assert_eq!(atom1, atom2);

    let other = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
    let atom3 = Atom::from(other);
    assert_ne!(atom1, atom3);
}
1414
/// Converting a `DOMString` into a `Namespace` must give equal values for
/// the same text regardless of how the string was built, and a different
/// value for different content.
#[test]
fn namespace() {
    let from_bytes = Namespace::from(from_latin1(vec![
        b'a', b'a', b'a', ASCII_SPACE, b'a', b'a',
    ]));
    let from_rust = Namespace::from(DOMString::from_string("aaa aa".to_owned()));
    assert_eq!(from_bytes, from_rust);

    let different = Namespace::from(from_latin1(vec![
        b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a',
    ]));
    assert_ne!(from_bytes, different);
}
1426
/// Converting a `DOMString` into a `LocalName` must give equal values for
/// the same text regardless of how the string was built, and a different
/// value for different content.
#[test]
fn localname() {
    let from_bytes = LocalName::from(from_latin1(vec![
        b'a', b'a', b'a', ASCII_SPACE, b'a', b'a',
    ]));
    let from_rust = LocalName::from(DOMString::from_string("aaa aa".to_owned()));
    assert_eq!(from_bytes, from_rust);

    let different = LocalName::from(from_latin1(vec![
        b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a',
    ]));
    assert_ne!(from_bytes, different);
}
1438
/// `is_ascii_lowercase` must hold only when every character is in `a..=z`;
/// a space, a non-ASCII Latin-1 byte, or a backtick (the character just
/// before `a`) must make it fail.
#[test]
fn is_ascii_lowercase() {
    // Strings built from Latin-1 bytes.
    let with_space = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
    assert!(!with_space.is_ascii_lowercase());

    let with_non_ascii = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
    assert!(!with_non_ascii.is_ascii_lowercase());

    let all_lower = from_latin1(b"aaaaz".to_vec());
    assert!(all_lower.is_ascii_lowercase());

    let with_backtick = from_latin1(b"`aaaz".to_vec());
    assert!(!with_backtick.is_ascii_lowercase());

    // Strings built from a Rust `String`.
    let rust_with_backtick = DOMString::from_string("`aaaz".to_owned());
    assert!(!rust_with_backtick.is_ascii_lowercase());

    let rust_all_lower = DOMString::from_string("aaaz".to_owned());
    assert!(rust_all_lower.is_ascii_lowercase());
}
1454
/// `as_bytes` must expose the string's UTF-8 bytes: a Latin-1 pilcrow byte is
/// expected to appear as the two bytes of `UTF8_PILLCROW`, pure-ASCII Latin-1
/// input is expected unchanged, and a string built from a Rust `String` must
/// give back exactly that string's bytes.
#[test]
fn test_as_bytes() {
    // The input vectors are moved straight into `from_latin1`; they are never
    // read again, so no clone is needed.
    let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
    assert_eq!(
        *s.as_bytes(),
        [
            b'a',
            b'a',
            b'a',
            UTF8_PILLCROW[0],
            UTF8_PILLCROW[1],
            b'a',
            b'a'
        ]
    );

    let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
    assert_eq!(*s.as_bytes(), [b'a', b'a', b'a', b'a', b'z']);

    // Here the clone is required: the original text is compared afterwards.
    // The local is named `text` so it does not shadow the `str` module.
    let text = "abc%$²".to_owned();
    let s = DOMString::from(text.clone());
    assert_eq!(&*s.as_bytes(), text.as_bytes());

    let text = "AbBcC❤&%$#".to_owned();
    let s = DOMString::from(text.clone());
    assert_eq!(&*s.as_bytes(), text.as_bytes());
}
1495}