1#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::{Chars, FromStr};
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::conversions::{ToJSValConvertible, jsstr_to_string};
17use js::gc::MutableHandleValue;
18use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
19use js::jsval::StringValue;
20use js::rust::{Runtime, Trace};
21use malloc_size_of::MallocSizeOfOps;
22use num_traits::{ToPrimitive, Zero};
23use regex::Regex;
24use style::Atom;
25use style::str::HTML_SPACE_CHARACTERS;
26
27use crate::script_runtime::JSContext as SafeJSContext;
28use crate::trace::RootedTraceableBox;
29
/// Upper bound this module compares Latin-1 bytes against when testing for ASCII.
// NOTE(review): 0x7E is `~`; the last ASCII code point is DEL (0x7F), so a byte equal to 0x7F
// is treated as non-ASCII by comparisons against this constant — confirm this is intended.
const ASCII_END: u8 = 0x7E;
/// `A`
const ASCII_CAPITAL_A: u8 = 0x41;
/// `Z`
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `a`
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `z`
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// Horizontal tab.
const ASCII_TAB: u8 = 0x09;
/// Line feed.
const ASCII_NEWLINE: u8 = 0x0A;
/// Form feed.
const ASCII_FORMFEED: u8 = 0x0C;
/// Carriage return.
const ASCII_CR: u8 = 0x0D;
/// Space.
const ASCII_SPACE: u8 = 0x20;
40
41unsafe fn get_latin1_string_bytes(
44 rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
45) -> &[u8] {
46 debug_assert!(!rooted_traceable_box.get().is_null());
47 let mut length = 0;
48 unsafe {
49 let chars = JS_GetLatin1StringCharsAndLength(
50 Runtime::get().expect("JS runtime has shut down").as_ptr(),
51 ptr::null(),
52 rooted_traceable_box.get(),
53 &mut length,
54 );
55 assert!(!chars.is_null());
56 slice::from_raw_parts(chars, length)
57 }
58}
59
/// Borrowed bytes of a string, tagged with the encoding they are stored in.
#[derive(Debug, PartialEq, Eq)]
pub enum EncodedBytes<'a> {
    /// Latin-1 encoded bytes (one byte per character).
    Latin1Bytes(&'a [u8]),
    /// UTF-8 encoded bytes.
    Utf8Bytes(&'a [u8]),
}
68
/// Internal storage of a `DOMString`.
enum DOMStringType {
    /// An owned Rust `String` (UTF-8).
    Rust(String),
    /// A rooted SpiderMonkey string; this module reads its contents as Latin-1 bytes.
    JSString(RootedTraceableBox<Heap<*mut JSString>>),
    /// Test-only variant holding raw Latin-1 bytes, presumably so the Latin-1 code paths can
    /// be exercised without a JS runtime.
    #[cfg(test)]
    Latin1Vec(Vec<u8>),
}
79
80impl DOMStringType {
81 fn str(&self) -> &str {
83 match self {
84 DOMStringType::Rust(s) => s,
85 DOMStringType::JSString(_rooted_traceable_box) => {
86 panic!("Cannot do a string")
87 },
88 #[cfg(test)]
89 &DOMStringType::Latin1Vec(_) => panic!("Cannot do a string"),
90 }
91 }
92
93 fn as_raw_bytes(&self) -> &[u8] {
98 match self {
99 DOMStringType::Rust(s) => s.as_bytes(),
100 DOMStringType::JSString(rooted_traceable_box) => unsafe {
101 get_latin1_string_bytes(rooted_traceable_box)
102 },
103 #[cfg(test)]
104 DOMStringType::Latin1Vec(items) => items,
105 }
106 }
107}
108
/// A borrowed view of a `DOMString` that can be used as a `&str`.
///
/// Wraps a `RefCell` shared borrow of the storage; its accessors panic unless the storage is
/// the `Rust` variant.
#[derive(Debug)]
pub struct StringView<'a>(Ref<'a, DOMStringType>);
112
113impl<'a> StringView<'a> {
114 pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
115 self.0
116 .str()
117 .split(HTML_SPACE_CHARACTERS)
118 .filter(|s| !s.is_empty())
119 }
120
121 pub fn strip_prefix(&self, needle: &str) -> Option<&str> {
122 self.0.str().strip_prefix(needle)
123 }
124
125 pub fn chars(&self) -> Chars<'_> {
126 self.0.str().chars()
127 }
128
129 pub fn as_bytes(&self) -> &[u8] {
130 self.0.str().as_bytes()
131 }
132}
133
/// Lets a `StringView` be used wherever a `&str` is expected.
impl Deref for StringView<'_> {
    type Target = str;
    fn deref(&self) -> &str {
        self.0.str()
    }
}
140
/// Borrows the view as `&str` by going through `Deref`.
impl AsRef<str> for StringView<'_> {
    fn as_ref(&self) -> &str {
        self.deref()
    }
}
146
/// Two views are equal when their string contents are equal.
impl PartialEq for StringView<'_> {
    fn eq(&self, other: &Self) -> bool {
        self.0.str() == other.0.str()
    }
}
152
/// Compares the viewed string with a plain `&str`.
impl PartialEq<&str> for StringView<'_> {
    fn eq(&self, other: &&str) -> bool {
        self.0.str() == *other
    }
}
158
// Marker impl; equality is the string equality defined by `PartialEq` above.
impl Eq for StringView<'_> {}
160
/// Orders views by their string contents.
impl PartialOrd for StringView<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        self.0.str().partial_cmp(other.0.str())
    }
}
166
/// Total order of views, delegating to `str`'s ordering.
impl Ord for StringView<'_> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.0.str().cmp(other.0.str())
    }
}
172
/// Copies the viewed string into a new owned `String`.
impl From<StringView<'_>> for String {
    fn from(value: StringView<'_>) -> Self {
        String::from(value.0.str())
    }
}
178
179unsafe impl Trace for DOMStringType {
185 unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
186 unsafe {
187 match self {
188 DOMStringType::Rust(_s) => {},
189 DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
190 #[cfg(test)]
191 DOMStringType::Latin1Vec(_s) => {},
192 }
193 }
194 }
195}
196
/// Heap-size reporting for the string storage.
impl malloc_size_of::MallocSizeOf for DOMStringType {
    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
        match self {
            DOMStringType::Rust(s) => s.size_of(ops),
            DOMStringType::JSString(_rooted_traceable_box) => {
                // Nothing is attributed to the JS-backed variant here.
                0
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(s) => s.size_of(ops),
        }
    }
}
210
211impl std::fmt::Debug for DOMStringType {
212 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213 match self {
214 DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
215 DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
216 #[cfg(test)]
217 DOMStringType::Latin1Vec(s) => f
218 .debug_struct("DOMString")
219 .field("latin1_string", s)
220 .finish(),
221 }
222 }
223}
224
/// A borrowed view of a `DOMString` that exposes the stored bytes together with their
/// encoding, without converting the storage.
#[derive(Debug)]
pub struct EncodedBytesView<'a>(Ref<'a, DOMStringType>);
228
229impl EncodedBytesView<'_> {
230 pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
232 match *self.0 {
233 DOMStringType::Rust(ref s) => EncodedBytes::Utf8Bytes(s.as_bytes()),
234 DOMStringType::JSString(ref rooted_traceable_box) => {
235 EncodedBytes::Latin1Bytes(unsafe { get_latin1_string_bytes(rooted_traceable_box) })
236 },
237 #[cfg(test)]
238 DOMStringType::Latin1Vec(ref s) => EncodedBytes::Latin1Bytes(s),
239 }
240 }
241
242 fn is_empty(&self) -> bool {
243 match self.encoded_bytes() {
244 EncodedBytes::Latin1Bytes(items) => items.is_empty(),
245 EncodedBytes::Utf8Bytes(s) => s.is_empty(),
246 }
247 }
248
249 fn len(&self) -> usize {
250 match self.encoded_bytes() {
251 EncodedBytes::Latin1Bytes(items) => items
252 .iter()
253 .map(|b| if *b <= ASCII_END { 1 } else { 2 })
254 .sum(),
255 EncodedBytes::Utf8Bytes(s) => s.len(),
256 }
257 }
258}
259
/// A string whose storage is either a Rust `String` or a JS engine string.
///
/// The storage sits in a `RefCell` so that `&self` methods can convert it to the Rust
/// representation on demand. Views handed out by `str`, `view` and `as_bytes` hold a shared
/// borrow of that cell.
#[repr(transparent)]
#[derive(Debug, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
298
299impl Clone for DOMString {
300 fn clone(&self) -> Self {
301 self.make_rust();
302 if let DOMStringType::Rust(ref s) = *self.0.borrow() {
303 DOMString::from_string(s.to_owned())
304 } else {
305 unreachable!()
306 }
307 }
308}
309
/// Errors that can occur while building a `DOMString`.
pub enum DOMStringErrorType {
    /// Converting the JS value to a string failed.
    JSConversionError,
}
313
314impl DOMString {
315 pub fn new() -> DOMString {
317 DOMString(RefCell::new(DOMStringType::Rust(String::new())))
318 }
319
320 pub fn from_js_string(
323 cx: SafeJSContext,
324 value: js::gc::HandleValue,
325 ) -> Result<DOMString, DOMStringErrorType> {
326 let string_ptr = unsafe { js::rust::ToString(*cx, value) };
327 if string_ptr.is_null() {
328 debug!("ToString failed");
329 Err(DOMStringErrorType::JSConversionError)
330 } else {
331 let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
332 let inner = if latin1 {
333 let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
334 DOMStringType::JSString(h)
335 } else {
336 DOMStringType::Rust(unsafe {
338 jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
339 })
340 };
341 Ok(DOMString(RefCell::new(inner)))
342 }
343 }
344
345 pub fn from_string(s: String) -> DOMString {
346 DOMString(RefCell::new(DOMStringType::Rust(s)))
347 }
348
349 fn make_rust(&self) {
351 let string = {
352 let inner = self.0.borrow();
353 match *inner {
354 DOMStringType::Rust(_) => return,
355 DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
356 jsstr_to_string(
357 Runtime::get().expect("JS runtime has shut down").as_ptr(),
358 NonNull::new(rooted_traceable_box.get()).unwrap(),
359 )
360 },
361 #[cfg(test)]
362 DOMStringType::Latin1Vec(ref items) => {
363 let mut v = vec![0; items.len() * 2];
364 let real_size = tendril::encoding_rs::mem::convert_latin1_to_utf8(
365 items.as_slice(),
366 v.as_mut_slice(),
367 );
368 v.truncate(real_size);
369
370 unsafe { String::from_utf8_unchecked(v) }
373 },
374 }
375 };
376 *self.0.borrow_mut() = DOMStringType::Rust(string);
377 }
378
379 #[expect(unused)]
381 fn debug_js(&self) {
382 match *self.0.borrow() {
383 DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
384 DOMStringType::JSString(ref rooted_traceable_box) => {
385 let s = unsafe {
386 jsstr_to_string(
387 Runtime::get().expect("JS runtime has shut down").as_ptr(),
388 ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
389 )
390 };
391 info!("JSString ({})", s);
392 },
393 #[cfg(test)]
394 DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
395 }
396 }
397
398 pub fn str(&self) -> StringView<'_> {
400 self.make_rust();
401 StringView(self.0.borrow())
402 }
403
404 pub fn view(&self) -> EncodedBytesView<'_> {
407 EncodedBytesView(self.0.borrow())
408 }
409
410 pub fn clear(&mut self) {
411 *self.0.borrow_mut() = DOMStringType::Rust(String::new())
412 }
413
414 pub fn is_empty(&self) -> bool {
415 self.view().is_empty()
416 }
417
418 pub fn len(&self) -> usize {
420 self.view().len()
421 }
422
423 pub fn make_ascii_lowercase(&mut self) {
424 self.make_rust();
425 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
426 s.make_ascii_lowercase();
427 }
428 }
429
430 pub fn push_str(&mut self, s: &str) {
431 self.make_rust();
432 if let DOMStringType::Rust(ref mut string) = *self.0.borrow_mut() {
433 string.push_str(s)
434 }
435 }
436
437 pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
438 if self.is_empty() {
439 return;
440 }
441
442 self.make_rust();
443 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
444 let trailing_whitespace_len = s
445 .trim_end_matches(|ref c| char::is_ascii_whitespace(c))
446 .len();
447 s.truncate(trailing_whitespace_len);
448 if s.is_empty() {
449 return;
450 }
451
452 let first_non_whitespace = s.find(|ref c| !char::is_ascii_whitespace(c)).unwrap();
453 s.replace_range(0..first_non_whitespace, "");
454 }
455 }
456
457 pub fn is_valid_floating_point_number_string(&self) -> bool {
459 static RE: LazyLock<Regex> = LazyLock::new(|| {
460 Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
461 });
462 self.make_rust();
463
464 if let DOMStringType::Rust(ref s) = *self.0.borrow() {
465 RE.is_match(s) && self.parse_floating_point_number().is_some()
466 } else {
467 unreachable!()
468 }
469 }
470
471 pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
472 self.make_rust();
473 self.str().parse::<T>()
474 }
475
476 pub fn parse_floating_point_number(&self) -> Option<f64> {
478 self.make_rust();
479 parse_floating_point_number(&self.str())
480 }
481
482 pub fn set_best_representation_of_the_floating_point_number(&mut self) {
484 if let Some(val) = self.parse_floating_point_number() {
485 let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
487
488 *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
489 }
490 }
491
492 pub fn to_lowercase(&self) -> String {
493 self.make_rust();
494 self.str().to_lowercase()
495 }
496
497 pub fn to_uppercase(&self) -> String {
498 self.make_rust();
499 self.str().to_uppercase()
500 }
501
502 pub fn strip_newlines(&mut self) {
503 self.make_rust();
506 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
507 s.retain(|c| c != '\r' && c != '\n');
508 }
509 }
510
511 pub fn normalize_newlines(&mut self) {
513 self.make_rust();
514 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
518 *s = s.replace("\r\n", "\n").replace("\r", "\n")
519 }
520 }
521
522 pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
523 self.make_rust();
524 let new_string = self.str().to_owned();
525 DOMString(RefCell::new(DOMStringType::Rust(
526 new_string.replace(needle, replace_char),
527 )))
528 }
529
530 pub fn starts_with(&self, c: char) -> bool {
532 if !c.is_ascii() {
533 self.make_rust();
534 self.str().starts_with(c)
535 } else {
536 match self.view().encoded_bytes() {
537 EncodedBytes::Latin1Bytes(items) => items,
538 EncodedBytes::Utf8Bytes(s) => s,
539 }
540 .starts_with(&[c as u8])
542 }
543 }
544
545 pub fn starts_with_str(&self, needle: &str) -> bool {
546 self.make_rust();
547 self.str().starts_with(needle)
548 }
549
550 pub fn contains(&self, needle: &str) -> bool {
551 self.make_rust();
552 self.str().contains(needle)
553 }
554
555 pub fn to_ascii_lowercase(&self) -> String {
556 let conversion = match self.view().encoded_bytes() {
557 EncodedBytes::Latin1Bytes(items) => {
558 if items.iter().all(|c| *c <= ASCII_END) {
559 Some(unsafe {
561 String::from_utf8_unchecked(
562 items
563 .iter()
564 .map(|c| {
565 if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
566 c + 32
567 } else {
568 *c
569 }
570 })
571 .collect(),
572 )
573 })
574 } else {
575 None
576 }
577 },
578 EncodedBytes::Utf8Bytes(s) => unsafe {
579 Some(str::from_utf8_unchecked(s).to_ascii_lowercase())
581 },
582 };
583 if let Some(conversion) = conversion {
585 conversion
586 } else {
587 self.make_rust();
588 self.str().to_ascii_lowercase()
589 }
590 }
591
592 pub fn contains_html_space_characters(&self) -> bool {
593 const SPACE_BYTES: [u8; 5] = [
594 ASCII_TAB,
595 ASCII_NEWLINE,
596 ASCII_FORMFEED,
597 ASCII_CR,
598 ASCII_SPACE,
599 ];
600 match self.view().encoded_bytes() {
601 EncodedBytes::Latin1Bytes(items) => SPACE_BYTES.iter().any(|byte| items.contains(byte)),
602 EncodedBytes::Utf8Bytes(s) => {
603 let s = unsafe { str::from_utf8_unchecked(s) };
605 s.contains(HTML_SPACE_CHARACTERS)
606 },
607 }
608 }
609
610 pub fn as_bytes(&self) -> BytesView<'_> {
612 if self.is_ascii() {
616 BytesView(self.0.borrow())
617 } else {
618 self.make_rust();
619 BytesView(self.0.borrow())
620 }
621 }
622
623 pub fn is_ascii_lowercase(&self) -> bool {
625 match self.view().encoded_bytes() {
626 EncodedBytes::Latin1Bytes(items) => items
627 .iter()
628 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
629 EncodedBytes::Utf8Bytes(s) => s
630 .iter()
631 .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
632 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
633 }
634 }
635
636 pub fn is_ascii(&self) -> bool {
638 match self.view().encoded_bytes() {
639 EncodedBytes::Latin1Bytes(items) => items,
640 EncodedBytes::Utf8Bytes(items) => items,
641 }
642 .is_ascii()
643 }
644
645 pub fn is_valid_for_cookie(&self) -> bool {
649 match self.view().encoded_bytes() {
650 EncodedBytes::Latin1Bytes(items) | EncodedBytes::Utf8Bytes(items) => !items
651 .iter()
652 .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
653 }
654 }
655}
656
/// Parses `input` as a finite floating-point number.
///
/// Surrounding whitespace is ignored. Anything Rust's `f64` parser accepts is accepted,
/// except a leading `+`, a trailing `.`, NaN and the infinities. Those exclusions are checked
/// on the trimmed text, so padding cannot bypass them.
///
/// Returns `None` when the input is rejected or does not parse.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    let trimmed = input.trim();
    if trimmed.starts_with('+') || trimmed.ends_with('.') {
        return None;
    }
    // `is_finite` rules out NaN and both infinities (including overflow such as `1e999`).
    trimmed.parse::<f64>().ok().filter(|value| value.is_finite())
}
670
/// A borrowed view of a `DOMString` that dereferences to its raw stored bytes.
pub struct BytesView<'a>(Ref<'a, DOMStringType>);
672
/// Exposes the stored bytes as a slice, with no transcoding.
impl Deref for BytesView<'_> {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        self.0.as_raw_bytes()
    }
}
681
682impl Ord for DOMString {
683 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
684 self.make_rust();
685 other.make_rust();
686 self.str().cmp(&other.str())
687 }
688}
689
690impl PartialOrd for DOMString {
691 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
692 self.make_rust();
693 other.make_rust();
694 self.str().partial_cmp(&other.str())
695 }
696}
697
698impl Extend<char> for DOMString {
699 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
700 self.make_rust();
701 if let DOMStringType::Rust(ref mut s) = *self.0.borrow_mut() {
702 s.extend(iter)
703 }
704 }
705}
706
/// Conversion to a JS value.
impl ToJSValConvertible for DOMString {
    unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
        let val = self.0.borrow();
        match *val {
            // Rust strings go through `String`'s own conversion.
            DOMStringType::Rust(ref s) => unsafe {
                s.to_jsval(cx, rval);
            },
            // A JS-backed string is handed back as is, without copying.
            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
                rval.set(StringValue(&*rooted_traceable_box.get()));
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(ref items) => {
                // A Latin-1 byte expands to at most two UTF-8 bytes.
                let mut v = vec![0; items.len() * 2];
                let real_size = tendril::encoding_rs::mem::convert_latin1_to_utf8(
                    items.as_slice(),
                    v.as_mut_slice(),
                );
                v.truncate(real_size);

                String::from_utf8(v)
                    .expect("Error in constructin test string")
                    .to_jsval(cx, rval);
            },
        };
    }
}
733
734impl std::hash::Hash for DOMString {
735 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
736 self.make_rust();
737 self.str().hash(state);
738 }
739}
740
741impl std::fmt::Display for DOMString {
742 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
743 self.make_rust();
744 fmt::Display::fmt(self.str().deref(), f)
745 }
746}
747
/// The default `DOMString` is the empty string.
impl Default for DOMString {
    fn default() -> Self {
        DOMString::new()
    }
}
753
754impl std::cmp::PartialEq<str> for DOMString {
755 fn eq(&self, other: &str) -> bool {
756 if other.is_ascii() {
757 other.as_bytes() ==
758 match self.view().encoded_bytes() {
759 EncodedBytes::Latin1Bytes(items) => items,
760 EncodedBytes::Utf8Bytes(s) => s,
761 }
762 } else {
763 self.make_rust();
764 self.str().deref() == other
765 }
766 }
767}
768
769impl std::cmp::PartialEq<&str> for DOMString {
770 fn eq(&self, other: &&str) -> bool {
771 if other.is_ascii() {
772 other.as_bytes() ==
773 match self.view().encoded_bytes() {
774 EncodedBytes::Latin1Bytes(items) => items,
775 EncodedBytes::Utf8Bytes(s) => s,
776 }
777 } else {
778 self.make_rust();
779 self.str().deref() == *other
780 }
781 }
782}
783
784impl std::cmp::PartialEq<String> for DOMString {
785 fn eq(&self, other: &String) -> bool {
786 if other.is_ascii() {
787 other.as_bytes() ==
788 match self.view().encoded_bytes() {
789 EncodedBytes::Latin1Bytes(items) => items,
790 EncodedBytes::Utf8Bytes(s) => s,
791 }
792 } else {
793 self.make_rust();
794 self.str().deref() == other
795 }
796 }
797}
798
/// Symmetric counterpart of `DOMString == String`.
impl std::cmp::PartialEq<DOMString> for String {
    fn eq(&self, other: &DOMString) -> bool {
        other.eq(self)
    }
}
804
/// Symmetric counterpart of `DOMString == str`.
impl std::cmp::PartialEq<DOMString> for str {
    fn eq(&self, other: &DOMString) -> bool {
        other.eq(self)
    }
}
810
811impl std::cmp::PartialEq for DOMString {
812 fn eq(&self, other: &DOMString) -> bool {
813 let result = match (self.view().encoded_bytes(), other.view().encoded_bytes()) {
814 (EncodedBytes::Latin1Bytes(items), EncodedBytes::Latin1Bytes(other_items)) => {
815 Some(items == other_items)
816 },
817 (EncodedBytes::Latin1Bytes(items), EncodedBytes::Utf8Bytes(other_s))
818 if other_s.is_ascii() =>
819 {
820 Some(items == other_s)
821 },
822 (EncodedBytes::Utf8Bytes(s), EncodedBytes::Latin1Bytes(other_items))
823 if s.is_ascii() =>
824 {
825 Some(s == other_items)
826 },
827 (EncodedBytes::Utf8Bytes(s), EncodedBytes::Utf8Bytes(other_s)) => Some(s == other_s),
828 _ => None,
829 };
830
831 if let Some(eq_result) = result {
832 eq_result
833 } else {
834 self.make_rust();
835 other.make_rust();
836 self.str() == other.str()
837 }
838 }
839}
840
// Marker impl; equality is the content equality defined by `PartialEq` above.
impl std::cmp::Eq for DOMString {}
842
/// Wraps an owned `String` without copying it.
impl From<std::string::String> for DOMString {
    fn from(value: String) -> Self {
        DOMString::from_string(value)
    }
}
848
849impl From<DOMString> for LocalName {
850 fn from(contents: DOMString) -> LocalName {
851 {
852 let view = contents.view();
853 let bytes = view.encoded_bytes();
854 let str = match bytes {
855 EncodedBytes::Latin1Bytes(items) => {
856 if items.iter().all(|c| c.is_ascii()) {
857 unsafe { Some(str::from_utf8_unchecked(items)) }
858 } else {
859 None
860 }
861 },
862 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
863 };
864 if let Some(s) = str {
865 return LocalName::from(s);
866 }
867 }
868 contents.make_rust();
869 LocalName::from(contents.str().deref())
870 }
871}
872
873impl From<&DOMString> for LocalName {
874 fn from(contents: &DOMString) -> LocalName {
875 {
876 let view = contents.view();
877 let bytes = view.encoded_bytes();
878 let str = match bytes {
879 EncodedBytes::Latin1Bytes(items) => {
880 if items.iter().all(|c| c.is_ascii()) {
881 unsafe { Some(str::from_utf8_unchecked(items)) }
883 } else {
884 None
885 }
886 },
887 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
888 };
889 if let Some(s) = str {
890 return LocalName::from(s);
891 }
892 }
893 contents.make_rust();
894 LocalName::from(contents.str().deref())
895 }
896}
897
898impl From<DOMString> for Namespace {
899 fn from(contents: DOMString) -> Namespace {
900 {
901 let view = contents.view();
902 let bytes = view.encoded_bytes();
903 let str = match bytes {
904 EncodedBytes::Latin1Bytes(items) => {
905 if items.iter().all(|c| c.is_ascii()) {
906 unsafe { Some(str::from_utf8_unchecked(items)) }
908 } else {
909 None
910 }
911 },
912 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
913 };
914 if let Some(s) = str {
915 return Namespace::from(s);
916 }
917 }
918 contents.make_rust();
919 Namespace::from(contents.str().deref())
920 }
921}
922
923impl From<DOMString> for Atom {
924 fn from(contents: DOMString) -> Atom {
925 {
926 let view = contents.view();
927 let bytes = view.encoded_bytes();
928 let str = match bytes {
929 EncodedBytes::Latin1Bytes(items) => {
930 if items.iter().all(|c| c.is_ascii()) {
931 unsafe { Some(str::from_utf8_unchecked(items)) }
933 } else {
934 None
935 }
936 },
937 EncodedBytes::Utf8Bytes(s) => Some(unsafe { str::from_utf8_unchecked(s) }),
938 };
939 if let Some(s) = str {
940 return Atom::from(s);
941 }
942 }
943 contents.make_rust();
944 Atom::from(contents.str().deref())
945 }
946}
947
/// Copies a string slice into a new Rust-backed `DOMString`.
impl From<&str> for DOMString {
    fn from(contents: &str) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Rust(String::from(contents))))
    }
}
953
954impl From<DOMString> for String {
955 fn from(val: DOMString) -> Self {
956 val.make_rust();
957 val.str().to_owned()
958 }
959}
960
961impl From<DOMString> for Vec<u8> {
962 fn from(value: DOMString) -> Self {
963 value.make_rust();
964 value.str().as_bytes().to_vec()
965 }
966}
967
/// Builds a Rust-backed `DOMString` from a `Cow`, allocating only when it is borrowed.
impl From<Cow<'_, str>> for DOMString {
    fn from(value: Cow<'_, str>) -> Self {
        DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
    }
}
973
/// Implementation detail of `match_domstring_ascii!`.
///
/// Expands a list of `literal => expr,` arms into a chain of `if … else …` comparisons
/// against `$input`, ending in a single catch-all `pattern => expr,` arm. `$variant` wraps
/// each literal's bytes so they can be compared with `$input` using `==`.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // Literal arm: compare and, on mismatch, recurse into the remaining arms.
    ($variant: expr, $input: expr, $p: literal => $then: expr, $($rest:tt)*) => {
        if {
            // Only checked when debug assertions are enabled.
            debug_assert!(($p).is_ascii());
            $variant($p.as_bytes())
        } == $input {
            $then
        } else {
            match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // Final arm: an arbitrary pattern (typically `_`) terminates the chain.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
993
/// Matches a `DOMString` against ASCII string literals without converting its storage.
///
/// Takes the input followed by `literal => expr,` arms and one final `pattern => expr,` arm.
/// Each literal is compared byte for byte with the stored bytes, wrapped in the same
/// `EncodedBytes` variant as the input. Literals must be ASCII; that is enforced with a
/// `debug_assert!` only.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::match_domstring_ascii_inner;
            use $crate::domstring::EncodedBytes;

            let view = $input.view();
            let s = view.encoded_bytes();
            // Pick the wrapper that matches the input's encoding so `==` compares like with like.
            if matches!(s, EncodedBytes::Latin1Bytes(_)) {
                match_domstring_ascii_inner!(EncodedBytes::Latin1Bytes, s, $($tail)*)
            } else {
                match_domstring_ascii_inner!(EncodedBytes::Utf8Bytes, s, $($tail)*)
            }
        }
    };
}
1024
#[cfg(test)]
mod tests {
    use super::*;

    /// `¶` as a Latin-1 byte.
    const LATIN1_PILLCROW: u8 = 0xB6;
    /// `¶` as a UTF-8 byte sequence.
    const UTF8_PILLCROW: [u8; 2] = [194, 182];
    /// `²` as a Latin-1 byte.
    const LATIN1_POWER2: u8 = 0xB2;

    /// Builds a `DOMString` backed by raw Latin-1 bytes through the test-only variant.
    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
    }

    #[test]
    fn string_functions() {
        let s = DOMString::from("AbBcC❤&%$#");
        let s_copy = s.clone();
        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
        assert_eq!(s, s_copy);
        assert_eq!(s.len(), 12);
        assert_eq!(s_copy.len(), 12);
        assert!(s.starts_with('A'));
        let s2 = DOMString::from("");
        assert!(s2.is_empty());
    }

    #[test]
    fn string_functions_latin1() {
        {
            let s = from_latin1(vec![
                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
            ]);
            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
        }
        {
            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
            assert_eq!(s.to_ascii_lowercase(), "abbcc");
        }
        {
            let s = from_latin1(vec![
                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
            ]);
            assert_eq!(s.len(), 11);
            assert!(s.starts_with('A'));
        }
        {
            let s = from_latin1(vec![]);
            assert!(s.is_empty());
        }
    }

    /// `len` must report the UTF-8 length both before and after the storage is converted.
    #[test]
    fn test_length() {
        // Each row pairs the first byte of a run of 16 consecutive Latin-1 bytes with the same
        // 16 characters written as UTF-8.
        let cases: [(u8, &str); 6] = [
            (0xA0, "\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯"),
            (0xB0, "°±²³´µ¶·¸¹º»¼½¾¿"),
            (0xC0, "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ"),
            // 0xD7 is `×` and 0xD8 is `Ø`; escaped so the pair cannot be mis-decoded.
            (0xD0, "ÐÑÒÓÔÕÖ\u{00D7}\u{00D8}ÙÚÛÜÝÞß"),
            (0xE0, "àáâãäåæçèéêëìíîï"),
            (0xF0, "ðñòóôõö÷øùúûüýþÿ"),
        ];

        for (first, utf8) in cases {
            let s = from_latin1((first..=first + 0x0F).collect());
            assert_eq!(s.len(), utf8.len());
            s.make_rust();
            assert_eq!(s.len(), utf8.len());
        }
    }

    #[test]
    fn test_convert() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        s.make_rust();
        assert_eq!(&*s.str(), "abc%$");
    }

    #[test]
    fn partial_eq() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        let string = String::from("abc%$");
        let s2 = DOMString::from_string(string.clone());
        assert_eq!(s, s2);
        assert_eq!(s, string);
    }

    /// The view must expose the Latin-1 bytes unchanged and tagged as Latin-1.
    #[test]
    fn encoded_bytes() {
        let bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
        let s = from_latin1(bytes.clone());
        // Compare the whole enum so a wrong variant fails instead of being skipped.
        assert_eq!(s.view().encoded_bytes(), EncodedBytes::Latin1Bytes(&bytes));
    }

    #[test]
    fn testing_stringview() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);

        assert_eq!(
            s.str().chars().collect::<Vec<char>>(),
            vec!['a', 'b', 'c', '%', '$', '²']
        );
        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
    }

    /// The hash must not depend on how the string is stored.
    #[test]
    fn test_hash() {
        use std::hash::{DefaultHasher, Hash, Hasher};
        fn hash_value(d: &DOMString) -> u64 {
            let mut hasher = DefaultHasher::new();
            d.hash(&mut hasher);
            hasher.finish()
        }

        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
        s_converted.make_rust();
        let s2 = DOMString::from_string(String::from("abc%$²"));

        let hash_s = hash_value(&s);
        let hash_s_converted = hash_value(&s_converted);
        let hash_s2 = hash_value(&s2);

        assert_eq!(hash_s, hash_s2);
        assert_eq!(hash_s, hash_s_converted);
    }

    /// The arm that runs must be the one whose literal equals the string; any other arm
    /// fails the test instead of passing silently.
    #[test]
    fn test_match_executing() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abc" => (),
                "bcd" => panic!("matched the wrong literal"),
                _ => panic!("fell through to the default arm"),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            match_domstring_ascii!( s,
                "abc/" => (),
                "bcd" => panic!("matched the wrong literal"),
                _ => panic!("fell through to the default arm"),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            match_domstring_ascii!( s,
                "bcd" => panic!("matched the wrong literal"),
                "abc%$" => (),
                _ => panic!("fell through to the default arm"),
            );
        }

        {
            let s = DOMString::from_string(String::from("abcde"));
            match_domstring_ascii!( s,
                "abc" => panic!("matched the wrong literal"),
                "bcd" => panic!("matched the wrong literal"),
                _ => (),
            );
        }
        {
            let s = DOMString::from_string(String::from("abc%$"));
            match_domstring_ascii!( s,
                "bcd" => panic!("matched the wrong literal"),
                "abc%$" => (),
                _ => panic!("fell through to the default arm"),
            );
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abcdd" => panic!("matched the wrong literal"),
                "bcd" => panic!("matched the wrong literal"),
                _ => (),
            );
        }
    }

    /// The macro must evaluate to the value of the selected arm.
    #[test]
    fn test_match_returning_result() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abc" => true,
                "bcd" => false,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            let res = match_domstring_ascii!( s,
                "abc/" => true,
                "bcd" => false,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert!(res);
        }

        {
            let s = DOMString::from_string(String::from("abcde"));
            let res = match_domstring_ascii!( s,
                "abc" => false,
                "bcd" => false,
                _ => true,
            );
            assert!(res);
        }
        {
            let s = DOMString::from_string(String::from("abc%$"));
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abcdd" => false,
                "bcd" => false,
                _ => true,
            );
            assert!(res);
        }
    }

    /// A non-ASCII literal trips the macro's `debug_assert!`, so this only panics when debug
    /// assertions are enabled.
    #[test]
    #[should_panic]
    fn test_match_panic() {
        let s = DOMString::from_string(String::from("abcd"));
        let _res = match_domstring_ascii!(s,
            "❤" => true,
            _ => false,);
    }

    /// Same as `test_match_panic`, with the non-ASCII literal in a later arm.
    #[test]
    #[should_panic]
    fn test_match_panic2() {
        let s = DOMString::from_string(String::from("abcd"));
        let _res = match_domstring_ascii!(s,
            "abc" => false,
            "❤" => true,
            _ => false,
        );
    }

    #[test]
    fn test_strip_whitespace() {
        {
            let mut s = from_latin1(vec![
                b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
            ]);

            s.strip_leading_and_trailing_ascii_whitespace();
            s.make_rust();
            assert_eq!(&*s.str(), "abc%$²");
        }
        {
            let mut s = DOMString::from_string(String::from("   \n  abc%$ "));

            s.strip_leading_and_trailing_ascii_whitespace();
            s.make_rust();
            assert_eq!(&*s.str(), "abc%$");
        }
    }

    /// Every HTML space byte must be detected in both the Latin-1 and the converted form.
    #[test]
    fn contains_html_space_characters() {
        let spaces = [
            ASCII_TAB,
            ASCII_NEWLINE,
            ASCII_FORMFEED,
            ASCII_CR,
            ASCII_SPACE,
        ];
        for space in spaces {
            let s = from_latin1(vec![b'a', b'a', b'a', space, b'a', b'a']);
            assert!(s.contains_html_space_characters());
            s.make_rust();
            assert!(s.contains_html_space_characters());
        }

        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
        assert!(!s.contains_html_space_characters());
        s.make_rust();
        assert!(!s.contains_html_space_characters());
    }

    #[test]
    fn atom() {
        let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
        let atom1 = Atom::from(s);
        let s2 = DOMString::from_string(String::from("aaa aa"));
        let atom2 = Atom::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
        let atom3 = Atom::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn namespace() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = Namespace::from(s);
        let s2 = DOMString::from_string(String::from("aaa aa"));
        let atom2 = Namespace::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = Namespace::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn localname() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = LocalName::from(s);
        let s2 = DOMString::from_string(String::from("aaa aa"));
        let atom2 = LocalName::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = LocalName::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn is_ascii_lowercase() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert!(s.is_ascii_lowercase());
        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from_string(String::from("`aaaz"));
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from_string(String::from("aaaz"));
        assert!(s.is_ascii_lowercase());
    }

    /// `as_bytes` must always yield UTF-8, transcoding Latin-1 storage when needed.
    #[test]
    fn test_as_bytes() {
        // Non-ASCII Latin-1 input: the pilcrow becomes its two-byte UTF-8 sequence.
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
        assert_eq!(
            *s.as_bytes(),
            [
                b'a',
                b'a',
                b'a',
                UTF8_PILLCROW[0],
                UTF8_PILLCROW[1],
                b'a',
                b'a'
            ]
        );

        // Pure ASCII Latin-1 input is returned unchanged.
        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert_eq!(*s.as_bytes(), [b'a', b'a', b'a', b'a', b'z']);

        let str = "abc%$²".to_owned();
        let s = DOMString::from(str.clone());
        assert_eq!(&*s.as_bytes(), str.as_bytes());
        let str = "AbBcC❤&%$#".to_owned();
        let s = DOMString::from(str.clone());
        assert_eq!(&*s.as_bytes(), str.as_bytes());
    }
}