1#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
16use html5ever::{LocalName, Namespace};
17use js::conversions::{ToJSValConvertible, jsstr_to_string};
18use js::gc::MutableHandleValue;
19use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
20use js::jsval::StringValue;
21use js::rust::{Runtime, Trace};
22use malloc_size_of::MallocSizeOfOps;
23use num_traits::{ToPrimitive, Zero};
24use regex::Regex;
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27
28use crate::script_runtime::JSContext as SafeJSContext;
29use crate::trace::RootedTraceableBox;
30
/// Last code point of the ASCII range (DEL). Every Latin-1 byte up to and
/// including this value is encoded as exactly one byte in UTF-8.
const ASCII_END: u8 = 0x7F;
/// `A`, the first ASCII uppercase letter.
const ASCII_CAPITAL_A: u8 = 0x41;
/// `Z`, the last ASCII uppercase letter.
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `a`, the first ASCII lowercase letter.
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `z`, the last ASCII lowercase letter.
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// Horizontal tab.
const ASCII_TAB: u8 = 0x09;
/// Line feed.
const ASCII_NEWLINE: u8 = 0x0A;
/// Form feed.
const ASCII_FORMFEED: u8 = 0x0C;
/// Carriage return.
const ASCII_CR: u8 = 0x0D;
/// Space.
const ASCII_SPACE: u8 = 0x20;
41
42unsafe fn get_latin1_string_bytes(
45 rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
46) -> &[u8] {
47 debug_assert!(!rooted_traceable_box.get().is_null());
48 let mut length = 0;
49 unsafe {
50 let chars = JS_GetLatin1StringCharsAndLength(
51 Runtime::get().expect("JS runtime has shut down").as_ptr(),
52 ptr::null(),
53 rooted_traceable_box.get(),
54 &mut length,
55 );
56 assert!(!chars.is_null());
57 slice::from_raw_parts(chars, length)
58 }
59}
60
/// A borrowed view of the bytes backing a string, tagged with their encoding.
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// Latin-1 bytes: every byte is one code point in `U+0000..=U+00FF`.
    Latin1(Ref<'a, [u8]>),
    /// UTF-8 bytes.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Returns the raw bytes without any conversion, whatever the encoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// Returns the number of bytes the string occupies once encoded as UTF-8.
    ///
    /// For Latin-1 data every ASCII byte (`0x00..=0x7F`, including DEL) takes
    /// one UTF-8 byte and every other byte takes two.
    pub fn len(&self) -> usize {
        match self {
            Self::Latin1(bytes) => {
                let two_byte_code_points = bytes.iter().filter(|byte| !byte.is_ascii()).count();
                bytes.len() + two_byte_code_points
            },
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Returns `true` when there are no bytes at all.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
95
96enum DOMStringType {
97 Rust(String),
99 JSString(RootedTraceableBox<Heap<*mut JSString>>),
101 #[cfg(test)]
102 Latin1Vec(Vec<u8>),
105}
106
107impl Default for DOMStringType {
108 fn default() -> Self {
109 Self::Rust(Default::default())
110 }
111}
112
113impl DOMStringType {
114 fn as_raw_bytes(&self) -> &[u8] {
119 match self {
120 DOMStringType::Rust(s) => s.as_bytes(),
121 DOMStringType::JSString(rooted_traceable_box) => unsafe {
122 get_latin1_string_bytes(rooted_traceable_box)
123 },
124 #[cfg(test)]
125 DOMStringType::Latin1Vec(items) => items,
126 }
127 }
128
129 fn ensure_rust_string(&mut self) -> &mut String {
130 let new_string = match self {
131 DOMStringType::Rust(string) => return string,
132 DOMStringType::JSString(rooted_traceable_box) => unsafe {
133 jsstr_to_string(
134 Runtime::get().expect("JS runtime has shut down").as_ptr(),
135 NonNull::new(rooted_traceable_box.get()).unwrap(),
136 )
137 },
138 #[cfg(test)]
139 DOMStringType::Latin1Vec(items) => {
140 let mut v = vec![0; items.len() * 2];
141 let real_size =
142 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
143 v.truncate(real_size);
144
145 unsafe { String::from_utf8_unchecked(v) }
148 },
149 };
150 *self = DOMStringType::Rust(new_string);
151 self.ensure_rust_string()
152 }
153}
154
155#[derive(Debug)]
158pub struct StringView<'a>(Ref<'a, str>);
159
160impl StringView<'_> {
161 pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
162 self.split(HTML_SPACE_CHARACTERS)
163 .filter(|string| !string.is_empty())
164 }
165}
166
167impl From<StringView<'_>> for String {
168 fn from(string_view: StringView<'_>) -> Self {
169 string_view.0.to_string()
170 }
171}
172
173impl Deref for StringView<'_> {
174 type Target = str;
175 fn deref(&self) -> &str {
176 &(self.0)
177 }
178}
179
180impl AsRef<str> for StringView<'_> {
181 fn as_ref(&self) -> &str {
182 &(self.0)
183 }
184}
185
186impl PartialEq for StringView<'_> {
187 fn eq(&self, other: &Self) -> bool {
188 self.0.eq(&*(other.0))
189 }
190}
191
192impl PartialEq<&str> for StringView<'_> {
193 fn eq(&self, other: &&str) -> bool {
194 self.0.eq(*other)
195 }
196}
197
198impl Eq for StringView<'_> {}
199
200impl PartialOrd for StringView<'_> {
201 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
202 self.0.partial_cmp(&**other)
203 }
204}
205
206impl Ord for StringView<'_> {
207 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
208 self.0.cmp(other)
209 }
210}
211
212unsafe impl Trace for DOMStringType {
218 unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
219 unsafe {
220 match self {
221 DOMStringType::Rust(_s) => {},
222 DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
223 #[cfg(test)]
224 DOMStringType::Latin1Vec(_s) => {},
225 }
226 }
227 }
228}
229
230impl malloc_size_of::MallocSizeOf for DOMStringType {
231 fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
232 match self {
233 DOMStringType::Rust(s) => s.size_of(ops),
234 DOMStringType::JSString(_rooted_traceable_box) => {
235 0
237 },
238 #[cfg(test)]
239 DOMStringType::Latin1Vec(s) => s.size_of(ops),
240 }
241 }
242}
243
244impl std::fmt::Debug for DOMStringType {
245 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
246 match self {
247 DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
248 DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
249 #[cfg(test)]
250 DOMStringType::Latin1Vec(s) => f
251 .debug_struct("DOMString")
252 .field("latin1_string", s)
253 .finish(),
254 }
255 }
256}
257
258#[repr(transparent)]
294#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
295pub struct DOMString(RefCell<DOMStringType>);
296
297impl Clone for DOMString {
298 fn clone(&self) -> Self {
299 self.ensure_rust_string().clone().into()
300 }
301}
302
/// Errors that can occur while building a [`DOMString`].
///
/// `Debug` is derived so that a `Result` carrying this error can be used with
/// `unwrap`/`expect` and printed in diagnostics.
#[derive(Debug)]
pub enum DOMStringErrorType {
    /// Converting the JS value to a JS string failed.
    JSConversionError,
}
306
307impl DOMString {
308 pub fn new() -> DOMString {
310 Default::default()
311 }
312
313 pub fn from_js_string(
316 cx: SafeJSContext,
317 value: js::gc::HandleValue,
318 ) -> Result<DOMString, DOMStringErrorType> {
319 let string_ptr = unsafe { js::rust::ToString(*cx, value) };
320 if string_ptr.is_null() {
321 debug!("ToString failed");
322 Err(DOMStringErrorType::JSConversionError)
323 } else {
324 let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
325 let inner = if latin1 {
326 let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
327 DOMStringType::JSString(h)
328 } else {
329 DOMStringType::Rust(unsafe {
331 jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
332 })
333 };
334 Ok(DOMString(RefCell::new(inner)))
335 }
336 }
337
338 fn ensure_rust_string(&self) -> RefMut<'_, String> {
341 let inner = self.0.borrow_mut();
342 RefMut::map(inner, |inner| inner.ensure_rust_string())
343 }
344
345 #[expect(unused)]
347 fn debug_js(&self) {
348 match *self.0.borrow() {
349 DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
350 DOMStringType::JSString(ref rooted_traceable_box) => {
351 let s = unsafe {
352 jsstr_to_string(
353 Runtime::get().expect("JS runtime has shut down").as_ptr(),
354 ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
355 )
356 };
357 info!("JSString ({})", s);
358 },
359 #[cfg(test)]
360 DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
361 }
362 }
363
364 pub fn str(&self) -> StringView<'_> {
366 {
367 let inner = self.0.borrow();
368 if matches!(&*inner, DOMStringType::Rust(..)) {
369 return StringView(Ref::map(inner, |inner| match inner {
370 DOMStringType::Rust(string) => string.as_str(),
371 _ => unreachable!("Guaranteed by condition above"),
372 }));
373 }
374 }
375
376 self.ensure_rust_string();
377 self.str()
378 }
379
380 pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
383 let inner = self.0.borrow();
384 match &*inner {
385 DOMStringType::Rust(..) => {
386 EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
387 },
388 _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
389 }
390 }
391
392 pub fn clear(&mut self) {
393 *self.0.borrow_mut() = DOMStringType::Rust(String::new())
394 }
395
396 pub fn is_empty(&self) -> bool {
397 self.encoded_bytes().is_empty()
398 }
399
400 pub fn len(&self) -> usize {
405 self.encoded_bytes().len()
406 }
407
408 pub fn len_utf8(&self) -> Utf8CodeUnitLength {
415 Utf8CodeUnitLength(self.len())
416 }
417
418 pub fn len_utf16(&self) -> Utf16CodeUnitLength {
423 Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
424 }
425
426 pub fn make_ascii_lowercase(&mut self) {
427 self.0
428 .borrow_mut()
429 .ensure_rust_string()
430 .make_ascii_lowercase();
431 }
432
433 pub fn push_str(&mut self, string_to_push: &str) {
434 self.0
435 .borrow_mut()
436 .ensure_rust_string()
437 .push_str(string_to_push);
438 }
439
440 pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
441 if self.is_empty() {
442 return;
443 }
444
445 let mut inner = self.0.borrow_mut();
446 let string = inner.ensure_rust_string();
447 let trailing_whitespace_len = string
448 .trim_end_matches(|character: char| character.is_ascii_whitespace())
449 .len();
450 string.truncate(trailing_whitespace_len);
451 if string.is_empty() {
452 return;
453 }
454
455 let first_non_whitespace = string
456 .find(|character: char| !character.is_ascii_whitespace())
457 .unwrap();
458 string.replace_range(0..first_non_whitespace, "");
459 }
460
461 pub fn is_valid_floating_point_number_string(&self) -> bool {
463 static RE: LazyLock<Regex> = LazyLock::new(|| {
464 Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
465 });
466
467 RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
468 self.parse_floating_point_number().is_some()
469 }
470
471 pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
472 self.str().parse::<T>()
473 }
474
475 pub fn parse_floating_point_number(&self) -> Option<f64> {
477 parse_floating_point_number(&self.str())
478 }
479
480 pub fn set_best_representation_of_the_floating_point_number(&mut self) {
482 if let Some(val) = self.parse_floating_point_number() {
483 let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
485
486 *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
487 }
488 }
489
490 pub fn to_lowercase(&self) -> String {
491 self.str().to_lowercase()
492 }
493
494 pub fn to_uppercase(&self) -> String {
495 self.str().to_uppercase()
496 }
497
498 pub fn strip_newlines(&mut self) {
499 self.0
502 .borrow_mut()
503 .ensure_rust_string()
504 .retain(|character| character != '\r' && character != '\n');
505 }
506
507 pub fn normalize_newlines(&mut self) {
509 let mut inner = self.0.borrow_mut();
513 let string = inner.ensure_rust_string();
514 *string = string.replace("\r\n", "\n").replace("\r", "\n")
515 }
516
517 pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
518 let new_string = self.str().to_owned();
519 DOMString(RefCell::new(DOMStringType::Rust(
520 new_string.replace(needle, replace_char),
521 )))
522 }
523
524 pub fn starts_with(&self, c: char) -> bool {
526 if !c.is_ascii() {
527 self.str().starts_with(c)
528 } else {
529 self.encoded_bytes().bytes().starts_with(&[c as u8])
532 }
533 }
534
535 pub fn starts_with_str(&self, needle: &str) -> bool {
536 self.str().starts_with(needle)
537 }
538
539 pub fn contains(&self, needle: &str) -> bool {
540 self.str().contains(needle)
541 }
542
543 pub fn to_ascii_lowercase(&self) -> String {
544 let conversion = match self.encoded_bytes() {
545 EncodedBytes::Latin1(bytes) => {
546 if bytes.iter().all(|c| *c <= ASCII_END) {
547 Some(unsafe {
549 String::from_utf8_unchecked(
550 bytes
551 .iter()
552 .map(|c| {
553 if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
554 c + 32
555 } else {
556 *c
557 }
558 })
559 .collect(),
560 )
561 })
562 } else {
563 None
564 }
565 },
566 EncodedBytes::Utf8(bytes) => unsafe {
567 Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
569 },
570 };
571 if let Some(conversion) = conversion {
573 conversion
574 } else {
575 self.str().to_ascii_lowercase()
576 }
577 }
578
579 fn contains_space_characters(
580 &self,
581 latin1_characters: &'static [u8],
582 utf8_characters: &'static [char],
583 ) -> bool {
584 match self.encoded_bytes() {
585 EncodedBytes::Latin1(items) => {
586 latin1_characters.iter().any(|byte| items.contains(byte))
587 },
588 EncodedBytes::Utf8(bytes) => {
589 let s = unsafe { str::from_utf8_unchecked(&bytes) };
591 s.contains(utf8_characters)
592 },
593 }
594 }
595
596 pub fn contains_tab_or_newline(&self) -> bool {
598 const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
599 const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
600
601 self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
602 }
603
604 pub fn contains_html_space_characters(&self) -> bool {
606 const SPACE_BYTES: [u8; 5] = [
607 ASCII_TAB,
608 ASCII_NEWLINE,
609 ASCII_FORMFEED,
610 ASCII_CR,
611 ASCII_SPACE,
612 ];
613 self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
614 }
615
616 pub fn as_bytes(&self) -> BytesView<'_> {
618 if self.is_ascii() {
622 BytesView(self.0.borrow())
623 } else {
624 self.ensure_rust_string();
625 BytesView(self.0.borrow())
626 }
627 }
628
629 pub fn is_ascii_lowercase(&self) -> bool {
631 match self.encoded_bytes() {
632 EncodedBytes::Latin1(items) => items
633 .iter()
634 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
635 EncodedBytes::Utf8(s) => s
636 .iter()
637 .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
638 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
639 }
640 }
641
642 pub fn is_ascii(&self) -> bool {
644 self.encoded_bytes().bytes().is_ascii()
645 }
646
647 pub fn is_valid_for_cookie(&self) -> bool {
651 match self.encoded_bytes() {
652 EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
653 .iter()
654 .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
655 }
656 }
657
658 fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
661 match self.encoded_bytes() {
662 EncodedBytes::Latin1(latin1_bytes) => {
664 if latin1_bytes.iter().all(|character| character.is_ascii()) {
665 return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
668 }
669 },
670 EncodedBytes::Utf8(utf8_bytes) => {
671 return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
674 },
675 };
676 callback(self.str().deref())
677 }
678}
679
/// Parses `input` as a finite floating-point number.
///
/// Surrounding whitespace is ignored. Anything `f64::from_str` accepts is
/// accepted, except a leading `+`, a trailing `.`, NaN, and values that parse
/// to an infinity.
///
/// The `+` and `.` checks are made on the trimmed text, the same text that is
/// parsed, so `" +1"` and `"1. "` are rejected just like `"+1"` and `"1."`.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    let trimmed = input.trim();
    if trimmed.starts_with('+') || trimmed.ends_with('.') {
        return None;
    }

    trimmed
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite())
}
693
694pub struct BytesView<'a>(Ref<'a, DOMStringType>);
695
696impl Deref for BytesView<'_> {
697 type Target = [u8];
698
699 fn deref(&self) -> &Self::Target {
700 self.0.as_raw_bytes()
702 }
703}
704
705impl Ord for DOMString {
706 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
707 self.str().cmp(&other.str())
708 }
709}
710
711impl PartialOrd for DOMString {
712 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
713 self.str().partial_cmp(&other.str())
714 }
715}
716
717impl Extend<char> for DOMString {
718 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
719 self.0.borrow_mut().ensure_rust_string().extend(iter)
720 }
721}
722
723impl ToJSValConvertible for DOMString {
724 unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
725 let val = self.0.borrow();
726 match *val {
727 DOMStringType::Rust(ref s) => unsafe {
728 s.to_jsval(cx, rval);
729 },
730 DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
731 rval.set(StringValue(&*rooted_traceable_box.get()));
732 },
733 #[cfg(test)]
734 DOMStringType::Latin1Vec(ref items) => {
735 let mut v = vec![0; items.len() * 2];
736 let real_size =
737 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
738 v.truncate(real_size);
739
740 String::from_utf8(v)
741 .expect("Error in constructin test string")
742 .to_jsval(cx, rval);
743 },
744 };
745 }
746}
747
748impl std::hash::Hash for DOMString {
749 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
750 self.str().hash(state);
751 }
752}
753
754impl std::fmt::Display for DOMString {
755 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
756 fmt::Display::fmt(self.str().deref(), f)
757 }
758}
759
760impl std::cmp::PartialEq<str> for DOMString {
761 fn eq(&self, other: &str) -> bool {
762 if other.is_ascii() {
763 *other.as_bytes() == *self.encoded_bytes().bytes()
764 } else {
765 self.str().deref() == other
766 }
767 }
768}
769
770impl std::cmp::PartialEq<&str> for DOMString {
771 fn eq(&self, other: &&str) -> bool {
772 self.eq(*other)
773 }
774}
775
776impl std::cmp::PartialEq<String> for DOMString {
777 fn eq(&self, other: &String) -> bool {
778 self.eq(other.as_str())
779 }
780}
781
782impl std::cmp::PartialEq<DOMString> for String {
783 fn eq(&self, other: &DOMString) -> bool {
784 other.eq(self)
785 }
786}
787
788impl std::cmp::PartialEq<DOMString> for str {
789 fn eq(&self, other: &DOMString) -> bool {
790 other.eq(self)
791 }
792}
793
794impl std::cmp::PartialEq for DOMString {
795 fn eq(&self, other: &DOMString) -> bool {
796 let result = match (self.encoded_bytes(), other.encoded_bytes()) {
797 (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
798 Some(*bytes == *other_bytes)
799 },
800 (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
801 if other_bytes.is_ascii() =>
802 {
803 Some(*bytes == *other_bytes)
804 },
805 (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
806 Some(*bytes == *other_bytes)
807 },
808 (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
809 Some(*bytes == *other_bytes)
810 },
811 _ => None,
812 };
813
814 if let Some(eq_result) = result {
815 return eq_result;
816 }
817
818 *self.str() == *other.str()
819 }
820}
821
822impl std::cmp::Eq for DOMString {}
823
824impl From<std::string::String> for DOMString {
825 fn from(string: String) -> Self {
826 DOMString(RefCell::new(DOMStringType::Rust(string)))
827 }
828}
829
830impl From<&str> for DOMString {
831 fn from(string: &str) -> Self {
832 String::from(string).into()
833 }
834}
835
836impl From<DOMString> for LocalName {
837 fn from(dom_string: DOMString) -> LocalName {
838 dom_string.with_str_reference(|string| LocalName::from(string))
839 }
840}
841
842impl From<&DOMString> for LocalName {
843 fn from(dom_string: &DOMString) -> LocalName {
844 dom_string.with_str_reference(|string| LocalName::from(string))
845 }
846}
847
848impl From<DOMString> for Namespace {
849 fn from(dom_string: DOMString) -> Namespace {
850 dom_string.with_str_reference(|string| Namespace::from(string))
851 }
852}
853
854impl From<DOMString> for Atom {
855 fn from(dom_string: DOMString) -> Atom {
856 dom_string.with_str_reference(|string| Atom::from(string))
857 }
858}
859
860impl From<DOMString> for String {
861 fn from(val: DOMString) -> Self {
862 val.str().to_owned()
863 }
864}
865
866impl From<DOMString> for Vec<u8> {
867 fn from(value: DOMString) -> Self {
868 value.str().as_bytes().to_vec()
869 }
870}
871
872impl From<Cow<'_, str>> for DOMString {
873 fn from(value: Cow<'_, str>) -> Self {
874 DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
875 }
876}
877
/// Implementation detail of [`match_domstring_ascii!`]: turns a list of
/// `"literal" => expression,` arms ending in one pattern arm into an
/// `if`/`else` chain over the encoded bytes.
///
/// Every literal is checked with `debug_assert!` to be ASCII before it is
/// compared, byte for byte, with the bytes of the input.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // A literal arm followed by further arms.
    ($encoding: expr, $encoded: expr, $literal: literal => $body: expr, $($remaining:tt)*) => {
        if {
            debug_assert!(($literal).is_ascii());
            $literal.as_bytes()
        } == $encoded.bytes() {
            $body
        } else {
            $crate::match_domstring_ascii_inner!($encoding, $encoded, $($remaining)*)
        }

    };
    // The final pattern arm (usually `_`), which needs its trailing comma.
    ($encoding: expr, $encoded: expr, $fallback: pat => $body: expr,) => {
        match $encoded {
            $fallback => $body
        }
    }
}
897
/// Matches a string against ASCII string literals without converting its
/// storage.
///
/// The arms are `"literal" => expression,` entries followed by one final
/// pattern arm such as `_ => expression,`. The encoded bytes of the input stay
/// borrowed while the selected arm runs.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($string:expr, $($arms:tt)*) => {
        {
            use $crate::domstring::EncodedBytes;

            let encoded = $string.encoded_bytes();
            match encoded {
                EncodedBytes::Latin1(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, encoded, $($arms)*)
                }
                EncodedBytes::Utf8(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, encoded, $($arms)*)
                }

            }
        }
    };
}
930
931#[cfg(test)]
932mod tests {
933 use super::*;
934
935 const LATIN1_PILLCROW: u8 = 0xB6;
936 const UTF8_PILLCROW: [u8; 2] = [194, 182];
937 const LATIN1_POWER2: u8 = 0xB2;
938
939 fn from_latin1(l1vec: Vec<u8>) -> DOMString {
940 DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
941 }
942
943 #[test]
944 fn string_functions() {
945 let s = DOMString::from("AbBcC❤&%$#");
946 let s_copy = s.clone();
947 assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
948 assert_eq!(s, s_copy);
949 assert_eq!(s.len(), 12);
950 assert_eq!(s_copy.len(), 12);
951 assert!(s.starts_with('A'));
952 let s2 = DOMString::from("");
953 assert!(s2.is_empty());
954 }
955
956 #[test]
957 fn string_functions_latin1() {
958 {
959 let s = from_latin1(vec![
960 b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
961 ]);
962 assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
963 }
964 {
965 let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
966 assert_eq!(s.to_ascii_lowercase(), "abbcc");
967 }
968 {
969 let s = from_latin1(vec![
970 b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
971 ]);
972 assert_eq!(s.len(), 11);
973 assert!(s.starts_with('A'));
974 }
975 {
976 let s = from_latin1(vec![]);
977 assert!(s.is_empty());
978 }
979 }
980
981 #[test]
982 fn test_length() {
983 let s1 = from_latin1(vec![
984 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
985 0xAE, 0xAF,
986 ]);
987 let s2 = from_latin1(vec![
988 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
989 0xBE, 0xBF,
990 ]);
991 let s3 = from_latin1(vec![
992 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
993 0xCE, 0xCF,
994 ]);
995 let s4 = from_latin1(vec![
996 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
997 0xDE, 0xDF,
998 ]);
999 let s5 = from_latin1(vec![
1000 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
1001 0xEE, 0xEF,
1002 ]);
1003 let s6 = from_latin1(vec![
1004 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
1005 0xFE, 0xFF,
1006 ]);
1007
1008 let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
1009 let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
1010 let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
1011 let s4_utf8 = String::from("ÐÑÒÓÔÕÖרÙÚÛÜÝÞß");
1012 let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
1013 let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");
1014
1015 assert_eq!(s1.len(), s1_utf8.len());
1016 assert_eq!(s2.len(), s2_utf8.len());
1017 assert_eq!(s3.len(), s3_utf8.len());
1018 assert_eq!(s4.len(), s4_utf8.len());
1019 assert_eq!(s5.len(), s5_utf8.len());
1020 assert_eq!(s6.len(), s6_utf8.len());
1021
1022 s1.ensure_rust_string();
1023 s2.ensure_rust_string();
1024 s3.ensure_rust_string();
1025 s4.ensure_rust_string();
1026 s5.ensure_rust_string();
1027 s6.ensure_rust_string();
1028 assert_eq!(s1.len(), s1_utf8.len());
1029 assert_eq!(s2.len(), s2_utf8.len());
1030 assert_eq!(s3.len(), s3_utf8.len());
1031 assert_eq!(s4.len(), s4_utf8.len());
1032 assert_eq!(s5.len(), s5_utf8.len());
1033 assert_eq!(s6.len(), s6_utf8.len());
1034 }
1035
1036 #[test]
1037 fn test_convert() {
1038 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1039 s.ensure_rust_string();
1040 assert_eq!(&*s.str(), "abc%$");
1041 }
1042
1043 #[test]
1044 fn partial_eq() {
1045 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1046 let string = String::from("abc%$");
1047 let s2 = DOMString::from(string.clone());
1048 assert_eq!(s, s2);
1049 assert_eq!(s, string);
1050 }
1051
1052 #[test]
1053 fn encoded_latin1_bytes() {
1054 let original_latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
1055 let dom_string = from_latin1(original_latin1_bytes.clone());
1056 let string_latin1_bytes = match dom_string.encoded_bytes() {
1057 EncodedBytes::Latin1(bytes) => bytes,
1058 _ => unreachable!("Expected Latin1 encoded bytes"),
1059 };
1060 assert_eq!(*original_latin1_bytes, *string_latin1_bytes);
1061 }
1062
1063 #[test]
1064 fn testing_stringview() {
1065 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1066
1067 assert_eq!(
1068 s.str().chars().collect::<Vec<char>>(),
1069 vec!['a', 'b', 'c', '%', '$', '²']
1070 );
1071 assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
1072 }
1073
1074 #[test]
1079 fn test_hash() {
1080 use std::hash::{DefaultHasher, Hash, Hasher};
1081 fn hash_value(d: &DOMString) -> u64 {
1082 let mut hasher = DefaultHasher::new();
1083 d.hash(&mut hasher);
1084 hasher.finish()
1085 }
1086
1087 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1088 let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1089 s_converted.ensure_rust_string();
1090 let s2 = DOMString::from("abc%$²");
1091
1092 let hash_s = hash_value(&s);
1093 let hash_s_converted = hash_value(&s_converted);
1094 let hash_s2 = hash_value(&s2);
1095
1096 assert_eq!(hash_s, hash_s2);
1097 assert_eq!(hash_s, hash_s_converted);
1098 }
1099
1100 #[test]
1102 fn test_match_executing() {
1103 {
1105 let s = from_latin1(vec![b'a', b'b', b'c']);
1106 match_domstring_ascii!( s,
1107 "abc" => assert!(true),
1108 "bcd" => assert!(false),
1109 _ => (),
1110 );
1111 }
1112
1113 {
1114 let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1115 match_domstring_ascii!( s,
1116 "abc/" => assert!(true),
1117 "bcd" => assert!(false),
1118 _ => (),
1119 );
1120 }
1121
1122 {
1123 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1124 match_domstring_ascii!( s,
1125 "bcd" => assert!(false),
1126 "abc%$" => assert!(true),
1127 _ => (),
1128 );
1129 }
1130
1131 {
1132 let s = DOMString::from("abcde");
1133 match_domstring_ascii!( s,
1134 "abc" => assert!(false),
1135 "bcd" => assert!(false),
1136 _ => assert!(true),
1137 );
1138 }
1139 {
1140 let s = DOMString::from("abc%$");
1141 match_domstring_ascii!( s,
1142 "bcd" => assert!(false),
1143 "abc%$" => assert!(true),
1144 _ => (),
1145 );
1146 }
1147 {
1148 let s = from_latin1(vec![b'a', b'b', b'c']);
1149 match_domstring_ascii!( s,
1150 "abcdd" => assert!(false),
1151 "bcd" => assert!(false),
1152 _ => (),
1153 );
1154 }
1155 }
1156
1157 #[test]
1159 fn test_match_returning_result() {
1160 {
1161 let s = from_latin1(vec![b'a', b'b', b'c']);
1162 let res = match_domstring_ascii!( s,
1163 "abc" => true,
1164 "bcd" => false,
1165 _ => false,
1166 );
1167 assert_eq!(res, true);
1168 }
1169 {
1170 let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1171 let res = match_domstring_ascii!( s,
1172 "abc/" => true,
1173 "bcd" => false,
1174 _ => false,
1175 );
1176 assert_eq!(res, true);
1177 }
1178 {
1179 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1180 let res = match_domstring_ascii!( s,
1181 "bcd" => false,
1182 "abc%$" => true,
1183 _ => false,
1184 );
1185 assert_eq!(res, true);
1186 }
1187
1188 {
1189 let s = DOMString::from("abcde");
1190 let res = match_domstring_ascii!( s,
1191 "abc" => false,
1192 "bcd" => false,
1193 _ => true,
1194 );
1195 assert_eq!(res, true);
1196 }
1197 {
1198 let s = DOMString::from("abc%$");
1199 let res = match_domstring_ascii!( s,
1200 "bcd" => false,
1201 "abc%$" => true,
1202 _ => false,
1203 );
1204 assert_eq!(res, true);
1205 }
1206 {
1207 let s = from_latin1(vec![b'a', b'b', b'c']);
1208 let res = match_domstring_ascii!( s,
1209 "abcdd" => false,
1210 "bcd" => false,
1211 _ => true,
1212 );
1213 assert_eq!(res, true);
1214 }
1215 }
1216
1217 #[test]
1218 #[should_panic]
1219 fn test_match_panic() {
1220 let s = DOMString::from("abcd");
1221 let _res = match_domstring_ascii!(s,
1222 "❤" => true,
1223 _ => false,);
1224 }
1225
1226 #[test]
1227 #[should_panic]
1228 fn test_match_panic2() {
1229 let s = DOMString::from("abcd");
1230 let _res = match_domstring_ascii!(s,
1231 "abc" => false,
1232 "❤" => true,
1233 _ => false,
1234 );
1235 }
1236
1237 #[test]
1238 fn test_strip_whitespace() {
1239 {
1240 let mut s = from_latin1(vec![
1241 b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
1242 ]);
1243
1244 s.strip_leading_and_trailing_ascii_whitespace();
1245 s.ensure_rust_string();
1246 assert_eq!(&*s.str(), "abc%$²");
1247 }
1248 {
1249 let mut s = DOMString::from(" \n abc%$ ");
1250
1251 s.strip_leading_and_trailing_ascii_whitespace();
1252 s.ensure_rust_string();
1253 assert_eq!(&*s.str(), "abc%$");
1254 }
1255 }
1256
1257 #[test]
1259 fn contains_html_space_characters() {
1260 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_TAB, b'a', b'a']); assert!(s.contains_html_space_characters());
1262 s.ensure_rust_string();
1263 assert!(s.contains_html_space_characters());
1264
1265 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_NEWLINE, b'a', b'a']); assert!(s.contains_html_space_characters());
1267 s.ensure_rust_string();
1268 assert!(s.contains_html_space_characters());
1269
1270 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_FORMFEED, b'a', b'a']); assert!(s.contains_html_space_characters());
1272 s.ensure_rust_string();
1273 assert!(s.contains_html_space_characters());
1274
1275 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_CR, b'a', b'a']); assert!(s.contains_html_space_characters());
1277 s.ensure_rust_string();
1278 assert!(s.contains_html_space_characters());
1279
1280 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']); assert!(s.contains_html_space_characters());
1282 s.ensure_rust_string();
1283 assert!(s.contains_html_space_characters());
1284
1285 let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
1286 assert!(!s.contains_html_space_characters());
1287 s.ensure_rust_string();
1288 assert!(!s.contains_html_space_characters());
1289 }
1290
1291 #[test]
1292 fn atom() {
1293 let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
1294 let atom1 = Atom::from(s);
1295 let s2 = DOMString::from("aaa aa");
1296 let atom2 = Atom::from(s2);
1297 assert_eq!(atom1, atom2);
1298 let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
1299 let atom3 = Atom::from(s3);
1300 assert_ne!(atom1, atom3);
1301 }
1302
1303 #[test]
1304 fn namespace() {
1305 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1306 let atom1 = Namespace::from(s);
1307 let s2 = DOMString::from("aaa aa");
1308 let atom2 = Namespace::from(s2);
1309 assert_eq!(atom1, atom2);
1310 let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1311 let atom3 = Namespace::from(s3);
1312 assert_ne!(atom1, atom3);
1313 }
1314
1315 #[test]
1316 fn localname() {
1317 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1318 let atom1 = LocalName::from(s);
1319 let s2 = DOMString::from("aaa aa");
1320 let atom2 = LocalName::from(s2);
1321 assert_eq!(atom1, atom2);
1322 let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1323 let atom3 = LocalName::from(s3);
1324 assert_ne!(atom1, atom3);
1325 }
1326
1327 #[test]
1328 fn is_ascii_lowercase() {
1329 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1330 assert!(!s.is_ascii_lowercase());
1331 let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
1332 assert!(!s.is_ascii_lowercase());
1333 let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
1334 assert!(s.is_ascii_lowercase());
1335 let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
1336 assert!(!s.is_ascii_lowercase());
1337 let s = DOMString::from("`aaaz");
1338 assert!(!s.is_ascii_lowercase());
1339 let s = DOMString::from("aaaz");
1340 assert!(s.is_ascii_lowercase());
1341 }
1342
1343 #[test]
1344 fn test_as_bytes() {
1345 const ASCII_SMALL_A: u8 = b'a';
1346 const ASCII_SMALL_Z: u8 = b'z';
1347
1348 let v1 = vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'];
1349 let s = from_latin1(v1.clone());
1350 assert_eq!(
1351 *s.as_bytes(),
1352 [
1353 ASCII_SMALL_A,
1354 ASCII_SMALL_A,
1355 ASCII_SMALL_A,
1356 UTF8_PILLCROW[0],
1357 UTF8_PILLCROW[1],
1358 ASCII_SMALL_A,
1359 ASCII_SMALL_A
1360 ]
1361 );
1362
1363 let v2 = vec![b'a', b'a', b'a', b'a', b'z'];
1364 let s = from_latin1(v2.clone());
1365 assert_eq!(
1366 *s.as_bytes(),
1367 [
1368 ASCII_SMALL_A,
1369 ASCII_SMALL_A,
1370 ASCII_SMALL_A,
1371 ASCII_SMALL_A,
1372 ASCII_SMALL_Z
1373 ]
1374 );
1375
1376 let str = "abc%$²".to_owned();
1377 let s = DOMString::from(str.clone());
1378 assert_eq!(&*s.as_bytes(), str.as_bytes());
1379 let str = "AbBcC❤&%$#".to_owned();
1380 let s = DOMString::from(str.clone());
1381 assert_eq!(&*s.as_bytes(), str.as_bytes());
1382 }
1383}