1#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::conversions::{ToJSValConvertible, jsstr_to_string};
17use js::gc::MutableHandleValue;
18use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
19use js::jsval::StringValue;
20use js::rust::{Runtime, Trace};
21use malloc_size_of::MallocSizeOfOps;
22use num_traits::{ToPrimitive, Zero};
23use regex::Regex;
24use servo_base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27
28use crate::script_runtime::JSContext as SafeJSContext;
29use crate::trace::RootedTraceableBox;
30
// Byte values used by the Latin1 fast paths below, spelled as byte literals.

/// `~`, the upper bound used by the Latin1 "plain ASCII" checks in this file.
/// Note that DEL (0x7F) lies above this bound.
const ASCII_END: u8 = b'~';
/// First ASCII upper-case letter.
const ASCII_CAPITAL_A: u8 = b'A';
/// Last ASCII upper-case letter.
const ASCII_CAPITAL_Z: u8 = b'Z';
/// First ASCII lower-case letter.
const ASCII_LOWERCASE_A: u8 = b'a';
/// Last ASCII lower-case letter.
const ASCII_LOWERCASE_Z: u8 = b'z';
/// Horizontal tab.
const ASCII_TAB: u8 = b'\t';
/// Line feed.
const ASCII_NEWLINE: u8 = b'\n';
/// Form feed.
const ASCII_FORMFEED: u8 = b'\x0C';
/// Carriage return.
const ASCII_CR: u8 = b'\r';
/// Space.
const ASCII_SPACE: u8 = b' ';
41
42unsafe fn get_latin1_string_bytes(
45 rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
46) -> &[u8] {
47 debug_assert!(!rooted_traceable_box.get().is_null());
48 let mut length = 0;
49 unsafe {
50 let chars = JS_GetLatin1StringCharsAndLength(
51 Runtime::get().expect("JS runtime has shut down").as_ptr(),
52 ptr::null(),
53 rooted_traceable_box.get(),
54 &mut length,
55 );
56 assert!(!chars.is_null());
57 slice::from_raw_parts(chars, length)
58 }
59}
60
/// A borrowed view of a string's raw bytes, tagged with their encoding.
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// One byte per character, each byte being a Latin1 code point.
    Latin1(Ref<'a, [u8]>),
    /// UTF-8 encoded bytes.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Returns the raw bytes, whatever their encoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// Returns the number of bytes the string occupies once encoded as UTF-8.
    ///
    /// For Latin1 data every ASCII byte (`0x00..=0x7F`, including DEL) takes
    /// one UTF-8 byte and every other byte (`0x80..=0xFF`) takes two.
    pub fn len(&self) -> usize {
        match self {
            Self::Latin1(bytes) => bytes
                .iter()
                .map(|byte| if byte.is_ascii() { 1 } else { 2 })
                .sum(),
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Returns `true` when there are no bytes at all.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
95
96enum DOMStringType {
97 Rust(String),
99 JSString(RootedTraceableBox<Heap<*mut JSString>>),
101 #[cfg(test)]
102 Latin1Vec(Vec<u8>),
105}
106
107impl Default for DOMStringType {
108 fn default() -> Self {
109 Self::Rust(Default::default())
110 }
111}
112
113impl DOMStringType {
114 fn as_raw_bytes(&self) -> &[u8] {
119 match self {
120 DOMStringType::Rust(s) => s.as_bytes(),
121 DOMStringType::JSString(rooted_traceable_box) => unsafe {
122 get_latin1_string_bytes(rooted_traceable_box)
123 },
124 #[cfg(test)]
125 DOMStringType::Latin1Vec(items) => items,
126 }
127 }
128
129 fn ensure_rust_string(&mut self) -> &mut String {
130 let new_string = match self {
131 DOMStringType::Rust(string) => return string,
132 DOMStringType::JSString(rooted_traceable_box) => unsafe {
133 jsstr_to_string(
134 Runtime::get().expect("JS runtime has shut down").as_ptr(),
135 NonNull::new(rooted_traceable_box.get()).unwrap(),
136 )
137 },
138 #[cfg(test)]
139 DOMStringType::Latin1Vec(items) => {
140 let mut v = vec![0; items.len() * 2];
141 let real_size =
142 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
143 v.truncate(real_size);
144
145 unsafe { String::from_utf8_unchecked(v) }
148 },
149 };
150 *self = DOMStringType::Rust(new_string);
151 self.ensure_rust_string()
152 }
153}
154
155#[derive(Debug)]
158pub struct StringView<'a>(Ref<'a, str>);
159
160impl StringView<'_> {
161 pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
162 self.split(HTML_SPACE_CHARACTERS)
163 .filter(|string| !string.is_empty())
164 }
165}
166
167impl From<StringView<'_>> for String {
168 fn from(string_view: StringView<'_>) -> Self {
169 string_view.0.to_string()
170 }
171}
172
173impl Deref for StringView<'_> {
174 type Target = str;
175 fn deref(&self) -> &str {
176 &(self.0)
177 }
178}
179
180impl AsRef<str> for StringView<'_> {
181 fn as_ref(&self) -> &str {
182 &(self.0)
183 }
184}
185
186impl PartialEq for StringView<'_> {
187 fn eq(&self, other: &Self) -> bool {
188 self.0.eq(&*(other.0))
189 }
190}
191
192impl PartialEq<&str> for StringView<'_> {
193 fn eq(&self, other: &&str) -> bool {
194 self.0.eq(*other)
195 }
196}
197
198impl Eq for StringView<'_> {}
199
200impl PartialOrd for StringView<'_> {
201 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
202 self.0.partial_cmp(&**other)
203 }
204}
205
206impl Ord for StringView<'_> {
207 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
208 self.0.cmp(other)
209 }
210}
211
212unsafe impl Trace for DOMStringType {
218 unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
219 unsafe {
220 match self {
221 DOMStringType::Rust(_s) => {},
222 DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
223 #[cfg(test)]
224 DOMStringType::Latin1Vec(_s) => {},
225 }
226 }
227 }
228}
229
230impl malloc_size_of::MallocSizeOf for DOMStringType {
231 fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
232 match self {
233 DOMStringType::Rust(s) => s.size_of(ops),
234 DOMStringType::JSString(_rooted_traceable_box) => {
235 0
237 },
238 #[cfg(test)]
239 DOMStringType::Latin1Vec(s) => s.size_of(ops),
240 }
241 }
242}
243
244impl std::fmt::Debug for DOMStringType {
245 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
246 match self {
247 DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
248 DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
249 #[cfg(test)]
250 DOMStringType::Latin1Vec(s) => f
251 .debug_struct("DOMString")
252 .field("latin1_string", s)
253 .finish(),
254 }
255 }
256}
257
258#[repr(transparent)]
294#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
295pub struct DOMString(RefCell<DOMStringType>);
296
297impl Clone for DOMString {
298 fn clone(&self) -> Self {
299 self.ensure_rust_string().clone().into()
300 }
301}
302
/// Errors that can occur while building a `DOMString`.
pub enum DOMStringErrorType {
    /// Converting the JS value to a JS string failed.
    JSConversionError,
}
306
307impl DOMString {
308 pub fn new() -> DOMString {
310 Default::default()
311 }
312
313 pub fn from_js_string(
316 cx: SafeJSContext,
317 value: js::gc::HandleValue,
318 ) -> Result<DOMString, DOMStringErrorType> {
319 let string_ptr = unsafe { js::rust::ToString(*cx, value) };
320 if string_ptr.is_null() {
321 debug!("ToString failed");
322 Err(DOMStringErrorType::JSConversionError)
323 } else {
324 let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
325 let inner = if latin1 {
326 let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
327 DOMStringType::JSString(h)
328 } else {
329 DOMStringType::Rust(unsafe {
331 jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
332 })
333 };
334 Ok(DOMString(RefCell::new(inner)))
335 }
336 }
337
338 fn ensure_rust_string(&self) -> RefMut<'_, String> {
341 let inner = self.0.borrow_mut();
342 RefMut::map(inner, |inner| inner.ensure_rust_string())
343 }
344
345 #[expect(unused)]
347 fn debug_js(&self) {
348 match *self.0.borrow() {
349 DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
350 DOMStringType::JSString(ref rooted_traceable_box) => {
351 let s = unsafe {
352 jsstr_to_string(
353 Runtime::get().expect("JS runtime has shut down").as_ptr(),
354 ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
355 )
356 };
357 info!("JSString ({})", s);
358 },
359 #[cfg(test)]
360 DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
361 }
362 }
363
364 pub fn str(&self) -> StringView<'_> {
366 {
367 let inner = self.0.borrow();
368 if matches!(&*inner, DOMStringType::Rust(..)) {
369 return StringView(Ref::map(inner, |inner| match inner {
370 DOMStringType::Rust(string) => string.as_str(),
371 _ => unreachable!("Guaranteed by condition above"),
372 }));
373 }
374 }
375
376 self.ensure_rust_string();
377 self.str()
378 }
379
380 pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
383 let inner = self.0.borrow();
384 match &*inner {
385 DOMStringType::Rust(..) => {
386 EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
387 },
388 _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
389 }
390 }
391
392 pub fn clear(&mut self) {
393 let mut inner = self.0.borrow_mut();
394 let DOMStringType::Rust(string) = &mut *inner else {
395 *inner = DOMStringType::Rust(String::new());
396 return;
397 };
398 string.clear();
399 }
400
401 pub fn is_empty(&self) -> bool {
402 self.encoded_bytes().is_empty()
403 }
404
405 pub fn len(&self) -> usize {
410 self.encoded_bytes().len()
411 }
412
413 pub fn len_utf8(&self) -> Utf8CodeUnitLength {
420 Utf8CodeUnitLength(self.len())
421 }
422
423 pub fn len_utf16(&self) -> Utf16CodeUnitLength {
428 Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
429 }
430
431 pub fn make_ascii_lowercase(&mut self) {
432 self.0
433 .borrow_mut()
434 .ensure_rust_string()
435 .make_ascii_lowercase();
436 }
437
438 pub fn push_str(&mut self, string_to_push: &str) {
439 self.0
440 .borrow_mut()
441 .ensure_rust_string()
442 .push_str(string_to_push);
443 }
444
445 pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
447 if self.is_empty() {
448 return;
449 }
450
451 let mut inner = self.0.borrow_mut();
452 let string = inner.ensure_rust_string();
453 let trailing_whitespace_len = string
454 .trim_end_matches(|character: char| character.is_ascii_whitespace())
455 .len();
456 string.truncate(trailing_whitespace_len);
457 if string.is_empty() {
458 return;
459 }
460
461 let first_non_whitespace = string
462 .find(|character: char| !character.is_ascii_whitespace())
463 .unwrap();
464 string.replace_range(0..first_non_whitespace, "");
465 }
466
467 pub fn is_valid_floating_point_number_string(&self) -> bool {
469 static RE: LazyLock<Regex> = LazyLock::new(|| {
470 Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
471 });
472
473 RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
474 self.parse_floating_point_number().is_some()
475 }
476
477 pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
478 self.str().parse::<T>()
479 }
480
481 pub fn parse_floating_point_number(&self) -> Option<f64> {
483 parse_floating_point_number(&self.str())
484 }
485
486 pub fn set_best_representation_of_the_floating_point_number(&mut self) {
488 if let Some(val) = self.parse_floating_point_number() {
489 let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
491
492 *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
493 }
494 }
495
496 pub fn to_lowercase(&self) -> String {
497 self.str().to_lowercase()
498 }
499
500 pub fn to_uppercase(&self) -> String {
501 self.str().to_uppercase()
502 }
503
504 pub fn strip_newlines(&mut self) {
505 self.0
508 .borrow_mut()
509 .ensure_rust_string()
510 .retain(|character| character != '\r' && character != '\n');
511 }
512
513 pub fn normalize_newlines(&mut self) {
515 let mut inner = self.0.borrow_mut();
519 let string = inner.ensure_rust_string();
520 *string = string.replace("\r\n", "\n").replace("\r", "\n")
521 }
522
523 pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
524 let new_string = self.str().to_owned();
525 DOMString(RefCell::new(DOMStringType::Rust(
526 new_string.replace(needle, replace_char),
527 )))
528 }
529
530 pub fn starts_with(&self, c: char) -> bool {
532 if !c.is_ascii() {
533 self.str().starts_with(c)
534 } else {
535 self.encoded_bytes().bytes().starts_with(&[c as u8])
538 }
539 }
540
541 pub fn starts_with_str(&self, needle: &str) -> bool {
542 self.str().starts_with(needle)
543 }
544
545 pub fn ends_with_str(&self, needle: &str) -> bool {
546 self.str().ends_with(needle)
547 }
548
549 pub fn contains(&self, needle: &str) -> bool {
550 self.str().contains(needle)
551 }
552
553 pub fn to_ascii_lowercase(&self) -> String {
554 let conversion = match self.encoded_bytes() {
555 EncodedBytes::Latin1(bytes) => {
556 if bytes.iter().all(|c| *c <= ASCII_END) {
557 Some(unsafe {
559 String::from_utf8_unchecked(
560 bytes
561 .iter()
562 .map(|c| {
563 if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
564 c + 32
565 } else {
566 *c
567 }
568 })
569 .collect(),
570 )
571 })
572 } else {
573 None
574 }
575 },
576 EncodedBytes::Utf8(bytes) => unsafe {
577 Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
579 },
580 };
581 if let Some(conversion) = conversion {
583 conversion
584 } else {
585 self.str().to_ascii_lowercase()
586 }
587 }
588
589 fn contains_space_characters(
590 &self,
591 latin1_characters: &'static [u8],
592 utf8_characters: &'static [char],
593 ) -> bool {
594 match self.encoded_bytes() {
595 EncodedBytes::Latin1(items) => {
596 latin1_characters.iter().any(|byte| items.contains(byte))
597 },
598 EncodedBytes::Utf8(bytes) => {
599 let s = unsafe { str::from_utf8_unchecked(&bytes) };
601 s.contains(utf8_characters)
602 },
603 }
604 }
605
606 pub fn contains_tab_or_newline(&self) -> bool {
608 const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
609 const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
610
611 self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
612 }
613
614 pub fn contains_html_space_characters(&self) -> bool {
616 const SPACE_BYTES: [u8; 5] = [
617 ASCII_TAB,
618 ASCII_NEWLINE,
619 ASCII_FORMFEED,
620 ASCII_CR,
621 ASCII_SPACE,
622 ];
623 self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
624 }
625
626 pub fn as_bytes(&self) -> BytesView<'_> {
628 if self.is_ascii() {
632 BytesView(self.0.borrow())
633 } else {
634 self.ensure_rust_string();
635 BytesView(self.0.borrow())
636 }
637 }
638
639 pub fn is_ascii_lowercase(&self) -> bool {
641 match self.encoded_bytes() {
642 EncodedBytes::Latin1(items) => items
643 .iter()
644 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
645 EncodedBytes::Utf8(s) => s
646 .iter()
647 .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
648 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
649 }
650 }
651
652 pub fn is_ascii(&self) -> bool {
654 self.encoded_bytes().bytes().is_ascii()
655 }
656
657 pub fn is_valid_for_cookie(&self) -> bool {
661 match self.encoded_bytes() {
662 EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
663 .iter()
664 .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
665 }
666 }
667
668 fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
671 match self.encoded_bytes() {
672 EncodedBytes::Latin1(latin1_bytes) => {
674 if latin1_bytes.iter().all(|character| character.is_ascii()) {
675 return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
678 }
679 },
680 EncodedBytes::Utf8(utf8_bytes) => {
681 return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
684 },
685 };
686 callback(self.str().deref())
687 }
688}
689
/// Parses `input` as a floating-point number, ignoring surrounding whitespace.
///
/// Accepts what Rust's `f64` parser accepts, except that a leading `+`, a
/// trailing `.`, NaN and infinite results are rejected. The `+` and `.`
/// checks apply to the trimmed text, so `" +1"` and `"1. "` are rejected just
/// like `"+1"` and `"1."`.
///
/// Returns `None` when the text is rejected or does not parse.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    let trimmed = input.trim();
    if trimmed.starts_with('+') || trimmed.ends_with('.') {
        return None;
    }
    // `is_finite` rules out NaN and both infinities (including overflow such as `1e999`).
    trimmed.parse::<f64>().ok().filter(|value| value.is_finite())
}
703
704pub struct BytesView<'a>(Ref<'a, DOMStringType>);
705
706impl Deref for BytesView<'_> {
707 type Target = [u8];
708
709 fn deref(&self) -> &Self::Target {
710 self.0.as_raw_bytes()
712 }
713}
714
715impl Ord for DOMString {
716 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
717 self.str().cmp(&other.str())
718 }
719}
720
721impl PartialOrd for DOMString {
722 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
723 self.str().partial_cmp(&other.str())
724 }
725}
726
727impl Extend<char> for DOMString {
728 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
729 self.0.borrow_mut().ensure_rust_string().extend(iter)
730 }
731}
732
733impl ToJSValConvertible for DOMString {
734 unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
735 let val = self.0.borrow();
736 match *val {
737 DOMStringType::Rust(ref s) => unsafe {
738 s.to_jsval(cx, rval);
739 },
740 DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
741 rval.set(StringValue(&*rooted_traceable_box.get()));
742 },
743 #[cfg(test)]
744 DOMStringType::Latin1Vec(ref items) => {
745 let mut v = vec![0; items.len() * 2];
746 let real_size =
747 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
748 v.truncate(real_size);
749
750 String::from_utf8(v)
751 .expect("Error in constructin test string")
752 .to_jsval(cx, rval);
753 },
754 };
755 }
756}
757
758impl std::hash::Hash for DOMString {
759 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
760 self.str().hash(state);
761 }
762}
763
764impl std::fmt::Display for DOMString {
765 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
766 fmt::Display::fmt(self.str().deref(), f)
767 }
768}
769
770impl std::cmp::PartialEq<str> for DOMString {
771 fn eq(&self, other: &str) -> bool {
772 if other.is_ascii() {
773 *other.as_bytes() == *self.encoded_bytes().bytes()
774 } else {
775 self.str().deref() == other
776 }
777 }
778}
779
780impl std::cmp::PartialEq<&str> for DOMString {
781 fn eq(&self, other: &&str) -> bool {
782 self.eq(*other)
783 }
784}
785
786impl std::cmp::PartialEq<String> for DOMString {
787 fn eq(&self, other: &String) -> bool {
788 self.eq(other.as_str())
789 }
790}
791
792impl std::cmp::PartialEq<DOMString> for String {
793 fn eq(&self, other: &DOMString) -> bool {
794 other.eq(self)
795 }
796}
797
798impl std::cmp::PartialEq<DOMString> for str {
799 fn eq(&self, other: &DOMString) -> bool {
800 other.eq(self)
801 }
802}
803
804impl std::cmp::PartialEq for DOMString {
805 fn eq(&self, other: &DOMString) -> bool {
806 let result = match (self.encoded_bytes(), other.encoded_bytes()) {
807 (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
808 Some(*bytes == *other_bytes)
809 },
810 (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
811 if other_bytes.is_ascii() =>
812 {
813 Some(*bytes == *other_bytes)
814 },
815 (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
816 Some(*bytes == *other_bytes)
817 },
818 (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
819 Some(*bytes == *other_bytes)
820 },
821 _ => None,
822 };
823
824 if let Some(eq_result) = result {
825 return eq_result;
826 }
827
828 *self.str() == *other.str()
829 }
830}
831
832impl std::cmp::Eq for DOMString {}
833
834impl From<std::string::String> for DOMString {
835 fn from(string: String) -> Self {
836 DOMString(RefCell::new(DOMStringType::Rust(string)))
837 }
838}
839
840impl From<&str> for DOMString {
841 fn from(string: &str) -> Self {
842 String::from(string).into()
843 }
844}
845
846impl From<DOMString> for LocalName {
847 fn from(dom_string: DOMString) -> LocalName {
848 dom_string.with_str_reference(|string| LocalName::from(string))
849 }
850}
851
852impl From<&DOMString> for LocalName {
853 fn from(dom_string: &DOMString) -> LocalName {
854 dom_string.with_str_reference(|string| LocalName::from(string))
855 }
856}
857
858impl From<DOMString> for Namespace {
859 fn from(dom_string: DOMString) -> Namespace {
860 dom_string.with_str_reference(|string| Namespace::from(string))
861 }
862}
863
864impl From<DOMString> for Atom {
865 fn from(dom_string: DOMString) -> Atom {
866 dom_string.with_str_reference(|string| Atom::from(string))
867 }
868}
869
870impl From<DOMString> for String {
871 fn from(val: DOMString) -> Self {
872 val.str().to_owned()
873 }
874}
875
876impl From<DOMString> for Vec<u8> {
877 fn from(value: DOMString) -> Self {
878 value.str().as_bytes().to_vec()
879 }
880}
881
882impl From<Cow<'_, str>> for DOMString {
883 fn from(value: Cow<'_, str>) -> Self {
884 DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
885 }
886}
887
/// Implementation detail of `match_domstring_ascii!`.
///
/// Expands one `literal => expr,` arm at a time into an `if`/`else` chain
/// that compares the literal's bytes with `$input.bytes()`; the final
/// `pattern => expr,` arm becomes a one-arm `match` on `$input`. In debug
/// builds each literal is asserted to be ASCII. The first argument is
/// accepted but not used by the expansion.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    ($encoding: expr, $input: expr, $literal: literal => $then: expr, $($remaining:tt)*) => {
        {
            debug_assert!(($literal).is_ascii());
            if $literal.as_bytes() == $input.bytes() {
                $then
            } else {
                $crate::match_domstring_ascii_inner!($encoding, $input, $($remaining)*)
            }
        }
    };
    ($encoding: expr, $input: expr, $fallback: pat => $then: expr,) => {
        match $input {
            $fallback => $then
        }
    }
}
907
/// Matches a `DOMString` against ASCII string literals by comparing raw
/// bytes, without converting the string.
///
/// Arms are written as `"literal" => expr,` and must end with a catch-all
/// `pattern => expr,` arm (trailing comma required).
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($arms:tt)*) => {{
        use $crate::domstring::EncodedBytes;

        let encoded = $input.encoded_bytes();
        match encoded {
            EncodedBytes::Latin1(_) => {
                $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, encoded, $($arms)*)
            }
            EncodedBytes::Utf8(_) => {
                $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, encoded, $($arms)*)
            }
        }
    }};
}
940
941#[cfg(test)]
942mod tests {
943 use super::*;
944
945 const LATIN1_PILLCROW: u8 = 0xB6;
946 const UTF8_PILLCROW: [u8; 2] = [194, 182];
947 const LATIN1_POWER2: u8 = 0xB2;
948
949 fn from_latin1(l1vec: Vec<u8>) -> DOMString {
950 DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
951 }
952
/// Basic operations on a Rust-backed string.
#[test]
fn string_functions() {
    let original = DOMString::from("AbBcC❤&%$#");
    let duplicate = original.clone();
    assert_eq!(original.to_ascii_lowercase(), "abbcc❤&%$#");
    assert_eq!(original, duplicate);
    // Nine ASCII bytes plus the three-byte heart.
    assert_eq!(original.len(), 12);
    assert_eq!(duplicate.len(), 12);
    assert!(original.starts_with('A'));

    let empty = DOMString::from("");
    assert!(empty.is_empty());
}
965
/// Basic operations on Latin1-backed strings.
#[test]
fn string_functions_latin1() {
    // "AbBcC&%$#" followed by the Latin1 superscript two.
    let with_power2 = || from_latin1(b"AbBcC&%$#\xB2".to_vec());

    assert_eq!(with_power2().to_ascii_lowercase(), "abbcc&%$#²");

    let ascii_only = from_latin1(b"AbBcC".to_vec());
    assert_eq!(ascii_only.to_ascii_lowercase(), "abbcc");

    let s = with_power2();
    // Nine ASCII bytes plus two UTF-8 bytes for the superscript two.
    assert_eq!(s.len(), 11);
    assert!(s.starts_with('A'));

    let empty = from_latin1(Vec::new());
    assert!(empty.is_empty());
}
990
/// `len()` of a Latin1 string must equal the UTF-8 byte length of the same
/// text, both before and after conversion to a Rust string.
///
/// Covers every Latin1 byte from 0xA0 to 0xFF in rows of sixteen. The
/// expected text is generated with `char::from`, which maps a byte to the
/// code point of the same value, so no non-ASCII literal can be corrupted in
/// the source.
#[test]
fn test_length() {
    for row_start in (0xA0..=0xF0u8).step_by(16) {
        let latin1: Vec<u8> = (row_start..=row_start + 15).collect();
        let utf8: String = latin1.iter().map(|&byte| char::from(byte)).collect();

        let s = from_latin1(latin1);
        assert_eq!(s.len(), utf8.len(), "row {row_start:#04X} before conversion");

        s.ensure_rust_string();
        assert_eq!(s.len(), utf8.len(), "row {row_start:#04X} after conversion");
        assert_eq!(&*s.str(), utf8.as_str());
    }
}
1045
/// Converting ASCII-only Latin1 storage yields the same text.
#[test]
fn test_convert() {
    let converted = from_latin1(b"abc%$".to_vec());
    converted.ensure_rust_string();
    assert_eq!(&*converted.str(), "abc%$");
}

/// A Latin1 string equals a Rust-backed string and a `String` with the same text.
#[test]
fn partial_eq() {
    let latin1 = from_latin1(b"abc%$".to_vec());
    let string = String::from("abc%$");
    let rust_backed = DOMString::from(string.clone());
    assert_eq!(latin1, rust_backed);
    assert_eq!(latin1, string);
}
1061
/// `encoded_bytes()` hands back Latin1 storage untouched.
#[test]
fn encoded_latin1_bytes() {
    let original_latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2];
    let dom_string = from_latin1(original_latin1_bytes.clone());
    let EncodedBytes::Latin1(string_latin1_bytes) = dom_string.encoded_bytes() else {
        unreachable!("Expected Latin1 encoded bytes")
    };
    assert_eq!(*original_latin1_bytes, *string_latin1_bytes);
}

/// The `str` view of Latin1 storage is the UTF-8 form of the same text.
#[test]
fn testing_stringview() {
    let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);

    let characters: Vec<char> = s.str().chars().collect();
    assert_eq!(characters, vec!['a', 'b', 'c', '%', '$', '²']);
    assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
}
1083
/// The hash must not depend on the storage variant.
#[test]
fn test_hash() {
    use std::hash::{DefaultHasher, Hash, Hasher};

    fn hash_value(d: &DOMString) -> u64 {
        let mut hasher = DefaultHasher::new();
        d.hash(&mut hasher);
        hasher.finish()
    }

    let latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2];
    let untouched = from_latin1(latin1_bytes.clone());
    let converted = from_latin1(latin1_bytes);
    converted.ensure_rust_string();
    let rust_backed = DOMString::from("abc%$²");

    let untouched_hash = hash_value(&untouched);
    let converted_hash = hash_value(&converted);
    let rust_backed_hash = hash_value(&rust_backed);

    assert_eq!(untouched_hash, rust_backed_hash);
    assert_eq!(untouched_hash, converted_hash);
}
1109
/// `match_domstring_ascii!` with arms that are executed for their effect.
///
/// Every arm other than the expected one panics, so a literal that fails to
/// match cannot slip through a silent catch-all.
#[test]
fn test_match_executing() {
    // Latin1 storage, first literal matches.
    {
        let s = from_latin1(vec![b'a', b'b', b'c']);
        match_domstring_ascii!( s,
            "abc" => (),
            "bcd" => panic!("matched the wrong literal"),
            _ => panic!("no literal matched"),
        );
    }

    // Latin1 storage with punctuation, first literal matches.
    {
        let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
        match_domstring_ascii!( s,
            "abc/" => (),
            "bcd" => panic!("matched the wrong literal"),
            _ => panic!("no literal matched"),
        );
    }

    // Latin1 storage, second literal matches.
    {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        match_domstring_ascii!( s,
            "bcd" => panic!("matched the wrong literal"),
            "abc%$" => (),
            _ => panic!("no literal matched"),
        );
    }

    // Rust storage, nothing matches: a prefix of the text is not a match.
    {
        let s = DOMString::from("abcde");
        match_domstring_ascii!( s,
            "abc" => panic!("a prefix must not match"),
            "bcd" => panic!("matched the wrong literal"),
            _ => (),
        );
    }

    // Rust storage, second literal matches.
    {
        let s = DOMString::from("abc%$");
        match_domstring_ascii!( s,
            "bcd" => panic!("matched the wrong literal"),
            "abc%$" => (),
            _ => panic!("no literal matched"),
        );
    }

    // Latin1 storage, nothing matches: a longer literal is not a match.
    {
        let s = from_latin1(vec![b'a', b'b', b'c']);
        match_domstring_ascii!( s,
            "abcdd" => panic!("a longer literal must not match"),
            "bcd" => panic!("matched the wrong literal"),
            _ => (),
        );
    }
}
1166
/// `match_domstring_ascii!` used as an expression: the selected arm's value
/// is the value of the whole macro call.
#[test]
fn test_match_returning_result() {
    // Latin1 storage, first literal matches.
    let s = from_latin1(vec![b'a', b'b', b'c']);
    let matched = match_domstring_ascii!( s,
        "abc" => true,
        "bcd" => false,
        _ => false,
    );
    assert!(matched);

    // Latin1 storage with punctuation, first literal matches.
    let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
    let matched = match_domstring_ascii!( s,
        "abc/" => true,
        "bcd" => false,
        _ => false,
    );
    assert!(matched);

    // Latin1 storage, second literal matches.
    let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
    let matched = match_domstring_ascii!( s,
        "bcd" => false,
        "abc%$" => true,
        _ => false,
    );
    assert!(matched);

    // Rust storage, only the catch-all applies.
    let s = DOMString::from("abcde");
    let fell_through = match_domstring_ascii!( s,
        "abc" => false,
        "bcd" => false,
        _ => true,
    );
    assert!(fell_through);

    // Rust storage, second literal matches.
    let s = DOMString::from("abc%$");
    let matched = match_domstring_ascii!( s,
        "bcd" => false,
        "abc%$" => true,
        _ => false,
    );
    assert!(matched);

    // Latin1 storage, only the catch-all applies.
    let s = from_latin1(vec![b'a', b'b', b'c']);
    let fell_through = match_domstring_ascii!( s,
        "abcdd" => false,
        "bcd" => false,
        _ => true,
    );
    assert!(fell_through);
}
1226
/// A non-ASCII literal in the first arm trips the macro's `debug_assert!`.
///
/// The check only exists when debug assertions are enabled, so the test is
/// compiled only in that configuration.
#[test]
#[cfg(debug_assertions)]
#[should_panic]
fn test_match_panic() {
    let s = DOMString::from("abcd");
    let _res = match_domstring_ascii!(s,
        "❤" => true,
        _ => false,
    );
}

/// A non-ASCII literal in a later arm trips the macro's `debug_assert!` once
/// the earlier arms have failed to match.
///
/// The check only exists when debug assertions are enabled, so the test is
/// compiled only in that configuration.
#[test]
#[cfg(debug_assertions)]
#[should_panic]
fn test_match_panic2() {
    let s = DOMString::from("abcd");
    let _res = match_domstring_ascii!(s,
        "abc" => false,
        "❤" => true,
        _ => false,
    );
}
1246
/// Leading and trailing ASCII whitespace is stripped for both storage kinds.
#[test]
fn test_strip_whitespace() {
    // Latin1 storage, with a non-ASCII byte inside the kept text.
    let mut latin1 = from_latin1(b"   \n abc%$\xB2 ".to_vec());
    latin1.strip_leading_and_trailing_ascii_whitespace();
    latin1.ensure_rust_string();
    assert_eq!(&*latin1.str(), "abc%$²");

    // Rust storage.
    let mut rust_backed = DOMString::from("   \n  abc%$ ");
    rust_backed.strip_leading_and_trailing_ascii_whitespace();
    rust_backed.ensure_rust_string();
    assert_eq!(&*rust_backed.str(), "abc%$");
}
1266
/// Each HTML space character is detected both in Latin1 storage and after
/// conversion to a Rust string; text without one is reported as such.
#[test]
fn contains_html_space_characters() {
    let spaces = [
        ASCII_TAB,
        ASCII_NEWLINE,
        ASCII_FORMFEED,
        ASCII_CR,
        ASCII_SPACE,
    ];
    for space in spaces {
        let s = from_latin1(vec![b'a', b'a', b'a', space, b'a', b'a']);
        assert!(s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(s.contains_html_space_characters());
    }

    let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
    assert!(!s.contains_html_space_characters());
    s.ensure_rust_string();
    assert!(!s.contains_html_space_characters());
}
1300
/// `Atom` conversion gives the same atom for Latin1 and Rust storage and a
/// different one for different text.
#[test]
fn atom() {
    let from_latin1_text = Atom::from(from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']));
    let from_rust_text = Atom::from(DOMString::from("aaa aa"));
    assert_eq!(from_latin1_text, from_rust_text);

    let different = Atom::from(from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']));
    assert_ne!(from_latin1_text, different);
}

/// `Namespace` conversion gives the same value for Latin1 and Rust storage
/// and a different one for different text.
#[test]
fn namespace() {
    let from_latin1_text =
        Namespace::from(from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']));
    let from_rust_text = Namespace::from(DOMString::from("aaa aa"));
    assert_eq!(from_latin1_text, from_rust_text);

    let different =
        Namespace::from(from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']));
    assert_ne!(from_latin1_text, different);
}

/// `LocalName` conversion gives the same value for Latin1 and Rust storage
/// and a different one for different text.
#[test]
fn localname() {
    let from_latin1_text =
        LocalName::from(from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']));
    let from_rust_text = LocalName::from(DOMString::from("aaa aa"));
    assert_eq!(from_latin1_text, from_rust_text);

    let different =
        LocalName::from(from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']));
    assert_ne!(from_latin1_text, different);
}
1336
/// Only text made up entirely of `a..=z` counts as ASCII lower-case.
#[test]
fn is_ascii_lowercase() {
    // Latin1 storage.
    let latin1_cases: [(Vec<u8>, bool); 4] = [
        (vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a'], false),
        (vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'], false),
        (vec![b'a', b'a', b'a', b'a', b'z'], true),
        (vec![b'`', b'a', b'a', b'a', b'z'], false),
    ];
    for (bytes, expected) in latin1_cases {
        assert_eq!(from_latin1(bytes).is_ascii_lowercase(), expected);
    }

    // Rust storage.
    assert!(!DOMString::from("`aaaz").is_ascii_lowercase());
    assert!(DOMString::from("aaaz").is_ascii_lowercase());
}
1352
/// `as_bytes()` always yields UTF-8, whatever the storage.
#[test]
fn test_as_bytes() {
    const ASCII_SMALL_A: u8 = b'a';
    const ASCII_SMALL_Z: u8 = b'z';

    // Latin1 storage with a non-ASCII byte: the pilcrow becomes two UTF-8 bytes.
    let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
    let expected = [
        ASCII_SMALL_A,
        ASCII_SMALL_A,
        ASCII_SMALL_A,
        UTF8_PILLCROW[0],
        UTF8_PILLCROW[1],
        ASCII_SMALL_A,
        ASCII_SMALL_A,
    ];
    assert_eq!(*s.as_bytes(), expected);

    // Pure-ASCII Latin1 storage: the bytes are returned unchanged.
    let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
    let expected = [
        ASCII_SMALL_A,
        ASCII_SMALL_A,
        ASCII_SMALL_A,
        ASCII_SMALL_A,
        ASCII_SMALL_Z,
    ];
    assert_eq!(*s.as_bytes(), expected);

    // Rust storage: the bytes are those of the original string.
    for text in ["abc%$²", "AbBcC❤&%$#"] {
        let owned = text.to_owned();
        let s = DOMString::from(owned.clone());
        assert_eq!(&*s.as_bytes(), owned.as_bytes());
    }
}
1393}