1#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::conversions::{ToJSValConvertible, jsstr_to_string};
17use js::gc::MutableHandleValue;
18use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
19use js::jsval::StringValue;
20use js::rust::{Runtime, Trace};
21use malloc_size_of::MallocSizeOfOps;
22use num_traits::{ToPrimitive, Zero};
23use regex::Regex;
24use servo_base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27
28use crate::script_runtime::JSContext as SafeJSContext;
29use crate::trace::RootedTraceableBox;
30
/// Last code point of the ASCII range (DEL). Bytes at or below this value are
/// single-byte in UTF-8 and have the same meaning in Latin-1.
const ASCII_END: u8 = 0x7F;
/// `'A'`, first ASCII uppercase letter.
const ASCII_CAPITAL_A: u8 = 0x41;
/// `'Z'`, last ASCII uppercase letter.
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `'a'`, first ASCII lowercase letter.
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `'z'`, last ASCII lowercase letter.
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// U+0009 CHARACTER TABULATION.
const ASCII_TAB: u8 = 0x09;
/// U+000A LINE FEED.
const ASCII_NEWLINE: u8 = 0x0A;
/// U+000C FORM FEED.
const ASCII_FORMFEED: u8 = 0x0C;
/// U+000D CARRIAGE RETURN.
const ASCII_CR: u8 = 0x0D;
/// U+0020 SPACE.
const ASCII_SPACE: u8 = 0x20;
41
/// Returns the Latin-1 character data of the JS string held by
/// `rooted_traceable_box` as a byte slice.
///
/// Panics if the JS runtime is no longer available or if the engine returns a
/// null character pointer.
///
/// # Safety
///
/// The pointer stored in `rooted_traceable_box` must be a valid, non-null
/// `JSString` (only checked with `debug_assert!`).
/// NOTE(review): presumably the string must also really be Latin-1 encoded and
/// the engine must keep the character buffer alive and unmoved for as long as
/// the returned slice is used — confirm against the SpiderMonkey API contract.
unsafe fn get_latin1_string_bytes(
    rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
) -> &[u8] {
    debug_assert!(!rooted_traceable_box.get().is_null());
    // Filled in by the engine with the number of Latin-1 characters.
    let mut length = 0;
    unsafe {
        let chars = JS_GetLatin1StringCharsAndLength(
            Runtime::get().expect("JS runtime has shut down").as_ptr(),
            ptr::null(),
            rooted_traceable_box.get(),
            &mut length,
        );
        assert!(!chars.is_null());
        slice::from_raw_parts(chars, length)
    }
}
60
/// A borrowed view of a string's backing bytes, tagged with the encoding those
/// bytes are stored in.
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// Latin-1 bytes: every byte is exactly one character.
    Latin1(Ref<'a, [u8]>),
    /// UTF-8 bytes.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Returns the raw bytes exactly as stored, without any re-encoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// Returns the number of bytes the text occupies when encoded as UTF-8.
    ///
    /// For Latin-1 data every ASCII byte (`0x00..=0x7F`, including DEL)
    /// contributes one byte and every other byte contributes two, because
    /// U+0080..=U+00FF are two-byte sequences in UTF-8.
    pub fn len(&self) -> usize {
        match self {
            Self::Latin1(bytes) => bytes
                .iter()
                .map(|byte| if byte.is_ascii() { 1 } else { 2 })
                .sum(),
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Returns `true` when there are no bytes at all.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
95
/// Backing storage of a `DOMString`.
enum DOMStringType {
    /// An owned Rust (UTF-8) string.
    Rust(String),
    /// A rooted pointer to a string owned by the JS engine. The rest of this
    /// file reads its contents as Latin-1 bytes.
    JSString(RootedTraceableBox<Heap<*mut JSString>>),
    /// Test-only variant holding raw Latin-1 bytes, so the Latin-1 code paths
    /// can be exercised without a JS runtime.
    #[cfg(test)]
    Latin1Vec(Vec<u8>),
}
106
107impl Default for DOMStringType {
108 fn default() -> Self {
109 Self::Rust(Default::default())
110 }
111}
112
113impl DOMStringType {
114 fn as_raw_bytes(&self) -> &[u8] {
119 match self {
120 DOMStringType::Rust(s) => s.as_bytes(),
121 DOMStringType::JSString(rooted_traceable_box) => unsafe {
122 get_latin1_string_bytes(rooted_traceable_box)
123 },
124 #[cfg(test)]
125 DOMStringType::Latin1Vec(items) => items,
126 }
127 }
128
129 fn ensure_rust_string(&mut self) -> &mut String {
130 let new_string = match self {
131 DOMStringType::Rust(string) => return string,
132 DOMStringType::JSString(rooted_traceable_box) => unsafe {
133 jsstr_to_string(
134 Runtime::get().expect("JS runtime has shut down").as_ptr(),
135 NonNull::new(rooted_traceable_box.get()).unwrap(),
136 )
137 },
138 #[cfg(test)]
139 DOMStringType::Latin1Vec(items) => {
140 let mut v = vec![0; items.len() * 2];
141 let real_size =
142 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
143 v.truncate(real_size);
144
145 unsafe { String::from_utf8_unchecked(v) }
148 },
149 };
150 *self = DOMStringType::Rust(new_string);
151 self.ensure_rust_string()
152 }
153}
154
/// A borrowed `str` view backed by a `RefCell` borrow. The underlying cell
/// stays immutably borrowed for as long as the view is alive.
#[derive(Debug)]
pub struct StringView<'a>(Ref<'a, str>);
159
160impl StringView<'_> {
161 pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
162 self.split(HTML_SPACE_CHARACTERS)
163 .filter(|string| !string.is_empty())
164 }
165}
166
167impl From<StringView<'_>> for String {
168 fn from(string_view: StringView<'_>) -> Self {
169 string_view.0.to_string()
170 }
171}
172
173impl Deref for StringView<'_> {
174 type Target = str;
175 fn deref(&self) -> &str {
176 &(self.0)
177 }
178}
179
180impl AsRef<str> for StringView<'_> {
181 fn as_ref(&self) -> &str {
182 &(self.0)
183 }
184}
185
186impl PartialEq for StringView<'_> {
187 fn eq(&self, other: &Self) -> bool {
188 self.0.eq(&*(other.0))
189 }
190}
191
192impl PartialEq<&str> for StringView<'_> {
193 fn eq(&self, other: &&str) -> bool {
194 self.0.eq(*other)
195 }
196}
197
// Equality is delegated to `str` equality (see the `PartialEq` impl), which is
// a total equivalence relation.
impl Eq for StringView<'_> {}
199
200impl PartialOrd for StringView<'_> {
201 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
202 self.0.partial_cmp(&**other)
203 }
204}
205
206impl Ord for StringView<'_> {
207 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
208 self.0.cmp(other)
209 }
210}
211
// Only the `JSString` variant holds something the tracer is asked to visit;
// the other variants are plain Rust data.
// NOTE(review): this impl is what makes the stored JS string reachable to the
// garbage collector — confirm before changing which variants are traced.
unsafe impl Trace for DOMStringType {
    /// Forwards `tracer` to the rooted JS string, if this value holds one.
    unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
        unsafe {
            match self {
                DOMStringType::Rust(_s) => {},
                DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
                #[cfg(test)]
                DOMStringType::Latin1Vec(_s) => {},
            }
        }
    }
}
229
230impl malloc_size_of::MallocSizeOf for DOMStringType {
231 fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
232 match self {
233 DOMStringType::Rust(s) => s.size_of(ops),
234 DOMStringType::JSString(_rooted_traceable_box) => {
235 0
237 },
238 #[cfg(test)]
239 DOMStringType::Latin1Vec(s) => s.size_of(ops),
240 }
241 }
242}
243
244impl std::fmt::Debug for DOMStringType {
245 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
246 match self {
247 DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
248 DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
249 #[cfg(test)]
250 DOMStringType::Latin1Vec(s) => f
251 .debug_struct("DOMString")
252 .field("latin1_string", s)
253 .finish(),
254 }
255 }
256}
257
/// A DOM string whose storage is either an owned Rust string or a reference to
/// a JS engine string.
///
/// The storage sits in a `RefCell` so that `&self` methods can convert it to a
/// Rust string in place. Views handed out by this type (`StringView`,
/// `BytesView`, `EncodedBytes`) keep that cell borrowed while they are alive.
#[repr(transparent)]
#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
296
297impl Clone for DOMString {
298 fn clone(&self) -> Self {
299 self.ensure_rust_string().clone().into()
300 }
301}
302
/// Errors that can occur while constructing a `DOMString`.
pub enum DOMStringErrorType {
    /// Converting a JS value to a JS string failed.
    JSConversionError,
}
306
307impl DOMString {
308 pub fn new() -> DOMString {
310 Default::default()
311 }
312
313 pub fn from_js_string(
316 cx: SafeJSContext,
317 value: js::gc::HandleValue,
318 ) -> Result<DOMString, DOMStringErrorType> {
319 let string_ptr = unsafe { js::rust::ToString(*cx, value) };
320 if string_ptr.is_null() {
321 debug!("ToString failed");
322 Err(DOMStringErrorType::JSConversionError)
323 } else {
324 let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
325 let inner = if latin1 {
326 let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
327 DOMStringType::JSString(h)
328 } else {
329 DOMStringType::Rust(unsafe {
331 jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
332 })
333 };
334 Ok(DOMString(RefCell::new(inner)))
335 }
336 }
337
338 fn ensure_rust_string(&self) -> RefMut<'_, String> {
341 let inner = self.0.borrow_mut();
342 RefMut::map(inner, |inner| inner.ensure_rust_string())
343 }
344
345 #[expect(unused)]
347 fn debug_js(&self) {
348 match *self.0.borrow() {
349 DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
350 DOMStringType::JSString(ref rooted_traceable_box) => {
351 let s = unsafe {
352 jsstr_to_string(
353 Runtime::get().expect("JS runtime has shut down").as_ptr(),
354 ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
355 )
356 };
357 info!("JSString ({})", s);
358 },
359 #[cfg(test)]
360 DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
361 }
362 }
363
364 pub fn str(&self) -> StringView<'_> {
366 {
367 let inner = self.0.borrow();
368 if matches!(&*inner, DOMStringType::Rust(..)) {
369 return StringView(Ref::map(inner, |inner| match inner {
370 DOMStringType::Rust(string) => string.as_str(),
371 _ => unreachable!("Guaranteed by condition above"),
372 }));
373 }
374 }
375
376 self.ensure_rust_string();
377 self.str()
378 }
379
380 pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
383 let inner = self.0.borrow();
384 match &*inner {
385 DOMStringType::Rust(..) => {
386 EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
387 },
388 _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
389 }
390 }
391
392 pub fn clear(&mut self) {
393 let mut inner = self.0.borrow_mut();
394 let DOMStringType::Rust(string) = &mut *inner else {
395 *inner = DOMStringType::Rust(String::new());
396 return;
397 };
398 string.clear();
399 }
400
401 pub fn is_empty(&self) -> bool {
402 self.encoded_bytes().is_empty()
403 }
404
405 pub fn len(&self) -> usize {
410 self.encoded_bytes().len()
411 }
412
413 pub fn len_utf8(&self) -> Utf8CodeUnitLength {
420 Utf8CodeUnitLength(self.len())
421 }
422
423 pub fn len_utf16(&self) -> Utf16CodeUnitLength {
428 Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
429 }
430
431 pub fn make_ascii_lowercase(&mut self) {
432 self.0
433 .borrow_mut()
434 .ensure_rust_string()
435 .make_ascii_lowercase();
436 }
437
438 pub fn push_str(&mut self, string_to_push: &str) {
439 self.0
440 .borrow_mut()
441 .ensure_rust_string()
442 .push_str(string_to_push);
443 }
444
445 pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
446 if self.is_empty() {
447 return;
448 }
449
450 let mut inner = self.0.borrow_mut();
451 let string = inner.ensure_rust_string();
452 let trailing_whitespace_len = string
453 .trim_end_matches(|character: char| character.is_ascii_whitespace())
454 .len();
455 string.truncate(trailing_whitespace_len);
456 if string.is_empty() {
457 return;
458 }
459
460 let first_non_whitespace = string
461 .find(|character: char| !character.is_ascii_whitespace())
462 .unwrap();
463 string.replace_range(0..first_non_whitespace, "");
464 }
465
466 pub fn is_valid_floating_point_number_string(&self) -> bool {
468 static RE: LazyLock<Regex> = LazyLock::new(|| {
469 Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
470 });
471
472 RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
473 self.parse_floating_point_number().is_some()
474 }
475
476 pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
477 self.str().parse::<T>()
478 }
479
480 pub fn parse_floating_point_number(&self) -> Option<f64> {
482 parse_floating_point_number(&self.str())
483 }
484
485 pub fn set_best_representation_of_the_floating_point_number(&mut self) {
487 if let Some(val) = self.parse_floating_point_number() {
488 let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
490
491 *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
492 }
493 }
494
495 pub fn to_lowercase(&self) -> String {
496 self.str().to_lowercase()
497 }
498
499 pub fn to_uppercase(&self) -> String {
500 self.str().to_uppercase()
501 }
502
503 pub fn strip_newlines(&mut self) {
504 self.0
507 .borrow_mut()
508 .ensure_rust_string()
509 .retain(|character| character != '\r' && character != '\n');
510 }
511
512 pub fn normalize_newlines(&mut self) {
514 let mut inner = self.0.borrow_mut();
518 let string = inner.ensure_rust_string();
519 *string = string.replace("\r\n", "\n").replace("\r", "\n")
520 }
521
522 pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
523 let new_string = self.str().to_owned();
524 DOMString(RefCell::new(DOMStringType::Rust(
525 new_string.replace(needle, replace_char),
526 )))
527 }
528
529 pub fn starts_with(&self, c: char) -> bool {
531 if !c.is_ascii() {
532 self.str().starts_with(c)
533 } else {
534 self.encoded_bytes().bytes().starts_with(&[c as u8])
537 }
538 }
539
540 pub fn starts_with_str(&self, needle: &str) -> bool {
541 self.str().starts_with(needle)
542 }
543
544 pub fn contains(&self, needle: &str) -> bool {
545 self.str().contains(needle)
546 }
547
548 pub fn to_ascii_lowercase(&self) -> String {
549 let conversion = match self.encoded_bytes() {
550 EncodedBytes::Latin1(bytes) => {
551 if bytes.iter().all(|c| *c <= ASCII_END) {
552 Some(unsafe {
554 String::from_utf8_unchecked(
555 bytes
556 .iter()
557 .map(|c| {
558 if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
559 c + 32
560 } else {
561 *c
562 }
563 })
564 .collect(),
565 )
566 })
567 } else {
568 None
569 }
570 },
571 EncodedBytes::Utf8(bytes) => unsafe {
572 Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
574 },
575 };
576 if let Some(conversion) = conversion {
578 conversion
579 } else {
580 self.str().to_ascii_lowercase()
581 }
582 }
583
584 fn contains_space_characters(
585 &self,
586 latin1_characters: &'static [u8],
587 utf8_characters: &'static [char],
588 ) -> bool {
589 match self.encoded_bytes() {
590 EncodedBytes::Latin1(items) => {
591 latin1_characters.iter().any(|byte| items.contains(byte))
592 },
593 EncodedBytes::Utf8(bytes) => {
594 let s = unsafe { str::from_utf8_unchecked(&bytes) };
596 s.contains(utf8_characters)
597 },
598 }
599 }
600
601 pub fn contains_tab_or_newline(&self) -> bool {
603 const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
604 const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
605
606 self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
607 }
608
609 pub fn contains_html_space_characters(&self) -> bool {
611 const SPACE_BYTES: [u8; 5] = [
612 ASCII_TAB,
613 ASCII_NEWLINE,
614 ASCII_FORMFEED,
615 ASCII_CR,
616 ASCII_SPACE,
617 ];
618 self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
619 }
620
621 pub fn as_bytes(&self) -> BytesView<'_> {
623 if self.is_ascii() {
627 BytesView(self.0.borrow())
628 } else {
629 self.ensure_rust_string();
630 BytesView(self.0.borrow())
631 }
632 }
633
634 pub fn is_ascii_lowercase(&self) -> bool {
636 match self.encoded_bytes() {
637 EncodedBytes::Latin1(items) => items
638 .iter()
639 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
640 EncodedBytes::Utf8(s) => s
641 .iter()
642 .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
643 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
644 }
645 }
646
647 pub fn is_ascii(&self) -> bool {
649 self.encoded_bytes().bytes().is_ascii()
650 }
651
652 pub fn is_valid_for_cookie(&self) -> bool {
656 match self.encoded_bytes() {
657 EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
658 .iter()
659 .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
660 }
661 }
662
663 fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
666 match self.encoded_bytes() {
667 EncodedBytes::Latin1(latin1_bytes) => {
669 if latin1_bytes.iter().all(|character| character.is_ascii()) {
670 return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
673 }
674 },
675 EncodedBytes::Utf8(utf8_bytes) => {
676 return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
679 },
680 };
681 callback(self.str().deref())
682 }
683}
684
/// Parses `input` as a finite floating-point number.
///
/// Surrounding whitespace is ignored. The accepted syntax is whatever
/// `str::parse::<f64>` accepts, except that the following are rejected:
///
/// * a leading `+` sign,
/// * a trailing `.` (for example `"1."`),
/// * anything that parses to NaN or an infinity.
///
/// The sign and trailing-dot checks run on the trimmed text, so `" +1"` and
/// `"1. "` are rejected exactly like `"+1"` and `"1."`.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    let trimmed = input.trim();
    if trimmed.starts_with('+') || trimmed.ends_with('.') {
        return None;
    }
    trimmed
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite())
}
698
/// A view of a `DOMString`'s stored bytes. It holds a shared borrow of the
/// string's storage for as long as it is alive.
pub struct BytesView<'a>(Ref<'a, DOMStringType>);
700
701impl Deref for BytesView<'_> {
702 type Target = [u8];
703
704 fn deref(&self) -> &Self::Target {
705 self.0.as_raw_bytes()
707 }
708}
709
710impl Ord for DOMString {
711 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
712 self.str().cmp(&other.str())
713 }
714}
715
716impl PartialOrd for DOMString {
717 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
718 self.str().partial_cmp(&other.str())
719 }
720}
721
722impl Extend<char> for DOMString {
723 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
724 self.0.borrow_mut().ensure_rust_string().extend(iter)
725 }
726}
727
impl ToJSValConvertible for DOMString {
    /// Writes this string into `rval` as a JS value.
    ///
    /// A Rust string goes through `String`'s own conversion. A JS-backed
    /// string hands back the stored JS string pointer directly. The test-only
    /// Latin-1 variant is re-encoded to UTF-8 first.
    unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
        let val = self.0.borrow();
        match *val {
            DOMStringType::Rust(ref s) => unsafe {
                s.to_jsval(cx, rval);
            },
            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
                rval.set(StringValue(&*rooted_traceable_box.get()));
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(ref items) => {
                // A Latin-1 byte expands to at most two UTF-8 bytes.
                let mut v = vec![0; items.len() * 2];
                let real_size =
                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
                v.truncate(real_size);

                String::from_utf8(v)
                    .expect("Error in constructin test string")
                    .to_jsval(cx, rval);
            },
        };
    }
}
752
753impl std::hash::Hash for DOMString {
754 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
755 self.str().hash(state);
756 }
757}
758
759impl std::fmt::Display for DOMString {
760 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
761 fmt::Display::fmt(self.str().deref(), f)
762 }
763}
764
765impl std::cmp::PartialEq<str> for DOMString {
766 fn eq(&self, other: &str) -> bool {
767 if other.is_ascii() {
768 *other.as_bytes() == *self.encoded_bytes().bytes()
769 } else {
770 self.str().deref() == other
771 }
772 }
773}
774
775impl std::cmp::PartialEq<&str> for DOMString {
776 fn eq(&self, other: &&str) -> bool {
777 self.eq(*other)
778 }
779}
780
781impl std::cmp::PartialEq<String> for DOMString {
782 fn eq(&self, other: &String) -> bool {
783 self.eq(other.as_str())
784 }
785}
786
787impl std::cmp::PartialEq<DOMString> for String {
788 fn eq(&self, other: &DOMString) -> bool {
789 other.eq(self)
790 }
791}
792
793impl std::cmp::PartialEq<DOMString> for str {
794 fn eq(&self, other: &DOMString) -> bool {
795 other.eq(self)
796 }
797}
798
799impl std::cmp::PartialEq for DOMString {
800 fn eq(&self, other: &DOMString) -> bool {
801 let result = match (self.encoded_bytes(), other.encoded_bytes()) {
802 (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
803 Some(*bytes == *other_bytes)
804 },
805 (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
806 if other_bytes.is_ascii() =>
807 {
808 Some(*bytes == *other_bytes)
809 },
810 (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
811 Some(*bytes == *other_bytes)
812 },
813 (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
814 Some(*bytes == *other_bytes)
815 },
816 _ => None,
817 };
818
819 if let Some(eq_result) = result {
820 return eq_result;
821 }
822
823 *self.str() == *other.str()
824 }
825}
826
// `DOMString` equality compares string contents, which is a total
// equivalence relation.
impl std::cmp::Eq for DOMString {}
828
829impl From<std::string::String> for DOMString {
830 fn from(string: String) -> Self {
831 DOMString(RefCell::new(DOMStringType::Rust(string)))
832 }
833}
834
835impl From<&str> for DOMString {
836 fn from(string: &str) -> Self {
837 String::from(string).into()
838 }
839}
840
841impl From<DOMString> for LocalName {
842 fn from(dom_string: DOMString) -> LocalName {
843 dom_string.with_str_reference(|string| LocalName::from(string))
844 }
845}
846
847impl From<&DOMString> for LocalName {
848 fn from(dom_string: &DOMString) -> LocalName {
849 dom_string.with_str_reference(|string| LocalName::from(string))
850 }
851}
852
853impl From<DOMString> for Namespace {
854 fn from(dom_string: DOMString) -> Namespace {
855 dom_string.with_str_reference(|string| Namespace::from(string))
856 }
857}
858
859impl From<DOMString> for Atom {
860 fn from(dom_string: DOMString) -> Atom {
861 dom_string.with_str_reference(|string| Atom::from(string))
862 }
863}
864
865impl From<DOMString> for String {
866 fn from(val: DOMString) -> Self {
867 val.str().to_owned()
868 }
869}
870
871impl From<DOMString> for Vec<u8> {
872 fn from(value: DOMString) -> Self {
873 value.str().as_bytes().to_vec()
874 }
875}
876
877impl From<Cow<'_, str>> for DOMString {
878 fn from(value: Cow<'_, str>) -> Self {
879 DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
880 }
881}
882
// Implementation detail of `match_domstring_ascii!`: expands the arms into a
// chain of `if`/`else` byte comparisons, ending in a `match` on the final
// catch-all pattern.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // A string-literal arm: compare the literal's bytes against the input's
    // stored bytes, otherwise recurse into the remaining arms. In debug builds
    // a non-ASCII literal trips the `debug_assert!`.
    ($variant: expr, $input: expr, $ascii_literal: literal => $then: expr, $($rest:tt)*) => {
        if {
            debug_assert!(($ascii_literal).is_ascii());
            $ascii_literal.as_bytes()
        } == $input.bytes() {
            $then
        } else {
            $crate::match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // The final arm: an arbitrary pattern (typically `_`) matched against the
    // input value itself.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
902
// `match`-like macro for comparing a `DOMString` against ASCII string
// literals without converting its storage. Arms are `"literal" => expr,`
// followed by one final pattern arm (e.g. `_ => expr,`). The trailing comma
// after the last arm is required by the inner macro.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::domstring::EncodedBytes;

            // Borrow the stored bytes once; every arm compares against them.
            let encoded_bytes = $input.encoded_bytes();
            match encoded_bytes {
                EncodedBytes::Latin1(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, encoded_bytes, $($tail)*)
                }
                EncodedBytes::Utf8(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, encoded_bytes, $($tail)*)
                }

            }
        }
    };
}
935
#[cfg(test)]
mod tests {
    use super::*;

    /// Pilcrow sign (U+00B6) as a Latin-1 byte.
    const LATIN1_PILLCROW: u8 = 0xB6;
    /// Pilcrow sign (U+00B6) as UTF-8 bytes.
    const UTF8_PILLCROW: [u8; 2] = [194, 182];
    /// Superscript two (U+00B2) as a Latin-1 byte.
    const LATIN1_POWER2: u8 = 0xB2;

    /// Builds a `DOMString` backed by the test-only Latin-1 storage.
    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
    }

    #[test]
    fn string_functions() {
        let s = DOMString::from("AbBcC❤&%$#");
        let s_copy = s.clone();
        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
        assert_eq!(s, s_copy);
        assert_eq!(s.len(), 12);
        assert_eq!(s_copy.len(), 12);
        assert!(s.starts_with('A'));
        let s2 = DOMString::from("");
        assert!(s2.is_empty());
    }

    #[test]
    fn string_functions_latin1() {
        {
            let s = from_latin1(vec![
                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
            ]);
            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
        }
        {
            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
            assert_eq!(s.to_ascii_lowercase(), "abbcc");
        }
        {
            let s = from_latin1(vec![
                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
            ]);
            assert_eq!(s.len(), 11);
            assert!(s.starts_with('A'));
        }
        {
            let s = from_latin1(vec![]);
            assert!(s.is_empty());
        }
    }

    /// The UTF-8 length reported for Latin-1 storage must match the length of
    /// the equivalent Rust string, before and after conversion.
    #[test]
    fn test_length() {
        let s1 = from_latin1(vec![
            0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
            0xAE, 0xAF,
        ]);
        let s2 = from_latin1(vec![
            0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
            0xBE, 0xBF,
        ]);
        let s3 = from_latin1(vec![
            0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
            0xCE, 0xCF,
        ]);
        let s4 = from_latin1(vec![
            0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
            0xDE, 0xDF,
        ]);
        let s5 = from_latin1(vec![
            0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
            0xEE, 0xEF,
        ]);
        let s6 = from_latin1(vec![
            0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
            0xFE, 0xFF,
        ]);

        let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
        let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
        let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
        // U+00D7 and U+00D8 are written as escapes so the pair cannot be
        // mangled by encoding or bidi-aware tooling.
        let s4_utf8 = String::from("ÐÑÒÓÔÕÖ\u{00D7}\u{00D8}ÙÚÛÜÝÞß");
        let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
        let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");

        // Lengths computed straight from the Latin-1 bytes.
        assert_eq!(s1.len(), s1_utf8.len());
        assert_eq!(s2.len(), s2_utf8.len());
        assert_eq!(s3.len(), s3_utf8.len());
        assert_eq!(s4.len(), s4_utf8.len());
        assert_eq!(s5.len(), s5_utf8.len());
        assert_eq!(s6.len(), s6_utf8.len());

        // Lengths after converting the storage to Rust strings.
        s1.ensure_rust_string();
        s2.ensure_rust_string();
        s3.ensure_rust_string();
        s4.ensure_rust_string();
        s5.ensure_rust_string();
        s6.ensure_rust_string();
        assert_eq!(s1.len(), s1_utf8.len());
        assert_eq!(s2.len(), s2_utf8.len());
        assert_eq!(s3.len(), s3_utf8.len());
        assert_eq!(s4.len(), s4_utf8.len());
        assert_eq!(s5.len(), s5_utf8.len());
        assert_eq!(s6.len(), s6_utf8.len());
    }

    #[test]
    fn test_convert() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        s.ensure_rust_string();
        assert_eq!(&*s.str(), "abc%$");
    }

    #[test]
    fn partial_eq() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        let string = String::from("abc%$");
        let s2 = DOMString::from(string.clone());
        assert_eq!(s, s2);
        assert_eq!(s, string);
    }

    #[test]
    fn encoded_latin1_bytes() {
        let original_latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
        let dom_string = from_latin1(original_latin1_bytes.clone());
        let string_latin1_bytes = match dom_string.encoded_bytes() {
            EncodedBytes::Latin1(bytes) => bytes,
            _ => unreachable!("Expected Latin1 encoded bytes"),
        };
        assert_eq!(*original_latin1_bytes, *string_latin1_bytes);
    }

    #[test]
    fn testing_stringview() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);

        assert_eq!(
            s.str().chars().collect::<Vec<char>>(),
            vec!['a', 'b', 'c', '%', '$', '²']
        );
        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
    }

    /// Equal strings must hash equally regardless of their storage.
    #[test]
    fn test_hash() {
        use std::hash::{DefaultHasher, Hash, Hasher};
        fn hash_value(d: &DOMString) -> u64 {
            let mut hasher = DefaultHasher::new();
            d.hash(&mut hasher);
            hasher.finish()
        }

        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
        s_converted.ensure_rust_string();
        let s2 = DOMString::from("abc%$²");

        let hash_s = hash_value(&s);
        let hash_s_converted = hash_value(&s_converted);
        let hash_s2 = hash_value(&s2);

        assert_eq!(hash_s, hash_s2);
        assert_eq!(hash_s, hash_s_converted);
    }

    /// `match_domstring_ascii!` used in statement position.
    #[test]
    fn test_match_executing() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abc" => assert!(true),
                "bcd" => assert!(false),
                _ => (),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            match_domstring_ascii!( s,
                "abc/" => assert!(true),
                "bcd" => assert!(false),
                _ => (),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            match_domstring_ascii!( s,
                "bcd" => assert!(false),
                "abc%$" => assert!(true),
                _ => (),
            );
        }

        {
            let s = DOMString::from("abcde");
            match_domstring_ascii!( s,
                "abc" => assert!(false),
                "bcd" => assert!(false),
                _ => assert!(true),
            );
        }
        {
            let s = DOMString::from("abc%$");
            match_domstring_ascii!( s,
                "bcd" => assert!(false),
                "abc%$" => assert!(true),
                _ => (),
            );
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abcdd" => assert!(false),
                "bcd" => assert!(false),
                _ => (),
            );
        }
    }

    /// `match_domstring_ascii!` used in expression position.
    #[test]
    fn test_match_returning_result() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abc" => true,
                "bcd" => false,
                _ => false,
            );
            assert_eq!(res, true);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            let res = match_domstring_ascii!( s,
                "abc/" => true,
                "bcd" => false,
                _ => false,
            );
            assert_eq!(res, true);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert_eq!(res, true);
        }

        {
            let s = DOMString::from("abcde");
            let res = match_domstring_ascii!( s,
                "abc" => false,
                "bcd" => false,
                _ => true,
            );
            assert_eq!(res, true);
        }
        {
            let s = DOMString::from("abc%$");
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert_eq!(res, true);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abcdd" => false,
                "bcd" => false,
                _ => true,
            );
            assert_eq!(res, true);
        }
    }

    /// A non-ASCII literal arm must trip the macro's `debug_assert!`.
    #[test]
    #[should_panic]
    fn test_match_panic() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "❤" => true,
            _ => false,);
    }

    /// Same as above, with the non-ASCII literal in a later arm.
    #[test]
    #[should_panic]
    fn test_match_panic2() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "abc" => false,
            "❤" => true,
            _ => false,
        );
    }

    #[test]
    fn test_strip_whitespace() {
        {
            let mut s = from_latin1(vec![
                b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
            ]);

            s.strip_leading_and_trailing_ascii_whitespace();
            s.ensure_rust_string();
            assert_eq!(&*s.str(), "abc%$²");
        }
        {
            let mut s = DOMString::from("   \n  abc%$ ");

            s.strip_leading_and_trailing_ascii_whitespace();
            s.ensure_rust_string();
            assert_eq!(&*s.str(), "abc%$");
        }
    }

    /// Every HTML space character must be detected in both the Latin-1 and
    /// the converted Rust representation.
    #[test]
    fn contains_html_space_characters() {
        for space in [
            ASCII_TAB,
            ASCII_NEWLINE,
            ASCII_FORMFEED,
            ASCII_CR,
            ASCII_SPACE,
        ] {
            let s = from_latin1(vec![b'a', b'a', b'a', space, b'a', b'a']);
            assert!(s.contains_html_space_characters());
            s.ensure_rust_string();
            assert!(s.contains_html_space_characters());
        }

        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
        assert!(!s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(!s.contains_html_space_characters());
    }

    #[test]
    fn atom() {
        let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
        let atom1 = Atom::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = Atom::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
        let atom3 = Atom::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn namespace() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = Namespace::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = Namespace::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = Namespace::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn localname() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = LocalName::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = LocalName::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = LocalName::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn is_ascii_lowercase() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert!(s.is_ascii_lowercase());
        // '`' is the byte immediately before 'a'.
        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from("`aaaz");
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from("aaaz");
        assert!(s.is_ascii_lowercase());
    }

    #[test]
    fn test_as_bytes() {
        const ASCII_SMALL_A: u8 = b'a';
        const ASCII_SMALL_Z: u8 = b'z';

        // Non-ASCII Latin-1 data must come back re-encoded as UTF-8.
        let v1 = vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'];
        let s = from_latin1(v1.clone());
        assert_eq!(
            *s.as_bytes(),
            [
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                UTF8_PILLCROW[0],
                UTF8_PILLCROW[1],
                ASCII_SMALL_A,
                ASCII_SMALL_A
            ]
        );

        // ASCII-only Latin-1 data is returned unchanged.
        let v2 = vec![b'a', b'a', b'a', b'a', b'z'];
        let s = from_latin1(v2.clone());
        assert_eq!(
            *s.as_bytes(),
            [
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_Z
            ]
        );

        let str = "abc%$²".to_owned();
        let s = DOMString::from(str.clone());
        assert_eq!(&*s.as_bytes(), str.as_bytes());
        let str = "AbBcC❤&%$#".to_owned();
        let s = DOMString::from(str.clone());
        assert_eq!(&*s.as_bytes(), str.as_bytes());
    }
}