1#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::conversions::{ToJSValConvertible, jsstr_to_string};
17use js::gc::MutableHandleValue;
18use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
19use js::jsval::StringValue;
20use js::rust::{Runtime, Trace};
21use malloc_size_of::MallocSizeOfOps;
22use num_traits::{ToPrimitive, Zero};
23use regex::Regex;
24use servo_base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27use zeroize::Zeroize;
28
29use crate::script_runtime::JSContext as SafeJSContext;
30use crate::trace::RootedTraceableBox;
31
/// Last byte of the ASCII range (DEL). Latin-1 bytes up to and including this
/// value are encoded as a single byte in UTF-8; larger bytes need two.
const ASCII_END: u8 = 0x7F;
/// `A`
const ASCII_CAPITAL_A: u8 = 0x41;
/// `Z`
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `a`
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `z`
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// Horizontal tab.
const ASCII_TAB: u8 = 0x09;
/// Line feed.
const ASCII_NEWLINE: u8 = 0x0A;
/// Form feed.
const ASCII_FORMFEED: u8 = 0x0C;
/// Carriage return.
const ASCII_CR: u8 = 0x0D;
/// Space.
const ASCII_SPACE: u8 = 0x20;
42
43unsafe fn get_latin1_string_bytes(
46 rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
47) -> &[u8] {
48 debug_assert!(!rooted_traceable_box.get().is_null());
49 let mut length = 0;
50 unsafe {
51 let chars = JS_GetLatin1StringCharsAndLength(
52 Runtime::get().expect("JS runtime has shut down").as_ptr(),
53 ptr::null(),
54 rooted_traceable_box.get(),
55 &mut length,
56 );
57 assert!(!chars.is_null());
58 slice::from_raw_parts(chars, length)
59 }
60}
61
/// A borrowed view of a string's raw bytes, tagged with the encoding those
/// bytes are in.
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// Latin-1 bytes: one byte per character.
    Latin1(Ref<'a, [u8]>),
    /// UTF-8 bytes.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Returns the underlying bytes as stored, without any re-encoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// Returns the number of bytes the string occupies once encoded as UTF-8.
    pub fn len(&self) -> usize {
        match self {
            // Every ASCII byte (0x00..=0x7F, DEL included) maps to one UTF-8
            // byte; every other Latin-1 byte (0x80..=0xFF) maps to two.
            Self::Latin1(bytes) => bytes
                .iter()
                .map(|byte| if byte.is_ascii() { 1 } else { 2 })
                .sum(),
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Returns `true` when there are no bytes at all.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
96
/// The representation currently backing a `DOMString`.
enum DOMStringType {
    /// An owned Rust (UTF-8) string.
    Rust(String),
    /// A string owned by the JS engine, kept alive through a rooted heap box.
    JSString(RootedTraceableBox<Heap<*mut JSString>>),
    /// Test-only stand-in holding raw Latin-1 bytes.
    #[cfg(test)]
    Latin1Vec(Vec<u8>),
}
107
108impl Default for DOMStringType {
109 fn default() -> Self {
110 Self::Rust(Default::default())
111 }
112}
113
114impl Zeroize for DOMStringType {
115 fn zeroize(&mut self) {
116 self.ensure_rust_string().zeroize()
117 }
118}
119
impl DOMStringType {
    /// Returns the bytes of the current representation without converting
    /// them: the UTF-8 bytes of a `Rust` string, the engine-provided character
    /// data of a `JSString`, or the stored bytes of the test-only `Latin1Vec`.
    fn as_raw_bytes(&self) -> &[u8] {
        match self {
            DOMStringType::Rust(s) => s.as_bytes(),
            DOMStringType::JSString(rooted_traceable_box) => unsafe {
                get_latin1_string_bytes(rooted_traceable_box)
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(items) => items,
        }
    }

    /// Makes sure `self` is the `Rust` variant, converting in place when it is
    /// not, and returns a mutable reference to the contained `String`.
    ///
    /// # Panics
    ///
    /// For a `JSString`, panics if the JS runtime is gone or the held string
    /// pointer is null.
    fn ensure_rust_string(&mut self) -> &mut String {
        let new_string = match self {
            // Already a Rust string: hand it out directly.
            DOMStringType::Rust(string) => return string,
            DOMStringType::JSString(rooted_traceable_box) => unsafe {
                jsstr_to_string(
                    Runtime::get().expect("JS runtime has shut down").as_ptr(),
                    NonNull::new(rooted_traceable_box.get()).unwrap(),
                )
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(items) => {
                // A Latin-1 byte expands to at most two UTF-8 bytes, so twice
                // the input length is always a large enough buffer.
                let mut v = vec![0; items.len() * 2];
                let real_size =
                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
                v.truncate(real_size);

                // The buffer was produced by a Latin-1 → UTF-8 conversion.
                unsafe { String::from_utf8_unchecked(v) }
            },
        };
        *self = DOMStringType::Rust(new_string);
        // `self` is now `Rust`, so this call returns through the first arm.
        self.ensure_rust_string()
    }
}
161
162#[derive(Debug)]
165pub struct StringView<'a>(Ref<'a, str>);
166
167impl StringView<'_> {
168 pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
169 self.split(HTML_SPACE_CHARACTERS)
170 .filter(|string| !string.is_empty())
171 }
172}
173
174impl From<StringView<'_>> for String {
175 fn from(string_view: StringView<'_>) -> Self {
176 string_view.0.to_string()
177 }
178}
179
180impl Deref for StringView<'_> {
181 type Target = str;
182 fn deref(&self) -> &str {
183 &(self.0)
184 }
185}
186
187impl AsRef<str> for StringView<'_> {
188 fn as_ref(&self) -> &str {
189 &(self.0)
190 }
191}
192
193impl PartialEq for StringView<'_> {
194 fn eq(&self, other: &Self) -> bool {
195 self.0.eq(&*(other.0))
196 }
197}
198
199impl PartialEq<&str> for StringView<'_> {
200 fn eq(&self, other: &&str) -> bool {
201 self.0.eq(*other)
202 }
203}
204
/// Marker impl: `StringView` equality is plain `str` equality.
impl Eq for StringView<'_> {}
206
207impl PartialOrd for StringView<'_> {
208 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
209 self.0.partial_cmp(&**other)
210 }
211}
212
213impl Ord for StringView<'_> {
214 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
215 self.0.cmp(other)
216 }
217}
218
219unsafe impl Trace for DOMStringType {
225 unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
226 unsafe {
227 match self {
228 DOMStringType::Rust(_s) => {},
229 DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
230 #[cfg(test)]
231 DOMStringType::Latin1Vec(_s) => {},
232 }
233 }
234 }
235}
236
237impl malloc_size_of::MallocSizeOf for DOMStringType {
238 fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
239 match self {
240 DOMStringType::Rust(s) => s.size_of(ops),
241 DOMStringType::JSString(_rooted_traceable_box) => {
242 0
244 },
245 #[cfg(test)]
246 DOMStringType::Latin1Vec(s) => s.size_of(ops),
247 }
248 }
249}
250
251impl std::fmt::Debug for DOMStringType {
252 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
253 match self {
254 DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
255 DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
256 #[cfg(test)]
257 DOMStringType::Latin1Vec(s) => f
258 .debug_struct("DOMString")
259 .field("latin1_string", s)
260 .finish(),
261 }
262 }
263}
264
/// A string whose backing representation (`DOMStringType`) sits behind a
/// `RefCell`, so it can be converted to a Rust `String` on demand even through
/// a shared reference.
#[repr(transparent)]
#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
303
304impl Clone for DOMString {
305 fn clone(&self) -> Self {
306 self.ensure_rust_string().clone().into()
307 }
308}
309
/// Errors produced when building a `DOMString`.
pub enum DOMStringErrorType {
    /// Converting the JS value to a string failed.
    JSConversionError,
}
313
314impl DOMString {
315 pub fn new() -> DOMString {
317 Default::default()
318 }
319
320 pub fn from_js_string(
323 cx: SafeJSContext,
324 value: js::gc::HandleValue,
325 ) -> Result<DOMString, DOMStringErrorType> {
326 let string_ptr = unsafe { js::rust::ToString(*cx, value) };
327 if string_ptr.is_null() {
328 debug!("ToString failed");
329 Err(DOMStringErrorType::JSConversionError)
330 } else {
331 let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
332 let inner = if latin1 {
333 let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
334 DOMStringType::JSString(h)
335 } else {
336 DOMStringType::Rust(unsafe {
338 jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
339 })
340 };
341 Ok(DOMString(RefCell::new(inner)))
342 }
343 }
344
345 fn ensure_rust_string(&self) -> RefMut<'_, String> {
348 let inner = self.0.borrow_mut();
349 RefMut::map(inner, |inner| inner.ensure_rust_string())
350 }
351
352 #[expect(unused)]
354 fn debug_js(&self) {
355 match *self.0.borrow() {
356 DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
357 DOMStringType::JSString(ref rooted_traceable_box) => {
358 let s = unsafe {
359 jsstr_to_string(
360 Runtime::get().expect("JS runtime has shut down").as_ptr(),
361 ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
362 )
363 };
364 info!("JSString ({})", s);
365 },
366 #[cfg(test)]
367 DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
368 }
369 }
370
371 pub fn str(&self) -> StringView<'_> {
373 {
374 let inner = self.0.borrow();
375 if matches!(&*inner, DOMStringType::Rust(..)) {
376 return StringView(Ref::map(inner, |inner| match inner {
377 DOMStringType::Rust(string) => string.as_str(),
378 _ => unreachable!("Guaranteed by condition above"),
379 }));
380 }
381 }
382
383 self.ensure_rust_string();
384 self.str()
385 }
386
387 pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
390 let inner = self.0.borrow();
391 match &*inner {
392 DOMStringType::Rust(..) => {
393 EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
394 },
395 _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
396 }
397 }
398
399 pub fn clear(&mut self) {
400 let mut inner = self.0.borrow_mut();
401 let DOMStringType::Rust(string) = &mut *inner else {
402 *inner = DOMStringType::Rust(String::new());
403 return;
404 };
405 string.clear();
406 }
407
408 pub fn is_empty(&self) -> bool {
409 self.encoded_bytes().is_empty()
410 }
411
412 pub fn len(&self) -> usize {
417 self.encoded_bytes().len()
418 }
419
420 pub fn len_utf8(&self) -> Utf8CodeUnitLength {
427 Utf8CodeUnitLength(self.len())
428 }
429
430 pub fn len_utf16(&self) -> Utf16CodeUnitLength {
435 Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
436 }
437
438 pub fn make_ascii_lowercase(&mut self) {
439 self.0
440 .borrow_mut()
441 .ensure_rust_string()
442 .make_ascii_lowercase();
443 }
444
445 pub fn push_str(&mut self, string_to_push: &str) {
446 self.0
447 .borrow_mut()
448 .ensure_rust_string()
449 .push_str(string_to_push);
450 }
451
452 pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
454 if self.is_empty() {
455 return;
456 }
457
458 let mut inner = self.0.borrow_mut();
459 let string = inner.ensure_rust_string();
460 let trailing_whitespace_len = string
461 .trim_end_matches(|character: char| character.is_ascii_whitespace())
462 .len();
463 string.truncate(trailing_whitespace_len);
464 if string.is_empty() {
465 return;
466 }
467
468 let first_non_whitespace = string
469 .find(|character: char| !character.is_ascii_whitespace())
470 .unwrap();
471 string.replace_range(0..first_non_whitespace, "");
472 }
473
474 pub fn is_valid_floating_point_number_string(&self) -> bool {
476 static RE: LazyLock<Regex> = LazyLock::new(|| {
477 Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
478 });
479
480 RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
481 self.parse_floating_point_number().is_some()
482 }
483
484 pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
485 self.str().parse::<T>()
486 }
487
488 pub fn parse_floating_point_number(&self) -> Option<f64> {
490 parse_floating_point_number(&self.str())
491 }
492
493 pub fn set_best_representation_of_the_floating_point_number(&mut self) {
495 if let Some(val) = self.parse_floating_point_number() {
496 let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
498
499 *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
500 }
501 }
502
503 pub fn to_lowercase(&self) -> String {
504 self.str().to_lowercase()
505 }
506
507 pub fn to_uppercase(&self) -> String {
508 self.str().to_uppercase()
509 }
510
511 pub fn strip_newlines(&mut self) {
512 self.0
515 .borrow_mut()
516 .ensure_rust_string()
517 .retain(|character| character != '\r' && character != '\n');
518 }
519
520 pub fn normalize_newlines(&mut self) {
522 let mut inner = self.0.borrow_mut();
526 let string = inner.ensure_rust_string();
527 *string = string.replace("\r\n", "\n").replace("\r", "\n")
528 }
529
530 pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
531 let new_string = self.str().to_owned();
532 DOMString(RefCell::new(DOMStringType::Rust(
533 new_string.replace(needle, replace_char),
534 )))
535 }
536
537 pub fn starts_with(&self, c: char) -> bool {
539 if !c.is_ascii() {
540 self.str().starts_with(c)
541 } else {
542 self.encoded_bytes().bytes().starts_with(&[c as u8])
545 }
546 }
547
548 pub fn starts_with_str(&self, needle: &str) -> bool {
549 self.str().starts_with(needle)
550 }
551
552 pub fn ends_with_str(&self, needle: &str) -> bool {
553 self.str().ends_with(needle)
554 }
555
556 pub fn contains(&self, needle: &str) -> bool {
557 self.str().contains(needle)
558 }
559
560 pub fn to_ascii_lowercase(&self) -> String {
561 let conversion = match self.encoded_bytes() {
562 EncodedBytes::Latin1(bytes) => {
563 if bytes.iter().all(|c| *c <= ASCII_END) {
564 Some(unsafe {
566 String::from_utf8_unchecked(
567 bytes
568 .iter()
569 .map(|c| {
570 if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
571 c + 32
572 } else {
573 *c
574 }
575 })
576 .collect(),
577 )
578 })
579 } else {
580 None
581 }
582 },
583 EncodedBytes::Utf8(bytes) => unsafe {
584 Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
586 },
587 };
588 if let Some(conversion) = conversion {
590 conversion
591 } else {
592 self.str().to_ascii_lowercase()
593 }
594 }
595
596 fn contains_space_characters(
597 &self,
598 latin1_characters: &'static [u8],
599 utf8_characters: &'static [char],
600 ) -> bool {
601 match self.encoded_bytes() {
602 EncodedBytes::Latin1(items) => {
603 latin1_characters.iter().any(|byte| items.contains(byte))
604 },
605 EncodedBytes::Utf8(bytes) => {
606 let s = unsafe { str::from_utf8_unchecked(&bytes) };
608 s.contains(utf8_characters)
609 },
610 }
611 }
612
613 pub fn contains_tab_or_newline(&self) -> bool {
615 const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
616 const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
617
618 self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
619 }
620
621 pub fn contains_html_space_characters(&self) -> bool {
623 const SPACE_BYTES: [u8; 5] = [
624 ASCII_TAB,
625 ASCII_NEWLINE,
626 ASCII_FORMFEED,
627 ASCII_CR,
628 ASCII_SPACE,
629 ];
630 self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
631 }
632
633 pub fn as_bytes(&self) -> BytesView<'_> {
635 if self.is_ascii() {
639 BytesView(self.0.borrow())
640 } else {
641 self.ensure_rust_string();
642 BytesView(self.0.borrow())
643 }
644 }
645
646 pub fn is_ascii_lowercase(&self) -> bool {
648 match self.encoded_bytes() {
649 EncodedBytes::Latin1(items) => items
650 .iter()
651 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
652 EncodedBytes::Utf8(s) => s
653 .iter()
654 .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
655 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
656 }
657 }
658
659 pub fn is_ascii(&self) -> bool {
661 self.encoded_bytes().bytes().is_ascii()
662 }
663
664 pub fn is_valid_for_cookie(&self) -> bool {
668 match self.encoded_bytes() {
669 EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
670 .iter()
671 .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
672 }
673 }
674
675 fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
678 match self.encoded_bytes() {
679 EncodedBytes::Latin1(latin1_bytes) => {
681 if latin1_bytes.iter().all(|character| character.is_ascii()) {
682 return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
685 }
686 },
687 EncodedBytes::Utf8(utf8_bytes) => {
688 return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
691 },
692 };
693 callback(self.str().deref())
694 }
695
696 pub fn normalize_crlf(&self) -> String {
704 let s = self.str();
705 let mut buf = String::new();
706 let mut prev = ' ';
707 for ch in s.chars() {
708 match ch {
709 '\n' if prev != '\r' => {
710 buf.push('\r');
711 buf.push('\n');
712 },
713 '\n' => {
714 buf.push('\n');
715 },
716 _ if prev == '\r' => {
719 buf.push('\n');
720 buf.push(ch);
721 },
722 _ => buf.push(ch),
723 };
724 prev = ch;
725 }
726 if prev == '\r' {
728 buf.push('\n');
729 }
730 buf
731 }
732}
733
/// Parses `input` as an `f64` after trimming surrounding whitespace.
///
/// Returns `None` when parsing fails, when the value is infinite or NaN, or
/// when the (untrimmed) `input` ends with `.` or starts with `+`.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    let value: f64 = input.trim().parse().ok()?;

    let rejected = value.is_infinite() ||
        value.is_nan() ||
        input.ends_with('.') ||
        input.starts_with('+');
    if rejected { None } else { Some(value) }
}
747
/// A byte-slice view of a `DOMString`, backed by a shared borrow of its inner
/// representation.
pub struct BytesView<'a>(Ref<'a, DOMStringType>);
749
750impl Deref for BytesView<'_> {
751 type Target = [u8];
752
753 fn deref(&self) -> &Self::Target {
754 self.0.as_raw_bytes()
756 }
757}
758
759impl Ord for DOMString {
760 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
761 self.str().cmp(&other.str())
762 }
763}
764
765impl PartialOrd for DOMString {
766 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
767 self.str().partial_cmp(&other.str())
768 }
769}
770
771impl Extend<char> for DOMString {
772 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
773 self.0.borrow_mut().ensure_rust_string().extend(iter)
774 }
775}
776
impl ToJSValConvertible for DOMString {
    /// Writes the string into `rval` as a JS value.
    ///
    /// A Rust string goes through `String::to_jsval`; a `JSString` is stored
    /// directly as a string value without copying its characters.
    unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
        let val = self.0.borrow();
        match *val {
            DOMStringType::Rust(ref s) => unsafe {
                s.to_jsval(cx, rval);
            },
            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
                rval.set(StringValue(&*rooted_traceable_box.get()));
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(ref items) => {
                // A Latin-1 byte expands to at most two UTF-8 bytes.
                let mut v = vec![0; items.len() * 2];
                let real_size =
                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
                v.truncate(real_size);

                String::from_utf8(v)
                    .expect("Error in constructin test string")
                    .to_jsval(cx, rval);
            },
        };
    }
}
801
802impl std::hash::Hash for DOMString {
803 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
804 self.str().hash(state);
805 }
806}
807
808impl std::fmt::Display for DOMString {
809 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
810 fmt::Display::fmt(self.str().deref(), f)
811 }
812}
813
814impl std::cmp::PartialEq<str> for DOMString {
815 fn eq(&self, other: &str) -> bool {
816 if other.is_ascii() {
817 *other.as_bytes() == *self.encoded_bytes().bytes()
818 } else {
819 self.str().deref() == other
820 }
821 }
822}
823
824impl std::cmp::PartialEq<&str> for DOMString {
825 fn eq(&self, other: &&str) -> bool {
826 self.eq(*other)
827 }
828}
829
830impl std::cmp::PartialEq<String> for DOMString {
831 fn eq(&self, other: &String) -> bool {
832 self.eq(other.as_str())
833 }
834}
835
836impl std::cmp::PartialEq<DOMString> for String {
837 fn eq(&self, other: &DOMString) -> bool {
838 other.eq(self)
839 }
840}
841
842impl std::cmp::PartialEq<DOMString> for str {
843 fn eq(&self, other: &DOMString) -> bool {
844 other.eq(self)
845 }
846}
847
848impl std::cmp::PartialEq for DOMString {
849 fn eq(&self, other: &DOMString) -> bool {
850 let result = match (self.encoded_bytes(), other.encoded_bytes()) {
851 (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
852 Some(*bytes == *other_bytes)
853 },
854 (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
855 if other_bytes.is_ascii() =>
856 {
857 Some(*bytes == *other_bytes)
858 },
859 (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
860 Some(*bytes == *other_bytes)
861 },
862 (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
863 Some(*bytes == *other_bytes)
864 },
865 _ => None,
866 };
867
868 if let Some(eq_result) = result {
869 return eq_result;
870 }
871
872 *self.str() == *other.str()
873 }
874}
875
/// Marker impl: `DOMString` equality is an equivalence relation on the text.
impl std::cmp::Eq for DOMString {}
877
878impl From<std::string::String> for DOMString {
879 fn from(string: String) -> Self {
880 DOMString(RefCell::new(DOMStringType::Rust(string)))
881 }
882}
883
884impl From<&str> for DOMString {
885 fn from(string: &str) -> Self {
886 String::from(string).into()
887 }
888}
889
890impl From<DOMString> for LocalName {
891 fn from(dom_string: DOMString) -> LocalName {
892 dom_string.with_str_reference(|string| LocalName::from(string))
893 }
894}
895
896impl From<&DOMString> for LocalName {
897 fn from(dom_string: &DOMString) -> LocalName {
898 dom_string.with_str_reference(|string| LocalName::from(string))
899 }
900}
901
902impl From<DOMString> for Namespace {
903 fn from(dom_string: DOMString) -> Namespace {
904 dom_string.with_str_reference(|string| Namespace::from(string))
905 }
906}
907
908impl From<DOMString> for Atom {
909 fn from(dom_string: DOMString) -> Atom {
910 dom_string.with_str_reference(|string| Atom::from(string))
911 }
912}
913
914impl From<DOMString> for String {
915 fn from(val: DOMString) -> Self {
916 val.str().to_owned()
917 }
918}
919
920impl From<DOMString> for Vec<u8> {
921 fn from(value: DOMString) -> Self {
922 value.str().as_bytes().to_vec()
923 }
924}
925
926impl From<Cow<'_, str>> for DOMString {
927 fn from(value: Cow<'_, str>) -> Self {
928 DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
929 }
930}
931
932impl Zeroize for DOMString {
933 fn zeroize(&mut self) {
934 self.0.borrow_mut().zeroize()
935 }
936}
937
// Recursive helper behind `match_domstring_ascii!`.
//
// Expands a list of `"literal" => expr,` arms into an `if`/`else` chain that
// compares each literal's bytes with `$input.bytes()`, ending with one
// ordinary pattern arm (typically `_ => ...,`) that is matched against
// `$input` itself. `$variant` is only passed along; it is not used in the
// expansion.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // One literal arm followed by more arms.
    ($variant: expr, $input: expr, $ascii_literal: literal => $then: expr, $($rest:tt)*) => {
        if {
            // Byte-wise comparison is only meaningful for ASCII literals.
            debug_assert!(($ascii_literal).is_ascii());
            $ascii_literal.as_bytes()
        } == $input.bytes() {
            $then
        } else {
            $crate::match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // Final arm: an ordinary pattern.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
957
// Matches a `DOMString`-like value against ASCII string literals without
// converting it to a Rust string.
//
// Usage: `match_domstring_ascii!(input, "a" => expr, "b" => expr, _ => expr,)`.
// Every literal must be ASCII (checked with `debug_assert!`), and the final
// arm must be an ordinary pattern followed by a trailing comma.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::domstring::EncodedBytes;

            let encoded_bytes = $input.encoded_bytes();
            match encoded_bytes {
                EncodedBytes::Latin1(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, encoded_bytes, $($tail)*)
                }
                EncodedBytes::Utf8(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, encoded_bytes, $($tail)*)
                }

            }
        }
    };
}
993
994#[cfg(test)]
995mod tests {
996 use super::*;
997
// Pilcrow sign (U+00B6) as a single Latin-1 byte.
const LATIN1_PILLCROW: u8 = 0xB6;
// The same character as two UTF-8 bytes (0xC2 0xB6).
const UTF8_PILLCROW: [u8; 2] = [194, 182];
// Superscript two (U+00B2) as a single Latin-1 byte.
const LATIN1_POWER2: u8 = 0xB2;
1001
1002 fn from_latin1(l1vec: Vec<u8>) -> DOMString {
1003 DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
1004 }
1005
/// Basic operations on a Rust-backed string containing a non-ASCII character.
#[test]
fn string_functions() {
    let original = DOMString::from("AbBcC❤&%$#");
    let duplicate = original.clone();
    assert_eq!(original.to_ascii_lowercase(), "abbcc❤&%$#");
    assert_eq!(original, duplicate);
    assert_eq!(original.len(), 12);
    assert_eq!(duplicate.len(), 12);
    assert!(original.starts_with('A'));

    let empty = DOMString::from("");
    assert!(empty.is_empty());
}
1018
/// The same basic operations on Latin-1-backed strings.
#[test]
fn string_functions_latin1() {
    let with_power2 = || {
        from_latin1(vec![
            b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
        ])
    };

    // Lowercasing with a non-ASCII byte present.
    assert_eq!(with_power2().to_ascii_lowercase(), "abbcc&%$#²");

    // Lowercasing pure ASCII.
    let ascii_only = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
    assert_eq!(ascii_only.to_ascii_lowercase(), "abbcc");

    // Length and prefix checks on unconverted data.
    let unconverted = with_power2();
    assert_eq!(unconverted.len(), 11);
    assert!(unconverted.starts_with('A'));

    let empty = from_latin1(vec![]);
    assert!(empty.is_empty());
}
1043
/// `len()` must report the UTF-8 length for every upper-half Latin-1 byte,
/// both before and after conversion to a Rust string.
#[test]
fn test_length() {
    // Each entry pairs the first byte of a 16-byte Latin-1 run with the same
    // sixteen characters written as UTF-8 text.
    let cases: [(u8, &str); 6] = [
        (0xA0, "\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯"),
        (0xB0, "°±²³´µ¶·¸¹º»¼½¾¿"),
        (0xC0, "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ"),
        // U+00D7 (×) and U+00D8 (Ø) are escaped so the pair cannot be mangled
        // into another character by text tooling.
        (0xD0, "ÐÑÒÓÔÕÖ\u{00D7}\u{00D8}ÙÚÛÜÝÞß"),
        (0xE0, "àáâãäåæçèéêëìíîï"),
        (0xF0, "ðñòóôõö÷øùúûüýþÿ"),
    ];

    for (first_byte, utf8) in cases {
        let s = from_latin1((first_byte..=first_byte + 0x0F).collect());

        // Length computed on the Latin-1 bytes.
        assert_eq!(s.len(), utf8.len());

        // Length after conversion to a Rust string.
        s.ensure_rust_string();
        assert_eq!(s.len(), utf8.len());
    }
}
1098
/// Converting ASCII-only Latin-1 data yields the same text.
#[test]
fn test_convert() {
    let dom_string = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
    dom_string.ensure_rust_string();
    assert_eq!(&*dom_string.str(), "abc%$");
}
1105
/// A Latin-1-backed string equals both a Rust-backed `DOMString` and a plain
/// `String` with the same text.
#[test]
fn partial_eq() {
    let latin1 = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
    let plain = String::from("abc%$");
    let rust_backed = DOMString::from(plain.clone());
    assert_eq!(latin1, rust_backed);
    assert_eq!(latin1, plain);
}
1114
/// `encoded_bytes()` exposes Latin-1 data unchanged.
#[test]
fn encoded_latin1_bytes() {
    let expected = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
    let dom_string = from_latin1(expected.clone());
    let EncodedBytes::Latin1(actual) = dom_string.encoded_bytes() else {
        unreachable!("Expected Latin1 encoded bytes")
    };
    assert_eq!(*expected, *actual);
}
1125
/// The `str` view of Latin-1 data yields the expected characters and UTF-8
/// bytes.
#[test]
fn testing_stringview() {
    let dom_string = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);

    let characters: Vec<char> = dom_string.str().chars().collect();
    assert_eq!(characters, vec!['a', 'b', 'c', '%', '$', '²']);

    assert_eq!(dom_string.str().as_bytes(), String::from("abc%$²").as_bytes());
}
1136
/// The hash must not depend on how the text is stored.
#[test]
fn test_hash() {
    use std::hash::{DefaultHasher, Hash, Hasher};

    fn hash_value(d: &DOMString) -> u64 {
        let mut hasher = DefaultHasher::new();
        d.hash(&mut hasher);
        hasher.finish()
    }

    let latin1 = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
    let converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
    converted.ensure_rust_string();
    let rust_backed = DOMString::from("abc%$²");

    let latin1_hash = hash_value(&latin1);
    let converted_hash = hash_value(&converted);
    let rust_backed_hash = hash_value(&rust_backed);

    assert_eq!(latin1_hash, rust_backed_hash);
    assert_eq!(latin1_hash, converted_hash);
}
1162
/// `match_domstring_ascii!` used for side effects: each case must run the arm
/// whose literal equals the string, or the fallback arm.
#[test]
fn test_match_executing() {
    {
        // Latin-1 data, first arm matches.
        let s = from_latin1(vec![b'a', b'b', b'c']);
        match_domstring_ascii!( s,
            "abc" => assert!(true),
            "bcd" => assert!(false),
            _ => (),
        );
    }

    {
        // Latin-1 data containing punctuation.
        let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
        match_domstring_ascii!( s,
            "abc/" => assert!(true),
            "bcd" => assert!(false),
            _ => (),
        );
    }

    {
        // Latin-1 data, second arm matches.
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        match_domstring_ascii!( s,
            "bcd" => assert!(false),
            "abc%$" => assert!(true),
            _ => (),
        );
    }

    {
        // Rust-backed data, only the fallback matches.
        let s = DOMString::from("abcde");
        match_domstring_ascii!( s,
            "abc" => assert!(false),
            "bcd" => assert!(false),
            _ => assert!(true),
        );
    }
    {
        // Rust-backed data, second arm matches.
        let s = DOMString::from("abc%$");
        match_domstring_ascii!( s,
            "bcd" => assert!(false),
            "abc%$" => assert!(true),
            _ => (),
        );
    }
    {
        // A literal that merely starts with the string must not match.
        let s = from_latin1(vec![b'a', b'b', b'c']);
        match_domstring_ascii!( s,
            "abcdd" => assert!(false),
            "bcd" => assert!(false),
            _ => (),
        );
    }
}
1219
/// `match_domstring_ascii!` used as an expression: the value of the selected
/// arm is returned.
#[test]
fn test_match_returning_result() {
    {
        // Latin-1 data, first arm matches.
        let s = from_latin1(vec![b'a', b'b', b'c']);
        let res = match_domstring_ascii!( s,
            "abc" => true,
            "bcd" => false,
            _ => false,
        );
        assert_eq!(res, true);
    }
    {
        // Latin-1 data containing punctuation.
        let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
        let res = match_domstring_ascii!( s,
            "abc/" => true,
            "bcd" => false,
            _ => false,
        );
        assert_eq!(res, true);
    }
    {
        // Latin-1 data, second arm matches.
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        let res = match_domstring_ascii!( s,
            "bcd" => false,
            "abc%$" => true,
            _ => false,
        );
        assert_eq!(res, true);
    }

    {
        // Rust-backed data, only the fallback matches.
        let s = DOMString::from("abcde");
        let res = match_domstring_ascii!( s,
            "abc" => false,
            "bcd" => false,
            _ => true,
        );
        assert_eq!(res, true);
    }
    {
        // Rust-backed data, second arm matches.
        let s = DOMString::from("abc%$");
        let res = match_domstring_ascii!( s,
            "bcd" => false,
            "abc%$" => true,
            _ => false,
        );
        assert_eq!(res, true);
    }
    {
        // A literal that merely starts with the string must not match.
        let s = from_latin1(vec![b'a', b'b', b'c']);
        let res = match_domstring_ascii!( s,
            "abcdd" => false,
            "bcd" => false,
            _ => true,
        );
        assert_eq!(res, true);
    }
}
1279
/// A non-ASCII literal in the first arm trips the macro's `debug_assert!`.
#[test]
#[cfg(debug_assertions)]
#[should_panic]
fn test_match_panic() {
    let dom_string = DOMString::from("abcd");
    let _matched = match_domstring_ascii!(dom_string,
        "❤" => true,
        _ => false,);
}
1289
/// A non-ASCII literal in a later arm also trips the `debug_assert!` once the
/// earlier arms have failed to match.
#[test]
#[cfg(debug_assertions)]
#[should_panic]
fn test_match_panic2() {
    let dom_string = DOMString::from("abcd");
    let _matched = match_domstring_ascii!(dom_string,
        "abc" => false,
        "❤" => true,
        _ => false,
    );
}
1301
/// Leading and trailing ASCII whitespace is stripped for both representations.
#[test]
fn test_strip_whitespace() {
    // Latin-1-backed input with a non-ASCII byte.
    let mut latin1 = from_latin1(vec![
        b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
    ]);
    latin1.strip_leading_and_trailing_ascii_whitespace();
    latin1.ensure_rust_string();
    assert_eq!(&*latin1.str(), "abc%$²");

    // Rust-backed input.
    let mut rust_backed = DOMString::from("   \n  abc%$ ");
    rust_backed.strip_leading_and_trailing_ascii_whitespace();
    rust_backed.ensure_rust_string();
    assert_eq!(&*rust_backed.str(), "abc%$");
}
1321
/// Each HTML space character is detected before and after conversion to a
/// Rust string; a string without any is not flagged.
#[test]
fn contains_html_space_characters() {
    let space_bytes = [
        ASCII_TAB,
        ASCII_NEWLINE,
        ASCII_FORMFEED,
        ASCII_CR,
        ASCII_SPACE,
    ];
    for space in space_bytes {
        let s = from_latin1(vec![b'a', b'a', b'a', space, b'a', b'a']);
        assert!(s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(s.contains_html_space_characters());
    }

    let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
    assert!(!s.contains_html_space_characters());
    s.ensure_rust_string();
    assert!(!s.contains_html_space_characters());
}
1355
/// `Atom` conversion gives the same result for Latin-1- and Rust-backed text,
/// and a different one for different text.
#[test]
fn atom() {
    let from_latin1_ascii = Atom::from(from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']));
    let from_rust = Atom::from(DOMString::from("aaa aa"));
    assert_eq!(from_latin1_ascii, from_rust);

    let from_latin1_non_ascii = Atom::from(from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']));
    assert_ne!(from_latin1_ascii, from_latin1_non_ascii);
}
1367
/// `Namespace` conversion gives the same result for Latin-1- and Rust-backed
/// text, and a different one for different text.
#[test]
fn namespace() {
    let from_latin1_ascii =
        Namespace::from(from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']));
    let from_rust = Namespace::from(DOMString::from("aaa aa"));
    assert_eq!(from_latin1_ascii, from_rust);

    let from_latin1_non_ascii =
        Namespace::from(from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']));
    assert_ne!(from_latin1_ascii, from_latin1_non_ascii);
}
1379
/// `LocalName` conversion gives the same result for Latin-1- and Rust-backed
/// text, and a different one for different text.
#[test]
fn localname() {
    let from_latin1_ascii =
        LocalName::from(from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']));
    let from_rust = LocalName::from(DOMString::from("aaa aa"));
    assert_eq!(from_latin1_ascii, from_rust);

    let from_latin1_non_ascii =
        LocalName::from(from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']));
    assert_ne!(from_latin1_ascii, from_latin1_non_ascii);
}
1391
/// `is_ascii_lowercase` accepts only strings made entirely of `a`..=`z`.
#[test]
fn is_ascii_lowercase() {
    let latin1_cases: [(Vec<u8>, bool); 4] = [
        (vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a'], false),
        (vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'], false),
        (vec![b'a', b'a', b'a', b'a', b'z'], true),
        // '`' is the byte just below 'a'.
        (vec![b'`', b'a', b'a', b'a', b'z'], false),
    ];
    for (bytes, expected) in latin1_cases {
        assert_eq!(from_latin1(bytes).is_ascii_lowercase(), expected);
    }

    assert!(!DOMString::from("`aaaz").is_ascii_lowercase());
    assert!(DOMString::from("aaaz").is_ascii_lowercase());
}
1407
/// `as_bytes()` always yields UTF-8, whatever the backing representation.
#[test]
fn test_as_bytes() {
    const ASCII_SMALL_A: u8 = b'a';
    const ASCII_SMALL_Z: u8 = b'z';

    // A non-ASCII Latin-1 byte becomes a two-byte UTF-8 sequence.
    let with_pilcrow = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
    assert_eq!(
        *with_pilcrow.as_bytes(),
        [
            ASCII_SMALL_A,
            ASCII_SMALL_A,
            ASCII_SMALL_A,
            UTF8_PILLCROW[0],
            UTF8_PILLCROW[1],
            ASCII_SMALL_A,
            ASCII_SMALL_A
        ]
    );

    // ASCII-only Latin-1 data comes back unchanged.
    let ascii_only = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
    assert_eq!(
        *ascii_only.as_bytes(),
        [
            ASCII_SMALL_A,
            ASCII_SMALL_A,
            ASCII_SMALL_A,
            ASCII_SMALL_A,
            ASCII_SMALL_Z
        ]
    );

    // Rust-backed strings expose their own UTF-8 bytes.
    for text in ["abc%$²", "AbBcC❤&%$#"] {
        let owned = text.to_owned();
        let dom_string = DOMString::from(owned.clone());
        assert_eq!(&*dom_string.as_bytes(), owned.as_bytes());
    }
}
1448}