1#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::context::{JSContext, RawJSContext};
17use js::conversions::{ToJSValConvertible, jsstr_to_string};
18use js::gc::{HandleValue, MutableHandleValue};
19use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSString};
20use js::jsval::StringValue;
21use js::rust::{Runtime, Trace};
22use malloc_size_of::MallocSizeOfOps;
23use num_traits::{ToPrimitive, Zero};
24use regex::Regex;
25use servo_base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
26use style::Atom;
27use style::str::HTML_SPACE_CHARACTERS;
28use zeroize::Zeroize;
29
30use crate::trace::RootedTraceableBox;
31
/// Upper bound this file uses when classifying a Latin-1 byte as plain ASCII (`~`).
/// NOTE(review): 0x7F (DEL) is also ASCII but lies above this bound.
const ASCII_END: u8 = 0x7E;
/// ASCII `A`.
const ASCII_CAPITAL_A: u8 = 0x41;
/// ASCII `Z`.
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// ASCII `a`.
const ASCII_LOWERCASE_A: u8 = 0x61;
/// ASCII `z`.
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// ASCII horizontal tab.
const ASCII_TAB: u8 = 0x09;
/// ASCII line feed.
const ASCII_NEWLINE: u8 = 0x0A;
/// ASCII form feed.
const ASCII_FORMFEED: u8 = 0x0C;
/// ASCII carriage return.
const ASCII_CR: u8 = 0x0D;
/// ASCII space.
const ASCII_SPACE: u8 = 0x20;
42
43unsafe fn get_latin1_string_bytes(
46 rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
47) -> &[u8] {
48 debug_assert!(!rooted_traceable_box.get().is_null());
49 let mut length = 0;
50 unsafe {
51 let chars = JS_GetLatin1StringCharsAndLength(
52 Runtime::get().expect("JS runtime has shut down").as_ptr(),
53 ptr::null(),
54 rooted_traceable_box.get(),
55 &mut length,
56 );
57 assert!(!chars.is_null());
58 slice::from_raw_parts(chars, length)
59 }
60}
61
/// A borrowed view of a string's raw bytes, tagged with the encoding they are in.
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// One byte per character, each byte being a Latin-1 code point.
    Latin1(Ref<'a, [u8]>),
    /// UTF-8 encoded bytes.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Returns the underlying bytes without any re-encoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// Returns the number of bytes this string occupies once encoded as UTF-8.
    ///
    /// For Latin-1 data every ASCII byte (`0x00..=0x7F`, including DEL) maps to
    /// a single UTF-8 byte, while every byte in `0x80..=0xFF` maps to two.
    pub fn len(&self) -> usize {
        match self {
            Self::Latin1(bytes) => bytes
                .iter()
                .map(|byte| if byte.is_ascii() { 1 } else { 2 })
                .sum(),
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Returns `true` when there are no bytes at all.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
96
/// The storage backing a `DOMString`.
#[derive(Zeroize)]
enum DOMStringType {
    /// An owned Rust `String`.
    Rust(String),
    /// A rooted pointer to a JS engine string; excluded from zeroizing.
    #[zeroize(skip)]
    JSString(RootedTraceableBox<Heap<*mut JSString>>),
    /// Raw Latin-1 bytes; only compiled for tests.
    #[cfg(test)]
    Latin1Vec(Vec<u8>),
}
109
110impl Default for DOMStringType {
111 fn default() -> Self {
112 Self::Rust(Default::default())
113 }
114}
115
116impl DOMStringType {
117 fn as_raw_bytes(&self) -> &[u8] {
122 match self {
123 DOMStringType::Rust(s) => s.as_bytes(),
124 DOMStringType::JSString(rooted_traceable_box) => unsafe {
125 get_latin1_string_bytes(rooted_traceable_box)
126 },
127 #[cfg(test)]
128 DOMStringType::Latin1Vec(items) => items,
129 }
130 }
131
132 fn ensure_rust_string(&mut self) -> &mut String {
133 let new_string = match self {
134 DOMStringType::Rust(string) => return string,
135 DOMStringType::JSString(rooted_traceable_box) => {
136 let cx = unsafe { JSContext::get_from_thread() };
137 let cx = cx.as_ref().expect("JS runtime has shut down");
138 unsafe { jsstr_to_string(cx, NonNull::new(rooted_traceable_box.get()).unwrap()) }
139 },
140 #[cfg(test)]
141 DOMStringType::Latin1Vec(items) => {
142 let mut v = vec![0; items.len() * 2];
143 let real_size =
144 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
145 v.truncate(real_size);
146
147 unsafe { String::from_utf8_unchecked(v) }
150 },
151 };
152 *self = DOMStringType::Rust(new_string);
153 self.ensure_rust_string()
154 }
155}
156
/// A borrowed `str` view, backed by a `Ref` guard that is held for as long as the view lives.
#[derive(Debug)]
pub struct StringView<'a>(Ref<'a, str>);
161
162impl StringView<'_> {
163 pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
164 self.split(HTML_SPACE_CHARACTERS)
165 .filter(|string| !string.is_empty())
166 }
167}
168
169impl From<StringView<'_>> for String {
170 fn from(string_view: StringView<'_>) -> Self {
171 string_view.0.to_string()
172 }
173}
174
175impl Deref for StringView<'_> {
176 type Target = str;
177 fn deref(&self) -> &str {
178 &(self.0)
179 }
180}
181
182impl AsRef<str> for StringView<'_> {
183 fn as_ref(&self) -> &str {
184 &(self.0)
185 }
186}
187
188impl PartialEq for StringView<'_> {
189 fn eq(&self, other: &Self) -> bool {
190 self.0.eq(&*(other.0))
191 }
192}
193
194impl PartialEq<&str> for StringView<'_> {
195 fn eq(&self, other: &&str) -> bool {
196 self.0.eq(*other)
197 }
198}
199
200impl Eq for StringView<'_> {}
201
202impl PartialOrd for StringView<'_> {
203 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
204 self.0.partial_cmp(&**other)
205 }
206}
207
208impl Ord for StringView<'_> {
209 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
210 self.0.cmp(other)
211 }
212}
213
214unsafe impl Trace for DOMStringType {
220 unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
221 unsafe {
222 match self {
223 DOMStringType::Rust(_s) => {},
224 DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
225 #[cfg(test)]
226 DOMStringType::Latin1Vec(_s) => {},
227 }
228 }
229 }
230}
231
232impl malloc_size_of::MallocSizeOf for DOMStringType {
233 fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
234 match self {
235 DOMStringType::Rust(s) => s.size_of(ops),
236 DOMStringType::JSString(_rooted_traceable_box) => {
237 0
239 },
240 #[cfg(test)]
241 DOMStringType::Latin1Vec(s) => s.size_of(ops),
242 }
243 }
244}
245
246impl std::fmt::Debug for DOMStringType {
247 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248 match self {
249 DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
250 DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
251 #[cfg(test)]
252 DOMStringType::Latin1Vec(s) => f
253 .debug_struct("DOMString")
254 .field("latin1_string", s)
255 .finish(),
256 }
257 }
258}
259
/// A DOM string whose storage (`DOMStringType`) sits behind a `RefCell`, so
/// that methods taking `&self` can still convert it to a Rust string on demand.
#[repr(transparent)]
#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
298
299impl Clone for DOMString {
300 fn clone(&self) -> Self {
301 self.ensure_rust_string().clone().into()
302 }
303}
304
/// Errors that can occur while building a `DOMString`.
pub enum DOMStringErrorType {
    /// Converting a JS value to a JS string failed.
    JSConversionError,
}
308
309impl DOMString {
310 pub fn new() -> DOMString {
312 Default::default()
313 }
314
315 pub fn from_js_string(
318 cx: &mut JSContext,
319 value: HandleValue,
320 ) -> Result<DOMString, DOMStringErrorType> {
321 let string_ptr = unsafe { js::rust::ToString(cx, value) };
322 if string_ptr.is_null() {
323 debug!("ToString failed");
324 Err(DOMStringErrorType::JSConversionError)
325 } else {
326 let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
327 let inner = if latin1 {
328 let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
329 DOMStringType::JSString(h)
330 } else {
331 DOMStringType::Rust(unsafe {
333 jsstr_to_string(cx, NonNull::new(string_ptr).unwrap())
334 })
335 };
336 Ok(DOMString(RefCell::new(inner)))
337 }
338 }
339
340 fn ensure_rust_string(&self) -> RefMut<'_, String> {
343 let inner = self.0.borrow_mut();
344 RefMut::map(inner, |inner| inner.ensure_rust_string())
345 }
346
347 #[expect(unused)]
349 fn debug_js(&self, cx: &JSContext) {
350 match *self.0.borrow() {
351 DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
352 DOMStringType::JSString(ref rooted_traceable_box) => {
353 let s = unsafe {
354 jsstr_to_string(cx, NonNull::new(rooted_traceable_box.get()).unwrap())
355 };
356 info!("JSString ({})", s);
357 },
358 #[cfg(test)]
359 DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
360 }
361 }
362
363 pub fn str(&self) -> StringView<'_> {
365 {
366 let inner = self.0.borrow();
367 if matches!(&*inner, DOMStringType::Rust(..)) {
368 return StringView(Ref::map(inner, |inner| match inner {
369 DOMStringType::Rust(string) => string.as_str(),
370 _ => unreachable!("Guaranteed by condition above"),
371 }));
372 }
373 }
374
375 self.ensure_rust_string();
376 self.str()
377 }
378
379 pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
382 let inner = self.0.borrow();
383 match &*inner {
384 DOMStringType::Rust(..) => {
385 EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
386 },
387 _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
388 }
389 }
390
391 pub fn clear(&mut self) {
392 let mut inner = self.0.borrow_mut();
393 let DOMStringType::Rust(string) = &mut *inner else {
394 *inner = DOMStringType::Rust(String::new());
395 return;
396 };
397 string.clear();
398 }
399
400 pub fn is_empty(&self) -> bool {
401 self.encoded_bytes().is_empty()
402 }
403
404 pub fn len(&self) -> usize {
409 self.encoded_bytes().len()
410 }
411
412 pub fn len_utf8(&self) -> Utf8CodeUnitLength {
419 Utf8CodeUnitLength(self.len())
420 }
421
422 pub fn len_utf16(&self) -> Utf16CodeUnitLength {
427 Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
428 }
429
430 pub fn make_ascii_lowercase(&mut self) {
431 self.0
432 .borrow_mut()
433 .ensure_rust_string()
434 .make_ascii_lowercase();
435 }
436
437 pub fn push_str(&mut self, string_to_push: &str) {
438 self.0
439 .borrow_mut()
440 .ensure_rust_string()
441 .push_str(string_to_push);
442 }
443
444 pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
446 if self.is_empty() {
447 return;
448 }
449
450 let mut inner = self.0.borrow_mut();
451 let string = inner.ensure_rust_string();
452 let trailing_whitespace_len = string
453 .trim_end_matches(|character: char| character.is_ascii_whitespace())
454 .len();
455 string.truncate(trailing_whitespace_len);
456 if string.is_empty() {
457 return;
458 }
459
460 let first_non_whitespace = string
461 .find(|character: char| !character.is_ascii_whitespace())
462 .unwrap();
463 string.replace_range(0..first_non_whitespace, "");
464 }
465
466 pub fn is_valid_floating_point_number_string(&self) -> bool {
468 static RE: LazyLock<Regex> = LazyLock::new(|| {
469 Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
470 });
471
472 RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
473 self.parse_floating_point_number().is_some()
474 }
475
476 pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
477 self.str().parse::<T>()
478 }
479
480 pub fn parse_floating_point_number(&self) -> Option<f64> {
482 parse_floating_point_number(&self.str())
483 }
484
485 pub fn set_best_representation_of_the_floating_point_number(&mut self) {
487 if let Some(val) = self.parse_floating_point_number() {
488 let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
490
491 *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
492 }
493 }
494
495 pub fn to_lowercase(&self) -> String {
496 self.str().to_lowercase()
497 }
498
499 pub fn to_uppercase(&self) -> String {
500 self.str().to_uppercase()
501 }
502
503 pub fn strip_newlines(&mut self) {
504 self.0
507 .borrow_mut()
508 .ensure_rust_string()
509 .retain(|character| character != '\r' && character != '\n');
510 }
511
512 pub fn normalize_newlines(&mut self) {
514 let mut inner = self.0.borrow_mut();
518 let string = inner.ensure_rust_string();
519 *string = string.replace("\r\n", "\n").replace("\r", "\n")
520 }
521
522 pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
523 let new_string = self.str().to_owned();
524 DOMString(RefCell::new(DOMStringType::Rust(
525 new_string.replace(needle, replace_char),
526 )))
527 }
528
529 pub fn starts_with(&self, c: char) -> bool {
531 if !c.is_ascii() {
532 self.str().starts_with(c)
533 } else {
534 self.encoded_bytes().bytes().starts_with(&[c as u8])
537 }
538 }
539
540 pub fn starts_with_str(&self, needle: &str) -> bool {
541 self.str().starts_with(needle)
542 }
543
544 pub fn ends_with_str(&self, needle: &str) -> bool {
545 self.str().ends_with(needle)
546 }
547
548 pub fn contains(&self, needle: &str) -> bool {
549 self.str().contains(needle)
550 }
551
552 pub fn to_ascii_lowercase(&self) -> String {
553 let conversion = match self.encoded_bytes() {
554 EncodedBytes::Latin1(bytes) => {
555 if bytes.iter().all(|c| *c <= ASCII_END) {
556 Some(unsafe {
558 String::from_utf8_unchecked(
559 bytes
560 .iter()
561 .map(|c| {
562 if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
563 c + 32
564 } else {
565 *c
566 }
567 })
568 .collect(),
569 )
570 })
571 } else {
572 None
573 }
574 },
575 EncodedBytes::Utf8(bytes) => unsafe {
576 Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
578 },
579 };
580 if let Some(conversion) = conversion {
582 conversion
583 } else {
584 self.str().to_ascii_lowercase()
585 }
586 }
587
588 fn contains_space_characters(
589 &self,
590 latin1_characters: &'static [u8],
591 utf8_characters: &'static [char],
592 ) -> bool {
593 match self.encoded_bytes() {
594 EncodedBytes::Latin1(items) => {
595 latin1_characters.iter().any(|byte| items.contains(byte))
596 },
597 EncodedBytes::Utf8(bytes) => {
598 let s = unsafe { str::from_utf8_unchecked(&bytes) };
600 s.contains(utf8_characters)
601 },
602 }
603 }
604
605 pub fn contains_tab_or_newline(&self) -> bool {
607 const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
608 const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
609
610 self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
611 }
612
613 pub fn contains_html_space_characters(&self) -> bool {
615 const SPACE_BYTES: [u8; 5] = [
616 ASCII_TAB,
617 ASCII_NEWLINE,
618 ASCII_FORMFEED,
619 ASCII_CR,
620 ASCII_SPACE,
621 ];
622 self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
623 }
624
625 pub fn as_bytes(&self) -> BytesView<'_> {
627 if self.is_ascii() {
631 BytesView(self.0.borrow())
632 } else {
633 self.ensure_rust_string();
634 BytesView(self.0.borrow())
635 }
636 }
637
638 pub fn is_ascii_lowercase(&self) -> bool {
640 match self.encoded_bytes() {
641 EncodedBytes::Latin1(items) => items
642 .iter()
643 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
644 EncodedBytes::Utf8(s) => s
645 .iter()
646 .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
647 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
648 }
649 }
650
651 pub fn is_ascii(&self) -> bool {
653 self.encoded_bytes().bytes().is_ascii()
654 }
655
656 pub fn is_valid_for_cookie(&self) -> bool {
660 match self.encoded_bytes() {
661 EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
662 .iter()
663 .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
664 }
665 }
666
667 fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
670 match self.encoded_bytes() {
671 EncodedBytes::Latin1(latin1_bytes) => {
673 if latin1_bytes.iter().all(|character| character.is_ascii()) {
674 return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
677 }
678 },
679 EncodedBytes::Utf8(utf8_bytes) => {
680 return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
683 },
684 };
685 callback(self.str().deref())
686 }
687
688 pub fn normalize_crlf(&self) -> String {
696 let s = self.str();
697 let mut buf = String::new();
698 let mut prev = ' ';
699 for ch in s.chars() {
700 match ch {
701 '\n' if prev != '\r' => {
702 buf.push('\r');
703 buf.push('\n');
704 },
705 '\n' => {
706 buf.push('\n');
707 },
708 _ if prev == '\r' => {
711 buf.push('\n');
712 buf.push(ch);
713 },
714 _ => buf.push(ch),
715 };
716 prev = ch;
717 }
718 if prev == '\r' {
720 buf.push('\n');
721 }
722 buf
723 }
724}
725
/// Parses `input` (after trimming surrounding whitespace) as an `f64`.
///
/// Returns `None` when the text does not parse, when the value is infinite or
/// NaN, or when the untrimmed `input` ends with `.` or starts with `+`.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    let has_rejected_affix = input.ends_with('.') || input.starts_with('+');
    match input.trim().parse::<f64>() {
        Ok(value) if value.is_finite() && !has_rejected_affix => Some(value),
        _ => None,
    }
}
739
/// A byte view of a `DOMString`, holding a shared borrow of its storage.
pub struct BytesView<'a>(Ref<'a, DOMStringType>);
741
742impl Deref for BytesView<'_> {
743 type Target = [u8];
744
745 fn deref(&self) -> &Self::Target {
746 self.0.as_raw_bytes()
748 }
749}
750
751impl Ord for DOMString {
752 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
753 self.str().cmp(&other.str())
754 }
755}
756
757impl PartialOrd for DOMString {
758 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
759 self.str().partial_cmp(&other.str())
760 }
761}
762
763impl Extend<char> for DOMString {
764 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
765 self.0.borrow_mut().ensure_rust_string().extend(iter)
766 }
767}
768
769impl ToJSValConvertible for DOMString {
770 unsafe fn to_jsval(&self, cx: *mut RawJSContext, mut rval: MutableHandleValue) {
771 let val = self.0.borrow();
772 match *val {
773 DOMStringType::Rust(ref s) => unsafe {
774 s.to_jsval(cx, rval);
775 },
776 DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
777 rval.set(StringValue(&*rooted_traceable_box.get()));
778 },
779 #[cfg(test)]
780 DOMStringType::Latin1Vec(ref items) => {
781 let mut v = vec![0; items.len() * 2];
782 let real_size =
783 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
784 v.truncate(real_size);
785
786 String::from_utf8(v)
787 .expect("Error in constructin test string")
788 .to_jsval(cx, rval);
789 },
790 };
791 }
792}
793
794impl std::hash::Hash for DOMString {
795 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
796 self.str().hash(state);
797 }
798}
799
800impl std::fmt::Display for DOMString {
801 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
802 fmt::Display::fmt(self.str().deref(), f)
803 }
804}
805
806impl std::cmp::PartialEq<str> for DOMString {
807 fn eq(&self, other: &str) -> bool {
808 if other.is_ascii() {
809 *other.as_bytes() == *self.encoded_bytes().bytes()
810 } else {
811 self.str().deref() == other
812 }
813 }
814}
815
816impl std::cmp::PartialEq<&str> for DOMString {
817 fn eq(&self, other: &&str) -> bool {
818 self.eq(*other)
819 }
820}
821
822impl std::cmp::PartialEq<String> for DOMString {
823 fn eq(&self, other: &String) -> bool {
824 self.eq(other.as_str())
825 }
826}
827
828impl std::cmp::PartialEq<DOMString> for String {
829 fn eq(&self, other: &DOMString) -> bool {
830 other.eq(self)
831 }
832}
833
834impl std::cmp::PartialEq<DOMString> for str {
835 fn eq(&self, other: &DOMString) -> bool {
836 other.eq(self)
837 }
838}
839
840impl std::cmp::PartialEq for DOMString {
841 fn eq(&self, other: &DOMString) -> bool {
842 let result = match (self.encoded_bytes(), other.encoded_bytes()) {
843 (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
844 Some(*bytes == *other_bytes)
845 },
846 (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
847 if other_bytes.is_ascii() =>
848 {
849 Some(*bytes == *other_bytes)
850 },
851 (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
852 Some(*bytes == *other_bytes)
853 },
854 (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
855 Some(*bytes == *other_bytes)
856 },
857 _ => None,
858 };
859
860 if let Some(eq_result) = result {
861 return eq_result;
862 }
863
864 *self.str() == *other.str()
865 }
866}
867
868impl std::cmp::Eq for DOMString {}
869
870impl From<std::string::String> for DOMString {
871 fn from(string: String) -> Self {
872 DOMString(RefCell::new(DOMStringType::Rust(string)))
873 }
874}
875
876impl From<&str> for DOMString {
877 fn from(string: &str) -> Self {
878 String::from(string).into()
879 }
880}
881
882impl From<DOMString> for LocalName {
883 fn from(dom_string: DOMString) -> LocalName {
884 dom_string.with_str_reference(|string| LocalName::from(string))
885 }
886}
887
888impl From<&DOMString> for LocalName {
889 fn from(dom_string: &DOMString) -> LocalName {
890 dom_string.with_str_reference(|string| LocalName::from(string))
891 }
892}
893
894impl From<DOMString> for Namespace {
895 fn from(dom_string: DOMString) -> Namespace {
896 dom_string.with_str_reference(|string| Namespace::from(string))
897 }
898}
899
900impl From<DOMString> for Atom {
901 fn from(dom_string: DOMString) -> Atom {
902 dom_string.with_str_reference(|string| Atom::from(string))
903 }
904}
905
906impl From<DOMString> for String {
907 fn from(val: DOMString) -> Self {
908 val.ensure_rust_string();
909 let inner = val.0.take();
910 match inner {
911 DOMStringType::Rust(s) => s,
912 DOMStringType::JSString(_) => unreachable!(),
913 #[cfg(test)]
914 DOMStringType::Latin1Vec(items) => String::from_utf8(items).expect("Not valid latin1"),
915 }
916 }
917}
918
919impl From<DOMString> for Vec<u8> {
920 fn from(value: DOMString) -> Self {
921 value.ensure_rust_string();
922 let inner = value.0.take();
923 match inner {
924 DOMStringType::Rust(s) => s.into_bytes(),
925 DOMStringType::JSString(_) => unreachable!(),
926 #[cfg(test)]
927 DOMStringType::Latin1Vec(items) => items,
928 }
929 }
930}
931
932impl From<Cow<'_, str>> for DOMString {
933 fn from(value: Cow<'_, str>) -> Self {
934 DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
935 }
936}
937
938impl Zeroize for DOMString {
939 fn zeroize(&mut self) {
940 self.0.get_mut().zeroize();
941 }
942}
943
/// Expansion helper for `match_domstring_ascii!`.
///
/// Turns a list of `literal => expr,` arms followed by one final
/// `pattern => expr,` arm into a chain of `if`/`else` byte comparisons against
/// `$input.bytes()`, ending in a `match` on `$input` for the final arm.
/// Each literal is checked with `debug_assert!` to be ASCII.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // A literal arm followed by more arms: compare bytes, else recurse.
    ($variant: expr, $input: expr, $ascii_literal: literal => $then: expr, $($rest:tt)*) => {
        if {
            debug_assert!(($ascii_literal).is_ascii());
            $ascii_literal.as_bytes()
        } == $input.bytes() {
            $then
        } else {
            $crate::match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // The final arm: an ordinary pattern matched against the input.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
963
/// Matches a string against ASCII literals by comparing raw bytes.
///
/// `$input` must provide `encoded_bytes()`. The arms are `literal => expr,`
/// entries followed by one final `pattern => expr,` arm; they are expanded by
/// `match_domstring_ascii_inner!` once per encoding variant.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::domstring::EncodedBytes;

            let encoded_bytes = $input.encoded_bytes();
            match encoded_bytes {
                EncodedBytes::Latin1(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, encoded_bytes, $($tail)*)
                }
                EncodedBytes::Utf8(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, encoded_bytes, $($tail)*)
                }

            }
        }
    };
}
999
#[cfg(test)]
mod tests {
    use super::*;

    // U+00B6 as a Latin-1 byte and as its two-byte UTF-8 encoding.
    const LATIN1_PILLCROW: u8 = 0xB6;
    const UTF8_PILLCROW: [u8; 2] = [194, 182];
    // U+00B2 as a Latin-1 byte.
    const LATIN1_POWER2: u8 = 0xB2;

    /// Builds a `DOMString` backed by raw Latin-1 bytes, standing in for a
    /// Latin-1 JS string.
    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
    }

    /// Basic operations on a Rust-backed string.
    #[test]
    fn string_functions() {
        let s = DOMString::from("AbBcC❤&%$#");
        let s_copy = s.clone();
        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
        assert_eq!(s, s_copy);
        assert_eq!(s.len(), 12);
        assert_eq!(s_copy.len(), 12);
        assert!(s.starts_with('A'));
        let s2 = DOMString::from("");
        assert!(s2.is_empty());
    }

    /// The same basic operations on Latin-1 backed strings.
    #[test]
    fn string_functions_latin1() {
        {
            let s = from_latin1(vec![
                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
            ]);
            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
        }
        {
            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
            assert_eq!(s.to_ascii_lowercase(), "abbcc");
        }
        {
            let s = from_latin1(vec![
                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
            ]);
            assert_eq!(s.len(), 11);
            assert!(s.starts_with('A'));
        }
        {
            let s = from_latin1(vec![]);
            assert!(s.is_empty());
        }
    }

    /// `len()` of a Latin-1 string must equal the UTF-8 length of the same
    /// text, both before and after conversion, for all bytes 0xA0..=0xFF.
    #[test]
    fn test_length() {
        let s1 = from_latin1(vec![
            0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
            0xAE, 0xAF,
        ]);
        let s2 = from_latin1(vec![
            0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
            0xBE, 0xBF,
        ]);
        let s3 = from_latin1(vec![
            0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
            0xCE, 0xCF,
        ]);
        let s4 = from_latin1(vec![
            0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
            0xDE, 0xDF,
        ]);
        let s5 = from_latin1(vec![
            0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
            0xEE, 0xEF,
        ]);
        let s6 = from_latin1(vec![
            0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
            0xFE, 0xFF,
        ]);

        let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
        let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
        let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
        // U+00D7 and U+00D8 are written as escapes so that the pair cannot be
        // mangled into a single character by a re-encoding of this file.
        let s4_utf8 = String::from("ÐÑÒÓÔÕÖ\u{00D7}\u{00D8}ÙÚÛÜÝÞß");
        let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
        let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");

        assert_eq!(s1.len(), s1_utf8.len());
        assert_eq!(s2.len(), s2_utf8.len());
        assert_eq!(s3.len(), s3_utf8.len());
        assert_eq!(s4.len(), s4_utf8.len());
        assert_eq!(s5.len(), s5_utf8.len());
        assert_eq!(s6.len(), s6_utf8.len());

        s1.ensure_rust_string();
        s2.ensure_rust_string();
        s3.ensure_rust_string();
        s4.ensure_rust_string();
        s5.ensure_rust_string();
        s6.ensure_rust_string();
        assert_eq!(s1.len(), s1_utf8.len());
        assert_eq!(s2.len(), s2_utf8.len());
        assert_eq!(s3.len(), s3_utf8.len());
        assert_eq!(s4.len(), s4_utf8.len());
        assert_eq!(s5.len(), s5_utf8.len());
        assert_eq!(s6.len(), s6_utf8.len());
    }

    /// Converting an ASCII-only Latin-1 string keeps its text.
    #[test]
    fn test_convert() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        s.ensure_rust_string();
        assert_eq!(&*s.str(), "abc%$");
    }

    /// Equality across storage kinds and against `String`.
    #[test]
    fn partial_eq() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        let string = String::from("abc%$");
        let s2 = DOMString::from(string.clone());
        assert_eq!(s, s2);
        assert_eq!(s, string);
    }

    /// `encoded_bytes` returns Latin-1 storage untouched.
    #[test]
    fn encoded_latin1_bytes() {
        let original_latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
        let dom_string = from_latin1(original_latin1_bytes.clone());
        let string_latin1_bytes = match dom_string.encoded_bytes() {
            EncodedBytes::Latin1(bytes) => bytes,
            _ => unreachable!("Expected Latin1 encoded bytes"),
        };
        assert_eq!(*original_latin1_bytes, *string_latin1_bytes);
    }

    /// `str()` on a Latin-1 string yields the converted UTF-8 text.
    #[test]
    fn testing_stringview() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);

        assert_eq!(
            s.str().chars().collect::<Vec<char>>(),
            vec!['a', 'b', 'c', '%', '$', '²']
        );
        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
    }

    /// The hash must not depend on how the string is stored.
    #[test]
    fn test_hash() {
        use std::hash::{DefaultHasher, Hash, Hasher};
        fn hash_value(d: &DOMString) -> u64 {
            let mut hasher = DefaultHasher::new();
            d.hash(&mut hasher);
            hasher.finish()
        }

        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
        s_converted.ensure_rust_string();
        let s2 = DOMString::from("abc%$²");

        let hash_s = hash_value(&s);
        let hash_s_converted = hash_value(&s_converted);
        let hash_s2 = hash_value(&s2);

        assert_eq!(hash_s, hash_s2);
        assert_eq!(hash_s, hash_s_converted);
    }

    /// `match_domstring_ascii!` runs the arm of the matching literal.
    #[test]
    fn test_match_executing() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abc" => assert!(true),
                "bcd" => assert!(false),
                _ => (),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            match_domstring_ascii!( s,
                "abc/" => assert!(true),
                "bcd" => assert!(false),
                _ => (),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            match_domstring_ascii!( s,
                "bcd" => assert!(false),
                "abc%$" => assert!(true),
                _ => (),
            );
        }

        {
            let s = DOMString::from("abcde");
            match_domstring_ascii!( s,
                "abc" => assert!(false),
                "bcd" => assert!(false),
                _ => assert!(true),
            );
        }
        {
            let s = DOMString::from("abc%$");
            match_domstring_ascii!( s,
                "bcd" => assert!(false),
                "abc%$" => assert!(true),
                _ => (),
            );
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abcdd" => assert!(false),
                "bcd" => assert!(false),
                _ => (),
            );
        }
    }

    /// `match_domstring_ascii!` can be used as an expression.
    #[test]
    fn test_match_returning_result() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abc" => true,
                "bcd" => false,
                _ => false,
            );
            assert_eq!(res, true);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            let res = match_domstring_ascii!( s,
                "abc/" => true,
                "bcd" => false,
                _ => false,
            );
            assert_eq!(res, true);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert_eq!(res, true);
        }

        {
            let s = DOMString::from("abcde");
            let res = match_domstring_ascii!( s,
                "abc" => false,
                "bcd" => false,
                _ => true,
            );
            assert_eq!(res, true);
        }
        {
            let s = DOMString::from("abc%$");
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert_eq!(res, true);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abcdd" => false,
                "bcd" => false,
                _ => true,
            );
            assert_eq!(res, true);
        }
    }

    /// A non-ASCII literal trips the macro's `debug_assert!`.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_match_panic() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "❤" => true,
            _ => false,);
    }

    /// The same panic when the non-ASCII literal is not the first arm.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_match_panic2() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "abc" => false,
            "❤" => true,
            _ => false,
        );
    }

    /// Whitespace stripping on Latin-1 and Rust-backed strings.
    #[test]
    fn test_strip_whitespace() {
        {
            let mut s = from_latin1(vec![
                b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
            ]);

            s.strip_leading_and_trailing_ascii_whitespace();
            s.ensure_rust_string();
            assert_eq!(&*s.str(), "abc%$²");
        }
        {
            let mut s = DOMString::from("   \n  abc%$ ");

            s.strip_leading_and_trailing_ascii_whitespace();
            s.ensure_rust_string();
            assert_eq!(&*s.str(), "abc%$");
        }
    }

    /// Each HTML space character is detected both before and after conversion.
    #[test]
    fn contains_html_space_characters() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_TAB, b'a', b'a']);
        assert!(s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(s.contains_html_space_characters());

        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_NEWLINE, b'a', b'a']);
        assert!(s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(s.contains_html_space_characters());

        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_FORMFEED, b'a', b'a']);
        assert!(s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(s.contains_html_space_characters());

        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_CR, b'a', b'a']);
        assert!(s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(s.contains_html_space_characters());

        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        assert!(s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(s.contains_html_space_characters());

        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
        assert!(!s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(!s.contains_html_space_characters());
    }

    /// `Atom` conversion gives the same result for both storage kinds.
    #[test]
    fn atom() {
        let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
        let atom1 = Atom::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = Atom::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
        let atom3 = Atom::from(s3);
        assert_ne!(atom1, atom3);
    }

    /// `Namespace` conversion gives the same result for both storage kinds.
    #[test]
    fn namespace() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = Namespace::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = Namespace::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = Namespace::from(s3);
        assert_ne!(atom1, atom3);
    }

    /// `LocalName` conversion gives the same result for both storage kinds.
    #[test]
    fn localname() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = LocalName::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = LocalName::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = LocalName::from(s3);
        assert_ne!(atom1, atom3);
    }

    /// Only strings made purely of `a..=z` count as ASCII lowercase.
    #[test]
    fn is_ascii_lowercase() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert!(s.is_ascii_lowercase());
        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from("`aaaz");
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from("aaaz");
        assert!(s.is_ascii_lowercase());
    }

    /// `as_bytes` yields UTF-8 bytes for non-ASCII Latin-1 input and leaves
    /// ASCII and Rust-backed strings as they are.
    #[test]
    fn test_as_bytes() {
        const ASCII_SMALL_A: u8 = b'a';
        const ASCII_SMALL_Z: u8 = b'z';

        let v1 = vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'];
        let s = from_latin1(v1.clone());
        assert_eq!(
            *s.as_bytes(),
            [
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                UTF8_PILLCROW[0],
                UTF8_PILLCROW[1],
                ASCII_SMALL_A,
                ASCII_SMALL_A
            ]
        );

        let v2 = vec![b'a', b'a', b'a', b'a', b'z'];
        let s = from_latin1(v2.clone());
        assert_eq!(
            *s.as_bytes(),
            [
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_Z
            ]
        );

        let str = "abc%$²".to_owned();
        let s = DOMString::from(str.clone());
        assert_eq!(&*s.as_bytes(), str.as_bytes());
        let str = "AbBcC❤&%$#".to_owned();
        let s = DOMString::from(str.clone());
        assert_eq!(&*s.as_bytes(), str.as_bytes());
    }
}