1#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::conversions::{ToJSValConvertible, jsstr_to_string};
17use js::gc::MutableHandleValue;
18use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
19use js::jsval::StringValue;
20use js::rust::{Runtime, Trace};
21use malloc_size_of::MallocSizeOfOps;
22use num_traits::{ToPrimitive, Zero};
23use regex::Regex;
24use servo_base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27
28use crate::script_runtime::JSContext as SafeJSContext;
29use crate::trace::RootedTraceableBox;
30
/// The last byte of the ASCII range (DEL). Every byte up to and including this value is a
/// single, identical byte in UTF-8; larger Latin1 bytes take two bytes in UTF-8.
const ASCII_END: u8 = 0x7F;
/// `A`, the first ASCII uppercase letter.
const ASCII_CAPITAL_A: u8 = 0x41;
/// `Z`, the last ASCII uppercase letter.
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `a`, the first ASCII lowercase letter.
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `z`, the last ASCII lowercase letter.
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// Horizontal tab.
const ASCII_TAB: u8 = 0x09;
/// Line feed.
const ASCII_NEWLINE: u8 = 0x0A;
/// Form feed.
const ASCII_FORMFEED: u8 = 0x0C;
/// Carriage return.
const ASCII_CR: u8 = 0x0D;
/// Space.
const ASCII_SPACE: u8 = 0x20;
41
42unsafe fn get_latin1_string_bytes(
45 rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
46) -> &[u8] {
47 debug_assert!(!rooted_traceable_box.get().is_null());
48 let mut length = 0;
49 unsafe {
50 let chars = JS_GetLatin1StringCharsAndLength(
51 Runtime::get().expect("JS runtime has shut down").as_ptr(),
52 ptr::null(),
53 rooted_traceable_box.get(),
54 &mut length,
55 );
56 assert!(!chars.is_null());
57 slice::from_raw_parts(chars, length)
58 }
59}
60
/// A borrowed view of the bytes backing a string, tagged with the encoding of those bytes.
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// Latin1 bytes: each byte is one character.
    Latin1(Ref<'a, [u8]>),
    /// UTF-8 bytes.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Returns the underlying bytes without any conversion, whatever their encoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// Returns the number of bytes the contents occupy when encoded as UTF-8.
    pub fn len(&self) -> usize {
        match self {
            Self::Latin1(bytes) => {
                // Every ASCII byte (0x00..=0x7F, DEL included) stays a single byte in UTF-8;
                // each remaining Latin1 byte needs exactly one extra byte.
                let non_ascii = bytes.iter().filter(|byte| !byte.is_ascii()).count();
                bytes.len() + non_ascii
            },
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Returns `true` when there are no bytes at all.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
95
/// The storage behind a [`DOMString`].
enum DOMStringType {
    /// An owned Rust `String`.
    Rust(String),
    /// A rooted pointer to a string owned by the JS engine. Its contents are read as Latin1
    /// bytes through `get_latin1_string_bytes`.
    JSString(RootedTraceableBox<Heap<*mut JSString>>),
    /// Latin1 bytes held in a plain `Vec`; only compiled for tests, where it is used to build
    /// Latin1 strings without a JS runtime.
    #[cfg(test)]
    Latin1Vec(Vec<u8>),
}
106
107impl Default for DOMStringType {
108 fn default() -> Self {
109 Self::Rust(Default::default())
110 }
111}
112
impl DOMStringType {
    /// Returns the stored bytes exactly as they are held: UTF-8 for the `Rust` variant, Latin1
    /// for the other variants. No conversion takes place.
    fn as_raw_bytes(&self) -> &[u8] {
        match self {
            DOMStringType::Rust(s) => s.as_bytes(),
            DOMStringType::JSString(rooted_traceable_box) => unsafe {
                get_latin1_string_bytes(rooted_traceable_box)
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(items) => items,
        }
    }

    /// Converts the storage into the `Rust` variant (if it is not already) and returns a
    /// mutable reference to the owned string.
    ///
    /// Panics if a JS string has to be converted and the JS runtime is gone or the stored
    /// pointer is null.
    fn ensure_rust_string(&mut self) -> &mut String {
        let new_string = match self {
            // Already converted: hand out the string directly. The early return keeps the
            // long-lived borrow confined to this arm so `*self` can be reassigned below.
            DOMStringType::Rust(string) => return string,
            DOMStringType::JSString(rooted_traceable_box) => unsafe {
                jsstr_to_string(
                    Runtime::get().expect("JS runtime has shut down").as_ptr(),
                    NonNull::new(rooted_traceable_box.get()).unwrap(),
                )
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(items) => {
                // A Latin1 byte expands to at most two UTF-8 bytes, so twice the input length
                // is always a large enough output buffer.
                let mut v = vec![0; items.len() * 2];
                let real_size =
                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
                v.truncate(real_size);

                // The buffer was just produced by a Latin1 -> UTF-8 conversion.
                unsafe { String::from_utf8_unchecked(v) }
            },
        };
        *self = DOMStringType::Rust(new_string);
        // The storage is now `Rust`, so this call returns through the first arm.
        self.ensure_rust_string()
    }
}
154
155#[derive(Debug)]
158pub struct StringView<'a>(Ref<'a, str>);
159
160impl StringView<'_> {
161 pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
162 self.split(HTML_SPACE_CHARACTERS)
163 .filter(|string| !string.is_empty())
164 }
165}
166
167impl From<StringView<'_>> for String {
168 fn from(string_view: StringView<'_>) -> Self {
169 string_view.0.to_string()
170 }
171}
172
173impl Deref for StringView<'_> {
174 type Target = str;
175 fn deref(&self) -> &str {
176 &(self.0)
177 }
178}
179
180impl AsRef<str> for StringView<'_> {
181 fn as_ref(&self) -> &str {
182 &(self.0)
183 }
184}
185
186impl PartialEq for StringView<'_> {
187 fn eq(&self, other: &Self) -> bool {
188 self.0.eq(&*(other.0))
189 }
190}
191
192impl PartialEq<&str> for StringView<'_> {
193 fn eq(&self, other: &&str) -> bool {
194 self.0.eq(*other)
195 }
196}
197
198impl Eq for StringView<'_> {}
199
200impl PartialOrd for StringView<'_> {
201 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
202 self.0.partial_cmp(&**other)
203 }
204}
205
206impl Ord for StringView<'_> {
207 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
208 self.0.cmp(other)
209 }
210}
211
212unsafe impl Trace for DOMStringType {
218 unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
219 unsafe {
220 match self {
221 DOMStringType::Rust(_s) => {},
222 DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
223 #[cfg(test)]
224 DOMStringType::Latin1Vec(_s) => {},
225 }
226 }
227 }
228}
229
230impl malloc_size_of::MallocSizeOf for DOMStringType {
231 fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
232 match self {
233 DOMStringType::Rust(s) => s.size_of(ops),
234 DOMStringType::JSString(_rooted_traceable_box) => {
235 0
237 },
238 #[cfg(test)]
239 DOMStringType::Latin1Vec(s) => s.size_of(ops),
240 }
241 }
242}
243
244impl std::fmt::Debug for DOMStringType {
245 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
246 match self {
247 DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
248 DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
249 #[cfg(test)]
250 DOMStringType::Latin1Vec(s) => f
251 .debug_struct("DOMString")
252 .field("latin1_string", s)
253 .finish(),
254 }
255 }
256}
257
/// A string type for DOM code, wrapping a [`DOMStringType`] in a `RefCell`.
///
/// The `RefCell` lets methods that only take `&self` (for example [`DOMString::str`]) replace
/// a JS-backed or Latin1 storage with an owned Rust `String` on demand. As with any `RefCell`,
/// conflicting borrows are detected at runtime and panic.
#[repr(transparent)]
#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
296
297impl Clone for DOMString {
298 fn clone(&self) -> Self {
299 self.ensure_rust_string().clone().into()
300 }
301}
302
/// Errors that can occur while creating a [`DOMString`].
pub enum DOMStringErrorType {
    /// Converting the JS value to a string failed; returned by [`DOMString::from_js_string`]
    /// when the engine's `ToString` yields a null pointer.
    JSConversionError,
}
306
307impl DOMString {
308 pub fn new() -> DOMString {
310 Default::default()
311 }
312
313 pub fn from_js_string(
316 cx: SafeJSContext,
317 value: js::gc::HandleValue,
318 ) -> Result<DOMString, DOMStringErrorType> {
319 let string_ptr = unsafe { js::rust::ToString(*cx, value) };
320 if string_ptr.is_null() {
321 debug!("ToString failed");
322 Err(DOMStringErrorType::JSConversionError)
323 } else {
324 let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
325 let inner = if latin1 {
326 let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
327 DOMStringType::JSString(h)
328 } else {
329 DOMStringType::Rust(unsafe {
331 jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
332 })
333 };
334 Ok(DOMString(RefCell::new(inner)))
335 }
336 }
337
338 fn ensure_rust_string(&self) -> RefMut<'_, String> {
341 let inner = self.0.borrow_mut();
342 RefMut::map(inner, |inner| inner.ensure_rust_string())
343 }
344
345 #[expect(unused)]
347 fn debug_js(&self) {
348 match *self.0.borrow() {
349 DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
350 DOMStringType::JSString(ref rooted_traceable_box) => {
351 let s = unsafe {
352 jsstr_to_string(
353 Runtime::get().expect("JS runtime has shut down").as_ptr(),
354 ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
355 )
356 };
357 info!("JSString ({})", s);
358 },
359 #[cfg(test)]
360 DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
361 }
362 }
363
364 pub fn str(&self) -> StringView<'_> {
366 {
367 let inner = self.0.borrow();
368 if matches!(&*inner, DOMStringType::Rust(..)) {
369 return StringView(Ref::map(inner, |inner| match inner {
370 DOMStringType::Rust(string) => string.as_str(),
371 _ => unreachable!("Guaranteed by condition above"),
372 }));
373 }
374 }
375
376 self.ensure_rust_string();
377 self.str()
378 }
379
380 pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
383 let inner = self.0.borrow();
384 match &*inner {
385 DOMStringType::Rust(..) => {
386 EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
387 },
388 _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
389 }
390 }
391
392 pub fn clear(&mut self) {
393 let mut inner = self.0.borrow_mut();
394 let DOMStringType::Rust(string) = &mut *inner else {
395 *inner = DOMStringType::Rust(String::new());
396 return;
397 };
398 string.clear();
399 }
400
401 pub fn is_empty(&self) -> bool {
402 self.encoded_bytes().is_empty()
403 }
404
405 pub fn len(&self) -> usize {
410 self.encoded_bytes().len()
411 }
412
413 pub fn len_utf8(&self) -> Utf8CodeUnitLength {
420 Utf8CodeUnitLength(self.len())
421 }
422
423 pub fn len_utf16(&self) -> Utf16CodeUnitLength {
428 Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
429 }
430
431 pub fn make_ascii_lowercase(&mut self) {
432 self.0
433 .borrow_mut()
434 .ensure_rust_string()
435 .make_ascii_lowercase();
436 }
437
438 pub fn push_str(&mut self, string_to_push: &str) {
439 self.0
440 .borrow_mut()
441 .ensure_rust_string()
442 .push_str(string_to_push);
443 }
444
445 pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
447 if self.is_empty() {
448 return;
449 }
450
451 let mut inner = self.0.borrow_mut();
452 let string = inner.ensure_rust_string();
453 let trailing_whitespace_len = string
454 .trim_end_matches(|character: char| character.is_ascii_whitespace())
455 .len();
456 string.truncate(trailing_whitespace_len);
457 if string.is_empty() {
458 return;
459 }
460
461 let first_non_whitespace = string
462 .find(|character: char| !character.is_ascii_whitespace())
463 .unwrap();
464 string.replace_range(0..first_non_whitespace, "");
465 }
466
467 pub fn is_valid_floating_point_number_string(&self) -> bool {
469 static RE: LazyLock<Regex> = LazyLock::new(|| {
470 Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
471 });
472
473 RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
474 self.parse_floating_point_number().is_some()
475 }
476
477 pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
478 self.str().parse::<T>()
479 }
480
481 pub fn parse_floating_point_number(&self) -> Option<f64> {
483 parse_floating_point_number(&self.str())
484 }
485
486 pub fn set_best_representation_of_the_floating_point_number(&mut self) {
488 if let Some(val) = self.parse_floating_point_number() {
489 let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
491
492 *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
493 }
494 }
495
496 pub fn to_lowercase(&self) -> String {
497 self.str().to_lowercase()
498 }
499
500 pub fn to_uppercase(&self) -> String {
501 self.str().to_uppercase()
502 }
503
504 pub fn strip_newlines(&mut self) {
505 self.0
508 .borrow_mut()
509 .ensure_rust_string()
510 .retain(|character| character != '\r' && character != '\n');
511 }
512
513 pub fn normalize_newlines(&mut self) {
515 let mut inner = self.0.borrow_mut();
519 let string = inner.ensure_rust_string();
520 *string = string.replace("\r\n", "\n").replace("\r", "\n")
521 }
522
523 pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
524 let new_string = self.str().to_owned();
525 DOMString(RefCell::new(DOMStringType::Rust(
526 new_string.replace(needle, replace_char),
527 )))
528 }
529
530 pub fn starts_with(&self, c: char) -> bool {
532 if !c.is_ascii() {
533 self.str().starts_with(c)
534 } else {
535 self.encoded_bytes().bytes().starts_with(&[c as u8])
538 }
539 }
540
541 pub fn starts_with_str(&self, needle: &str) -> bool {
542 self.str().starts_with(needle)
543 }
544
545 pub fn ends_with_str(&self, needle: &str) -> bool {
546 self.str().ends_with(needle)
547 }
548
549 pub fn contains(&self, needle: &str) -> bool {
550 self.str().contains(needle)
551 }
552
553 pub fn to_ascii_lowercase(&self) -> String {
554 let conversion = match self.encoded_bytes() {
555 EncodedBytes::Latin1(bytes) => {
556 if bytes.iter().all(|c| *c <= ASCII_END) {
557 Some(unsafe {
559 String::from_utf8_unchecked(
560 bytes
561 .iter()
562 .map(|c| {
563 if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
564 c + 32
565 } else {
566 *c
567 }
568 })
569 .collect(),
570 )
571 })
572 } else {
573 None
574 }
575 },
576 EncodedBytes::Utf8(bytes) => unsafe {
577 Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
579 },
580 };
581 if let Some(conversion) = conversion {
583 conversion
584 } else {
585 self.str().to_ascii_lowercase()
586 }
587 }
588
589 fn contains_space_characters(
590 &self,
591 latin1_characters: &'static [u8],
592 utf8_characters: &'static [char],
593 ) -> bool {
594 match self.encoded_bytes() {
595 EncodedBytes::Latin1(items) => {
596 latin1_characters.iter().any(|byte| items.contains(byte))
597 },
598 EncodedBytes::Utf8(bytes) => {
599 let s = unsafe { str::from_utf8_unchecked(&bytes) };
601 s.contains(utf8_characters)
602 },
603 }
604 }
605
606 pub fn contains_tab_or_newline(&self) -> bool {
608 const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
609 const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
610
611 self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
612 }
613
614 pub fn contains_html_space_characters(&self) -> bool {
616 const SPACE_BYTES: [u8; 5] = [
617 ASCII_TAB,
618 ASCII_NEWLINE,
619 ASCII_FORMFEED,
620 ASCII_CR,
621 ASCII_SPACE,
622 ];
623 self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
624 }
625
626 pub fn as_bytes(&self) -> BytesView<'_> {
628 if self.is_ascii() {
632 BytesView(self.0.borrow())
633 } else {
634 self.ensure_rust_string();
635 BytesView(self.0.borrow())
636 }
637 }
638
639 pub fn is_ascii_lowercase(&self) -> bool {
641 match self.encoded_bytes() {
642 EncodedBytes::Latin1(items) => items
643 .iter()
644 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
645 EncodedBytes::Utf8(s) => s
646 .iter()
647 .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
648 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
649 }
650 }
651
652 pub fn is_ascii(&self) -> bool {
654 self.encoded_bytes().bytes().is_ascii()
655 }
656
657 pub fn is_valid_for_cookie(&self) -> bool {
661 match self.encoded_bytes() {
662 EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
663 .iter()
664 .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
665 }
666 }
667
668 fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
671 match self.encoded_bytes() {
672 EncodedBytes::Latin1(latin1_bytes) => {
674 if latin1_bytes.iter().all(|character| character.is_ascii()) {
675 return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
678 }
679 },
680 EncodedBytes::Utf8(utf8_bytes) => {
681 return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
684 },
685 };
686 callback(self.str().deref())
687 }
688}
689
/// Parses `input` as an `f64` after trimming surrounding whitespace.
///
/// Returns `None` when the trimmed text is not accepted by `f64`'s `FromStr`, when the value
/// is infinite or NaN, or when `input` ends with `.` or starts with `+`.
///
/// NOTE(review): the `.` / `+` checks look at the untrimmed `input`, so `" +5"` and `"5. "`
/// are accepted while `"+5"` and `"5."` are rejected — confirm whether that is intended.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    let value = input.trim().parse::<f64>().ok()?;

    let rejected = !value.is_finite() || input.ends_with('.') || input.starts_with('+');
    if rejected { None } else { Some(value) }
}
703
704pub struct BytesView<'a>(Ref<'a, DOMStringType>);
705
706impl Deref for BytesView<'_> {
707 type Target = [u8];
708
709 fn deref(&self) -> &Self::Target {
710 self.0.as_raw_bytes()
712 }
713}
714
715impl Ord for DOMString {
716 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
717 self.str().cmp(&other.str())
718 }
719}
720
721impl PartialOrd for DOMString {
722 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
723 self.str().partial_cmp(&other.str())
724 }
725}
726
727impl Extend<char> for DOMString {
728 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
729 self.0.borrow_mut().ensure_rust_string().extend(iter)
730 }
731}
732
733impl ToJSValConvertible for DOMString {
734 unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
735 let val = self.0.borrow();
736 match *val {
737 DOMStringType::Rust(ref s) => unsafe {
738 s.to_jsval(cx, rval);
739 },
740 DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
741 rval.set(StringValue(&*rooted_traceable_box.get()));
742 },
743 #[cfg(test)]
744 DOMStringType::Latin1Vec(ref items) => {
745 let mut v = vec![0; items.len() * 2];
746 let real_size =
747 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
748 v.truncate(real_size);
749
750 String::from_utf8(v)
751 .expect("Error in constructin test string")
752 .to_jsval(cx, rval);
753 },
754 };
755 }
756}
757
758impl std::hash::Hash for DOMString {
759 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
760 self.str().hash(state);
761 }
762}
763
764impl std::fmt::Display for DOMString {
765 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
766 fmt::Display::fmt(self.str().deref(), f)
767 }
768}
769
770impl std::cmp::PartialEq<str> for DOMString {
771 fn eq(&self, other: &str) -> bool {
772 if other.is_ascii() {
773 *other.as_bytes() == *self.encoded_bytes().bytes()
774 } else {
775 self.str().deref() == other
776 }
777 }
778}
779
780impl std::cmp::PartialEq<&str> for DOMString {
781 fn eq(&self, other: &&str) -> bool {
782 self.eq(*other)
783 }
784}
785
786impl std::cmp::PartialEq<String> for DOMString {
787 fn eq(&self, other: &String) -> bool {
788 self.eq(other.as_str())
789 }
790}
791
792impl std::cmp::PartialEq<DOMString> for String {
793 fn eq(&self, other: &DOMString) -> bool {
794 other.eq(self)
795 }
796}
797
798impl std::cmp::PartialEq<DOMString> for str {
799 fn eq(&self, other: &DOMString) -> bool {
800 other.eq(self)
801 }
802}
803
804impl std::cmp::PartialEq for DOMString {
805 fn eq(&self, other: &DOMString) -> bool {
806 let result = match (self.encoded_bytes(), other.encoded_bytes()) {
807 (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
808 Some(*bytes == *other_bytes)
809 },
810 (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
811 if other_bytes.is_ascii() =>
812 {
813 Some(*bytes == *other_bytes)
814 },
815 (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
816 Some(*bytes == *other_bytes)
817 },
818 (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
819 Some(*bytes == *other_bytes)
820 },
821 _ => None,
822 };
823
824 if let Some(eq_result) = result {
825 return eq_result;
826 }
827
828 *self.str() == *other.str()
829 }
830}
831
832impl std::cmp::Eq for DOMString {}
833
834impl From<std::string::String> for DOMString {
835 fn from(string: String) -> Self {
836 DOMString(RefCell::new(DOMStringType::Rust(string)))
837 }
838}
839
840impl From<&str> for DOMString {
841 fn from(string: &str) -> Self {
842 String::from(string).into()
843 }
844}
845
846impl From<DOMString> for LocalName {
847 fn from(dom_string: DOMString) -> LocalName {
848 dom_string.with_str_reference(|string| LocalName::from(string))
849 }
850}
851
852impl From<&DOMString> for LocalName {
853 fn from(dom_string: &DOMString) -> LocalName {
854 dom_string.with_str_reference(|string| LocalName::from(string))
855 }
856}
857
858impl From<DOMString> for Namespace {
859 fn from(dom_string: DOMString) -> Namespace {
860 dom_string.with_str_reference(|string| Namespace::from(string))
861 }
862}
863
864impl From<DOMString> for Atom {
865 fn from(dom_string: DOMString) -> Atom {
866 dom_string.with_str_reference(|string| Atom::from(string))
867 }
868}
869
870impl From<DOMString> for String {
871 fn from(val: DOMString) -> Self {
872 val.str().to_owned()
873 }
874}
875
876impl From<DOMString> for Vec<u8> {
877 fn from(value: DOMString) -> Self {
878 value.str().as_bytes().to_vec()
879 }
880}
881
882impl From<Cow<'_, str>> for DOMString {
883 fn from(value: Cow<'_, str>) -> Self {
884 DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
885 }
886}
887
/// Implementation detail of [`match_domstring_ascii!`]: expands the arm list into a chain of
/// `if`/`else` byte comparisons that ends in a `match` on the final pattern.
///
/// `$input` must offer a `bytes()` method. Each literal is checked with `debug_assert!` to be
/// ASCII, so a non-ASCII literal panics in debug builds once that comparison is reached.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // Literal arm: compare the literal's bytes with the input bytes, otherwise recurse on the
    // remaining arms.
    ($variant: expr, $input: expr, $ascii_literal: literal => $then: expr, $($rest:tt)*) => {
        if {
            debug_assert!(($ascii_literal).is_ascii());
            $ascii_literal.as_bytes()
        } == $input.bytes() {
            $then
        } else {
            $crate::match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // Final arm: a single pattern (typically `_`) followed by a mandatory trailing comma.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
907
/// Matches a string against ASCII string literals by comparing stored bytes.
///
/// `$input` must offer `encoded_bytes()`, as `DOMString` does. The arms are a list of
/// `"literal" => expression,` entries followed by one final `pattern => expression,` arm
/// (typically `_`); the trailing comma is required. The macro evaluates to the expression of
/// the first arm that matches.
///
/// Literals must be ASCII: this is enforced with `debug_assert!`, so a non-ASCII literal
/// panics in debug builds.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::domstring::EncodedBytes;

            let encoded_bytes = $input.encoded_bytes();
            // Both encodings expand to the same comparison chain.
            match encoded_bytes {
                EncodedBytes::Latin1(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, encoded_bytes, $($tail)*)
                }
                EncodedBytes::Utf8(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, encoded_bytes, $($tail)*)
                }

            }
        }
    };
}
943
944#[cfg(test)]
945mod tests {
946 use super::*;
947
    /// The pilcrow sign (¶) as a single Latin1 byte.
    const LATIN1_PILLCROW: u8 = 0xB6;
    /// The pilcrow sign (¶) as its two UTF-8 bytes.
    const UTF8_PILLCROW: [u8; 2] = [194, 182];
    /// Superscript two (²) as a single Latin1 byte.
    const LATIN1_POWER2: u8 = 0xB2;

    /// Builds a `DOMString` backed by the test-only `Latin1Vec` storage, so Latin1 code paths
    /// can be exercised without a JS runtime.
    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
    }
955
956 #[test]
957 fn string_functions() {
958 let s = DOMString::from("AbBcC❤&%$#");
959 let s_copy = s.clone();
960 assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
961 assert_eq!(s, s_copy);
962 assert_eq!(s.len(), 12);
963 assert_eq!(s_copy.len(), 12);
964 assert!(s.starts_with('A'));
965 let s2 = DOMString::from("");
966 assert!(s2.is_empty());
967 }
968
969 #[test]
970 fn string_functions_latin1() {
971 {
972 let s = from_latin1(vec![
973 b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
974 ]);
975 assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
976 }
977 {
978 let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
979 assert_eq!(s.to_ascii_lowercase(), "abbcc");
980 }
981 {
982 let s = from_latin1(vec![
983 b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
984 ]);
985 assert_eq!(s.len(), 11);
986 assert!(s.starts_with('A'));
987 }
988 {
989 let s = from_latin1(vec![]);
990 assert!(s.is_empty());
991 }
992 }
993
994 #[test]
995 fn test_length() {
996 let s1 = from_latin1(vec![
997 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
998 0xAE, 0xAF,
999 ]);
1000 let s2 = from_latin1(vec![
1001 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
1002 0xBE, 0xBF,
1003 ]);
1004 let s3 = from_latin1(vec![
1005 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
1006 0xCE, 0xCF,
1007 ]);
1008 let s4 = from_latin1(vec![
1009 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
1010 0xDE, 0xDF,
1011 ]);
1012 let s5 = from_latin1(vec![
1013 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
1014 0xEE, 0xEF,
1015 ]);
1016 let s6 = from_latin1(vec![
1017 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
1018 0xFE, 0xFF,
1019 ]);
1020
1021 let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
1022 let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
1023 let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
1024 let s4_utf8 = String::from("ÐÑÒÓÔÕÖרÙÚÛÜÝÞß");
1025 let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
1026 let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");
1027
1028 assert_eq!(s1.len(), s1_utf8.len());
1029 assert_eq!(s2.len(), s2_utf8.len());
1030 assert_eq!(s3.len(), s3_utf8.len());
1031 assert_eq!(s4.len(), s4_utf8.len());
1032 assert_eq!(s5.len(), s5_utf8.len());
1033 assert_eq!(s6.len(), s6_utf8.len());
1034
1035 s1.ensure_rust_string();
1036 s2.ensure_rust_string();
1037 s3.ensure_rust_string();
1038 s4.ensure_rust_string();
1039 s5.ensure_rust_string();
1040 s6.ensure_rust_string();
1041 assert_eq!(s1.len(), s1_utf8.len());
1042 assert_eq!(s2.len(), s2_utf8.len());
1043 assert_eq!(s3.len(), s3_utf8.len());
1044 assert_eq!(s4.len(), s4_utf8.len());
1045 assert_eq!(s5.len(), s5_utf8.len());
1046 assert_eq!(s6.len(), s6_utf8.len());
1047 }
1048
1049 #[test]
1050 fn test_convert() {
1051 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1052 s.ensure_rust_string();
1053 assert_eq!(&*s.str(), "abc%$");
1054 }
1055
1056 #[test]
1057 fn partial_eq() {
1058 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1059 let string = String::from("abc%$");
1060 let s2 = DOMString::from(string.clone());
1061 assert_eq!(s, s2);
1062 assert_eq!(s, string);
1063 }
1064
1065 #[test]
1066 fn encoded_latin1_bytes() {
1067 let original_latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
1068 let dom_string = from_latin1(original_latin1_bytes.clone());
1069 let string_latin1_bytes = match dom_string.encoded_bytes() {
1070 EncodedBytes::Latin1(bytes) => bytes,
1071 _ => unreachable!("Expected Latin1 encoded bytes"),
1072 };
1073 assert_eq!(*original_latin1_bytes, *string_latin1_bytes);
1074 }
1075
1076 #[test]
1077 fn testing_stringview() {
1078 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1079
1080 assert_eq!(
1081 s.str().chars().collect::<Vec<char>>(),
1082 vec!['a', 'b', 'c', '%', '$', '²']
1083 );
1084 assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
1085 }
1086
1087 #[test]
1092 fn test_hash() {
1093 use std::hash::{DefaultHasher, Hash, Hasher};
1094 fn hash_value(d: &DOMString) -> u64 {
1095 let mut hasher = DefaultHasher::new();
1096 d.hash(&mut hasher);
1097 hasher.finish()
1098 }
1099
1100 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1101 let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1102 s_converted.ensure_rust_string();
1103 let s2 = DOMString::from("abc%$²");
1104
1105 let hash_s = hash_value(&s);
1106 let hash_s_converted = hash_value(&s_converted);
1107 let hash_s2 = hash_value(&s2);
1108
1109 assert_eq!(hash_s, hash_s2);
1110 assert_eq!(hash_s, hash_s_converted);
1111 }
1112
    /// Uses `match_domstring_ascii!` in statement position: the arm whose literal equals the
    /// string runs, and arms that must not match would fail the test if taken.
    #[test]
    fn test_match_executing() {
        // Latin1 string equal to the first literal.
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abc" => assert!(true),
                "bcd" => assert!(false),
                _ => (),
            );
        }

        // Latin1 string containing punctuation.
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            match_domstring_ascii!( s,
                "abc/" => assert!(true),
                "bcd" => assert!(false),
                _ => (),
            );
        }

        // The matching literal is not the first arm.
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            match_domstring_ascii!( s,
                "bcd" => assert!(false),
                "abc%$" => assert!(true),
                _ => (),
            );
        }

        // Rust-backed string that matches no literal: the fallback arm runs.
        {
            let s = DOMString::from("abcde");
            match_domstring_ascii!( s,
                "abc" => assert!(false),
                "bcd" => assert!(false),
                _ => assert!(true),
            );
        }
        // Rust-backed string equal to the second literal.
        {
            let s = DOMString::from("abc%$");
            match_domstring_ascii!( s,
                "bcd" => assert!(false),
                "abc%$" => assert!(true),
                _ => (),
            );
        }
        // A literal that merely starts with the string must not match.
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abcdd" => assert!(false),
                "bcd" => assert!(false),
                _ => (),
            );
        }
    }
1169
1170 #[test]
1172 fn test_match_returning_result() {
1173 {
1174 let s = from_latin1(vec![b'a', b'b', b'c']);
1175 let res = match_domstring_ascii!( s,
1176 "abc" => true,
1177 "bcd" => false,
1178 _ => false,
1179 );
1180 assert_eq!(res, true);
1181 }
1182 {
1183 let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1184 let res = match_domstring_ascii!( s,
1185 "abc/" => true,
1186 "bcd" => false,
1187 _ => false,
1188 );
1189 assert_eq!(res, true);
1190 }
1191 {
1192 let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1193 let res = match_domstring_ascii!( s,
1194 "bcd" => false,
1195 "abc%$" => true,
1196 _ => false,
1197 );
1198 assert_eq!(res, true);
1199 }
1200
1201 {
1202 let s = DOMString::from("abcde");
1203 let res = match_domstring_ascii!( s,
1204 "abc" => false,
1205 "bcd" => false,
1206 _ => true,
1207 );
1208 assert_eq!(res, true);
1209 }
1210 {
1211 let s = DOMString::from("abc%$");
1212 let res = match_domstring_ascii!( s,
1213 "bcd" => false,
1214 "abc%$" => true,
1215 _ => false,
1216 );
1217 assert_eq!(res, true);
1218 }
1219 {
1220 let s = from_latin1(vec![b'a', b'b', b'c']);
1221 let res = match_domstring_ascii!( s,
1222 "abcdd" => false,
1223 "bcd" => false,
1224 _ => true,
1225 );
1226 assert_eq!(res, true);
1227 }
1228 }
1229
    /// A non-ASCII literal in the first arm trips the macro's `debug_assert!`.
    #[test]
    #[should_panic]
    fn test_match_panic() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "❤" => true,
            _ => false,);
    }

    /// A non-ASCII literal in a later arm also panics, once the earlier arms failed to match
    /// and its comparison is reached.
    #[test]
    #[should_panic]
    fn test_match_panic2() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "abc" => false,
            "❤" => true,
            _ => false,
        );
    }
1249
1250 #[test]
1251 fn test_strip_whitespace() {
1252 {
1253 let mut s = from_latin1(vec![
1254 b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
1255 ]);
1256
1257 s.strip_leading_and_trailing_ascii_whitespace();
1258 s.ensure_rust_string();
1259 assert_eq!(&*s.str(), "abc%$²");
1260 }
1261 {
1262 let mut s = DOMString::from(" \n abc%$ ");
1263
1264 s.strip_leading_and_trailing_ascii_whitespace();
1265 s.ensure_rust_string();
1266 assert_eq!(&*s.str(), "abc%$");
1267 }
1268 }
1269
1270 #[test]
1272 fn contains_html_space_characters() {
1273 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_TAB, b'a', b'a']); assert!(s.contains_html_space_characters());
1275 s.ensure_rust_string();
1276 assert!(s.contains_html_space_characters());
1277
1278 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_NEWLINE, b'a', b'a']); assert!(s.contains_html_space_characters());
1280 s.ensure_rust_string();
1281 assert!(s.contains_html_space_characters());
1282
1283 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_FORMFEED, b'a', b'a']); assert!(s.contains_html_space_characters());
1285 s.ensure_rust_string();
1286 assert!(s.contains_html_space_characters());
1287
1288 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_CR, b'a', b'a']); assert!(s.contains_html_space_characters());
1290 s.ensure_rust_string();
1291 assert!(s.contains_html_space_characters());
1292
1293 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']); assert!(s.contains_html_space_characters());
1295 s.ensure_rust_string();
1296 assert!(s.contains_html_space_characters());
1297
1298 let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
1299 assert!(!s.contains_html_space_characters());
1300 s.ensure_rust_string();
1301 assert!(!s.contains_html_space_characters());
1302 }
1303
1304 #[test]
1305 fn atom() {
1306 let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
1307 let atom1 = Atom::from(s);
1308 let s2 = DOMString::from("aaa aa");
1309 let atom2 = Atom::from(s2);
1310 assert_eq!(atom1, atom2);
1311 let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
1312 let atom3 = Atom::from(s3);
1313 assert_ne!(atom1, atom3);
1314 }
1315
1316 #[test]
1317 fn namespace() {
1318 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1319 let atom1 = Namespace::from(s);
1320 let s2 = DOMString::from("aaa aa");
1321 let atom2 = Namespace::from(s2);
1322 assert_eq!(atom1, atom2);
1323 let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1324 let atom3 = Namespace::from(s3);
1325 assert_ne!(atom1, atom3);
1326 }
1327
1328 #[test]
1329 fn localname() {
1330 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1331 let atom1 = LocalName::from(s);
1332 let s2 = DOMString::from("aaa aa");
1333 let atom2 = LocalName::from(s2);
1334 assert_eq!(atom1, atom2);
1335 let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1336 let atom3 = LocalName::from(s3);
1337 assert_ne!(atom1, atom3);
1338 }
1339
1340 #[test]
1341 fn is_ascii_lowercase() {
1342 let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1343 assert!(!s.is_ascii_lowercase());
1344 let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
1345 assert!(!s.is_ascii_lowercase());
1346 let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
1347 assert!(s.is_ascii_lowercase());
1348 let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
1349 assert!(!s.is_ascii_lowercase());
1350 let s = DOMString::from("`aaaz");
1351 assert!(!s.is_ascii_lowercase());
1352 let s = DOMString::from("aaaz");
1353 assert!(s.is_ascii_lowercase());
1354 }
1355
1356 #[test]
1357 fn test_as_bytes() {
1358 const ASCII_SMALL_A: u8 = b'a';
1359 const ASCII_SMALL_Z: u8 = b'z';
1360
1361 let v1 = vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'];
1362 let s = from_latin1(v1.clone());
1363 assert_eq!(
1364 *s.as_bytes(),
1365 [
1366 ASCII_SMALL_A,
1367 ASCII_SMALL_A,
1368 ASCII_SMALL_A,
1369 UTF8_PILLCROW[0],
1370 UTF8_PILLCROW[1],
1371 ASCII_SMALL_A,
1372 ASCII_SMALL_A
1373 ]
1374 );
1375
1376 let v2 = vec![b'a', b'a', b'a', b'a', b'z'];
1377 let s = from_latin1(v2.clone());
1378 assert_eq!(
1379 *s.as_bytes(),
1380 [
1381 ASCII_SMALL_A,
1382 ASCII_SMALL_A,
1383 ASCII_SMALL_A,
1384 ASCII_SMALL_A,
1385 ASCII_SMALL_Z
1386 ]
1387 );
1388
1389 let str = "abc%$²".to_owned();
1390 let s = DOMString::from(str.clone());
1391 assert_eq!(&*s.as_bytes(), str.as_bytes());
1392 let str = "AbBcC❤&%$#".to_owned();
1393 let s = DOMString::from(str.clone());
1394 assert_eq!(&*s.as_bytes(), str.as_bytes());
1395 }
1396}