1#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::conversions::{ToJSValConvertible, jsstr_to_string};
17use js::gc::{HandleValue, MutableHandleValue};
18use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
19use js::jsval::StringValue;
20use js::rust::{Runtime, Trace};
21use malloc_size_of::MallocSizeOfOps;
22use num_traits::{ToPrimitive, Zero};
23use regex::Regex;
24use servo_base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27use zeroize::Zeroize;
28
29use crate::trace::RootedTraceableBox;
30
// Byte values used when inspecting ASCII / Latin-1 encoded string data.

/// `~` (0x7E).
/// NOTE(review): DEL (0x7F) is also an ASCII byte but lies above this bound, so
/// `byte <= ASCII_END` is not the same test as `byte.is_ascii()` — confirm that every
/// user of this constant intends to exclude DEL.
const ASCII_END: u8 = 0x7E;
/// `A`
const ASCII_CAPITAL_A: u8 = 0x41;
/// `Z`
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `a`
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `z`
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// Horizontal tab.
const ASCII_TAB: u8 = 0x09;
/// Line feed.
const ASCII_NEWLINE: u8 = 0x0A;
/// Form feed.
const ASCII_FORMFEED: u8 = 0x0C;
/// Carriage return.
const ASCII_CR: u8 = 0x0D;
/// Space.
const ASCII_SPACE: u8 = 0x20;
41
42unsafe fn get_latin1_string_bytes(
45 rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
46) -> &[u8] {
47 debug_assert!(!rooted_traceable_box.get().is_null());
48 let mut length = 0;
49 unsafe {
50 let chars = JS_GetLatin1StringCharsAndLength(
51 Runtime::get().expect("JS runtime has shut down").as_ptr(),
52 ptr::null(),
53 rooted_traceable_box.get(),
54 &mut length,
55 );
56 assert!(!chars.is_null());
57 slice::from_raw_parts(chars, length)
58 }
59}
60
/// A borrowed view of the bytes backing a string, tagged with the encoding of those bytes.
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// Latin-1 bytes: every byte is exactly one character.
    Latin1(Ref<'a, [u8]>),
    /// UTF-8 bytes.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Returns the raw bytes exactly as stored, without any transcoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// Returns the number of bytes the string occupies once encoded as UTF-8.
    ///
    /// For Latin-1 data every ASCII byte (`0x00..=0x7F`, including DEL) encodes to one
    /// UTF-8 byte and every other byte (`0x80..=0xFF`) encodes to two.
    pub fn len(&self) -> usize {
        match self {
            Self::Latin1(bytes) => bytes
                .iter()
                .map(|byte| if byte.is_ascii() { 1 } else { 2 })
                .sum(),
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Returns `true` when the view contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
95
/// The storage behind a `DOMString`.
#[derive(Zeroize)]
enum DOMStringType {
    /// An owned Rust string (UTF-8).
    Rust(String),
    /// A rooted JS engine string whose bytes are read as Latin-1. It is owned by the JS
    /// engine, so it is skipped when zeroizing.
    #[zeroize(skip)]
    JSString(RootedTraceableBox<Heap<*mut JSString>>),
    /// Raw Latin-1 bytes; only available in tests, where it stands in for a JS string.
    #[cfg(test)]
    Latin1Vec(Vec<u8>),
}
108
109impl Default for DOMStringType {
110 fn default() -> Self {
111 Self::Rust(Default::default())
112 }
113}
114
115impl DOMStringType {
116 fn as_raw_bytes(&self) -> &[u8] {
121 match self {
122 DOMStringType::Rust(s) => s.as_bytes(),
123 DOMStringType::JSString(rooted_traceable_box) => unsafe {
124 get_latin1_string_bytes(rooted_traceable_box)
125 },
126 #[cfg(test)]
127 DOMStringType::Latin1Vec(items) => items,
128 }
129 }
130
131 fn ensure_rust_string(&mut self) -> &mut String {
132 let new_string = match self {
133 DOMStringType::Rust(string) => return string,
134 DOMStringType::JSString(rooted_traceable_box) => unsafe {
135 jsstr_to_string(
136 Runtime::get().expect("JS runtime has shut down").as_ptr(),
137 NonNull::new(rooted_traceable_box.get()).unwrap(),
138 )
139 },
140 #[cfg(test)]
141 DOMStringType::Latin1Vec(items) => {
142 let mut v = vec![0; items.len() * 2];
143 let real_size =
144 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
145 v.truncate(real_size);
146
147 unsafe { String::from_utf8_unchecked(v) }
150 },
151 };
152 *self = DOMStringType::Rust(new_string);
153 self.ensure_rust_string()
154 }
155}
156
/// A borrowed `str` held through a [`Ref`] guard; it dereferences to `str`, so the usual
/// string slice methods are available on it.
#[derive(Debug)]
pub struct StringView<'a>(Ref<'a, str>);
161
162impl StringView<'_> {
163 pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
164 self.split(HTML_SPACE_CHARACTERS)
165 .filter(|string| !string.is_empty())
166 }
167}
168
169impl From<StringView<'_>> for String {
170 fn from(string_view: StringView<'_>) -> Self {
171 string_view.0.to_string()
172 }
173}
174
175impl Deref for StringView<'_> {
176 type Target = str;
177 fn deref(&self) -> &str {
178 &(self.0)
179 }
180}
181
182impl AsRef<str> for StringView<'_> {
183 fn as_ref(&self) -> &str {
184 &(self.0)
185 }
186}
187
188impl PartialEq for StringView<'_> {
189 fn eq(&self, other: &Self) -> bool {
190 self.0.eq(&*(other.0))
191 }
192}
193
194impl PartialEq<&str> for StringView<'_> {
195 fn eq(&self, other: &&str) -> bool {
196 self.0.eq(*other)
197 }
198}
199
// Marker impl: equality of views is plain `str` equality (see the `PartialEq` impl).
impl Eq for StringView<'_> {}
201
202impl PartialOrd for StringView<'_> {
203 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
204 self.0.partial_cmp(&**other)
205 }
206}
207
208impl Ord for StringView<'_> {
209 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
210 self.0.cmp(other)
211 }
212}
213
214unsafe impl Trace for DOMStringType {
220 unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
221 unsafe {
222 match self {
223 DOMStringType::Rust(_s) => {},
224 DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
225 #[cfg(test)]
226 DOMStringType::Latin1Vec(_s) => {},
227 }
228 }
229 }
230}
231
232impl malloc_size_of::MallocSizeOf for DOMStringType {
233 fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
234 match self {
235 DOMStringType::Rust(s) => s.size_of(ops),
236 DOMStringType::JSString(_rooted_traceable_box) => {
237 0
239 },
240 #[cfg(test)]
241 DOMStringType::Latin1Vec(s) => s.size_of(ops),
242 }
243 }
244}
245
246impl std::fmt::Debug for DOMStringType {
247 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248 match self {
249 DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
250 DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
251 #[cfg(test)]
252 DOMStringType::Latin1Vec(s) => f
253 .debug_struct("DOMString")
254 .field("latin1_string", s)
255 .finish(),
256 }
257 }
258}
259
/// A string that is stored either as a Rust `String` or as a JS engine string.
///
/// The storage sits in a [`RefCell`] so that it can be converted to a Rust string on demand
/// through a shared reference. Methods that hand out guards ([`DOMString::str`],
/// [`DOMString::encoded_bytes`], [`DOMString::as_bytes`]) keep the cell borrowed for as long
/// as the guard is alive.
#[repr(transparent)]
#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
298
299impl Clone for DOMString {
300 fn clone(&self) -> Self {
301 self.ensure_rust_string().clone().into()
302 }
303}
304
/// Errors that can occur while creating a [`DOMString`].
pub enum DOMStringErrorType {
    /// Converting the JS value to a JS string failed.
    JSConversionError,
}
308
309impl DOMString {
310 pub fn new() -> DOMString {
312 Default::default()
313 }
314
315 pub fn from_js_string(
318 cx: &mut js::context::JSContext,
319 value: HandleValue,
320 ) -> Result<DOMString, DOMStringErrorType> {
321 let string_ptr = unsafe { js::rust::ToString(cx.raw_cx(), value) };
322 if string_ptr.is_null() {
323 debug!("ToString failed");
324 Err(DOMStringErrorType::JSConversionError)
325 } else {
326 let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
327 let inner = if latin1 {
328 let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
329 DOMStringType::JSString(h)
330 } else {
331 DOMStringType::Rust(unsafe {
333 jsstr_to_string(cx.raw_cx(), ptr::NonNull::new(string_ptr).unwrap())
334 })
335 };
336 Ok(DOMString(RefCell::new(inner)))
337 }
338 }
339
340 fn ensure_rust_string(&self) -> RefMut<'_, String> {
343 let inner = self.0.borrow_mut();
344 RefMut::map(inner, |inner| inner.ensure_rust_string())
345 }
346
347 #[expect(unused)]
349 fn debug_js(&self) {
350 match *self.0.borrow() {
351 DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
352 DOMStringType::JSString(ref rooted_traceable_box) => {
353 let s = unsafe {
354 jsstr_to_string(
355 Runtime::get().expect("JS runtime has shut down").as_ptr(),
356 ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
357 )
358 };
359 info!("JSString ({})", s);
360 },
361 #[cfg(test)]
362 DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
363 }
364 }
365
366 pub fn str(&self) -> StringView<'_> {
368 {
369 let inner = self.0.borrow();
370 if matches!(&*inner, DOMStringType::Rust(..)) {
371 return StringView(Ref::map(inner, |inner| match inner {
372 DOMStringType::Rust(string) => string.as_str(),
373 _ => unreachable!("Guaranteed by condition above"),
374 }));
375 }
376 }
377
378 self.ensure_rust_string();
379 self.str()
380 }
381
382 pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
385 let inner = self.0.borrow();
386 match &*inner {
387 DOMStringType::Rust(..) => {
388 EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
389 },
390 _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
391 }
392 }
393
394 pub fn clear(&mut self) {
395 let mut inner = self.0.borrow_mut();
396 let DOMStringType::Rust(string) = &mut *inner else {
397 *inner = DOMStringType::Rust(String::new());
398 return;
399 };
400 string.clear();
401 }
402
403 pub fn is_empty(&self) -> bool {
404 self.encoded_bytes().is_empty()
405 }
406
407 pub fn len(&self) -> usize {
412 self.encoded_bytes().len()
413 }
414
415 pub fn len_utf8(&self) -> Utf8CodeUnitLength {
422 Utf8CodeUnitLength(self.len())
423 }
424
425 pub fn len_utf16(&self) -> Utf16CodeUnitLength {
430 Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
431 }
432
433 pub fn make_ascii_lowercase(&mut self) {
434 self.0
435 .borrow_mut()
436 .ensure_rust_string()
437 .make_ascii_lowercase();
438 }
439
440 pub fn push_str(&mut self, string_to_push: &str) {
441 self.0
442 .borrow_mut()
443 .ensure_rust_string()
444 .push_str(string_to_push);
445 }
446
447 pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
449 if self.is_empty() {
450 return;
451 }
452
453 let mut inner = self.0.borrow_mut();
454 let string = inner.ensure_rust_string();
455 let trailing_whitespace_len = string
456 .trim_end_matches(|character: char| character.is_ascii_whitespace())
457 .len();
458 string.truncate(trailing_whitespace_len);
459 if string.is_empty() {
460 return;
461 }
462
463 let first_non_whitespace = string
464 .find(|character: char| !character.is_ascii_whitespace())
465 .unwrap();
466 string.replace_range(0..first_non_whitespace, "");
467 }
468
469 pub fn is_valid_floating_point_number_string(&self) -> bool {
471 static RE: LazyLock<Regex> = LazyLock::new(|| {
472 Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
473 });
474
475 RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
476 self.parse_floating_point_number().is_some()
477 }
478
479 pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
480 self.str().parse::<T>()
481 }
482
483 pub fn parse_floating_point_number(&self) -> Option<f64> {
485 parse_floating_point_number(&self.str())
486 }
487
488 pub fn set_best_representation_of_the_floating_point_number(&mut self) {
490 if let Some(val) = self.parse_floating_point_number() {
491 let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
493
494 *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
495 }
496 }
497
498 pub fn to_lowercase(&self) -> String {
499 self.str().to_lowercase()
500 }
501
502 pub fn to_uppercase(&self) -> String {
503 self.str().to_uppercase()
504 }
505
506 pub fn strip_newlines(&mut self) {
507 self.0
510 .borrow_mut()
511 .ensure_rust_string()
512 .retain(|character| character != '\r' && character != '\n');
513 }
514
515 pub fn normalize_newlines(&mut self) {
517 let mut inner = self.0.borrow_mut();
521 let string = inner.ensure_rust_string();
522 *string = string.replace("\r\n", "\n").replace("\r", "\n")
523 }
524
525 pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
526 let new_string = self.str().to_owned();
527 DOMString(RefCell::new(DOMStringType::Rust(
528 new_string.replace(needle, replace_char),
529 )))
530 }
531
532 pub fn starts_with(&self, c: char) -> bool {
534 if !c.is_ascii() {
535 self.str().starts_with(c)
536 } else {
537 self.encoded_bytes().bytes().starts_with(&[c as u8])
540 }
541 }
542
543 pub fn starts_with_str(&self, needle: &str) -> bool {
544 self.str().starts_with(needle)
545 }
546
547 pub fn ends_with_str(&self, needle: &str) -> bool {
548 self.str().ends_with(needle)
549 }
550
551 pub fn contains(&self, needle: &str) -> bool {
552 self.str().contains(needle)
553 }
554
555 pub fn to_ascii_lowercase(&self) -> String {
556 let conversion = match self.encoded_bytes() {
557 EncodedBytes::Latin1(bytes) => {
558 if bytes.iter().all(|c| *c <= ASCII_END) {
559 Some(unsafe {
561 String::from_utf8_unchecked(
562 bytes
563 .iter()
564 .map(|c| {
565 if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
566 c + 32
567 } else {
568 *c
569 }
570 })
571 .collect(),
572 )
573 })
574 } else {
575 None
576 }
577 },
578 EncodedBytes::Utf8(bytes) => unsafe {
579 Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
581 },
582 };
583 if let Some(conversion) = conversion {
585 conversion
586 } else {
587 self.str().to_ascii_lowercase()
588 }
589 }
590
591 fn contains_space_characters(
592 &self,
593 latin1_characters: &'static [u8],
594 utf8_characters: &'static [char],
595 ) -> bool {
596 match self.encoded_bytes() {
597 EncodedBytes::Latin1(items) => {
598 latin1_characters.iter().any(|byte| items.contains(byte))
599 },
600 EncodedBytes::Utf8(bytes) => {
601 let s = unsafe { str::from_utf8_unchecked(&bytes) };
603 s.contains(utf8_characters)
604 },
605 }
606 }
607
608 pub fn contains_tab_or_newline(&self) -> bool {
610 const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
611 const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
612
613 self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
614 }
615
616 pub fn contains_html_space_characters(&self) -> bool {
618 const SPACE_BYTES: [u8; 5] = [
619 ASCII_TAB,
620 ASCII_NEWLINE,
621 ASCII_FORMFEED,
622 ASCII_CR,
623 ASCII_SPACE,
624 ];
625 self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
626 }
627
628 pub fn as_bytes(&self) -> BytesView<'_> {
630 if self.is_ascii() {
634 BytesView(self.0.borrow())
635 } else {
636 self.ensure_rust_string();
637 BytesView(self.0.borrow())
638 }
639 }
640
641 pub fn is_ascii_lowercase(&self) -> bool {
643 match self.encoded_bytes() {
644 EncodedBytes::Latin1(items) => items
645 .iter()
646 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
647 EncodedBytes::Utf8(s) => s
648 .iter()
649 .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
650 .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
651 }
652 }
653
654 pub fn is_ascii(&self) -> bool {
656 self.encoded_bytes().bytes().is_ascii()
657 }
658
659 pub fn is_valid_for_cookie(&self) -> bool {
663 match self.encoded_bytes() {
664 EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
665 .iter()
666 .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
667 }
668 }
669
670 fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
673 match self.encoded_bytes() {
674 EncodedBytes::Latin1(latin1_bytes) => {
676 if latin1_bytes.iter().all(|character| character.is_ascii()) {
677 return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
680 }
681 },
682 EncodedBytes::Utf8(utf8_bytes) => {
683 return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
686 },
687 };
688 callback(self.str().deref())
689 }
690
691 pub fn normalize_crlf(&self) -> String {
699 let s = self.str();
700 let mut buf = String::new();
701 let mut prev = ' ';
702 for ch in s.chars() {
703 match ch {
704 '\n' if prev != '\r' => {
705 buf.push('\r');
706 buf.push('\n');
707 },
708 '\n' => {
709 buf.push('\n');
710 },
711 _ if prev == '\r' => {
714 buf.push('\n');
715 buf.push(ch);
716 },
717 _ => buf.push(ch),
718 };
719 prev = ch;
720 }
721 if prev == '\r' {
723 buf.push('\n');
724 }
725 buf
726 }
727}
728
/// Parses `input` as a floating-point number, ignoring surrounding whitespace.
///
/// Everything accepted by `str::parse::<f64>` is accepted, except for values that are
/// infinite or NaN, numbers written with a leading `+` and numbers written with a trailing
/// `.`. The sign and dot checks are made on the trimmed text, so surrounding whitespace
/// cannot be used to get around them.
///
/// Returns `None` when the input is rejected.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    let trimmed = input.trim();
    if trimmed.starts_with('+') || trimmed.ends_with('.') {
        return None;
    }
    trimmed
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite())
}
742
/// A borrowed view of a string's stored bytes; it dereferences to `[u8]` and keeps the
/// string's inner cell borrowed for as long as it is alive.
pub struct BytesView<'a>(Ref<'a, DOMStringType>);
744
745impl Deref for BytesView<'_> {
746 type Target = [u8];
747
748 fn deref(&self) -> &Self::Target {
749 self.0.as_raw_bytes()
751 }
752}
753
754impl Ord for DOMString {
755 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
756 self.str().cmp(&other.str())
757 }
758}
759
760impl PartialOrd for DOMString {
761 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
762 self.str().partial_cmp(&other.str())
763 }
764}
765
766impl Extend<char> for DOMString {
767 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
768 self.0.borrow_mut().ensure_rust_string().extend(iter)
769 }
770}
771
772impl ToJSValConvertible for DOMString {
773 unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
774 let val = self.0.borrow();
775 match *val {
776 DOMStringType::Rust(ref s) => unsafe {
777 s.to_jsval(cx, rval);
778 },
779 DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
780 rval.set(StringValue(&*rooted_traceable_box.get()));
781 },
782 #[cfg(test)]
783 DOMStringType::Latin1Vec(ref items) => {
784 let mut v = vec![0; items.len() * 2];
785 let real_size =
786 encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
787 v.truncate(real_size);
788
789 String::from_utf8(v)
790 .expect("Error in constructin test string")
791 .to_jsval(cx, rval);
792 },
793 };
794 }
795}
796
797impl std::hash::Hash for DOMString {
798 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
799 self.str().hash(state);
800 }
801}
802
803impl std::fmt::Display for DOMString {
804 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
805 fmt::Display::fmt(self.str().deref(), f)
806 }
807}
808
809impl std::cmp::PartialEq<str> for DOMString {
810 fn eq(&self, other: &str) -> bool {
811 if other.is_ascii() {
812 *other.as_bytes() == *self.encoded_bytes().bytes()
813 } else {
814 self.str().deref() == other
815 }
816 }
817}
818
819impl std::cmp::PartialEq<&str> for DOMString {
820 fn eq(&self, other: &&str) -> bool {
821 self.eq(*other)
822 }
823}
824
825impl std::cmp::PartialEq<String> for DOMString {
826 fn eq(&self, other: &String) -> bool {
827 self.eq(other.as_str())
828 }
829}
830
831impl std::cmp::PartialEq<DOMString> for String {
832 fn eq(&self, other: &DOMString) -> bool {
833 other.eq(self)
834 }
835}
836
837impl std::cmp::PartialEq<DOMString> for str {
838 fn eq(&self, other: &DOMString) -> bool {
839 other.eq(self)
840 }
841}
842
843impl std::cmp::PartialEq for DOMString {
844 fn eq(&self, other: &DOMString) -> bool {
845 let result = match (self.encoded_bytes(), other.encoded_bytes()) {
846 (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
847 Some(*bytes == *other_bytes)
848 },
849 (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
850 if other_bytes.is_ascii() =>
851 {
852 Some(*bytes == *other_bytes)
853 },
854 (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
855 Some(*bytes == *other_bytes)
856 },
857 (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
858 Some(*bytes == *other_bytes)
859 },
860 _ => None,
861 };
862
863 if let Some(eq_result) = result {
864 return eq_result;
865 }
866
867 *self.str() == *other.str()
868 }
869}
870
// Marker impl: `DOMString` equality is comparison of the contained text (see `PartialEq`).
impl std::cmp::Eq for DOMString {}
872
873impl From<std::string::String> for DOMString {
874 fn from(string: String) -> Self {
875 DOMString(RefCell::new(DOMStringType::Rust(string)))
876 }
877}
878
879impl From<&str> for DOMString {
880 fn from(string: &str) -> Self {
881 String::from(string).into()
882 }
883}
884
885impl From<DOMString> for LocalName {
886 fn from(dom_string: DOMString) -> LocalName {
887 dom_string.with_str_reference(|string| LocalName::from(string))
888 }
889}
890
891impl From<&DOMString> for LocalName {
892 fn from(dom_string: &DOMString) -> LocalName {
893 dom_string.with_str_reference(|string| LocalName::from(string))
894 }
895}
896
897impl From<DOMString> for Namespace {
898 fn from(dom_string: DOMString) -> Namespace {
899 dom_string.with_str_reference(|string| Namespace::from(string))
900 }
901}
902
903impl From<DOMString> for Atom {
904 fn from(dom_string: DOMString) -> Atom {
905 dom_string.with_str_reference(|string| Atom::from(string))
906 }
907}
908
909impl From<DOMString> for String {
910 fn from(val: DOMString) -> Self {
911 val.ensure_rust_string();
912 let inner = val.0.take();
913 match inner {
914 DOMStringType::Rust(s) => s,
915 DOMStringType::JSString(_) => unreachable!(),
916 #[cfg(test)]
917 DOMStringType::Latin1Vec(items) => String::from_utf8(items).expect("Not valid latin1"),
918 }
919 }
920}
921
922impl From<DOMString> for Vec<u8> {
923 fn from(value: DOMString) -> Self {
924 value.ensure_rust_string();
925 let inner = value.0.take();
926 match inner {
927 DOMStringType::Rust(s) => s.into_bytes(),
928 DOMStringType::JSString(_) => unreachable!(),
929 #[cfg(test)]
930 DOMStringType::Latin1Vec(items) => items,
931 }
932 }
933}
934
935impl From<Cow<'_, str>> for DOMString {
936 fn from(value: Cow<'_, str>) -> Self {
937 DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
938 }
939}
940
941impl Zeroize for DOMString {
942 fn zeroize(&mut self) {
943 self.0.get_mut().zeroize();
944 }
945}
946
/// Implementation detail of `match_domstring_ascii!`: expands the list of arms into a chain
/// of `if`/`else` byte comparisons that ends in a regular `match` on the final pattern arm.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // A string literal arm followed by more arms: compare the literal's bytes with the
    // input's bytes and recurse into the remaining arms when they differ. Debug builds
    // assert that the literal is ASCII.
    ($variant: expr, $input: expr, $ascii_literal: literal => $then: expr, $($rest:tt)*) => {
        if {
            debug_assert!(($ascii_literal).is_ascii());
            $ascii_literal.as_bytes()
        } == $input.bytes() {
            $then
        } else {
            $crate::match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // The last arm: a pattern (typically `_`) that is matched against the input itself.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
966
/// Matches a string against a list of ASCII string literals by comparing its stored bytes,
/// so the string is not converted to a Rust string first.
///
/// `$input` must provide `encoded_bytes()`. The arms are string literals followed by one
/// final pattern arm, and every arm, including the last, ends with a comma:
///
/// ```ignore
/// let is_get = match_domstring_ascii!(method,
///     "get" => true,
///     _ => false,
/// );
/// ```
///
/// Debug builds assert that every literal that gets compared is ASCII.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::domstring::EncodedBytes;

            let encoded_bytes = $input.encoded_bytes();
            // Both encodings expand to the same comparison chain over the raw bytes.
            match encoded_bytes {
                EncodedBytes::Latin1(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, encoded_bytes, $($tail)*)
                }
                EncodedBytes::Utf8(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, encoded_bytes, $($tail)*)
                }

            }
        }
    };
}
1002
#[cfg(test)]
mod tests {
    use super::*;

    /// `¶` in Latin-1.
    const LATIN1_PILLCROW: u8 = 0xB6;
    /// `¶` in UTF-8.
    const UTF8_PILLCROW: [u8; 2] = [194, 182];
    /// `²` in Latin-1.
    const LATIN1_POWER2: u8 = 0xB2;

    /// Builds a `DOMString` backed by raw Latin-1 bytes, standing in for a JS string.
    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
    }

    #[test]
    fn string_functions() {
        let s = DOMString::from("AbBcC❤&%$#");
        let s_copy = s.clone();
        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
        assert_eq!(s, s_copy);
        assert_eq!(s.len(), 12);
        assert_eq!(s_copy.len(), 12);
        assert!(s.starts_with('A'));
        let s2 = DOMString::from("");
        assert!(s2.is_empty());
    }

    #[test]
    fn string_functions_latin1() {
        {
            let s = from_latin1(vec![
                b'A',
                b'b',
                b'B',
                b'c',
                b'C',
                b'&',
                b'%',
                b'$',
                b'#',
                LATIN1_POWER2,
            ]);
            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
        }
        {
            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
            assert_eq!(s.to_ascii_lowercase(), "abbcc");
        }
        {
            let s = from_latin1(vec![
                b'A',
                b'b',
                b'B',
                b'c',
                b'C',
                b'&',
                b'%',
                b'$',
                b'#',
                LATIN1_POWER2,
            ]);
            assert_eq!(s.len(), 11);
            assert!(s.starts_with('A'));
        }
        {
            let s = from_latin1(vec![]);
            assert!(s.is_empty());
        }
    }

    /// The UTF-8 length of Latin-1 data must be the same before and after the conversion
    /// to a Rust string, for every non-ASCII printable Latin-1 byte.
    #[test]
    fn test_length() {
        // Each row is the first byte of a run of sixteen consecutive Latin-1 bytes and the
        // same sixteen characters written as UTF-8.
        let rows: [(u8, &str); 6] = [
            (0xA0, "\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯"),
            (0xB0, "°±²³´µ¶·¸¹º»¼½¾¿"),
            (0xC0, "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ"),
            (0xD0, "ÐÑÒÓÔÕÖ\u{00D7}\u{00D8}ÙÚÛÜÝÞß"),
            (0xE0, "àáâãäåæçèéêëìíîï"),
            (0xF0, "ðñòóôõö÷øùúûüýþÿ"),
        ];

        for (first_byte, utf8) in rows {
            assert_eq!(utf8.chars().count(), 16);

            let s = from_latin1((first_byte..=first_byte + 0x0F).collect());
            assert_eq!(s.len(), utf8.len());

            s.ensure_rust_string();
            assert_eq!(s.len(), utf8.len());
            assert_eq!(&*s.str(), utf8);
        }
    }

    #[test]
    fn test_convert() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        s.ensure_rust_string();
        assert_eq!(&*s.str(), "abc%$");
    }

    #[test]
    fn partial_eq() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        let string = String::from("abc%$");
        let s2 = DOMString::from(string.clone());
        assert_eq!(s, s2);
        assert_eq!(s, string);
    }

    #[test]
    fn encoded_latin1_bytes() {
        let original_latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2];
        let dom_string = from_latin1(original_latin1_bytes.clone());
        let string_latin1_bytes = match dom_string.encoded_bytes() {
            EncodedBytes::Latin1(bytes) => bytes,
            _ => unreachable!("Expected Latin1 encoded bytes"),
        };
        assert_eq!(*original_latin1_bytes, *string_latin1_bytes);
    }

    #[test]
    fn testing_stringview() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);

        assert_eq!(
            s.str().chars().collect::<Vec<char>>(),
            vec!['a', 'b', 'c', '%', '$', '²']
        );
        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
    }

    /// Equal strings must hash equally, however they are stored.
    #[test]
    fn test_hash() {
        use std::hash::{DefaultHasher, Hash, Hasher};
        fn hash_value(d: &DOMString) -> u64 {
            let mut hasher = DefaultHasher::new();
            d.hash(&mut hasher);
            hasher.finish()
        }

        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);
        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);
        s_converted.ensure_rust_string();
        let s2 = DOMString::from("abc%$²");

        let hash_s = hash_value(&s);
        let hash_s_converted = hash_value(&s_converted);
        let hash_s2 = hash_value(&s2);

        assert_eq!(hash_s, hash_s2);
        assert_eq!(hash_s, hash_s_converted);
    }

    /// Arms whose bodies are statements: the expected arm does nothing, every other arm
    /// fails the test.
    #[test]
    fn test_match_executing() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abc" => (),
                "bcd" => panic!("matched the wrong literal"),
                _ => panic!("no literal matched"),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            match_domstring_ascii!( s,
                "abc/" => (),
                "bcd" => panic!("matched the wrong literal"),
                _ => panic!("no literal matched"),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            match_domstring_ascii!( s,
                "bcd" => panic!("matched the wrong literal"),
                "abc%$" => (),
                _ => panic!("no literal matched"),
            );
        }

        {
            let s = DOMString::from("abcde");
            match_domstring_ascii!( s,
                "abc" => panic!("matched the wrong literal"),
                "bcd" => panic!("matched the wrong literal"),
                _ => (),
            );
        }
        {
            let s = DOMString::from("abc%$");
            match_domstring_ascii!( s,
                "bcd" => panic!("matched the wrong literal"),
                "abc%$" => (),
                _ => panic!("no literal matched"),
            );
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abcdd" => panic!("matched the wrong literal"),
                "bcd" => panic!("matched the wrong literal"),
                _ => (),
            );
        }
    }

    /// Arms whose bodies are values: only the expected arm yields `true`.
    #[test]
    fn test_match_returning_result() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abc" => true,
                "bcd" => false,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            let res = match_domstring_ascii!( s,
                "abc/" => true,
                "bcd" => false,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert!(res);
        }

        {
            let s = DOMString::from("abcde");
            let res = match_domstring_ascii!( s,
                "abc" => false,
                "bcd" => false,
                _ => true,
            );
            assert!(res);
        }
        {
            let s = DOMString::from("abc%$");
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abcdd" => false,
                "bcd" => false,
                _ => true,
            );
            assert!(res);
        }
    }

    /// A non-ASCII literal trips the macro's debug assertion.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_match_panic() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "❤" => true,
            _ => false,);
    }

    /// The debug assertion also fires for a non-ASCII literal that is not the first arm.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_match_panic2() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "abc" => false,
            "❤" => true,
            _ => false,
        );
    }

    #[test]
    fn test_strip_whitespace() {
        {
            let mut s = from_latin1(vec![
                b' ',
                b' ',
                b' ',
                b'\n',
                b' ',
                b'a',
                b'b',
                b'c',
                b'%',
                b'$',
                LATIN1_POWER2,
                b' ',
            ]);

            s.strip_leading_and_trailing_ascii_whitespace();
            s.ensure_rust_string();
            assert_eq!(&*s.str(), "abc%$²");
        }
        {
            let mut s = DOMString::from("   \n  abc%$ ");

            s.strip_leading_and_trailing_ascii_whitespace();
            s.ensure_rust_string();
            assert_eq!(&*s.str(), "abc%$");
        }
    }

    /// Every HTML space character is found, both in Latin-1 storage and after the
    /// conversion to a Rust string.
    #[test]
    fn contains_html_space_characters() {
        let space_bytes = [
            ASCII_TAB,
            ASCII_NEWLINE,
            ASCII_FORMFEED,
            ASCII_CR,
            ASCII_SPACE,
        ];
        for space_byte in space_bytes {
            let s = from_latin1(vec![b'a', b'a', b'a', space_byte, b'a', b'a']);
            assert!(s.contains_html_space_characters());
            s.ensure_rust_string();
            assert!(s.contains_html_space_characters());
        }

        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
        assert!(!s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(!s.contains_html_space_characters());
    }

    #[test]
    fn atom() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = Atom::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = Atom::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = Atom::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn namespace() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = Namespace::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = Namespace::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = Namespace::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn localname() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = LocalName::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = LocalName::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = LocalName::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn is_ascii_lowercase() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert!(s.is_ascii_lowercase());
        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from("`aaaz");
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from("aaaz");
        assert!(s.is_ascii_lowercase());
    }

    /// `as_bytes` always yields UTF-8, whatever the storage is.
    #[test]
    fn test_as_bytes() {
        const ASCII_SMALL_A: u8 = b'a';
        const ASCII_SMALL_Z: u8 = b'z';

        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
        assert_eq!(
            *s.as_bytes(),
            [
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                UTF8_PILLCROW[0],
                UTF8_PILLCROW[1],
                ASCII_SMALL_A,
                ASCII_SMALL_A
            ]
        );

        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert_eq!(
            *s.as_bytes(),
            [
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_A,
                ASCII_SMALL_Z
            ]
        );

        let text = "abc%$²".to_owned();
        let s = DOMString::from(text.clone());
        assert_eq!(&*s.as_bytes(), text.as_bytes());
        let text = "AbBcC❤&%$#".to_owned();
        let s = DOMString::from(text.clone());
        assert_eq!(&*s.as_bytes(), text.as_bytes());
    }
}