script_bindings/
domstring.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::conversions::{ToJSValConvertible, jsstr_to_string};
17use js::gc::{HandleValue, MutableHandleValue};
18use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
19use js::jsval::StringValue;
20use js::rust::{Runtime, Trace};
21use malloc_size_of::MallocSizeOfOps;
22use num_traits::{ToPrimitive, Zero};
23use regex::Regex;
24use servo_base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27use zeroize::Zeroize;
28
29use crate::trace::RootedTraceableBox;
30
/// The highest ASCII code point (U+007F DELETE). Every byte up to and including this value is
/// encoded as the same single byte in both Latin1 and UTF-8.
const ASCII_END: u8 = 0x7F;
/// `A`, the first ASCII uppercase letter.
const ASCII_CAPITAL_A: u8 = 0x41;
/// `Z`, the last ASCII uppercase letter.
const ASCII_CAPITAL_Z: u8 = 0x5A;
/// `a`, the first ASCII lowercase letter.
const ASCII_LOWERCASE_A: u8 = 0x61;
/// `z`, the last ASCII lowercase letter.
const ASCII_LOWERCASE_Z: u8 = 0x7A;
/// U+0009 CHARACTER TABULATION.
const ASCII_TAB: u8 = 0x09;
/// U+000A LINE FEED.
const ASCII_NEWLINE: u8 = 0x0A;
/// U+000C FORM FEED.
const ASCII_FORMFEED: u8 = 0x0C;
/// U+000D CARRIAGE RETURN.
const ASCII_CR: u8 = 0x0D;
/// U+0020 SPACE.
const ASCII_SPACE: u8 = 0x20;
41
42/// Gets the latin1 bytes from the js engine.
43/// Safety: Make sure the *mut JSString is not null.
44unsafe fn get_latin1_string_bytes(
45    rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
46) -> &[u8] {
47    debug_assert!(!rooted_traceable_box.get().is_null());
48    let mut length = 0;
49    unsafe {
50        let chars = JS_GetLatin1StringCharsAndLength(
51            Runtime::get().expect("JS runtime has shut down").as_ptr(),
52            ptr::null(),
53            rooted_traceable_box.get(),
54            &mut length,
55        );
56        assert!(!chars.is_null());
57        slice::from_raw_parts(chars, length)
58    }
59}
60
/// A type representing the underlying encoded bytes of a [`DOMString`].
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// These bytes are Latin1 encoded.
    Latin1(Ref<'a, [u8]>),
    /// These bytes are UTF-8 encoded.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Return a reference to the raw bytes of this [`EncodedBytes`] without any information about
    /// the underlying encoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// Return the number of bytes these contents occupy when encoded as UTF-8.
    ///
    /// UTF-8 bytes are counted as they are. For Latin1, every ASCII byte (`0x00..=0x7F`,
    /// including U+007F DELETE) maps to one UTF-8 byte and every other byte (`0x80..=0xFF`)
    /// maps to two.
    pub fn len(&self) -> usize {
        match self {
            Self::Latin1(bytes) => bytes
                .iter()
                .map(|byte| if byte.is_ascii() { 1 } else { 2 })
                .sum(),
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Return whether or not there is any data in this collection of bytes.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
95
/// The backing storage of a [`DOMString`].
///
/// `Zeroize` is derived for this type; the `JSString` variant is excluded from it via
/// `#[zeroize(skip)]`.
#[derive(Zeroize)]
enum DOMStringType {
    /// A simple Rust string.
    Rust(String),
    /// A JS String stored in mozjs.
    #[zeroize(skip)]
    JSString(RootedTraceableBox<Heap<*mut JSString>>),
    #[cfg(test)]
    /// This is used for testing of the bindings to give
    /// a raw u8 Latin1 encoded string without having a JS engine.
    Latin1Vec(Vec<u8>),
}
108
109impl Default for DOMStringType {
110    fn default() -> Self {
111        Self::Rust(Default::default())
112    }
113}
114
115impl DOMStringType {
116    /// Warning:
117    /// This function does not checking and just returns the raw bytes of the string,
118    /// independently if they are  utf8 or latin1.
119    /// The caller needs to take care that these make sense in context.
120    fn as_raw_bytes(&self) -> &[u8] {
121        match self {
122            DOMStringType::Rust(s) => s.as_bytes(),
123            DOMStringType::JSString(rooted_traceable_box) => unsafe {
124                get_latin1_string_bytes(rooted_traceable_box)
125            },
126            #[cfg(test)]
127            DOMStringType::Latin1Vec(items) => items,
128        }
129    }
130
131    fn ensure_rust_string(&mut self) -> &mut String {
132        let new_string = match self {
133            DOMStringType::Rust(string) => return string,
134            DOMStringType::JSString(rooted_traceable_box) => unsafe {
135                jsstr_to_string(
136                    Runtime::get().expect("JS runtime has shut down").as_ptr(),
137                    NonNull::new(rooted_traceable_box.get()).unwrap(),
138                )
139            },
140            #[cfg(test)]
141            DOMStringType::Latin1Vec(items) => {
142                let mut v = vec![0; items.len() * 2];
143                let real_size =
144                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
145                v.truncate(real_size);
146
147                // Safety: convert_latin1_to_utf8 converts the raw bytes to utf8 and the
148                // buffer is the size specified in the documentation, so this should be safe.
149                unsafe { String::from_utf8_unchecked(v) }
150            },
151        };
152        *self = DOMStringType::Rust(new_string);
153        self.ensure_rust_string()
154    }
155}
156
/// A reference to a Rust `str` of UTF-8 encoded bytes, used to get a Rust
/// string from a [`DOMString`].
///
/// This wraps a [`Ref`], so a shared `RefCell` borrow stays active for as long as the view
/// exists.
#[derive(Debug)]
pub struct StringView<'a>(Ref<'a, str>);
161
162impl StringView<'_> {
163    pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
164        self.split(HTML_SPACE_CHARACTERS)
165            .filter(|string| !string.is_empty())
166    }
167}
168
169impl From<StringView<'_>> for String {
170    fn from(string_view: StringView<'_>) -> Self {
171        string_view.0.to_string()
172    }
173}
174
175impl Deref for StringView<'_> {
176    type Target = str;
177    fn deref(&self) -> &str {
178        &(self.0)
179    }
180}
181
182impl AsRef<str> for StringView<'_> {
183    fn as_ref(&self) -> &str {
184        &(self.0)
185    }
186}
187
188impl PartialEq for StringView<'_> {
189    fn eq(&self, other: &Self) -> bool {
190        self.0.eq(&*(other.0))
191    }
192}
193
194impl PartialEq<&str> for StringView<'_> {
195    fn eq(&self, other: &&str) -> bool {
196        self.0.eq(*other)
197    }
198}
199
// Marker impl: `StringView` equality is the equality of the underlying `str`.
impl Eq for StringView<'_> {}
201
202impl PartialOrd for StringView<'_> {
203    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
204        self.0.partial_cmp(&**other)
205    }
206}
207
208impl Ord for StringView<'_> {
209    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
210        self.0.cmp(other)
211    }
212}
213
214/// Safety comment:
215///
216/// This method will _not_ trace the pointer if the rust string exists.
217/// The js string could be garbage collected and, hence, violating this
218/// could lead to undefined behavior
219unsafe impl Trace for DOMStringType {
220    unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
221        unsafe {
222            match self {
223                DOMStringType::Rust(_s) => {},
224                DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
225                #[cfg(test)]
226                DOMStringType::Latin1Vec(_s) => {},
227            }
228        }
229    }
230}
231
232impl malloc_size_of::MallocSizeOf for DOMStringType {
233    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
234        match self {
235            DOMStringType::Rust(s) => s.size_of(ops),
236            DOMStringType::JSString(_rooted_traceable_box) => {
237                // Managed by JS Engine
238                0
239            },
240            #[cfg(test)]
241            DOMStringType::Latin1Vec(s) => s.size_of(ops),
242        }
243    }
244}
245
246impl std::fmt::Debug for DOMStringType {
247    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248        match self {
249            DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
250            DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
251            #[cfg(test)]
252            DOMStringType::Latin1Vec(s) => f
253                .debug_struct("DOMString")
254                .field("latin1_string", s)
255                .finish(),
256        }
257    }
258}
259
/// A DOMString.
///
/// This type corresponds to the [`DOMString`] type in WebIDL.
///
/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString
///
/// Conceptually, a DOMString has the same value space as a JavaScript String,
/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with
/// unpaired surrogates present (also sometimes called WTF-16).
///
/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have
/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values
/// can not be represented as a Rust `String`). This introduces the question of
/// what to do with values being passed from JavaScript to Rust that contain
/// unpaired surrogates.
///
/// The hypothesis is that it does not matter much how exactly those values are
/// transformed, because passing unpaired surrogates into the DOM is very rare.
/// Instead Servo will replace the unpaired surrogate by a U+FFFD replacement
/// character.
///
/// Currently, the lack of crash reports about this issue provides some
/// evidence to support the hypothesis. This evidence will hopefully be used to
/// convince other browser vendors that it would be safe to replace unpaired
/// surrogates at the boundary between JavaScript and native code. (This would
/// unify the `DOMString` and `USVString` types, both in the WebIDL standard
/// and in Servo.)
///
/// This string class will either keep the reference to the mozjs object alive
/// or hold an internal Rust string.
/// We currently default to doing most of the string operations on the Rust side.
/// You should use `str()` to get the Rust string (represented by a `StringView`
/// which you can deref to a `&str`). You should assume that this conversion is
/// expensive. For now, you should assume that all the functions incur this
/// conversion cost.
#[repr(transparent)]
#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
298
299impl Clone for DOMString {
300    fn clone(&self) -> Self {
301        self.ensure_rust_string().clone().into()
302    }
303}
304
/// Errors that can occur while creating a [`DOMString`].
///
/// `Debug` is derived so that a `Result` carrying this error can be logged or used with
/// `unwrap`/`expect`.
#[derive(Debug)]
pub enum DOMStringErrorType {
    /// Converting a JS value to a JS string failed.
    JSConversionError,
}
308
309impl DOMString {
310    /// Creates a new `DOMString`.
311    pub fn new() -> DOMString {
312        Default::default()
313    }
314
315    /// Creates the string from js. If the string can be encoded in latin1, just take the reference
316    /// to the JSString. Otherwise do the conversion to utf8 now.
317    pub fn from_js_string(
318        cx: &mut js::context::JSContext,
319        value: HandleValue,
320    ) -> Result<DOMString, DOMStringErrorType> {
321        let string_ptr = unsafe { js::rust::ToString(cx.raw_cx(), value) };
322        if string_ptr.is_null() {
323            debug!("ToString failed");
324            Err(DOMStringErrorType::JSConversionError)
325        } else {
326            let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
327            let inner = if latin1 {
328                let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
329                DOMStringType::JSString(h)
330            } else {
331                // We need to convert the string anyway as it is not just latin1
332                DOMStringType::Rust(unsafe {
333                    jsstr_to_string(cx.raw_cx(), ptr::NonNull::new(string_ptr).unwrap())
334                })
335            };
336            Ok(DOMString(RefCell::new(inner)))
337        }
338    }
339
340    /// Transforms the internal storage of this [`DOMString`] into a Rust string if it is not
341    /// yet one. This will make a copy of the underlying string data.
342    fn ensure_rust_string(&self) -> RefMut<'_, String> {
343        let inner = self.0.borrow_mut();
344        RefMut::map(inner, |inner| inner.ensure_rust_string())
345    }
346
347    /// Debug the current  state of the string without modifying it.
348    #[expect(unused)]
349    fn debug_js(&self) {
350        match *self.0.borrow() {
351            DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
352            DOMStringType::JSString(ref rooted_traceable_box) => {
353                let s = unsafe {
354                    jsstr_to_string(
355                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
356                        ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
357                    )
358                };
359                info!("JSString ({})", s);
360            },
361            #[cfg(test)]
362            DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
363        }
364    }
365
366    /// Returns the underlying rust string.
367    pub fn str(&self) -> StringView<'_> {
368        {
369            let inner = self.0.borrow();
370            if matches!(&*inner, DOMStringType::Rust(..)) {
371                return StringView(Ref::map(inner, |inner| match inner {
372                    DOMStringType::Rust(string) => string.as_str(),
373                    _ => unreachable!("Guaranteed by condition above"),
374                }));
375            }
376        }
377
378        self.ensure_rust_string();
379        self.str()
380    }
381
382    /// Return the [`EncodedBytes`] of this [`DOMString`]. This returns the original encoded
383    /// bytes of the string without doing any conversions.
384    pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
385        let inner = self.0.borrow();
386        match &*inner {
387            DOMStringType::Rust(..) => {
388                EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
389            },
390            _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
391        }
392    }
393
394    pub fn clear(&mut self) {
395        let mut inner = self.0.borrow_mut();
396        let DOMStringType::Rust(string) = &mut *inner else {
397            *inner = DOMStringType::Rust(String::new());
398            return;
399        };
400        string.clear();
401    }
402
403    pub fn is_empty(&self) -> bool {
404        self.encoded_bytes().is_empty()
405    }
406
407    /// The length of this string in UTF-8 code units, each one being one byte in size.
408    ///
409    /// Note: This is different than the number of Unicode characters (or code points). A
410    /// character may require multiple UTF-8 code units.
411    pub fn len(&self) -> usize {
412        self.encoded_bytes().len()
413    }
414
415    /// The length of this string in UTF-8 code units, each one being one byte in size.
416    /// This method is the same as [`DOMString::len`], but the result is wrapped in a
417    /// `Utf8CodeUnitLength` to be used in code that mixes different kinds of offsets.
418    ///
419    /// Note: This is different than the number of Unicode characters (or code points). A
420    /// character may require multiple UTF-8 code units.
421    pub fn len_utf8(&self) -> Utf8CodeUnitLength {
422        Utf8CodeUnitLength(self.len())
423    }
424
425    /// The length of this string in UTF-16 code units, each one being one two bytes in size.
426    ///
427    /// Note: This is different than the number of Unicode characters (or code points). A
428    /// character may require multiple UTF-16 code units.
429    pub fn len_utf16(&self) -> Utf16CodeUnitLength {
430        Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
431    }
432
433    pub fn make_ascii_lowercase(&mut self) {
434        self.0
435            .borrow_mut()
436            .ensure_rust_string()
437            .make_ascii_lowercase();
438    }
439
440    pub fn push_str(&mut self, string_to_push: &str) {
441        self.0
442            .borrow_mut()
443            .ensure_rust_string()
444            .push_str(string_to_push);
445    }
446
447    /// <https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace>
448    pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
449        if self.is_empty() {
450            return;
451        }
452
453        let mut inner = self.0.borrow_mut();
454        let string = inner.ensure_rust_string();
455        let trailing_whitespace_len = string
456            .trim_end_matches(|character: char| character.is_ascii_whitespace())
457            .len();
458        string.truncate(trailing_whitespace_len);
459        if string.is_empty() {
460            return;
461        }
462
463        let first_non_whitespace = string
464            .find(|character: char| !character.is_ascii_whitespace())
465            .unwrap();
466        string.replace_range(0..first_non_whitespace, "");
467    }
468
469    /// <https://html.spec.whatwg.org/multipage/#valid-floating-point-number>
470    pub fn is_valid_floating_point_number_string(&self) -> bool {
471        static RE: LazyLock<Regex> = LazyLock::new(|| {
472            Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
473        });
474
475        RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
476            self.parse_floating_point_number().is_some()
477    }
478
479    pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
480        self.str().parse::<T>()
481    }
482
483    /// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
484    pub fn parse_floating_point_number(&self) -> Option<f64> {
485        parse_floating_point_number(&self.str())
486    }
487
488    /// <https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number>
489    pub fn set_best_representation_of_the_floating_point_number(&mut self) {
490        if let Some(val) = self.parse_floating_point_number() {
491            // [tc39] Step 2: If x is either +0 or -0, return "0".
492            let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
493
494            *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
495        }
496    }
497
498    pub fn to_lowercase(&self) -> String {
499        self.str().to_lowercase()
500    }
501
502    pub fn to_uppercase(&self) -> String {
503        self.str().to_uppercase()
504    }
505
506    pub fn strip_newlines(&mut self) {
507        // > To strip newlines from a string, remove any U+000A LF and U+000D CR code
508        // > points from the string.
509        self.0
510            .borrow_mut()
511            .ensure_rust_string()
512            .retain(|character| character != '\r' && character != '\n');
513    }
514
515    /// Normalize newlines according to <https://infra.spec.whatwg.org/#normalize-newlines>.
516    pub fn normalize_newlines(&mut self) {
517        // > To normalize newlines in a string, replace every U+000D CR U+000A LF code point
518        // > pair with a single U+000A LF code point, and then replace every remaining
519        // > U+000D CR code point with a U+000A LF code point.
520        let mut inner = self.0.borrow_mut();
521        let string = inner.ensure_rust_string();
522        *string = string.replace("\r\n", "\n").replace("\r", "\n")
523    }
524
525    pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
526        let new_string = self.str().to_owned();
527        DOMString(RefCell::new(DOMStringType::Rust(
528            new_string.replace(needle, replace_char),
529        )))
530    }
531
532    /// Pattern is not yet stable in rust, hence, we need different methods for str and char
533    pub fn starts_with(&self, c: char) -> bool {
534        if !c.is_ascii() {
535            self.str().starts_with(c)
536        } else {
537            // As this is an ASCII character, it is guaranteed to be a single byte, no matter if the
538            // underlying encoding is UTF-8 or Latin1.
539            self.encoded_bytes().bytes().starts_with(&[c as u8])
540        }
541    }
542
543    pub fn starts_with_str(&self, needle: &str) -> bool {
544        self.str().starts_with(needle)
545    }
546
547    pub fn ends_with_str(&self, needle: &str) -> bool {
548        self.str().ends_with(needle)
549    }
550
551    pub fn contains(&self, needle: &str) -> bool {
552        self.str().contains(needle)
553    }
554
555    pub fn to_ascii_lowercase(&self) -> String {
556        let conversion = match self.encoded_bytes() {
557            EncodedBytes::Latin1(bytes) => {
558                if bytes.iter().all(|c| *c <= ASCII_END) {
559                    // We are just simple ascii
560                    Some(unsafe {
561                        String::from_utf8_unchecked(
562                            bytes
563                                .iter()
564                                .map(|c| {
565                                    if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
566                                        c + 32
567                                    } else {
568                                        *c
569                                    }
570                                })
571                                .collect(),
572                        )
573                    })
574                } else {
575                    None
576                }
577            },
578            EncodedBytes::Utf8(bytes) => unsafe {
579                // Save because we know it was a utf8 string
580                Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
581            },
582        };
583        // We otherwise would double borrow the refcell
584        if let Some(conversion) = conversion {
585            conversion
586        } else {
587            self.str().to_ascii_lowercase()
588        }
589    }
590
591    fn contains_space_characters(
592        &self,
593        latin1_characters: &'static [u8],
594        utf8_characters: &'static [char],
595    ) -> bool {
596        match self.encoded_bytes() {
597            EncodedBytes::Latin1(items) => {
598                latin1_characters.iter().any(|byte| items.contains(byte))
599            },
600            EncodedBytes::Utf8(bytes) => {
601                // Save because we know it was a utf8 string
602                let s = unsafe { str::from_utf8_unchecked(&bytes) };
603                s.contains(utf8_characters)
604            },
605        }
606    }
607
608    /// <https://infra.spec.whatwg.org/#ascii-tab-or-newline>
609    pub fn contains_tab_or_newline(&self) -> bool {
610        const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
611        const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
612
613        self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
614    }
615
616    /// <https://infra.spec.whatwg.org/#ascii-whitespace>
617    pub fn contains_html_space_characters(&self) -> bool {
618        const SPACE_BYTES: [u8; 5] = [
619            ASCII_TAB,
620            ASCII_NEWLINE,
621            ASCII_FORMFEED,
622            ASCII_CR,
623            ASCII_SPACE,
624        ];
625        self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
626    }
627
628    /// This returns the string in utf8 bytes, i.e., `[u8]` encoded with utf8.
629    pub fn as_bytes(&self) -> BytesView<'_> {
630        // BytesView will just give the raw bytes on dereference.
631        // If we are ascii this is the same for latin1 and utf8.
632        // Otherwise we convert to rust.
633        if self.is_ascii() {
634            BytesView(self.0.borrow())
635        } else {
636            self.ensure_rust_string();
637            BytesView(self.0.borrow())
638        }
639    }
640
641    /// Tests if there are only ascii lowercase characters. Does not include special characters.
642    pub fn is_ascii_lowercase(&self) -> bool {
643        match self.encoded_bytes() {
644            EncodedBytes::Latin1(items) => items
645                .iter()
646                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
647            EncodedBytes::Utf8(s) => s
648                .iter()
649                .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
650                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
651        }
652    }
653
654    /// Is the string only ascii characters
655    pub fn is_ascii(&self) -> bool {
656        self.encoded_bytes().bytes().is_ascii()
657    }
658
659    /// Returns true if the slice only contains bytes that are safe to use in cookie strings.
660    /// <https://www.ietf.org/archive/id/draft-ietf-httpbis-rfc6265bis-15.html#section-5.6-6>
661    /// Not using ServoCookie::is_valid_name_or_value to prevent dependency on the net crate.
662    pub fn is_valid_for_cookie(&self) -> bool {
663        match self.encoded_bytes() {
664            EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
665                .iter()
666                .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
667        }
668    }
669
670    /// Call the callback with a `&str` reference of the string stored in this [`DOMString`]. Note
671    /// that if the [`DOMString`] cannot be interpreted as a Rust string a conversion will be done.
672    fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
673        match self.encoded_bytes() {
674            // If the Latin1 string is all ASCII bytes, then it is safe to interpret it as UTF-8.
675            EncodedBytes::Latin1(latin1_bytes) => {
676                if latin1_bytes.iter().all(|character| character.is_ascii()) {
677                    // SAFETY: All characters are ASCII, so it is safe to interpret this string as
678                    // UTF-8.
679                    return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
680                }
681            },
682            EncodedBytes::Utf8(utf8_bytes) => {
683                // SAFETY: These are the bytes of a UTF-8 string already, so they can be interpreted
684                // as UTF-8.
685                return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
686            },
687        };
688        callback(self.str().deref())
689    }
690
691    /// Newline replacement routine as described in step 1 of the multipart/form-data
692    /// encoding algorithm and many steps of application/x-www-form-urlencoded.
693    /// e.g. <https://html.spec.whatwg.org/multipage/#convert-to-a-list-of-name-value-pairs>
694    ///
695    /// Replace every occurrence of U+000D (CR) not followed by U+000A (LF),
696    /// and every occurrence of U+000A (LF) not preceded by U+000D (CR), in entry's name,
697    /// by a string consisting of a U+000D (CR) and U+000A (LF).
698    pub fn normalize_crlf(&self) -> String {
699        let s = self.str();
700        let mut buf = String::new();
701        let mut prev = ' ';
702        for ch in s.chars() {
703            match ch {
704                '\n' if prev != '\r' => {
705                    buf.push('\r');
706                    buf.push('\n');
707                },
708                '\n' => {
709                    buf.push('\n');
710                },
711                // This character isn't LF but is
712                // preceded by CR
713                _ if prev == '\r' => {
714                    buf.push('\n');
715                    buf.push(ch);
716                },
717                _ => buf.push(ch),
718            };
719            prev = ch;
720        }
721        // In case the last character was CR
722        if prev == '\r' {
723            buf.push('\n');
724        }
725        buf
726    }
727}
728
/// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
///
/// Returns the parsed value, or `None` when Rust's `f64` parser rejects the whitespace-trimmed
/// input, when the result is infinite or NaN, or when `input` itself (before trimming) starts
/// with `+` or ends with `.`.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    // A valid number is the same as what rust considers to be valid,
    // except for +1., NaN, and Infinity.
    if input.starts_with('+') || input.ends_with('.') {
        return None;
    }

    // Steps 15-16 are telling us things about IEEE rounding modes
    // for floating-point significands; this code assumes the Rust
    // compiler already matches them in any cases where
    // that actually matters. They are not
    // related to f64::round(), which is for rounding to integers.
    let value = input.trim().parse::<f64>().ok()?;
    value.is_finite().then_some(value)
}
742
/// A borrowed view of the raw bytes of a [`DOMString`], created by `DOMString::as_bytes`.
///
/// It holds a shared borrow of the string's storage and dereferences to `[u8]`.
pub struct BytesView<'a>(Ref<'a, DOMStringType>);
744
745impl Deref for BytesView<'_> {
746    type Target = [u8];
747
748    fn deref(&self) -> &Self::Target {
749        // This does the correct thing by the construction of BytesView in `DOMString::as_bytes`.
750        self.0.as_raw_bytes()
751    }
752}
753
754impl Ord for DOMString {
755    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
756        self.str().cmp(&other.str())
757    }
758}
759
760impl PartialOrd for DOMString {
761    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
762        self.str().partial_cmp(&other.str())
763    }
764}
765
766impl Extend<char> for DOMString {
767    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
768        self.0.borrow_mut().ensure_rust_string().extend(iter)
769    }
770}
771
772impl ToJSValConvertible for DOMString {
773    unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
774        let val = self.0.borrow();
775        match *val {
776            DOMStringType::Rust(ref s) => unsafe {
777                s.to_jsval(cx, rval);
778            },
779            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
780                rval.set(StringValue(&*rooted_traceable_box.get()));
781            },
782            #[cfg(test)]
783            DOMStringType::Latin1Vec(ref items) => {
784                let mut v = vec![0; items.len() * 2];
785                let real_size =
786                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
787                v.truncate(real_size);
788
789                String::from_utf8(v)
790                    .expect("Error in constructin test string")
791                    .to_jsval(cx, rval);
792            },
793        };
794    }
795}
796
797impl std::hash::Hash for DOMString {
798    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
799        self.str().hash(state);
800    }
801}
802
803impl std::fmt::Display for DOMString {
804    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
805        fmt::Display::fmt(self.str().deref(), f)
806    }
807}
808
809impl std::cmp::PartialEq<str> for DOMString {
810    fn eq(&self, other: &str) -> bool {
811        if other.is_ascii() {
812            *other.as_bytes() == *self.encoded_bytes().bytes()
813        } else {
814            self.str().deref() == other
815        }
816    }
817}
818
819impl std::cmp::PartialEq<&str> for DOMString {
820    fn eq(&self, other: &&str) -> bool {
821        self.eq(*other)
822    }
823}
824
825impl std::cmp::PartialEq<String> for DOMString {
826    fn eq(&self, other: &String) -> bool {
827        self.eq(other.as_str())
828    }
829}
830
831impl std::cmp::PartialEq<DOMString> for String {
832    fn eq(&self, other: &DOMString) -> bool {
833        other.eq(self)
834    }
835}
836
837impl std::cmp::PartialEq<DOMString> for str {
838    fn eq(&self, other: &DOMString) -> bool {
839        other.eq(self)
840    }
841}
842
843impl std::cmp::PartialEq for DOMString {
844    fn eq(&self, other: &DOMString) -> bool {
845        let result = match (self.encoded_bytes(), other.encoded_bytes()) {
846            (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
847                Some(*bytes == *other_bytes)
848            },
849            (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
850                if other_bytes.is_ascii() =>
851            {
852                Some(*bytes == *other_bytes)
853            },
854            (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
855                Some(*bytes == *other_bytes)
856            },
857            (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
858                Some(*bytes == *other_bytes)
859            },
860            _ => None,
861        };
862
863        if let Some(eq_result) = result {
864            return eq_result;
865        }
866
867        *self.str() == *other.str()
868    }
869}
870
/// Marker impl; the comparison itself is provided by `PartialEq for DOMString`.
impl std::cmp::Eq for DOMString {}
872
873impl From<std::string::String> for DOMString {
874    fn from(string: String) -> Self {
875        DOMString(RefCell::new(DOMStringType::Rust(string)))
876    }
877}
878
879impl From<&str> for DOMString {
880    fn from(string: &str) -> Self {
881        String::from(string).into()
882    }
883}
884
885impl From<DOMString> for LocalName {
886    fn from(dom_string: DOMString) -> LocalName {
887        dom_string.with_str_reference(|string| LocalName::from(string))
888    }
889}
890
891impl From<&DOMString> for LocalName {
892    fn from(dom_string: &DOMString) -> LocalName {
893        dom_string.with_str_reference(|string| LocalName::from(string))
894    }
895}
896
897impl From<DOMString> for Namespace {
898    fn from(dom_string: DOMString) -> Namespace {
899        dom_string.with_str_reference(|string| Namespace::from(string))
900    }
901}
902
903impl From<DOMString> for Atom {
904    fn from(dom_string: DOMString) -> Atom {
905        dom_string.with_str_reference(|string| Atom::from(string))
906    }
907}
908
909impl From<DOMString> for String {
910    fn from(val: DOMString) -> Self {
911        val.ensure_rust_string();
912        let inner = val.0.take();
913        match inner {
914            DOMStringType::Rust(s) => s,
915            DOMStringType::JSString(_) => unreachable!(),
916            #[cfg(test)]
917            DOMStringType::Latin1Vec(items) => String::from_utf8(items).expect("Not valid latin1"),
918        }
919    }
920}
921
922impl From<DOMString> for Vec<u8> {
923    fn from(value: DOMString) -> Self {
924        value.ensure_rust_string();
925        let inner = value.0.take();
926        match inner {
927            DOMStringType::Rust(s) => s.into_bytes(),
928            DOMStringType::JSString(_) => unreachable!(),
929            #[cfg(test)]
930            DOMStringType::Latin1Vec(items) => items,
931        }
932    }
933}
934
935impl From<Cow<'_, str>> for DOMString {
936    fn from(value: Cow<'_, str>) -> Self {
937        DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
938    }
939}
940
941impl Zeroize for DOMString {
942    fn zeroize(&mut self) {
943        self.0.get_mut().zeroize();
944    }
945}
946
/// Implementation detail of `match_domstring_ascii!`: expands a list of arms
/// into a chain of `if`/`else` byte comparisons.
///
/// * `$variant` is accepted by both rules but is not expanded by either of them.
/// * `$input` must offer a `bytes()` method; the expression is expanded once per arm.
/// * Every `literal => expr,` arm compares the literal's bytes with `$input.bytes()`.
///   A `debug_assert!` checks that the literal is ASCII.
/// * The last arm has to be a `pattern => expr,` arm and becomes a plain `match`
///   on `$input`.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // Literal arm: test it and recurse into the remaining arms on mismatch.
    ($variant: expr, $input: expr, $ascii_literal: literal => $then: expr, $($rest:tt)*) => {
        if {
            debug_assert!(($ascii_literal).is_ascii());
            $ascii_literal.as_bytes()
        } == $input.bytes() {
          $then
        } else {
            $crate::match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }

    };
    // Terminal arm: a single pattern that ends the recursion.
    ($variant: expr, $input: expr, $p: pat => $then: expr,) => {
        match $input {
            $p => $then
        }
    }
}
966
/// Use this to match a `DOMString` against `&str` literals efficiently.
/// You are only allowed to match ASCII literals, otherwise this macro will
/// lead to wrong results (debug builds assert that every literal is ASCII).
/// The last arm must be a pattern such as `_` followed by a trailing comma.
/// ```ignore
/// let s = DOMString::from("test");
/// let value = match_domstring_ascii!(s,
/// "test1" => 1,
/// "test2" => 2,
/// "test" => 3,
/// _ => 4,
/// );
/// assert_eq!(value, 3);
/// ```
///
/// The `RefCell` inside `DOMString` is borrowed for the duration of the `match`,
/// so the string cannot be accessed again inside a `match` arm.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {
        {
            use $crate::domstring::EncodedBytes;

            // Fetch the encoded bytes once; every arm compares against them.
            let encoded_bytes = $input.encoded_bytes();
            match encoded_bytes {
                EncodedBytes::Latin1(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, encoded_bytes, $($tail)*)
                }
                EncodedBytes::Utf8(_) => {
                    $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, encoded_bytes, $($tail)*)
                }

            }
        }
    };
}
1002
1003#[cfg(test)]
1004mod tests {
1005    use super::*;
1006
    /// Latin-1 byte of the pilcrow sign `¶` (U+00B6).
    const LATIN1_PILLCROW: u8 = 0xB6;
    /// UTF-8 encoding of the pilcrow sign `¶` (U+00B6).
    const UTF8_PILLCROW: [u8; 2] = [194, 182];
    /// Latin-1 byte of the superscript two `²` (U+00B2).
    const LATIN1_POWER2: u8 = 0xB2;
1010
1011    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
1012        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
1013    }
1014
1015    #[test]
1016    fn string_functions() {
1017        let s = DOMString::from("AbBcC❤&%$#");
1018        let s_copy = s.clone();
1019        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
1020        assert_eq!(s, s_copy);
1021        assert_eq!(s.len(), 12);
1022        assert_eq!(s_copy.len(), 12);
1023        assert!(s.starts_with('A'));
1024        let s2 = DOMString::from("");
1025        assert!(s2.is_empty());
1026    }
1027
1028    #[test]
1029    fn string_functions_latin1() {
1030        {
1031            let s = from_latin1(vec![
1032                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
1033            ]);
1034            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
1035        }
1036        {
1037            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
1038            assert_eq!(s.to_ascii_lowercase(), "abbcc");
1039        }
1040        {
1041            let s = from_latin1(vec![
1042                b'A', b'b', b'B', b'c', b'C', b'&', b'%', b'$', b'#', 0xB2,
1043            ]);
1044            assert_eq!(s.len(), 11);
1045            assert!(s.starts_with('A'));
1046        }
1047        {
1048            let s = from_latin1(vec![]);
1049            assert!(s.is_empty());
1050        }
1051    }
1052
1053    #[test]
1054    fn test_length() {
1055        let s1 = from_latin1(vec![
1056            0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
1057            0xAE, 0xAF,
1058        ]);
1059        let s2 = from_latin1(vec![
1060            0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD,
1061            0xBE, 0xBF,
1062        ]);
1063        let s3 = from_latin1(vec![
1064            0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD,
1065            0xCE, 0xCF,
1066        ]);
1067        let s4 = from_latin1(vec![
1068            0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
1069            0xDE, 0xDF,
1070        ]);
1071        let s5 = from_latin1(vec![
1072            0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
1073            0xEE, 0xEF,
1074        ]);
1075        let s6 = from_latin1(vec![
1076            0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD,
1077            0xFE, 0xFF,
1078        ]);
1079
1080        let s1_utf8 = String::from("\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯");
1081        let s2_utf8 = String::from("°±²³´µ¶·¸¹º»¼½¾¿");
1082        let s3_utf8 = String::from("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ");
1083        let s4_utf8 = String::from("ÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß");
1084        let s5_utf8 = String::from("àáâãäåæçèéêëìíîï");
1085        let s6_utf8 = String::from("ðñòóôõö÷øùúûüýþÿ");
1086
1087        assert_eq!(s1.len(), s1_utf8.len());
1088        assert_eq!(s2.len(), s2_utf8.len());
1089        assert_eq!(s3.len(), s3_utf8.len());
1090        assert_eq!(s4.len(), s4_utf8.len());
1091        assert_eq!(s5.len(), s5_utf8.len());
1092        assert_eq!(s6.len(), s6_utf8.len());
1093
1094        s1.ensure_rust_string();
1095        s2.ensure_rust_string();
1096        s3.ensure_rust_string();
1097        s4.ensure_rust_string();
1098        s5.ensure_rust_string();
1099        s6.ensure_rust_string();
1100        assert_eq!(s1.len(), s1_utf8.len());
1101        assert_eq!(s2.len(), s2_utf8.len());
1102        assert_eq!(s3.len(), s3_utf8.len());
1103        assert_eq!(s4.len(), s4_utf8.len());
1104        assert_eq!(s5.len(), s5_utf8.len());
1105        assert_eq!(s6.len(), s6_utf8.len());
1106    }
1107
1108    #[test]
1109    fn test_convert() {
1110        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1111        s.ensure_rust_string();
1112        assert_eq!(&*s.str(), "abc%$");
1113    }
1114
1115    #[test]
1116    fn partial_eq() {
1117        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1118        let string = String::from("abc%$");
1119        let s2 = DOMString::from(string.clone());
1120        assert_eq!(s, s2);
1121        assert_eq!(s, string);
1122    }
1123
1124    #[test]
1125    fn encoded_latin1_bytes() {
1126        let original_latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', 0xB2];
1127        let dom_string = from_latin1(original_latin1_bytes.clone());
1128        let string_latin1_bytes = match dom_string.encoded_bytes() {
1129            EncodedBytes::Latin1(bytes) => bytes,
1130            _ => unreachable!("Expected Latin1 encoded bytes"),
1131        };
1132        assert_eq!(*original_latin1_bytes, *string_latin1_bytes);
1133    }
1134
1135    #[test]
1136    fn testing_stringview() {
1137        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1138
1139        assert_eq!(
1140            s.str().chars().collect::<Vec<char>>(),
1141            vec!['a', 'b', 'c', '%', '$', '²']
1142        );
1143        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
1144    }
1145
1146    // We need to be extra careful here as two strings that have different
1147    // representation need to have the same hash.
1148    // Additionally, the interior mutability is only used for the conversion
1149    // which is forced by Hash. Hence, it is safe to have this interior mutability.
1150    #[test]
1151    fn test_hash() {
1152        use std::hash::{DefaultHasher, Hash, Hasher};
1153        fn hash_value(d: &DOMString) -> u64 {
1154            let mut hasher = DefaultHasher::new();
1155            d.hash(&mut hasher);
1156            hasher.finish()
1157        }
1158
1159        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1160        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', 0xB2]);
1161        s_converted.ensure_rust_string();
1162        let s2 = DOMString::from("abc%$²");
1163
1164        let hash_s = hash_value(&s);
1165        let hash_s_converted = hash_value(&s_converted);
1166        let hash_s2 = hash_value(&s2);
1167
1168        assert_eq!(hash_s, hash_s2);
1169        assert_eq!(hash_s, hash_s_converted);
1170    }
1171
1172    // Testing match_lazydomstring if it executes the statements in the match correctly
1173    #[test]
1174    fn test_match_executing() {
1175        // executing
1176        {
1177            let s = from_latin1(vec![b'a', b'b', b'c']);
1178            match_domstring_ascii!( s,
1179                "abc" => assert!(true),
1180                "bcd" => assert!(false),
1181                _ =>  (),
1182            );
1183        }
1184
1185        {
1186            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1187            match_domstring_ascii!( s,
1188                "abc/" => assert!(true),
1189                "bcd" => assert!(false),
1190                _ =>  (),
1191            );
1192        }
1193
1194        {
1195            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1196            match_domstring_ascii!( s,
1197                "bcd" => assert!(false),
1198                "abc%$" => assert!(true),
1199                _ => (),
1200            );
1201        }
1202
1203        {
1204            let s = DOMString::from("abcde");
1205            match_domstring_ascii!( s,
1206                "abc" => assert!(false),
1207                "bcd" => assert!(false),
1208                _ => assert!(true),
1209            );
1210        }
1211        {
1212            let s = DOMString::from("abc%$");
1213            match_domstring_ascii!( s,
1214                "bcd" => assert!(false),
1215                "abc%$" => assert!(true),
1216                _ =>  (),
1217            );
1218        }
1219        {
1220            let s = from_latin1(vec![b'a', b'b', b'c']);
1221            match_domstring_ascii!( s,
1222                "abcdd" => assert!(false),
1223                "bcd" => assert!(false),
1224                _ => (),
1225            );
1226        }
1227    }
1228
1229    // Testing match_lazydomstring if it evaluates to the correct expression
1230    #[test]
1231    fn test_match_returning_result() {
1232        {
1233            let s = from_latin1(vec![b'a', b'b', b'c']);
1234            let res = match_domstring_ascii!( s,
1235                "abc" => true,
1236                "bcd" => false,
1237                _ => false,
1238            );
1239            assert_eq!(res, true);
1240        }
1241        {
1242            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
1243            let res = match_domstring_ascii!( s,
1244                "abc/" => true,
1245                "bcd" => false,
1246                _ => false,
1247            );
1248            assert_eq!(res, true);
1249        }
1250        {
1251            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
1252            let res = match_domstring_ascii!( s,
1253                "bcd" => false,
1254                "abc%$" => true,
1255                _ => false,
1256            );
1257            assert_eq!(res, true);
1258        }
1259
1260        {
1261            let s = DOMString::from("abcde");
1262            let res = match_domstring_ascii!( s,
1263                "abc" => false,
1264                "bcd" => false,
1265                _ => true,
1266            );
1267            assert_eq!(res, true);
1268        }
1269        {
1270            let s = DOMString::from("abc%$");
1271            let res = match_domstring_ascii!( s,
1272                "bcd" => false,
1273                "abc%$" => true,
1274                _ => false,
1275            );
1276            assert_eq!(res, true);
1277        }
1278        {
1279            let s = from_latin1(vec![b'a', b'b', b'c']);
1280            let res = match_domstring_ascii!( s,
1281                "abcdd" => false,
1282                "bcd" => false,
1283                _ => true,
1284            );
1285            assert_eq!(res, true);
1286        }
1287    }
1288
1289    #[test]
1290    #[cfg(debug_assertions)]
1291    #[should_panic]
1292    fn test_match_panic() {
1293        let s = DOMString::from("abcd");
1294        let _res = match_domstring_ascii!(s,
1295            "❤" => true,
1296            _ => false,);
1297    }
1298
1299    #[test]
1300    #[cfg(debug_assertions)]
1301    #[should_panic]
1302    fn test_match_panic2() {
1303        let s = DOMString::from("abcd");
1304        let _res = match_domstring_ascii!(s,
1305            "abc" => false,
1306            "❤" => true,
1307            _ => false,
1308        );
1309    }
1310
1311    #[test]
1312    fn test_strip_whitespace() {
1313        {
1314            let mut s = from_latin1(vec![
1315                b' ', b' ', b' ', b'\n', b' ', b'a', b'b', b'c', b'%', b'$', 0xB2, b' ',
1316            ]);
1317
1318            s.strip_leading_and_trailing_ascii_whitespace();
1319            s.ensure_rust_string();
1320            assert_eq!(&*s.str(), "abc%$²");
1321        }
1322        {
1323            let mut s = DOMString::from("   \n  abc%$ ");
1324
1325            s.strip_leading_and_trailing_ascii_whitespace();
1326            s.ensure_rust_string();
1327            assert_eq!(&*s.str(), "abc%$");
1328        }
1329    }
1330
1331    // https://infra.spec.whatwg.org/#ascii-whitespace
1332    #[test]
1333    fn contains_html_space_characters() {
1334        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_TAB, b'a', b'a']); // TAB
1335        assert!(s.contains_html_space_characters());
1336        s.ensure_rust_string();
1337        assert!(s.contains_html_space_characters());
1338
1339        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_NEWLINE, b'a', b'a']); // NEWLINE
1340        assert!(s.contains_html_space_characters());
1341        s.ensure_rust_string();
1342        assert!(s.contains_html_space_characters());
1343
1344        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_FORMFEED, b'a', b'a']); // FF
1345        assert!(s.contains_html_space_characters());
1346        s.ensure_rust_string();
1347        assert!(s.contains_html_space_characters());
1348
1349        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_CR, b'a', b'a']); // Carriage Return
1350        assert!(s.contains_html_space_characters());
1351        s.ensure_rust_string();
1352        assert!(s.contains_html_space_characters());
1353
1354        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']); // SPACE
1355        assert!(s.contains_html_space_characters());
1356        s.ensure_rust_string();
1357        assert!(s.contains_html_space_characters());
1358
1359        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
1360        assert!(!s.contains_html_space_characters());
1361        s.ensure_rust_string();
1362        assert!(!s.contains_html_space_characters());
1363    }
1364
1365    #[test]
1366    fn atom() {
1367        let s = from_latin1(vec![b'a', b'a', b'a', 0x20, b'a', b'a']);
1368        let atom1 = Atom::from(s);
1369        let s2 = DOMString::from("aaa aa");
1370        let atom2 = Atom::from(s2);
1371        assert_eq!(atom1, atom2);
1372        let s3 = from_latin1(vec![b'a', b'a', b'a', 0xB2, b'a', b'a']);
1373        let atom3 = Atom::from(s3);
1374        assert_ne!(atom1, atom3);
1375    }
1376
1377    #[test]
1378    fn namespace() {
1379        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1380        let atom1 = Namespace::from(s);
1381        let s2 = DOMString::from("aaa aa");
1382        let atom2 = Namespace::from(s2);
1383        assert_eq!(atom1, atom2);
1384        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1385        let atom3 = Namespace::from(s3);
1386        assert_ne!(atom1, atom3);
1387    }
1388
1389    #[test]
1390    fn localname() {
1391        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1392        let atom1 = LocalName::from(s);
1393        let s2 = DOMString::from("aaa aa");
1394        let atom2 = LocalName::from(s2);
1395        assert_eq!(atom1, atom2);
1396        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
1397        let atom3 = LocalName::from(s3);
1398        assert_ne!(atom1, atom3);
1399    }
1400
1401    #[test]
1402    fn is_ascii_lowercase() {
1403        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
1404        assert!(!s.is_ascii_lowercase());
1405        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a']);
1406        assert!(!s.is_ascii_lowercase());
1407        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
1408        assert!(s.is_ascii_lowercase());
1409        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
1410        assert!(!s.is_ascii_lowercase());
1411        let s = DOMString::from("`aaaz");
1412        assert!(!s.is_ascii_lowercase());
1413        let s = DOMString::from("aaaz");
1414        assert!(s.is_ascii_lowercase());
1415    }
1416
1417    #[test]
1418    fn test_as_bytes() {
1419        const ASCII_SMALL_A: u8 = b'a';
1420        const ASCII_SMALL_Z: u8 = b'z';
1421
1422        let v1 = vec![b'a', b'a', b'a', LATIN1_PILLCROW, b'a', b'a'];
1423        let s = from_latin1(v1.clone());
1424        assert_eq!(
1425            *s.as_bytes(),
1426            [
1427                ASCII_SMALL_A,
1428                ASCII_SMALL_A,
1429                ASCII_SMALL_A,
1430                UTF8_PILLCROW[0],
1431                UTF8_PILLCROW[1],
1432                ASCII_SMALL_A,
1433                ASCII_SMALL_A
1434            ]
1435        );
1436
1437        let v2 = vec![b'a', b'a', b'a', b'a', b'z'];
1438        let s = from_latin1(v2.clone());
1439        assert_eq!(
1440            *s.as_bytes(),
1441            [
1442                ASCII_SMALL_A,
1443                ASCII_SMALL_A,
1444                ASCII_SMALL_A,
1445                ASCII_SMALL_A,
1446                ASCII_SMALL_Z
1447            ]
1448        );
1449
1450        let str = "abc%$²".to_owned();
1451        let s = DOMString::from(str.clone());
1452        assert_eq!(&*s.as_bytes(), str.as_bytes());
1453        let str = "AbBcC❤&%$#".to_owned();
1454        let s = DOMString::from(str.clone());
1455        assert_eq!(&*s.as_bytes(), str.as_bytes());
1456    }
1457}
script_bindings/domstring.rs

script_bindings/
domstring.rs