Skip to main content

script_bindings/
domstring.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![allow(clippy::non_canonical_partial_ord_impl)]
6use std::borrow::{Cow, ToOwned};
7use std::cell::{Ref, RefCell, RefMut};
8use std::default::Default;
9use std::ops::Deref;
10use std::ptr::{self, NonNull};
11use std::str::FromStr;
12use std::sync::LazyLock;
13use std::{fmt, slice, str};
14
15use html5ever::{LocalName, Namespace};
16use js::conversions::{ToJSValConvertible, jsstr_to_string};
17use js::gc::MutableHandleValue;
18use js::jsapi::{Heap, JS_GetLatin1StringCharsAndLength, JSContext, JSString};
19use js::jsval::StringValue;
20use js::rust::{Runtime, Trace};
21use malloc_size_of::MallocSizeOfOps;
22use num_traits::{ToPrimitive, Zero};
23use regex::Regex;
24use servo_base::text::{Utf8CodeUnitLength, Utf16CodeUnitLength};
25use style::Atom;
26use style::str::HTML_SPACE_CHARACTERS;
27
28use crate::script_runtime::JSContext as SafeJSContext;
29use crate::trace::RootedTraceableBox;
30
// Byte values used by the Latin1 fast paths in this file.
/// `~` (0x7E), the last printable ASCII character. Note that DEL (0x7F) is also ASCII
/// but lies above this bound.
const ASCII_END: u8 = b'~';
const ASCII_CAPITAL_A: u8 = b'A';
const ASCII_CAPITAL_Z: u8 = b'Z';
const ASCII_LOWERCASE_A: u8 = b'a';
const ASCII_LOWERCASE_Z: u8 = b'z';
const ASCII_TAB: u8 = b'\t';
const ASCII_NEWLINE: u8 = b'\n';
const ASCII_FORMFEED: u8 = b'\x0C';
const ASCII_CR: u8 = b'\r';
const ASCII_SPACE: u8 = b' ';
41
/// Gets the latin1 bytes of a JS string from the js engine.
///
/// The returned slice is built from the pointer and length reported by
/// `JS_GetLatin1StringCharsAndLength` and borrows for as long as `rooted_traceable_box`.
///
/// # Safety
///
/// Make sure the `*mut JSString` held by `rooted_traceable_box` is not null (this is only
/// checked in debug builds).
/// NOTE(review): presumably the string must also have Latin1 characters — confirm against
/// the mozjs documentation.
///
/// # Panics
///
/// Panics if `Runtime::get()` returns `None` ("JS runtime has shut down") or if the engine
/// returns a null character pointer.
unsafe fn get_latin1_string_bytes(
    rooted_traceable_box: &RootedTraceableBox<Heap<*mut JSString>>,
) -> &[u8] {
    debug_assert!(!rooted_traceable_box.get().is_null());
    // Filled in by the engine call below.
    let mut length = 0;
    unsafe {
        let chars = JS_GetLatin1StringCharsAndLength(
            Runtime::get().expect("JS runtime has shut down").as_ptr(),
            ptr::null(),
            rooted_traceable_box.get(),
            &mut length,
        );
        // A null pointer would make `from_raw_parts` unsound, so fail loudly instead.
        assert!(!chars.is_null());
        slice::from_raw_parts(chars, length)
    }
}
60
/// A type representing the underlying encoded bytes of a [`DOMString`].
#[derive(Debug)]
pub enum EncodedBytes<'a> {
    /// These bytes are Latin1 encoded.
    Latin1(Ref<'a, [u8]>),
    /// These bytes are UTF-8 encoded.
    Utf8(Ref<'a, [u8]>),
}

impl EncodedBytes<'_> {
    /// Return a reference to the raw bytes of this [`EncodedBytes`] without any information about
    /// the underlying encoding.
    pub fn bytes(&self) -> &[u8] {
        match self {
            Self::Latin1(bytes) | Self::Utf8(bytes) => bytes,
        }
    }

    /// Return the number of UTF-8 code units (bytes) needed to represent these bytes as UTF-8.
    ///
    /// For UTF-8 data this is simply the byte length. For Latin1 data every ASCII byte
    /// (`0x00..=0x7F`, including DEL) takes one UTF-8 byte and every other byte
    /// (`0x80..=0xFF`) takes two.
    pub fn len(&self) -> usize {
        match self {
            Self::Latin1(bytes) => {
                // Each non-ASCII Latin1 byte costs exactly one extra byte once transcoded.
                let non_ascii = bytes.iter().filter(|byte| !byte.is_ascii()).count();
                bytes.len() + non_ascii
            },
            Self::Utf8(bytes) => bytes.len(),
        }
    }

    /// Return whether or not there is any data in this collection of bytes.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }
}
95
/// The internal storage of a [`DOMString`]: either an owned Rust string or a string that is
/// still held by the JS engine.
enum DOMStringType {
    /// A simple rust string
    Rust(String),
    /// A JS String stored in mozjs. Its bytes are read as Latin1 by the rest of this file.
    JSString(RootedTraceableBox<Heap<*mut JSString>>),
    #[cfg(test)]
    /// This is used for testing of the bindings to give
    /// a raw u8 Latin1 encoded string without having a js engine.
    Latin1Vec(Vec<u8>),
}
106
107impl Default for DOMStringType {
108    fn default() -> Self {
109        Self::Rust(Default::default())
110    }
111}
112
113impl DOMStringType {
114    /// Warning:
115    /// This function does not checking and just returns the raw bytes of the string,
116    /// independently if they are  utf8 or latin1.
117    /// The caller needs to take care that these make sense in context.
118    fn as_raw_bytes(&self) -> &[u8] {
119        match self {
120            DOMStringType::Rust(s) => s.as_bytes(),
121            DOMStringType::JSString(rooted_traceable_box) => unsafe {
122                get_latin1_string_bytes(rooted_traceable_box)
123            },
124            #[cfg(test)]
125            DOMStringType::Latin1Vec(items) => items,
126        }
127    }
128
129    fn ensure_rust_string(&mut self) -> &mut String {
130        let new_string = match self {
131            DOMStringType::Rust(string) => return string,
132            DOMStringType::JSString(rooted_traceable_box) => unsafe {
133                jsstr_to_string(
134                    Runtime::get().expect("JS runtime has shut down").as_ptr(),
135                    NonNull::new(rooted_traceable_box.get()).unwrap(),
136                )
137            },
138            #[cfg(test)]
139            DOMStringType::Latin1Vec(items) => {
140                let mut v = vec![0; items.len() * 2];
141                let real_size =
142                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
143                v.truncate(real_size);
144
145                // Safety: convert_latin1_to_utf8 converts the raw bytes to utf8 and the
146                // buffer is the size specified in the documentation, so this should be safe.
147                unsafe { String::from_utf8_unchecked(v) }
148            },
149        };
150        *self = DOMStringType::Rust(new_string);
151        self.ensure_rust_string()
152    }
153}
154
155/// A reference to a Rust `str` of UTF-8 encoded bytes, used to get a Rust
156/// string from a [`DOMString`].
157#[derive(Debug)]
158pub struct StringView<'a>(Ref<'a, str>);
159
160impl StringView<'_> {
161    pub fn split_html_space_characters(&self) -> impl Iterator<Item = &str> {
162        self.split(HTML_SPACE_CHARACTERS)
163            .filter(|string| !string.is_empty())
164    }
165}
166
167impl From<StringView<'_>> for String {
168    fn from(string_view: StringView<'_>) -> Self {
169        string_view.0.to_string()
170    }
171}
172
173impl Deref for StringView<'_> {
174    type Target = str;
175    fn deref(&self) -> &str {
176        &(self.0)
177    }
178}
179
180impl AsRef<str> for StringView<'_> {
181    fn as_ref(&self) -> &str {
182        &(self.0)
183    }
184}
185
186impl PartialEq for StringView<'_> {
187    fn eq(&self, other: &Self) -> bool {
188        self.0.eq(&*(other.0))
189    }
190}
191
192impl PartialEq<&str> for StringView<'_> {
193    fn eq(&self, other: &&str) -> bool {
194        self.0.eq(*other)
195    }
196}
197
198impl Eq for StringView<'_> {}
199
200impl PartialOrd for StringView<'_> {
201    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
202        self.0.partial_cmp(&**other)
203    }
204}
205
206impl Ord for StringView<'_> {
207    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
208        self.0.cmp(other)
209    }
210}
211
/// Safety comment:
///
/// This method will _not_ trace the pointer if the rust string exists.
/// The js string could be garbage collected and, hence, violating this
/// could lead to undefined behavior
///
/// Only the `JSString` variant holds something the tracer is told about; the other variants
/// are skipped.
unsafe impl Trace for DOMStringType {
    unsafe fn trace(&self, tracer: *mut js::jsapi::JSTracer) {
        unsafe {
            match self {
                // A Rust string holds no JS pointer, so there is nothing to trace.
                DOMStringType::Rust(_s) => {},
                DOMStringType::JSString(rooted_traceable_box) => rooted_traceable_box.trace(tracer),
                #[cfg(test)]
                DOMStringType::Latin1Vec(_s) => {},
            }
        }
    }
}
229
230impl malloc_size_of::MallocSizeOf for DOMStringType {
231    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
232        match self {
233            DOMStringType::Rust(s) => s.size_of(ops),
234            DOMStringType::JSString(_rooted_traceable_box) => {
235                // Managed by JS Engine
236                0
237            },
238            #[cfg(test)]
239            DOMStringType::Latin1Vec(s) => s.size_of(ops),
240        }
241    }
242}
243
244impl std::fmt::Debug for DOMStringType {
245    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
246        match self {
247            DOMStringType::Rust(s) => f.debug_struct("DOMString").field("rust_string", s).finish(),
248            DOMStringType::JSString(_rooted_traceable_box) => f.debug_struct("DOMString").finish(),
249            #[cfg(test)]
250            DOMStringType::Latin1Vec(s) => f
251                .debug_struct("DOMString")
252                .field("latin1_string", s)
253                .finish(),
254        }
255    }
256}
257
/// A DOMString.
///
/// This type corresponds to the [`DOMString`] type in WebIDL.
///
/// [`DOMString`]: https://webidl.spec.whatwg.org/#idl-DOMString
///
/// Conceptually, a DOMString has the same value space as a JavaScript String,
/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with
/// unpaired surrogates present (also sometimes called WTF-16).
///
/// However, Rust `String`s are guaranteed to be valid UTF-8, and as such have
/// a *smaller value space* than WTF-16 (i.e., some JavaScript String values
/// can not be represented as a Rust `String`). This introduces the question of
/// what to do with values being passed from JavaScript to Rust that contain
/// unpaired surrogates.
///
/// The hypothesis is that it does not matter much how exactly those values are
/// transformed, because passing unpaired surrogates into the DOM is very rare.
/// Instead Servo will replace the unpaired surrogate by a U+FFFD replacement
/// character.
///
/// Currently, the lack of crash reports about this issue provides some
/// evidence to support the hypothesis. This evidence will hopefully be used to
/// convince other browser vendors that it would be safe to replace unpaired
/// surrogates at the boundary between JavaScript and native code. (This would
/// unify the `DOMString` and `USVString` types, both in the WebIDL standard
/// and in Servo.)
///
/// This string class will keep either the reference to the mozjs object alive
/// or will have an internal Rust string.
/// We currently default to doing most of the string operations on the Rust side.
/// You should use `str()` to get the Rust string (represented by a `StringView`
/// which you can deref to a `&str`). You should assume that this conversion is
/// expensive. For now, you should assume that all the functions incur this
/// conversion cost.
#[repr(transparent)]
#[derive(Debug, Default, MallocSizeOf, JSTraceable)]
pub struct DOMString(RefCell<DOMStringType>);
296
297impl Clone for DOMString {
298    fn clone(&self) -> Self {
299        self.ensure_rust_string().clone().into()
300    }
301}
302
/// The errors that can occur while creating a [`DOMString`].
pub enum DOMStringErrorType {
    /// Converting the JS value to a JS string failed (returned by
    /// `DOMString::from_js_string` when `ToString` yields a null pointer).
    JSConversionError,
}
306
307impl DOMString {
308    /// Creates a new `DOMString`.
309    pub fn new() -> DOMString {
310        Default::default()
311    }
312
    /// Creates the string from js. If the string can be encoded in latin1, just take the reference
    /// to the JSString. Otherwise do the conversion to utf8 now.
    ///
    /// Returns `Err(DOMStringErrorType::JSConversionError)` when `ToString` on `value` yields a
    /// null pointer.
    pub fn from_js_string(
        cx: SafeJSContext,
        value: js::gc::HandleValue,
    ) -> Result<DOMString, DOMStringErrorType> {
        let string_ptr = unsafe { js::rust::ToString(*cx, value) };
        if string_ptr.is_null() {
            debug!("ToString failed");
            Err(DOMStringErrorType::JSConversionError)
        } else {
            let latin1 = unsafe { js::jsapi::JS_DeprecatedStringHasLatin1Chars(string_ptr) };
            let inner = if latin1 {
                // Keep the JS string and defer any conversion until a Rust string is needed.
                let h = RootedTraceableBox::from_box(Heap::boxed(string_ptr));
                DOMStringType::JSString(h)
            } else {
                // We need to convert the string anyway as it is not just latin1
                DOMStringType::Rust(unsafe {
                    jsstr_to_string(*cx, ptr::NonNull::new(string_ptr).unwrap())
                })
            };
            Ok(DOMString(RefCell::new(inner)))
        }
    }
337
338    /// Transforms the internal storage of this [`DOMString`] into a Rust string if it is not
339    /// yet one. This will make a copy of the underlying string data.
340    fn ensure_rust_string(&self) -> RefMut<'_, String> {
341        let inner = self.0.borrow_mut();
342        RefMut::map(inner, |inner| inner.ensure_rust_string())
343    }
344
345    /// Debug the current  state of the string without modifying it.
346    #[expect(unused)]
347    fn debug_js(&self) {
348        match *self.0.borrow() {
349            DOMStringType::Rust(ref s) => info!("Rust String ({})", s),
350            DOMStringType::JSString(ref rooted_traceable_box) => {
351                let s = unsafe {
352                    jsstr_to_string(
353                        Runtime::get().expect("JS runtime has shut down").as_ptr(),
354                        ptr::NonNull::new(rooted_traceable_box.get()).unwrap(),
355                    )
356                };
357                info!("JSString ({})", s);
358            },
359            #[cfg(test)]
360            DOMStringType::Latin1Vec(ref items) => info!("Latin1 string"),
361        }
362    }
363
364    /// Returns the underlying rust string.
365    pub fn str(&self) -> StringView<'_> {
366        {
367            let inner = self.0.borrow();
368            if matches!(&*inner, DOMStringType::Rust(..)) {
369                return StringView(Ref::map(inner, |inner| match inner {
370                    DOMStringType::Rust(string) => string.as_str(),
371                    _ => unreachable!("Guaranteed by condition above"),
372                }));
373            }
374        }
375
376        self.ensure_rust_string();
377        self.str()
378    }
379
380    /// Return the [`EncodedBytes`] of this [`DOMString`]. This returns the original encoded
381    /// bytes of the string without doing any conversions.
382    pub fn encoded_bytes(&self) -> EncodedBytes<'_> {
383        let inner = self.0.borrow();
384        match &*inner {
385            DOMStringType::Rust(..) => {
386                EncodedBytes::Utf8(Ref::map(inner, |inner| inner.as_raw_bytes()))
387            },
388            _ => EncodedBytes::Latin1(Ref::map(inner, |inner| inner.as_raw_bytes())),
389        }
390    }
391
392    pub fn clear(&mut self) {
393        let mut inner = self.0.borrow_mut();
394        let DOMStringType::Rust(string) = &mut *inner else {
395            *inner = DOMStringType::Rust(String::new());
396            return;
397        };
398        string.clear();
399    }
400
401    pub fn is_empty(&self) -> bool {
402        self.encoded_bytes().is_empty()
403    }
404
405    /// The length of this string in UTF-8 code units, each one being one byte in size.
406    ///
407    /// Note: This is different than the number of Unicode characters (or code points). A
408    /// character may require multiple UTF-8 code units.
409    pub fn len(&self) -> usize {
410        self.encoded_bytes().len()
411    }
412
413    /// The length of this string in UTF-8 code units, each one being one byte in size.
414    /// This method is the same as [`DOMString::len`], but the result is wrapped in a
415    /// `Utf8CodeUnitLength` to be used in code that mixes different kinds of offsets.
416    ///
417    /// Note: This is different than the number of Unicode characters (or code points). A
418    /// character may require multiple UTF-8 code units.
419    pub fn len_utf8(&self) -> Utf8CodeUnitLength {
420        Utf8CodeUnitLength(self.len())
421    }
422
423    /// The length of this string in UTF-16 code units, each one being one two bytes in size.
424    ///
425    /// Note: This is different than the number of Unicode characters (or code points). A
426    /// character may require multiple UTF-16 code units.
427    pub fn len_utf16(&self) -> Utf16CodeUnitLength {
428        Utf16CodeUnitLength(self.str().chars().map(char::len_utf16).sum())
429    }
430
431    pub fn make_ascii_lowercase(&mut self) {
432        self.0
433            .borrow_mut()
434            .ensure_rust_string()
435            .make_ascii_lowercase();
436    }
437
438    pub fn push_str(&mut self, string_to_push: &str) {
439        self.0
440            .borrow_mut()
441            .ensure_rust_string()
442            .push_str(string_to_push);
443    }
444
445    /// <https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace>
446    pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
447        if self.is_empty() {
448            return;
449        }
450
451        let mut inner = self.0.borrow_mut();
452        let string = inner.ensure_rust_string();
453        let trailing_whitespace_len = string
454            .trim_end_matches(|character: char| character.is_ascii_whitespace())
455            .len();
456        string.truncate(trailing_whitespace_len);
457        if string.is_empty() {
458            return;
459        }
460
461        let first_non_whitespace = string
462            .find(|character: char| !character.is_ascii_whitespace())
463            .unwrap();
464        string.replace_range(0..first_non_whitespace, "");
465    }
466
467    /// <https://html.spec.whatwg.org/multipage/#valid-floating-point-number>
468    pub fn is_valid_floating_point_number_string(&self) -> bool {
469        static RE: LazyLock<Regex> = LazyLock::new(|| {
470            Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap()
471        });
472
473        RE.is_match(self.0.borrow_mut().ensure_rust_string()) &&
474            self.parse_floating_point_number().is_some()
475    }
476
477    pub fn parse<T: FromStr>(&self) -> Result<T, <T as FromStr>::Err> {
478        self.str().parse::<T>()
479    }
480
481    /// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
482    pub fn parse_floating_point_number(&self) -> Option<f64> {
483        parse_floating_point_number(&self.str())
484    }
485
486    /// <https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number>
487    pub fn set_best_representation_of_the_floating_point_number(&mut self) {
488        if let Some(val) = self.parse_floating_point_number() {
489            // [tc39] Step 2: If x is either +0 or -0, return "0".
490            let parsed_value = if val.is_zero() { 0.0_f64 } else { val };
491
492            *self.0.borrow_mut() = DOMStringType::Rust(parsed_value.to_string());
493        }
494    }
495
496    pub fn to_lowercase(&self) -> String {
497        self.str().to_lowercase()
498    }
499
500    pub fn to_uppercase(&self) -> String {
501        self.str().to_uppercase()
502    }
503
504    pub fn strip_newlines(&mut self) {
505        // > To strip newlines from a string, remove any U+000A LF and U+000D CR code
506        // > points from the string.
507        self.0
508            .borrow_mut()
509            .ensure_rust_string()
510            .retain(|character| character != '\r' && character != '\n');
511    }
512
513    /// Normalize newlines according to <https://infra.spec.whatwg.org/#normalize-newlines>.
514    pub fn normalize_newlines(&mut self) {
515        // > To normalize newlines in a string, replace every U+000D CR U+000A LF code point
516        // > pair with a single U+000A LF code point, and then replace every remaining
517        // > U+000D CR code point with a U+000A LF code point.
518        let mut inner = self.0.borrow_mut();
519        let string = inner.ensure_rust_string();
520        *string = string.replace("\r\n", "\n").replace("\r", "\n")
521    }
522
523    pub fn replace(self, needle: &str, replace_char: &str) -> DOMString {
524        let new_string = self.str().to_owned();
525        DOMString(RefCell::new(DOMStringType::Rust(
526            new_string.replace(needle, replace_char),
527        )))
528    }
529
530    /// Pattern is not yet stable in rust, hence, we need different methods for str and char
531    pub fn starts_with(&self, c: char) -> bool {
532        if !c.is_ascii() {
533            self.str().starts_with(c)
534        } else {
535            // As this is an ASCII character, it is guaranteed to be a single byte, no matter if the
536            // underlying encoding is UTF-8 or Latin1.
537            self.encoded_bytes().bytes().starts_with(&[c as u8])
538        }
539    }
540
541    pub fn starts_with_str(&self, needle: &str) -> bool {
542        self.str().starts_with(needle)
543    }
544
545    pub fn ends_with_str(&self, needle: &str) -> bool {
546        self.str().ends_with(needle)
547    }
548
549    pub fn contains(&self, needle: &str) -> bool {
550        self.str().contains(needle)
551    }
552
553    pub fn to_ascii_lowercase(&self) -> String {
554        let conversion = match self.encoded_bytes() {
555            EncodedBytes::Latin1(bytes) => {
556                if bytes.iter().all(|c| *c <= ASCII_END) {
557                    // We are just simple ascii
558                    Some(unsafe {
559                        String::from_utf8_unchecked(
560                            bytes
561                                .iter()
562                                .map(|c| {
563                                    if *c >= ASCII_CAPITAL_A && *c <= ASCII_CAPITAL_Z {
564                                        c + 32
565                                    } else {
566                                        *c
567                                    }
568                                })
569                                .collect(),
570                        )
571                    })
572                } else {
573                    None
574                }
575            },
576            EncodedBytes::Utf8(bytes) => unsafe {
577                // Save because we know it was a utf8 string
578                Some(str::from_utf8_unchecked(&bytes).to_ascii_lowercase())
579            },
580        };
581        // We otherwise would double borrow the refcell
582        if let Some(conversion) = conversion {
583            conversion
584        } else {
585            self.str().to_ascii_lowercase()
586        }
587    }
588
589    fn contains_space_characters(
590        &self,
591        latin1_characters: &'static [u8],
592        utf8_characters: &'static [char],
593    ) -> bool {
594        match self.encoded_bytes() {
595            EncodedBytes::Latin1(items) => {
596                latin1_characters.iter().any(|byte| items.contains(byte))
597            },
598            EncodedBytes::Utf8(bytes) => {
599                // Save because we know it was a utf8 string
600                let s = unsafe { str::from_utf8_unchecked(&bytes) };
601                s.contains(utf8_characters)
602            },
603        }
604    }
605
606    /// <https://infra.spec.whatwg.org/#ascii-tab-or-newline>
607    pub fn contains_tab_or_newline(&self) -> bool {
608        const LATIN_TAB_OR_NEWLINE: [u8; 3] = [ASCII_TAB, ASCII_NEWLINE, ASCII_CR];
609        const UTF8_TAB_OR_NEWLINE: [char; 3] = ['\u{0009}', '\u{000a}', '\u{000d}'];
610
611        self.contains_space_characters(&LATIN_TAB_OR_NEWLINE, &UTF8_TAB_OR_NEWLINE)
612    }
613
614    /// <https://infra.spec.whatwg.org/#ascii-whitespace>
615    pub fn contains_html_space_characters(&self) -> bool {
616        const SPACE_BYTES: [u8; 5] = [
617            ASCII_TAB,
618            ASCII_NEWLINE,
619            ASCII_FORMFEED,
620            ASCII_CR,
621            ASCII_SPACE,
622        ];
623        self.contains_space_characters(&SPACE_BYTES, HTML_SPACE_CHARACTERS)
624    }
625
626    /// This returns the string in utf8 bytes, i.e., `[u8]` encoded with utf8.
627    pub fn as_bytes(&self) -> BytesView<'_> {
628        // BytesView will just give the raw bytes on dereference.
629        // If we are ascii this is the same for latin1 and utf8.
630        // Otherwise we convert to rust.
631        if self.is_ascii() {
632            BytesView(self.0.borrow())
633        } else {
634            self.ensure_rust_string();
635            BytesView(self.0.borrow())
636        }
637    }
638
639    /// Tests if there are only ascii lowercase characters. Does not include special characters.
640    pub fn is_ascii_lowercase(&self) -> bool {
641        match self.encoded_bytes() {
642            EncodedBytes::Latin1(items) => items
643                .iter()
644                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(c)),
645            EncodedBytes::Utf8(s) => s
646                .iter()
647                .map(|c| c.to_u8().unwrap_or(ASCII_LOWERCASE_A - 1))
648                .all(|c| (ASCII_LOWERCASE_A..=ASCII_LOWERCASE_Z).contains(&c)),
649        }
650    }
651
652    /// Is the string only ascii characters
653    pub fn is_ascii(&self) -> bool {
654        self.encoded_bytes().bytes().is_ascii()
655    }
656
657    /// Returns true if the slice only contains bytes that are safe to use in cookie strings.
658    /// <https://www.ietf.org/archive/id/draft-ietf-httpbis-rfc6265bis-15.html#section-5.6-6>
659    /// Not using ServoCookie::is_valid_name_or_value to prevent dependency on the net crate.
660    pub fn is_valid_for_cookie(&self) -> bool {
661        match self.encoded_bytes() {
662            EncodedBytes::Latin1(items) | EncodedBytes::Utf8(items) => !items
663                .iter()
664                .any(|c| *c == 0x7f || (*c <= 0x1f && *c != 0x09)),
665        }
666    }
667
668    /// Call the callback with a `&str` reference of the string stored in this [`DOMString`]. Note
669    /// that if the [`DOMString`] cannot be interpreted as a Rust string a conversion will be done.
670    fn with_str_reference<Result>(&self, callback: fn(&str) -> Result) -> Result {
671        match self.encoded_bytes() {
672            // If the Latin1 string is all ASCII bytes, then it is safe to interpret it as UTF-8.
673            EncodedBytes::Latin1(latin1_bytes) => {
674                if latin1_bytes.iter().all(|character| character.is_ascii()) {
675                    // SAFETY: All characters are ASCII, so it is safe to interpret this string as
676                    // UTF-8.
677                    return callback(unsafe { str::from_utf8_unchecked(&latin1_bytes) });
678                }
679            },
680            EncodedBytes::Utf8(utf8_bytes) => {
681                // SAFETY: These are the bytes of a UTF-8 string already, so they can be interpreted
682                // as UTF-8.
683                return callback(unsafe { str::from_utf8_unchecked(&utf8_bytes) });
684            },
685        };
686        callback(self.str().deref())
687    }
688}
689
/// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
///
/// Returns the parsed value, or `None` if `input` (with surrounding whitespace trimmed) is
/// not a number Rust can parse, if the result is not finite, or if `input` itself ends with
/// `.` or starts with `+`. Note that these two checks look at the untrimmed `input`.
pub fn parse_floating_point_number(input: &str) -> Option<f64> {
    // Steps 15-16 are telling us things about IEEE rounding modes
    // for floating-point significands; this code assumes the Rust
    // compiler already matches them in any cases where
    // that actually matters. They are not
    // related to f64::round(), which is for rounding to integers.
    let value = input.trim().parse::<f64>().ok()?;

    // A valid number is the same as what rust considers to be valid,
    // except for +1., NaN, and Infinity.
    let accepted = value.is_finite() && !input.ends_with('.') && !input.starts_with('+');
    if accepted { Some(value) } else { None }
}
703
704pub struct BytesView<'a>(Ref<'a, DOMStringType>);
705
706impl Deref for BytesView<'_> {
707    type Target = [u8];
708
709    fn deref(&self) -> &Self::Target {
710        // This does the correct thing by the construction of BytesView in `DOMString::as_bytes`.
711        self.0.as_raw_bytes()
712    }
713}
714
715impl Ord for DOMString {
716    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
717        self.str().cmp(&other.str())
718    }
719}
720
721impl PartialOrd for DOMString {
722    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
723        self.str().partial_cmp(&other.str())
724    }
725}
726
727impl Extend<char> for DOMString {
728    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
729        self.0.borrow_mut().ensure_rust_string().extend(iter)
730    }
731}
732
/// Converts the string into a JS value. A string that is still held by the JS engine is
/// handed back as is; a Rust string goes through `String`'s own conversion.
impl ToJSValConvertible for DOMString {
    unsafe fn to_jsval(&self, cx: *mut JSContext, mut rval: MutableHandleValue) {
        let val = self.0.borrow();
        match *val {
            DOMStringType::Rust(ref s) => unsafe {
                s.to_jsval(cx, rval);
            },
            // Reuse the existing JS string instead of creating a new one.
            DOMStringType::JSString(ref rooted_traceable_box) => unsafe {
                rval.set(StringValue(&*rooted_traceable_box.get()));
            },
            #[cfg(test)]
            DOMStringType::Latin1Vec(ref items) => {
                // A Latin1 byte needs at most two bytes in UTF-8.
                let mut v = vec![0; items.len() * 2];
                let real_size =
                    encoding_rs::mem::convert_latin1_to_utf8(items.as_slice(), v.as_mut_slice());
                v.truncate(real_size);

                String::from_utf8(v)
                    .expect("Error in constructin test string")
                    .to_jsval(cx, rval);
            },
        };
    }
}
757
758impl std::hash::Hash for DOMString {
759    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
760        self.str().hash(state);
761    }
762}
763
764impl std::fmt::Display for DOMString {
765    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
766        fmt::Display::fmt(self.str().deref(), f)
767    }
768}
769
770impl std::cmp::PartialEq<str> for DOMString {
771    fn eq(&self, other: &str) -> bool {
772        if other.is_ascii() {
773            *other.as_bytes() == *self.encoded_bytes().bytes()
774        } else {
775            self.str().deref() == other
776        }
777    }
778}
779
780impl std::cmp::PartialEq<&str> for DOMString {
781    fn eq(&self, other: &&str) -> bool {
782        self.eq(*other)
783    }
784}
785
786impl std::cmp::PartialEq<String> for DOMString {
787    fn eq(&self, other: &String) -> bool {
788        self.eq(other.as_str())
789    }
790}
791
792impl std::cmp::PartialEq<DOMString> for String {
793    fn eq(&self, other: &DOMString) -> bool {
794        other.eq(self)
795    }
796}
797
798impl std::cmp::PartialEq<DOMString> for str {
799    fn eq(&self, other: &DOMString) -> bool {
800        other.eq(self)
801    }
802}
803
804impl std::cmp::PartialEq for DOMString {
805    fn eq(&self, other: &DOMString) -> bool {
806        let result = match (self.encoded_bytes(), other.encoded_bytes()) {
807            (EncodedBytes::Latin1(bytes), EncodedBytes::Latin1(other_bytes)) => {
808                Some(*bytes == *other_bytes)
809            },
810            (EncodedBytes::Latin1(bytes), EncodedBytes::Utf8(other_bytes))
811                if other_bytes.is_ascii() =>
812            {
813                Some(*bytes == *other_bytes)
814            },
815            (EncodedBytes::Utf8(bytes), EncodedBytes::Latin1(other_bytes)) if bytes.is_ascii() => {
816                Some(*bytes == *other_bytes)
817            },
818            (EncodedBytes::Utf8(bytes), EncodedBytes::Utf8(other_bytes)) => {
819                Some(*bytes == *other_bytes)
820            },
821            _ => None,
822        };
823
824        if let Some(eq_result) = result {
825            return eq_result;
826        }
827
828        *self.str() == *other.str()
829    }
830}
831
832impl std::cmp::Eq for DOMString {}
833
834impl From<std::string::String> for DOMString {
835    fn from(string: String) -> Self {
836        DOMString(RefCell::new(DOMStringType::Rust(string)))
837    }
838}
839
840impl From<&str> for DOMString {
841    fn from(string: &str) -> Self {
842        String::from(string).into()
843    }
844}
845
846impl From<DOMString> for LocalName {
847    fn from(dom_string: DOMString) -> LocalName {
848        dom_string.with_str_reference(|string| LocalName::from(string))
849    }
850}
851
852impl From<&DOMString> for LocalName {
853    fn from(dom_string: &DOMString) -> LocalName {
854        dom_string.with_str_reference(|string| LocalName::from(string))
855    }
856}
857
858impl From<DOMString> for Namespace {
859    fn from(dom_string: DOMString) -> Namespace {
860        dom_string.with_str_reference(|string| Namespace::from(string))
861    }
862}
863
864impl From<DOMString> for Atom {
865    fn from(dom_string: DOMString) -> Atom {
866        dom_string.with_str_reference(|string| Atom::from(string))
867    }
868}
869
870impl From<DOMString> for String {
871    fn from(val: DOMString) -> Self {
872        val.str().to_owned()
873    }
874}
875
876impl From<DOMString> for Vec<u8> {
877    fn from(value: DOMString) -> Self {
878        value.str().as_bytes().to_vec()
879    }
880}
881
882impl From<Cow<'_, str>> for DOMString {
883    fn from(value: Cow<'_, str>) -> Self {
884        DOMString(RefCell::new(DOMStringType::Rust(value.into_owned())))
885    }
886}
887
/// Implementation detail of `match_domstring_ascii!`; not meant to be invoked
/// directly.
///
/// Expands a list of `"literal" => expr,` arms into an `if`/`else` chain that
/// compares the bytes of each literal with `$input.bytes()`, and expands the
/// final `pattern => expr` arm into a plain `match` on `$input`.
///
/// `$variant` is accepted and forwarded but not used by the expansion.
#[macro_export]
macro_rules! match_domstring_ascii_inner {
    // Literal arm: run `$then` when the bytes are equal, otherwise try the remaining arms.
    ($variant: expr, $input: expr, $ascii_literal: literal => $then: expr, $($rest:tt)*) => {
        if {
            // Byte-wise comparison is only meaningful for ASCII literals.
            // This check is compiled out when debug assertions are disabled.
            debug_assert!(($ascii_literal).is_ascii());
            $ascii_literal.as_bytes()
        } == $input.bytes() {
            $then
        } else {
            $crate::match_domstring_ascii_inner!($variant, $input, $($rest)*)
        }
    };
    // Final arm: an ordinary pattern (typically `_`). The trailing comma is optional.
    ($variant: expr, $input: expr, $p: pat => $then: expr $(,)?) => {
        match $input {
            $p => $then,
        }
    };
}
907
/// Matches a `DOMString` against ASCII string literals by comparing its
/// encoded bytes directly.
///
/// Only ASCII literals may be used as arms; any other literal leads to wrong
/// results. The last arm must be an ordinary pattern such as `_`.
/// ```ignore
/// let s = DOMString::from("test");
/// let value = match_domstring_ascii!(s,
/// "test1" => 1,
/// "test2" => 2,
/// "test" => 3,
/// _ => 4,
/// );
/// assert_eq!(value, 3);
/// ```
///
/// The `RefCell` inside `DOMString` is borrowed for the duration of the `match`,
/// so the string cannot be accessed again inside a `match` arm.
#[macro_export]
macro_rules! match_domstring_ascii {
    ($input:expr, $($tail:tt)*) => {{
        use $crate::domstring::EncodedBytes;

        // Evaluate the input once; both encodings then go through the same arm expansion.
        let bytes = $input.encoded_bytes();
        match bytes {
            EncodedBytes::Utf8(_) => {
                $crate::match_domstring_ascii_inner!(EncodedBytes::Utf8, bytes, $($tail)*)
            }
            EncodedBytes::Latin1(_) => {
                $crate::match_domstring_ascii_inner!(EncodedBytes::Latin1, bytes, $($tail)*)
            }
        }
    }};
}
943
#[cfg(test)]
mod tests {
    use super::*;

    /// Latin-1 code unit of PILCROW SIGN (U+00B6).
    const LATIN1_PILCROW: u8 = 0xB6;
    /// UTF-8 encoding of PILCROW SIGN (U+00B6).
    const UTF8_PILCROW: [u8; 2] = [194, 182];
    /// Latin-1 code unit of SUPERSCRIPT TWO (U+00B2).
    const LATIN1_POWER2: u8 = 0xB2;

    /// Builds a `DOMString` that is backed by raw Latin-1 bytes.
    fn from_latin1(l1vec: Vec<u8>) -> DOMString {
        DOMString(RefCell::new(DOMStringType::Latin1Vec(l1vec)))
    }

    #[test]
    fn string_functions() {
        let s = DOMString::from("AbBcC❤&%$#");
        let s_copy = s.clone();
        assert_eq!(s.to_ascii_lowercase(), "abbcc❤&%$#");
        assert_eq!(s, s_copy);
        assert_eq!(s.len(), 12);
        assert_eq!(s_copy.len(), 12);
        assert!(s.starts_with('A'));
        let s2 = DOMString::from("");
        assert!(s2.is_empty());
    }

    #[test]
    fn string_functions_latin1() {
        {
            let s = from_latin1(vec![
                b'A',
                b'b',
                b'B',
                b'c',
                b'C',
                b'&',
                b'%',
                b'$',
                b'#',
                LATIN1_POWER2,
            ]);
            assert_eq!(s.to_ascii_lowercase(), "abbcc&%$#²");
        }
        {
            let s = from_latin1(vec![b'A', b'b', b'B', b'c', b'C']);
            assert_eq!(s.to_ascii_lowercase(), "abbcc");
        }
        {
            let s = from_latin1(vec![
                b'A',
                b'b',
                b'B',
                b'c',
                b'C',
                b'&',
                b'%',
                b'$',
                b'#',
                LATIN1_POWER2,
            ]);
            assert_eq!(s.len(), 11);
            assert!(s.starts_with('A'));
        }
        {
            let s = from_latin1(vec![]);
            assert!(s.is_empty());
        }
    }

    // The reported length has to be the UTF-8 length, both while the string is
    // still stored as Latin-1 and after it was converted to a Rust string.
    #[test]
    fn test_length() {
        // (first Latin-1 byte of a 16 byte row, the same row as UTF-8 text)
        let rows: [(u8, &str); 6] = [
            (0xA0, "\u{00A0}¡¢£¤¥¦§¨©ª«¬\u{00AD}®¯"),
            (0xB0, "°±²³´µ¶·¸¹º»¼½¾¿"),
            (0xC0, "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ"),
            // U+00D7 and U+00D8 are written as escapes so that they cannot be mangled.
            (0xD0, "ÐÑÒÓÔÕÖ\u{00D7}\u{00D8}ÙÚÛÜÝÞß"),
            (0xE0, "àáâãäåæçèéêëìíîï"),
            (0xF0, "ðñòóôõö÷øùúûüýþÿ"),
        ];

        for (first_byte, utf8) in rows {
            let s = from_latin1((first_byte..=first_byte + 0x0F).collect());
            assert_eq!(s.len(), utf8.len(), "Latin-1 row {first_byte:#04x}");

            s.ensure_rust_string();
            assert_eq!(s.len(), utf8.len(), "converted row {first_byte:#04x}");
        }
    }

    #[test]
    fn test_convert() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        s.ensure_rust_string();
        assert_eq!(&*s.str(), "abc%$");
    }

    #[test]
    fn partial_eq() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
        let string = String::from("abc%$");
        let s2 = DOMString::from(string.clone());
        assert_eq!(s, s2);
        assert_eq!(s, string);
    }

    #[test]
    fn encoded_latin1_bytes() {
        let original_latin1_bytes = vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2];
        let dom_string = from_latin1(original_latin1_bytes.clone());
        let string_latin1_bytes = match dom_string.encoded_bytes() {
            EncodedBytes::Latin1(bytes) => bytes,
            _ => unreachable!("Expected Latin1 encoded bytes"),
        };
        assert_eq!(*original_latin1_bytes, *string_latin1_bytes);
    }

    #[test]
    fn testing_stringview() {
        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);

        assert_eq!(
            s.str().chars().collect::<Vec<char>>(),
            vec!['a', 'b', 'c', '%', '$', '²']
        );
        assert_eq!(s.str().as_bytes(), String::from("abc%$²").as_bytes());
    }

    // We need to be extra careful here as two strings that have different
    // representation need to have the same hash.
    // Additionally, the interior mutability is only used for the conversion
    // which is forced by Hash. Hence, it is safe to have this interior mutability.
    #[test]
    fn test_hash() {
        use std::hash::{DefaultHasher, Hash, Hasher};
        fn hash_value(d: &DOMString) -> u64 {
            let mut hasher = DefaultHasher::new();
            d.hash(&mut hasher);
            hasher.finish()
        }

        let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);
        let s_converted = from_latin1(vec![b'a', b'b', b'c', b'%', b'$', LATIN1_POWER2]);
        s_converted.ensure_rust_string();
        let s2 = DOMString::from("abc%$²");

        let hash_s = hash_value(&s);
        let hash_s_converted = hash_value(&s_converted);
        let hash_s2 = hash_value(&s2);

        assert_eq!(hash_s, hash_s2);
        assert_eq!(hash_s, hash_s_converted);
    }

    // Checks that `match_domstring_ascii!` executes the arm of the matching literal.
    // Every arm that must not run panics, so a literal that silently fails to
    // match is reported instead of slipping through the fallback arm.
    #[test]
    fn test_match_executing() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abc" => (),
                "bcd" => panic!("\"bcd\" must not match"),
                _ => panic!("\"abc\" should have matched"),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            match_domstring_ascii!( s,
                "abc/" => (),
                "bcd" => panic!("\"bcd\" must not match"),
                _ => panic!("\"abc/\" should have matched"),
            );
        }

        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            match_domstring_ascii!( s,
                "bcd" => panic!("\"bcd\" must not match"),
                "abc%$" => (),
                _ => panic!("\"abc%$\" should have matched"),
            );
        }

        {
            let s = DOMString::from("abcde");
            match_domstring_ascii!( s,
                "abc" => panic!("\"abc\" must not match"),
                "bcd" => panic!("\"bcd\" must not match"),
                _ => (),
            );
        }
        {
            let s = DOMString::from("abc%$");
            match_domstring_ascii!( s,
                "bcd" => panic!("\"bcd\" must not match"),
                "abc%$" => (),
                _ => panic!("\"abc%$\" should have matched"),
            );
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            match_domstring_ascii!( s,
                "abcdd" => panic!("\"abcdd\" must not match"),
                "bcd" => panic!("\"bcd\" must not match"),
                _ => (),
            );
        }
    }

    // Checks that `match_domstring_ascii!` evaluates to the value of the matching arm.
    #[test]
    fn test_match_returning_result() {
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abc" => true,
                "bcd" => false,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'/']);
            let res = match_domstring_ascii!( s,
                "abc/" => true,
                "bcd" => false,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c', b'%', b'$']);
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert!(res);
        }

        {
            let s = DOMString::from("abcde");
            let res = match_domstring_ascii!( s,
                "abc" => false,
                "bcd" => false,
                _ => true,
            );
            assert!(res);
        }
        {
            let s = DOMString::from("abc%$");
            let res = match_domstring_ascii!( s,
                "bcd" => false,
                "abc%$" => true,
                _ => false,
            );
            assert!(res);
        }
        {
            let s = from_latin1(vec![b'a', b'b', b'c']);
            let res = match_domstring_ascii!( s,
                "abcdd" => false,
                "bcd" => false,
                _ => true,
            );
            assert!(res);
        }
    }

    // The panic comes from a `debug_assert!` inside the macro, so this test can
    // only succeed when debug assertions are enabled.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_match_panic() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "❤" => true,
            _ => false,);
    }

    // Same as above, with the non-ASCII literal in a later arm.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_match_panic2() {
        let s = DOMString::from("abcd");
        let _res = match_domstring_ascii!(s,
            "abc" => false,
            "❤" => true,
            _ => false,
        );
    }

    #[test]
    fn test_strip_whitespace() {
        {
            let mut s = from_latin1(vec![
                b' ',
                b' ',
                b' ',
                b'\n',
                b' ',
                b'a',
                b'b',
                b'c',
                b'%',
                b'$',
                LATIN1_POWER2,
                b' ',
            ]);

            s.strip_leading_and_trailing_ascii_whitespace();
            s.ensure_rust_string();
            assert_eq!(&*s.str(), "abc%$²");
        }
        {
            let mut s = DOMString::from("   \n  abc%$ ");

            s.strip_leading_and_trailing_ascii_whitespace();
            s.ensure_rust_string();
            assert_eq!(&*s.str(), "abc%$");
        }
    }

    // https://infra.spec.whatwg.org/#ascii-whitespace
    #[test]
    fn contains_html_space_characters() {
        let whitespace_bytes = [
            ASCII_TAB,
            ASCII_NEWLINE,
            ASCII_FORMFEED,
            ASCII_CR,
            ASCII_SPACE,
        ];
        for whitespace in whitespace_bytes {
            let s = from_latin1(vec![b'a', b'a', b'a', whitespace, b'a', b'a']);
            assert!(
                s.contains_html_space_characters(),
                "Latin-1 string with byte {whitespace:#04x}"
            );
            s.ensure_rust_string();
            assert!(
                s.contains_html_space_characters(),
                "converted string with byte {whitespace:#04x}"
            );
        }

        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'a']);
        assert!(!s.contains_html_space_characters());
        s.ensure_rust_string();
        assert!(!s.contains_html_space_characters());
    }

    #[test]
    fn atom() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let atom1 = Atom::from(s);
        let s2 = DOMString::from("aaa aa");
        let atom2 = Atom::from(s2);
        assert_eq!(atom1, atom2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let atom3 = Atom::from(s3);
        assert_ne!(atom1, atom3);
    }

    #[test]
    fn namespace() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let namespace1 = Namespace::from(s);
        let s2 = DOMString::from("aaa aa");
        let namespace2 = Namespace::from(s2);
        assert_eq!(namespace1, namespace2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let namespace3 = Namespace::from(s3);
        assert_ne!(namespace1, namespace3);
    }

    #[test]
    fn localname() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        let name1 = LocalName::from(s);
        let s2 = DOMString::from("aaa aa");
        let name2 = LocalName::from(s2);
        assert_eq!(name1, name2);
        let s3 = from_latin1(vec![b'a', b'a', b'a', LATIN1_POWER2, b'a', b'a']);
        let name3 = LocalName::from(s3);
        assert_ne!(name1, name3);
    }

    #[test]
    fn is_ascii_lowercase() {
        let s = from_latin1(vec![b'a', b'a', b'a', ASCII_SPACE, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILCROW, b'a', b'a']);
        assert!(!s.is_ascii_lowercase());
        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert!(s.is_ascii_lowercase());
        let s = from_latin1(vec![b'`', b'a', b'a', b'a', b'z']);
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from("`aaaz");
        assert!(!s.is_ascii_lowercase());
        let s = DOMString::from("aaaz");
        assert!(s.is_ascii_lowercase());
    }

    #[test]
    fn test_as_bytes() {
        // A non-ASCII Latin-1 byte has to show up as its two byte UTF-8 encoding.
        let s = from_latin1(vec![b'a', b'a', b'a', LATIN1_PILCROW, b'a', b'a']);
        assert_eq!(
            *s.as_bytes(),
            [
                b'a',
                b'a',
                b'a',
                UTF8_PILCROW[0],
                UTF8_PILCROW[1],
                b'a',
                b'a'
            ]
        );

        // Pure ASCII content is passed through unchanged.
        let s = from_latin1(vec![b'a', b'a', b'a', b'a', b'z']);
        assert_eq!(*s.as_bytes(), [b'a', b'a', b'a', b'a', b'z']);

        let str = "abc%$²".to_owned();
        let s = DOMString::from(str.clone());
        assert_eq!(&*s.as_bytes(), str.as_bytes());
        let str = "AbBcC❤&%$#".to_owned();
        let s = DOMString::from(str.clone());
        assert_eq!(&*s.as_bytes(), str.as_bytes());
    }
}