boa_engine/builtins/escape/
mod.rs

1//! Boa's implementation of ECMAScript's string escaping functions.
2//!
3//! The `escape()` function replaces all characters with escape sequences, with the exception of ASCII
4//! word characters (A–Z, a–z, 0–9, _) and @*_+-./.
5//!
6//! The `unescape()` function replaces any escape sequence with the character that it represents.
7//!
8//! More information:
9//!  - [ECMAScript reference][spec]
10//!
11//! [spec]: https://tc39.es/ecma262/#sec-additional-properties-of-the-global-object
12
13use crate::{
14    Context, JsArgs, JsObject, JsResult, JsString, JsValue, context::intrinsics::Intrinsics,
15    js_string, realm::Realm, string::StaticJsStrings,
16};
17
18use super::{BuiltInBuilder, BuiltInObject, IntrinsicObject};
19
/// The `escape` function
///
/// Unit marker type: it carries no data, and the builtin is wired up through the
/// `IntrinsicObject` and `BuiltInObject` implementations below.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Escape;

impl IntrinsicObject for Escape {
    /// Builds the callable intrinsic for `realm`, backed by this module's `escape` function,
    /// with `Self::NAME` as its name and a `length` of 1.
    fn init(realm: &Realm) {
        BuiltInBuilder::callable_with_intrinsic::<Self>(realm, escape)
            .name(Self::NAME)
            .length(1)
            .build();
    }
    /// Returns the `escape` entry of the intrinsic objects, converted into a `JsObject`.
    fn get(intrinsics: &Intrinsics) -> JsObject {
        intrinsics.objects().escape().into()
    }
}

impl BuiltInObject for Escape {
    /// Name of the builtin, taken from `StaticJsStrings::ESCAPE`.
    const NAME: JsString = StaticJsStrings::ESCAPE;
}
39
40/// Builtin JavaScript `escape ( string )` function.
41fn escape(_: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
42    /// Returns `true` if the codepoint `cp` is part of the `unescapedSet`.
43    fn is_unescaped(cp: u16) -> bool {
44        let Ok(cp) = TryInto::<u8>::try_into(cp) else {
45            return false;
46        };
47
48        // 4. Let unescapedSet be the string-concatenation of the ASCII word characters and "@*+-./".
49        cp.is_ascii_alphanumeric() || [b'_', b'@', b'*', b'+', b'-', b'.', b'/'].contains(&cp)
50    }
51
52    // 1. Set string to ? ToString(string).
53    let string = args.get_or_undefined(0).to_string(context)?;
54
55    // 3. Let R be the empty String.
56    let mut vec = Vec::with_capacity(string.len());
57
58    // 2. Let len be the length of string.
59    // 5. Let k be 0.
60    // 6. Repeat, while k < len,
61    //     a. Let C be the code unit at index k within string.
62    for cp in &string {
63        // b. If unescapedSet contains C, then
64        if is_unescaped(cp) {
65            // i. Let S be C.
66            vec.push(cp);
67            continue;
68        }
69        // c. Else,
70        //     i. Let n be the numeric value of C.
71        //     ii. If n < 256, then
72        let c = if cp < 256 {
73            //     1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
74            //     2. Let S be the string-concatenation of "%" and ! StringPad(hex, 2𝔽, "0", start).
75            format!("%{cp:02X}")
76        }
77        //     iii. Else,
78        else {
79            //     1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
80            //     2. Let S be the string-concatenation of "%u" and ! StringPad(hex, 4𝔽, "0", start).
81            format!("%u{cp:04X}")
82        };
83        // d. Set R to the string-concatenation of R and S.
84        // e. Set k to k + 1.
85        vec.extend(c.encode_utf16());
86    }
87
88    // 7. Return R.
89    Ok(js_string!(&vec[..]).into())
90}
91
/// The `unescape` function
///
/// Unit marker type: it carries no data, and the builtin is wired up through the
/// `IntrinsicObject` and `BuiltInObject` implementations below.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Unescape;

impl IntrinsicObject for Unescape {
    /// Builds the callable intrinsic for `realm`, backed by this module's `unescape` function,
    /// with `Self::NAME` as its name and a `length` of 1.
    fn init(realm: &Realm) {
        BuiltInBuilder::callable_with_intrinsic::<Self>(realm, unescape)
            .name(Self::NAME)
            .length(1)
            .build();
    }
    /// Returns the `unescape` entry of the intrinsic objects, converted into a `JsObject`.
    fn get(intrinsics: &Intrinsics) -> JsObject {
        intrinsics.objects().unescape().into()
    }
}

impl BuiltInObject for Unescape {
    /// Name of the builtin, taken from `StaticJsStrings::UNESCAPE`.
    const NAME: JsString = StaticJsStrings::UNESCAPE;
}
111
112/// Builtin JavaScript `unescape ( string )` function.
113fn unescape(_: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
114    /// Converts a char `cp` to its corresponding hex digit value.
115    fn to_hex_digit(cp: u16) -> Option<u16> {
116        char::from_u32(u32::from(cp))
117            .and_then(|c| c.to_digit(16))
118            .and_then(|d| d.try_into().ok())
119    }
120
121    // 1. Set string to ? ToString(string).
122    let string = args.get_or_undefined(0).to_string(context)?;
123
124    // 3. Let R be the empty String.
125    let mut vec = Vec::with_capacity(string.len());
126
127    let mut codepoints = <PeekableN<_, 6>>::new(string.iter());
128
129    // 2. Let len be the length of string.
130    // 4. Let k be 0.
131    // 5. Repeat, while k < len,
132    loop {
133        // a. Let C be the code unit at index k within string.
134        let Some(cp) = codepoints.next() else {
135            break;
136        };
137
138        // b. If C is the code unit 0x0025 (PERCENT SIGN), then
139        if cp != u16::from(b'%') {
140            vec.push(cp);
141            continue;
142        }
143
144        //     i. Let hexDigits be the empty String.
145        //     ii. Let optionalAdvance be 0.
146        // TODO: Try blocks :(
147        let Some(unescaped_cp) = (|| match *codepoints.peek_n(5) {
148            // iii. If k + 5 < len and the code unit at index k + 1 within string is the code unit
149            // 0x0075 (LATIN SMALL LETTER U), then
150            [u, n1, n2, n3, n4] if u == u16::from(b'u') => {
151                // 1. Set hexDigits to the substring of string from k + 2 to k + 6.
152                // 2. Set optionalAdvance to 5.
153                let n1 = to_hex_digit(n1)?;
154                let n2 = to_hex_digit(n2)?;
155                let n3 = to_hex_digit(n3)?;
156                let n4 = to_hex_digit(n4)?;
157
158                // TODO: https://github.com/rust-lang/rust/issues/77404
159                for _ in 0..5 {
160                    codepoints.next();
161                }
162
163                Some((n1 << 12) + (n2 << 8) + (n3 << 4) + n4)
164            }
165            // iv. Else if k + 3 ≤ len, then
166            [n1, n2, ..] => {
167                // 1. Set hexDigits to the substring of string from k + 1 to k + 3.
168                // 2. Set optionalAdvance to 2.
169                let n1 = to_hex_digit(n1)?;
170                let n2 = to_hex_digit(n2)?;
171
172                // TODO: https://github.com/rust-lang/rust/issues/77404
173                for _ in 0..2 {
174                    codepoints.next();
175                }
176
177                Some((n1 << 4) + n2)
178            }
179            _ => None,
180        })() else {
181            vec.push(u16::from(b'%'));
182            continue;
183        };
184
185        //     v. Let parseResult be ParseText(StringToCodePoints(hexDigits), HexDigits[~Sep]).
186        //     vi. If parseResult is a Parse Node, then
187        //         1. Let n be the MV of parseResult.
188        //         2. Set C to the code unit whose numeric value is n.
189        //         3. Set k to k + optionalAdvance.
190        // c. Set R to the string-concatenation of R and C.
191        // d. Set k to k + 1.
192        vec.push(unescaped_cp);
193    }
194    // 6. Return R.
195    Ok(js_string!(&vec[..]).into())
196}
197
/// An iterator adaptor that can look ahead at up to `N` upcoming items without consuming them.
///
/// Items pulled from the wrapped iterator by [`PeekableN::peek_n`] are staged in a fixed-size
/// array. `buffer[..buffered_end]` always holds, in order, the items that were already taken
/// from `iterator` but not yet returned by `next`; the rest of the array is filler.
struct PeekableN<I, const N: usize>
where
    I: Iterator,
{
    /// The wrapped iterator.
    iterator: I,
    /// Staging area for peeked items.
    buffer: [I::Item; N],
    /// Number of valid items at the front of `buffer`.
    buffered_end: usize,
}

impl<I, const N: usize> PeekableN<I, N>
where
    I: Iterator,
    I::Item: Default + Copy,
{
    /// Creates a new `PeekableN` with an empty lookahead buffer.
    fn new(iterator: I) -> Self {
        Self {
            iterator,
            buffer: [I::Item::default(); N],
            buffered_end: 0,
        }
    }

    /// Peeks up to `count` items from the iterator without consuming them.
    ///
    /// The returned slice is shorter than `count` when the wrapped iterator runs out of items,
    /// and it never holds more than `N` items: a larger `count` is clamped to the buffer
    /// capacity instead of indexing past the end of the buffer.
    fn peek_n(&mut self, count: usize) -> &[I::Item] {
        let wanted = count.min(N);

        // Top up the staging area until it holds `wanted` items or the source is exhausted.
        while self.buffered_end < wanted {
            let Some(item) = self.iterator.next() else {
                break;
            };
            self.buffer[self.buffered_end] = item;
            self.buffered_end += 1;
        }

        &self.buffer[..wanted.min(self.buffered_end)]
    }
}

impl<I, const N: usize> Iterator for PeekableN<I, N>
where
    I: Iterator,
    I::Item: Copy,
{
    type Item = I::Item;

    /// Returns the next item, draining previously peeked items before touching the wrapped
    /// iterator again.
    fn next(&mut self) -> Option<Self::Item> {
        if self.buffered_end == 0 {
            return self.iterator.next();
        }

        let item = self.buffer[0];
        // Shift the remaining staged items one slot towards the front.
        self.buffer.copy_within(1..self.buffered_end, 0);
        self.buffered_end -= 1;
        Some(item)
    }
}
255}