boa_engine/builtins/escape/mod.rs
1//! Boa's implementation of ECMAScript's string escaping functions.
2//!
3//! The `escape()` function replaces all characters with escape sequences, with the exception of ASCII
4//! word characters (A–Z, a–z, 0–9, _) and @*_+-./.
5//!
6//! The `unescape()` function replaces any escape sequence with the character that it represents.
7//!
8//! More information:
9//! - [ECMAScript reference][spec]
10//!
11//! [spec]: https://tc39.es/ecma262/#sec-additional-properties-of-the-global-object
12
13use crate::{
14 Context, JsArgs, JsObject, JsResult, JsString, JsValue, context::intrinsics::Intrinsics,
15 js_string, realm::Realm, string::StaticJsStrings,
16};
17
18use super::{BuiltInBuilder, BuiltInObject, IntrinsicObject};
19
20/// The `escape` function
21#[derive(Debug, Clone, Copy)]
22pub(crate) struct Escape;
23
24impl IntrinsicObject for Escape {
25 fn init(realm: &Realm) {
26 BuiltInBuilder::callable_with_intrinsic::<Self>(realm, escape)
27 .name(Self::NAME)
28 .length(1)
29 .build();
30 }
31 fn get(intrinsics: &Intrinsics) -> JsObject {
32 intrinsics.objects().escape().into()
33 }
34}
35
36impl BuiltInObject for Escape {
37 const NAME: JsString = StaticJsStrings::ESCAPE;
38}
39
40/// Builtin JavaScript `escape ( string )` function.
41fn escape(_: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
42 /// Returns `true` if the codepoint `cp` is part of the `unescapedSet`.
43 fn is_unescaped(cp: u16) -> bool {
44 let Ok(cp) = TryInto::<u8>::try_into(cp) else {
45 return false;
46 };
47
48 // 4. Let unescapedSet be the string-concatenation of the ASCII word characters and "@*+-./".
49 cp.is_ascii_alphanumeric() || [b'_', b'@', b'*', b'+', b'-', b'.', b'/'].contains(&cp)
50 }
51
52 // 1. Set string to ? ToString(string).
53 let string = args.get_or_undefined(0).to_string(context)?;
54
55 // 3. Let R be the empty String.
56 let mut vec = Vec::with_capacity(string.len());
57
58 // 2. Let len be the length of string.
59 // 5. Let k be 0.
60 // 6. Repeat, while k < len,
61 // a. Let C be the code unit at index k within string.
62 for cp in &string {
63 // b. If unescapedSet contains C, then
64 if is_unescaped(cp) {
65 // i. Let S be C.
66 vec.push(cp);
67 continue;
68 }
69 // c. Else,
70 // i. Let n be the numeric value of C.
71 // ii. If n < 256, then
72 let c = if cp < 256 {
73 // 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
74 // 2. Let S be the string-concatenation of "%" and ! StringPad(hex, 2𝔽, "0", start).
75 format!("%{cp:02X}")
76 }
77 // iii. Else,
78 else {
79 // 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
80 // 2. Let S be the string-concatenation of "%u" and ! StringPad(hex, 4𝔽, "0", start).
81 format!("%u{cp:04X}")
82 };
83 // d. Set R to the string-concatenation of R and S.
84 // e. Set k to k + 1.
85 vec.extend(c.encode_utf16());
86 }
87
88 // 7. Return R.
89 Ok(js_string!(&vec[..]).into())
90}
91
92/// The `unescape` function
93#[derive(Debug, Clone, Copy)]
94pub(crate) struct Unescape;
95
96impl IntrinsicObject for Unescape {
97 fn init(realm: &Realm) {
98 BuiltInBuilder::callable_with_intrinsic::<Self>(realm, unescape)
99 .name(Self::NAME)
100 .length(1)
101 .build();
102 }
103 fn get(intrinsics: &Intrinsics) -> JsObject {
104 intrinsics.objects().unescape().into()
105 }
106}
107
108impl BuiltInObject for Unescape {
109 const NAME: JsString = StaticJsStrings::UNESCAPE;
110}
111
112/// Builtin JavaScript `unescape ( string )` function.
113fn unescape(_: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
114 /// Converts a char `cp` to its corresponding hex digit value.
115 fn to_hex_digit(cp: u16) -> Option<u16> {
116 char::from_u32(u32::from(cp))
117 .and_then(|c| c.to_digit(16))
118 .and_then(|d| d.try_into().ok())
119 }
120
121 // 1. Set string to ? ToString(string).
122 let string = args.get_or_undefined(0).to_string(context)?;
123
124 // 3. Let R be the empty String.
125 let mut vec = Vec::with_capacity(string.len());
126
127 let mut codepoints = <PeekableN<_, 6>>::new(string.iter());
128
129 // 2. Let len be the length of string.
130 // 4. Let k be 0.
131 // 5. Repeat, while k < len,
132 loop {
133 // a. Let C be the code unit at index k within string.
134 let Some(cp) = codepoints.next() else {
135 break;
136 };
137
138 // b. If C is the code unit 0x0025 (PERCENT SIGN), then
139 if cp != u16::from(b'%') {
140 vec.push(cp);
141 continue;
142 }
143
144 // i. Let hexDigits be the empty String.
145 // ii. Let optionalAdvance be 0.
146 // TODO: Try blocks :(
147 let Some(unescaped_cp) = (|| match *codepoints.peek_n(5) {
148 // iii. If k + 5 < len and the code unit at index k + 1 within string is the code unit
149 // 0x0075 (LATIN SMALL LETTER U), then
150 [u, n1, n2, n3, n4] if u == u16::from(b'u') => {
151 // 1. Set hexDigits to the substring of string from k + 2 to k + 6.
152 // 2. Set optionalAdvance to 5.
153 let n1 = to_hex_digit(n1)?;
154 let n2 = to_hex_digit(n2)?;
155 let n3 = to_hex_digit(n3)?;
156 let n4 = to_hex_digit(n4)?;
157
158 // TODO: https://github.com/rust-lang/rust/issues/77404
159 for _ in 0..5 {
160 codepoints.next();
161 }
162
163 Some((n1 << 12) + (n2 << 8) + (n3 << 4) + n4)
164 }
165 // iv. Else if k + 3 ≤ len, then
166 [n1, n2, ..] => {
167 // 1. Set hexDigits to the substring of string from k + 1 to k + 3.
168 // 2. Set optionalAdvance to 2.
169 let n1 = to_hex_digit(n1)?;
170 let n2 = to_hex_digit(n2)?;
171
172 // TODO: https://github.com/rust-lang/rust/issues/77404
173 for _ in 0..2 {
174 codepoints.next();
175 }
176
177 Some((n1 << 4) + n2)
178 }
179 _ => None,
180 })() else {
181 vec.push(u16::from(b'%'));
182 continue;
183 };
184
185 // v. Let parseResult be ParseText(StringToCodePoints(hexDigits), HexDigits[~Sep]).
186 // vi. If parseResult is a Parse Node, then
187 // 1. Let n be the MV of parseResult.
188 // 2. Set C to the code unit whose numeric value is n.
189 // 3. Set k to k + optionalAdvance.
190 // c. Set R to the string-concatenation of R and C.
191 // d. Set k to k + 1.
192 vec.push(unescaped_cp);
193 }
194 // 6. Return R.
195 Ok(js_string!(&vec[..]).into())
196}
197
/// An iterator adaptor that can look ahead at up to `N` upcoming items.
///
/// Peeked items are stored in `buffer[..buffered_end]` in iteration order and are handed out by
/// [`Iterator::next`] before the wrapped iterator is polled again.
struct PeekableN<I, const N: usize>
where
    I: Iterator,
{
    /// The wrapped iterator.
    iterator: I,
    /// Lookahead storage; only `buffer[..buffered_end]` holds live items.
    buffer: [I::Item; N],
    /// Number of live items at the front of `buffer`.
    buffered_end: usize,
}

impl<I, const N: usize> PeekableN<I, N>
where
    I: Iterator,
    I::Item: Default + Copy,
{
    /// Creates a new `PeekableN` wrapping `iterator`, with an empty lookahead buffer.
    fn new(iterator: I) -> Self {
        Self {
            iterator,
            buffer: [I::Item::default(); N],
            buffered_end: 0,
        }
    }

    /// Peeks up to `count` items from the iterator without consuming them.
    ///
    /// The returned slice is shorter than `count` when the wrapped iterator runs out of items,
    /// or when `count` exceeds the lookahead capacity `N`; in the latter case the request is
    /// clamped to `N` instead of indexing past the end of the buffer.
    fn peek_n(&mut self, count: usize) -> &[I::Item] {
        // The buffer can never hold more than `N` items.
        let count = count.min(N);

        // Pull items from the wrapped iterator until enough are buffered or it is exhausted.
        while self.buffered_end < count {
            let Some(next) = self.iterator.next() else {
                break;
            };
            self.buffer[self.buffered_end] = next;
            self.buffered_end += 1;
        }

        &self.buffer[..count.min(self.buffered_end)]
    }
}

impl<I, const N: usize> Iterator for PeekableN<I, N>
where
    I: Iterator,
    I::Item: Copy,
{
    type Item = I::Item;

    /// Returns the oldest buffered item if there is one, otherwise polls the wrapped iterator.
    fn next(&mut self) -> Option<Self::Item> {
        if self.buffered_end == 0 {
            return self.iterator.next();
        }

        let item = self.buffer[0];
        // Shift the remaining live items one slot towards the front; slots at or beyond
        // `buffered_end` are never read, so only the live prefix has to move.
        self.buffer[..self.buffered_end].rotate_left(1);
        self.buffered_end -= 1;
        Some(item)
    }
}
255}