boa_engine/builtins/regexp/mod.rs
1//! Boa's implementation of ECMAScript's global `RegExp` object.
2//!
3//! The `RegExp` object is used for matching text with a pattern.
4//!
5//! More information:
6//! - [ECMAScript reference][spec]
7//! - [MDN documentation][mdn]
8//!
9//! [spec]: https://tc39.es/ecma262/#sec-regexp-constructor
10//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
11
12use crate::{
13 Context, JsArgs, JsData, JsResult, JsString,
14 builtins::{BuiltInObject, array::Array, string},
15 context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors},
16 error::JsNativeError,
17 js_string,
18 object::{CONSTRUCTOR, JsObject, internal_methods::get_prototype_from_constructor},
19 property::Attribute,
20 realm::Realm,
21 string::{CodePoint, CommonJsStringBuilder, JsStrVariant, StaticJsStrings},
22 symbol::JsSymbol,
23 value::JsValue,
24};
25use boa_gc::{Finalize, Trace};
26use boa_macros::{js_str, utf16};
27use boa_parser::lexer::regex::RegExpFlags;
28use regress::{Flags, Range, Regex};
29use std::str::FromStr;
30
31use super::{BuiltInBuilder, BuiltInConstructor, IntrinsicObject};
32
33mod regexp_string_iterator;
34pub(crate) use regexp_string_iterator::RegExpStringIterator;
35#[cfg(test)]
36mod tests;
37
/// The internal representation of a `RegExp` object.
#[derive(Debug, Clone, Trace, Finalize, JsData)]
// Safety: `RegExp` does not contain any objects which need to be traced, so this is safe.
#[boa_gc(unsafe_empty_trace)]
pub struct RegExp {
    /// Regex matcher compiled from the pattern source and flags.
    matcher: Regex,
    /// Parsed flag set; queried by the individual flag getters.
    flags: RegExpFlags,
    /// Pattern text the regex was created from (the `[[OriginalSource]]` slot).
    original_source: JsString,
    /// Flags text the regex was created from (the `[[OriginalFlags]]` slot).
    original_flags: JsString,
}
49
impl RegExp {
    /// Returns the original source string of the regex (e.g. `"regex-test"`).
    ///
    /// This is the unescaped pattern text stored at construction time, not the
    /// escaped form produced by the `source` getter.
    pub(crate) fn original_source(&self) -> &JsString {
        &self.original_source
    }

    /// Returns the original flags string of the regex (e.g. `"gi"`).
    ///
    /// The string is stored as it was supplied at construction time.
    pub(crate) fn original_flags(&self) -> &JsString {
        &self.original_flags
    }
}
61
impl IntrinsicObject for RegExp {
    /// Registers the `RegExp` constructor, its static members, and the
    /// methods and accessors of its prototype in `realm`.
    ///
    /// Every accessor getter is first built as a named native function, then
    /// attached through the standard-constructor builder. The registration
    /// order below is kept stable on purpose: the number of storage slots is
    /// fixed by the constants in the `BuiltInConstructor` implementation.
    fn init(realm: &Realm) {
        let get_species = BuiltInBuilder::callable(realm, Self::get_species)
            .name(js_string!("get [Symbol.species]"))
            .build();

        // All flag/source accessors share the same attributes:
        // configurable and non-enumerable.
        let flag_attributes = Attribute::CONFIGURABLE | Attribute::NON_ENUMERABLE;

        let get_has_indices = BuiltInBuilder::callable(realm, Self::get_has_indices)
            .name(js_string!("get hasIndices"))
            .build();
        let get_global = BuiltInBuilder::callable(realm, Self::get_global)
            .name(js_string!("get global"))
            .build();
        let get_ignore_case = BuiltInBuilder::callable(realm, Self::get_ignore_case)
            .name(js_string!("get ignoreCase"))
            .build();
        let get_multiline = BuiltInBuilder::callable(realm, Self::get_multiline)
            .name(js_string!("get multiline"))
            .build();
        let get_dot_all = BuiltInBuilder::callable(realm, Self::get_dot_all)
            .name(js_string!("get dotAll"))
            .build();
        let get_unicode = BuiltInBuilder::callable(realm, Self::get_unicode)
            .name(js_string!("get unicode"))
            .build();
        let get_unicode_sets = BuiltInBuilder::callable(realm, Self::get_unicode_sets)
            .name(js_string!("get unicodeSets"))
            .build();
        let get_sticky = BuiltInBuilder::callable(realm, Self::get_sticky)
            .name(js_string!("get sticky"))
            .build();
        let get_flags = BuiltInBuilder::callable(realm, Self::get_flags)
            .name(js_string!("get flags"))
            .build();
        let get_source = BuiltInBuilder::callable(realm, Self::get_source)
            .name(js_string!("get source"))
            .build();
        let regexp = BuiltInBuilder::from_standard_constructor::<Self>(realm)
            // Static members.
            .static_method(Self::escape, js_string!("escape"), 1)
            .static_accessor(
                JsSymbol::species(),
                Some(get_species),
                None,
                Attribute::CONFIGURABLE,
            )
            // `lastIndex` starts at 0 with every attribute set.
            .property(js_string!("lastIndex"), 0, Attribute::all())
            // Methods, including the well-known-symbol hooks.
            .method(Self::test, js_string!("test"), 1)
            .method(Self::exec, js_string!("exec"), 1)
            .method(Self::to_string, js_string!("toString"), 0)
            .method(Self::r#match, JsSymbol::r#match(), 1)
            .method(Self::match_all, JsSymbol::match_all(), 1)
            .method(Self::replace, JsSymbol::replace(), 2)
            .method(Self::search, JsSymbol::search(), 1)
            .method(Self::split, JsSymbol::split(), 2)
            // Getter-only accessors (no setter is installed for any of them).
            .accessor(
                js_string!("hasIndices"),
                Some(get_has_indices),
                None,
                flag_attributes,
            )
            .accessor(
                js_string!("global"),
                Some(get_global),
                None,
                flag_attributes,
            )
            .accessor(
                js_string!("ignoreCase"),
                Some(get_ignore_case),
                None,
                flag_attributes,
            )
            .accessor(
                js_string!("multiline"),
                Some(get_multiline),
                None,
                flag_attributes,
            )
            .accessor(
                js_string!("dotAll"),
                Some(get_dot_all),
                None,
                flag_attributes,
            )
            .accessor(
                js_string!("unicode"),
                Some(get_unicode),
                None,
                flag_attributes,
            )
            .accessor(
                js_string!("unicodeSets"),
                Some(get_unicode_sets),
                None,
                flag_attributes,
            )
            .accessor(
                js_string!("sticky"),
                Some(get_sticky),
                None,
                flag_attributes,
            )
            .accessor(js_string!("flags"), Some(get_flags), None, flag_attributes)
            .accessor(
                js_string!("source"),
                Some(get_source),
                None,
                flag_attributes,
            );

        // `compile` is only registered when the `annex-b` feature is enabled.
        #[cfg(feature = "annex-b")]
        let regexp = regexp.method(Self::compile, js_string!("compile"), 2);

        regexp.build();
    }

    /// Returns the `RegExp` constructor object stored in `intrinsics`.
    fn get(intrinsics: &Intrinsics) -> JsObject {
        Self::STANDARD_CONSTRUCTOR(intrinsics.constructors()).constructor()
    }
}
183
impl BuiltInObject for RegExp {
    /// Name of the built-in, taken from the static string table.
    const NAME: JsString = StaticJsStrings::REG_EXP;
}
187
188impl BuiltInConstructor for RegExp {
189 const CONSTRUCTOR_ARGUMENTS: usize = 2;
190 const PROTOTYPE_STORAGE_SLOTS: usize = 30;
191 const CONSTRUCTOR_STORAGE_SLOTS: usize = 3;
192
193 const STANDARD_CONSTRUCTOR: fn(&StandardConstructors) -> &StandardConstructor =
194 StandardConstructors::regexp;
195
196 /// `22.2.3.1 RegExp ( pattern, flags )`
197 ///
198 /// More information:
199 /// - [ECMAScript reference][spec]
200 ///
201 /// [spec]: https://tc39.es/ecma262/#sec-regexp-pattern-flags
202 fn constructor(
203 new_target: &JsValue,
204 args: &[JsValue],
205 context: &mut Context,
206 ) -> JsResult<JsValue> {
207 let pattern = args.get_or_undefined(0);
208 let flags = args.get_or_undefined(1);
209
210 // 1. Let patternIsRegExp be ? IsRegExp(pattern).
211 let pattern_is_regexp = Self::is_reg_exp(pattern, context)?;
212
213 // 2. If NewTarget is undefined, then
214 // 3. Else, let newTarget be NewTarget.
215 if new_target.is_undefined() {
216 // a. Let newTarget be the active function object.
217 let new_target = context
218 .active_function_object()
219 .map_or(JsValue::undefined(), JsValue::new);
220
221 // b. If patternIsRegExp is true and flags is undefined, then
222 if let Some(pattern) = &pattern_is_regexp
223 && flags.is_undefined()
224 {
225 // i. Let patternConstructor be ? Get(pattern, "constructor").
226 let pattern_constructor = pattern.get(CONSTRUCTOR, context)?;
227
228 // ii. If SameValue(newTarget, patternConstructor) is true, return pattern.
229 if JsValue::same_value(&new_target, &pattern_constructor) {
230 return Ok(pattern.clone().into());
231 }
232 }
233 }
234
235 // 4. If pattern is an Object and pattern has a [[RegExpMatcher]] internal slot, then
236 let object = pattern.clone().as_object();
237 let (p, f) =
238 if let Some(pattern) = object.as_ref().and_then(JsObject::downcast_ref::<RegExp>) {
239 // a. Let P be pattern.[[OriginalSource]].
240 let p = pattern.original_source.clone().into();
241
242 // b. If flags is undefined, let F be pattern.[[OriginalFlags]].
243 let f = if flags.is_undefined() {
244 pattern.original_flags.clone().into()
245 // c. Else, let F be flags.
246 } else {
247 flags.clone()
248 };
249
250 (p, f)
251 } else if let Some(pattern) = &pattern_is_regexp {
252 // a. Let P be ? Get(pattern, "source").
253 let p = pattern.get(js_string!("source"), context)?;
254
255 // b. If flags is undefined, then
256 let f = if flags.is_undefined() {
257 // i. Let F be ? Get(pattern, "flags").
258 pattern.get(js_string!("flags"), context)?
259 // c. Else,
260 } else {
261 // i. Let F be flags.
262 flags.clone()
263 };
264
265 (p, f)
266 // 6. Else,
267 } else {
268 // a. Let P be pattern.
269 // b. Let F be flags.
270 (pattern.clone(), flags.clone())
271 };
272
273 // 7. Let O be ? RegExpAlloc(newTarget).
274 let proto =
275 get_prototype_from_constructor(new_target, StandardConstructors::regexp, context)?;
276
277 // 8.Return ? RegExpInitialize(O, P, F).
278 Self::initialize(Some(proto), &p, &f, context)
279 }
280}
281
282impl RegExp {
283 /// `7.2.8 IsRegExp ( argument )`
284 ///
285 /// This modified to return the object if it's `true`, [`None`] otherwise.
286 ///
287 /// More information:
288 /// - [ECMAScript reference][spec]
289 ///
290 /// [spec]: https://tc39.es/ecma262/#sec-isregexp
291 pub(crate) fn is_reg_exp(
292 argument: &JsValue,
293 context: &mut Context,
294 ) -> JsResult<Option<JsObject>> {
295 // 1. If argument is not an Object, return false.
296 let Some(argument) = argument.as_object() else {
297 return Ok(None);
298 };
299
300 // 2. Let matcher be ? Get(argument, @@match).
301 let matcher = argument.get(JsSymbol::r#match(), context)?;
302
303 // 3. If matcher is not undefined, return ToBoolean(matcher).
304 if !matcher.is_undefined() {
305 return Ok(matcher.to_boolean().then_some(argument));
306 }
307
308 // 4. If argument has a [[RegExpMatcher]] internal slot, return true.
309 if argument.is::<RegExp>() {
310 return Ok(Some(argument));
311 }
312
313 // 5. Return false.
314 Ok(None)
315 }
316
317 /// Compiles a `RegExp` from the provided pattern and flags.
318 ///
319 /// Equivalent to the beginning of [`RegExpInitialize ( obj, pattern, flags )`][spec]
320 ///
321 /// [spec]: https://tc39.es/ecma262/#sec-regexpinitialize
322 fn compile_native_regexp(
323 pattern: &JsValue,
324 flags: &JsValue,
325 context: &mut Context,
326 ) -> JsResult<RegExp> {
327 // 1. If pattern is undefined, let P be the empty String.
328 // 2. Else, let P be ? ToString(pattern).
329 let p = if pattern.is_undefined() {
330 js_string!()
331 } else {
332 pattern.to_string(context)?
333 };
334
335 // 3. If flags is undefined, let F be the empty String.
336 // 4. Else, let F be ? ToString(flags).
337 let f = if flags.is_undefined() {
338 js_string!()
339 } else {
340 flags.to_string(context)?
341 };
342
343 // 5. If F contains any code unit other than "g", "i", "m", "s", "u", "v", or "y"
344 // or if it contains the same code unit more than once, throw a SyntaxError exception.
345 // TODO: Should directly parse the JsString instead of converting to String
346 let flags = match RegExpFlags::from_str(&f.to_std_string_escaped()) {
347 Err(msg) => return Err(JsNativeError::syntax().with_message(msg).into()),
348 Ok(result) => result,
349 };
350
351 // 13. Let parseResult be ParsePattern(patternText, u, v).
352 // 14. If parseResult is a non-empty List of SyntaxError objects, throw a SyntaxError exception.
353
354 // If u or v flag is set, fullUnicode is true — compile as full codepoints.
355 let full_unicode =
356 flags.contains(RegExpFlags::UNICODE) || flags.contains(RegExpFlags::UNICODE_SETS);
357
358 let matcher = if full_unicode {
359 // Unicode mode (u/v flag) OR pattern has named groups:
360 // compile as full Unicode codepoints.
361 Regex::from_unicode(p.code_points().map(CodePoint::as_u32), Flags::from(flags))
362 .map_err(|error| {
363 JsNativeError::syntax()
364 .with_message(format!("failed to create matcher: {}", error.text))
365 })?
366 } else {
367 // Non-Unicode mode with no named groups:
368 // compile as raw UTF-16 code units so that surrogate pairs
369 // (e.g. 𠮷 = [0xD842, 0xDFB7]) are matched correctly by find_from_ucs2.
370 let utf16_units = p.code_points().flat_map(|cp| {
371 let mut buf = [0u16; 2];
372 match cp {
373 CodePoint::Unicode(c) => c
374 .encode_utf16(&mut buf)
375 .iter()
376 .map(|&u| u32::from(u))
377 .collect::<Vec<_>>(),
378 CodePoint::UnpairedSurrogate(s) => vec![u32::from(s)],
379 }
380 });
381 Regex::from_unicode(utf16_units, Flags::from(flags)).map_err(|error| {
382 JsNativeError::syntax()
383 .with_message(format!("failed to create matcher: {}", error.text))
384 })?
385 };
386
387 // 15. Assert: parseResult is a Pattern Parse Node.
388 // 16. Set obj.[[OriginalSource]] to P.
389 // 17. Set obj.[[OriginalFlags]] to F.
390 // 18. Let capturingGroupsCount be CountLeftCapturingParensWithin(parseResult).
391 // 19. Let rer be the RegExp Record { [[IgnoreCase]]: i, [[Multiline]]: m,
392 // [[DotAll]]: s, [[Unicode]]: u, [[UnicodeSets]]: v,
393 // [[CapturingGroupsCount]]: capturingGroupsCount }.
394 // 20. Set obj.[[RegExpRecord]] to rer.
395 // 21. Set obj.[[RegExpMatcher]] to CompilePattern of parseResult with argument rer.
396 Ok(RegExp {
397 matcher,
398 flags,
399 original_source: p,
400 original_flags: f,
401 })
402 }
403
404 /// `RegExpInitialize ( obj, pattern, flags )`
405 ///
406 /// If prototype is `None`, initializes the prototype to `%RegExp%.prototype`.
407 ///
408 /// More information:
409 /// - [ECMAScript reference][spec]
410 ///
411 /// [spec]: https://tc39.es/ecma262/#sec-regexpinitialize
412 pub(crate) fn initialize(
413 prototype: Option<JsObject>,
414 pattern: &JsValue,
415 flags: &JsValue,
416 context: &mut Context,
417 ) -> JsResult<JsValue> {
418 // Has the steps of `RegExpInitialize`.
419 let regexp = Self::compile_native_regexp(pattern, flags, context)?;
420
421 // 22. Perform ? Set(obj, "lastIndex", +0𝔽, true).
422 let obj = if let Some(prototype) = prototype {
423 let mut template = context
424 .intrinsics()
425 .templates()
426 .regexp_without_proto()
427 .clone();
428 template.set_prototype(prototype);
429 template.create(regexp, vec![0.into()])
430 } else {
431 context
432 .intrinsics()
433 .templates()
434 .regexp()
435 .create(regexp, vec![0.into()])
436 };
437
438 // 23. Return obj.
439 Ok(obj.into())
440 }
441
/// `22.2.3.2.4 RegExpCreate ( P, F )`
///
/// Creates a new `RegExp` object from pattern `p` and flags `f`, using the
/// default prototype (no explicit prototype is passed to `initialize`).
///
/// More information:
///  - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-regexpcreate
pub(crate) fn create(p: &JsValue, f: &JsValue, context: &mut Context) -> JsResult<JsValue> {
    // 1. Let obj be ? RegExpAlloc(%RegExp%).
    // 2. Return ? RegExpInitialize(obj, P, F).
    Self::initialize(None, p, f, context)
}
453
/// `get RegExp [ @@species ]`
///
/// The `RegExp [ @@species ]` accessor property returns the `RegExp` constructor.
/// This implementation simply hands back the `this` value it was called with.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-get-regexp-@@species
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@species
// The function never fails, but it keeps the same `JsResult` signature as the
// other native functions passed to `BuiltInBuilder::callable`.
#[allow(clippy::unnecessary_wraps)]
fn get_species(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
    // 1. Return the this value.
    Ok(this.clone())
}
469
470 fn regexp_has_flag(this: &JsValue, flag: u8, context: &mut Context) -> JsResult<JsValue> {
471 if let Some(object) = this.as_object() {
472 if let Some(regexp) = object.downcast_ref::<RegExp>() {
473 return Ok(JsValue::new(match flag {
474 b'd' => regexp.flags.contains(RegExpFlags::HAS_INDICES),
475 b'g' => regexp.flags.contains(RegExpFlags::GLOBAL),
476 b'm' => regexp.flags.contains(RegExpFlags::MULTILINE),
477 b's' => regexp.flags.contains(RegExpFlags::DOT_ALL),
478 b'i' => regexp.flags.contains(RegExpFlags::IGNORE_CASE),
479 b'u' => regexp.flags.contains(RegExpFlags::UNICODE),
480 b'v' => regexp.flags.contains(RegExpFlags::UNICODE_SETS),
481 b'y' => regexp.flags.contains(RegExpFlags::STICKY),
482 _ => unreachable!(),
483 }));
484 }
485
486 if JsObject::equals(
487 &object,
488 &context.intrinsics().constructors().regexp().prototype(),
489 ) {
490 return Ok(JsValue::undefined());
491 }
492 }
493
494 let name = match flag {
495 b'd' => "hasIndices",
496 b'g' => "global",
497 b'm' => "multiline",
498 b's' => "dotAll",
499 b'i' => "ignoreCase",
500 b'u' => "unicode",
501 b'v' => "unicodeSets",
502 b'y' => "sticky",
503 _ => unreachable!(),
504 };
505
506 Err(JsNativeError::typ()
507 .with_message(format!(
508 "RegExp.prototype.{name} getter called on non-RegExp object",
509 ))
510 .into())
511 }
512
/// `get RegExp.prototype.hasIndices`
///
/// The `hasIndices` property indicates whether or not the "`d`" flag is used with the regular expression.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.hasindices
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/hasIndices
pub(crate) fn get_has_indices(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    Self::regexp_has_flag(this, b'd', context)
}
528
/// `get RegExp.prototype.global`
///
/// The `global` property indicates whether or not the "`g`" flag is used with the regular expression.
/// Delegates to `regexp_has_flag` with the flag letter `g`.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.global
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/global
pub(crate) fn get_global(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    Self::regexp_has_flag(this, b'g', context)
}
546
/// `get RegExp.prototype.ignoreCase`
///
/// The `ignoreCase` property indicates whether or not the "`i`" flag is used with the regular expression.
/// Delegates to `regexp_has_flag` with the flag letter `i`.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.ignorecase
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/ignoreCase
pub(crate) fn get_ignore_case(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    Self::regexp_has_flag(this, b'i', context)
}
564
/// `get RegExp.prototype.multiline`
///
/// The `multiline` property indicates whether or not the "`m`" flag is used with the regular expression.
/// Delegates to `regexp_has_flag` with the flag letter `m`.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.multiline
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/multiline
pub(crate) fn get_multiline(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    Self::regexp_has_flag(this, b'm', context)
}
582
/// `get RegExp.prototype.dotAll`
///
/// The `dotAll` property indicates whether or not the "`s`" flag is used with the regular expression.
/// Delegates to `regexp_has_flag` with the flag letter `s`.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.dotAll
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/dotAll
pub(crate) fn get_dot_all(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    Self::regexp_has_flag(this, b's', context)
}
600
/// `get RegExp.prototype.unicode`
///
/// The `unicode` property indicates whether or not the "`u`" flag is used with a regular expression.
/// `unicode` is a read-only property of an individual regular expression instance.
/// Delegates to `regexp_has_flag` with the flag letter `u`.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.unicode
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode
pub(crate) fn get_unicode(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    Self::regexp_has_flag(this, b'u', context)
}
619
/// `get RegExp.prototype.unicodeSets`
///
/// The `unicodeSets` property indicates whether or not the "`v`" flag is used with the regular expression.
/// Delegates to `regexp_has_flag` with the flag letter `v`.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.unicodesets
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicodeSets
pub(crate) fn get_unicode_sets(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    Self::regexp_has_flag(this, b'v', context)
}
635
/// `get RegExp.prototype.sticky`
///
/// The `sticky` property indicates whether or not the "`y`" flag is used with the regular expression.
/// This flag indicates that it matches only from the index indicated by the `lastIndex` property
/// of this regular expression in the target string (and does not attempt to match from any later indexes).
/// Delegates to `regexp_has_flag` with the flag letter `y`.
///
/// More information:
///  - [ECMAScript reference][spec]
///  - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.sticky
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky
pub(crate) fn get_sticky(
    this: &JsValue,
    _: &[JsValue],
    context: &mut Context,
) -> JsResult<JsValue> {
    Self::regexp_has_flag(this, b'y', context)
}
654
655 /// `get RegExp.prototype.flags`
656 ///
657 /// The `flags` property returns a string consisting of the [`flags`][flags] of the current regular expression object.
658 ///
659 /// More information:
660 /// - [ECMAScript reference][spec]
661 /// - [MDN documentation][mdn]
662 ///
663 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.flags
664 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/flags
665 /// [flags]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#Advanced_searching_with_flags_2
666 pub(crate) fn get_flags(
667 this: &JsValue,
668 _: &[JsValue],
669 context: &mut Context,
670 ) -> JsResult<JsValue> {
671 // 1. Let R be the this value.
672 // 2. If R is not an Object, throw a TypeError exception.
673 let Some(object) = this.as_object() else {
674 return Err(JsNativeError::typ()
675 .with_message("RegExp.prototype.flags getter called on non-object")
676 .into());
677 };
678
679 // 3. Let codeUnits be a new empty List.
680 let mut code_units = String::new();
681
682 // 4. Let hasIndices be ToBoolean(? Get(R, "hasIndices")).
683 // 5. If hasIndices is true, append the code unit 0x0064 (LATIN SMALL LETTER D) to codeUnits.
684 if object.get(js_string!("hasIndices"), context)?.to_boolean() {
685 code_units.push('d');
686 }
687
688 // 6. Let global be ToBoolean(? Get(R, "global")).
689 // 7. If global is true, append the code unit 0x0067 (LATIN SMALL LETTER G) to codeUnits.
690 if object.get(js_string!("global"), context)?.to_boolean() {
691 code_units.push('g');
692 }
693
694 // 8. Let ignoreCase be ToBoolean(? Get(R, "ignoreCase")).
695 // 9. If ignoreCase is true, append the code unit 0x0069 (LATIN SMALL LETTER I) to codeUnits.
696 if object.get(js_string!("ignoreCase"), context)?.to_boolean() {
697 code_units.push('i');
698 }
699
700 // 10. Let multiline be ToBoolean(? Get(R, "multiline")).
701 // 11. If multiline is true, append the code unit 0x006D (LATIN SMALL LETTER M) to codeUnits.
702 if object.get(js_string!("multiline"), context)?.to_boolean() {
703 code_units.push('m');
704 }
705
706 // 12. Let dotAll be ToBoolean(? Get(R, "dotAll")).
707 // 13. If dotAll is true, append the code unit 0x0073 (LATIN SMALL LETTER S) to codeUnits.
708 if object.get(js_string!("dotAll"), context)?.to_boolean() {
709 code_units.push('s');
710 }
711
712 // 14. Let unicode be ToBoolean(? Get(R, "unicode")).
713 // 15. If unicode is true, append the code unit 0x0075 (LATIN SMALL LETTER U) to codeUnits.
714 if object.get(js_string!("unicode"), context)?.to_boolean() {
715 code_units.push('u');
716 }
717
718 // 16. Let unicodeSets be ToBoolean(? Get(R, "unicodeSets")).
719 // 17. If unicodeSets is true, append the code unit 0x0076 (LATIN SMALL LETTER V) to codeUnits.
720 if object.get(js_string!("unicodeSets"), context)?.to_boolean() {
721 code_units.push('v');
722 }
723
724 // 18. Let sticky be ToBoolean(? Get(R, "sticky")).
725 // 19. If sticky is true, append the code unit 0x0079 (LATIN SMALL LETTER Y) to codeUnits.
726 if object.get(js_string!("sticky"), context)?.to_boolean() {
727 code_units.push('y');
728 }
729
730 // 20. Return the String value whose code units are the elements of the List codeUnits.
731 // If codeUnits has no elements, the empty String is returned.
732 Ok(JsString::from(code_units).into())
733 }
734
735 /// `get RegExp.prototype.source`
736 ///
737 /// The `source` property returns a `String` containing the source text of the regexp object,
738 /// and it doesn't contain the two forward slashes on both sides and any flags.
739 ///
740 /// More information:
741 /// - [ECMAScript reference][spec]
742 /// - [MDN documentation][mdn]
743 ///
744 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.source
745 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/source
746 pub(crate) fn get_source(
747 this: &JsValue,
748 _: &[JsValue],
749 context: &mut Context,
750 ) -> JsResult<JsValue> {
751 // 1. Let R be the this value.
752 // 2. If Type(R) is not Object, throw a TypeError exception.
753 let Some(object) = this.as_object() else {
754 return Err(JsNativeError::typ()
755 .with_message("RegExp.prototype.source method called on incompatible value")
756 .into());
757 };
758
759 let casted = object.downcast_ref::<RegExp>();
760 match casted {
761 // 3. If R does not have an [[OriginalSource]] internal slot, then
762 None => {
763 // a. If SameValue(R, %RegExp.prototype%) is true, return "(?:)".
764 // b. Otherwise, throw a TypeError exception.
765 if JsValue::same_value(
766 this,
767 &JsValue::new(context.intrinsics().constructors().regexp().prototype()),
768 ) {
769 Ok(JsValue::new(js_string!("(?:)")))
770 } else {
771 Err(JsNativeError::typ()
772 .with_message("RegExp.prototype.source method called on incompatible value")
773 .into())
774 }
775 }
776 // 4. Assert: R has an [[OriginalFlags]] internal slot.
777 Some(re) => {
778 // 5. Let src be R.[[OriginalSource]].
779 // 6. Let flags be R.[[OriginalFlags]].
780 // 7. Return EscapeRegExpPattern(src, flags).
781 Ok(Self::escape_pattern(
782 &re.original_source,
783 &re.original_flags,
784 ))
785 }
786 }
787 }
788
789 /// `22.2.3.2.5 EscapeRegExpPattern ( P, F )`
790 ///
791 /// More information:
792 /// - [ECMAScript reference][spec]
793 ///
794 /// [spec]: https://tc39.es/ecma262/#sec-escaperegexppattern
795 fn escape_pattern(src: &JsString, _flags: &JsString) -> JsValue {
796 if src.is_empty() {
797 js_string!("(?:)").into()
798 } else {
799 let mut s = Vec::with_capacity(src.len());
800 let mut buf = [0; 2];
801 for c in src.code_points() {
802 match c {
803 CodePoint::Unicode('/') => s.extend_from_slice(utf16!(r"\/")),
804 CodePoint::Unicode('\n') => s.extend_from_slice(utf16!(r"\n")),
805 CodePoint::Unicode('\r') => s.extend_from_slice(utf16!(r"\r")),
806 CodePoint::Unicode('\u{2028}') => s.extend_from_slice(utf16!(r"\u2028")),
807 CodePoint::Unicode('\u{2029}') => s.extend_from_slice(utf16!(r"\u2029")),
808 CodePoint::Unicode(c) => s.extend_from_slice(c.encode_utf16(&mut buf)),
809 CodePoint::UnpairedSurrogate(surr) => s.push(surr),
810 }
811 }
812
813 JsValue::new(js_string!(&s[..]))
814 }
815 }
816
817 /// `RegExp.escape( string )`
818 ///
819 /// The `RegExp.escape()` static method escapes any potential regex syntax characters in a string,
820 /// and returns a new string that can be safely used as a literal pattern for the `RegExp()` constructor.
821 ///
822 /// More information:
823 /// - [ECMAScript reference][spec]
824 /// - [MDN documentation][mdn]
825 ///
826 /// [spec]: https://tc39.es/proposal-regex-escaping/#sec-regexp.escape
827 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/escape
828 ///
829 /// Helper function to check if a character is a `WhiteSpace` character
830 fn is_whitespace(ch: char) -> bool {
831 matches!(
832 ch,
833 '\u{0009}' | // <TAB>
834 '\u{000B}' | // <VT>
835 '\u{000C}' | // <FF>
836 '\u{0020}' | // <SP>
837 '\u{00A0}' | // <NBSP>
838 '\u{FEFF}' | // <ZWNBSP>
839 '\u{1680}' | // Ogham Space Mark
840 '\u{2000}' | '\u{2001}' | '\u{2002}' | '\u{2003}' | '\u{2004}' |
841 '\u{2005}' | '\u{2006}' | '\u{2007}' | '\u{2008}' | '\u{2009}' |
842 '\u{200A}' | // Various space separators
843 '\u{202F}' | // Narrow No-Break Space
844 '\u{205F}' | // Medium Mathematical Space
845 '\u{3000}' // Ideographic Space
846 )
847 }
848
849 /// Helper function to check if a character is a `LineTerminator` character
850 fn is_line_terminator(ch: char) -> bool {
851 matches!(
852 ch,
853 '\u{000A}' | // <LF>
854 '\u{000D}' | // <CR>
855 '\u{2028}' | // <LS>
856 '\u{2029}' // <PS>
857 )
858 }
859
860 pub(crate) fn escape(_: &JsValue, args: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
861 let arg = args.get_or_undefined(0);
862
863 // 1. If S is not a String, throw a TypeError exception.
864 let Some(string) = arg.as_string() else {
865 return Err(JsNativeError::typ()
866 .with_message("RegExp.escape requires a string argument")
867 .into());
868 };
869
870 // 2. Let escaped be the empty String.
871 let mut escaped = CommonJsStringBuilder::new();
872
873 // 3. Let cpList be StringToCodePoints(S).
874 // 4. For each code point c of cpList, do
875 for (index, c) in string.code_points().enumerate() {
876 let code = c.as_u32();
877
878 // 4.a. If escaped is the empty String and c is matched by either DecimalDigit or AsciiLetter, then
879 if index == 0
880 && let CodePoint::Unicode(ch) = c
881 && (ch.is_ascii_digit() || ch.is_ascii_alphabetic())
882 {
883 // 4.a.ii-v. Escape using \xXX format
884 let escape_seq = format!("\\x{code:02x}");
885 escaped.push(escape_seq.as_str());
886 continue;
887 }
888
889 // 4.b. Else, set escaped to the string-concatenation of escaped and EncodeForRegExpEscape(c).
890 match c {
891 CodePoint::Unicode(ch) => {
892 // EncodeForRegExpEscape step 1: SyntaxCharacter or U+002F (SOLIDUS)
893 if matches!(
894 ch,
895 '^' | '$'
896 | '\\'
897 | '.'
898 | '*'
899 | '+'
900 | '?'
901 | '('
902 | ')'
903 | '['
904 | ']'
905 | '{'
906 | '}'
907 | '|'
908 | '/'
909 ) {
910 escaped.push('\\');
911 escaped.push(ch);
912 }
913 // Step 2: ControlEscape characters (Table 64)
914 else if ch == '\x09' {
915 escaped.push("\\t");
916 } else if ch == '\x0A' {
917 escaped.push("\\n");
918 } else if ch == '\x0B' {
919 escaped.push("\\v");
920 } else if ch == '\x0C' {
921 escaped.push("\\f");
922 } else if ch == '\x0D' {
923 escaped.push("\\r");
924 }
925 // Step 3-5: otherPunctuators or WhiteSpace or LineTerminator
926 else if matches!(
927 ch,
928 ',' | '-'
929 | '='
930 | '<'
931 | '>'
932 | '#'
933 | '&'
934 | '!'
935 | '%'
936 | ':'
937 | ';'
938 | '@'
939 | '~'
940 | '\''
941 | '`'
942 | '"'
943 ) || Self::is_whitespace(ch)
944 || Self::is_line_terminator(ch)
945 {
946 let code = ch as u32;
947 if code <= 0xFF {
948 // Use \xXX format
949 let escape_seq = format!("\\x{code:02x}");
950 escaped.push(escape_seq.as_str());
951 } else {
952 // Use \uXXXX format
953 let escape_seq = format!("\\u{code:04x}");
954 escaped.push(escape_seq.as_str());
955 }
956 }
957 // Step 6: All other Unicode characters
958 else {
959 escaped.push(ch);
960 }
961 }
962 CodePoint::UnpairedSurrogate(surr) => {
963 // Escape unpaired surrogates using \uXXXX format
964 let escape_seq = format!("\\u{surr:04x}");
965 escaped.push(escape_seq.as_str());
966 }
967 }
968 }
969
970 // 5. Return escaped.
971 Ok(JsValue::new(escaped.build()))
972 }
973
974 /// `RegExp.prototype.test( string )`
975 ///
976 /// The `test()` method executes a search for a match between a regular expression and a specified string.
977 ///
978 /// Returns `true` or `false`.
979 ///
980 /// More information:
981 /// - [ECMAScript reference][spec]
982 /// - [MDN documentation][mdn]
983 ///
984 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.test
985 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/test
986 pub(crate) fn test(
987 this: &JsValue,
988 args: &[JsValue],
989 context: &mut Context,
990 ) -> JsResult<JsValue> {
991 // 1. Let R be the this value.
992 // 2. If Type(R) is not Object, throw a TypeError exception.
993 let this = this.as_object().ok_or_else(|| {
994 JsNativeError::typ()
995 .with_message("RegExp.prototype.test method called on incompatible value")
996 })?;
997
998 // 3. Let string be ? ToString(S).
999 let arg_str = args
1000 .first()
1001 .cloned()
1002 .unwrap_or_default()
1003 .to_string(context)?;
1004
1005 // 4. Let match be ? RegExpExec(R, string).
1006 let m = Self::abstract_exec(&this, arg_str, context)?;
1007
1008 // 5. If match is not null, return true; else return false.
1009 if m.is_some() {
1010 Ok(JsValue::new(true))
1011 } else {
1012 Ok(JsValue::new(false))
1013 }
1014 }
1015
1016 /// `RegExp.prototype.exec( string )`
1017 ///
1018 /// The `exec()` method executes a search for a match in a specified string.
1019 ///
1020 /// Returns a result array, or `null`.
1021 ///
1022 /// More information:
1023 /// - [ECMAScript reference][spec]
1024 /// - [MDN documentation][mdn]
1025 ///
1026 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.exec
1027 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec
1028 pub(crate) fn exec(
1029 this: &JsValue,
1030 args: &[JsValue],
1031 context: &mut Context,
1032 ) -> JsResult<JsValue> {
1033 // 1. Let R be the this value.
1034 // 2. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]).
1035 let this = this.as_object();
1036 let obj = this
1037 .and_then(|o| o.downcast::<RegExp>().ok())
1038 .ok_or_else(|| {
1039 JsNativeError::typ().with_message("RegExp.prototype.exec called with invalid value")
1040 })?;
1041
1042 // 3. Let S be ? ToString(string).
1043 let arg_str = args.get_or_undefined(0).to_string(context)?;
1044
1045 // 4. Return ? RegExpBuiltinExec(R, S).
1046 (Self::abstract_builtin_exec(obj, &arg_str, context)?)
1047 .map_or_else(|| Ok(JsValue::null()), |v| Ok(v.into()))
1048 }
1049
1050 /// `22.2.5.2.1 RegExpExec ( R, S )`
1051 ///
1052 /// More information:
1053 /// - [ECMAScript reference][spec]
1054 ///
1055 /// [spec]: https://tc39.es/ecma262/#sec-regexpexec
1056 pub(crate) fn abstract_exec(
1057 this: &JsObject,
1058 input: JsString,
1059 context: &mut Context,
1060 ) -> JsResult<Option<JsObject>> {
1061 // 1. Assert: Type(R) is Object.
1062 // 2. Assert: Type(S) is String.
1063
1064 // 3. Let exec be ? Get(R, "exec").
1065 let exec = this.get(js_string!("exec"), context)?;
1066
1067 // 4. If IsCallable(exec) is true, then
1068 if let Some(exec) = exec.as_callable() {
1069 // a. Let result be ? Call(exec, R, « S »).
1070 let result = exec.call(&this.clone().into(), &[input.into()], context)?;
1071
1072 // b. If Type(result) is neither Object nor Null, throw a TypeError exception.
1073 if !result.is_object() && !result.is_null() {
1074 return Err(JsNativeError::typ()
1075 .with_message("regexp exec returned neither object nor null")
1076 .into());
1077 }
1078
1079 // c. Return result.
1080 return Ok(result.as_object());
1081 }
1082
1083 // 5. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]).
1084 let Ok(this) = this.clone().downcast::<RegExp>() else {
1085 return Err(JsNativeError::typ()
1086 .with_message("RegExpExec called with invalid value")
1087 .into());
1088 };
1089
1090 // 6. Return ? RegExpBuiltinExec(R, S).
1091 Self::abstract_builtin_exec(this, &input, context)
1092 }
1093
1094 /// `22.2.7.2 RegExpBuiltinExec ( R, S )`
1095 ///
1096 /// More information:
1097 /// - [ECMAScript reference][spec]
1098 ///
1099 /// [spec]: https://tc39.es/ecma262/#sec-regexpbuiltinexec
1100 pub(crate) fn abstract_builtin_exec(
1101 this: JsObject<RegExp>,
1102 input: &JsString,
1103 context: &mut Context,
1104 ) -> JsResult<Option<JsObject>> {
1105 let rx = this.borrow().data().clone();
1106 let this = this.upcast();
1107
1108 // 1. Let length be the length of S.
1109 let length = input.len() as u64;
1110
1111 // 2. Let lastIndex be ℝ(? ToLength(? Get(R, "lastIndex"))).
1112 let mut last_index = this
1113 .get(js_string!("lastIndex"), context)?
1114 .to_length(context)?;
1115
1116 // 3. Let flags be R.[[OriginalFlags]].
1117 let flags = &rx.original_flags;
1118
1119 // 4. If flags contains "g", let global be true; else let global be false.
1120 let global = flags.contains(b'g');
1121
1122 // 5. If flags contains "y", let sticky be true; else let sticky be false.
1123 let sticky = flags.contains(b'y');
1124
1125 // 6. If flags contains "d", let hasIndices be true; else let hasIndices be false.
1126 let has_indices = flags.contains(b'd');
1127
1128 // 7. If global is false and sticky is false, set lastIndex to 0.
1129 if !global && !sticky {
1130 last_index = 0;
1131 }
1132
1133 // 8. Let matcher be R.[[RegExpMatcher]].
1134 let matcher = &rx.matcher;
1135
1136 // 9. If flags contains "u" or flags contains "v", let fullUnicode be true; else let fullUnicode be false.
1137 let full_unicode = flags.contains(b'u') || flags.contains(b'v');
1138
1139 // NOTE: The following steps are take care of by regress:
1140 //
1141 // SKIP: 10. Let matchSucceeded be false.
1142 // SKIP: 11. If fullUnicode is true, let input be StringToCodePoints(S). Otherwise, let input be a List whose elements are the code units that are the elements of S.
1143 // SKIP: 12. NOTE: Each element of input is considered to be a character.
1144 // SKIP: 13. Repeat, while matchSucceeded is false,
1145
1146 // 13.a. If lastIndex > length, then
1147 if last_index > length {
1148 // i. If global is true or sticky is true, then
1149 if global || sticky {
1150 // 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
1151 this.set(js_string!("lastIndex"), 0, true, context)?;
1152 }
1153
1154 // ii. Return null.
1155 return Ok(None);
1156 }
1157
1158 // 13.b. Let inputIndex be the index into input of the character that was obtained from element lastIndex of S.
1159 // 13.c. Let r be matcher(input, inputIndex).
1160 let r: Option<regress::Match> = match (full_unicode, input.as_str().variant()) {
1161 (true | false, JsStrVariant::Latin1(_)) => {
1162 // TODO: Currently regress does not support latin1 encoding.
1163 let input = input.to_vec();
1164
1165 // NOTE: We can use the faster ucs2 variant since there will never be two byte unicode.
1166 matcher.find_from_ucs2(&input, last_index as usize).next()
1167 }
1168 (true, JsStrVariant::Utf16(input)) => {
1169 matcher.find_from_utf16(input, last_index as usize).next()
1170 }
1171 (false, JsStrVariant::Utf16(input)) => {
1172 matcher.find_from_ucs2(input, last_index as usize).next()
1173 }
1174 };
1175
1176 let Some(match_value) = r else {
1177 // d. If r is failure, then
1178 //
1179 // NOTE: Merged the following steps (since we no longer have a loop):
1180 // 13.d.i. If sticky is true, then
1181 // 13.a.i. If global is true or sticky is true, then
1182 if global || sticky {
1183 // 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
1184 this.set(js_string!("lastIndex"), 0, true, context)?;
1185 }
1186
1187 // MOVE: ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
1188 // NOTE: Handled within the regress matches iterator, see below for last_index assignment.
1189
1190 // NOTE: Merged and steps:
1191 // 13.a.ii. Return null.
1192 // 13.d.i.2. Return null.
1193 return Ok(None);
1194 };
1195
1196 // e. Else
1197 // SKIP: i. Assert: r is a MatchState.
1198 // SKIP: ii. Set matchSucceeded to true.
1199
1200 // NOTE: regress currently doesn't support the sticky flag so we have to emulate it.
1201 if sticky && match_value.start() != last_index as usize {
1202 // 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
1203 this.set(js_string!("lastIndex"), 0, true, context)?;
1204
1205 // 2. Return null.
1206 return Ok(None);
1207 }
1208
1209 // 13.d.ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
1210 // NOTE: Calculation of last_index is done in regress.
1211 last_index = match_value.start() as u64;
1212
1213 // 14. Let e be r's endIndex value.
1214 // 15. If fullUnicode is true, set e to GetStringIndex(S, e).
1215 // NOTE: Step 15 is already taken care of by regress.
1216 let e = match_value.end();
1217
1218 // 16. If global is true or sticky is true, then
1219 if global || sticky {
1220 // a. Perform ? Set(R, "lastIndex", 𝔽(e), true).
1221 this.set(js_string!("lastIndex"), e, true, context)?;
1222 }
1223
1224 // 17. Let n be the number of elements in r's captures List.
1225 let n = match_value.captures.len() as u64;
1226 // 18. Assert: n = R.[[RegExpRecord]].[[CapturingGroupsCount]].
1227 // 19. Assert: n < 232 - 1.
1228 debug_assert!(n < (1u64 << 32) - 1);
1229
1230 // 20. Let A be ! ArrayCreate(n + 1).
1231 // 21. Assert: The mathematical value of A's "length" property is n + 1.
1232 let a = Array::array_create(n + 1, None, context)?;
1233
1234 // 22. Perform ! CreateDataPropertyOrThrow(A, "index", 𝔽(lastIndex)).
1235 a.create_data_property_or_throw(js_string!("index"), last_index, context)?;
1236
1237 // 23. Perform ! CreateDataPropertyOrThrow(A, "input", S).
1238 a.create_data_property_or_throw(js_string!("input"), input.clone(), context)?;
1239
1240 // 24. Let match be the Match Record { [[StartIndex]]: lastIndex, [[EndIndex]]: e }.
1241 // Immediately convert it to an array according to 22.2.7.7 GetMatchIndexPair(S, match)
1242 // 1. Assert: match.[[StartIndex]] ≤ match.[[EndIndex]] ≤ the length of S.
1243 // 2. Return CreateArrayFromList(« 𝔽(match.[[StartIndex]]), 𝔽(match.[[EndIndex]]) »).
1244 let match_record = Array::create_array_from_list(
1245 [match_value.start().into(), match_value.end().into()],
1246 context,
1247 );
1248
1249 // 25. Let indices be a new empty List.
1250 let indices = Array::array_create(n + 1, None, context)?;
1251
1252 // 27. Append match to indices.
1253 indices.create_data_property_or_throw(0, match_record, context)?;
1254
1255 // 28. Let matchedSubstr be GetMatchString(S, match).
1256 let matched_substr = input.get_expect((last_index as usize)..(e));
1257
1258 // 29. Perform ! CreateDataPropertyOrThrow(A, "0", matchedSubstr).
1259 a.create_data_property_or_throw(0, matched_substr, context)?;
1260
1261 let named_groups = match_value
1262 .named_groups()
1263 .collect::<Vec<(&str, Option<Range>)>>();
1264
1265 // Combines:
1266 // 26. Let groupNames be a new empty List.
1267 // 30. If R contains any GroupName, then
1268 // 31. Else,
1269 // 33. For each integer i such that 1 ≤ i ≤ n, in ascending order, do
1270 #[allow(clippy::if_not_else)]
1271 let (groups, group_names) = if !named_groups.clone().is_empty() {
1272 // a. Let groups be OrdinaryObjectCreate(null).
1273 let groups = JsObject::with_null_proto();
1274 let group_names = JsObject::with_null_proto();
1275
1276 // e. If the ith capture of R was defined with a GroupName, then
1277 // i. Let s be the CapturingGroupName of that GroupName.
1278 // ii. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue).
1279 // iii. Append s to groupNames.
1280 for (name, range) in named_groups {
1281 let name = js_string!(name);
1282 if let Some(range) = range {
1283 let value = input.get_expect(range.clone());
1284
1285 groups.create_data_property_or_throw(name.clone(), value, context)?;
1286
1287 // 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups )
1288 // a. Let matchIndices be indices[i].
1289 // b. If matchIndices is not undefined, then
1290 // i. Let matchIndexPair be GetMatchIndexPair(S, matchIndices).
1291 // d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), matchIndexPair).
1292 group_names.create_data_property_or_throw(
1293 name.clone(),
1294 Array::create_array_from_list(
1295 [range.start.into(), range.end.into()],
1296 context,
1297 ),
1298 context,
1299 )?;
1300 } else {
1301 groups.create_data_property_or_throw(
1302 name.clone(),
1303 JsValue::undefined(),
1304 context,
1305 )?;
1306
1307 // 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups )
1308 // c. Else,
1309 // i. Let matchIndexPair be undefined.
1310 // d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), matchIndexPair).
1311 group_names.create_data_property_or_throw(
1312 name,
1313 JsValue::undefined(),
1314 context,
1315 )?;
1316 }
1317 }
1318
1319 (groups.into(), group_names.into())
1320 } else {
1321 // a. Let groups be undefined.
1322 (JsValue::undefined(), JsValue::undefined())
1323 };
1324
1325 // 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups )
1326 // 8. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
1327 indices.create_data_property_or_throw(js_string!("groups"), group_names, context)?;
1328
1329 // 32. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
1330 a.create_data_property_or_throw(js_string!("groups"), groups, context)?;
1331
1332 // 27. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do
1333 for i in 1..=n {
1334 // a. Let captureI be ith element of r's captures List.
1335 let capture = match_value.group(i as usize);
1336
1337 // b. If captureI is undefined, let capturedValue be undefined.
1338 // c. Else if fullUnicode is true, then
1339 // d. Else,
1340 let captured_value = capture.clone().map_or_else(JsValue::undefined, |range| {
1341 js_string!(input.get_expect(range)).into()
1342 });
1343
1344 // e. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), capturedValue).
1345 a.create_data_property_or_throw(i, captured_value.clone(), context)?;
1346
1347 // 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups )
1348 if has_indices {
1349 // b. If matchIndices is not undefined, then
1350 // i. Let matchIndexPair be GetMatchIndexPair(S, matchIndices).
1351 // c. Else,
1352 // i. Let matchIndexPair be undefined.
1353 let indices_range = capture.map_or_else(JsValue::undefined, |range| {
1354 Array::create_array_from_list([range.start.into(), range.end.into()], context)
1355 .into()
1356 });
1357
1358 // d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), matchIndexPair).
1359 indices.create_data_property_or_throw(i, indices_range, context)?;
1360 }
1361 }
1362
1363 // 34. If hasIndices is true, then
1364 // a. Let indicesArray be MakeMatchIndicesIndexPairArray(S, indices, groupNames, hasGroups).
1365 // b. Perform ! CreateDataPropertyOrThrow(A, "indices", indicesArray).
1366 if has_indices {
1367 a.create_data_property_or_throw(js_string!("indices"), indices, context)?;
1368 }
1369
1370 // 35. Return A.
1371 Ok(Some(a))
1372 }
1373
1374 /// `RegExp.prototype[ @@match ]( string )`
1375 ///
1376 /// This method retrieves the matches when matching a string against a regular expression.
1377 ///
1378 /// More information:
1379 /// - [ECMAScript reference][spec]
1380 /// - [MDN documentation][mdn]
1381 ///
1382 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@match
1383 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@match
1384 pub(crate) fn r#match(
1385 this: &JsValue,
1386 args: &[JsValue],
1387 context: &mut Context,
1388 ) -> JsResult<JsValue> {
1389 // 1. Let rx be the this value.
1390 // 2. If rx is not an Object, throw a TypeError exception.
1391 let Some(rx) = this.as_object() else {
1392 return Err(JsNativeError::typ()
1393 .with_message("RegExp.prototype.match method called on incompatible value")
1394 .into());
1395 };
1396
1397 // 3. Let S be ? ToString(string).
1398 let arg_str = args.get_or_undefined(0).to_string(context)?;
1399
1400 // 4. Let flags be ? ToString(? Get(rx, "flags")).
1401 let flags = rx.get(js_string!("flags"), context)?.to_string(context)?;
1402
1403 // 5. If flags does not contain "g", then
1404 if !flags.contains(b'g') {
1405 // a. Return ? RegExpExec(rx, S).
1406 return (Self::abstract_exec(&rx, arg_str, context)?)
1407 .map_or_else(|| Ok(JsValue::null()), |v| Ok(v.into()));
1408 }
1409
1410 // 6. Else,
1411
1412 // a. If flags contains "u" or flags contains "v", let fullUnicode be true. Otherwise, let fullUnicode be false.
1413 let full_unicode = flags.contains(b'u') || flags.contains(b'v');
1414
1415 // b. Perform ? Set(rx, "lastIndex", +0𝔽, true).
1416 rx.set(js_string!("lastIndex"), 0, true, context)?;
1417
1418 // c. Let A be ! ArrayCreate(0).
1419 let a = Array::array_create(0, None, context)?;
1420
1421 // d. Let n be 0.
1422 let mut n = 0;
1423
1424 // e. Repeat,
1425 loop {
1426 // i. Let result be ? RegExpExec(rx, S).
1427 let result = Self::abstract_exec(&rx, arg_str.clone(), context)?;
1428
1429 // ii. If result is null, then
1430 // iii. Else,
1431 if let Some(result) = result {
1432 // 1. Let matchStr be ? ToString(? Get(result, "0")).
1433 let match_str = result.get(0, context)?.to_string(context)?;
1434
1435 // 2. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(n)), matchStr).
1436 a.create_data_property_or_throw(n, match_str.clone(), context)?;
1437
1438 // 3. If matchStr is the empty String, then
1439 if match_str.is_empty() {
1440 // a. Let thisIndex be ℝ(? ToLength(? Get(rx, "lastIndex"))).
1441 let this_index = rx
1442 .get(js_string!("lastIndex"), context)?
1443 .to_length(context)?;
1444
1445 // b. Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode).
1446 let next_index = advance_string_index(&arg_str, this_index, full_unicode);
1447
1448 // c. Perform ? Set(rx, "lastIndex", 𝔽(nextIndex), true).
1449 rx.set(
1450 js_string!("lastIndex"),
1451 JsValue::new(next_index),
1452 true,
1453 context,
1454 )?;
1455 }
1456
1457 // 4. Set n to n + 1.
1458 n += 1;
1459 } else {
1460 // 1. If n = 0, return null.
1461 if n == 0 {
1462 return Ok(JsValue::null());
1463 }
1464 // 2. Return A.
1465 return Ok(a.into());
1466 }
1467 }
1468 }
1469
1470 /// `RegExp.prototype.toString()`
1471 ///
1472 /// Return a string representing the regular expression.
1473 ///
1474 /// More information:
1475 /// - [ECMAScript reference][spec]
1476 /// - [MDN documentation][mdn]
1477 ///
1478 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.tostring
1479 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/toString
1480 #[allow(clippy::wrong_self_convention)]
1481 pub(crate) fn to_string(
1482 this: &JsValue,
1483 _: &[JsValue],
1484 context: &mut Context,
1485 ) -> JsResult<JsValue> {
1486 // 1. Let R be the this value.
1487 // 2. If R is not an Object, throw a TypeError exception.
1488 let regexp = this.as_object().ok_or_else(|| {
1489 JsNativeError::typ()
1490 .with_message("RegExp.prototype.toString method called on incompatible value")
1491 })?;
1492
1493 // 3. Let pattern be ? ToString(? Get(R, "source")).
1494 let pattern = regexp
1495 .get(js_string!("source"), context)?
1496 .to_string(context)?;
1497
1498 // 4. Let flags be ? ToString(? Get(R, "flags")).
1499 let flags = regexp
1500 .get(js_string!("flags"), context)?
1501 .to_string(context)?;
1502
1503 // 5. Let result be the string-concatenation of "/", pattern, "/", and flags.
1504 // 6. Return result.
1505 Ok(js_string!(js_str!("/"), &pattern, js_str!("/"), &flags).into())
1506 }
1507
1508 /// `RegExp.prototype[ @@matchAll ]( string )`
1509 ///
1510 /// The `[@@matchAll]` method returns all matches of the regular expression against a string.
1511 ///
1512 /// More information:
1513 /// - [ECMAScript reference][spec]
1514 /// - [MDN documentation][mdn]
1515 ///
1516 /// [spec]: https://tc39.es/ecma262/#sec-regexp-prototype-matchall
1517 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@matchAll
1518 pub(crate) fn match_all(
1519 this: &JsValue,
1520 args: &[JsValue],
1521 context: &mut Context,
1522 ) -> JsResult<JsValue> {
1523 // 1. Let R be the this value.
1524 // 2. If Type(R) is not Object, throw a TypeError exception.
1525 let regexp = this.as_object().ok_or_else(|| {
1526 JsNativeError::typ()
1527 .with_message("RegExp.prototype.match_all method called on incompatible value")
1528 })?;
1529
1530 // 3. Let S be ? ToString(string).
1531 let arg_str = args.get_or_undefined(0).to_string(context)?;
1532
1533 // 4. Let C be ? SpeciesConstructor(R, %RegExp%).
1534 let c = regexp.species_constructor(StandardConstructors::regexp, context)?;
1535
1536 // 5. Let flags be ? ToString(? Get(R, "flags")).
1537 let flags = regexp
1538 .get(js_string!("flags"), context)?
1539 .to_string(context)?;
1540
1541 // 6. Let matcher be ? Construct(C, « R, flags »).
1542 let matcher = c.construct(&[this.clone(), flags.clone().into()], Some(&c), context)?;
1543
1544 // 7. Let lastIndex be ? ToLength(? Get(R, "lastIndex")).
1545 let last_index = regexp
1546 .get(js_string!("lastIndex"), context)?
1547 .to_length(context)?;
1548
1549 // 8. Perform ? Set(matcher, "lastIndex", lastIndex, true).
1550 matcher.set(js_string!("lastIndex"), last_index, true, context)?;
1551
1552 // 9. If flags contains "g", let global be true.
1553 // 10. Else, let global be false.
1554 let global = flags.contains(b'g');
1555
1556 // 11. If flags contains "u", let fullUnicode be true.
1557 // 12. Else, let fullUnicode be false.
1558 let unicode = flags.contains(b'u') || flags.contains(b'v');
1559
1560 // 13. Return ! CreateRegExpStringIterator(matcher, S, global, fullUnicode).
1561 Ok(RegExpStringIterator::create_regexp_string_iterator(
1562 matcher.clone(),
1563 arg_str,
1564 global,
1565 unicode,
1566 context,
1567 ))
1568 }
1569
1570 /// `RegExp.prototype [ @@replace ] ( string, replaceValue )`
1571 ///
1572 /// The [@@replace]() method replaces some or all matches of a this pattern in a string by a replacement,
1573 /// and returns the result of the replacement as a new string.
1574 /// The replacement can be a string or a function to be called for each match.
1575 ///
1576 /// More information:
1577 /// - [ECMAScript reference][spec]
1578 /// - [MDN documentation][mdn]
1579 ///
1580 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
1581 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@replace
1582 pub(crate) fn replace(
1583 this: &JsValue,
1584 args: &[JsValue],
1585 context: &mut Context,
1586 ) -> JsResult<JsValue> {
1587 // Helper enum.
1588 enum CallableOrString {
1589 FunctionalReplace(JsObject),
1590 ReplaceValue(JsString),
1591 }
1592
1593 // 1. Let rx be the this value.
1594 // 2. If rx is not an Object, throw a TypeError exception.
1595 let rx = this.as_object().ok_or_else(|| {
1596 JsNativeError::typ().with_message(
1597 "RegExp.prototype[Symbol.replace] method called on incompatible value",
1598 )
1599 })?;
1600
1601 // 3. Let S be ? ToString(string).
1602 let s = args.get_or_undefined(0).to_string(context)?;
1603
1604 // 4. Let lengthS be the length of S.
1605 let length_s = s.len();
1606
1607 let replace_value = args.get_or_undefined(1);
1608
1609 // 5. Let functionalReplace be IsCallable(replaceValue).
1610 let functional_replace = replace_value.as_callable();
1611
1612 // 6. If functionalReplace is false, then
1613 let replace_value = if let Some(callable) = functional_replace {
1614 CallableOrString::FunctionalReplace(callable)
1615 } else {
1616 // a. Set replaceValue to ? ToString(replaceValue).
1617 CallableOrString::ReplaceValue(replace_value.to_string(context)?)
1618 };
1619
1620 // 7. Let flags be ? ToString(? Get(rx, "flags")).
1621 let flags = rx.get(js_string!("flags"), context)?.to_string(context)?;
1622
1623 // 8. If flags contains "g", let global be true. Otherwise, let global be false.
1624 let global = flags.contains(b'g');
1625
1626 // 9. If global is true, then
1627 let full_unicode = if global {
1628 // a. If flags contains "u", let fullUnicode be true. Otherwise, let fullUnicode be false.
1629 let full_unicode = flags.contains(b'u');
1630
1631 // b. Perform ? Set(rx, "lastIndex", +0𝔽, true).
1632 rx.set(js_string!("lastIndex"), 0, true, context)?;
1633
1634 full_unicode
1635 } else {
1636 false
1637 };
1638
1639 // 10. Let results be a new empty List.
1640 let mut results = Vec::new();
1641
1642 // SKIPPED: 11. Let done be false.
1643 //
1644 // NOTE(HalidOdat): We don't keep track of `done`, we just break when done is true.
1645
1646 // 12. Repeat, while done is false,
1647 loop {
1648 // a. Let result be ? RegExpExec(rx, S).
1649 let result = Self::abstract_exec(&rx, s.clone(), context)?;
1650
1651 // b. If result is null, set done to true.
1652 let Some(result) = result else {
1653 // SKIPPED: 1. Set done to true.
1654 break;
1655 };
1656
1657 // c. Else,
1658 // i. Append result to results.
1659 results.push(result.clone());
1660
1661 // ii. If global is false, then
1662 if !global {
1663 // SKIPPED: 1. Set done to true.
1664 break;
1665 }
1666
1667 // iii. Else,
1668 // 1. Let matchStr be ? ToString(? Get(result, "0")).
1669 let match_str = result.get(0, context)?.to_string(context)?;
1670
1671 // 2. If matchStr is the empty String, then
1672 if match_str.is_empty() {
1673 // a. Let thisIndex be ℝ(? ToLength(? Get(rx, "lastIndex"))).
1674 let this_index = rx
1675 .get(js_string!("lastIndex"), context)?
1676 .to_length(context)?;
1677
1678 // b. Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode).
1679 let next_index = advance_string_index(&s, this_index, full_unicode);
1680
1681 // c. Perform ? Set(rx, "lastIndex", 𝔽(nextIndex), true).
1682 rx.set(
1683 js_string!("lastIndex"),
1684 JsValue::new(next_index),
1685 true,
1686 context,
1687 )?;
1688 }
1689 }
1690
1691 // 16. If nextSourcePosition ≥ lengthS, return accumulatedResult.
1692 // 17. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition.
1693
1694 // 13. Let accumulatedResult be the empty String.
1695 let mut accumulated_result = vec![];
1696
1697 // 14. Let nextSourcePosition be 0.
1698 let mut next_source_position = 0;
1699
1700 // 15. For each element result of results, do
1701 for result in results {
1702 // a. Let resultLength be ? LengthOfArrayLike(result).
1703 let result_length = result.length_of_array_like(context)? as i64;
1704
1705 // b. Let nCaptures be max(resultLength - 1, 0).
1706 let n_captures = std::cmp::max(result_length - 1, 0);
1707
1708 // c. Let matched be ? ToString(? Get(result, "0")).
1709 let matched = result.get(0, context)?.to_string(context)?;
1710
1711 // d. Let matchLength be the length of matched.
1712 let match_length = matched.len();
1713
1714 // e. Let position be ? ToIntegerOrInfinity(? Get(result, "index")).
1715 let position = result
1716 .get(js_string!("index"), context)?
1717 .to_integer_or_infinity(context)?;
1718
1719 // f. Set position to the result of clamping position between 0 and lengthS.
1720 let position = position.clamp_finite(0, length_s as i64) as usize;
1721
1722 // g. Let captures be a new empty List.
1723 let mut captures = Vec::new();
1724
1725 // h. Let n be 1.
1726 // i. Repeat, while n ≤ nCaptures,
1727 for n in 1..=n_captures {
1728 // i. Let capN be ? Get(result, ! ToString(𝔽(n))).
1729 let mut cap_n = result.get(n, context)?;
1730
1731 // ii. If capN is not undefined, then
1732 if !cap_n.is_undefined() {
1733 // 1. Set capN to ? ToString(capN).
1734 cap_n = cap_n.to_string(context)?.into();
1735 }
1736
1737 // iii. Append capN to captures.
1738 captures.push(cap_n);
1739
1740 // iv. NOTE: When n = 1, the preceding step puts the first element into captures (at index 0).
1741 // More generally, the nth capture (the characters captured by the nth set of capturing parentheses)
1742 // is at captures[n - 1].
1743 //
1744 // v. Set n to n + 1.
1745 }
1746
1747 // j. Let namedCaptures be ? Get(result, "groups").
1748 let mut named_captures = result.get(js_string!("groups"), context)?;
1749
1750 let replacement = match replace_value {
1751 // k. If functionalReplace is true, then
1752 CallableOrString::FunctionalReplace(ref replace_value) => {
1753 // i. Let replacerArgs be the list-concatenation of « matched », captures, and « 𝔽(position), S ».
1754 let mut replacer_args = vec![JsValue::new(matched)];
1755 replacer_args.extend(captures);
1756 replacer_args.push(position.into());
1757 replacer_args.push(s.clone().into());
1758
1759 // ii. If namedCaptures is not undefined, then
1760 if !named_captures.is_undefined() {
1761 // 1. Append namedCaptures to replacerArgs.
1762 replacer_args.push(named_captures);
1763 }
1764
1765 // iii. Let replValue be ? Call(replaceValue, undefined, replacerArgs).
1766 let repl_value =
1767 replace_value.call(&JsValue::undefined(), &replacer_args, context)?;
1768
1769 // iv. Let replacement be ? ToString(replValue).
1770 repl_value.to_string(context)?
1771 }
1772 // l. Else,
1773 CallableOrString::ReplaceValue(ref replace_value) => {
1774 // i. If namedCaptures is not undefined, then
1775 if !named_captures.is_undefined() {
1776 // 1. Set namedCaptures to ? ToObject(namedCaptures).
1777 named_captures = named_captures.to_object(context)?.into();
1778 }
1779
1780 // ii. Let replacement be ? GetSubstitution(matched, S, position, captures, namedCaptures, replaceValue).
1781 string::get_substitution(
1782 &matched,
1783 &s,
1784 position,
1785 &captures,
1786 &named_captures,
1787 replace_value,
1788 context,
1789 )?
1790 }
1791 };
1792
1793 // m. If position ≥ nextSourcePosition, then
1794 if position >= next_source_position {
1795 // i. NOTE: position should not normally move backwards.
1796 // If it does, it is an indication of an ill-behaving RegExp subclass or use of
1797 // an access triggered side-effect to change the global flag or other characteristics of rx.
1798 // In such cases, the corresponding substitution is ignored.
1799
1800 // ii. Set accumulatedResult to the string-concatenation of accumulatedResult, the substring of S from nextSourcePosition to position, and replacement.
1801 accumulated_result.extend(s.get_expect(next_source_position..position).iter());
1802 accumulated_result.extend(replacement.iter());
1803
1804 // iii. Set nextSourcePosition to position + matchLength.
1805 next_source_position = position + match_length;
1806 }
1807 }
1808
1809 // 16. If nextSourcePosition ≥ lengthS, return accumulatedResult.
1810 if next_source_position >= length_s {
1811 return Ok(js_string!(&accumulated_result[..]).into());
1812 }
1813
1814 // 17. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition.
1815 Ok(js_string!(
1816 &JsString::from(&accumulated_result[..]),
1817 &s.get_expect(next_source_position..)
1818 )
1819 .into())
1820 }
1821
1822 /// `RegExp.prototype[ @@search ]( string )`
1823 ///
1824 /// This method executes a search for a match between a this regular expression and a string.
1825 ///
1826 /// More information:
1827 /// - [ECMAScript reference][spec]
1828 /// - [MDN documentation][mdn]
1829 ///
1830 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@search
1831 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@search
1832 pub(crate) fn search(
1833 this: &JsValue,
1834 args: &[JsValue],
1835 context: &mut Context,
1836 ) -> JsResult<JsValue> {
1837 // 1. Let rx be the this value.
1838 // 2. If Type(rx) is not Object, throw a TypeError exception.
1839 let rx = this.as_object().ok_or_else(|| {
1840 JsNativeError::typ()
1841 .with_message("RegExp.prototype[Symbol.search] method called on incompatible value")
1842 })?;
1843
1844 // 3. Let S be ? ToString(string).
1845 let arg_str = args.get_or_undefined(0).to_string(context)?;
1846
1847 // 4. Let previousLastIndex be ? Get(rx, "lastIndex").
1848 let previous_last_index = rx.get(js_string!("lastIndex"), context)?;
1849
1850 // 5. If SameValue(previousLastIndex, +0𝔽) is false, then
1851 if !JsValue::same_value(&previous_last_index, &JsValue::new(0)) {
1852 // a. Perform ? Set(rx, "lastIndex", +0𝔽, true).
1853 rx.set(js_string!("lastIndex"), 0, true, context)?;
1854 }
1855
1856 // 6. Let result be ? RegExpExec(rx, S).
1857 let result = Self::abstract_exec(&rx, arg_str, context)?;
1858
1859 // 7. Let currentLastIndex be ? Get(rx, "lastIndex").
1860 let current_last_index = rx.get(js_string!("lastIndex"), context)?;
1861
1862 // 8. If SameValue(currentLastIndex, previousLastIndex) is false, then
1863 if !JsValue::same_value(¤t_last_index, &previous_last_index) {
1864 // a. Perform ? Set(rx, "lastIndex", previousLastIndex, true).
1865 rx.set(js_string!("lastIndex"), previous_last_index, true, context)?;
1866 }
1867
1868 // 9. If result is null, return -1𝔽.
1869 // 10. Return ? Get(result, "index").
1870 result.map_or_else(
1871 || Ok(JsValue::new(-1)),
1872 |result| result.get(js_string!("index"), context),
1873 )
1874 }
1875
1876 /// `RegExp.prototype [ @@split ] ( string, limit )`
1877 ///
1878 /// The [@@split]() method splits a String object into an array of strings by separating the string into substrings.
1879 ///
1880 /// More information:
1881 /// - [ECMAScript reference][spec]
1882 /// - [MDN documentation][mdn]
1883 ///
1884 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@split
1885 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@split
1886 pub(crate) fn split(
1887 this: &JsValue,
1888 args: &[JsValue],
1889 context: &mut Context,
1890 ) -> JsResult<JsValue> {
1891 // 1. Let rx be the this value.
1892 // 2. If Type(rx) is not Object, throw a TypeError exception.
1893 let rx = this.as_object().ok_or_else(|| {
1894 JsNativeError::typ()
1895 .with_message("RegExp.prototype.split method called on incompatible value")
1896 })?;
1897
1898 // 3. Let S be ? ToString(string).
1899 let arg_str = args.get_or_undefined(0).to_string(context)?;
1900
1901 // 4. Let C be ? SpeciesConstructor(rx, %RegExp%).
1902 let constructor = rx.species_constructor(StandardConstructors::regexp, context)?;
1903
1904 // 5. Let flags be ? ToString(? Get(rx, "flags")).
1905 let flags = rx.get(js_string!("flags"), context)?.to_string(context)?;
1906
1907 // 6. If flags contains "u", let unicodeMatching be true.
1908 // 7. Else, let unicodeMatching be false.
1909 let unicode = flags.contains(b'u');
1910
1911 // 8. If flags contains "y", let newFlags be flags.
1912 // 9. Else, let newFlags be the string-concatenation of flags and "y".
1913 let new_flags = if flags.contains(b'y') {
1914 flags
1915 } else {
1916 js_string!(&flags, js_str!("y"))
1917 };
1918
1919 // 10. Let splitter be ? Construct(C, « rx, newFlags »).
1920 let splitter = constructor.construct(
1921 &[this.clone(), new_flags.into()],
1922 Some(&constructor),
1923 context,
1924 )?;
1925
1926 // 11. Let A be ! ArrayCreate(0).
1927 let a = Array::array_create(0, None, context)?;
1928
1929 // 12. Let lengthA be 0.
1930 let mut length_a = 0;
1931
1932 // 13. If limit is undefined, let lim be 2^32 - 1; else let lim be ℝ(? ToUint32(limit)).
1933 let limit = args.get_or_undefined(1);
1934 let lim = if limit.is_undefined() {
1935 u32::MAX
1936 } else {
1937 limit.to_u32(context)?
1938 };
1939
1940 // 14. If lim is 0, return A.
1941 if lim == 0 {
1942 return Ok(a.into());
1943 }
1944
1945 // 15. Let size be the length of S.
1946 let size = arg_str.len() as u64;
1947
1948 // 16. If size is 0, then
1949 if size == 0 {
1950 // a. Let z be ? RegExpExec(splitter, S).
1951 let result = Self::abstract_exec(&splitter, arg_str.clone(), context)?;
1952
1953 // b. If z is not null, return A.
1954 if result.is_some() {
1955 return Ok(a.into());
1956 }
1957
1958 // c. Perform ! CreateDataPropertyOrThrow(A, "0", S).
1959 a.create_data_property_or_throw(0, arg_str, context)?;
1960
1961 // d. Return A.
1962 return Ok(a.into());
1963 }
1964
1965 // 17. Let p be 0.
1966 // 18. Let q be p.
1967 let mut p = 0;
1968 let mut q = p;
1969
1970 // 19. Repeat, while q < size,
1971 while q < size {
1972 // a. Perform ? Set(splitter, "lastIndex", 𝔽(q), true).
1973 splitter.set(js_string!("lastIndex"), JsValue::new(q), true, context)?;
1974
1975 // b. Let z be ? RegExpExec(splitter, S).
1976 let result = Self::abstract_exec(&splitter, arg_str.clone(), context)?;
1977
1978 // c. If z is null, set q to AdvanceStringIndex(S, q, unicodeMatching).
1979 // d. Else,
1980 if let Some(result) = result {
1981 // i. Let e be ℝ(? ToLength(? Get(splitter, "lastIndex"))).
1982 let mut e = splitter
1983 .get(js_string!("lastIndex"), context)?
1984 .to_length(context)?;
1985
1986 // ii. Set e to min(e, size).
1987 e = std::cmp::min(e, size);
1988
1989 // iii. If e = p, set q to AdvanceStringIndex(S, q, unicodeMatching).
1990 // iv. Else,
1991 if e == p {
1992 q = advance_string_index(&arg_str, q, unicode);
1993 } else {
1994 // 1. Let T be the substring of S from p to q.
1995 let arg_str_substring = arg_str.get_expect(p as usize..q as usize);
1996
1997 // 2. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T).
1998 a.create_data_property_or_throw(length_a, arg_str_substring, context)?;
1999
2000 // 3. Set lengthA to lengthA + 1.
2001 length_a += 1;
2002
2003 // 4. If lengthA = lim, return A.
2004 if length_a == lim {
2005 return Ok(a.into());
2006 }
2007
2008 // 5. Set p to e.
2009 p = e;
2010
2011 // 6. Let numberOfCaptures be ? LengthOfArrayLike(z).
2012 let mut number_of_captures = result.length_of_array_like(context)? as isize;
2013
2014 // 7. Set numberOfCaptures to max(numberOfCaptures - 1, 0).
2015 number_of_captures = std::cmp::max(number_of_captures - 1, 0);
2016
2017 // 8. Let i be 1.
2018 // 9. Repeat, while i ≤ numberOfCaptures,
2019 for i in 1..=number_of_captures {
2020 // a. Let nextCapture be ? Get(z, ! ToString(𝔽(i))).
2021 let next_capture = result.get(i, context)?;
2022
2023 // b. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), nextCapture).
2024 a.create_data_property_or_throw(length_a, next_capture, context)?;
2025
2026 // d. Set lengthA to lengthA + 1.
2027 length_a += 1;
2028
2029 // e. If lengthA = lim, return A.
2030 if length_a == lim {
2031 return Ok(a.into());
2032 }
2033 }
2034
2035 // 10. Set q to p.
2036 q = p;
2037 }
2038 } else {
2039 q = advance_string_index(&arg_str, q, unicode);
2040 }
2041 }
2042
2043 // 20. Let T be the substring of S from p to size.
2044 let arg_str_substring = arg_str.get_expect(p as usize..size as usize);
2045
2046 // 21. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T).
2047 a.create_data_property_or_throw(length_a, arg_str_substring, context)?;
2048
2049 // 22. Return A.
2050 Ok(a.into())
2051 }
2052
/// [`RegExp.prototype.compile ( pattern, flags )`][spec]
///
/// Re-initializes the receiver in place from `pattern` and `flags`, resets its `lastIndex`
/// to 0 and returns the receiver. When `pattern` is itself a `RegExp`, its original source and
/// flags are reused and `flags` must be `undefined`.
///
/// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.compile
#[cfg(feature = "annex-b")]
fn compile(this: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
    // 1. Let O be the this value.
    // 2. Perform ? RequireInternalSlot(O, [[RegExpMatcher]]).
    let Some(target) = this.as_object().filter(|o| o.is::<RegExp>()) else {
        return Err(JsNativeError::typ()
            .with_message("`RegExp.prototype.compile` cannot be called for a non-object")
            .into());
    };

    let pattern = args.get_or_undefined(0);
    let flags = args.get_or_undefined(1);

    // Copy the slots out of `pattern` up front so that no borrow of it is still alive when
    // the receiver is mutably borrowed below.
    let pattern_slots = pattern.as_object().and_then(|o| {
        o.downcast_ref::<RegExp>()
            .map(|rx| (rx.original_source.clone(), rx.original_flags.clone()))
    });

    let (pattern, flags) = match pattern_slots {
        // 3. If pattern is an Object and pattern has a [[RegExpMatcher]] internal slot, then
        Some((source, original_flags)) => {
            // a. If flags is not undefined, throw a TypeError exception.
            if !flags.is_undefined() {
                return Err(JsNativeError::typ()
                    .with_message(
                        "`RegExp.prototype.compile` cannot be \
                         called with both a RegExp initializer and new flags",
                    )
                    .into());
            }

            // b. Let P be pattern.[[OriginalSource]].
            // c. Let F be pattern.[[OriginalFlags]].
            (JsValue::from(source), JsValue::from(original_flags))
        }
        // 4. Else,
        //     a. Let P be pattern.
        //     b. Let F be flags.
        None => (pattern.clone(), flags.clone()),
    };

    let regexp = Self::compile_native_regexp(&pattern, &flags, context)?;

    // 5. Return ? RegExpInitialize(O, P, F).
    {
        // The mutable borrow is confined to this scope so it is released before `set` runs.
        let mut slot = target
            .downcast_mut::<RegExp>()
            .expect("already checked that the object was a RegExp");
        *slot = regexp;
    }

    target.set(js_string!("lastIndex"), 0, true, context)?;

    Ok(target.into())
}
2107}
2108
2109/// `22.2.5.2.3 AdvanceStringIndex ( S, index, unicode )`
2110///
2111/// More information:
2112/// - [ECMAScript reference][spec]
2113///
2114/// [spec]: https://tc39.es/ecma262/#sec-advancestringindex
2115fn advance_string_index(s: &JsString, index: u64, unicode: bool) -> u64 {
2116 // Regress only works with utf8, so this function differs from the spec.
2117
2118 // 1. Assert: index ≤ 2^53 - 1.
2119
2120 // 2. If unicode is false, return index + 1.
2121 if !unicode {
2122 return index + 1;
2123 }
2124
2125 // 3. Let length be the number of code units in S.
2126 let length = s.len() as u64;
2127
2128 // 4. If index + 1 ≥ length, return index + 1.
2129 if index + 1 > length {
2130 return index + 1;
2131 }
2132
2133 // 5. Let cp be ! CodePointAt(S, index).
2134 let code_point = s.code_point_at(index as usize);
2135
2136 index + code_point.code_unit_count() as u64
2137}