boa/builtins/regexp/mod.rs
1//! This module implements the global `RegExp` object.
2//!
//! The `RegExp` object is used for matching text with a pattern.
4//!
5//! More information:
6//! - [ECMAScript reference][spec]
7//! - [MDN documentation][mdn]
8//!
9//! [spec]: https://tc39.es/ecma262/#sec-regexp-constructor
10//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
11
12pub mod regexp_string_iterator;
13
14use crate::{
15 builtins::{array::Array, string, BuiltIn},
16 context::StandardObjects,
17 gc::{empty_trace, Finalize, Trace},
18 object::{
19 internal_methods::get_prototype_from_constructor, ConstructorBuilder, FunctionBuilder,
20 JsObject, Object, ObjectData,
21 },
22 property::Attribute,
23 symbol::WellKnownSymbols,
24 value::{IntegerOrInfinity, JsValue},
25 BoaProfiler, Context, JsResult, JsString,
26};
27use regexp_string_iterator::RegExpStringIterator;
28use regress::Regex;
29
30use super::JsArgs;
31
32#[cfg(test)]
33mod tests;
34
/// The internal representation of a `RegExp` object.
///
/// Stores the compiled matcher, one boolean per parsed flag, and the pattern and
/// flags strings the object was initialized with.
#[derive(Debug, Clone, Finalize)]
pub struct RegExp {
    /// Regex matcher.
    matcher: Regex,

    /// Update last_index, set if global or sticky flags are set.
    use_last_index: bool,

    /// Flag 's' - dot matches newline characters.
    dot_all: bool,

    /// Flag 'g'
    global: bool,

    /// Flag 'i' - ignore case.
    ignore_case: bool,

    /// Flag 'm' - '^' and '$' match beginning/end of line.
    multiline: bool,

    /// Flag 'y'
    sticky: bool,

    /// Flag 'u' - Unicode.
    unicode: bool,

    /// The pattern string the object was initialized with (`[[OriginalSource]]`).
    original_source: JsString,
    /// The flags string the object was initialized with (`[[OriginalFlags]]`).
    original_flags: JsString,
}
65
// Only safe while regress::Regex doesn't implement Trace itself.
// `empty_trace!()` presumably makes tracing a no-op, which would be sound only if no
// field holds garbage-collected pointers — NOTE(review): confirm this for `JsString`.
unsafe impl Trace for RegExp {
    empty_trace!();
}
70
71impl BuiltIn for RegExp {
72 const NAME: &'static str = "RegExp";
73
74 fn attribute() -> Attribute {
75 Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE
76 }
77
78 fn init(context: &mut Context) -> (&'static str, JsValue, Attribute) {
79 let _timer = BoaProfiler::global().start_event(Self::NAME, "init");
80
81 let get_species = FunctionBuilder::native(context, Self::get_species)
82 .name("get [Symbol.species]")
83 .constructable(false)
84 .build();
85
86 let flag_attributes = Attribute::CONFIGURABLE | Attribute::NON_ENUMERABLE;
87
88 let get_global = FunctionBuilder::native(context, Self::get_global)
89 .name("get global")
90 .constructable(false)
91 .build();
92 let get_ignore_case = FunctionBuilder::native(context, Self::get_ignore_case)
93 .name("get ignoreCase")
94 .constructable(false)
95 .build();
96 let get_multiline = FunctionBuilder::native(context, Self::get_multiline)
97 .name("get multiline")
98 .constructable(false)
99 .build();
100 let get_dot_all = FunctionBuilder::native(context, Self::get_dot_all)
101 .name("get dotAll")
102 .constructable(false)
103 .build();
104 let get_unicode = FunctionBuilder::native(context, Self::get_unicode)
105 .name("get unicode")
106 .constructable(false)
107 .build();
108 let get_sticky = FunctionBuilder::native(context, Self::get_sticky)
109 .name("get sticky")
110 .constructable(false)
111 .build();
112 let get_flags = FunctionBuilder::native(context, Self::get_flags)
113 .name("get flags")
114 .constructable(false)
115 .build();
116 let get_source = FunctionBuilder::native(context, Self::get_source)
117 .name("get source")
118 .constructable(false)
119 .build();
120 let regexp_object = ConstructorBuilder::with_standard_object(
121 context,
122 Self::constructor,
123 context.standard_objects().regexp_object().clone(),
124 )
125 .name(Self::NAME)
126 .length(Self::LENGTH)
127 .static_accessor(
128 WellKnownSymbols::species(),
129 Some(get_species),
130 None,
131 Attribute::CONFIGURABLE,
132 )
133 .property("lastIndex", 0, Attribute::all())
134 .method(Self::test, "test", 1)
135 .method(Self::exec, "exec", 1)
136 .method(Self::to_string, "toString", 0)
137 .method(
138 Self::r#match,
139 (WellKnownSymbols::match_(), "[Symbol.match]"),
140 1,
141 )
142 .method(
143 Self::match_all,
144 (WellKnownSymbols::match_all(), "[Symbol.matchAll]"),
145 1,
146 )
147 .method(
148 Self::replace,
149 (WellKnownSymbols::replace(), "[Symbol.replace]"),
150 2,
151 )
152 .method(
153 Self::search,
154 (WellKnownSymbols::search(), "[Symbol.search]"),
155 1,
156 )
157 .method(
158 Self::split,
159 (WellKnownSymbols::split(), "[Symbol.split]"),
160 2,
161 )
162 .accessor("global", Some(get_global), None, flag_attributes)
163 .accessor("ignoreCase", Some(get_ignore_case), None, flag_attributes)
164 .accessor("multiline", Some(get_multiline), None, flag_attributes)
165 .accessor("dotAll", Some(get_dot_all), None, flag_attributes)
166 .accessor("unicode", Some(get_unicode), None, flag_attributes)
167 .accessor("sticky", Some(get_sticky), None, flag_attributes)
168 .accessor("flags", Some(get_flags), None, flag_attributes)
169 .accessor("source", Some(get_source), None, flag_attributes)
170 .build();
171
172 // TODO: add them RegExp accessor properties
173
174 (Self::NAME, regexp_object.into(), Self::attribute())
175 }
176}
177
178impl RegExp {
/// The name of the object; passed to the constructor builder and used as the
/// global property key looked up by `create`.
pub(crate) const NAME: &'static str = "RegExp";

/// The amount of arguments this function object takes (`pattern` and `flags`).
pub(crate) const LENGTH: usize = 2;
184
185 /// `22.2.3.1 RegExp ( pattern, flags )`
186 ///
187 /// More information:
188 /// - [ECMAScript reference][spec]
189 ///
190 /// [spec]: https://tc39.es/ecma262/#sec-regexp-pattern-flags
191 pub(crate) fn constructor(
192 new_target: &JsValue,
193 args: &[JsValue],
194 context: &mut Context,
195 ) -> JsResult<JsValue> {
196 let pattern = args.get_or_undefined(0);
197 let flags = args.get_or_undefined(1);
198
199 // 1. Let patternIsRegExp be ? IsRegExp(pattern).
200 let pattern_is_regexp = if let JsValue::Object(obj) = &pattern {
201 if obj.is_regexp() {
202 Some(obj)
203 } else {
204 None
205 }
206 } else {
207 None
208 };
209
210 // 2. If NewTarget is undefined, then
211 // 3. Else, let newTarget be NewTarget.
212 if new_target.is_undefined() {
213 // a. Let newTarget be the active function object.
214 // b. If patternIsRegExp is true and flags is undefined, then
215 if let Some(pattern) = pattern_is_regexp {
216 if flags.is_undefined() {
217 // i. Let patternConstructor be ? Get(pattern, "constructor").
218 let pattern_constructor = pattern.get("constructor", context)?;
219 // ii. If SameValue(newTarget, patternConstructor) is true, return pattern.
220 if JsValue::same_value(new_target, &pattern_constructor) {
221 return Ok(pattern.clone().into());
222 }
223 }
224 }
225 }
226
227 // 4. If Type(pattern) is Object and pattern has a [[RegExpMatcher]] internal slot, then
228 // 6. Else,
229 let (p, f) = if let Some(pattern) = pattern_is_regexp {
230 let obj = pattern.borrow();
231 let regexp = obj.as_regexp().unwrap();
232
233 // a. Let P be pattern.[[OriginalSource]].
234 // b. If flags is undefined, let F be pattern.[[OriginalFlags]].
235 // c. Else, let F be flags.
236 if flags.is_undefined() {
237 (
238 JsValue::new(regexp.original_source.clone()),
239 JsValue::new(regexp.original_flags.clone()),
240 )
241 } else {
242 (JsValue::new(regexp.original_source.clone()), flags.clone())
243 }
244 } else {
245 // a. Let P be pattern.
246 // b. Let F be flags.
247 (pattern.clone(), flags.clone())
248 };
249
250 // 7. Let O be ? RegExpAlloc(newTarget).
251 let o = RegExp::alloc(new_target, &[], context)?;
252
253 // 8.Return ? RegExpInitialize(O, P, F).
254 RegExp::initialize(&o, &[p, f], context)
255 }
256
257 /// `22.2.3.2.1 RegExpAlloc ( newTarget )`
258 ///
259 /// More information:
260 /// - [ECMAScript reference][spec]
261 ///
262 /// [spec]: https://tc39.es/ecma262/#sec-regexpalloc
263 fn alloc(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
264 let proto = get_prototype_from_constructor(this, StandardObjects::regexp_object, context)?;
265
266 Ok(JsObject::new(Object::create(proto.into())).into())
267 }
268
269 /// `22.2.3.2.2 RegExpInitialize ( obj, pattern, flags )`
270 ///
271 /// More information:
272 /// - [ECMAScript reference][spec]
273 ///
274 /// [spec]: https://tc39.es/ecma262/#sec-regexpinitialize
275 fn initialize(this: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
276 let pattern = args.get_or_undefined(0);
277 let flags = args.get_or_undefined(1);
278
279 // 1. If pattern is undefined, let P be the empty String.
280 // 2. Else, let P be ? ToString(pattern).
281 let p = if pattern.is_undefined() {
282 JsString::new("")
283 } else {
284 pattern.to_string(context)?
285 };
286
287 // 3. If flags is undefined, let F be the empty String.
288 // 4. Else, let F be ? ToString(flags).
289 let f = if flags.is_undefined() {
290 JsString::new("")
291 } else {
292 flags.to_string(context)?
293 };
294
295 // 5. If F contains any code unit other than "g", "i", "m", "s", "u", or "y"
296 // or if it contains the same code unit more than once, throw a SyntaxError exception.
297 let mut global = false;
298 let mut ignore_case = false;
299 let mut multiline = false;
300 let mut dot_all = false;
301 let mut unicode = false;
302 let mut sticky = false;
303 for c in f.chars() {
304 match c {
305 'g' if global => {
306 return context.throw_syntax_error("RegExp flags contains multiple 'g'")
307 }
308 'g' => global = true,
309 'i' if ignore_case => {
310 return context.throw_syntax_error("RegExp flags contains multiple 'i'")
311 }
312 'i' => ignore_case = true,
313 'm' if multiline => {
314 return context.throw_syntax_error("RegExp flags contains multiple 'm'")
315 }
316 'm' => multiline = true,
317 's' if dot_all => {
318 return context.throw_syntax_error("RegExp flags contains multiple 's'")
319 }
320 's' => dot_all = true,
321 'u' if unicode => {
322 return context.throw_syntax_error("RegExp flags contains multiple 'u'")
323 }
324 'u' => unicode = true,
325 'y' if sticky => {
326 return context.throw_syntax_error("RegExp flags contains multiple 'y'")
327 }
328 'y' => sticky = true,
329 c => {
330 return context.throw_syntax_error(format!(
331 "RegExp flags contains unknown code unit '{}'",
332 c
333 ))
334 }
335 }
336 }
337
338 // 12. Set obj.[[OriginalSource]] to P.
339 // 13. Set obj.[[OriginalFlags]] to F.
340 // 14. Set obj.[[RegExpMatcher]] to the Abstract Closure that evaluates parseResult by applying the semantics provided in 22.2.2 using patternCharacters as the pattern's List of SourceCharacter values and F as the flag parameters.
341 let matcher = match Regex::with_flags(&p, f.as_ref()) {
342 Err(error) => {
343 return Err(context
344 .construct_syntax_error(format!("failed to create matcher: {}", error.text)));
345 }
346 Ok(val) => val,
347 };
348
349 let regexp = RegExp {
350 matcher,
351 use_last_index: global || sticky,
352 dot_all,
353 global,
354 ignore_case,
355 multiline,
356 sticky,
357 unicode,
358 original_source: p,
359 original_flags: f,
360 };
361
362 this.set_data(ObjectData::reg_exp(Box::new(regexp)));
363
364 // 16. Return obj.
365 Ok(this.clone())
366 }
367
368 /// `22.2.3.2.4 RegExpCreate ( P, F )`
369 ///
370 /// More information:
371 /// - [ECMAScript reference][spec]
372 ///
373 /// [spec]: https://tc39.es/ecma262/#sec-regexpcreate
374 pub(crate) fn create(p: JsValue, f: JsValue, context: &mut Context) -> JsResult<JsValue> {
375 // 1. Let obj be ? RegExpAlloc(%RegExp%).
376 let obj = RegExp::alloc(
377 &context.global_object().get(RegExp::NAME, context)?,
378 &[],
379 context,
380 )?;
381
382 // 2. Return ? RegExpInitialize(obj, P, F).
383 RegExp::initialize(&obj, &[p, f], context)
384 }
385
386 /// `get RegExp [ @@species ]`
387 ///
388 /// The `RegExp [ @@species ]` accessor property returns the RegExp constructor.
389 ///
390 /// More information:
391 /// - [ECMAScript reference][spec]
392 /// - [MDN documentation][mdn]
393 ///
394 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp-@@species
395 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@species
396 fn get_species(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
397 // 1. Return the this value.
398 Ok(this.clone())
399 }
400
401 #[inline]
402 fn regexp_has_flag(this: &JsValue, flag: char, context: &mut Context) -> JsResult<JsValue> {
403 if let Some(object) = this.as_object() {
404 if let Some(regexp) = object.borrow().as_regexp() {
405 return Ok(JsValue::new(match flag {
406 'g' => regexp.global,
407 'm' => regexp.multiline,
408 's' => regexp.dot_all,
409 'i' => regexp.ignore_case,
410 'u' => regexp.unicode,
411 'y' => regexp.sticky,
412 _ => unreachable!(),
413 }));
414 }
415
416 if JsObject::equals(
417 &object,
418 &context.standard_objects().regexp_object().prototype,
419 ) {
420 return Ok(JsValue::undefined());
421 }
422 }
423
424 let name = match flag {
425 'g' => "global",
426 'm' => "multiline",
427 's' => "dotAll",
428 'i' => "ignoreCase",
429 'u' => "unicode",
430 'y' => "sticky",
431 _ => unreachable!(),
432 };
433
434 context.throw_type_error(format!(
435 "RegExp.prototype.{} getter called on non-RegExp object",
436 name
437 ))
438 }
439
440 /// `get RegExp.prototype.global`
441 ///
442 /// The `global` property indicates whether or not the "`g`" flag is used with the regular expression.
443 ///
444 /// More information:
445 /// - [ECMAScript reference][spec]
446 /// - [MDN documentation][mdn]
447 ///
448 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.global
449 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/global
450 pub(crate) fn get_global(
451 this: &JsValue,
452 _: &[JsValue],
453 context: &mut Context,
454 ) -> JsResult<JsValue> {
455 Self::regexp_has_flag(this, 'g', context)
456 }
457
458 /// `get RegExp.prototype.ignoreCase`
459 ///
460 /// The `ignoreCase` property indicates whether or not the "`i`" flag is used with the regular expression.
461 ///
462 /// More information:
463 /// - [ECMAScript reference][spec]
464 /// - [MDN documentation][mdn]
465 ///
466 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.ignorecase
467 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/ignoreCase
468 pub(crate) fn get_ignore_case(
469 this: &JsValue,
470 _: &[JsValue],
471 context: &mut Context,
472 ) -> JsResult<JsValue> {
473 Self::regexp_has_flag(this, 'i', context)
474 }
475
476 /// `get RegExp.prototype.multiline`
477 ///
478 /// The multiline property indicates whether or not the "m" flag is used with the regular expression.
479 ///
480 /// More information:
481 /// - [ECMAScript reference][spec]
482 /// - [MDN documentation][mdn]
483 ///
484 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.multiline
485 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/multiline
486 pub(crate) fn get_multiline(
487 this: &JsValue,
488 _: &[JsValue],
489 context: &mut Context,
490 ) -> JsResult<JsValue> {
491 Self::regexp_has_flag(this, 'm', context)
492 }
493
494 /// `get RegExp.prototype.dotAll`
495 ///
496 /// The `dotAll` property indicates whether or not the "`s`" flag is used with the regular expression.
497 ///
498 /// More information:
499 /// - [ECMAScript reference][spec]
500 /// - [MDN documentation][mdn]
501 ///
502 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.dotAll
503 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/dotAll
504 pub(crate) fn get_dot_all(
505 this: &JsValue,
506 _: &[JsValue],
507 context: &mut Context,
508 ) -> JsResult<JsValue> {
509 Self::regexp_has_flag(this, 's', context)
510 }
511
512 /// `get RegExp.prototype.unicode`
513 ///
514 /// The unicode property indicates whether or not the "`u`" flag is used with a regular expression.
515 /// unicode is a read-only property of an individual regular expression instance.
516 ///
517 /// More information:
518 /// - [ECMAScript reference][spec]
519 /// - [MDN documentation][mdn]
520 ///
521 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.unicode
522 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode
523 pub(crate) fn get_unicode(
524 this: &JsValue,
525 _: &[JsValue],
526 context: &mut Context,
527 ) -> JsResult<JsValue> {
528 Self::regexp_has_flag(this, 'u', context)
529 }
530
531 /// `get RegExp.prototype.sticky`
532 ///
533 /// This flag indicates that it matches only from the index indicated by the `lastIndex` property
534 /// of this regular expression in the target string (and does not attempt to match from any later indexes).
535 ///
536 /// More information:
537 /// - [ECMAScript reference][spec]
538 /// - [MDN documentation][mdn]
539 ///
540 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.sticky
541 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky
542 pub(crate) fn get_sticky(
543 this: &JsValue,
544 _: &[JsValue],
545 context: &mut Context,
546 ) -> JsResult<JsValue> {
547 Self::regexp_has_flag(this, 'y', context)
548 }
549
550 /// `get RegExp.prototype.flags`
551 ///
552 /// The `flags` property returns a string consisting of the [`flags`][flags] of the current regular expression object.
553 ///
554 /// More information:
555 /// - [ECMAScript reference][spec]
556 /// - [MDN documentation][mdn]
557 ///
558 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.flags
559 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/flags
560 /// [flags]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#Advanced_searching_with_flags_2
561 pub(crate) fn get_flags(
562 this: &JsValue,
563 _: &[JsValue],
564 context: &mut Context,
565 ) -> JsResult<JsValue> {
566 // 1. Let R be the this value.
567 // 2. If Type(R) is not Object, throw a TypeError exception.
568 if let Some(object) = this.as_object() {
569 // 3. Let result be the empty String.
570 let mut result = String::new();
571 // 4. Let global be ! ToBoolean(? Get(R, "global")).
572 // 5. If global is true, append the code unit 0x0067 (LATIN SMALL LETTER G) as the last code unit of result.
573 if object.get("global", context)?.to_boolean() {
574 result.push('g');
575 }
576 // 6. Let ignoreCase be ! ToBoolean(? Get(R, "ignoreCase")).
577 // 7. If ignoreCase is true, append the code unit 0x0069 (LATIN SMALL LETTER I) as the last code unit of result.
578 if object.get("ignoreCase", context)?.to_boolean() {
579 result.push('i');
580 }
581
582 // 8. Let multiline be ! ToBoolean(? Get(R, "multiline")).
583 // 9. If multiline is true, append the code unit 0x006D (LATIN SMALL LETTER M) as the last code unit of result.
584 if object.get("multiline", context)?.to_boolean() {
585 result.push('m');
586 }
587
588 // 10. Let dotAll be ! ToBoolean(? Get(R, "dotAll")).
589 // 11. If dotAll is true, append the code unit 0x0073 (LATIN SMALL LETTER S) as the last code unit of result.
590 if object.get("dotAll", context)?.to_boolean() {
591 result.push('s');
592 }
593 // 12. Let unicode be ! ToBoolean(? Get(R, "unicode")).
594 // 13. If unicode is true, append the code unit 0x0075 (LATIN SMALL LETTER U) as the last code unit of result.
595 if object.get("unicode", context)?.to_boolean() {
596 result.push('u');
597 }
598
599 // 14. Let sticky be ! ToBoolean(? Get(R, "sticky")).
600 // 15. If sticky is true, append the code unit 0x0079 (LATIN SMALL LETTER Y) as the last code unit of result.
601 if object.get("sticky", context)?.to_boolean() {
602 result.push('y');
603 }
604
605 // 16. Return result.
606 return Ok(result.into());
607 }
608
609 context.throw_type_error("RegExp.prototype.flags getter called on non-object")
610 }
611
612 /// `get RegExp.prototype.source`
613 ///
614 /// The `source` property returns a `String` containing the source text of the regexp object,
615 /// and it doesn't contain the two forward slashes on both sides and any flags.
616 ///
617 /// More information:
618 /// - [ECMAScript reference][spec]
619 /// - [MDN documentation][mdn]
620 ///
621 /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.source
622 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/source
623 pub(crate) fn get_source(
624 this: &JsValue,
625 _: &[JsValue],
626 context: &mut Context,
627 ) -> JsResult<JsValue> {
628 // 1. Let R be the this value.
629 // 2. If Type(R) is not Object, throw a TypeError exception.
630 if let Some(object) = this.as_object() {
631 let object = object.borrow();
632
633 match object.as_regexp() {
634 // 3. If R does not have an [[OriginalSource]] internal slot, then
635 None => {
636 // a. If SameValue(R, %RegExp.prototype%) is true, return "(?:)".
637 // b. Otherwise, throw a TypeError exception.
638 if JsValue::same_value(
639 this,
640 &JsValue::new(context.standard_objects().regexp_object().prototype()),
641 ) {
642 Ok(JsValue::new("(?:)"))
643 } else {
644 context.throw_type_error(
645 "RegExp.prototype.source method called on incompatible value",
646 )
647 }
648 }
649 // 4. Assert: R has an [[OriginalFlags]] internal slot.
650 Some(re) => {
651 // 5. Let src be R.[[OriginalSource]].
652 // 6. Let flags be R.[[OriginalFlags]].
653 // 7. Return EscapeRegExpPattern(src, flags).
654 RegExp::escape_pattern(&re.original_source, &re.original_flags)
655 }
656 }
657 } else {
658 context.throw_type_error("RegExp.prototype.source method called on incompatible value")
659 }
660 }
661
662 /// `22.2.3.2.5 EscapeRegExpPattern ( P, F )`
663 ///
664 /// More information:
665 /// - [ECMAScript reference][spec]
666 ///
667 /// [spec]: https://tc39.es/ecma262/#sec-escaperegexppattern
668 fn escape_pattern(src: &str, _flags: &str) -> JsResult<JsValue> {
669 if src.is_empty() {
670 Ok(JsValue::new("(?:)"))
671 } else {
672 let mut s = String::from("");
673
674 for c in src.chars() {
675 match c {
676 '/' => s.push_str("\\/"),
677 '\n' => s.push_str("\\\\n"),
678 '\r' => s.push_str("\\\\r"),
679 _ => s.push(c),
680 }
681 }
682
683 Ok(JsValue::new(s))
684 }
685 }
686
687 /// `RegExp.prototype.test( string )`
688 ///
689 /// The `test()` method executes a search for a match between a regular expression and a specified string.
690 ///
691 /// Returns `true` or `false`.
692 ///
693 /// More information:
694 /// - [ECMAScript reference][spec]
695 /// - [MDN documentation][mdn]
696 ///
697 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.test
698 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/test
699 pub(crate) fn test(
700 this: &JsValue,
701 args: &[JsValue],
702 context: &mut Context,
703 ) -> JsResult<JsValue> {
704 // 1. Let R be the this value.
705 // 2. If Type(R) is not Object, throw a TypeError exception.
706 if !this.is_object() {
707 return context
708 .throw_type_error("RegExp.prototype.test method called on incompatible value");
709 }
710
711 // 3. Let string be ? ToString(S).
712 let arg_str = args
713 .get(0)
714 .cloned()
715 .unwrap_or_default()
716 .to_string(context)?;
717
718 // 4. Let match be ? RegExpExec(R, string).
719 let m = Self::abstract_exec(this, arg_str, context)?;
720
721 // 5. If match is not null, return true; else return false.
722 if m.is_some() {
723 Ok(JsValue::new(true))
724 } else {
725 Ok(JsValue::new(false))
726 }
727 }
728
729 /// `RegExp.prototype.exec( string )`
730 ///
731 /// The exec() method executes a search for a match in a specified string.
732 ///
733 /// Returns a result array, or `null`.
734 ///
735 /// More information:
736 /// - [ECMAScript reference][spec]
737 /// - [MDN documentation][mdn]
738 ///
739 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.exec
740 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec
741 pub(crate) fn exec(
742 this: &JsValue,
743 args: &[JsValue],
744 context: &mut Context,
745 ) -> JsResult<JsValue> {
746 // 1. Let R be the this value.
747 // 2. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]).
748 let obj = this.as_object().unwrap_or_default();
749 if !obj.is_regexp() {
750 return Err(
751 context.construct_type_error("RegExp.prototype.exec called with invalid value")
752 );
753 }
754
755 // 3. Let S be ? ToString(string).
756 let arg_str = args
757 .get(0)
758 .cloned()
759 .unwrap_or_default()
760 .to_string(context)?;
761
762 // 4. Return ? RegExpBuiltinExec(R, S).
763 if let Some(v) = Self::abstract_builtin_exec(obj, arg_str, context)? {
764 Ok(v.into())
765 } else {
766 Ok(JsValue::null())
767 }
768 }
769
770 /// `22.2.5.2.1 RegExpExec ( R, S )`
771 ///
772 /// More information:
773 /// - [ECMAScript reference][spec]
774 ///
775 /// [spec]: https://tc39.es/ecma262/#sec-regexpexec
776 pub(crate) fn abstract_exec(
777 this: &JsValue,
778 input: JsString,
779 context: &mut Context,
780 ) -> JsResult<Option<JsObject>> {
781 // 1. Assert: Type(R) is Object.
782 let object = this
783 .as_object()
784 .ok_or_else(|| context.construct_type_error("RegExpExec called with invalid value"))?;
785 // 2. Assert: Type(S) is String.
786
787 // 3. Let exec be ? Get(R, "exec").
788 let exec = this.get_field("exec", context)?;
789
790 // 4. If IsCallable(exec) is true, then
791 if exec.is_function() {
792 // a. Let result be ? Call(exec, R, « S »).
793 let result = context.call(&exec, this, &[input.into()])?;
794
795 // b. If Type(result) is neither Object nor Null, throw a TypeError exception.
796 if !result.is_object() && !result.is_null() {
797 return Err(
798 context.construct_type_error("regexp exec returned neither object nor null")
799 );
800 }
801
802 // c. Return result.
803 return Ok(result.as_object());
804 }
805
806 // 5. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]).
807 if !object.is_regexp() {
808 return Err(context.construct_type_error("RegExpExec called with invalid value"));
809 }
810
811 // 6. Return ? RegExpBuiltinExec(R, S).
812 Self::abstract_builtin_exec(object, input, context)
813 }
814
815 /// `22.2.5.2.2 RegExpBuiltinExec ( R, S )`
816 ///
817 /// More information:
818 /// - [ECMAScript reference][spec]
819 ///
820 /// [spec]: https://tc39.es/ecma262/#sec-regexpbuiltinexec
821 pub(crate) fn abstract_builtin_exec(
822 this: JsObject,
823 input: JsString,
824 context: &mut Context,
825 ) -> JsResult<Option<JsObject>> {
826 // 1. Assert: R is an initialized RegExp instance.
827 let rx = {
828 let obj = this.borrow();
829 if let Some(rx) = obj.as_regexp() {
830 rx.clone()
831 } else {
832 return Err(
833 context.construct_type_error("RegExpBuiltinExec called with invalid value")
834 );
835 }
836 };
837
838 // 2. Assert: Type(S) is String.
839
840 // 3. Let length be the number of code units in S.
841 let length = input.encode_utf16().count();
842
843 // 4. Let lastIndex be ℝ(? ToLength(? Get(R, "lastIndex"))).
844 let mut last_index = this.get("lastIndex", context)?.to_length(context)?;
845
846 // 5. Let flags be R.[[OriginalFlags]].
847 let flags = &rx.original_flags;
848
849 // 6. If flags contains "g", let global be true; else let global be false.
850 let global = flags.contains('g');
851
852 // 7. If flags contains "y", let sticky be true; else let sticky be false.
853 let sticky = flags.contains('y');
854
855 // 8. If global is false and sticky is false, set lastIndex to 0.
856 if !global && !sticky {
857 last_index = 0;
858 }
859
860 // 9. Let matcher be R.[[RegExpMatcher]].
861 let matcher = &rx.matcher;
862
863 // 10. If flags contains "u", let fullUnicode be true; else let fullUnicode be false.
864 let unicode = flags.contains('u');
865
866 // 11. Let matchSucceeded be false.
867 // 12. Repeat, while matchSucceeded is false,
868 let match_value = loop {
869 // a. If lastIndex > length, then
870 if last_index > length {
871 // i. If global is true or sticky is true, then
872 if global || sticky {
873 // 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
874 this.set("lastIndex", 0, true, context)?;
875 }
876
877 // ii. Return null.
878 return Ok(None);
879 }
880
881 // b. Let r be matcher(S, lastIndex).
882 // Check if last_index is a valid utf8 index into input.
883 let last_byte_index = match String::from_utf16(
884 &input.encode_utf16().take(last_index).collect::<Vec<u16>>(),
885 ) {
886 Ok(s) => s.len(),
887 Err(_) => {
888 return Err(context.construct_type_error(
889 "Failed to get byte index from utf16 encoded string",
890 ))
891 }
892 };
893 let r = matcher.find_from(&input, last_byte_index).next();
894
895 match r {
896 // c. If r is failure, then
897 None => {
898 // i. If sticky is true, then
899 if sticky {
900 // 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
901 this.set("lastIndex", 0, true, context)?;
902
903 // 2. Return null.
904 return Ok(None);
905 }
906
907 // ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
908 last_index = advance_string_index(input.clone(), last_index, unicode);
909 }
910
911 Some(m) => {
912 // c. If r is failure, then
913 // d. Else,
914 if m.start() != last_index {
915 // i. If sticky is true, then
916 if sticky {
917 // 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
918 this.set("lastIndex", 0, true, context)?;
919
920 // 2. Return null.
921 return Ok(None);
922 }
923
924 // ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
925 last_index = advance_string_index(input.clone(), last_index, unicode);
926 } else {
927 //i. Assert: r is a State.
928 //ii. Set matchSucceeded to true.
929 break m;
930 }
931 }
932 }
933 };
934
935 // 13. Let e be r's endIndex value.
936 let mut e = match_value.end();
937
938 // 14. If fullUnicode is true, then
939 if unicode {
940 // e is an index into the Input character list, derived from S, matched by matcher.
941 // Let eUTF be the smallest index into S that corresponds to the character at element e of Input.
942 // If e is greater than or equal to the number of elements in Input, then eUTF is the number of code units in S.
943 // b. Set e to eUTF.
944 e = input.split_at(e).0.encode_utf16().count();
945 }
946
947 // 15. If global is true or sticky is true, then
948 if global || sticky {
949 // a. Perform ? Set(R, "lastIndex", 𝔽(e), true).
950 this.set("lastIndex", e, true, context)?;
951 }
952
953 // 16. Let n be the number of elements in r's captures List. (This is the same value as 22.2.2.1's NcapturingParens.)
954 let n = match_value.captures.len();
955 // 17. Assert: n < 23^2 - 1.
956 debug_assert!(n < 23usize.pow(2) - 1);
957
958 // 18. Let A be ! ArrayCreate(n + 1).
959 // 19. Assert: The mathematical value of A's "length" property is n + 1.
960 let a = Array::array_create(n + 1, None, context)?;
961
962 // 20. Perform ! CreateDataPropertyOrThrow(A, "index", 𝔽(lastIndex)).
963 a.create_data_property_or_throw("index", match_value.start(), context)
964 .unwrap();
965
966 // 21. Perform ! CreateDataPropertyOrThrow(A, "input", S).
967 a.create_data_property_or_throw("input", input.clone(), context)
968 .unwrap();
969
970 // 22. Let matchedSubstr be the substring of S from lastIndex to e.
971 let matched_substr = if let Some(s) = input.get(match_value.range()) {
972 s
973 } else {
974 ""
975 };
976
977 // 23. Perform ! CreateDataPropertyOrThrow(A, "0", matchedSubstr).
978 a.create_data_property_or_throw(0, matched_substr, context)
979 .unwrap();
980
981 // 24. If R contains any GroupName, then
982 // 25. Else,
983 let named_groups = match_value.named_groups();
984 let groups = if named_groups.clone().count() > 0 {
985 // a. Let groups be ! OrdinaryObjectCreate(null).
986 let groups = JsValue::new_object(context);
987
988 // Perform 27.f here
989 // f. If the ith capture of R was defined with a GroupName, then
990 // i. Let s be the CapturingGroupName of the corresponding RegExpIdentifierName.
991 // ii. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue).
992 for (name, range) in named_groups {
993 if let Some(range) = range {
994 let value = if let Some(s) = input.get(range.clone()) {
995 s
996 } else {
997 ""
998 };
999
1000 groups
1001 .to_object(context)?
1002 .create_data_property_or_throw(name, value, context)
1003 .unwrap();
1004 }
1005 }
1006 groups
1007 } else {
1008 // a. Let groups be undefined.
1009 JsValue::undefined()
1010 };
1011
1012 // 26. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
1013 a.create_data_property_or_throw("groups", groups, context)
1014 .unwrap();
1015
1016 // 27. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do
1017 for i in 1..=n {
1018 // a. Let captureI be ith element of r's captures List.
1019 let capture = match_value.group(i);
1020
1021 let captured_value = match capture {
1022 // b. If captureI is undefined, let capturedValue be undefined.
1023 None => JsValue::undefined(),
1024 // c. Else if fullUnicode is true, then
1025 // d. Else,
1026 Some(range) => {
1027 if let Some(s) = input.get(range) {
1028 s.into()
1029 } else {
1030 "".into()
1031 }
1032 }
1033 };
1034
1035 // e. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), capturedValue).
1036 a.create_data_property_or_throw(i, captured_value, context)
1037 .unwrap();
1038 }
1039
1040 // 28. Return A.
1041 Ok(Some(a))
1042 }
1043
1044 /// `RegExp.prototype[ @@match ]( string )`
1045 ///
1046 /// This method retrieves the matches when matching a string against a regular expression.
1047 ///
1048 /// More information:
1049 /// - [ECMAScript reference][spec]
1050 /// - [MDN documentation][mdn]
1051 ///
1052 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@match
1053 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@match
1054 pub(crate) fn r#match(
1055 this: &JsValue,
1056 args: &[JsValue],
1057 context: &mut Context,
1058 ) -> JsResult<JsValue> {
1059 // 1. Let rx be the this value.
1060 // 2. If Type(rx) is not Object, throw a TypeError exception.
1061 let rx = if let Some(rx) = this.as_object() {
1062 rx
1063 } else {
1064 return Err(context.construct_type_error(
1065 "RegExp.prototype.match method called on incompatible value",
1066 ));
1067 };
1068
1069 // 3. Let S be ? ToString(string).
1070 let arg_str = args
1071 .get(0)
1072 .cloned()
1073 .unwrap_or_default()
1074 .to_string(context)?;
1075
1076 // 4. Let global be ! ToBoolean(? Get(rx, "global")).
1077 let global = rx.get("global", context)?.to_boolean();
1078
1079 // 5. If global is false, then
1080 // 6. Else,
1081 if !global {
1082 // a. Return ? RegExpExec(rx, S).
1083 if let Some(v) = Self::abstract_exec(&JsValue::new(rx), arg_str, context)? {
1084 Ok(v.into())
1085 } else {
1086 Ok(JsValue::null())
1087 }
1088 } else {
1089 // a. Assert: global is true.
1090
1091 // b. Let fullUnicode be ! ToBoolean(? Get(rx, "unicode")).
1092 let unicode = rx.get("unicode", context)?.to_boolean();
1093
1094 // c. Perform ? Set(rx, "lastIndex", +0𝔽, true).
1095 rx.set("lastIndex", 0, true, context)?;
1096
1097 // d. Let A be ! ArrayCreate(0).
1098 let a = Array::array_create(0, None, context).unwrap();
1099
1100 // e. Let n be 0.
1101 let mut n = 0;
1102
1103 // f. Repeat,
1104 loop {
1105 // i. Let result be ? RegExpExec(rx, S).
1106 let result =
1107 Self::abstract_exec(&JsValue::new(rx.clone()), arg_str.clone(), context)?;
1108
1109 // ii. If result is null, then
1110 // iii. Else,
1111 if let Some(result) = result {
1112 // 1. Let matchStr be ? ToString(? Get(result, "0")).
1113 let match_str = result.get("0", context)?.to_string(context)?;
1114
1115 // 2. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(n)), matchStr).
1116 a.create_data_property_or_throw(n, match_str.clone(), context)
1117 .unwrap();
1118
1119 // 3. If matchStr is the empty String, then
1120 if match_str.is_empty() {
1121 // a. Let thisIndex be ℝ(? ToLength(? Get(rx, "lastIndex"))).
1122 let this_index = rx.get("lastIndex", context)?.to_length(context)?;
1123
1124 // b. Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode).
1125 let next_index = advance_string_index(arg_str.clone(), this_index, unicode);
1126
1127 // c. Perform ? Set(rx, "lastIndex", 𝔽(nextIndex), true).
1128 rx.set("lastIndex", JsValue::new(next_index), true, context)?;
1129 }
1130
1131 // 4. Set n to n + 1.
1132 n += 1;
1133 } else {
1134 // 1. If n = 0, return null.
1135 // 2. Return A.
1136 if n == 0 {
1137 return Ok(JsValue::null());
1138 } else {
1139 return Ok(a.into());
1140 }
1141 }
1142 }
1143 }
1144 }
1145
1146 /// `RegExp.prototype.toString()`
1147 ///
1148 /// Return a string representing the regular expression.
1149 ///
1150 /// More information:
1151 /// - [ECMAScript reference][spec]
1152 /// - [MDN documentation][mdn]
1153 ///
1154 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.tostring
1155 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/toString
1156 #[allow(clippy::wrong_self_convention)]
1157 pub(crate) fn to_string(
1158 this: &JsValue,
1159 _: &[JsValue],
1160 context: &mut Context,
1161 ) -> JsResult<JsValue> {
1162 let (body, flags) = if let Some(object) = this.as_object() {
1163 let object = object.borrow();
1164 let regex = object.as_regexp().ok_or_else(|| {
1165 context.construct_type_error(format!(
1166 "Method RegExp.prototype.toString called on incompatible receiver {}",
1167 this.display()
1168 ))
1169 })?;
1170 (regex.original_source.clone(), regex.original_flags.clone())
1171 } else {
1172 return context.throw_type_error(format!(
1173 "Method RegExp.prototype.toString called on incompatible receiver {}",
1174 this.display()
1175 ));
1176 };
1177 Ok(format!("/{}/{}", body, flags).into())
1178 }
1179
1180 /// `RegExp.prototype[ @@matchAll ]( string )`
1181 ///
1182 /// The `[@@matchAll]` method returns all matches of the regular expression against a string.
1183 ///
1184 /// More information:
1185 /// - [ECMAScript reference][spec]
1186 /// - [MDN documentation][mdn]
1187 ///
1188 /// [spec]: https://tc39.es/ecma262/#sec-regexp-prototype-matchall
1189 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@matchAll
1190 pub(crate) fn match_all(
1191 this: &JsValue,
1192 args: &[JsValue],
1193 context: &mut Context,
1194 ) -> JsResult<JsValue> {
1195 // 1. Let R be the this value.
1196 // 2. If Type(R) is not Object, throw a TypeError exception.
1197 if !this.is_object() {
1198 return context.throw_type_error(
1199 "RegExp.prototype.match_all method called on incompatible value",
1200 );
1201 }
1202
1203 // 3. Let S be ? ToString(string).
1204 let arg_str = args
1205 .get(0)
1206 .cloned()
1207 .unwrap_or_default()
1208 .to_string(context)?;
1209
1210 // 4. Let C be ? SpeciesConstructor(R, %RegExp%).
1211 let c = this
1212 .as_object()
1213 .unwrap_or_default()
1214 .species_constructor(context.global_object().get(RegExp::NAME, context)?, context)?;
1215
1216 // 5. Let flags be ? ToString(? Get(R, "flags")).
1217 let flags = this.get_field("flags", context)?.to_string(context)?;
1218
1219 // 6. Let matcher be ? Construct(C, « R, flags »).
1220 let matcher = c
1221 .as_object()
1222 .expect("SpeciesConstructor returned non Object")
1223 .construct(&[this.clone(), flags.clone().into()], &c, context)?;
1224
1225 // 7. Let lastIndex be ? ToLength(? Get(R, "lastIndex")).
1226 let last_index = this.get_field("lastIndex", context)?.to_length(context)?;
1227
1228 // 8. Perform ? Set(matcher, "lastIndex", lastIndex, true).
1229 matcher.set_field("lastIndex", last_index, true, context)?;
1230
1231 // 9. If flags contains "g", let global be true.
1232 // 10. Else, let global be false.
1233 let global = flags.contains('g');
1234
1235 // 11. If flags contains "u", let fullUnicode be true.
1236 // 12. Else, let fullUnicode be false.
1237 let unicode = flags.contains('u');
1238
1239 // 13. Return ! CreateRegExpStringIterator(matcher, S, global, fullUnicode).
1240 RegExpStringIterator::create_regexp_string_iterator(
1241 &matcher, arg_str, global, unicode, context,
1242 )
1243 }
1244
1245 /// `RegExp.prototype [ @@replace ] ( string, replaceValue )`
1246 ///
1247 /// The [@@replace]() method replaces some or all matches of a this pattern in a string by a replacement,
1248 /// and returns the result of the replacement as a new string.
1249 /// The replacement can be a string or a function to be called for each match.
1250 ///
1251 /// More information:
1252 /// - [ECMAScript reference][spec]
1253 /// - [MDN documentation][mdn]
1254 ///
1255 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
1256 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@replace
1257 pub(crate) fn replace(
1258 this: &JsValue,
1259 args: &[JsValue],
1260 context: &mut Context,
1261 ) -> JsResult<JsValue> {
1262 // 1. Let rx be the this value.
1263 // 2. If Type(rx) is not Object, throw a TypeError exception.
1264 let rx = if let Some(rx) = this.as_object() {
1265 rx
1266 } else {
1267 return context.throw_type_error(
1268 "RegExp.prototype[Symbol.replace] method called on incompatible value",
1269 );
1270 };
1271
1272 // 3. Let S be ? ToString(string).
1273 let arg_str = args
1274 .get(0)
1275 .cloned()
1276 .unwrap_or_default()
1277 .to_string(context)?;
1278
1279 // 4. Let lengthS be the number of code unit elements in S.
1280 let length_arg_str = arg_str.encode_utf16().count();
1281
1282 // 5. Let functionalReplace be IsCallable(replaceValue).
1283 let mut replace_value = args.get_or_undefined(1).clone();
1284 let functional_replace = replace_value.is_function();
1285
1286 // 6. If functionalReplace is false, then
1287 if !functional_replace {
1288 // a. Set replaceValue to ? ToString(replaceValue).
1289 replace_value = replace_value.to_string(context)?.into();
1290 }
1291
1292 // 7. Let global be ! ToBoolean(? Get(rx, "global")).
1293 let global = rx.get("global", context)?.to_boolean();
1294
1295 // 8. If global is true, then
1296 let mut unicode = false;
1297 if global {
1298 // a. Let fullUnicode be ! ToBoolean(? Get(rx, "unicode")).
1299 unicode = rx.get("unicode", context)?.to_boolean();
1300
1301 // b. Perform ? Set(rx, "lastIndex", +0𝔽, true).
1302 rx.set("lastIndex", 0, true, context)?;
1303 }
1304
1305 // 9. Let results be a new empty List.
1306 let mut results = Vec::new();
1307
1308 // 10. Let done be false.
1309 // 11. Repeat, while done is false,
1310 loop {
1311 // a. Let result be ? RegExpExec(rx, S).
1312 let result = Self::abstract_exec(&JsValue::new(rx.clone()), arg_str.clone(), context)?;
1313
1314 // b. If result is null, set done to true.
1315 // c. Else,
1316 if let Some(result) = result {
1317 // i. Append result to the end of results.
1318 results.push(result.clone());
1319
1320 // ii. If global is false, set done to true.
1321 // iii. Else,
1322 if !global {
1323 break;
1324 } else {
1325 // 1. Let matchStr be ? ToString(? Get(result, "0")).
1326 let match_str = result.get("0", context)?.to_string(context)?;
1327
1328 // 2. If matchStr is the empty String, then
1329 if match_str.is_empty() {
1330 // a. Let thisIndex be ℝ(? ToLength(? Get(rx, "lastIndex"))).
1331 let this_index = rx.get("lastIndex", context)?.to_length(context)?;
1332
1333 // b. Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode).
1334 let next_index = advance_string_index(arg_str.clone(), this_index, unicode);
1335
1336 // c. Perform ? Set(rx, "lastIndex", 𝔽(nextIndex), true).
1337 rx.set("lastIndex", JsValue::new(next_index), true, context)?;
1338 }
1339 }
1340 } else {
1341 break;
1342 }
1343 }
1344
1345 // 12. Let accumulatedResult be the empty String.
1346 let mut accumulated_result = JsString::new("");
1347
1348 // 13. Let nextSourcePosition be 0.
1349 let mut next_source_position = 0;
1350
1351 // 14. For each element result of results, do
1352 for result in results {
1353 // a. Let resultLength be ? LengthOfArrayLike(result).
1354 let result_length = result.length_of_array_like(context)? as isize;
1355
1356 // b. Let nCaptures be max(resultLength - 1, 0).
1357 let n_captures = std::cmp::max(result_length - 1, 0);
1358
1359 // c. Let matched be ? ToString(? Get(result, "0")).
1360 let matched = result.get("0", context)?.to_string(context)?;
1361
1362 // d. Let matchLength be the number of code units in matched.
1363 let match_length = matched.encode_utf16().count();
1364
1365 // e. Let position be ? ToIntegerOrInfinity(? Get(result, "index")).
1366 let position = result
1367 .get("index", context)?
1368 .to_integer_or_infinity(context)?;
1369
1370 // f. Set position to the result of clamping position between 0 and lengthS.
1371 //position = position.
1372 let position = match position {
1373 IntegerOrInfinity::Integer(i) => {
1374 if i < 0 {
1375 0
1376 } else if i as usize > length_arg_str {
1377 length_arg_str
1378 } else {
1379 i as usize
1380 }
1381 }
1382 IntegerOrInfinity::PositiveInfinity => length_arg_str,
1383 IntegerOrInfinity::NegativeInfinity => 0,
1384 };
1385
1386 // h. Let captures be a new empty List.
1387 let mut captures = Vec::new();
1388
1389 // g. Let n be 1.
1390 // i. Repeat, while n ≤ nCaptures,
1391 for n in 1..=n_captures {
1392 // i. Let capN be ? Get(result, ! ToString(𝔽(n))).
1393 let mut cap_n = result.get(n.to_string(), context)?;
1394
1395 // ii. If capN is not undefined, then
1396 if !cap_n.is_undefined() {
1397 // 1. Set capN to ? ToString(capN).
1398 cap_n = cap_n.to_string(context)?.into();
1399 }
1400
1401 // iii. Append capN as the last element of captures.
1402 captures.push(cap_n);
1403
1404 // iv. Set n to n + 1.
1405 }
1406
1407 // j. Let namedCaptures be ? Get(result, "groups").
1408 let mut named_captures = result.get("groups", context)?;
1409
1410 // k. If functionalReplace is true, then
1411 // l. Else,
1412 let replacement: JsString;
1413 if functional_replace {
1414 // i. Let replacerArgs be « matched ».
1415 let mut replacer_args = vec![JsValue::new(matched)];
1416
1417 // ii. Append in List order the elements of captures to the end of the List replacerArgs.
1418 replacer_args.extend(captures);
1419
1420 // iii. Append 𝔽(position) and S to replacerArgs.
1421 replacer_args.push(position.into());
1422 replacer_args.push(arg_str.clone().into());
1423
1424 // iv. If namedCaptures is not undefined, then
1425 if !named_captures.is_undefined() {
1426 // 1. Append namedCaptures as the last element of replacerArgs.
1427 replacer_args.push(named_captures);
1428 }
1429
1430 // v. Let replValue be ? Call(replaceValue, undefined, replacerArgs).
1431 let repl_value =
1432 context.call(&replace_value, &JsValue::undefined(), &replacer_args)?;
1433
1434 // vi. Let replacement be ? ToString(replValue).
1435 replacement = repl_value.to_string(context)?;
1436 } else {
1437 // i. If namedCaptures is not undefined, then
1438 if !named_captures.is_undefined() {
1439 // 1. Set namedCaptures to ? ToObject(namedCaptures).
1440 named_captures = named_captures.to_object(context)?.into();
1441 }
1442
1443 // ii. Let replacement be ? GetSubstitution(matched, S, position, captures, namedCaptures, replaceValue).
1444 replacement = string::get_substitution(
1445 matched.to_string(),
1446 arg_str.to_string(),
1447 position,
1448 captures,
1449 named_captures,
1450 replace_value.to_string(context)?,
1451 context,
1452 )?;
1453 }
1454
1455 // m. If position ≥ nextSourcePosition, then
1456 if position >= next_source_position {
1457 // i. NOTE: position should not normally move backwards.
1458 // If it does, it is an indication of an ill-behaving RegExp subclass
1459 // or use of an access triggered side-effect to change the global flag or other characteristics of rx.
1460 // In such cases, the corresponding substitution is ignored.
1461 // ii. Set accumulatedResult to the string-concatenation of accumulatedResult,
1462 // the substring of S from nextSourcePosition to position, and replacement.
1463 accumulated_result = format!(
1464 "{}{}{}",
1465 accumulated_result,
1466 arg_str.get(next_source_position..position).unwrap(),
1467 replacement
1468 )
1469 .into();
1470
1471 // iii. Set nextSourcePosition to position + matchLength.
1472 next_source_position = position + match_length;
1473 }
1474 }
1475
1476 // 15. If nextSourcePosition ≥ lengthS, return accumulatedResult.
1477 if next_source_position >= length_arg_str {
1478 return Ok(accumulated_result.into());
1479 }
1480
1481 // 16. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition.
1482 Ok(format!(
1483 "{}{}",
1484 accumulated_result,
1485 arg_str.get(next_source_position..).unwrap()
1486 )
1487 .into())
1488 }
1489
1490 /// `RegExp.prototype[ @@search ]( string )`
1491 ///
1492 /// This method executes a search for a match between a this regular expression and a string.
1493 ///
1494 /// More information:
1495 /// - [ECMAScript reference][spec]
1496 /// - [MDN documentation][mdn]
1497 ///
1498 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@search
1499 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@search
1500 pub(crate) fn search(
1501 this: &JsValue,
1502 args: &[JsValue],
1503 context: &mut Context,
1504 ) -> JsResult<JsValue> {
1505 // 1. Let rx be the this value.
1506 // 2. If Type(rx) is not Object, throw a TypeError exception.
1507 let rx = if let Some(rx) = this.as_object() {
1508 rx
1509 } else {
1510 return Err(context.construct_type_error(
1511 "RegExp.prototype[Symbol.search] method called on incompatible value",
1512 ));
1513 };
1514
1515 // 3. Let S be ? ToString(string).
1516 let arg_str = args
1517 .get(0)
1518 .cloned()
1519 .unwrap_or_default()
1520 .to_string(context)?;
1521
1522 // 4. Let previousLastIndex be ? Get(rx, "lastIndex").
1523 let previous_last_index = rx.get("lastIndex", context)?;
1524
1525 // 5. If SameValue(previousLastIndex, +0𝔽) is false, then
1526 if !JsValue::same_value(&previous_last_index, &JsValue::new(0)) {
1527 // a. Perform ? Set(rx, "lastIndex", +0𝔽, true).
1528 rx.set("lastIndex", 0, true, context)?;
1529 }
1530
1531 // 6. Let result be ? RegExpExec(rx, S).
1532 let result = Self::abstract_exec(&JsValue::new(rx.clone()), arg_str, context)?;
1533
1534 // 7. Let currentLastIndex be ? Get(rx, "lastIndex").
1535 let current_last_index = rx.get("lastIndex", context)?;
1536
1537 // 8. If SameValue(currentLastIndex, previousLastIndex) is false, then
1538 if !JsValue::same_value(¤t_last_index, &previous_last_index) {
1539 // a. Perform ? Set(rx, "lastIndex", previousLastIndex, true).
1540 rx.set("lastIndex", previous_last_index, true, context)?;
1541 }
1542
1543 // 9. If result is null, return -1𝔽.
1544 // 10. Return ? Get(result, "index").
1545 if let Some(result) = result {
1546 result.get("index", context)
1547 } else {
1548 Ok(JsValue::new(-1))
1549 }
1550 }
1551
1552 /// `RegExp.prototype [ @@split ] ( string, limit )`
1553 ///
1554 /// The [@@split]() method splits a String object into an array of strings by separating the string into substrings.
1555 ///
1556 /// More information:
1557 /// - [ECMAScript reference][spec]
1558 /// - [MDN documentation][mdn]
1559 ///
1560 /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@split
1561 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@split
1562 pub(crate) fn split(
1563 this: &JsValue,
1564 args: &[JsValue],
1565 context: &mut Context,
1566 ) -> JsResult<JsValue> {
1567 // 1. Let rx be the this value.
1568 // 2. If Type(rx) is not Object, throw a TypeError exception.
1569 let rx = if let Some(rx) = this.as_object() {
1570 rx
1571 } else {
1572 return Err(context.construct_type_error(
1573 "RegExp.prototype.split method called on incompatible value",
1574 ));
1575 };
1576
1577 // 3. Let S be ? ToString(string).
1578 let arg_str = args
1579 .get(0)
1580 .cloned()
1581 .unwrap_or_default()
1582 .to_string(context)?;
1583
1584 // 4. Let C be ? SpeciesConstructor(rx, %RegExp%).
1585 let constructor =
1586 rx.species_constructor(context.global_object().get(RegExp::NAME, context)?, context)?;
1587
1588 // 5. Let flags be ? ToString(? Get(rx, "flags")).
1589 let flags = rx.get("flags", context)?.to_string(context)?;
1590
1591 // 6. If flags contains "u", let unicodeMatching be true.
1592 // 7. Else, let unicodeMatching be false.
1593 let unicode = flags.contains('u');
1594
1595 // 8. If flags contains "y", let newFlags be flags.
1596 // 9. Else, let newFlags be the string-concatenation of flags and "y".
1597 let new_flags = if flags.contains('y') {
1598 flags.to_string()
1599 } else {
1600 format!("{}{}", flags, 'y')
1601 };
1602
1603 // 10. Let splitter be ? Construct(C, « rx, newFlags »).
1604 let splitter = constructor
1605 .as_object()
1606 .expect("SpeciesConstructor returned non Object")
1607 .construct(
1608 &[JsValue::from(rx), new_flags.into()],
1609 &constructor,
1610 context,
1611 )?;
1612
1613 // 11. Let A be ! ArrayCreate(0).
1614 let a = Array::array_create(0, None, context).unwrap();
1615
1616 // 12. Let lengthA be 0.
1617 let mut length_a = 0;
1618
1619 // 13. If limit is undefined, let lim be 2^32 - 1; else let lim be ℝ(? ToUint32(limit)).
1620 let limit = args.get_or_undefined(1);
1621 let lim = if limit.is_undefined() {
1622 u32::MAX
1623 } else {
1624 limit.to_u32(context)?
1625 };
1626
1627 // 14. If lim is 0, return A.
1628 if lim == 0 {
1629 return Ok(a.into());
1630 }
1631
1632 // 15. Let size be the length of S.
1633 let size = arg_str.encode_utf16().count();
1634
1635 // 16. If size is 0, then
1636 if size == 0 {
1637 // a. Let z be ? RegExpExec(splitter, S).
1638 let result = Self::abstract_exec(&splitter, arg_str.clone(), context)?;
1639
1640 // b. If z is not null, return A.
1641 if result.is_some() {
1642 return Ok(a.into());
1643 }
1644
1645 // c. Perform ! CreateDataPropertyOrThrow(A, "0", S).
1646 a.create_data_property_or_throw(0, arg_str, context)
1647 .unwrap();
1648
1649 // d. Return A.
1650 return Ok(a.into());
1651 }
1652
1653 // 17. Let p be 0.
1654 // 18. Let q be p.
1655 let mut p = 0;
1656 let mut q = p;
1657
1658 // 19. Repeat, while q < size,
1659 while q < size {
1660 // a. Perform ? Set(splitter, "lastIndex", 𝔽(q), true).
1661 splitter.set_field("lastIndex", JsValue::new(q), true, context)?;
1662
1663 // b. Let z be ? RegExpExec(splitter, S).
1664 let result = Self::abstract_exec(&splitter, arg_str.clone(), context)?;
1665
1666 // c. If z is null, set q to AdvanceStringIndex(S, q, unicodeMatching).
1667 // d. Else,
1668 if let Some(result) = result {
1669 // i. Let e be ℝ(? ToLength(? Get(splitter, "lastIndex"))).
1670 let mut e = splitter
1671 .get_field("lastIndex", context)?
1672 .to_length(context)?;
1673
1674 // ii. Set e to min(e, size).
1675 e = std::cmp::min(e, size);
1676
1677 // iii. If e = p, set q to AdvanceStringIndex(S, q, unicodeMatching).
1678 // iv. Else,
1679 if e == p {
1680 q = advance_string_index(arg_str.clone(), q, unicode);
1681 } else {
1682 // 1. Let T be the substring of S from p to q.
1683 let arg_str_substring = String::from_utf16_lossy(
1684 &arg_str
1685 .encode_utf16()
1686 .skip(p)
1687 .take(q - p)
1688 .collect::<Vec<u16>>(),
1689 );
1690
1691 // 2. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T).
1692 a.create_data_property_or_throw(length_a, arg_str_substring, context)
1693 .unwrap();
1694
1695 // 3. Set lengthA to lengthA + 1.
1696 length_a += 1;
1697
1698 // 4. If lengthA = lim, return A.
1699 if length_a == lim {
1700 return Ok(a.into());
1701 }
1702
1703 // 5. Set p to e.
1704 p = e;
1705
1706 // 6. Let numberOfCaptures be ? LengthOfArrayLike(z).
1707 let mut number_of_captures = result.length_of_array_like(context)? as isize;
1708
1709 // 7. Set numberOfCaptures to max(numberOfCaptures - 1, 0).
1710 number_of_captures = if number_of_captures == 0 {
1711 0
1712 } else {
1713 std::cmp::max(number_of_captures - 1, 0)
1714 };
1715
1716 // 8. Let i be 1.
1717 // 9. Repeat, while i ≤ numberOfCaptures,
1718 for i in 1..=number_of_captures {
1719 // a. Let nextCapture be ? Get(z, ! ToString(𝔽(i))).
1720 let next_capture = result.get(i.to_string(), context)?;
1721
1722 // b. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), nextCapture).
1723 a.create_data_property_or_throw(length_a, next_capture, context)
1724 .unwrap();
1725
1726 // d. Set lengthA to lengthA + 1.
1727 length_a += 1;
1728
1729 // e. If lengthA = lim, return A.
1730 if length_a == lim {
1731 return Ok(a.into());
1732 }
1733 }
1734
1735 // 10. Set q to p.
1736 q = p;
1737 }
1738 } else {
1739 q = advance_string_index(arg_str.clone(), q, unicode);
1740 }
1741 }
1742
1743 // 20. Let T be the substring of S from p to size.
1744 let arg_str_substring = String::from_utf16_lossy(
1745 &arg_str
1746 .encode_utf16()
1747 .skip(p)
1748 .take(size - p)
1749 .collect::<Vec<u16>>(),
1750 );
1751
1752 // 21. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T).
1753 a.create_data_property_or_throw(length_a, arg_str_substring, context)
1754 .unwrap();
1755
1756 // 22. Return A.
1757 Ok(a.into())
1758 }
1759}
1760
1761/// `22.2.5.2.3 AdvanceStringIndex ( S, index, unicode )`
1762///
1763/// More information:
1764/// - [ECMAScript reference][spec]
1765///
1766/// [spec]: https://tc39.es/ecma262/#sec-advancestringindex
1767fn advance_string_index(s: JsString, index: usize, unicode: bool) -> usize {
1768 // Regress only works with utf8, so this function differs from the spec.
1769
1770 // 1. Assert: index ≤ 2^53 - 1.
1771
1772 // 2. If unicode is false, return index + 1.
1773 if !unicode {
1774 return index + 1;
1775 }
1776
1777 // 3. Let length be the number of code units in S.
1778 let length = s.encode_utf16().count();
1779
1780 // 4. If index + 1 ≥ length, return index + 1.
1781 if index + 1 > length {
1782 return index + 1;
1783 }
1784
1785 // 5. Let cp be ! CodePointAt(S, index).
1786 let (_, offset, _) =
1787 crate::builtins::string::code_point_at(s, index as i32).expect("Failed to get code point");
1788
1789 index + offset as usize
1790}