boa/builtins/regexp/
mod.rs

1//! This module implements the global `RegExp` object.
2//!
//! The `RegExp` object is used for matching text with a pattern.
4//!
5//! More information:
6//!  - [ECMAScript reference][spec]
7//!  - [MDN documentation][mdn]
8//!
9//! [spec]: https://tc39.es/ecma262/#sec-regexp-constructor
10//! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
11
12pub mod regexp_string_iterator;
13
14use crate::{
15    builtins::{array::Array, string, BuiltIn},
16    context::StandardObjects,
17    gc::{empty_trace, Finalize, Trace},
18    object::{
19        internal_methods::get_prototype_from_constructor, ConstructorBuilder, FunctionBuilder,
20        JsObject, Object, ObjectData,
21    },
22    property::Attribute,
23    symbol::WellKnownSymbols,
24    value::{IntegerOrInfinity, JsValue},
25    BoaProfiler, Context, JsResult, JsString,
26};
27use regexp_string_iterator::RegExpStringIterator;
28use regress::Regex;
29
30use super::JsArgs;
31
32#[cfg(test)]
33mod tests;
34
35/// The internal representation on a `RegExp` object.
36#[derive(Debug, Clone, Finalize)]
37pub struct RegExp {
38    /// Regex matcher.
39    matcher: Regex,
40
41    /// Update last_index, set if global or sticky flags are set.
42    use_last_index: bool,
43
44    /// Flag 's' - dot matches newline characters.
45    dot_all: bool,
46
47    /// Flag 'g'
48    global: bool,
49
50    /// Flag 'i' - ignore case.
51    ignore_case: bool,
52
53    /// Flag 'm' - '^' and '$' match beginning/end of line.
54    multiline: bool,
55
56    /// Flag 'y'
57    sticky: bool,
58
59    /// Flag 'u' - Unicode.
60    unicode: bool,
61
62    original_source: JsString,
63    original_flags: JsString,
64}
65
66// Only safe while regress::Regex doesn't implement Trace itself.
67unsafe impl Trace for RegExp {
68    empty_trace!();
69}
70
71impl BuiltIn for RegExp {
72    const NAME: &'static str = "RegExp";
73
74    fn attribute() -> Attribute {
75        Attribute::WRITABLE | Attribute::NON_ENUMERABLE | Attribute::CONFIGURABLE
76    }
77
78    fn init(context: &mut Context) -> (&'static str, JsValue, Attribute) {
79        let _timer = BoaProfiler::global().start_event(Self::NAME, "init");
80
81        let get_species = FunctionBuilder::native(context, Self::get_species)
82            .name("get [Symbol.species]")
83            .constructable(false)
84            .build();
85
86        let flag_attributes = Attribute::CONFIGURABLE | Attribute::NON_ENUMERABLE;
87
88        let get_global = FunctionBuilder::native(context, Self::get_global)
89            .name("get global")
90            .constructable(false)
91            .build();
92        let get_ignore_case = FunctionBuilder::native(context, Self::get_ignore_case)
93            .name("get ignoreCase")
94            .constructable(false)
95            .build();
96        let get_multiline = FunctionBuilder::native(context, Self::get_multiline)
97            .name("get multiline")
98            .constructable(false)
99            .build();
100        let get_dot_all = FunctionBuilder::native(context, Self::get_dot_all)
101            .name("get dotAll")
102            .constructable(false)
103            .build();
104        let get_unicode = FunctionBuilder::native(context, Self::get_unicode)
105            .name("get unicode")
106            .constructable(false)
107            .build();
108        let get_sticky = FunctionBuilder::native(context, Self::get_sticky)
109            .name("get sticky")
110            .constructable(false)
111            .build();
112        let get_flags = FunctionBuilder::native(context, Self::get_flags)
113            .name("get flags")
114            .constructable(false)
115            .build();
116        let get_source = FunctionBuilder::native(context, Self::get_source)
117            .name("get source")
118            .constructable(false)
119            .build();
120        let regexp_object = ConstructorBuilder::with_standard_object(
121            context,
122            Self::constructor,
123            context.standard_objects().regexp_object().clone(),
124        )
125        .name(Self::NAME)
126        .length(Self::LENGTH)
127        .static_accessor(
128            WellKnownSymbols::species(),
129            Some(get_species),
130            None,
131            Attribute::CONFIGURABLE,
132        )
133        .property("lastIndex", 0, Attribute::all())
134        .method(Self::test, "test", 1)
135        .method(Self::exec, "exec", 1)
136        .method(Self::to_string, "toString", 0)
137        .method(
138            Self::r#match,
139            (WellKnownSymbols::match_(), "[Symbol.match]"),
140            1,
141        )
142        .method(
143            Self::match_all,
144            (WellKnownSymbols::match_all(), "[Symbol.matchAll]"),
145            1,
146        )
147        .method(
148            Self::replace,
149            (WellKnownSymbols::replace(), "[Symbol.replace]"),
150            2,
151        )
152        .method(
153            Self::search,
154            (WellKnownSymbols::search(), "[Symbol.search]"),
155            1,
156        )
157        .method(
158            Self::split,
159            (WellKnownSymbols::split(), "[Symbol.split]"),
160            2,
161        )
162        .accessor("global", Some(get_global), None, flag_attributes)
163        .accessor("ignoreCase", Some(get_ignore_case), None, flag_attributes)
164        .accessor("multiline", Some(get_multiline), None, flag_attributes)
165        .accessor("dotAll", Some(get_dot_all), None, flag_attributes)
166        .accessor("unicode", Some(get_unicode), None, flag_attributes)
167        .accessor("sticky", Some(get_sticky), None, flag_attributes)
168        .accessor("flags", Some(get_flags), None, flag_attributes)
169        .accessor("source", Some(get_source), None, flag_attributes)
170        .build();
171
172        // TODO: add them RegExp accessor properties
173
174        (Self::NAME, regexp_object.into(), Self::attribute())
175    }
176}
177
178impl RegExp {
179    /// The name of the object.
180    pub(crate) const NAME: &'static str = "RegExp";
181
182    /// The amount of arguments this function object takes.
183    pub(crate) const LENGTH: usize = 2;
184
185    /// `22.2.3.1 RegExp ( pattern, flags )`
186    ///
187    /// More information:
188    ///  - [ECMAScript reference][spec]
189    ///
190    /// [spec]: https://tc39.es/ecma262/#sec-regexp-pattern-flags
191    pub(crate) fn constructor(
192        new_target: &JsValue,
193        args: &[JsValue],
194        context: &mut Context,
195    ) -> JsResult<JsValue> {
196        let pattern = args.get_or_undefined(0);
197        let flags = args.get_or_undefined(1);
198
199        // 1. Let patternIsRegExp be ? IsRegExp(pattern).
200        let pattern_is_regexp = if let JsValue::Object(obj) = &pattern {
201            if obj.is_regexp() {
202                Some(obj)
203            } else {
204                None
205            }
206        } else {
207            None
208        };
209
210        // 2. If NewTarget is undefined, then
211        // 3. Else, let newTarget be NewTarget.
212        if new_target.is_undefined() {
213            // a. Let newTarget be the active function object.
214            // b. If patternIsRegExp is true and flags is undefined, then
215            if let Some(pattern) = pattern_is_regexp {
216                if flags.is_undefined() {
217                    // i. Let patternConstructor be ? Get(pattern, "constructor").
218                    let pattern_constructor = pattern.get("constructor", context)?;
219                    // ii. If SameValue(newTarget, patternConstructor) is true, return pattern.
220                    if JsValue::same_value(new_target, &pattern_constructor) {
221                        return Ok(pattern.clone().into());
222                    }
223                }
224            }
225        }
226
227        // 4. If Type(pattern) is Object and pattern has a [[RegExpMatcher]] internal slot, then
228        // 6. Else,
229        let (p, f) = if let Some(pattern) = pattern_is_regexp {
230            let obj = pattern.borrow();
231            let regexp = obj.as_regexp().unwrap();
232
233            // a. Let P be pattern.[[OriginalSource]].
234            // b. If flags is undefined, let F be pattern.[[OriginalFlags]].
235            // c. Else, let F be flags.
236            if flags.is_undefined() {
237                (
238                    JsValue::new(regexp.original_source.clone()),
239                    JsValue::new(regexp.original_flags.clone()),
240                )
241            } else {
242                (JsValue::new(regexp.original_source.clone()), flags.clone())
243            }
244        } else {
245            // a. Let P be pattern.
246            // b. Let F be flags.
247            (pattern.clone(), flags.clone())
248        };
249
250        // 7. Let O be ? RegExpAlloc(newTarget).
251        let o = RegExp::alloc(new_target, &[], context)?;
252
253        // 8.Return ? RegExpInitialize(O, P, F).
254        RegExp::initialize(&o, &[p, f], context)
255    }
256
257    /// `22.2.3.2.1 RegExpAlloc ( newTarget )`
258    ///
259    /// More information:
260    ///  - [ECMAScript reference][spec]
261    ///
262    /// [spec]: https://tc39.es/ecma262/#sec-regexpalloc
263    fn alloc(this: &JsValue, _: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
264        let proto = get_prototype_from_constructor(this, StandardObjects::regexp_object, context)?;
265
266        Ok(JsObject::new(Object::create(proto.into())).into())
267    }
268
269    /// `22.2.3.2.2 RegExpInitialize ( obj, pattern, flags )`
270    ///
271    /// More information:
272    ///  - [ECMAScript reference][spec]
273    ///
274    /// [spec]: https://tc39.es/ecma262/#sec-regexpinitialize
275    fn initialize(this: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
276        let pattern = args.get_or_undefined(0);
277        let flags = args.get_or_undefined(1);
278
279        // 1. If pattern is undefined, let P be the empty String.
280        // 2. Else, let P be ? ToString(pattern).
281        let p = if pattern.is_undefined() {
282            JsString::new("")
283        } else {
284            pattern.to_string(context)?
285        };
286
287        // 3. If flags is undefined, let F be the empty String.
288        // 4. Else, let F be ? ToString(flags).
289        let f = if flags.is_undefined() {
290            JsString::new("")
291        } else {
292            flags.to_string(context)?
293        };
294
295        // 5. If F contains any code unit other than "g", "i", "m", "s", "u", or "y"
296        //    or if it contains the same code unit more than once, throw a SyntaxError exception.
297        let mut global = false;
298        let mut ignore_case = false;
299        let mut multiline = false;
300        let mut dot_all = false;
301        let mut unicode = false;
302        let mut sticky = false;
303        for c in f.chars() {
304            match c {
305                'g' if global => {
306                    return context.throw_syntax_error("RegExp flags contains multiple 'g'")
307                }
308                'g' => global = true,
309                'i' if ignore_case => {
310                    return context.throw_syntax_error("RegExp flags contains multiple 'i'")
311                }
312                'i' => ignore_case = true,
313                'm' if multiline => {
314                    return context.throw_syntax_error("RegExp flags contains multiple 'm'")
315                }
316                'm' => multiline = true,
317                's' if dot_all => {
318                    return context.throw_syntax_error("RegExp flags contains multiple 's'")
319                }
320                's' => dot_all = true,
321                'u' if unicode => {
322                    return context.throw_syntax_error("RegExp flags contains multiple 'u'")
323                }
324                'u' => unicode = true,
325                'y' if sticky => {
326                    return context.throw_syntax_error("RegExp flags contains multiple 'y'")
327                }
328                'y' => sticky = true,
329                c => {
330                    return context.throw_syntax_error(format!(
331                        "RegExp flags contains unknown code unit '{}'",
332                        c
333                    ))
334                }
335            }
336        }
337
338        // 12. Set obj.[[OriginalSource]] to P.
339        // 13. Set obj.[[OriginalFlags]] to F.
340        // 14. Set obj.[[RegExpMatcher]] to the Abstract Closure that evaluates parseResult by applying the semantics provided in 22.2.2 using patternCharacters as the pattern's List of SourceCharacter values and F as the flag parameters.
341        let matcher = match Regex::with_flags(&p, f.as_ref()) {
342            Err(error) => {
343                return Err(context
344                    .construct_syntax_error(format!("failed to create matcher: {}", error.text)));
345            }
346            Ok(val) => val,
347        };
348
349        let regexp = RegExp {
350            matcher,
351            use_last_index: global || sticky,
352            dot_all,
353            global,
354            ignore_case,
355            multiline,
356            sticky,
357            unicode,
358            original_source: p,
359            original_flags: f,
360        };
361
362        this.set_data(ObjectData::reg_exp(Box::new(regexp)));
363
364        // 16. Return obj.
365        Ok(this.clone())
366    }
367
368    /// `22.2.3.2.4 RegExpCreate ( P, F )`
369    ///
370    /// More information:
371    ///  - [ECMAScript reference][spec]
372    ///
373    /// [spec]: https://tc39.es/ecma262/#sec-regexpcreate
374    pub(crate) fn create(p: JsValue, f: JsValue, context: &mut Context) -> JsResult<JsValue> {
375        // 1. Let obj be ? RegExpAlloc(%RegExp%).
376        let obj = RegExp::alloc(
377            &context.global_object().get(RegExp::NAME, context)?,
378            &[],
379            context,
380        )?;
381
382        // 2. Return ? RegExpInitialize(obj, P, F).
383        RegExp::initialize(&obj, &[p, f], context)
384    }
385
386    /// `get RegExp [ @@species ]`
387    ///
388    /// The `RegExp [ @@species ]` accessor property returns the RegExp constructor.
389    ///
390    /// More information:
391    ///  - [ECMAScript reference][spec]
392    ///  - [MDN documentation][mdn]
393    ///
394    /// [spec]: https://tc39.es/ecma262/#sec-get-regexp-@@species
395    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@species
396    fn get_species(this: &JsValue, _: &[JsValue], _: &mut Context) -> JsResult<JsValue> {
397        // 1. Return the this value.
398        Ok(this.clone())
399    }
400
401    #[inline]
402    fn regexp_has_flag(this: &JsValue, flag: char, context: &mut Context) -> JsResult<JsValue> {
403        if let Some(object) = this.as_object() {
404            if let Some(regexp) = object.borrow().as_regexp() {
405                return Ok(JsValue::new(match flag {
406                    'g' => regexp.global,
407                    'm' => regexp.multiline,
408                    's' => regexp.dot_all,
409                    'i' => regexp.ignore_case,
410                    'u' => regexp.unicode,
411                    'y' => regexp.sticky,
412                    _ => unreachable!(),
413                }));
414            }
415
416            if JsObject::equals(
417                &object,
418                &context.standard_objects().regexp_object().prototype,
419            ) {
420                return Ok(JsValue::undefined());
421            }
422        }
423
424        let name = match flag {
425            'g' => "global",
426            'm' => "multiline",
427            's' => "dotAll",
428            'i' => "ignoreCase",
429            'u' => "unicode",
430            'y' => "sticky",
431            _ => unreachable!(),
432        };
433
434        context.throw_type_error(format!(
435            "RegExp.prototype.{} getter called on non-RegExp object",
436            name
437        ))
438    }
439
440    /// `get RegExp.prototype.global`
441    ///
442    /// The `global` property indicates whether or not the "`g`" flag is used with the regular expression.
443    ///
444    /// More information:
445    ///  - [ECMAScript reference][spec]
446    ///  - [MDN documentation][mdn]
447    ///
448    /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.global
449    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/global
450    pub(crate) fn get_global(
451        this: &JsValue,
452        _: &[JsValue],
453        context: &mut Context,
454    ) -> JsResult<JsValue> {
455        Self::regexp_has_flag(this, 'g', context)
456    }
457
458    /// `get RegExp.prototype.ignoreCase`
459    ///
460    /// The `ignoreCase` property indicates whether or not the "`i`" flag is used with the regular expression.
461    ///
462    /// More information:
463    ///  - [ECMAScript reference][spec]
464    ///  - [MDN documentation][mdn]
465    ///
466    /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.ignorecase
467    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/ignoreCase
468    pub(crate) fn get_ignore_case(
469        this: &JsValue,
470        _: &[JsValue],
471        context: &mut Context,
472    ) -> JsResult<JsValue> {
473        Self::regexp_has_flag(this, 'i', context)
474    }
475
476    /// `get RegExp.prototype.multiline`
477    ///
478    /// The multiline property indicates whether or not the "m" flag is used with the regular expression.
479    ///
480    /// More information:
481    ///  - [ECMAScript reference][spec]
482    ///  - [MDN documentation][mdn]
483    ///
484    /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.multiline
485    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/multiline
486    pub(crate) fn get_multiline(
487        this: &JsValue,
488        _: &[JsValue],
489        context: &mut Context,
490    ) -> JsResult<JsValue> {
491        Self::regexp_has_flag(this, 'm', context)
492    }
493
494    /// `get RegExp.prototype.dotAll`
495    ///
496    /// The `dotAll` property indicates whether or not the "`s`" flag is used with the regular expression.
497    ///
498    /// More information:
499    ///  - [ECMAScript reference][spec]
500    ///  - [MDN documentation][mdn]
501    ///
502    /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.dotAll
503    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/dotAll
504    pub(crate) fn get_dot_all(
505        this: &JsValue,
506        _: &[JsValue],
507        context: &mut Context,
508    ) -> JsResult<JsValue> {
509        Self::regexp_has_flag(this, 's', context)
510    }
511
512    /// `get RegExp.prototype.unicode`
513    ///
514    /// The unicode property indicates whether or not the "`u`" flag is used with a regular expression.
515    /// unicode is a read-only property of an individual regular expression instance.
516    ///
517    /// More information:
518    ///  - [ECMAScript reference][spec]
519    ///  - [MDN documentation][mdn]
520    ///
521    /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.unicode
522    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode
523    pub(crate) fn get_unicode(
524        this: &JsValue,
525        _: &[JsValue],
526        context: &mut Context,
527    ) -> JsResult<JsValue> {
528        Self::regexp_has_flag(this, 'u', context)
529    }
530
531    /// `get RegExp.prototype.sticky`
532    ///
533    /// This flag indicates that it matches only from the index indicated by the `lastIndex` property
534    /// of this regular expression in the target string (and does not attempt to match from any later indexes).
535    ///
536    /// More information:
537    ///  - [ECMAScript reference][spec]
538    ///  - [MDN documentation][mdn]
539    ///
540    /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.sticky
541    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky
542    pub(crate) fn get_sticky(
543        this: &JsValue,
544        _: &[JsValue],
545        context: &mut Context,
546    ) -> JsResult<JsValue> {
547        Self::regexp_has_flag(this, 'y', context)
548    }
549
550    /// `get RegExp.prototype.flags`
551    ///
552    /// The `flags` property returns a string consisting of the [`flags`][flags] of the current regular expression object.
553    ///
554    /// More information:
555    ///  - [ECMAScript reference][spec]
556    ///  - [MDN documentation][mdn]
557    ///
558    /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.flags
559    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/flags
560    /// [flags]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#Advanced_searching_with_flags_2
561    pub(crate) fn get_flags(
562        this: &JsValue,
563        _: &[JsValue],
564        context: &mut Context,
565    ) -> JsResult<JsValue> {
566        // 1. Let R be the this value.
567        // 2. If Type(R) is not Object, throw a TypeError exception.
568        if let Some(object) = this.as_object() {
569            // 3. Let result be the empty String.
570            let mut result = String::new();
571            // 4. Let global be ! ToBoolean(? Get(R, "global")).
572            // 5. If global is true, append the code unit 0x0067 (LATIN SMALL LETTER G) as the last code unit of result.
573            if object.get("global", context)?.to_boolean() {
574                result.push('g');
575            }
576            // 6. Let ignoreCase be ! ToBoolean(? Get(R, "ignoreCase")).
577            // 7. If ignoreCase is true, append the code unit 0x0069 (LATIN SMALL LETTER I) as the last code unit of result.
578            if object.get("ignoreCase", context)?.to_boolean() {
579                result.push('i');
580            }
581
582            // 8. Let multiline be ! ToBoolean(? Get(R, "multiline")).
583            // 9. If multiline is true, append the code unit 0x006D (LATIN SMALL LETTER M) as the last code unit of result.
584            if object.get("multiline", context)?.to_boolean() {
585                result.push('m');
586            }
587
588            // 10. Let dotAll be ! ToBoolean(? Get(R, "dotAll")).
589            // 11. If dotAll is true, append the code unit 0x0073 (LATIN SMALL LETTER S) as the last code unit of result.
590            if object.get("dotAll", context)?.to_boolean() {
591                result.push('s');
592            }
593            // 12. Let unicode be ! ToBoolean(? Get(R, "unicode")).
594            // 13. If unicode is true, append the code unit 0x0075 (LATIN SMALL LETTER U) as the last code unit of result.
595            if object.get("unicode", context)?.to_boolean() {
596                result.push('u');
597            }
598
599            // 14. Let sticky be ! ToBoolean(? Get(R, "sticky")).
600            // 15. If sticky is true, append the code unit 0x0079 (LATIN SMALL LETTER Y) as the last code unit of result.
601            if object.get("sticky", context)?.to_boolean() {
602                result.push('y');
603            }
604
605            // 16. Return result.
606            return Ok(result.into());
607        }
608
609        context.throw_type_error("RegExp.prototype.flags getter called on non-object")
610    }
611
612    /// `get RegExp.prototype.source`
613    ///
614    /// The `source` property returns a `String` containing the source text of the regexp object,
615    /// and it doesn't contain the two forward slashes on both sides and any flags.
616    ///
617    /// More information:
618    ///  - [ECMAScript reference][spec]
619    ///  - [MDN documentation][mdn]
620    ///
621    /// [spec]: https://tc39.es/ecma262/#sec-get-regexp.prototype.source
622    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/source
623    pub(crate) fn get_source(
624        this: &JsValue,
625        _: &[JsValue],
626        context: &mut Context,
627    ) -> JsResult<JsValue> {
628        // 1. Let R be the this value.
629        // 2. If Type(R) is not Object, throw a TypeError exception.
630        if let Some(object) = this.as_object() {
631            let object = object.borrow();
632
633            match object.as_regexp() {
634                // 3. If R does not have an [[OriginalSource]] internal slot, then
635                None => {
636                    // a. If SameValue(R, %RegExp.prototype%) is true, return "(?:)".
637                    // b. Otherwise, throw a TypeError exception.
638                    if JsValue::same_value(
639                        this,
640                        &JsValue::new(context.standard_objects().regexp_object().prototype()),
641                    ) {
642                        Ok(JsValue::new("(?:)"))
643                    } else {
644                        context.throw_type_error(
645                            "RegExp.prototype.source method called on incompatible value",
646                        )
647                    }
648                }
649                // 4. Assert: R has an [[OriginalFlags]] internal slot.
650                Some(re) => {
651                    // 5. Let src be R.[[OriginalSource]].
652                    // 6. Let flags be R.[[OriginalFlags]].
653                    // 7. Return EscapeRegExpPattern(src, flags).
654                    RegExp::escape_pattern(&re.original_source, &re.original_flags)
655                }
656            }
657        } else {
658            context.throw_type_error("RegExp.prototype.source method called on incompatible value")
659        }
660    }
661
662    /// `22.2.3.2.5 EscapeRegExpPattern ( P, F )`
663    ///
664    /// More information:
665    ///  - [ECMAScript reference][spec]
666    ///
667    /// [spec]: https://tc39.es/ecma262/#sec-escaperegexppattern
668    fn escape_pattern(src: &str, _flags: &str) -> JsResult<JsValue> {
669        if src.is_empty() {
670            Ok(JsValue::new("(?:)"))
671        } else {
672            let mut s = String::from("");
673
674            for c in src.chars() {
675                match c {
676                    '/' => s.push_str("\\/"),
677                    '\n' => s.push_str("\\\\n"),
678                    '\r' => s.push_str("\\\\r"),
679                    _ => s.push(c),
680                }
681            }
682
683            Ok(JsValue::new(s))
684        }
685    }
686
687    /// `RegExp.prototype.test( string )`
688    ///
689    /// The `test()` method executes a search for a match between a regular expression and a specified string.
690    ///
691    /// Returns `true` or `false`.
692    ///
693    /// More information:
694    ///  - [ECMAScript reference][spec]
695    ///  - [MDN documentation][mdn]
696    ///
697    /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.test
698    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/test
699    pub(crate) fn test(
700        this: &JsValue,
701        args: &[JsValue],
702        context: &mut Context,
703    ) -> JsResult<JsValue> {
704        // 1. Let R be the this value.
705        // 2. If Type(R) is not Object, throw a TypeError exception.
706        if !this.is_object() {
707            return context
708                .throw_type_error("RegExp.prototype.test method called on incompatible value");
709        }
710
711        // 3. Let string be ? ToString(S).
712        let arg_str = args
713            .get(0)
714            .cloned()
715            .unwrap_or_default()
716            .to_string(context)?;
717
718        // 4. Let match be ? RegExpExec(R, string).
719        let m = Self::abstract_exec(this, arg_str, context)?;
720
721        // 5. If match is not null, return true; else return false.
722        if m.is_some() {
723            Ok(JsValue::new(true))
724        } else {
725            Ok(JsValue::new(false))
726        }
727    }
728
729    /// `RegExp.prototype.exec( string )`
730    ///
731    /// The exec() method executes a search for a match in a specified string.
732    ///
733    /// Returns a result array, or `null`.
734    ///
735    /// More information:
736    ///  - [ECMAScript reference][spec]
737    ///  - [MDN documentation][mdn]
738    ///
739    /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.exec
740    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec
741    pub(crate) fn exec(
742        this: &JsValue,
743        args: &[JsValue],
744        context: &mut Context,
745    ) -> JsResult<JsValue> {
746        // 1. Let R be the this value.
747        // 2. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]).
748        let obj = this.as_object().unwrap_or_default();
749        if !obj.is_regexp() {
750            return Err(
751                context.construct_type_error("RegExp.prototype.exec called with invalid value")
752            );
753        }
754
755        // 3. Let S be ? ToString(string).
756        let arg_str = args
757            .get(0)
758            .cloned()
759            .unwrap_or_default()
760            .to_string(context)?;
761
762        // 4. Return ? RegExpBuiltinExec(R, S).
763        if let Some(v) = Self::abstract_builtin_exec(obj, arg_str, context)? {
764            Ok(v.into())
765        } else {
766            Ok(JsValue::null())
767        }
768    }
769
770    /// `22.2.5.2.1 RegExpExec ( R, S )`
771    ///
772    /// More information:
773    ///  - [ECMAScript reference][spec]
774    ///
775    /// [spec]: https://tc39.es/ecma262/#sec-regexpexec
776    pub(crate) fn abstract_exec(
777        this: &JsValue,
778        input: JsString,
779        context: &mut Context,
780    ) -> JsResult<Option<JsObject>> {
781        // 1. Assert: Type(R) is Object.
782        let object = this
783            .as_object()
784            .ok_or_else(|| context.construct_type_error("RegExpExec called with invalid value"))?;
785        // 2. Assert: Type(S) is String.
786
787        // 3. Let exec be ? Get(R, "exec").
788        let exec = this.get_field("exec", context)?;
789
790        // 4. If IsCallable(exec) is true, then
791        if exec.is_function() {
792            // a. Let result be ? Call(exec, R, « S »).
793            let result = context.call(&exec, this, &[input.into()])?;
794
795            // b. If Type(result) is neither Object nor Null, throw a TypeError exception.
796            if !result.is_object() && !result.is_null() {
797                return Err(
798                    context.construct_type_error("regexp exec returned neither object nor null")
799                );
800            }
801
802            // c. Return result.
803            return Ok(result.as_object());
804        }
805
806        // 5. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]).
807        if !object.is_regexp() {
808            return Err(context.construct_type_error("RegExpExec called with invalid value"));
809        }
810
811        // 6. Return ? RegExpBuiltinExec(R, S).
812        Self::abstract_builtin_exec(object, input, context)
813    }
814
815    /// `22.2.5.2.2 RegExpBuiltinExec ( R, S )`
816    ///
817    /// More information:
818    ///  - [ECMAScript reference][spec]
819    ///
820    /// [spec]: https://tc39.es/ecma262/#sec-regexpbuiltinexec
821    pub(crate) fn abstract_builtin_exec(
822        this: JsObject,
823        input: JsString,
824        context: &mut Context,
825    ) -> JsResult<Option<JsObject>> {
826        // 1. Assert: R is an initialized RegExp instance.
827        let rx = {
828            let obj = this.borrow();
829            if let Some(rx) = obj.as_regexp() {
830                rx.clone()
831            } else {
832                return Err(
833                    context.construct_type_error("RegExpBuiltinExec called with invalid value")
834                );
835            }
836        };
837
838        // 2. Assert: Type(S) is String.
839
840        // 3. Let length be the number of code units in S.
841        let length = input.encode_utf16().count();
842
843        // 4. Let lastIndex be ℝ(? ToLength(? Get(R, "lastIndex"))).
844        let mut last_index = this.get("lastIndex", context)?.to_length(context)?;
845
846        // 5. Let flags be R.[[OriginalFlags]].
847        let flags = &rx.original_flags;
848
849        // 6. If flags contains "g", let global be true; else let global be false.
850        let global = flags.contains('g');
851
852        // 7. If flags contains "y", let sticky be true; else let sticky be false.
853        let sticky = flags.contains('y');
854
855        // 8. If global is false and sticky is false, set lastIndex to 0.
856        if !global && !sticky {
857            last_index = 0;
858        }
859
860        // 9. Let matcher be R.[[RegExpMatcher]].
861        let matcher = &rx.matcher;
862
863        // 10. If flags contains "u", let fullUnicode be true; else let fullUnicode be false.
864        let unicode = flags.contains('u');
865
866        // 11. Let matchSucceeded be false.
867        // 12. Repeat, while matchSucceeded is false,
868        let match_value = loop {
869            // a. If lastIndex > length, then
870            if last_index > length {
871                // i. If global is true or sticky is true, then
872                if global || sticky {
873                    // 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
874                    this.set("lastIndex", 0, true, context)?;
875                }
876
877                // ii. Return null.
878                return Ok(None);
879            }
880
881            // b. Let r be matcher(S, lastIndex).
882            // Check if last_index is a valid utf8 index into input.
883            let last_byte_index = match String::from_utf16(
884                &input.encode_utf16().take(last_index).collect::<Vec<u16>>(),
885            ) {
886                Ok(s) => s.len(),
887                Err(_) => {
888                    return Err(context.construct_type_error(
889                        "Failed to get byte index from utf16 encoded string",
890                    ))
891                }
892            };
893            let r = matcher.find_from(&input, last_byte_index).next();
894
895            match r {
896                // c. If r is failure, then
897                None => {
898                    // i. If sticky is true, then
899                    if sticky {
900                        // 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
901                        this.set("lastIndex", 0, true, context)?;
902
903                        // 2. Return null.
904                        return Ok(None);
905                    }
906
907                    // ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
908                    last_index = advance_string_index(input.clone(), last_index, unicode);
909                }
910
911                Some(m) => {
912                    // c. If r is failure, then
913                    // d. Else,
914                    if m.start() != last_index {
915                        // i. If sticky is true, then
916                        if sticky {
917                            // 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
918                            this.set("lastIndex", 0, true, context)?;
919
920                            // 2. Return null.
921                            return Ok(None);
922                        }
923
924                        // ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
925                        last_index = advance_string_index(input.clone(), last_index, unicode);
926                    } else {
927                        //i. Assert: r is a State.
928                        //ii. Set matchSucceeded to true.
929                        break m;
930                    }
931                }
932            }
933        };
934
935        // 13. Let e be r's endIndex value.
936        let mut e = match_value.end();
937
938        // 14. If fullUnicode is true, then
939        if unicode {
940            // e is an index into the Input character list, derived from S, matched by matcher.
941            // Let eUTF be the smallest index into S that corresponds to the character at element e of Input.
942            // If e is greater than or equal to the number of elements in Input, then eUTF is the number of code units in S.
943            // b. Set e to eUTF.
944            e = input.split_at(e).0.encode_utf16().count();
945        }
946
947        // 15. If global is true or sticky is true, then
948        if global || sticky {
949            // a. Perform ? Set(R, "lastIndex", 𝔽(e), true).
950            this.set("lastIndex", e, true, context)?;
951        }
952
953        // 16. Let n be the number of elements in r's captures List. (This is the same value as 22.2.2.1's NcapturingParens.)
954        let n = match_value.captures.len();
955        // 17. Assert: n < 23^2 - 1.
956        debug_assert!(n < 23usize.pow(2) - 1);
957
958        // 18. Let A be ! ArrayCreate(n + 1).
959        // 19. Assert: The mathematical value of A's "length" property is n + 1.
960        let a = Array::array_create(n + 1, None, context)?;
961
962        // 20. Perform ! CreateDataPropertyOrThrow(A, "index", 𝔽(lastIndex)).
963        a.create_data_property_or_throw("index", match_value.start(), context)
964            .unwrap();
965
966        // 21. Perform ! CreateDataPropertyOrThrow(A, "input", S).
967        a.create_data_property_or_throw("input", input.clone(), context)
968            .unwrap();
969
970        // 22. Let matchedSubstr be the substring of S from lastIndex to e.
971        let matched_substr = if let Some(s) = input.get(match_value.range()) {
972            s
973        } else {
974            ""
975        };
976
977        // 23. Perform ! CreateDataPropertyOrThrow(A, "0", matchedSubstr).
978        a.create_data_property_or_throw(0, matched_substr, context)
979            .unwrap();
980
981        // 24. If R contains any GroupName, then
982        // 25. Else,
983        let named_groups = match_value.named_groups();
984        let groups = if named_groups.clone().count() > 0 {
985            // a. Let groups be ! OrdinaryObjectCreate(null).
986            let groups = JsValue::new_object(context);
987
988            // Perform 27.f here
989            // f. If the ith capture of R was defined with a GroupName, then
990            // i. Let s be the CapturingGroupName of the corresponding RegExpIdentifierName.
991            // ii. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue).
992            for (name, range) in named_groups {
993                if let Some(range) = range {
994                    let value = if let Some(s) = input.get(range.clone()) {
995                        s
996                    } else {
997                        ""
998                    };
999
1000                    groups
1001                        .to_object(context)?
1002                        .create_data_property_or_throw(name, value, context)
1003                        .unwrap();
1004                }
1005            }
1006            groups
1007        } else {
1008            // a. Let groups be undefined.
1009            JsValue::undefined()
1010        };
1011
1012        // 26. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
1013        a.create_data_property_or_throw("groups", groups, context)
1014            .unwrap();
1015
1016        // 27. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do
1017        for i in 1..=n {
1018            // a. Let captureI be ith element of r's captures List.
1019            let capture = match_value.group(i);
1020
1021            let captured_value = match capture {
1022                // b. If captureI is undefined, let capturedValue be undefined.
1023                None => JsValue::undefined(),
1024                // c. Else if fullUnicode is true, then
1025                // d. Else,
1026                Some(range) => {
1027                    if let Some(s) = input.get(range) {
1028                        s.into()
1029                    } else {
1030                        "".into()
1031                    }
1032                }
1033            };
1034
1035            // e. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), capturedValue).
1036            a.create_data_property_or_throw(i, captured_value, context)
1037                .unwrap();
1038        }
1039
1040        // 28. Return A.
1041        Ok(Some(a))
1042    }
1043
1044    /// `RegExp.prototype[ @@match ]( string )`
1045    ///
1046    /// This method retrieves the matches when matching a string against a regular expression.
1047    ///
1048    /// More information:
1049    ///  - [ECMAScript reference][spec]
1050    ///  - [MDN documentation][mdn]
1051    ///
1052    /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@match
1053    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@match
1054    pub(crate) fn r#match(
1055        this: &JsValue,
1056        args: &[JsValue],
1057        context: &mut Context,
1058    ) -> JsResult<JsValue> {
1059        // 1. Let rx be the this value.
1060        // 2. If Type(rx) is not Object, throw a TypeError exception.
1061        let rx = if let Some(rx) = this.as_object() {
1062            rx
1063        } else {
1064            return Err(context.construct_type_error(
1065                "RegExp.prototype.match method called on incompatible value",
1066            ));
1067        };
1068
1069        // 3. Let S be ? ToString(string).
1070        let arg_str = args
1071            .get(0)
1072            .cloned()
1073            .unwrap_or_default()
1074            .to_string(context)?;
1075
1076        // 4. Let global be ! ToBoolean(? Get(rx, "global")).
1077        let global = rx.get("global", context)?.to_boolean();
1078
1079        // 5. If global is false, then
1080        // 6. Else,
1081        if !global {
1082            // a. Return ? RegExpExec(rx, S).
1083            if let Some(v) = Self::abstract_exec(&JsValue::new(rx), arg_str, context)? {
1084                Ok(v.into())
1085            } else {
1086                Ok(JsValue::null())
1087            }
1088        } else {
1089            // a. Assert: global is true.
1090
1091            // b. Let fullUnicode be ! ToBoolean(? Get(rx, "unicode")).
1092            let unicode = rx.get("unicode", context)?.to_boolean();
1093
1094            // c. Perform ? Set(rx, "lastIndex", +0𝔽, true).
1095            rx.set("lastIndex", 0, true, context)?;
1096
1097            // d. Let A be ! ArrayCreate(0).
1098            let a = Array::array_create(0, None, context).unwrap();
1099
1100            // e. Let n be 0.
1101            let mut n = 0;
1102
1103            // f. Repeat,
1104            loop {
1105                // i. Let result be ? RegExpExec(rx, S).
1106                let result =
1107                    Self::abstract_exec(&JsValue::new(rx.clone()), arg_str.clone(), context)?;
1108
1109                // ii. If result is null, then
1110                // iii. Else,
1111                if let Some(result) = result {
1112                    // 1. Let matchStr be ? ToString(? Get(result, "0")).
1113                    let match_str = result.get("0", context)?.to_string(context)?;
1114
1115                    // 2. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(n)), matchStr).
1116                    a.create_data_property_or_throw(n, match_str.clone(), context)
1117                        .unwrap();
1118
1119                    // 3. If matchStr is the empty String, then
1120                    if match_str.is_empty() {
1121                        // a. Let thisIndex be ℝ(? ToLength(? Get(rx, "lastIndex"))).
1122                        let this_index = rx.get("lastIndex", context)?.to_length(context)?;
1123
1124                        // b. Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode).
1125                        let next_index = advance_string_index(arg_str.clone(), this_index, unicode);
1126
1127                        // c. Perform ? Set(rx, "lastIndex", 𝔽(nextIndex), true).
1128                        rx.set("lastIndex", JsValue::new(next_index), true, context)?;
1129                    }
1130
1131                    // 4. Set n to n + 1.
1132                    n += 1;
1133                } else {
1134                    // 1. If n = 0, return null.
1135                    // 2. Return A.
1136                    if n == 0 {
1137                        return Ok(JsValue::null());
1138                    } else {
1139                        return Ok(a.into());
1140                    }
1141                }
1142            }
1143        }
1144    }
1145
1146    /// `RegExp.prototype.toString()`
1147    ///
1148    /// Return a string representing the regular expression.
1149    ///
1150    /// More information:
1151    ///  - [ECMAScript reference][spec]
1152    ///  - [MDN documentation][mdn]
1153    ///
1154    /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype.tostring
1155    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/toString
1156    #[allow(clippy::wrong_self_convention)]
1157    pub(crate) fn to_string(
1158        this: &JsValue,
1159        _: &[JsValue],
1160        context: &mut Context,
1161    ) -> JsResult<JsValue> {
1162        let (body, flags) = if let Some(object) = this.as_object() {
1163            let object = object.borrow();
1164            let regex = object.as_regexp().ok_or_else(|| {
1165                context.construct_type_error(format!(
1166                    "Method RegExp.prototype.toString called on incompatible receiver {}",
1167                    this.display()
1168                ))
1169            })?;
1170            (regex.original_source.clone(), regex.original_flags.clone())
1171        } else {
1172            return context.throw_type_error(format!(
1173                "Method RegExp.prototype.toString called on incompatible receiver {}",
1174                this.display()
1175            ));
1176        };
1177        Ok(format!("/{}/{}", body, flags).into())
1178    }
1179
1180    /// `RegExp.prototype[ @@matchAll ]( string )`
1181    ///
1182    /// The `[@@matchAll]` method returns all matches of the regular expression against a string.
1183    ///
1184    /// More information:
1185    ///  - [ECMAScript reference][spec]
1186    ///  - [MDN documentation][mdn]
1187    ///
1188    /// [spec]: https://tc39.es/ecma262/#sec-regexp-prototype-matchall
1189    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@matchAll
1190    pub(crate) fn match_all(
1191        this: &JsValue,
1192        args: &[JsValue],
1193        context: &mut Context,
1194    ) -> JsResult<JsValue> {
1195        // 1. Let R be the this value.
1196        // 2. If Type(R) is not Object, throw a TypeError exception.
1197        if !this.is_object() {
1198            return context.throw_type_error(
1199                "RegExp.prototype.match_all method called on incompatible value",
1200            );
1201        }
1202
1203        // 3. Let S be ? ToString(string).
1204        let arg_str = args
1205            .get(0)
1206            .cloned()
1207            .unwrap_or_default()
1208            .to_string(context)?;
1209
1210        // 4. Let C be ? SpeciesConstructor(R, %RegExp%).
1211        let c = this
1212            .as_object()
1213            .unwrap_or_default()
1214            .species_constructor(context.global_object().get(RegExp::NAME, context)?, context)?;
1215
1216        // 5. Let flags be ? ToString(? Get(R, "flags")).
1217        let flags = this.get_field("flags", context)?.to_string(context)?;
1218
1219        // 6. Let matcher be ? Construct(C, « R, flags »).
1220        let matcher = c
1221            .as_object()
1222            .expect("SpeciesConstructor returned non Object")
1223            .construct(&[this.clone(), flags.clone().into()], &c, context)?;
1224
1225        // 7. Let lastIndex be ? ToLength(? Get(R, "lastIndex")).
1226        let last_index = this.get_field("lastIndex", context)?.to_length(context)?;
1227
1228        // 8. Perform ? Set(matcher, "lastIndex", lastIndex, true).
1229        matcher.set_field("lastIndex", last_index, true, context)?;
1230
1231        // 9. If flags contains "g", let global be true.
1232        // 10. Else, let global be false.
1233        let global = flags.contains('g');
1234
1235        // 11. If flags contains "u", let fullUnicode be true.
1236        // 12. Else, let fullUnicode be false.
1237        let unicode = flags.contains('u');
1238
1239        // 13. Return ! CreateRegExpStringIterator(matcher, S, global, fullUnicode).
1240        RegExpStringIterator::create_regexp_string_iterator(
1241            &matcher, arg_str, global, unicode, context,
1242        )
1243    }
1244
1245    /// `RegExp.prototype [ @@replace ] ( string, replaceValue )`
1246    ///
1247    /// The [@@replace]() method replaces some or all matches of a this pattern in a string by a replacement,
1248    /// and returns the result of the replacement as a new string.
1249    /// The replacement can be a string or a function to be called for each match.
1250    ///
1251    /// More information:
1252    ///  - [ECMAScript reference][spec]
1253    ///  - [MDN documentation][mdn]
1254    ///
1255    /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
1256    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@replace
1257    pub(crate) fn replace(
1258        this: &JsValue,
1259        args: &[JsValue],
1260        context: &mut Context,
1261    ) -> JsResult<JsValue> {
1262        // 1. Let rx be the this value.
1263        // 2. If Type(rx) is not Object, throw a TypeError exception.
1264        let rx = if let Some(rx) = this.as_object() {
1265            rx
1266        } else {
1267            return context.throw_type_error(
1268                "RegExp.prototype[Symbol.replace] method called on incompatible value",
1269            );
1270        };
1271
1272        // 3. Let S be ? ToString(string).
1273        let arg_str = args
1274            .get(0)
1275            .cloned()
1276            .unwrap_or_default()
1277            .to_string(context)?;
1278
1279        // 4. Let lengthS be the number of code unit elements in S.
1280        let length_arg_str = arg_str.encode_utf16().count();
1281
1282        // 5. Let functionalReplace be IsCallable(replaceValue).
1283        let mut replace_value = args.get_or_undefined(1).clone();
1284        let functional_replace = replace_value.is_function();
1285
1286        // 6. If functionalReplace is false, then
1287        if !functional_replace {
1288            // a. Set replaceValue to ? ToString(replaceValue).
1289            replace_value = replace_value.to_string(context)?.into();
1290        }
1291
1292        // 7. Let global be ! ToBoolean(? Get(rx, "global")).
1293        let global = rx.get("global", context)?.to_boolean();
1294
1295        // 8. If global is true, then
1296        let mut unicode = false;
1297        if global {
1298            // a. Let fullUnicode be ! ToBoolean(? Get(rx, "unicode")).
1299            unicode = rx.get("unicode", context)?.to_boolean();
1300
1301            // b. Perform ? Set(rx, "lastIndex", +0𝔽, true).
1302            rx.set("lastIndex", 0, true, context)?;
1303        }
1304
1305        //  9. Let results be a new empty List.
1306        let mut results = Vec::new();
1307
1308        // 10. Let done be false.
1309        // 11. Repeat, while done is false,
1310        loop {
1311            // a. Let result be ? RegExpExec(rx, S).
1312            let result = Self::abstract_exec(&JsValue::new(rx.clone()), arg_str.clone(), context)?;
1313
1314            // b. If result is null, set done to true.
1315            // c. Else,
1316            if let Some(result) = result {
1317                // i. Append result to the end of results.
1318                results.push(result.clone());
1319
1320                // ii. If global is false, set done to true.
1321                // iii. Else,
1322                if !global {
1323                    break;
1324                } else {
1325                    // 1. Let matchStr be ? ToString(? Get(result, "0")).
1326                    let match_str = result.get("0", context)?.to_string(context)?;
1327
1328                    // 2. If matchStr is the empty String, then
1329                    if match_str.is_empty() {
1330                        // a. Let thisIndex be ℝ(? ToLength(? Get(rx, "lastIndex"))).
1331                        let this_index = rx.get("lastIndex", context)?.to_length(context)?;
1332
1333                        // b. Let nextIndex be AdvanceStringIndex(S, thisIndex, fullUnicode).
1334                        let next_index = advance_string_index(arg_str.clone(), this_index, unicode);
1335
1336                        // c. Perform ? Set(rx, "lastIndex", 𝔽(nextIndex), true).
1337                        rx.set("lastIndex", JsValue::new(next_index), true, context)?;
1338                    }
1339                }
1340            } else {
1341                break;
1342            }
1343        }
1344
1345        // 12. Let accumulatedResult be the empty String.
1346        let mut accumulated_result = JsString::new("");
1347
1348        // 13. Let nextSourcePosition be 0.
1349        let mut next_source_position = 0;
1350
1351        // 14. For each element result of results, do
1352        for result in results {
1353            // a. Let resultLength be ? LengthOfArrayLike(result).
1354            let result_length = result.length_of_array_like(context)? as isize;
1355
1356            // b. Let nCaptures be max(resultLength - 1, 0).
1357            let n_captures = std::cmp::max(result_length - 1, 0);
1358
1359            // c. Let matched be ? ToString(? Get(result, "0")).
1360            let matched = result.get("0", context)?.to_string(context)?;
1361
1362            // d. Let matchLength be the number of code units in matched.
1363            let match_length = matched.encode_utf16().count();
1364
1365            // e. Let position be ? ToIntegerOrInfinity(? Get(result, "index")).
1366            let position = result
1367                .get("index", context)?
1368                .to_integer_or_infinity(context)?;
1369
1370            // f. Set position to the result of clamping position between 0 and lengthS.
1371            //position = position.
1372            let position = match position {
1373                IntegerOrInfinity::Integer(i) => {
1374                    if i < 0 {
1375                        0
1376                    } else if i as usize > length_arg_str {
1377                        length_arg_str
1378                    } else {
1379                        i as usize
1380                    }
1381                }
1382                IntegerOrInfinity::PositiveInfinity => length_arg_str,
1383                IntegerOrInfinity::NegativeInfinity => 0,
1384            };
1385
1386            // h. Let captures be a new empty List.
1387            let mut captures = Vec::new();
1388
1389            // g. Let n be 1.
1390            // i. Repeat, while n ≤ nCaptures,
1391            for n in 1..=n_captures {
1392                // i. Let capN be ? Get(result, ! ToString(𝔽(n))).
1393                let mut cap_n = result.get(n.to_string(), context)?;
1394
1395                // ii. If capN is not undefined, then
1396                if !cap_n.is_undefined() {
1397                    // 1. Set capN to ? ToString(capN).
1398                    cap_n = cap_n.to_string(context)?.into();
1399                }
1400
1401                // iii. Append capN as the last element of captures.
1402                captures.push(cap_n);
1403
1404                // iv. Set n to n + 1.
1405            }
1406
1407            // j. Let namedCaptures be ? Get(result, "groups").
1408            let mut named_captures = result.get("groups", context)?;
1409
1410            // k. If functionalReplace is true, then
1411            // l. Else,
1412            let replacement: JsString;
1413            if functional_replace {
1414                // i. Let replacerArgs be « matched ».
1415                let mut replacer_args = vec![JsValue::new(matched)];
1416
1417                // ii. Append in List order the elements of captures to the end of the List replacerArgs.
1418                replacer_args.extend(captures);
1419
1420                // iii. Append 𝔽(position) and S to replacerArgs.
1421                replacer_args.push(position.into());
1422                replacer_args.push(arg_str.clone().into());
1423
1424                // iv. If namedCaptures is not undefined, then
1425                if !named_captures.is_undefined() {
1426                    // 1. Append namedCaptures as the last element of replacerArgs.
1427                    replacer_args.push(named_captures);
1428                }
1429
1430                // v. Let replValue be ? Call(replaceValue, undefined, replacerArgs).
1431                let repl_value =
1432                    context.call(&replace_value, &JsValue::undefined(), &replacer_args)?;
1433
1434                // vi. Let replacement be ? ToString(replValue).
1435                replacement = repl_value.to_string(context)?;
1436            } else {
1437                // i. If namedCaptures is not undefined, then
1438                if !named_captures.is_undefined() {
1439                    // 1. Set namedCaptures to ? ToObject(namedCaptures).
1440                    named_captures = named_captures.to_object(context)?.into();
1441                }
1442
1443                // ii. Let replacement be ? GetSubstitution(matched, S, position, captures, namedCaptures, replaceValue).
1444                replacement = string::get_substitution(
1445                    matched.to_string(),
1446                    arg_str.to_string(),
1447                    position,
1448                    captures,
1449                    named_captures,
1450                    replace_value.to_string(context)?,
1451                    context,
1452                )?;
1453            }
1454
1455            // m. If position ≥ nextSourcePosition, then
1456            if position >= next_source_position {
1457                // i. NOTE: position should not normally move backwards.
1458                //    If it does, it is an indication of an ill-behaving RegExp subclass
1459                //    or use of an access triggered side-effect to change the global flag or other characteristics of rx.
1460                //    In such cases, the corresponding substitution is ignored.
1461                // ii. Set accumulatedResult to the string-concatenation of accumulatedResult,
1462                //     the substring of S from nextSourcePosition to position, and replacement.
1463                accumulated_result = format!(
1464                    "{}{}{}",
1465                    accumulated_result,
1466                    arg_str.get(next_source_position..position).unwrap(),
1467                    replacement
1468                )
1469                .into();
1470
1471                // iii. Set nextSourcePosition to position + matchLength.
1472                next_source_position = position + match_length;
1473            }
1474        }
1475
1476        // 15. If nextSourcePosition ≥ lengthS, return accumulatedResult.
1477        if next_source_position >= length_arg_str {
1478            return Ok(accumulated_result.into());
1479        }
1480
1481        // 16. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition.
1482        Ok(format!(
1483            "{}{}",
1484            accumulated_result,
1485            arg_str.get(next_source_position..).unwrap()
1486        )
1487        .into())
1488    }
1489
1490    /// `RegExp.prototype[ @@search ]( string )`
1491    ///
1492    /// This method executes a search for a match between a this regular expression and a string.
1493    ///
1494    /// More information:
1495    ///  - [ECMAScript reference][spec]
1496    ///  - [MDN documentation][mdn]
1497    ///
1498    /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@search
1499    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@search
1500    pub(crate) fn search(
1501        this: &JsValue,
1502        args: &[JsValue],
1503        context: &mut Context,
1504    ) -> JsResult<JsValue> {
1505        // 1. Let rx be the this value.
1506        // 2. If Type(rx) is not Object, throw a TypeError exception.
1507        let rx = if let Some(rx) = this.as_object() {
1508            rx
1509        } else {
1510            return Err(context.construct_type_error(
1511                "RegExp.prototype[Symbol.search] method called on incompatible value",
1512            ));
1513        };
1514
1515        // 3. Let S be ? ToString(string).
1516        let arg_str = args
1517            .get(0)
1518            .cloned()
1519            .unwrap_or_default()
1520            .to_string(context)?;
1521
1522        // 4. Let previousLastIndex be ? Get(rx, "lastIndex").
1523        let previous_last_index = rx.get("lastIndex", context)?;
1524
1525        // 5. If SameValue(previousLastIndex, +0𝔽) is false, then
1526        if !JsValue::same_value(&previous_last_index, &JsValue::new(0)) {
1527            // a. Perform ? Set(rx, "lastIndex", +0𝔽, true).
1528            rx.set("lastIndex", 0, true, context)?;
1529        }
1530
1531        // 6. Let result be ? RegExpExec(rx, S).
1532        let result = Self::abstract_exec(&JsValue::new(rx.clone()), arg_str, context)?;
1533
1534        // 7. Let currentLastIndex be ? Get(rx, "lastIndex").
1535        let current_last_index = rx.get("lastIndex", context)?;
1536
1537        // 8. If SameValue(currentLastIndex, previousLastIndex) is false, then
1538        if !JsValue::same_value(&current_last_index, &previous_last_index) {
1539            // a. Perform ? Set(rx, "lastIndex", previousLastIndex, true).
1540            rx.set("lastIndex", previous_last_index, true, context)?;
1541        }
1542
1543        // 9. If result is null, return -1𝔽.
1544        // 10. Return ? Get(result, "index").
1545        if let Some(result) = result {
1546            result.get("index", context)
1547        } else {
1548            Ok(JsValue::new(-1))
1549        }
1550    }
1551
1552    /// `RegExp.prototype [ @@split ] ( string, limit )`
1553    ///
1554    /// The [@@split]() method splits a String object into an array of strings by separating the string into substrings.
1555    ///
1556    /// More information:
1557    ///  - [ECMAScript reference][spec]
1558    ///  - [MDN documentation][mdn]
1559    ///
1560    /// [spec]: https://tc39.es/ecma262/#sec-regexp.prototype-@@split
1561    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/@@split
1562    pub(crate) fn split(
1563        this: &JsValue,
1564        args: &[JsValue],
1565        context: &mut Context,
1566    ) -> JsResult<JsValue> {
1567        // 1. Let rx be the this value.
1568        // 2. If Type(rx) is not Object, throw a TypeError exception.
1569        let rx = if let Some(rx) = this.as_object() {
1570            rx
1571        } else {
1572            return Err(context.construct_type_error(
1573                "RegExp.prototype.split method called on incompatible value",
1574            ));
1575        };
1576
1577        // 3. Let S be ? ToString(string).
1578        let arg_str = args
1579            .get(0)
1580            .cloned()
1581            .unwrap_or_default()
1582            .to_string(context)?;
1583
1584        // 4. Let C be ? SpeciesConstructor(rx, %RegExp%).
1585        let constructor =
1586            rx.species_constructor(context.global_object().get(RegExp::NAME, context)?, context)?;
1587
1588        // 5. Let flags be ? ToString(? Get(rx, "flags")).
1589        let flags = rx.get("flags", context)?.to_string(context)?;
1590
1591        // 6. If flags contains "u", let unicodeMatching be true.
1592        // 7. Else, let unicodeMatching be false.
1593        let unicode = flags.contains('u');
1594
1595        // 8. If flags contains "y", let newFlags be flags.
1596        // 9. Else, let newFlags be the string-concatenation of flags and "y".
1597        let new_flags = if flags.contains('y') {
1598            flags.to_string()
1599        } else {
1600            format!("{}{}", flags, 'y')
1601        };
1602
1603        // 10. Let splitter be ? Construct(C, « rx, newFlags »).
1604        let splitter = constructor
1605            .as_object()
1606            .expect("SpeciesConstructor returned non Object")
1607            .construct(
1608                &[JsValue::from(rx), new_flags.into()],
1609                &constructor,
1610                context,
1611            )?;
1612
1613        // 11. Let A be ! ArrayCreate(0).
1614        let a = Array::array_create(0, None, context).unwrap();
1615
1616        // 12. Let lengthA be 0.
1617        let mut length_a = 0;
1618
1619        // 13. If limit is undefined, let lim be 2^32 - 1; else let lim be ℝ(? ToUint32(limit)).
1620        let limit = args.get_or_undefined(1);
1621        let lim = if limit.is_undefined() {
1622            u32::MAX
1623        } else {
1624            limit.to_u32(context)?
1625        };
1626
1627        // 14. If lim is 0, return A.
1628        if lim == 0 {
1629            return Ok(a.into());
1630        }
1631
1632        // 15. Let size be the length of S.
1633        let size = arg_str.encode_utf16().count();
1634
1635        // 16. If size is 0, then
1636        if size == 0 {
1637            // a. Let z be ? RegExpExec(splitter, S).
1638            let result = Self::abstract_exec(&splitter, arg_str.clone(), context)?;
1639
1640            // b. If z is not null, return A.
1641            if result.is_some() {
1642                return Ok(a.into());
1643            }
1644
1645            // c. Perform ! CreateDataPropertyOrThrow(A, "0", S).
1646            a.create_data_property_or_throw(0, arg_str, context)
1647                .unwrap();
1648
1649            // d. Return A.
1650            return Ok(a.into());
1651        }
1652
1653        // 17. Let p be 0.
1654        // 18. Let q be p.
1655        let mut p = 0;
1656        let mut q = p;
1657
1658        // 19. Repeat, while q < size,
1659        while q < size {
1660            // a. Perform ? Set(splitter, "lastIndex", 𝔽(q), true).
1661            splitter.set_field("lastIndex", JsValue::new(q), true, context)?;
1662
1663            // b. Let z be ? RegExpExec(splitter, S).
1664            let result = Self::abstract_exec(&splitter, arg_str.clone(), context)?;
1665
1666            // c. If z is null, set q to AdvanceStringIndex(S, q, unicodeMatching).
1667            // d. Else,
1668            if let Some(result) = result {
1669                // i. Let e be ℝ(? ToLength(? Get(splitter, "lastIndex"))).
1670                let mut e = splitter
1671                    .get_field("lastIndex", context)?
1672                    .to_length(context)?;
1673
1674                // ii. Set e to min(e, size).
1675                e = std::cmp::min(e, size);
1676
1677                // iii. If e = p, set q to AdvanceStringIndex(S, q, unicodeMatching).
1678                // iv. Else,
1679                if e == p {
1680                    q = advance_string_index(arg_str.clone(), q, unicode);
1681                } else {
1682                    // 1. Let T be the substring of S from p to q.
1683                    let arg_str_substring = String::from_utf16_lossy(
1684                        &arg_str
1685                            .encode_utf16()
1686                            .skip(p)
1687                            .take(q - p)
1688                            .collect::<Vec<u16>>(),
1689                    );
1690
1691                    // 2. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T).
1692                    a.create_data_property_or_throw(length_a, arg_str_substring, context)
1693                        .unwrap();
1694
1695                    // 3. Set lengthA to lengthA + 1.
1696                    length_a += 1;
1697
1698                    // 4. If lengthA = lim, return A.
1699                    if length_a == lim {
1700                        return Ok(a.into());
1701                    }
1702
1703                    // 5. Set p to e.
1704                    p = e;
1705
1706                    // 6. Let numberOfCaptures be ? LengthOfArrayLike(z).
1707                    let mut number_of_captures = result.length_of_array_like(context)? as isize;
1708
1709                    // 7. Set numberOfCaptures to max(numberOfCaptures - 1, 0).
1710                    number_of_captures = if number_of_captures == 0 {
1711                        0
1712                    } else {
1713                        std::cmp::max(number_of_captures - 1, 0)
1714                    };
1715
1716                    // 8. Let i be 1.
1717                    // 9. Repeat, while i ≤ numberOfCaptures,
1718                    for i in 1..=number_of_captures {
1719                        // a. Let nextCapture be ? Get(z, ! ToString(𝔽(i))).
1720                        let next_capture = result.get(i.to_string(), context)?;
1721
1722                        // b. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), nextCapture).
1723                        a.create_data_property_or_throw(length_a, next_capture, context)
1724                            .unwrap();
1725
1726                        // d. Set lengthA to lengthA + 1.
1727                        length_a += 1;
1728
1729                        // e. If lengthA = lim, return A.
1730                        if length_a == lim {
1731                            return Ok(a.into());
1732                        }
1733                    }
1734
1735                    // 10. Set q to p.
1736                    q = p;
1737                }
1738            } else {
1739                q = advance_string_index(arg_str.clone(), q, unicode);
1740            }
1741        }
1742
1743        // 20. Let T be the substring of S from p to size.
1744        let arg_str_substring = String::from_utf16_lossy(
1745            &arg_str
1746                .encode_utf16()
1747                .skip(p)
1748                .take(size - p)
1749                .collect::<Vec<u16>>(),
1750        );
1751
1752        // 21. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(lengthA)), T).
1753        a.create_data_property_or_throw(length_a, arg_str_substring, context)
1754            .unwrap();
1755
1756        // 22. Return A.
1757        Ok(a.into())
1758    }
1759}
1760
1761/// `22.2.5.2.3 AdvanceStringIndex ( S, index, unicode )`
1762///
1763/// More information:
1764///  - [ECMAScript reference][spec]
1765///
1766/// [spec]: https://tc39.es/ecma262/#sec-advancestringindex
1767fn advance_string_index(s: JsString, index: usize, unicode: bool) -> usize {
1768    // Regress only works with utf8, so this function differs from the spec.
1769
1770    // 1. Assert: index ≤ 2^53 - 1.
1771
1772    // 2. If unicode is false, return index + 1.
1773    if !unicode {
1774        return index + 1;
1775    }
1776
1777    // 3. Let length be the number of code units in S.
1778    let length = s.encode_utf16().count();
1779
1780    // 4. If index + 1 ≥ length, return index + 1.
1781    if index + 1 > length {
1782        return index + 1;
1783    }
1784
1785    // 5. Let cp be ! CodePointAt(S, index).
1786    let (_, offset, _) =
1787        crate::builtins::string::code_point_at(s, index as i32).expect("Failed to get code point");
1788
1789    index + offset as usize
1790}