use crate::js_runtime::SandboxError;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RegexScanVerdict {
Accept,
Reject {
reason: String,
},
}
impl RegexScanVerdict {
pub fn into_sandbox_error(self) -> SandboxError {
match self {
RegexScanVerdict::Accept => {
SandboxError::ScriptError("regex guard: into_sandbox_error on Accept".into())
}
RegexScanVerdict::Reject { reason } => SandboxError::RegexRejected(reason),
}
}
}
pub fn scan_script_for_redos(src: &str) -> RegexScanVerdict {
let bytes = src.as_bytes();
let mut i = 0;
let mut prev_significant: u8 = b';';
while i < bytes.len() {
let b = bytes[i];
if b == b'/' && i + 1 < bytes.len() {
let n = bytes[i + 1];
if n == b'/' {
while i < bytes.len() && bytes[i] != b'\n' {
i += 1;
}
continue;
}
if n == b'*' {
i += 2;
while i + 1 < bytes.len() && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
i += 1;
}
i = (i + 2).min(bytes.len());
continue;
}
}
if b == b'"' || b == b'\'' {
let quote = b;
i += 1;
while i < bytes.len() {
let c = bytes[i];
if c == b'\\' && i + 1 < bytes.len() {
i += 2;
continue;
}
i += 1;
if c == quote {
break;
}
}
prev_significant = b'"';
continue;
}
if b == b'`' {
i += 1;
while i < bytes.len() && bytes[i] != b'`' {
if bytes[i] == b'\\' && i + 1 < bytes.len() {
i += 2;
continue;
}
i += 1;
}
i = (i + 1).min(bytes.len());
prev_significant = b'`';
continue;
}
if b == b'/' && is_regex_context(prev_significant) {
let pat_start = i + 1;
let mut j = pat_start;
let mut in_class = false;
while j < bytes.len() {
let c = bytes[j];
if c == b'\n' {
break;
}
if c == b'\\' && j + 1 < bytes.len() {
j += 2;
continue;
}
if c == b'[' {
in_class = true;
} else if c == b']' {
in_class = false;
} else if c == b'/' && !in_class {
let body = &src[pat_start..j];
if let Some(reason) = pattern_is_dangerous(body) {
return RegexScanVerdict::Reject { reason };
}
i = j + 1;
while i < bytes.len() && bytes[i].is_ascii_alphabetic() {
i += 1;
}
prev_significant = b')'; break;
}
j += 1;
}
if j >= bytes.len() || bytes[j] == b'\n' {
i += 1;
prev_significant = b'/';
}
continue;
}
if b == b'R' && looks_like_new_regexp(bytes, i) {
let mut k = i + b"RegExp".len();
while k < bytes.len() && bytes[k].is_ascii_whitespace() {
k += 1;
}
if k < bytes.len() && bytes[k] == b'(' {
k += 1;
while k < bytes.len() && bytes[k].is_ascii_whitespace() {
k += 1;
}
if k < bytes.len() && (bytes[k] == b'"' || bytes[k] == b'\'') {
let quote = bytes[k];
let pat_start = k + 1;
let mut j = pat_start;
while j < bytes.len() {
let c = bytes[j];
if c == b'\\' && j + 1 < bytes.len() {
j += 2;
continue;
}
if c == quote {
let body = &src[pat_start..j];
if let Some(reason) = pattern_is_dangerous(body) {
return RegexScanVerdict::Reject {
reason: format!("new RegExp: {reason}"),
};
}
break;
}
j += 1;
}
}
}
i += 1;
prev_significant = b'p';
continue;
}
if !b.is_ascii_whitespace() {
prev_significant = b;
}
i += 1;
}
RegexScanVerdict::Accept
}
fn is_regex_context(prev_significant: u8) -> bool {
match prev_significant {
b')' | b']' | b'}' => false,
c if c.is_ascii_alphanumeric() || c == b'_' || c == b'$' => false,
_ => true,
}
}
fn looks_like_new_regexp(bytes: &[u8], i: usize) -> bool {
let kw = b"RegExp";
if i + kw.len() > bytes.len() {
return false;
}
if &bytes[i..i + kw.len()] != kw {
return false;
}
if i > 0 {
let p = bytes[i - 1];
if p.is_ascii_alphanumeric() || p == b'_' || p == b'$' {
return false;
}
}
let next = bytes.get(i + kw.len()).copied().unwrap_or(b' ');
if next.is_ascii_alphanumeric() || next == b'_' || next == b'$' {
return false;
}
true
}
pub(crate) fn pattern_is_dangerous(pat: &str) -> Option<String> {
if pat.len() > 4096 {
return Some(format!(
"regex pattern exceeds 4 KiB safety bound ({} bytes)",
pat.len()
));
}
let bytes = pat.as_bytes();
let n = bytes.len();
let mut group_stack: Vec<usize> = Vec::new();
let mut last_quantifiable_end: Option<usize> = None;
let mut i = 0;
while i < n {
let b = bytes[i];
if b == b'\\' && i + 1 < n {
last_quantifiable_end = Some(i + 2);
i += 2;
continue;
}
if b == b'[' {
let mut j = i + 1;
while j < n && bytes[j] != b']' {
if bytes[j] == b'\\' && j + 1 < n {
j += 2;
continue;
}
j += 1;
}
last_quantifiable_end = Some((j + 1).min(n));
i = (j + 1).min(n);
continue;
}
if b == b'(' {
group_stack.push(i);
i += 1;
if i < n && bytes[i] == b'?' {
i += 1;
while i < n
&& bytes[i] != b':'
&& bytes[i] != b'='
&& bytes[i] != b'!'
&& bytes[i] != b'>'
{
i += 1;
}
if i < n {
i += 1;
}
}
continue;
}
if b == b')' {
let group_start = match group_stack.pop() {
Some(s) => s,
None => {
return None;
}
};
let group_body = &pat[group_start + 1..i];
let outer_quant = match bytes.get(i + 1).copied() {
Some(b'+') | Some(b'*') => true,
Some(b'{') => is_unbounded_brace_after(bytes, i + 1),
_ => false,
};
if outer_quant {
if body_ends_with_unbounded_quantifier(group_body) {
return Some(format!(
"nested quantifier on group `({group_body})` — W2-E REDOS-01 shape"
));
}
if alternation_has_overlap(group_body) {
return Some(format!(
"overlapping alternation inside quantified group `({group_body})`"
));
}
}
last_quantifiable_end = Some(i + 1);
i += 1;
continue;
}
if b == b'+' || b == b'*' {
last_quantifiable_end = Some(i + 1);
i += 1;
continue;
}
last_quantifiable_end = Some(i + 1);
i += 1;
}
let _ = last_quantifiable_end; None
}
fn is_unbounded_brace_after(bytes: &[u8], start: usize) -> bool {
debug_assert!(start < bytes.len() && bytes[start] == b'{');
let mut k = start + 1;
let n_start = k;
while k < bytes.len() && bytes[k].is_ascii_digit() {
k += 1;
}
if k == n_start {
return false;
}
if k >= bytes.len() {
return false;
}
if bytes[k] == b'}' {
return false;
}
if bytes[k] != b',' {
return false;
}
k += 1;
let m_start = k;
while k < bytes.len() && bytes[k].is_ascii_digit() {
k += 1;
}
if k >= bytes.len() || bytes[k] != b'}' {
return false;
}
m_start == k
}
fn body_ends_with_unbounded_quantifier(body: &str) -> bool {
let trimmed = body.trim_end();
if let Some(last) = trimmed.as_bytes().last().copied() {
if last == b'+' || last == b'*' {
return true;
}
if last == b'}' {
if let Some(open) = trimmed.rfind('{') {
let spec = &trimmed[open + 1..trimmed.len() - 1];
if let Some(comma) = spec.find(',') {
let after = spec[comma + 1..].trim();
return after.is_empty();
}
}
}
}
false
}
fn alternation_has_overlap(body: &str) -> bool {
if !body.contains('|') {
return false;
}
let mut depth_paren = 0;
let mut depth_class = 0;
let mut branches: Vec<&str> = Vec::new();
let mut start = 0;
let bytes = body.as_bytes();
for (idx, &b) in bytes.iter().enumerate() {
match b {
b'\\' => {
}
b'(' if depth_class == 0 => depth_paren += 1,
b')' if depth_class == 0 && depth_paren > 0 => depth_paren -= 1,
b'[' if depth_class == 0 => depth_class = 1,
b']' if depth_class == 1 => depth_class = 0,
b'|' if depth_paren == 0
&& depth_class == 0
&& (idx == 0 || bytes[idx - 1] != b'\\') =>
{
branches.push(&body[start..idx]);
start = idx + 1;
}
_ => {}
}
}
branches.push(&body[start..]);
if branches.len() < 2 {
return false;
}
let firsts: Vec<u8> = branches
.iter()
.filter_map(|br| {
let s = br.trim();
if s.is_empty() || s.len() > 8 {
return None;
}
if s.bytes().any(|c| {
matches!(
c,
b'.' | b'*' | b'+' | b'?' | b'(' | b'[' | b'\\' | b'^' | b'$' | b'{'
)
}) {
return None;
}
s.bytes().next()
})
.collect();
if firsts.len() < 2 {
return false;
}
let mut sorted = firsts.clone();
sorted.sort_unstable();
sorted.dedup();
sorted.len() < firsts.len()
}
#[cfg(test)]
mod tests {
use super::*;
fn rejected(src: &str) -> String {
match scan_script_for_redos(src) {
RegexScanVerdict::Accept => panic!("expected Reject for: {src}"),
RegexScanVerdict::Reject { reason } => reason,
}
}
fn accepted(src: &str) {
match scan_script_for_redos(src) {
RegexScanVerdict::Accept => {}
RegexScanVerdict::Reject { reason } => {
panic!("expected Accept for `{src}` but rejected: {reason}")
}
}
}
#[test]
fn w2e_redos_01_proof_pattern_rejected() {
let reason = rejected(r#"var m = /(a+)+$/.test("aaaaaaaaaaaaaaaaaaaaaaaaa!");"#);
assert!(
reason.contains("nested quantifier"),
"reason should name nested quantifier; got: {reason}"
);
}
#[test]
fn star_star_shape_rejected() {
rejected("var m = /(a*)*$/.test('aaaa');");
}
#[test]
fn plus_star_shape_rejected() {
rejected("var m = /(a+)*$/.test('aaaa');");
}
#[test]
fn star_plus_shape_rejected() {
rejected("var m = /(a*)+$/.test('aaaa');");
}
#[test]
fn char_class_inside_nested_quant_rejected() {
rejected("var m = /([a-z]+)+$/.test('abcd');");
}
#[test]
fn dot_plus_plus_rejected() {
rejected("var m = /(.+)+$/.test('abc');");
}
#[test]
fn bounded_unbounded_brace_rejected() {
rejected("var m = /(a{1,})+$/.test('aaaa');");
}
#[test]
fn bounded_brace_outside_accepted() {
accepted("var m = /(a+){1,3}$/.test('aaa');");
}
#[test]
fn bounded_brace_inside_accepted() {
accepted("var m = /(a{1,3})+$/.test('aaa');");
}
#[test]
fn new_regexp_double_quote_rejected() {
let r = rejected(r#"var re = new RegExp("(a+)+$");"#);
assert!(r.starts_with("new RegExp"), "reason: {r}");
}
#[test]
fn new_regexp_single_quote_rejected() {
rejected(r#"var re = new RegExp('(a+)+$');"#);
}
#[test]
fn new_regexp_with_whitespace_rejected() {
rejected(r#"var re = new RegExp ( "(a+)+$" );"#);
}
#[test]
fn my_regexp_identifier_does_not_trigger() {
accepted("var MyRegExp = 1; var x = MyRegExp;");
}
#[test]
fn regexp_method_call_not_constructor_does_not_trigger() {
accepted("var x = RegExp.prototype.toString;");
}
#[test]
fn dangerous_shape_in_line_comment_accepted() {
accepted("// /(a+)+$/ is dangerous but commented out\nvar x = 1;");
}
#[test]
fn dangerous_shape_in_block_comment_accepted() {
accepted("/* /(a+)+$/ */ var x = 1;");
}
#[test]
fn dangerous_shape_in_string_literal_accepted() {
accepted(r#"var s = "/(a+)+$/"; var x = 1;"#);
}
#[test]
fn dangerous_shape_in_template_literal_accepted() {
accepted(r#"var s = `/(a+)+$/`; var x = 1;"#);
}
#[test]
fn xfa_zip_code_mask_accepted() {
accepted(r#"var m = /^\d{4}\s?[A-Z]{2}$/.test('1234 AB');"#);
}
#[test]
fn xfa_phone_number_mask_accepted() {
accepted(r#"var m = /^\+?[0-9]{1,3}-[0-9]{3,12}$/.test('+1-555');"#);
}
#[test]
fn xfa_email_mask_accepted() {
accepted(r#"var m = /^[^@]+@[^@]+\.[a-zA-Z]{2,}$/.test('a@b.co');"#);
}
#[test]
fn xfa_date_iso_mask_accepted() {
accepted(r#"var m = /^\d{4}-\d{2}-\d{2}$/.test('2026-05-19');"#);
}
#[test]
fn alternation_aa_aa_overlap_rejected() {
rejected("var m = /(a|aa)+$/.test('aaa');");
}
#[test]
fn alternation_disjoint_first_chars_accepted() {
accepted("var m = /(a|b)+$/.test('abab');");
}
#[test]
fn oversized_pattern_rejected() {
let big = "a".repeat(5000);
let src = format!(r#"var re = new RegExp("{big}");"#);
let reason = rejected(&src);
assert!(reason.contains("4 KiB"), "reason: {reason}");
}
#[test]
fn division_after_identifier_accepted() {
accepted("var x = a / b; var y = c / d;");
}
#[test]
fn division_after_number_accepted() {
accepted("var x = 10 / 5;");
}
#[test]
fn division_after_close_paren_accepted() {
accepted("var x = (a + b) / c;");
}
#[test]
fn pattern_is_dangerous_bare_nested_plus_plus() {
assert!(pattern_is_dangerous("(a+)+").is_some());
}
#[test]
fn pattern_is_dangerous_safe_anchored_digits() {
assert!(pattern_is_dangerous(r"^\d{4}$").is_none());
}
#[test]
fn reject_verdict_converts_to_typed_error() {
let v = RegexScanVerdict::Reject {
reason: "test".into(),
};
match v.into_sandbox_error() {
SandboxError::RegexRejected(r) => assert_eq!(r, "test"),
other => panic!("expected RegexRejected, got {other:?}"),
}
}
}