use super::{
CaptureGroup, CompiledRegex, EngineError, EngineFlags, EngineKind, EngineResult, Match,
RegexEngine,
};
/// Reports whether the PCRE2 library linked into this process identifies
/// itself as version 10.45.
///
/// Returns `false` when the version cannot be determined.
pub fn is_pcre2_10_45() -> bool {
    matches!(runtime_pcre2_version(), Some((10, 45)))
}
/// Asks the linked PCRE2 library for its version and returns `(major, minor)`.
///
/// The library reports a string of the form `"<major>.<minor>[suffix] <date>"`.
/// Only the first whitespace-separated token is inspected. A suffix attached
/// to the minor number (for example the `-RC1` in `10.45-RC1`) is ignored, so
/// pre-release builds are reported as their base version instead of being
/// treated as "unknown".
///
/// Returns `None` if PCRE2 reports an error, or if the string is not valid
/// UTF-8 or does not start with `<digits>.<digits>`.
fn runtime_pcre2_version() -> Option<(u32, u32)> {
    /// Parses the run of ASCII digits at the start of `part`, ignoring any
    /// trailing non-digit suffix. Returns `None` if there are no leading digits.
    fn leading_number(part: &str) -> Option<u32> {
        let digits_end = part
            .find(|c: char| !c.is_ascii_digit())
            .unwrap_or(part.len());
        part[..digits_end].parse().ok()
    }

    // SAFETY: with a null output pointer PCRE2 only reports the buffer length
    // it needs (in code units, including the terminating NUL); nothing is written.
    let needed = unsafe {
        pcre2_sys::pcre2_config_8(pcre2_sys::PCRE2_CONFIG_VERSION, std::ptr::null_mut())
    };
    if needed <= 0 {
        return None;
    }

    // `needed` is strictly positive here, so the cast cannot wrap.
    let mut buf: Vec<u8> = vec![0u8; needed as usize];

    // SAFETY: `buf` is a live, writable allocation of exactly the length that
    // PCRE2 requested in the call above.
    let rc = unsafe {
        pcre2_sys::pcre2_config_8(
            pcre2_sys::PCRE2_CONFIG_VERSION,
            buf.as_mut_ptr() as *mut std::ffi::c_void,
        )
    };
    if rc < 0 {
        return None;
    }

    // Locate the terminator inside our own buffer instead of trusting a raw
    // pointer walk; a missing NUL yields `None` rather than an out-of-bounds read.
    let nul = buf.iter().position(|&b| b == 0)?;
    let s = std::str::from_utf8(&buf[..nul]).ok()?;

    let version_part = s.split_whitespace().next()?;
    let mut parts = version_part.split('.');
    let major = parts.next()?.parse::<u32>().ok()?;
    let minor = leading_number(parts.next()?)?;
    Some((major, minor))
}
/// Returns `true` when `pattern` contains the scan-substring construct in
/// either its short (`(*scs:`) or long (`(*scan_substring:`) spelling.
///
/// The comparison is case-insensitive, and it is a plain substring search:
/// the pattern is not parsed, so escaped or quoted occurrences also count.
fn uses_scs_verb(pattern: &str) -> bool {
    const NEEDLES: [&str; 2] = ["(*scs:", "(*scan_substring:"];

    let folded = pattern.to_lowercase();
    NEEDLES.iter().any(|needle| folded.contains(*needle))
}
/// Stateless regex engine that compiles patterns with the PCRE2 library.
pub struct Pcre2Engine;
impl RegexEngine for Pcre2Engine {
    /// Identifies this engine as the PCRE2 backend.
    fn kind(&self) -> EngineKind {
        EngineKind::Pcre2
    }

    /// Compiles `pattern` with PCRE2 using the options in `flags`.
    ///
    /// UTF mode is always enabled and JIT compilation is requested whenever
    /// the library offers it. The remaining options are taken from `flags`.
    ///
    /// # Errors
    ///
    /// Returns `EngineError::CompileError` when the linked library is PCRE2
    /// 10.45 and the pattern contains the scan-substring construct
    /// (CVE-2025-58050), or when PCRE2 itself rejects the pattern.
    fn compile(&self, pattern: &str, flags: &EngineFlags) -> EngineResult<Box<dyn CompiledRegex>> {
        // Refuse the vulnerable construct before the pattern reaches the library.
        let blocked_by_cve = is_pcre2_10_45() && uses_scs_verb(pattern);
        if blocked_by_cve {
            let message = "Pattern blocked (CVE-2025-58050): (*scs:) / (*SCAN_SUBSTRING:) triggers \
                           a heap-buffer-overflow on the linked PCRE2 10.45. \
                           Upgrade to PCRE2 >= 10.46 to use this verb.";
            return Err(EngineError::CompileError(message.to_string()));
        }

        let built = pcre2::bytes::RegexBuilder::new()
            .utf(true)
            .ucp(flags.unicode)
            .caseless(flags.case_insensitive)
            .multi_line(flags.multi_line)
            .dotall(flags.dot_matches_newline)
            .extended(flags.extended)
            .jit_if_available(true)
            .build(pattern);

        match built {
            Ok(re) => Ok(Box::new(Pcre2CompiledRegex { re })),
            Err(e) => Err(EngineError::CompileError(e.to_string())),
        }
    }
}
/// A pattern compiled by `Pcre2Engine::compile`, ready to be matched against text.
struct Pcre2CompiledRegex {
// The underlying byte-oriented PCRE2 regex.
re: pcre2::bytes::Regex,
}
impl CompiledRegex for Pcre2CompiledRegex {
    /// Finds all successive, non-overlapping matches of the pattern in `text`.
    ///
    /// Every search runs against the *whole* subject with a start offset, so
    /// look-behind assertions, `^`, `\b` and similar constructs see the text
    /// that precedes the search position.
    ///
    /// Empty matches are reported, except for an empty match located exactly
    /// where the previous match ended. After an empty match the search resumes
    /// at the next UTF-8 character boundary, never inside a multi-byte character.
    ///
    /// All offsets in the returned matches and capture groups are byte offsets
    /// into `text`. Capture groups that did not participate are omitted.
    ///
    /// # Errors
    ///
    /// Returns `EngineError::MatchError` if PCRE2 reports a matching error or
    /// if a reported span does not lie on UTF-8 character boundaries of `text`.
    fn find_matches(&self, text: &str) -> EngineResult<Vec<Match>> {
        /// Smallest char boundary strictly greater than `pos`; yields
        /// `text.len() + 1` once `pos` has reached the end of `text`.
        fn next_char_boundary(text: &str, pos: usize) -> usize {
            let mut next = pos + 1;
            while next < text.len() && !text.is_char_boundary(next) {
                next += 1;
            }
            next
        }

        /// Copies `text[start..end]`, turning an invalid span into an error
        /// instead of a panic.
        fn checked_slice(text: &str, start: usize, end: usize) -> EngineResult<String> {
            text.get(start..end).map(str::to_owned).ok_or_else(|| {
                EngineError::MatchError(format!(
                    "match span {}..{} does not fall on UTF-8 character boundaries",
                    start, end
                ))
            })
        }

        let bytes = text.as_bytes();
        let names = self.re.capture_names();
        // Reused across iterations; overwritten by every successful search.
        let mut locs = self.re.capture_locations();

        let mut matches = Vec::new();
        let mut offset = 0;
        let mut last_match_end: Option<usize> = None;

        while offset <= bytes.len() {
            let search_from = offset;
            let overall = match self.re.captures_read_at(&mut locs, bytes, search_from) {
                Ok(Some(m)) => m,
                Ok(None) => break,
                Err(e) => return Err(EngineError::MatchError(e.to_string())),
            };
            let (start, end) = (overall.start(), overall.end());
            let is_empty = start == end;

            // Always resume strictly past the previous search position so the
            // loop terminates even if the engine reports a span that ends at
            // or before it.
            offset = if is_empty || end <= search_from {
                next_char_boundary(text, end.max(search_from))
            } else {
                end
            };

            // An empty match glued to the end of the previous match is not a
            // new match; move on to the next candidate.
            if is_empty && last_match_end == Some(end) {
                continue;
            }
            last_match_end = Some(end);

            let mut captures = Vec::new();
            for i in 1..locs.len() {
                if let Some((cap_start, cap_end)) = locs.get(i) {
                    captures.push(CaptureGroup {
                        index: i,
                        name: names.get(i).and_then(|n| n.clone()),
                        start: cap_start,
                        end: cap_end,
                        text: checked_slice(text, cap_start, cap_end)?,
                    });
                }
            }

            matches.push(Match {
                start,
                end,
                text: checked_slice(text, start, end)?,
                captures,
            });
        }

        Ok(matches)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain quantified class finds every run of digits.
    #[test]
    fn test_simple_match() {
        let engine = Pcre2Engine;
        let flags = EngineFlags::default();
        let compiled = engine.compile(r"\d+", &flags).unwrap();
        let matches = compiled.find_matches("abc 123 def 456").unwrap();
        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].text, "123");
    }

    /// Named groups are reported with their names and matched text.
    #[test]
    fn test_named_captures() {
        let engine = Pcre2Engine;
        let flags = EngineFlags::default();
        let compiled = engine
            .compile(r"(?P<user>\w+)@(?P<domain>\w+)", &flags)
            .unwrap();
        let matches = compiled.find_matches("user@example").unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].captures.len(), 2);
        assert_eq!(matches[0].captures[0].name, Some("user".to_string()));
        assert_eq!(matches[0].captures[0].text, "user");
        assert_eq!(matches[0].captures[1].name, Some("domain".to_string()));
        assert_eq!(matches[0].captures[1].text, "example");
    }

    /// Backreferences are supported by this engine.
    #[test]
    fn test_backreference() {
        let engine = Pcre2Engine;
        let flags = EngineFlags::default();
        let compiled = engine.compile(r"(\w+) \1", &flags).unwrap();
        let matches = compiled.find_matches("hello hello world").unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].text, "hello hello");
    }

    /// Look-ahead assertions are supported and excluded from the match text.
    #[test]
    fn test_lookahead() {
        let engine = Pcre2Engine;
        let flags = EngineFlags::default();
        let compiled = engine.compile(r"\w+(?=@)", &flags).unwrap();
        let matches = compiled.find_matches("user@example.com").unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].text, "user");
    }

    /// Both spellings of the scan-substring construct are detected in any
    /// letter case; unrelated verbs and assertions are not.
    #[test]
    fn test_uses_scs_verb_detection() {
        assert!(uses_scs_verb(r"(a)(b+)(*scs:(1)a(*ACCEPT))(\2)"));
        assert!(uses_scs_verb(r"(a)(*SCS:b)"));
        assert!(uses_scs_verb(r"(a)(*SCAN_SUBSTRING:b)"));
        assert!(uses_scs_verb(r"(a)(*scan_substring:b)"));
        assert!(!uses_scs_verb(r"(\w+) \1"));
        assert!(!uses_scs_verb(r"(?<=@)\w+"));
        assert!(!uses_scs_verb(r"(*ACCEPT)"));
        assert!(!uses_scs_verb(r"(*FAIL)"));
    }

    /// Patterns without the scan-substring construct must compile on every
    /// PCRE2 version, including 10.45 where the CVE guard is active.
    #[test]
    fn test_non_scs_patterns_unaffected_by_cve_guard() {
        let engine = Pcre2Engine;
        let flags = EngineFlags::default();
        assert!(engine.compile(r"\d+", &flags).is_ok());
        assert!(engine.compile(r"(?<=@)\w+", &flags).is_ok());
    }

    /// On the affected version the guard must refuse the construct outright.
    /// Other versions are not asserted here because the library's own handling
    /// of the construct differs between releases.
    #[test]
    fn test_scs_pattern_rejected_on_affected_version() {
        if is_pcre2_10_45() {
            let engine = Pcre2Engine;
            let flags = EngineFlags::default();
            assert!(engine.compile(r"(a)(*scs:(1)a)", &flags).is_err());
            assert!(engine.compile(r"(a)(*SCAN_SUBSTRING:(1)a)", &flags).is_err());
        }
    }
}