#![allow(clippy::uninlined_format_args)]
/// Asserts that `pattern` is rejected by `regress::Regex::new`.
///
/// # Panics
///
/// Panics if the pattern compiles successfully. Because of `#[track_caller]`
/// the failure is attributed to the calling test rather than to this helper.
#[track_caller]
pub fn test_parse_fails(pattern: &str) {
    let res = regress::Regex::new(pattern);
    assert!(res.is_err(), "Pattern should not have parsed: {}", pattern);
}
/// Asserts that `pattern` is rejected by `regress::Regex::with_flags` when
/// compiled with `flags`.
///
/// # Panics
///
/// Panics if the pattern compiles successfully; the message names both the
/// pattern and the flags. Because of `#[track_caller]` the failure is
/// attributed to the calling test rather than to this helper.
#[track_caller]
pub fn test_parse_fails_flags(pattern: &str, flags: &str) {
    let res = regress::Regex::with_flags(pattern, flags);
    assert!(
        res.is_err(),
        "Pattern should not have parsed: {} (flags: {})",
        pattern,
        flags
    );
}
fn format_match(r: ®ress::Match, input: &str) -> String {
let mut result = input[r.range()].to_string();
for cg in r.captures.iter() {
result.push(',');
if let Some(cg) = cg {
result.push_str(&input[cg.clone()])
}
}
result
}
/// Encodes `input` as a vector of UTF-16 code units.
pub fn to_utf16(input: &str) -> Vec<u16> {
    let mut units = Vec::new();
    units.extend(input.encode_utf16());
    units
}
/// Converts a range of UTF-16 code-unit indices into the corresponding range
/// of UTF-8 byte offsets.
///
/// `utf16` is the UTF-16 text and `r` indexes into it. The result covers the
/// same characters in the UTF-8 encoding of that text.
///
/// # Panics
///
/// Panics with "Invalid UTF16" if the units before `r`, or the units inside
/// `r`, do not decode as well-formed UTF-16. Also panics if `r` is out of
/// bounds for `utf16`.
pub fn range_from_utf16(utf16: &[u16], r: regress::Range) -> regress::Range {
    // Number of UTF-8 bytes needed to encode the characters in `units`.
    fn utf8_byte_len(units: &[u16]) -> usize {
        use std::char::decode_utf16;
        let mut total = 0;
        for decoded in decode_utf16(units.iter().copied()) {
            total += decoded.expect("Invalid UTF16").len_utf8();
        }
        total
    }

    // The prefix is measured first, then the span itself.
    let begin = utf8_byte_len(&utf16[0..r.start]);
    let span = utf8_byte_len(&utf16[r]);
    begin..(begin + span)
}
/// Method-style equality assertion for `String` values in tests.
pub trait StringTestHelpers {
    /// Asserts that `self` equals `s`.
    ///
    /// # Panics
    ///
    /// Panics via `assert_eq!` when the strings differ. `#[track_caller]`
    /// makes the reported location the calling test, not this helper.
    #[track_caller]
    fn test_eq(&self, s: &str);
}

impl StringTestHelpers for String {
    #[track_caller]
    fn test_eq(&self, rhs: &str) {
        assert_eq!(self.as_str(), rhs)
    }
}
/// Method-style equality assertion for `Vec<&str>` values in tests.
pub trait VecTestHelpers {
    /// Asserts that `self` equals `rhs`, element by element.
    ///
    /// # Panics
    ///
    /// Panics via `assert_eq!` when the vectors differ. `#[track_caller]`
    /// makes the reported location the calling test, not this helper.
    #[track_caller]
    fn test_eq(&self, rhs: Vec<&str>);
}

impl VecTestHelpers for Vec<&str> {
    #[track_caller]
    fn test_eq(&self, rhs: Vec<&str>) {
        assert_eq!(*self, rhs)
    }
}
/// A compiled regex bundled with the `TestConfig` it was compiled under.
///
/// Values are built by `TestConfig::compilef`; the stored configuration is
/// consulted by `matches` to decide how the regex is executed.
#[derive(Debug, Clone)]
pub struct TestCompiledRegex {
// The compiled pattern.
re: regress::Regex,
// Copy of the configuration that produced `re`.
tc: TestConfig,
}
impl TestCompiledRegex {
    /// Returns every match of the compiled regex in `input`, searching from
    /// byte offset `start`.
    ///
    /// How the search runs depends on the stored `TestConfig`:
    ///
    /// * With the `utf16` feature, a `Utf16` or `Ucs2` encoding routes the
    ///   search through `match_utf16` / `match_ucs2`. A `Ucs2` configuration
    ///   uses the UTF-16 path when `input` contains a character above U+FFFF.
    /// * Otherwise the configured backend is run, using the ASCII entry point
    ///   when `TestConfig::use_ascii` approves the input.
    #[track_caller]
    pub fn matches(&'_ self, input: &'_ str, start: usize) -> Vec<regress::Match> {
        use regress::backends as rbe;

        #[cfg(feature = "utf16")]
        {
            if self.tc.encoding == Encoding::Utf16 {
                return self.match_utf16(input, start);
            } else if self.tc.encoding == Encoding::Ucs2 {
                // Inputs with characters above U+FFFF are handled by the UTF-16 path.
                if input.chars().any(|c| c > '\u{FFFF}') {
                    return self.match_utf16(input, start);
                }
                return self.match_ucs2(input, start);
            }
        }

        match (self.tc.use_ascii(input), self.tc.backend) {
            // The ASCII configuration must go through `find_ascii`; otherwise it
            // would duplicate the non-ASCII PikeVM run below.
            (true, Backend::PikeVM) => {
                rbe::find_ascii::<rbe::PikeVMExecutor>(&self.re, input, start).collect()
            }
            (false, Backend::PikeVM) => {
                rbe::find::<rbe::PikeVMExecutor>(&self.re, input, start).collect()
            }
            (true, Backend::Backtracking) => {
                rbe::find_ascii::<rbe::BacktrackExecutor>(&self.re, input, start).collect()
            }
            (false, Backend::Backtracking) => {
                rbe::find::<rbe::BacktrackExecutor>(&self.re, input, start).collect()
            }
        }
    }

    /// Matches against the UTF-16 encoding of `input` via
    /// `Regex::find_from_utf16`.
    ///
    /// `start` is a byte offset into `input`; it is converted to a UTF-16
    /// code-unit offset before searching. Every returned range (overall match
    /// and captures) is converted back to UTF-8 byte offsets into `input`.
    #[cfg(feature = "utf16")]
    #[track_caller]
    pub fn match_utf16(&self, input: &str, start: usize) -> Vec<regress::Match> {
        let u16_start = input[..start].chars().map(char::len_utf16).sum();
        let u16_input = to_utf16(input);
        let mut matches: Vec<_> = self.re.find_from_utf16(&u16_input, u16_start).collect();
        for matc in matches.iter_mut() {
            matc.range = range_from_utf16(&u16_input, matc.range());
            for r in matc.captures.iter_mut().flatten() {
                *r = range_from_utf16(&u16_input, r.clone());
            }
        }
        matches
    }

    /// Matches against the UTF-16 code units of `input` via
    /// `Regex::find_from_ucs2`.
    ///
    /// `start` and the returned ranges are handled as in `match_utf16`: byte
    /// offsets into `input` on both sides.
    #[cfg(feature = "utf16")]
    #[track_caller]
    pub fn match_ucs2(&self, input: &str, start: usize) -> Vec<regress::Match> {
        let u16_start = input[..start].chars().map(char::len_utf16).sum();
        let u16_input = to_utf16(input);
        let mut matches: Vec<_> = self.re.find_from_ucs2(&u16_input, u16_start).collect();
        for matc in matches.iter_mut() {
            matc.range = range_from_utf16(&u16_input, matc.range());
            for r in matc.captures.iter_mut().flatten() {
                *r = range_from_utf16(&u16_input, r.clone());
            }
        }
        matches
    }

    /// Returns the first match in `input`, if any, using `matches`.
    pub fn find(&self, input: &str) -> Option<regress::Match> {
        self.matches(input, 0).into_iter().next()
    }

    /// Returns the first match in `input`, if any, using `match_utf16`.
    #[cfg(feature = "utf16")]
    pub fn find_utf16(&self, input: &str) -> Option<regress::Match> {
        self.match_utf16(input, 0).into_iter().next()
    }

    /// Returns the first match in `input`, if any, using `match_ucs2`.
    #[cfg(feature = "utf16")]
    pub fn find_ucs2(&self, input: &str) -> Option<regress::Match> {
        self.match_ucs2(input, 0).into_iter().next()
    }

    /// Returns the first match rendered by `format_match`.
    ///
    /// # Panics
    ///
    /// Panics if the regex does not match `input`.
    #[track_caller]
    pub fn match1f(&self, input: &str) -> String {
        match self.find(input) {
            Some(m) => format_match(&m, input),
            None => panic!("Failed to match {}", input),
        }
    }

    /// Returns the text captured by the named group `group` in the first match.
    ///
    /// # Panics
    ///
    /// Panics if there is no match, if `named_group` yields nothing for
    /// `group`, or if the group's range cannot be sliced out of `input`.
    #[track_caller]
    pub fn match1_named_group(&self, input: &str, group: &str) -> String {
        let m = match self.find(input) {
            Some(m) => m,
            None => panic!("Failed to match {}", input),
        };
        let r = match m.named_group(group) {
            Some(r) => r,
            None => panic!("Named capture group does not exist {}", group),
        };
        match input.get(r.clone()) {
            Some(text) => text.to_string(),
            None => panic!("Cannot get range from string input {:?}", r),
        }
    }

    /// Returns the first match as a vector: the overall match text followed by
    /// each capture group's text, with `None` for groups that did not match.
    ///
    /// # Panics
    ///
    /// Panics if the regex does not match `input`.
    #[track_caller]
    pub fn match1_vec<'b>(&self, input: &'b str) -> Vec<Option<&'b str>> {
        let m: regress::Match = self.find(input).expect("Failed to match");
        let mut result = Vec::new();
        result.push(Some(&input[m.range()]));
        result.extend(m.captures.into_iter().map(|cr| cr.map(|r| &input[r])));
        result
    }

    /// Asserts that the regex does not match `input`.
    #[track_caller]
    pub fn test_fails(&self, input: &str) {
        assert!(self.find(input).is_none(), "Should not have matched")
    }

    /// Asserts that the regex matches `input`.
    #[track_caller]
    pub fn test_succeeds(&self, input: &str) {
        assert!(self.find(input).is_some(), "Should have matched")
    }

    /// Returns the ranges of all matches in `input`, searching from byte
    /// offset `start`.
    pub fn match_all_from(&'_ self, input: &'_ str, start: usize) -> Vec<regress::Range> {
        self.matches(input, start)
            .into_iter()
            .map(move |m| m.range())
            .collect()
    }

    /// Returns the text of every match in `input`.
    pub fn match_all<'b>(&self, input: &'b str) -> Vec<&'b str> {
        self.matches(input, 0)
            .into_iter()
            .map(move |m| &input[m.range()])
            .collect()
    }

    /// Formats every match in `input` with `format_match` and joins the
    /// results with `,`.
    pub fn run_global_match(&self, input: &str) -> String {
        self.matches(input, 0)
            .into_iter()
            .map(move |m| format_match(&m, input))
            .collect::<Vec<String>>()
            .join(",")
    }
}
/// Which `regress::backends` executor `TestCompiledRegex::matches` runs.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum Backend {
// Dispatches to `regress::backends::PikeVMExecutor`.
PikeVM,
// Dispatches to `regress::backends::BacktrackExecutor`.
Backtracking,
}
/// Input encoding under which a test configuration runs its matches.
///
/// Only consulted by `TestCompiledRegex::matches` when the `utf16` feature is
/// enabled; otherwise every configuration matches directly on the `&str`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum Encoding {
// Match directly on the `&str` input with the configured backend.
Utf8,
// Match through `TestCompiledRegex::match_utf16`.
Utf16,
// Match through `TestCompiledRegex::match_ucs2`, except for inputs containing
// characters above U+FFFF, which use the UTF-16 path.
Ucs2,
}
/// One point in the test matrix: how a pattern is compiled and executed.
#[derive(Debug, Copy, Clone)]
pub struct TestConfig {
// Permit the ASCII search entry point; it is only taken when the input is
// itself all-ASCII (see `use_ascii`).
ascii: bool,
// When false, `compilef` sets `no_opt` on the regex flags.
optimize: bool,
// Executor selected in `TestCompiledRegex::matches`.
backend: Backend,
// Only read inside `#[cfg(feature = "utf16")]` code, hence the allow for
// builds without that feature.
#[allow(dead_code)]
encoding: Encoding,
}
impl TestConfig {
    /// Returns whether `s` should be searched through the ASCII entry point:
    /// the configuration allows it and `s` contains only ASCII.
    pub fn use_ascii(&self, s: &str) -> bool {
        self.ascii && s.is_ascii()
    }

    /// Compiles `pattern` with no flags. See [`TestConfig::compilef`].
    ///
    /// # Panics
    ///
    /// Panics if the pattern fails to parse.
    #[track_caller]
    pub fn compile(&self, pattern: &str) -> TestCompiledRegex {
        self.compilef(pattern, "")
    }

    /// Compiles `pattern` with the flags in `flags_str`, pairing the result
    /// with a copy of this configuration.
    ///
    /// The `no_opt` flag is forced to the inverse of this configuration's
    /// `optimize` setting, regardless of `flags_str`.
    ///
    /// # Panics
    ///
    /// Panics with the flags, pattern and parse error if compilation fails.
    #[track_caller]
    pub fn compilef(&self, pattern: &str, flags_str: &str) -> TestCompiledRegex {
        let mut flags = regress::Flags::from(flags_str);
        flags.no_opt = !self.optimize;
        match regress::Regex::with_flags(pattern, flags) {
            Ok(re) => TestCompiledRegex { re, tc: *self },
            Err(error) => panic!(
                "Failed to parse! flags: {} pattern: {}, error: {}",
                flags_str, pattern, error
            ),
        }
    }

    /// Compiles `pattern` with `flags_str` and asserts that it matches `input`.
    #[track_caller]
    pub fn test_match_succeeds(&self, pattern: &str, flags_str: &str, input: &str) {
        let cr = self.compilef(pattern, flags_str);
        cr.test_succeeds(input)
    }

    /// Compiles `pattern` with `flags_str` and asserts that it does not match
    /// `input`.
    #[track_caller]
    pub fn test_match_fails(&self, pattern: &str, flags_str: &str, input: &str) {
        let cr = self.compilef(pattern, flags_str);
        cr.test_fails(input)
    }
}
/// Runs `func` once for every configuration in the test matrix.
///
/// The UTF-8 configurations always run, in this order: PikeVM unoptimized
/// (ASCII allowed, then not), backtracking unoptimized (ASCII allowed, then
/// not), backtracking optimized (ASCII allowed, then not).
///
/// When built with the `utf16` feature, four more backtracking configurations
/// follow with ASCII disallowed: UTF-16 unoptimized and optimized, then UCS-2
/// unoptimized and optimized.
pub fn test_with_configs<F>(func: F)
where
    F: Fn(TestConfig),
{
    // (ascii, optimize, backend) for each UTF-8 run, in execution order.
    let utf8_runs = [
        (true, false, Backend::PikeVM),
        (false, false, Backend::PikeVM),
        (true, false, Backend::Backtracking),
        (false, false, Backend::Backtracking),
        (true, true, Backend::Backtracking),
        (false, true, Backend::Backtracking),
    ];
    for &(ascii, optimize, backend) in utf8_runs.iter() {
        func(TestConfig {
            ascii,
            optimize,
            backend,
            encoding: Encoding::Utf8,
        });
    }

    if cfg!(feature = "utf16") {
        // (optimize, encoding) for each 16-bit run, in execution order.
        let wide_runs = [
            (false, Encoding::Utf16),
            (true, Encoding::Utf16),
            (false, Encoding::Ucs2),
            (true, Encoding::Ucs2),
        ];
        for &(optimize, encoding) in wide_runs.iter() {
            func(TestConfig {
                ascii: false,
                optimize,
                backend: Backend::Backtracking,
                encoding,
            });
        }
    }
}
pub fn test_with_configs_no_ascii<F>(func: F)
where
F: Fn(TestConfig),
{
func(TestConfig {
ascii: false,
optimize: false,
backend: Backend::PikeVM,
encoding: Encoding::Utf8,
});
func(TestConfig {
ascii: false,
optimize: false,
backend: Backend::Backtracking,
encoding: Encoding::Utf8,
});
func(TestConfig {
ascii: false,
optimize: true,
backend: Backend::Backtracking,
encoding: Encoding::Utf8,
});
if cfg!(feature = "utf16") {
func(TestConfig {
ascii: false,
optimize: false,
backend: Backend::Backtracking,
encoding: Encoding::Utf16,
});
func(TestConfig {
ascii: false,
optimize: true,
backend: Backend::Backtracking,
encoding: Encoding::Utf16,
});
func(TestConfig {
ascii: false,
optimize: false,
backend: Backend::Backtracking,
encoding: Encoding::Ucs2,
});
func(TestConfig {
ascii: false,
optimize: true,
backend: Backend::Backtracking,
encoding: Encoding::Ucs2,
});
}
}