use std::{fmt::Debug, path::Path};
use log::trace;
use crate::internal::ScannerImpl;
use crate::{FindMatches, Result, ScannerMode};
/// Interface for querying and changing the mode a scanner is currently in.
///
/// Modes are addressed by plain `usize` values. In this file the trait is implemented by
/// `Scanner`, which forwards every call to its internal implementation.
pub trait ScannerModeSwitcher {
    /// Returns the numeric identifier of the mode that is currently active.
    fn current_mode(&self) -> usize;

    /// Makes `mode` the active mode.
    ///
    /// The trait itself places no constraint on the value; whether an unknown mode is
    /// rejected or accepted is up to the implementor.
    fn set_mode(&mut self, mode: usize);

    /// Returns the name of the mode addressed by `index`, or `None` if the implementor has
    /// no name to report for it.
    fn mode_name(&self, index: usize) -> Option<&str>;
}
/// Public scanner type of this module.
///
/// It is a thin wrapper around the crate-internal `ScannerImpl`: the methods and trait
/// implementations in this file all forward to that inner value. A `Scanner` can be created
/// from a `Vec<ScannerMode>` through the `TryFrom` implementation below.
#[derive(Debug)]
pub struct Scanner {
/// The internal implementation that does the actual work. `find_iter` hands a clone of it
/// to the match iterator it creates.
pub(crate) inner: ScannerImpl,
}
impl Scanner {
    /// Creates an iterator over the matches found in `input`.
    ///
    /// The iterator receives a clone of this scanner's internal implementation, so the
    /// returned [`FindMatches`] is tied only to the lifetime of `input`, not to `self`.
    pub fn find_iter<'h>(&self, input: &'h str) -> FindMatches<'h> {
        let scanner_impl = self.inner.clone();
        FindMatches::new(scanner_impl, input)
    }

    /// Asks the internal implementation to log its compiled automata in DOT format.
    ///
    /// # Errors
    ///
    /// Returns whatever error the internal implementation reports.
    // NOTE(review): log target and level are decided inside `ScannerImpl` - not visible here.
    pub fn log_compiled_automata_as_dot(&self) -> Result<()> {
        let scanner_impl = &self.inner;
        scanner_impl.log_compiled_automata_as_dot()
    }

    /// Asks the internal implementation to generate its compiled automata as DOT output.
    ///
    /// `prefix` and `target_folder` are passed through unchanged.
    ///
    /// # Errors
    ///
    /// Returns whatever error the internal implementation reports.
    // NOTE(review): presumably one DOT file per automaton is written into `target_folder`
    // with names starting with `prefix` - confirm against `ScannerImpl`.
    pub fn generate_compiled_automata_as_dot(
        &self,
        prefix: &str,
        target_folder: &Path,
    ) -> Result<()> {
        let scanner_impl = &self.inner;
        scanner_impl.generate_compiled_automata_as_dot(prefix, target_folder)
    }
}
/// Mode switching for [`Scanner`]; every method forwards to the internal implementation.
impl ScannerModeSwitcher for Scanner {
    /// Emits a trace-level log record naming the requested mode and then switches the
    /// internal implementation to it.
    fn set_mode(&mut self, mode: usize) {
        trace!("Set scanner mode to {}", mode);
        self.inner.set_mode(mode);
    }

    /// Returns the mode currently reported by the internal implementation.
    fn current_mode(&self) -> usize {
        self.inner.current_mode()
    }

    /// Returns the name the internal implementation reports for the mode at `index`.
    fn mode_name(&self, index: usize) -> Option<&str> {
        self.inner.mode_name(index)
    }
}
impl TryFrom<Vec<ScannerMode>> for Scanner {
type Error = crate::ScnrError;
fn try_from(scanner_modes: Vec<ScannerMode>) -> Result<Self> {
Ok(Scanner {
inner: scanner_modes.try_into()?,
})
}
}
#[cfg(test)]
mod tests {
    // Unit tests for `Scanner`: construction through `ScannerBuilder`, mode switching, and
    // matching of regular expressions that are pathological for backtracking engines.
    use super::*;
    use crate::{Pattern, ScannerBuilder};
    use std::{fs, sync::Once};

    /// Guards the one-time setup performed by [`init`].
    static INIT: Once = Once::new();

    /// Folder passed to `generate_compiled_automata_as_dot` by the pathological-regex test.
    const TARGET_FOLDER: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/target/testout/test_pathological_regular_expressions_dfa"
    );

    /// Runs the shared test setup exactly once per test binary: initializes the logger and
    /// recreates an empty [`TARGET_FOLDER`].
    fn init() {
        INIT.call_once(|| {
            // Both results are ignored on purpose: the logger may already be installed and
            // the folder may not exist yet.
            let _ = env_logger::builder().is_test(true).try_init();
            let _ = fs::remove_dir_all(TARGET_FOLDER);
            fs::create_dir_all(TARGET_FOLDER).unwrap();
        });
    }

    /// Builds the single "INITIAL" mode shared by the builder and mode-switching tests:
    /// a newline pattern (token type 1) and a line-comment pattern (token type 3).
    fn line_comment_mode() -> ScannerMode {
        ScannerMode::new(
            "INITIAL",
            vec![
                Pattern::new(r"\r\n|\r|\n".to_string(), 1),
                Pattern::new(r"(//.*(\r\n|\r|\n))".to_string(), 3),
            ],
            vec![(1, 1), (3, 1)],
        )
    }

    #[test]
    fn test_scanner_builder_with_single_mode() {
        init();
        let scanner = ScannerBuilder::new()
            .add_scanner_mode(line_comment_mode())
            .build()
            .unwrap();
        assert_eq!(Some("INITIAL"), scanner.inner.mode_name(0));
    }

    #[test]
    fn test_scanner_current_mode() {
        init();
        let mut scanner = ScannerBuilder::new()
            .add_scanner_mode(line_comment_mode())
            .build()
            .unwrap();

        // A freshly built scanner reports mode 0, both directly and through `inner`.
        assert_eq!(0, scanner.current_mode());
        assert_eq!(0, scanner.inner.current_mode());

        scanner.set_mode(1);
        assert_eq!(1, scanner.current_mode());
        assert_eq!(1, scanner.inner.current_mode());

        // The iterator reports mode 0 right after creation although the scanner is in
        // mode 1, and creating it leaves the scanner's own mode untouched.
        let mut find_iter = scanner.find_iter("Hello\nWorld");
        assert_eq!(0, find_iter.current_mode());
        assert_eq!(1, scanner.current_mode());
        assert_eq!(1, scanner.inner.current_mode());
        assert_eq!(1, scanner.inner.clone().current_mode());

        find_iter.set_mode(1);
        assert_eq!(1, find_iter.current_mode());

        // Switching the scanner back is also observed by a clone of `inner` taken afterwards.
        scanner.set_mode(0);
        assert_eq!(0, scanner.current_mode());
        assert_eq!(0, scanner.inner.current_mode());
        assert_eq!(0, scanner.inner.clone().current_mode());
    }

    /// One table entry for [`test_pathological_regular_expressions_dfa`].
    struct TestData {
        /// Regular expression used as the only pattern of the scanner.
        pattern: &'static str,
        /// Text the scanner is run on.
        input: &'static str,
        /// Text of the first match, or `None` if no match is expected.
        expected_match: Option<&'static str>,
    }

    /// Patterns with nested or overlapping repetitions, each paired with a long run of `a`s.
    const TEST_DATA: &[TestData] = &[
        TestData {
            pattern: r"((a*)*b)",
            input: "aaaaaaaaaaaaaaaaaaaaaaaaaab",
            expected_match: Some("aaaaaaaaaaaaaaaaaaaaaaaaaab"),
        },
        TestData {
            pattern: r"(a+)+b",
            input: "aaaaaaaaaaaaaaaaaaaaaaaaaab",
            expected_match: Some("aaaaaaaaaaaaaaaaaaaaaaaaaab"),
        },
        TestData {
            pattern: r"(a+)+b",
            input: "aaaaaaaaaaaaaaaaaaaaaaaaaa",
            expected_match: None,
        },
        TestData {
            pattern: r"(a|aa)+b",
            input: "aaaaaaaaaaaaaaaaaaaaaaaaaab",
            expected_match: Some("aaaaaaaaaaaaaaaaaaaaaaaaaab"),
        },
        TestData {
            pattern: r"(a|a?)+b",
            input: "aaaaaaaaaaaaaaaaaaaaaaaaaab",
            expected_match: Some("aaaaaaaaaaaaaaaaaaaaaaaaaab"),
        },
        TestData {
            pattern: r"((a|aa|aaa|aaaa|aaaaa)*)*b",
            input: "aaaaaaaaaaaaaaaaaaaaaaaaaab",
            expected_match: Some("aaaaaaaaaaaaaaaaaaaaaaaaaab"),
        },
        TestData {
            pattern: r"((a*a*a*a*a*a*)*)*b",
            input: "aaaaaaaaaaaaaaaaaaaaaaaaaab",
            expected_match: Some("aaaaaaaaaaaaaaaaaaaaaaaaaab"),
        },
        TestData {
            pattern: r"a{3}{3}*b",
            input: "aaaaaaaaaaaaaaaaaaaaaaaaaaab",
            expected_match: Some("aaaaaaaaaaaaaaaaaaaaaaaaaaab"),
        },
    ];

    #[test]
    fn test_pathological_regular_expressions_dfa() {
        init();
        // `index` is only read by the DOT generation below, which is compiled out when the
        // `regex_automata` feature is enabled; the lint would fire in that configuration.
        #[allow(unused_variables)]
        for (index, test) in TEST_DATA.iter().enumerate() {
            let scanner_mode = ScannerMode::new(
                "INITIAL",
                vec![Pattern::new(test.pattern.to_string(), 1)],
                vec![],
            );
            // The mode is not needed afterwards, so it is moved into the builder.
            let scanner = ScannerBuilder::new()
                .add_scanner_mode(scanner_mode)
                .build()
                .unwrap();
            #[cfg(not(feature = "regex_automata"))]
            scanner
                .generate_compiled_automata_as_dot(
                    &format!("Test{}", index),
                    Path::new(TARGET_FOLDER),
                )
                .unwrap();
            let mut find_iter = scanner.find_iter(test.input);
            let match1 = find_iter.next();
            // Name the table entry in the failure message so a failing case can be identified.
            assert_eq!(
                test.expected_match,
                match1.map(|m| test.input.get(m.start()..m.end()).unwrap()),
                "pattern {:?} against input {:?}",
                test.pattern,
                test.input
            );
        }
    }
}