use crate::{
internal::SCANNER_CACHE, scanner::Scanner, scanner_mode::ScannerMode, Pattern, Result,
};
/// Builder that collects [`ScannerMode`]s and turns them into a [`Scanner`].
///
/// Modes are added with `add_scanner_mode` / `add_scanner_modes`; `build`
/// then produces the scanner. For the common single-mode case,
/// `add_patterns` switches to a [`SimpleScannerBuilder`].
#[derive(Debug, Clone, Default)]
pub struct ScannerBuilder {
/// Scanner modes collected so far, in the order they were added.
scanner_modes: Vec<ScannerMode>,
}
impl ScannerBuilder {
    /// Creates a builder that holds no scanner modes yet.
    pub fn new() -> Self {
        // The derived `Default` yields an empty mode list.
        Self::default()
    }

    /// Switches to a [`SimpleScannerBuilder`] that is set up from plain pattern strings.
    ///
    /// Every pattern is copied into an owned `String` and handed to `Pattern::new`
    /// together with its zero-based position in `patterns`.
    ///
    /// Note that `self` is consumed and not used any further: scanner modes that were
    /// added to this builder before are not carried over to the returned builder.
    pub fn add_patterns<P, S>(self, patterns: P) -> SimpleScannerBuilder
    where
        P: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        let mut indexed_patterns = Vec::new();
        for (position, source) in patterns.into_iter().enumerate() {
            indexed_patterns.push(Pattern::new(source.as_ref().to_string(), position));
        }
        SimpleScannerBuilder::new(indexed_patterns)
    }

    /// Appends a single scanner mode and returns the builder for further chaining.
    pub fn add_scanner_mode(self, scanner_mode: ScannerMode) -> Self {
        let Self {
            scanner_modes: mut collected,
        } = self;
        collected.push(scanner_mode);
        Self {
            scanner_modes: collected,
        }
    }

    /// Appends clones of all given scanner modes, keeping their order, and returns the
    /// builder for further chaining.
    pub fn add_scanner_modes(mut self, scanner_modes: &[ScannerMode]) -> Self {
        self.scanner_modes.extend(scanner_modes.iter().cloned());
        self
    }

    /// Builds the [`Scanner`] for the collected modes by asking `SCANNER_CACHE` for them.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by the cache's `get` call.
    ///
    /// # Panics
    ///
    /// Panics if obtaining write access to `SCANNER_CACHE` fails, because the result of
    /// `write()` is unwrapped.
    pub fn build(self) -> Result<Scanner> {
        let inner = SCANNER_CACHE.write().unwrap().get(&self.scanner_modes)?;
        Ok(Scanner { inner })
    }

    /// Builds the [`Scanner`] by converting the collected modes directly, without
    /// consulting `SCANNER_CACHE`.
    ///
    /// # Errors
    ///
    /// Propagates the error of the `try_into` conversion of the scanner modes.
    // NOTE(review): `dead_code` is allowed here presumably because this method is not
    // used in every build configuration - confirm.
    #[allow(dead_code)]
    pub fn build_uncached(self) -> Result<Scanner> {
        let inner = self.scanner_modes.try_into()?;
        Ok(Scanner { inner })
    }
}
/// Builder for a [`Scanner`] that consists of exactly one scanner mode.
///
/// Instances are created by `ScannerBuilder::add_patterns`.
#[derive(Debug, Clone)]
pub struct SimpleScannerBuilder {
/// The single scanner mode the resulting scanner is built from.
scanner_mode: ScannerMode,
}
impl SimpleScannerBuilder {
    /// Creates the builder with one scanner mode named `"INITIAL"` that holds `patterns`.
    ///
    /// The third argument of `ScannerMode::new` is left empty.
    // NOTE(review): the empty third argument presumably means "no mode transitions" -
    // confirm against `ScannerMode::new`.
    fn new<P>(patterns: P) -> Self
    where
        P: IntoIterator<Item = Pattern>,
    {
        let scanner_mode = ScannerMode::new("INITIAL", patterns, vec![]);
        Self { scanner_mode }
    }

    /// Builds the [`Scanner`] for the single mode by asking `SCANNER_CACHE` for it.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by the cache's `get` call.
    ///
    /// # Panics
    ///
    /// Panics if obtaining write access to `SCANNER_CACHE` fails, because the result of
    /// `write()` is unwrapped.
    pub fn build(self) -> Result<Scanner> {
        let Self { scanner_mode } = self;
        let inner = SCANNER_CACHE.write().unwrap().get(&[scanner_mode])?;
        Ok(Scanner { inner })
    }
}
#[cfg(test)]
mod tests {
    #[cfg(not(feature = "regex_automata"))]
    use std::path::Path;
    use std::{fs, sync::Once};

    use crate::{Pattern, ScannerModeSwitcher};

    use super::*;

    /// Ensures the shared test setup in `init` runs only once.
    static INIT: Once = Once::new();

    /// Directory that is recreated by `init` and receives the files generated by the tests.
    const TARGET_FOLDER: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/target/testout/test_simple_scanner_builder"
    );

    /// One-time setup: initializes the logger for tests and recreates `TARGET_FOLDER`.
    fn init() {
        INIT.call_once(|| {
            // Both results are ignored on purpose: the logger may already be initialized
            // and the folder may not exist yet.
            let _ = env_logger::builder().is_test(true).try_init();
            let _ = fs::remove_dir_all(TARGET_FOLDER);
            fs::create_dir_all(TARGET_FOLDER).unwrap();
        });
    }

    /// Creates the `"INITIAL"` mode with a newline and a line comment pattern that is
    /// shared by the mode based tests.
    fn initial_mode() -> ScannerMode {
        let patterns = vec![
            Pattern::new(String::from(r"\r\n|\r|\n"), 1),
            Pattern::new(String::from(r"(//.*(\r\n|\r|\n))"), 3),
        ];
        ScannerMode::new("INITIAL", patterns, vec![(1, 1), (3, 1)])
    }

    /// A builder fed with one mode yields a scanner whose mode 0 is that mode.
    #[test]
    fn test_scanner_builder_with_single_mode() {
        init();
        let builder = ScannerBuilder::new().add_scanner_mode(initial_mode());
        let scanner = builder.build().unwrap();
        assert_eq!(Some("INITIAL"), scanner.inner.mode_name(0));
    }

    /// A builder fed with a slice of modes yields a scanner whose mode 0 is the first one.
    #[test]
    fn test_scanner_builder_with_multiple_modes() {
        init();
        let string_mode = ScannerMode::new(
            "STRING",
            vec![Pattern::new(String::from(r#""[^"]*""#), 2)],
            vec![(2, 0)],
        );
        let builder = ScannerBuilder::new().add_scanner_modes(&[initial_mode(), string_mode]);
        let scanner = builder.build().unwrap();
        assert_eq!(Some("INITIAL"), scanner.inner.mode_name(0));
    }

    /// The pattern based builder creates a single `"INITIAL"` mode and numbers the token
    /// types by pattern position.
    #[test]
    fn test_simple_scanner_builder() {
        init();
        let builder = ScannerBuilder::new().add_patterns(["\r\n|\r|\n", "//.*(\r\n|\r|\n)"]);
        let scanner = builder.build().unwrap();
        assert_eq!(Some("INITIAL"), scanner.inner.mode_name(0));

        let input = r#"
// Line comment1
// Line comment2
"#;

        #[cfg(not(feature = "regex_automata"))]
        scanner
            .generate_compiled_automata_as_dot("LineComment", Path::new(TARGET_FOLDER))
            .expect("Failed to generate compiled automata as dot");

        // Expected token type per match and, for the comments, the trimmed matched text.
        let expected = [
            (0, None),
            (1, Some("// Line comment1")),
            (0, None),
            (1, Some("// Line comment2")),
        ];

        let matches: Vec<_> = scanner.find_iter(input).collect();
        assert_eq!(matches.len(), expected.len());

        for (found, &(token_type, comment)) in matches.iter().zip(expected.iter()) {
            assert_eq!(found.token_type(), token_type);
            if let Some(text) = comment {
                // The comment pattern includes the line ending, hence the trim.
                assert_eq!(input[found.span().range()].trim(), text);
            }
        }
    }
}