use cloakrs_core::{PiiEntity, Result, Scanner};
use serde::{Deserialize, Serialize};
use std::io::BufRead;
/// Outcome of scanning a single line of input.
///
/// One value is produced per input line by `scan_lines`, including lines
/// that yield no findings.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct LineScanResult {
/// 1-based position of the line in the scanned input.
pub line_number: usize,
/// Findings the scanner reported for this line (empty when nothing was detected).
pub findings: Vec<PiiEntity>,
/// Masked text the scanner returned for this line, if it returned any.
pub masked_line: Option<String>,
}
/// Scans an in-memory string one line at a time.
///
/// This is a convenience wrapper around [`scan_lines`] for callers that
/// already hold the whole input as a `&str`.
///
/// # Errors
///
/// Propagates any error returned by [`scan_lines`].
pub fn scan_text(text: &str, scanner: &Scanner) -> Result<Vec<LineScanResult>> {
    // A byte slice implements `BufRead`, so the text can be handed to the
    // line scanner directly without an intermediate reader wrapper.
    let bytes = text.as_bytes();
    scan_lines(bytes, scanner)
}
/// Scans every line read from `reader` and returns one [`LineScanResult`] per line.
///
/// Lines are split by [`BufRead::lines`], so the line terminator is not part
/// of the text handed to the scanner. Results are returned in input order and
/// numbered starting at 1.
///
/// # Errors
///
/// Returns the first error encountered, either while reading a line from
/// `reader` or while scanning it with `scanner`. No partial results are
/// returned in that case.
pub fn scan_lines<R>(reader: R, scanner: &Scanner) -> Result<Vec<LineScanResult>>
where
    R: BufRead,
{
    reader
        .lines()
        .enumerate()
        .map(|(offset, raw)| -> Result<LineScanResult> {
            let text = raw?;
            let outcome = scanner.scan(&text)?;
            Ok(LineScanResult {
                // `enumerate` counts from zero; reported line numbers are 1-based.
                line_number: offset + 1,
                findings: outcome.findings,
                masked_line: outcome.masked_text,
            })
        })
        // Collecting into `Result` stops at the first failing line.
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use cloakrs_core::Locale;
    use cloakrs_patterns::default_registry;

    /// Builds a scanner from the default pattern registry using the US locale.
    fn scanner() -> Scanner {
        let builder = default_registry()
            .into_scanner_builder()
            .locale(Locale::US);
        builder.build().unwrap()
    }

    #[test]
    fn test_scan_text_multiline_reports_line_numbers() {
        let detector = scanner();
        let input = "email jane@example.com\nplain\nssn 123-45-6789";
        let results = scan_text(input, &detector).unwrap();

        // Line numbers are 1-based and follow input order.
        for index in 0..3 {
            assert_eq!(results[index].line_number, index + 1);
        }

        // The first and last lines each carry exactly one finding.
        for index in [0, 2] {
            assert_eq!(results[index].findings.len(), 1);
        }
    }

    #[test]
    fn test_scan_text_empty_lines_are_preserved() {
        let detector = scanner();
        let results = scan_text("\nemail jane@example.com", &detector).unwrap();

        // The leading empty line still produces its own (finding-free) entry.
        assert_eq!(results.len(), 2);
        let first = &results[0];
        assert!(first.findings.is_empty());
    }

    #[test]
    fn test_scan_text_long_line_detects_finding() {
        // A long run of filler precedes the e-mail address on the same line.
        let line = format!("{} email jane@example.com", "a".repeat(12_000));
        let detector = scanner();
        let results = scan_text(&line, &detector).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].findings.len(), 1);
    }
}