use super::strategy::{RedactionResult, RedactionStrategy, RedactionTarget};
use crate::domain::{
PatternMatcher, PhoneNumberMatcher, VerizonAccountMatcher, VerizonCallDetailsMatcher,
};
use crate::error::{RedactorError, RedactorResult};
use std::path::Path;
use mupdf::pdf::{PdfAnnotationType, PdfDocument, PdfPage};
use mupdf::Rect as MuRect;
/// Redaction strategy that marks matches with MuPDF redaction annotations and
/// then applies them to each affected page.
///
/// `Default` is implemented by hand rather than derived: a derived impl would
/// set `max_hits` to `0`, which disagrees with the limit of 100 used by the
/// regular constructor and would be handed to the page search as a zero hit limit.
#[derive(Debug, Clone)]
pub struct SecureRedactionStrategy {
    /// Hit limit handed to the MuPDF page search for every pattern on every page.
    max_hits: u32,
}

impl Default for SecureRedactionStrategy {
    /// Returns a strategy with a search hit limit of 100.
    fn default() -> Self {
        Self { max_hits: 100 }
    }
}
impl SecureRedactionStrategy {
/// Creates a strategy whose page searches use a hit limit of 100.
pub fn new() -> Self {
    let max_hits = 100;
    Self { max_hits }
}
/// Builder-style setter: consumes the strategy and returns it with the
/// search hit limit replaced by `max_hits`.
pub fn with_max_hits(self, max_hits: u32) -> Self {
    let mut configured = self;
    configured.max_hits = max_hits;
    configured
}
/// Expands the requested redaction targets into the literal strings that
/// should be searched for in the document.
///
/// * `Literal` targets are used verbatim (empty literals are skipped).
/// * `PhoneNumbers` yields the generated variants of every phone number the
///   matcher extracts from the document text and manages to normalize.
/// * `VerizonAccount` yields the generated variants of the account number
///   found in the document text.
/// * `VerizonCallDetails` yields the extracted call details when the text
///   contains a call detail table.
/// * `Regex` yields every non-empty match of the expression in the document text.
///
/// The returned list contains each pattern at most once, in first-seen order.
///
/// # Errors
///
/// * Propagates any error from `extract_text` when at least one target needs
///   the document text.
/// * `RedactorError::PatternNotFound` when a `VerizonAccount` target is
///   requested but no account number is found.
/// * `RedactorError::InvalidInput` when a `Regex` target does not compile.
fn resolve_patterns(
    &self,
    input: &Path,
    targets: &[RedactionTarget],
) -> RedactorResult<Vec<String>> {
    // Every target kind except `Literal` is resolved against the document text.
    // Extract it once up front instead of re-reading and re-parsing the PDF
    // for each such target.
    let needs_text = targets
        .iter()
        .any(|target| !matches!(target, RedactionTarget::Literal(_)));
    let text = if needs_text {
        self.extract_text(input)?
    } else {
        String::new()
    };

    let mut patterns: Vec<String> = Vec::new();
    for target in targets {
        match target {
            RedactionTarget::Literal(literal) => {
                // An empty needle cannot identify anything to redact.
                if !literal.is_empty() {
                    patterns.push(literal.clone());
                }
            }
            RedactionTarget::PhoneNumbers => {
                let matcher = PhoneNumberMatcher::new();
                for phone_str in matcher.extract_all(&text) {
                    if let Some(normalized) = matcher.normalize(phone_str) {
                        patterns.extend(matcher.generate_variants(&normalized));
                    }
                }
            }
            RedactionTarget::VerizonAccount => {
                if let Some(account) = VerizonAccountMatcher::find_account_number(&text) {
                    let matcher = VerizonAccountMatcher::new();
                    patterns.extend(matcher.generate_variants(&account));
                } else {
                    return Err(RedactorError::PatternNotFound {
                        pattern: "Verizon account number".to_string(),
                        context: "document text".to_string(),
                    });
                }
            }
            RedactionTarget::VerizonCallDetails => {
                let matcher = VerizonCallDetailsMatcher::new();
                if VerizonCallDetailsMatcher::has_call_detail_table(&text) {
                    let details = matcher.extract_all_call_details(&text);
                    patterns.extend(details);
                }
            }
            RedactionTarget::Regex(pattern) => {
                let re =
                    regex::Regex::new(pattern).map_err(|e| RedactorError::InvalidInput {
                        parameter: "regex_pattern".to_string(),
                        reason: format!("Invalid regex pattern: {}", e),
                    })?;
                patterns.extend(
                    re.find_iter(&text)
                        .map(|found| found.as_str())
                        .filter(|matched| !matched.is_empty())
                        .map(str::to_string),
                );
            }
        }
    }

    // The same string can be produced by several targets (or several times by
    // one target). Searching for it more than once would stack identical
    // annotations and inflate the redaction count, so keep the first occurrence only.
    let mut seen = std::collections::HashSet::new();
    patterns.retain(|pattern| seen.insert(pattern.clone()));

    Ok(patterns)
}
fn apply_mupdf_redactions(
&self,
pdf_doc: &PdfDocument,
patterns: &[String],
) -> RedactorResult<RedactionResult> {
let page_count = pdf_doc
.page_count()
.map_err(|e| RedactorError::BackendError {
backend: "MuPDF".to_string(),
message: format!("Failed to get page count: {}", e),
source: Some(Box::new(e)),
})?;
let mut result = RedactionResult {
pages_processed: page_count as usize,
secure: true,
..Default::default()
};
let redact_all = patterns.len() == 1 && patterns[0] == ".+";
for page_idx in 0..page_count {
let page = pdf_doc
.load_page(page_idx)
.map_err(|e| RedactorError::PdfProcessing {
message: format!("Failed to load page {}", page_idx + 1),
page: Some(page_idx as usize + 1),
source: Some(Box::new(e)),
})?;
let mut pdf_page = match PdfPage::try_from(page.clone()) {
Ok(p) => p,
Err(_) => continue, };
let mut page_redactions = 0;
if redact_all {
let bounds = page.bounds().map_err(|e| RedactorError::BackendError {
backend: "MuPDF".to_string(),
message: format!("Failed to get bounds for page {}", page_idx + 1),
source: Some(Box::new(e)),
})?;
let annot = pdf_page
.create_annotation(PdfAnnotationType::Redact)
.map_err(|e| RedactorError::PdfProcessing {
message: "Failed to create redaction annotation".to_string(),
page: Some(page_idx as usize + 1),
source: Some(Box::new(e)),
})?;
unsafe {
ffi::set_annotation_rect(&annot, bounds);
}
page_redactions += 1;
} else {
for pattern in patterns {
let hits = page.search(pattern, self.max_hits).map_err(|e| {
RedactorError::BackendError {
backend: "MuPDF".to_string(),
message: format!("Search failed for pattern: {}", pattern),
source: Some(Box::new(e)),
}
})?;
for quad in hits {
let annot = pdf_page
.create_annotation(PdfAnnotationType::Redact)
.map_err(|e| RedactorError::PdfProcessing {
message: "Failed to create redaction annotation".to_string(),
page: Some(page_idx as usize + 1),
source: Some(Box::new(e)),
})?;
let rect = MuRect {
x0: quad.ul.x.min(quad.ll.x).min(quad.ur.x).min(quad.lr.x),
y0: quad.ul.y.min(quad.ll.y).min(quad.ur.y).min(quad.lr.y),
x1: quad.ul.x.max(quad.ll.x).max(quad.ur.x).max(quad.lr.x),
y1: quad.ul.y.max(quad.ll.y).max(quad.ur.y).max(quad.lr.y),
};
unsafe {
ffi::set_annotation_rect(&annot, rect);
}
page_redactions += 1;
}
}
}
if page_redactions > 0 {
pdf_page
.redact()
.map_err(|e| RedactorError::PdfProcessing {
message: format!("Failed to apply redactions on page {}", page_idx + 1),
page: Some(page_idx as usize + 1),
source: Some(Box::new(e)),
})?;
result.instances_redacted += page_redactions;
result.pages_modified += 1;
}
}
Ok(result)
}
}
impl RedactionStrategy for SecureRedactionStrategy {
/// Redacts `targets` from the PDF at `input` and writes the result to `output`.
///
/// The targets are first resolved to search patterns. If that yields no
/// patterns, or if no redaction ends up being applied, `input` is copied to
/// `output` unchanged. Otherwise the modified document is saved to `output`.
///
/// # Errors
///
/// * Any error from pattern resolution or from applying the redactions.
/// * `RedactorError::InvalidInput` when a path that must be handed to MuPDF
///   is not valid UTF-8.
/// * `RedactorError::PdfProcessing` when the PDF cannot be opened or saved.
/// * `RedactorError::Io` when the unchanged copy fails.
fn redact(
    &self,
    input: &Path,
    output: &Path,
    targets: &[RedactionTarget],
) -> RedactorResult<RedactionResult> {
    /// Returns `path` as UTF-8 text, naming `parameter` in the error otherwise.
    fn utf8_path<'a>(path: &'a Path, parameter: &str) -> RedactorResult<&'a str> {
        match path.to_str() {
            Some(text) => Ok(text),
            None => Err(RedactorError::InvalidInput {
                parameter: parameter.to_string(),
                reason: "Path contains invalid UTF-8".to_string(),
            }),
        }
    }

    // Fallback used whenever there is nothing to redact.
    let copy_unchanged = || -> RedactorResult<()> {
        std::fs::copy(input, output)
            .map(|_| ())
            .map_err(|e| RedactorError::Io {
                path: output.to_path_buf(),
                source: e,
            })
    };

    let patterns = self.resolve_patterns(input, targets)?;
    if patterns.is_empty() {
        copy_unchanged()?;
        return Ok(RedactionResult::none());
    }

    let input_str = utf8_path(input, "input")?;
    let pdf_doc = PdfDocument::open(input_str).map_err(|e| RedactorError::PdfProcessing {
        message: "Failed to open PDF with MuPDF".to_string(),
        page: None,
        source: Some(Box::new(e)),
    })?;

    let result = self.apply_mupdf_redactions(&pdf_doc, &patterns)?;
    if !result.has_redactions() {
        copy_unchanged()?;
        return Ok(result);
    }

    let output_str = utf8_path(output, "output")?;
    pdf_doc
        .save(output_str)
        .map_err(|e| RedactorError::PdfProcessing {
            message: "Failed to save redacted PDF".to_string(),
            page: None,
            source: Some(Box::new(e)),
        })?;
    Ok(result)
}
/// Reads the file at `input` into memory and extracts its text with `pdf_extract`.
///
/// # Errors
///
/// Returns `RedactorError::Io` when the file cannot be read and
/// `RedactorError::TextExtraction` when text extraction fails.
fn extract_text(&self, input: &Path) -> RedactorResult<String> {
    let raw = match std::fs::read(input) {
        Ok(contents) => contents,
        Err(source) => {
            return Err(RedactorError::Io {
                path: input.to_path_buf(),
                source,
            })
        }
    };

    match pdf_extract::extract_text_from_mem(&raw) {
        Ok(text) => Ok(text),
        Err(failure) => Err(RedactorError::TextExtraction {
            path: input.to_path_buf(),
            reason: failure.to_string(),
        }),
    }
}
/// Returns the fixed identifier of this strategy.
fn name(&self) -> &str {
    const STRATEGY_NAME: &str = "SecureRedaction";
    STRATEGY_NAME
}
/// Reports this strategy as secure; the answer is always `true`.
fn is_secure(&self) -> bool {
    const SECURE: bool = true;
    SECURE
}
}
/// Thin raw-FFI helpers for MuPDF operations that this file calls directly
/// through `mupdf_sys`.
mod ffi {
    use mupdf::pdf::PdfAnnotation;
    use mupdf::Rect;

    /// Local mirror used to reach the raw annotation pointer.
    ///
    /// NOTE(review): this assumes `PdfAnnotation` consists of exactly one raw
    /// `pdf_annot` pointer. Confirm against the `mupdf` crate version in use.
    #[repr(C)]
    struct PdfAnnotRaw {
        inner: *mut mupdf_sys::pdf_annot,
    }

    /// Sets the rectangle of `annot` to `rect` via `pdf_set_annot_rect`.
    ///
    /// A fresh base context is created for the call and dropped afterwards.
    /// If the context cannot be created (null), the function returns without
    /// touching the annotation and without reporting the failure.
    ///
    /// # Safety
    ///
    /// The reference is reinterpreted as a [`PdfAnnotRaw`], so the caller must
    /// ensure that `PdfAnnotation` really has that layout and that `annot`
    /// wraps a live MuPDF annotation.
    pub unsafe fn set_annotation_rect(annot: &PdfAnnotation, rect: Rect) {
        let fz_rect = mupdf_sys::fz_rect {
            x0: rect.x0,
            y0: rect.y0,
            x1: rect.x1,
            y1: rect.y1,
        };

        // Reinterpret the wrapper to read its raw pointer field.
        let raw = &*(annot as *const PdfAnnotation).cast::<PdfAnnotRaw>();

        let ctx = mupdf_sys::mupdf_new_base_context();
        if ctx.is_null() {
            return;
        }

        mupdf_sys::pdf_set_annot_rect(ctx, raw.inner, fz_rect);
        mupdf_sys::mupdf_drop_base_context(ctx);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed strategy reports its name and secure flag.
    #[test]
    fn test_strategy_creation() {
        let subject = SecureRedactionStrategy::new();
        let reported_name = subject.name();
        assert_eq!(reported_name, "SecureRedaction");
        assert!(subject.is_secure());
    }

    /// The builder method overrides the search hit limit.
    #[test]
    fn test_max_hits_configuration() {
        let expected_limit = 50;
        let configured = SecureRedactionStrategy::new().with_max_hits(expected_limit);
        assert_eq!(configured.max_hits, 50);
    }
}