adaptive_pipeline/infrastructure/services/
pii_masking.rs1#![allow(unused_variables)]
10use adaptive_pipeline_domain::entities::{Operation, ProcessingContext, StageConfiguration, StagePosition, StageType};
69use adaptive_pipeline_domain::services::{FromParameters, StageService};
70use adaptive_pipeline_domain::value_objects::file_chunk::FileChunk;
71use adaptive_pipeline_domain::PipelineError;
72use once_cell::sync::Lazy;
73use regex::Regex;
74use std::collections::HashMap;
75
76static EMAIL_REGEX: Lazy<Regex> = Lazy::new(|| {
84 Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
85 .unwrap_or_else(|_| Regex::new(r"[^\s\S]").unwrap_or_else(|_| unsafe { std::hint::unreachable_unchecked() }))
86});
87
88static SSN_REGEX: Lazy<Regex> = Lazy::new(|| {
89 Regex::new(r"\b\d{3}-\d{2}-\d{4}\b")
90 .unwrap_or_else(|_| Regex::new(r"[^\s\S]").unwrap_or_else(|_| unsafe { std::hint::unreachable_unchecked() }))
91});
92
93static PHONE_REGEX: Lazy<Regex> = Lazy::new(|| {
94 Regex::new(r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b")
95 .unwrap_or_else(|_| Regex::new(r"[^\s\S]").unwrap_or_else(|_| unsafe { std::hint::unreachable_unchecked() }))
96});
97
98static CREDIT_CARD_REGEX: Lazy<Regex> = Lazy::new(|| {
99 Regex::new(r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b")
100 .unwrap_or_else(|_| Regex::new(r"[^\s\S]").unwrap_or_else(|_| unsafe { std::hint::unreachable_unchecked() }))
101});
102
/// The kinds of personally identifiable information this module can mask.
///
/// Each variant selects one matcher and one masking rule.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PiiPattern {
    /// E-mail addresses.
    Email,
    /// SSN-style numbers written as `ddd-dd-dddd`.
    Ssn,
    /// Ten-digit phone numbers, optionally separated by `-` or `.`.
    Phone,
    /// Sixteen-digit card numbers, optionally separated by `-` or whitespace.
    CreditCard,
}
115
116impl PiiPattern {
117 fn all() -> Vec<PiiPattern> {
119 vec![
120 PiiPattern::Email,
121 PiiPattern::Ssn,
122 PiiPattern::Phone,
123 PiiPattern::CreditCard,
124 ]
125 }
126
127 fn regex(&self) -> &Regex {
129 match self {
130 PiiPattern::Email => &EMAIL_REGEX,
131 PiiPattern::Ssn => &SSN_REGEX,
132 PiiPattern::Phone => &PHONE_REGEX,
133 PiiPattern::CreditCard => &CREDIT_CARD_REGEX,
134 }
135 }
136
137 fn mask(&self, text: &str, mask_char: char, preserve_format: bool) -> String {
139 if preserve_format {
140 match self {
141 PiiPattern::Email => {
142 if let Some(at_pos) = text.find('@') {
144 let (local, domain_with_at) = text.split_at(at_pos);
145 let domain = &domain_with_at[1..]; if let Some(dot_pos) = domain.rfind('.') {
147 let (domain_name, tld) = domain.split_at(dot_pos);
148 format!(
149 "{}@{}{}",
150 mask_char.to_string().repeat(local.len().min(3)),
151 mask_char.to_string().repeat(domain_name.len().min(3)),
152 tld
153 )
154 } else {
155 mask_char.to_string().repeat(text.len())
156 }
157 } else {
158 mask_char.to_string().repeat(text.len())
159 }
160 }
161 PiiPattern::Ssn => {
162 text.chars().map(|c| if c == '-' { '-' } else { mask_char }).collect()
164 }
165 PiiPattern::Phone => {
166 text.chars()
168 .map(|c| if c.is_ascii_digit() { mask_char } else { c })
169 .collect()
170 }
171 PiiPattern::CreditCard => {
172 text.chars()
174 .map(|c| if c.is_ascii_digit() { mask_char } else { c })
175 .collect()
176 }
177 }
178 } else {
179 mask_char.to_string().repeat(text.len())
181 }
182 }
183}
184
185#[derive(Debug, Clone, PartialEq, Eq)]
187pub struct PiiMaskingConfig {
188 pub patterns: Vec<PiiPattern>,
190 pub mask_char: char,
192 pub preserve_format: bool,
194}
195
196impl Default for PiiMaskingConfig {
197 fn default() -> Self {
198 Self {
199 patterns: PiiPattern::all(),
200 mask_char: '*',
201 preserve_format: true,
202 }
203 }
204}
205
206impl FromParameters for PiiMaskingConfig {
211 fn from_parameters(params: &HashMap<String, String>) -> Result<Self, PipelineError> {
212 let patterns = params
214 .get("patterns")
215 .map(|s| {
216 if s.to_lowercase() == "all" {
217 Ok(PiiPattern::all())
218 } else {
219 s.split(',')
220 .map(|p| match p.trim().to_lowercase().as_str() {
221 "email" => Ok(PiiPattern::Email),
222 "ssn" => Ok(PiiPattern::Ssn),
223 "phone" => Ok(PiiPattern::Phone),
224 "credit_card" | "creditcard" => Ok(PiiPattern::CreditCard),
225 other => Err(PipelineError::InvalidParameter(format!(
226 "Unknown PII pattern: {}. Valid: email, ssn, phone, credit_card, all",
227 other
228 ))),
229 })
230 .collect::<Result<Vec<_>, _>>()
231 }
232 })
233 .transpose()?
234 .unwrap_or_else(PiiPattern::all);
235
236 let mask_char = params.get("mask_char").and_then(|s| s.chars().next()).unwrap_or('*');
238
239 let preserve_format = params
241 .get("preserve_format")
242 .map(|s| s.to_lowercase() == "true")
243 .unwrap_or(true);
244
245 Ok(Self {
246 patterns,
247 mask_char,
248 preserve_format,
249 })
250 }
251}
252
/// Stage service that masks PII in chunk data.
///
/// The type carries no fields; the masking settings are read from the stage
/// configuration each time a chunk is processed.
pub struct PiiMaskingService;
270
271impl PiiMaskingService {
272 pub fn new() -> Self {
274 Self
275 }
276
277 fn mask_data(&self, data: &[u8], config: &PiiMaskingConfig) -> Result<Vec<u8>, PipelineError> {
279 let text = String::from_utf8_lossy(data);
281 let mut masked = text.to_string();
282
283 for pattern in &config.patterns {
285 masked = pattern
286 .regex()
287 .replace_all(&masked, |caps: ®ex::Captures| {
288 pattern.mask(&caps[0], config.mask_char, config.preserve_format)
289 })
290 .to_string();
291 }
292
293 Ok(masked.into_bytes())
294 }
295}
296
297impl Default for PiiMaskingService {
298 fn default() -> Self {
299 Self::new()
300 }
301}
302
303impl StageService for PiiMaskingService {
312 fn process_chunk(
313 &self,
314 chunk: FileChunk,
315 config: &StageConfiguration,
316 context: &mut ProcessingContext,
317 ) -> Result<FileChunk, PipelineError> {
318 let pii_config = PiiMaskingConfig::from_parameters(&config.parameters)?;
320
321 let input_size = chunk.data().len();
322
323 let processed_data = match config.operation {
325 Operation::Forward => {
326 tracing::debug!(
328 chunk_seq = chunk.sequence_number(),
329 patterns = ?pii_config.patterns,
330 "Masking PII in chunk"
331 );
332 self.mask_data(chunk.data(), &pii_config)?
333 }
334 Operation::Reverse => {
335 return Err(PipelineError::ProcessingFailed(
337 "PII masking is not reversible - cannot recover original data".to_string(),
338 ));
339 }
340 };
341
342 let output_size = processed_data.len();
343
344 tracing::trace!(
346 operation = %config.operation,
347 input_bytes = input_size,
348 output_bytes = output_size,
349 "PII masking complete"
350 );
351
352 let processed_chunk = chunk.with_data(processed_data)?;
354
355 Ok(processed_chunk)
356 }
357
358 fn position(&self) -> StagePosition {
359 StagePosition::PreBinary
362 }
363
364 fn is_reversible(&self) -> bool {
365 false
367 }
368
369 fn stage_type(&self) -> StageType {
370 StageType::Transform
372 }
373}
374
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a configuration from `(key, value)` pairs.
    fn parse(pairs: &[(&str, &str)]) -> Result<PiiMaskingConfig, PipelineError> {
        let params: HashMap<String, String> = pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect();
        PiiMaskingConfig::from_parameters(&params)
    }

    /// Masks `input` for a single pattern with `*` and format preservation on.
    fn mask_with(pattern: PiiPattern, input: &[u8]) -> String {
        let config = PiiMaskingConfig {
            patterns: vec![pattern],
            mask_char: '*',
            preserve_format: true,
        };
        let masked = PiiMaskingService::new().mask_data(input, &config).unwrap();
        String::from_utf8_lossy(&masked).into_owned()
    }

    #[test]
    fn test_from_parameters_default() {
        let config = parse(&[]).unwrap();
        assert_eq!(config.patterns.len(), 4);
        assert_eq!(config.mask_char, '*');
        assert!(config.preserve_format);
    }

    #[test]
    fn test_from_parameters_email_only() {
        let config = parse(&[("patterns", "email")]).unwrap();
        assert_eq!(config.patterns, vec![PiiPattern::Email]);
    }

    #[test]
    fn test_from_parameters_multiple_patterns() {
        let config = parse(&[("patterns", "email,ssn,phone")]).unwrap();
        assert_eq!(
            config.patterns,
            vec![PiiPattern::Email, PiiPattern::Ssn, PiiPattern::Phone]
        );
    }

    #[test]
    fn test_from_parameters_custom_mask_char() {
        let config = parse(&[("mask_char", "#")]).unwrap();
        assert_eq!(config.mask_char, '#');
    }

    #[test]
    fn test_from_parameters_invalid_pattern() {
        let result = parse(&[("patterns", "invalid")]);
        assert!(result.is_err());
    }

    #[test]
    fn test_mask_email() {
        let result = mask_with(PiiPattern::Email, b"Contact: user@example.com for more info");

        assert!(result.contains("***@***.com"));
        assert!(!result.contains("user@example.com"));
    }

    #[test]
    fn test_mask_ssn() {
        let result = mask_with(PiiPattern::Ssn, b"SSN: 123-45-6789");

        assert!(result.contains("***-**-****"));
        assert!(!result.contains("123-45-6789"));
    }

    #[test]
    fn test_mask_phone() {
        let result = mask_with(PiiPattern::Phone, b"Call: 555-123-4567");

        assert!(result.contains("***-***-****"));
        assert!(!result.contains("555-123-4567"));
    }

    #[test]
    fn test_mask_credit_card() {
        let result = mask_with(PiiPattern::CreditCard, b"Card: 1234-5678-9012-3456");

        assert!(result.contains("****-****-****-****"));
        assert!(!result.contains("1234-5678-9012-3456"));
    }

    #[test]
    fn test_reverse_operation_fails() {
        use adaptive_pipeline_domain::entities::pipeline_stage::StageConfiguration;
        use adaptive_pipeline_domain::entities::{SecurityContext, SecurityLevel};

        let service = PiiMaskingService::new();
        let chunk = FileChunk::new(0, 0, vec![0u8; 100], false).unwrap();
        let config = StageConfiguration {
            algorithm: "pii_masking".to_string(),
            operation: Operation::Reverse,
            parameters: HashMap::new(),
            parallel_processing: false,
            chunk_size: None,
        };
        let security = SecurityContext::new(None, SecurityLevel::Public);
        let mut context = ProcessingContext::new(100, security);

        // A reverse request must be refused with the "not reversible" error.
        let result = service.process_chunk(chunk, &config, &mut context);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not reversible"));
    }

    #[test]
    fn test_stage_service_properties() {
        let service = PiiMaskingService::new();

        assert_eq!(service.position(), StagePosition::PreBinary);
        assert!(!service.is_reversible());
        assert_eq!(service.stage_type(), StageType::Transform);
    }
}