1use crate::roles::Role;
36use serde::{Deserialize, Serialize};
37use thiserror::Error;
38
39#[derive(Debug, Error)]
45pub enum MaskingError {
46 #[error("Value does not match expected pattern for {pattern:?}: {reason}")]
48 PatternMismatch {
49 pattern: RedactPattern,
50 reason: String,
51 },
52
53 #[error("Column '{column}' not found in row")]
55 ColumnNotFound { column: String },
56
57 #[error("Row has {row_len} values but {col_len} columns were provided")]
59 ColumnCountMismatch { row_len: usize, col_len: usize },
60
61 #[error("FieldMask column name must not be empty")]
63 EmptyColumn,
64}
65
66pub type Result<T> = std::result::Result<T, MaskingError>;
68
69#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
75pub enum RedactPattern {
76 Ssn,
78 Phone,
80 Email,
82 CreditCard,
84 Custom {
86 replacement: String,
88 },
89}
90
91#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
93pub enum MaskingStrategy {
94 Redact(RedactPattern),
96 Hash,
98 Tokenize,
100 Truncate { max_chars: usize },
102 Null,
104}
105
106#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
112pub struct FieldMask {
113 pub column: String,
115 pub strategy: MaskingStrategy,
117 pub applies_to_roles: Option<Vec<Role>>,
122 pub exempt_roles: Vec<Role>,
124}
125
126impl FieldMask {
127 #[track_caller]
137 pub fn new(column: &str, strategy: MaskingStrategy) -> Self {
138 Self::try_new(column, strategy)
139 .expect("FieldMask::new: empty column — use try_new for fallible construction")
140 }
141
142 pub fn try_new(column: &str, strategy: MaskingStrategy) -> Result<Self> {
148 if column.is_empty() {
149 return Err(MaskingError::EmptyColumn);
150 }
151 Ok(Self {
152 column: column.to_string(),
153 strategy,
154 applies_to_roles: None,
155 exempt_roles: Vec::new(),
156 })
157 }
158
159 pub fn applies_to(mut self, role: Role) -> Self {
164 let roles = self.applies_to_roles.get_or_insert_with(Vec::new);
165 if !roles.contains(&role) {
166 roles.push(role);
167 }
168 self
169 }
170
171 pub fn exempt(mut self, role: Role) -> Self {
173 if !self.exempt_roles.contains(&role) {
174 self.exempt_roles.push(role);
175 }
176 self
177 }
178
179 pub fn should_mask(&self, role: &Role) -> bool {
181 if self.exempt_roles.contains(role) {
183 return false;
184 }
185 match &self.applies_to_roles {
186 None => true,
188 Some(roles) => roles.contains(role),
190 }
191 }
192}
193
194#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
196pub struct MaskingPolicy {
197 masks: Vec<FieldMask>,
199}
200
201impl MaskingPolicy {
202 pub fn new() -> Self {
204 Self { masks: Vec::new() }
205 }
206
207 pub fn with_mask(mut self, mask: FieldMask) -> Self {
209 self.masks.push(mask);
210 self
211 }
212
213 pub fn mask_for_column(&self, column: &str) -> Option<&FieldMask> {
215 self.masks.iter().find(|m| m.column == column)
216 }
217
218 pub fn masks(&self) -> &[FieldMask] {
220 &self.masks
221 }
222}
223
224pub fn apply_mask(value: &[u8], mask: &FieldMask, role: &Role) -> Result<Vec<u8>> {
238 assert!(
240 !mask.column.is_empty(),
241 "FieldMask column must not be empty"
242 );
243
244 if !mask.should_mask(role) {
246 return Ok(value.to_vec());
247 }
248
249 let result = match &mask.strategy {
250 MaskingStrategy::Redact(pattern) => apply_redact(value, pattern)?,
251 MaskingStrategy::Hash => apply_hash(value),
252 MaskingStrategy::Tokenize => apply_tokenize(value),
253 MaskingStrategy::Truncate { max_chars } => apply_truncate(value, *max_chars),
254 MaskingStrategy::Null => apply_null(),
255 };
256
257 debug_assert!(
260 matches!(mask.strategy, MaskingStrategy::Null) || !result.is_empty() || value.is_empty(),
261 "Non-null masking strategy should produce non-empty output for non-empty input"
262 );
263
264 Ok(result)
265}
266
267pub fn apply_masks_to_row(
277 row: &[Vec<u8>],
278 columns: &[String],
279 policy: &MaskingPolicy,
280 role: &Role,
281) -> Result<Vec<Vec<u8>>> {
282 if row.len() != columns.len() {
284 return Err(MaskingError::ColumnCountMismatch {
285 row_len: row.len(),
286 col_len: columns.len(),
287 });
288 }
289
290 let masked_row: Vec<Vec<u8>> = row
291 .iter()
292 .zip(columns.iter())
293 .map(|(value, col_name)| {
294 match policy.mask_for_column(col_name) {
295 Some(mask) => apply_mask(value, mask, role),
296 None => Ok(value.clone()), }
298 })
299 .collect::<Result<Vec<_>>>()?;
300
301 assert_eq!(
303 masked_row.len(),
304 row.len(),
305 "Masked row must have same column count as input"
306 );
307
308 Ok(masked_row)
309}
310
311fn apply_redact(value: &[u8], pattern: &RedactPattern) -> Result<Vec<u8>> {
317 let text = String::from_utf8_lossy(value);
318
319 let redacted = match pattern {
320 RedactPattern::Ssn => redact_ssn(&text, pattern)?,
321 RedactPattern::Phone => redact_phone(&text, pattern)?,
322 RedactPattern::Email => redact_email(&text, pattern)?,
323 RedactPattern::CreditCard => redact_credit_card(&text, pattern)?,
324 RedactPattern::Custom { replacement } => replacement.clone(),
325 };
326
327 Ok(redacted.into_bytes())
328}
329
330fn redact_ssn(text: &str, pattern: &RedactPattern) -> Result<String> {
332 let digits: String = text.chars().filter(char::is_ascii_digit).collect();
334
335 if digits.len() != 9 {
336 return Err(MaskingError::PatternMismatch {
337 pattern: pattern.clone(),
338 reason: format!(
339 "Expected 9 digits for SSN, found {} in '{text}'",
340 digits.len(),
341 ),
342 });
343 }
344
345 let last_four = &digits[5..9];
346
347 debug_assert_eq!(last_four.len(), 4, "SSN last-four must be 4 digits");
349
350 Ok(format!("***-**-{last_four}"))
351}
352
353fn redact_phone(text: &str, pattern: &RedactPattern) -> Result<String> {
355 let digits: String = text.chars().filter(char::is_ascii_digit).collect();
356
357 if digits.len() < 10 {
358 return Err(MaskingError::PatternMismatch {
359 pattern: pattern.clone(),
360 reason: format!(
361 "Expected at least 10 digits for phone, found {} in '{text}'",
362 digits.len(),
363 ),
364 });
365 }
366
367 let last_four = &digits[digits.len() - 4..];
368
369 debug_assert_eq!(last_four.len(), 4, "Phone last-four must be 4 digits");
370
371 Ok(format!("***-***-{last_four}"))
372}
373
374fn redact_email(text: &str, pattern: &RedactPattern) -> Result<String> {
376 let parts: Vec<&str> = text.splitn(2, '@').collect();
377
378 if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
379 return Err(MaskingError::PatternMismatch {
380 pattern: pattern.clone(),
381 reason: format!("Invalid email format: '{text}'"),
382 });
383 }
384
385 let first_char = &parts[0][..1];
386 let domain = parts[1];
387
388 debug_assert!(!domain.is_empty(), "Email domain must not be empty");
390
391 Ok(format!("{first_char}***@{domain}"))
392}
393
394fn redact_credit_card(text: &str, pattern: &RedactPattern) -> Result<String> {
396 let digits: String = text.chars().filter(char::is_ascii_digit).collect();
397
398 if digits.len() < 13 || digits.len() > 19 {
399 return Err(MaskingError::PatternMismatch {
400 pattern: pattern.clone(),
401 reason: format!(
402 "Expected 13-19 digits for credit card, found {} in '{text}'",
403 digits.len(),
404 ),
405 });
406 }
407
408 let last_four = &digits[digits.len() - 4..];
409
410 debug_assert_eq!(last_four.len(), 4, "Credit card last-four must be 4 digits");
411
412 Ok(format!("****-****-****-{last_four}"))
413}
414
415fn apply_hash(value: &[u8]) -> Vec<u8> {
417 use sha2::Digest;
418
419 let hash = sha2::Sha256::digest(value);
420 let hex = bytes_to_hex(&hash);
421
422 debug_assert_eq!(hex.len(), 64, "SHA-256 hex must be 64 characters");
424
425 hex.into_bytes()
426}
427
/// Encodes `bytes` as lowercase hexadecimal, two characters per byte.
fn bytes_to_hex(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut encoded = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        encoded.push(char::from(DIGITS[usize::from(byte >> 4)]));
        encoded.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    encoded
}
437
438fn apply_tokenize(value: &[u8]) -> Vec<u8> {
442 let hash = blake3::hash(value);
443 let hex = hash.to_hex();
444 let token = format!("tok_{}", &hex[..16]);
445
446 debug_assert_eq!(token.len(), 20, "Token must be exactly 20 characters");
448
449 token.into_bytes()
450}
451
/// Keeps at most `max_chars` leading characters of `value`.
///
/// The bytes are decoded as UTF-8 with invalid sequences replaced by U+FFFD.
/// If the decoded text has no more than `max_chars` characters, the original
/// bytes are returned untouched. Otherwise the first `max_chars` characters
/// are kept and `...` is appended to signal the cut.
///
/// The limit is compared against the number of characters, not bytes, so
/// multi-byte text that already fits is not given a spurious `...` suffix.
fn apply_truncate(value: &[u8], max_chars: usize) -> Vec<u8> {
    let text = String::from_utf8_lossy(value);

    // The character at index `max_chars` exists only when the text is longer
    // than the limit; its byte offset is where the text has to be cut.
    match text.char_indices().nth(max_chars) {
        None => value.to_vec(),
        Some((cut, _)) => {
            let mut truncated = String::with_capacity(cut + 3);
            truncated.push_str(&text[..cut]);
            truncated.push_str("...");
            truncated.into_bytes()
        }
    }
}
465
/// Produces the empty byte string that stands in for a nulled-out value.
fn apply_null() -> Vec<u8> {
    Vec::default()
}
470
/// Unit tests for mask construction, role scoping and every masking strategy.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a mask on `column` that is restricted to [`Role::User`].
    fn user_mask(column: &str, strategy: MaskingStrategy) -> FieldMask {
        FieldMask::new(column, strategy).applies_to(Role::User)
    }

    /// Builds a redaction mask on `column` that is restricted to [`Role::User`].
    fn user_redaction(column: &str, pattern: RedactPattern) -> FieldMask {
        user_mask(column, MaskingStrategy::Redact(pattern))
    }

    /// Masks `value` on behalf of [`Role::User`], panicking if masking fails.
    fn mask_as_user(mask: &FieldMask, value: &[u8]) -> Vec<u8> {
        apply_mask(value, mask, &Role::User).unwrap()
    }

    #[test]
    fn test_redact_ssn() {
        let mask = user_redaction("ssn", RedactPattern::Ssn);

        assert_eq!(mask_as_user(&mask, b"123-45-6789"), b"***-**-6789");
    }

    #[test]
    fn test_redact_ssn_unformatted() {
        let mask = user_redaction("ssn", RedactPattern::Ssn);

        assert_eq!(mask_as_user(&mask, b"123456789"), b"***-**-6789");
    }

    #[test]
    fn test_redact_ssn_invalid() {
        let mask = user_redaction("ssn", RedactPattern::Ssn);

        // Only five digits: not a valid SSN.
        assert!(apply_mask(b"12345", &mask, &Role::User).is_err());
    }

    #[test]
    fn test_redact_email() {
        let mask = user_redaction("email", RedactPattern::Email);

        assert_eq!(mask_as_user(&mask, b"john@example.com"), b"j***@example.com");
    }

    #[test]
    fn test_redact_email_invalid() {
        let mask = user_redaction("email", RedactPattern::Email);

        // No '@' separator.
        assert!(apply_mask(b"not-an-email", &mask, &Role::User).is_err());
    }

    #[test]
    fn test_redact_phone() {
        let mask = user_redaction("phone", RedactPattern::Phone);

        assert_eq!(mask_as_user(&mask, b"555-123-4567"), b"***-***-4567");
    }

    #[test]
    fn test_redact_credit_card() {
        let mask = user_redaction("cc", RedactPattern::CreditCard);

        assert_eq!(
            mask_as_user(&mask, b"1234-5678-9012-3456"),
            b"****-****-****-3456"
        );
    }

    #[test]
    fn test_redact_custom() {
        let pattern = RedactPattern::Custom {
            replacement: "[REDACTED]".to_string(),
        };
        let mask = user_redaction("secret", pattern);

        assert_eq!(mask_as_user(&mask, b"super secret data"), b"[REDACTED]");
    }

    #[test]
    fn test_hash_deterministic() {
        let mask = user_mask("field", MaskingStrategy::Hash);

        let first = mask_as_user(&mask, b"sensitive-data");
        let second = mask_as_user(&mask, b"sensitive-data");

        // Same input, same digest.
        assert_eq!(first, second);

        // Hex-encoded SHA-256 is 64 characters long.
        assert_eq!(first.len(), 64);

        // Different input, different digest.
        let other = mask_as_user(&mask, b"other-data");
        assert_ne!(first, other);
    }

    #[test]
    fn test_tokenize() {
        let mask = user_mask("field", MaskingStrategy::Tokenize);

        let token = mask_as_user(&mask, b"sensitive-data");
        let token_str = String::from_utf8(token.clone()).unwrap();

        assert!(token_str.starts_with("tok_"));

        // "tok_" plus 16 hex characters.
        assert_eq!(token_str.len(), 20);

        // Tokenization is deterministic.
        let again = mask_as_user(&mask, b"sensitive-data");
        assert_eq!(token, again);
    }

    #[test]
    fn test_truncate() {
        let mask = user_mask("name", MaskingStrategy::Truncate { max_chars: 3 });

        assert_eq!(mask_as_user(&mask, b"Jonathan"), b"Jon...");
    }

    #[test]
    fn test_truncate_short_value() {
        let mask = user_mask("name", MaskingStrategy::Truncate { max_chars: 20 });

        // Values within the limit come back untouched.
        assert_eq!(mask_as_user(&mask, b"Jo"), b"Jo");
    }

    #[test]
    fn test_null_mask() {
        let mask = user_mask("field", MaskingStrategy::Null);

        assert!(mask_as_user(&mask, b"sensitive-data").is_empty());
    }

    #[test]
    fn test_admin_exempt() {
        let mask = user_redaction("ssn", RedactPattern::Ssn)
            .applies_to(Role::Analyst)
            .exempt(Role::Admin);

        let value = b"123-45-6789";

        // Exempt role sees the raw value.
        let admin_view = apply_mask(value, &mask, &Role::Admin).unwrap();
        assert_eq!(admin_view, value);

        // Both listed roles see the redacted value.
        let user_view = apply_mask(value, &mask, &Role::User).unwrap();
        assert_eq!(user_view, b"***-**-6789");

        let analyst_view = apply_mask(value, &mask, &Role::Analyst).unwrap();
        assert_eq!(analyst_view, b"***-**-6789");
    }

    #[test]
    fn test_role_not_in_applies_to() {
        let mask = user_redaction("ssn", RedactPattern::Ssn);

        let value = b"123-45-6789";

        // The mask is scoped to User only, so Analyst is unaffected.
        let analyst_view = apply_mask(value, &mask, &Role::Analyst).unwrap();
        assert_eq!(analyst_view, value);
    }

    #[test]
    fn test_apply_masks_to_row() {
        let policy = MaskingPolicy::new()
            .with_mask(user_mask("name", MaskingStrategy::Truncate { max_chars: 3 }))
            .with_mask(user_redaction("ssn", RedactPattern::Ssn))
            .with_mask(user_mask("notes", MaskingStrategy::Null));

        // "age" has no mask in the policy.
        let columns: Vec<String> = ["name", "ssn", "age", "notes"]
            .iter()
            .map(|name| name.to_string())
            .collect();

        let cells: [&[u8]; 4] = [b"Jonathan", b"123-45-6789", b"42", b"Some private notes"];
        let row: Vec<Vec<u8>> = cells.iter().map(|cell| cell.to_vec()).collect();

        let masked = apply_masks_to_row(&row, &columns, &policy, &Role::User).unwrap();

        assert_eq!(masked.len(), 4);
        assert_eq!(masked[0], b"Jon...");
        assert_eq!(masked[1], b"***-**-6789");
        assert_eq!(masked[2], b"42");
        assert!(masked[3].is_empty());
    }

    #[test]
    fn test_apply_masks_to_row_column_mismatch() {
        let policy = MaskingPolicy::new();

        // Two column names but only one value.
        let columns = vec!["a".to_string(), "b".to_string()];
        let row = vec![b"1".to_vec()];

        let outcome = apply_masks_to_row(&row, &columns, &policy, &Role::User);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_masking_policy_lookup() {
        let ssn_mask = FieldMask::new("ssn", MaskingStrategy::Hash);
        let email_mask = FieldMask::new("email", MaskingStrategy::Redact(RedactPattern::Email));
        let policy = MaskingPolicy::new()
            .with_mask(ssn_mask)
            .with_mask(email_mask);

        assert!(policy.mask_for_column("ssn").is_some());
        assert!(policy.mask_for_column("email").is_some());
        assert!(policy.mask_for_column("name").is_none());
        assert_eq!(policy.masks().len(), 2);
    }

    #[test]
    fn test_should_mask_empty_applies_to() {
        // No explicit role list: every role is masked unless exempt.
        let mask = FieldMask::new("field", MaskingStrategy::Null).exempt(Role::Admin);

        assert!(mask.should_mask(&Role::User));
        assert!(mask.should_mask(&Role::Analyst));
        assert!(mask.should_mask(&Role::Auditor));
        assert!(!mask.should_mask(&Role::Admin));
    }

    #[test]
    #[should_panic(expected = "use try_new for fallible construction")]
    fn test_empty_column_name_panics() {
        FieldMask::new("", MaskingStrategy::Null);
    }

    #[test]
    fn test_try_new_empty_column_returns_err() {
        let err = FieldMask::try_new("", MaskingStrategy::Null)
            .expect_err("empty column must be rejected");
        assert!(matches!(err, MaskingError::EmptyColumn));
    }
}