image_anonymizer/ocr/
masking.rs1use crate::ocr::detection::BoundingPoly;
2use anyhow::Result;
3use image::{DynamicImage, GenericImage, Rgba};
4use rayon::prelude::*;
5use serde::{Deserialize, Serialize};
6use tracing::{debug, error, info};
7
8use super::detection::TextAnnotation;
9use super::gemini::analyze_text_sensitivity;
10
11#[derive(Debug, Serialize, Deserialize)]
12pub struct SensitiveTextCriteria {
13 pub api_keys: bool,
14 pub emails: bool,
15 pub phone_numbers: bool,
16 pub credit_cards: bool,
17 pub personal_names: bool,
18 pub company_names: bool,
19}
20
21impl Default for SensitiveTextCriteria {
28 fn default() -> Self {
29 Self {
30 api_keys: true,
31 emails: true,
32 phone_numbers: true,
33 credit_cards: true,
34 personal_names: true,
35 company_names: true,
36 }
37 }
38}
39
40fn is_sensitive_text(
57 text: &str,
58 criteria: &SensitiveTextCriteria,
59 additional_texts: &[String],
60) -> bool {
61 if additional_texts.iter().any(|t| text.contains(t)) {
63 debug!("Text matched additional mask pattern: {}", text);
64 return true;
65 }
66
67 if text.len() < 3 {
68 return false;
69 }
70
71 if criteria.api_keys
72 && text.len() > 20
73 && text
74 .chars()
75 .all(|c| c.is_alphanumeric() || c == '_' || c == '.' || c == '@')
76 {
77 debug!("Detected potential API key: {}", text);
78 return true;
79 }
80
81 match analyze_text_sensitivity(text) {
83 Ok(is_sensitive) => {
84 if is_sensitive {
85 debug!("Gemini identified sensitive text: {}", text);
86 true
87 } else {
88 false
89 }
90 }
91 Err(err) => {
92 error!(
93 "Error calling Gemini API, defaulting to non-sensitive: {}",
94 err
95 );
96 text.contains('@')
99 || text.contains('-')
100 || (text.chars().filter(|c| c.is_numeric()).count() > 8)
101 }
102 }
103}
104
105pub fn mask_text(
121 image: &mut DynamicImage,
122 annotations: &[TextAnnotation],
123 additional_masks: &[String],
124) -> Result<()> {
125 let criteria = SensitiveTextCriteria::default();
126
127 info!("Masking sensitive text in image");
128
129 let annotations_to_process = if annotations.len() > 1 {
132 &annotations[1..]
133 } else {
134 annotations
135 };
136
137 let sensitive_annotations: Vec<&TextAnnotation> = annotations_to_process
139 .par_iter() .filter(|&annotation| {
141 is_sensitive_text(&annotation.description, &criteria, additional_masks)
142 })
143 .collect();
144
145 let masked_count = sensitive_annotations.len();
146
147 for annotation in sensitive_annotations {
150 mask_annotation(image, annotation)?;
151 }
152
153 info!("Masked {} sensitive text regions", masked_count);
154 Ok(())
155}
156
157fn mask_annotation(image: &mut DynamicImage, annotation: &TextAnnotation) -> Result<()> {
172 let empty_poly = BoundingPoly { vertices: vec![] };
173 let vertices = &annotation
174 .bounding_poly
175 .as_ref()
176 .unwrap_or(&empty_poly)
177 .vertices;
178
179 if vertices.is_empty() {
180 debug!("Skipping annotation with empty bounding polygon");
181 return Ok(());
182 }
183
184 let min_x = vertices.iter().map(|v| v.x).min().unwrap_or(0).max(0) as u32;
185 let min_y = vertices.iter().map(|v| v.y).min().unwrap_or(0).max(0) as u32;
186 let max_x = vertices.iter().map(|v| v.x).max().unwrap_or(0).max(0) as u32;
187 let max_y = vertices.iter().map(|v| v.y).max().unwrap_or(0).max(0) as u32;
188
189 let (width, height) = (image.width(), image.height());
190
191 let max_x = max_x.min(width - 1);
192 let max_y = max_y.min(height - 1);
193
194 let box_width = max_x.saturating_sub(min_x);
195 let box_height = max_y.saturating_sub(min_y);
196
197 if box_width > width / 2 || box_height > height / 2 {
198 debug!(
199 "Skipping oversized bounding box: {}x{}",
200 box_width, box_height
201 );
202 return Ok(());
203 }
204
205 let black = Rgba([0, 0, 0, 128]);
206
207 for y in min_y..=max_y {
208 for x in min_x..=max_x {
209 if x < width && y < height {
210 image.put_pixel(x, y, black);
211 }
212 }
213 }
214
215 Ok(())
216}