1use serde::{Deserialize, Serialize};
2use std::ops::Range;
3
/// Category assigned to a detected finding.
///
/// Variants serialize under their snake_case names (for example
/// `custom_string`). `PartialOrd`/`Ord` are derived, so comparisons follow the
/// declaration order below; reordering variants changes how kinds compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FindingKind {
    Secret,
    Domain,
    Url,
    Email,
    Ip,
    Cidr,
    Phone,
    Person,
    Organization,
    /// Has no toggle in `RedactionRules`; `RedactionRules::is_enabled` always
    /// reports this kind as enabled.
    CustomString,
    /// Has no toggle in `RedactionRules`; `RedactionRules::is_enabled` always
    /// reports this kind as enabled.
    CustomFile,
}
19
/// Per-kind on/off switches, one boolean for each built-in `FindingKind`.
///
/// The container-level `#[serde(default)]` means any field missing from the
/// serialized input takes its value from this type's `Default` impl rather
/// than causing a deserialization error.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct RedactionRules {
    pub secret: bool,
    pub domain: bool,
    pub url: bool,
    pub email: bool,
    pub ip: bool,
    pub cidr: bool,
    pub phone: bool,
    pub person: bool,
    pub organization: bool,
}
33
34impl Default for RedactionRules {
35 fn default() -> Self {
36 Self {
37 secret: false,
38 domain: false,
39 url: false,
40 email: true,
41 ip: true,
42 cidr: true,
43 phone: false,
44 person: false,
45 organization: false,
46 }
47 }
48}
49
50impl RedactionRules {
51 pub fn with_kind(mut self, kind: FindingKind, enabled: bool) -> Self {
52 self.set_kind(kind, enabled);
53 self
54 }
55
56 pub fn set_kind(&mut self, kind: FindingKind, enabled: bool) {
57 match kind {
58 FindingKind::Secret => self.secret = enabled,
59 FindingKind::Domain => self.domain = enabled,
60 FindingKind::Url => self.url = enabled,
61 FindingKind::Email => self.email = enabled,
62 FindingKind::Ip => self.ip = enabled,
63 FindingKind::Cidr => self.cidr = enabled,
64 FindingKind::Phone => self.phone = enabled,
65 FindingKind::Person => self.person = enabled,
66 FindingKind::Organization => self.organization = enabled,
67 FindingKind::CustomString | FindingKind::CustomFile => {}
68 }
69 }
70
71 pub fn is_enabled(self, kind: FindingKind) -> bool {
72 match kind {
73 FindingKind::Secret => self.secret,
74 FindingKind::Domain => self.domain,
75 FindingKind::Url => self.url,
76 FindingKind::Email => self.email,
77 FindingKind::Ip => self.ip,
78 FindingKind::Cidr => self.cidr,
79 FindingKind::Phone => self.phone,
80 FindingKind::Person => self.person,
81 FindingKind::Organization => self.organization,
82 FindingKind::CustomString | FindingKind::CustomFile => true,
83 }
84 }
85}
86
/// How a `CustomStringRule::pattern` is interpreted.
///
/// Serialized in snake_case (`exact`, `contains`, `regex`). Within this file
/// only `Regex` receives special handling: `RedactionPolicy::validate` checks
/// that the pattern compiles. The matching itself is implemented elsewhere.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CustomStringMatch {
    Exact,
    Contains,
    Regex,
}
94
95impl Default for CustomStringMatch {
96 fn default() -> Self {
97 Self::Exact
98 }
99}
100
/// Scope selector attached to a `CustomStringRule`.
///
/// Serialized in snake_case (`text`, `line`).
// NOTE(review): how each scope affects matching is not visible in this file;
// presumably `Line` widens the redaction to the whole line — confirm against
// the matcher.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CustomStringScope {
    Text,
    Line,
}
107
108impl Default for CustomStringScope {
109 fn default() -> Self {
110 Self::Text
111 }
112}
113
/// A user-supplied string rule.
///
/// `pattern` is required in serialized input; `match_type` and `scope` fall
/// back to their `Default` values when absent.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CustomStringRule {
    /// Pattern text. `RedactionPolicy::validate` rejects an empty pattern and,
    /// for `CustomStringMatch::Regex`, one that `regex::Regex::new` refuses.
    pub pattern: String,
    #[serde(default)]
    pub match_type: CustomStringMatch,
    #[serde(default)]
    pub scope: CustomStringScope,
}
122
/// A user-supplied file rule identified by a path.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CustomFileRule {
    /// Path string. `RedactionPolicy::validate` rejects an empty value; no
    /// other check on the path is performed in this file.
    pub path: String,
}
127
128#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
129pub struct RedactionPolicy {
130 #[serde(flatten)]
131 pub rules: RedactionRules,
132 #[serde(default)]
133 pub custom_strings: Vec<CustomStringRule>,
134 #[serde(default)]
135 pub custom_files: Vec<CustomFileRule>,
136}
137
138impl Default for RedactionPolicy {
139 fn default() -> Self {
140 Self {
141 rules: RedactionRules::default(),
142 custom_strings: Vec::new(),
143 custom_files: Vec::new(),
144 }
145 }
146}
147
148impl RedactionPolicy {
149 pub fn with_kind(mut self, kind: FindingKind, enabled: bool) -> Self {
150 self.rules.set_kind(kind, enabled);
151 self
152 }
153
154 pub fn with_custom_string(mut self, rule: CustomStringRule) -> Self {
155 self.custom_strings.push(rule);
156 self
157 }
158
159 pub fn with_custom_file(mut self, rule: CustomFileRule) -> Self {
160 self.custom_files.push(rule);
161 self
162 }
163
164 pub fn with_custom_strings<I: IntoIterator<Item = CustomStringRule>>(mut self, rules: I) -> Self {
165 self.custom_strings.extend(rules);
166 self
167 }
168
169 pub fn with_custom_files<I: IntoIterator<Item = CustomFileRule>>(mut self, rules: I) -> Self {
170 self.custom_files.extend(rules);
171 self
172 }
173
174 pub fn validate(&self) -> Result<(), String> {
175 for (index, rule) in self.custom_strings.iter().enumerate() {
176 if rule.pattern.is_empty() {
177 return Err(format!(
178 "custom_strings[{index}]: pattern must not be empty"
179 ));
180 }
181 if matches!(rule.match_type, CustomStringMatch::Regex) {
182 if regex::Regex::new(&rule.pattern).is_err() {
183 return Err(format!(
184 "custom_strings[{index}]: invalid regex pattern: {}",
185 rule.pattern
186 ));
187 }
188 }
189 }
190 for (index, rule) in self.custom_files.iter().enumerate() {
191 if rule.path.is_empty() {
192 return Err(format!(
193 "custom_files[{index}]: path must not be empty"
194 ));
195 }
196 }
197 Ok(())
198 }
199}
200
201impl From<RedactionRules> for RedactionPolicy {
202 fn from(rules: RedactionRules) -> Self {
203 Self {
204 rules,
205 custom_strings: Vec::new(),
206 custom_files: Vec::new(),
207 }
208 }
209}
210
/// Static per-kind metadata returned by `FindingKind::meta`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct FindingKindMeta {
    /// Lower-case name exposed through `FindingKind::label`.
    label: &'static str,
    /// Upper-case short name exposed through `FindingKind::token_label`.
    token_label: &'static str,
    /// Base weight that `Finding::score` adds into a finding's score.
    priority: u8,
    /// Exposed through `FindingKind::containment_priority`.
    // NOTE(review): its consumer is not in this file; presumably it ranks
    // kinds when one finding contains another — confirm.
    containment_priority: u8,
}
218
219impl FindingKind {
220 const fn meta(self) -> FindingKindMeta {
221 match self {
222 Self::Secret => FindingKindMeta {
223 label: "secret",
224 token_label: "SECRET",
225 priority: 100,
226 containment_priority: 75,
227 },
228 Self::Domain => FindingKindMeta {
229 label: "domain",
230 token_label: "DOMAIN",
231 priority: 70,
232 containment_priority: 80,
233 },
234 Self::Url => FindingKindMeta {
235 label: "url",
236 token_label: "URL",
237 priority: 90,
238 containment_priority: 100,
239 },
240 Self::Email => FindingKindMeta {
241 label: "email",
242 token_label: "EMAIL",
243 priority: 85,
244 containment_priority: 95,
245 },
246 Self::Ip => FindingKindMeta {
247 label: "ip",
248 token_label: "IP",
249 priority: 75,
250 containment_priority: 85,
251 },
252 Self::Cidr => FindingKindMeta {
253 label: "cidr",
254 token_label: "CIDR",
255 priority: 80,
256 containment_priority: 90,
257 },
258 Self::Phone => FindingKindMeta {
259 label: "phone",
260 token_label: "PHONE",
261 priority: 60,
262 containment_priority: 70,
263 },
264 Self::Person => FindingKindMeta {
265 label: "person",
266 token_label: "PERSON",
267 priority: 50,
268 containment_priority: 50,
269 },
270 Self::Organization => FindingKindMeta {
271 label: "organization",
272 token_label: "ORG",
273 priority: 45,
274 containment_priority: 45,
275 },
276 Self::CustomString => FindingKindMeta {
277 label: "custom_string",
278 token_label: "CSTR",
279 priority: 95,
280 containment_priority: 40,
281 },
282 Self::CustomFile => FindingKindMeta {
283 label: "custom_file",
284 token_label: "FILE",
285 priority: 99,
286 containment_priority: 99,
287 },
288 }
289 }
290
291 pub fn label(self) -> &'static str {
292 self.meta().label
293 }
294
295 pub fn token_label(self) -> &'static str {
296 self.meta().token_label
297 }
298
299 pub fn priority(self) -> u8 {
300 self.meta().priority
301 }
302
303 pub fn containment_priority(self) -> u8 {
304 self.meta().containment_priority
305 }
306}
307
/// Origin of a finding.
///
/// Serialized in snake_case (`rule`, `llm`). The source contributes a bonus to
/// `Finding::score` through `FindingSource::bonus`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FindingSource {
    Rule,
    Llm,
}
314
315impl FindingSource {
316 pub fn bonus(self) -> u8 {
317 match self {
318 Self::Rule => 10,
319 Self::Llm => 0,
320 }
321 }
322}
323
/// A single detected item together with its location and scoring inputs.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Finding {
    pub kind: FindingKind,
    pub source: FindingSource,
    pub match_text: String,
    pub normalized_key: String,
    /// Added directly into `Finding::score` alongside the kind priority and
    /// the source bonus.
    pub confidence: u8,
    /// Start of the half-open span returned by `Finding::range`.
    // NOTE(review): presumably a byte offset into the scanned text — confirm.
    pub start: usize,
    /// Exclusive end of the span returned by `Finding::range`.
    pub end: usize,
}
334
335impl Finding {
336 pub fn range(&self) -> Range<usize> {
337 self.start..self.end
338 }
339
340 pub fn score(&self) -> u16 {
341 u16::from(self.kind.priority())
342 + u16::from(self.source.bonus())
343 + u16::from(self.confidence)
344 }
345}
346
/// How a replacement value was produced.
///
/// Serialized in snake_case; `structured_token` is currently the only
/// strategy.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReplacementStrategy {
    StructuredToken,
}
352
353#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
354pub struct AppliedReplacement {
355 pub kind: FindingKind,
356 #[serde(skip_serializing)]
357 pub original: String,
358 pub replacement: String,
359 pub strategy: ReplacementStrategy,
360 pub display_value: Option<String>,
361}
362
/// Counters and LLM status flags describing one redaction run.
///
/// `Default` is derived: all counters are zero, all flags are `false` and
/// `llm_error` is `None`.
// NOTE(review): the exact meaning of each counter is defined by the code that
// fills this struct, which is not in this file.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct RedactionStats {
    pub total_findings: usize,
    pub applied_replacements: usize,
    pub dropped_findings: usize,
    pub llm_configured: bool,
    pub llm_request_failed: bool,
    pub llm_candidates_accepted: usize,
    pub llm_candidates_rejected: usize,
    pub llm_error: Option<String>,
}
374
/// Serializable result of a redaction run: the redacted text, the findings,
/// the replacements that were applied and the run statistics.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RedactionResult {
    pub redacted_text: String,
    pub findings: Vec<Finding>,
    pub applied_replacements: Vec<AppliedReplacement>,
    pub stats: RedactionStats,
}
382
/// Pairs a `RedactionResult` with its `RedactionSession`.
///
/// Unlike its two fields, this wrapper does not derive `Serialize` or
/// `Deserialize`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RedactionArtifact {
    pub result: RedactionResult,
    pub session: RedactionSession,
}
388
/// One entry of a `RedactionSession`, linking a token to the original text.
///
/// Unlike `AppliedReplacement::original`, the `original` field here IS
/// serialized, so a serialized session contains the unredacted values.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RestorationEntry {
    pub token: String,
    pub kind: FindingKind,
    pub original: String,
    pub replacement_hint: Option<String>,
    // NOTE(review): presumably the number of times `token` appears in the
    // redacted text — confirm against the code that builds entries.
    pub occurrences: usize,
}
397
/// Serializable session state for a redaction: identifiers, fingerprints, the
/// redacted text, the policy and the restoration entries.
// NOTE(review): how `fingerprint` and `redacted_fingerprint` are computed, and
// what `version` values are valid, is not visible in this file.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RedactionSession {
    pub version: u32,
    pub session_id: String,
    pub fingerprint: String,
    pub redacted_fingerprint: String,
    pub redacted_text: String,
    /// Falls back to `RedactionPolicy::default()` when the serialized session
    /// has no `policy` field.
    #[serde(default)]
    pub policy: RedactionPolicy,
    pub entries: Vec<RestorationEntry>,
}
409
/// Outcome of a restore operation.
///
/// `RestoreResult::is_valid` treats the result as valid only when both
/// `unresolved_tokens` and `validation_errors` are empty.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RestoreResult {
    pub restored_text: String,
    pub restored_count: usize,
    pub unresolved_tokens: Vec<String>,
    pub validation_errors: Vec<String>,
}
417
418impl RestoreResult {
419 pub fn is_valid(&self) -> bool {
420 self.validation_errors.is_empty() && self.unresolved_tokens.is_empty()
421 }
422}
423
/// Summary of one session entry.
///
/// Carries the same fields as `RestorationEntry` except `original`, so the
/// unredacted value is not part of the summary.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SessionEntrySummary {
    pub token: String,
    pub kind: FindingKind,
    pub replacement_hint: Option<String>,
    pub occurrences: usize,
}
431
/// Summary of a `RedactionSession`.
///
/// Carries the session's identifying fields and per-entry summaries, but
/// neither the redacted text, the policy nor any original values.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SessionSummary {
    pub version: u32,
    pub session_id: String,
    pub fingerprint: String,
    pub redacted_fingerprint: String,
    // NOTE(review): presumably equal to `entries.len()` — confirm against the
    // code that builds the summary.
    pub entry_count: usize,
    pub entries: Vec<SessionEntrySummary>,
}