Skip to main content

cloudiful_redactor/
types.rs

1use serde::{Deserialize, Serialize};
2use std::ops::Range;
3
4#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
5#[serde(rename_all = "snake_case")]
6pub enum FindingKind {
7    Secret,
8    Domain,
9    Url,
10    Email,
11    Ip,
12    Cidr,
13    Phone,
14    Person,
15    Organization,
16    CustomString,
17    CustomFile,
18}
19
20#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
21#[serde(default)]
22pub struct RedactionRules {
23    pub secret: bool,
24    pub domain: bool,
25    pub url: bool,
26    pub email: bool,
27    pub ip: bool,
28    pub cidr: bool,
29    pub phone: bool,
30    pub person: bool,
31    pub organization: bool,
32}
33
34impl Default for RedactionRules {
35    fn default() -> Self {
36        Self {
37            secret: false,
38            domain: false,
39            url: false,
40            email: true,
41            ip: true,
42            cidr: true,
43            phone: false,
44            person: false,
45            organization: false,
46        }
47    }
48}
49
50impl RedactionRules {
51    pub fn with_kind(mut self, kind: FindingKind, enabled: bool) -> Self {
52        self.set_kind(kind, enabled);
53        self
54    }
55
56    pub fn set_kind(&mut self, kind: FindingKind, enabled: bool) {
57        match kind {
58            FindingKind::Secret => self.secret = enabled,
59            FindingKind::Domain => self.domain = enabled,
60            FindingKind::Url => self.url = enabled,
61            FindingKind::Email => self.email = enabled,
62            FindingKind::Ip => self.ip = enabled,
63            FindingKind::Cidr => self.cidr = enabled,
64            FindingKind::Phone => self.phone = enabled,
65            FindingKind::Person => self.person = enabled,
66            FindingKind::Organization => self.organization = enabled,
67            FindingKind::CustomString | FindingKind::CustomFile => {}
68        }
69    }
70
71    pub fn is_enabled(self, kind: FindingKind) -> bool {
72        match kind {
73            FindingKind::Secret => self.secret,
74            FindingKind::Domain => self.domain,
75            FindingKind::Url => self.url,
76            FindingKind::Email => self.email,
77            FindingKind::Ip => self.ip,
78            FindingKind::Cidr => self.cidr,
79            FindingKind::Phone => self.phone,
80            FindingKind::Person => self.person,
81            FindingKind::Organization => self.organization,
82            FindingKind::CustomString | FindingKind::CustomFile => true,
83        }
84    }
85}
86
87#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
88#[serde(rename_all = "snake_case")]
89pub enum CustomStringMatch {
90    Exact,
91    Contains,
92    Regex,
93}
94
95impl Default for CustomStringMatch {
96    fn default() -> Self {
97        Self::Exact
98    }
99}
100
101#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
102#[serde(rename_all = "snake_case")]
103pub enum CustomStringScope {
104    Text,
105    Line,
106}
107
108impl Default for CustomStringScope {
109    fn default() -> Self {
110        Self::Text
111    }
112}
113
114#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
115pub struct CustomStringRule {
116    pub pattern: String,
117    #[serde(default)]
118    pub match_type: CustomStringMatch,
119    #[serde(default)]
120    pub scope: CustomStringScope,
121}
122
123#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
124pub struct CustomFileRule {
125    pub path: String,
126}
127
128#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
129pub struct RedactionPolicy {
130    #[serde(flatten)]
131    pub rules: RedactionRules,
132    #[serde(default)]
133    pub custom_strings: Vec<CustomStringRule>,
134    #[serde(default)]
135    pub custom_files: Vec<CustomFileRule>,
136}
137
138impl Default for RedactionPolicy {
139    fn default() -> Self {
140        Self {
141            rules: RedactionRules::default(),
142            custom_strings: Vec::new(),
143            custom_files: Vec::new(),
144        }
145    }
146}
147
148impl RedactionPolicy {
149    pub fn with_kind(mut self, kind: FindingKind, enabled: bool) -> Self {
150        self.rules.set_kind(kind, enabled);
151        self
152    }
153
154    pub fn with_custom_string(mut self, rule: CustomStringRule) -> Self {
155        self.custom_strings.push(rule);
156        self
157    }
158
159    pub fn with_custom_file(mut self, rule: CustomFileRule) -> Self {
160        self.custom_files.push(rule);
161        self
162    }
163
164    pub fn with_custom_strings<I: IntoIterator<Item = CustomStringRule>>(mut self, rules: I) -> Self {
165        self.custom_strings.extend(rules);
166        self
167    }
168
169    pub fn with_custom_files<I: IntoIterator<Item = CustomFileRule>>(mut self, rules: I) -> Self {
170        self.custom_files.extend(rules);
171        self
172    }
173
174    pub fn validate(&self) -> Result<(), String> {
175        for (index, rule) in self.custom_strings.iter().enumerate() {
176            if rule.pattern.is_empty() {
177                return Err(format!(
178                    "custom_strings[{index}]: pattern must not be empty"
179                ));
180            }
181            if matches!(rule.match_type, CustomStringMatch::Regex) {
182                if regex::Regex::new(&rule.pattern).is_err() {
183                    return Err(format!(
184                        "custom_strings[{index}]: invalid regex pattern: {}",
185                        rule.pattern
186                    ));
187                }
188            }
189        }
190        for (index, rule) in self.custom_files.iter().enumerate() {
191            if rule.path.is_empty() {
192                return Err(format!(
193                    "custom_files[{index}]: path must not be empty"
194                ));
195            }
196        }
197        Ok(())
198    }
199}
200
201impl From<RedactionRules> for RedactionPolicy {
202    fn from(rules: RedactionRules) -> Self {
203        Self {
204            rules,
205            custom_strings: Vec::new(),
206            custom_files: Vec::new(),
207        }
208    }
209}
210
/// Static metadata attached to each `FindingKind` by `FindingKind::meta`.
#[derive(Debug, Clone, Copy)]
#[derive(PartialEq, Eq)]
struct FindingKindMeta {
    /// Lower-case name exposed through `FindingKind::label`.
    label: &'static str,
    /// Upper-case name exposed through `FindingKind::token_label`.
    token_label: &'static str,
    /// Value exposed through `FindingKind::priority`.
    priority: u8,
    /// Value exposed through `FindingKind::containment_priority`.
    containment_priority: u8,
}
218
219impl FindingKind {
220    const fn meta(self) -> FindingKindMeta {
221        match self {
222            Self::Secret => FindingKindMeta {
223                label: "secret",
224                token_label: "SECRET",
225                priority: 100,
226                containment_priority: 75,
227            },
228            Self::Domain => FindingKindMeta {
229                label: "domain",
230                token_label: "DOMAIN",
231                priority: 70,
232                containment_priority: 80,
233            },
234            Self::Url => FindingKindMeta {
235                label: "url",
236                token_label: "URL",
237                priority: 90,
238                containment_priority: 100,
239            },
240            Self::Email => FindingKindMeta {
241                label: "email",
242                token_label: "EMAIL",
243                priority: 85,
244                containment_priority: 95,
245            },
246            Self::Ip => FindingKindMeta {
247                label: "ip",
248                token_label: "IP",
249                priority: 75,
250                containment_priority: 85,
251            },
252            Self::Cidr => FindingKindMeta {
253                label: "cidr",
254                token_label: "CIDR",
255                priority: 80,
256                containment_priority: 90,
257            },
258            Self::Phone => FindingKindMeta {
259                label: "phone",
260                token_label: "PHONE",
261                priority: 60,
262                containment_priority: 70,
263            },
264            Self::Person => FindingKindMeta {
265                label: "person",
266                token_label: "PERSON",
267                priority: 50,
268                containment_priority: 50,
269            },
270            Self::Organization => FindingKindMeta {
271                label: "organization",
272                token_label: "ORG",
273                priority: 45,
274                containment_priority: 45,
275            },
276            Self::CustomString => FindingKindMeta {
277                label: "custom_string",
278                token_label: "CSTR",
279                priority: 95,
280                containment_priority: 40,
281            },
282            Self::CustomFile => FindingKindMeta {
283                label: "custom_file",
284                token_label: "FILE",
285                priority: 99,
286                containment_priority: 99,
287            },
288        }
289    }
290
291    pub fn label(self) -> &'static str {
292        self.meta().label
293    }
294
295    pub fn token_label(self) -> &'static str {
296        self.meta().token_label
297    }
298
299    pub fn priority(self) -> u8 {
300        self.meta().priority
301    }
302
303    pub fn containment_priority(self) -> u8 {
304        self.meta().containment_priority
305    }
306}
307
308#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
309#[serde(rename_all = "snake_case")]
310pub enum FindingSource {
311    Rule,
312    Llm,
313}
314
315impl FindingSource {
316    pub fn bonus(self) -> u8 {
317        match self {
318            Self::Rule => 10,
319            Self::Llm => 0,
320        }
321    }
322}
323
324#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
325pub struct Finding {
326    pub kind: FindingKind,
327    pub source: FindingSource,
328    pub match_text: String,
329    pub normalized_key: String,
330    pub confidence: u8,
331    pub start: usize,
332    pub end: usize,
333}
334
335impl Finding {
336    pub fn range(&self) -> Range<usize> {
337        self.start..self.end
338    }
339
340    pub fn score(&self) -> u16 {
341        u16::from(self.kind.priority())
342            + u16::from(self.source.bonus())
343            + u16::from(self.confidence)
344    }
345}
346
347#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
348#[serde(rename_all = "snake_case")]
349pub enum ReplacementStrategy {
350    StructuredToken,
351}
352
353#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
354pub struct AppliedReplacement {
355    pub kind: FindingKind,
356    #[serde(skip_serializing)]
357    pub original: String,
358    pub replacement: String,
359    pub strategy: ReplacementStrategy,
360    pub display_value: Option<String>,
361}
362
363#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
364pub struct RedactionStats {
365    pub total_findings: usize,
366    pub applied_replacements: usize,
367    pub dropped_findings: usize,
368    pub llm_configured: bool,
369    pub llm_request_failed: bool,
370    pub llm_candidates_accepted: usize,
371    pub llm_candidates_rejected: usize,
372    pub llm_error: Option<String>,
373}
374
375#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
376pub struct RedactionResult {
377    pub redacted_text: String,
378    pub findings: Vec<Finding>,
379    pub applied_replacements: Vec<AppliedReplacement>,
380    pub stats: RedactionStats,
381}
382
383#[derive(Debug, Clone, PartialEq, Eq)]
384pub struct RedactionArtifact {
385    pub result: RedactionResult,
386    pub session: RedactionSession,
387}
388
389#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
390pub struct RestorationEntry {
391    pub token: String,
392    pub kind: FindingKind,
393    pub original: String,
394    pub replacement_hint: Option<String>,
395    pub occurrences: usize,
396}
397
398#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
399pub struct RedactionSession {
400    pub version: u32,
401    pub session_id: String,
402    pub fingerprint: String,
403    pub redacted_fingerprint: String,
404    pub redacted_text: String,
405    #[serde(default)]
406    pub policy: RedactionPolicy,
407    pub entries: Vec<RestorationEntry>,
408}
409
410#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
411pub struct RestoreResult {
412    pub restored_text: String,
413    pub restored_count: usize,
414    pub unresolved_tokens: Vec<String>,
415    pub validation_errors: Vec<String>,
416}
417
418impl RestoreResult {
419    pub fn is_valid(&self) -> bool {
420        self.validation_errors.is_empty() && self.unresolved_tokens.is_empty()
421    }
422}
423
424#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
425pub struct SessionEntrySummary {
426    pub token: String,
427    pub kind: FindingKind,
428    pub replacement_hint: Option<String>,
429    pub occurrences: usize,
430}
431
432#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
433pub struct SessionSummary {
434    pub version: u32,
435    pub session_id: String,
436    pub fingerprint: String,
437    pub redacted_fingerprint: String,
438    pub entry_count: usize,
439    pub entries: Vec<SessionEntrySummary>,
440}