Skip to main content

cloudiful_redactor/
types.rs

1use serde::{Deserialize, Serialize};
2use std::ops::Range;
3
/// Category assigned to a finding.
///
/// Serialized with `snake_case` variant names (for example `Secret` is
/// written as `"secret"`). The derived `PartialOrd`/`Ord` follow declaration
/// order, so reordering the variants changes comparison results.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FindingKind {
    Secret,
    Domain,
    Url,
    Email,
    Ip,
    Cidr,
    Phone,
    Person,
    Organization,
}
17
/// Per-kind on/off switches: one `bool` field for each [`FindingKind`] variant.
///
/// The container-level `#[serde(default)]` means any field missing from the
/// input is filled in from `RedactionRules::default()` instead of causing a
/// deserialization error.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct RedactionRules {
    /// Switch for [`FindingKind::Secret`].
    pub secret: bool,
    /// Switch for [`FindingKind::Domain`].
    pub domain: bool,
    /// Switch for [`FindingKind::Url`].
    pub url: bool,
    /// Switch for [`FindingKind::Email`].
    pub email: bool,
    /// Switch for [`FindingKind::Ip`].
    pub ip: bool,
    /// Switch for [`FindingKind::Cidr`].
    pub cidr: bool,
    /// Switch for [`FindingKind::Phone`].
    pub phone: bool,
    /// Switch for [`FindingKind::Person`].
    pub person: bool,
    /// Switch for [`FindingKind::Organization`].
    pub organization: bool,
}
31
32impl Default for RedactionRules {
33    fn default() -> Self {
34        Self {
35            secret: true,
36            domain: false,
37            url: true,
38            email: true,
39            ip: true,
40            cidr: true,
41            phone: true,
42            person: false,
43            organization: true,
44        }
45    }
46}
47
48impl RedactionRules {
49    pub fn with_kind(mut self, kind: FindingKind, enabled: bool) -> Self {
50        self.set_kind(kind, enabled);
51        self
52    }
53
54    pub fn set_kind(&mut self, kind: FindingKind, enabled: bool) {
55        match kind {
56            FindingKind::Secret => self.secret = enabled,
57            FindingKind::Domain => self.domain = enabled,
58            FindingKind::Url => self.url = enabled,
59            FindingKind::Email => self.email = enabled,
60            FindingKind::Ip => self.ip = enabled,
61            FindingKind::Cidr => self.cidr = enabled,
62            FindingKind::Phone => self.phone = enabled,
63            FindingKind::Person => self.person = enabled,
64            FindingKind::Organization => self.organization = enabled,
65        }
66    }
67
68    pub fn is_enabled(self, kind: FindingKind) -> bool {
69        match kind {
70            FindingKind::Secret => self.secret,
71            FindingKind::Domain => self.domain,
72            FindingKind::Url => self.url,
73            FindingKind::Email => self.email,
74            FindingKind::Ip => self.ip,
75            FindingKind::Cidr => self.cidr,
76            FindingKind::Phone => self.phone,
77            FindingKind::Person => self.person,
78            FindingKind::Organization => self.organization,
79        }
80    }
81}
82
/// Static metadata attached to a single [`FindingKind`] variant.
///
/// Private to this module: values are produced by `FindingKind::meta` and
/// exposed one field at a time through the public accessors on `FindingKind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct FindingKindMeta {
    /// Lowercase name returned by `FindingKind::label`.
    label: &'static str,
    /// Uppercase name returned by `FindingKind::token_label`.
    token_label: &'static str,
    /// Value returned by `FindingKind::priority`.
    priority: u8,
    /// Value returned by `FindingKind::containment_priority`.
    containment_priority: u8,
}
90
91impl FindingKind {
92    const fn meta(self) -> FindingKindMeta {
93        match self {
94            Self::Secret => FindingKindMeta {
95                label: "secret",
96                token_label: "SECRET",
97                priority: 100,
98                containment_priority: 75,
99            },
100            Self::Domain => FindingKindMeta {
101                label: "domain",
102                token_label: "DOMAIN",
103                priority: 70,
104                containment_priority: 80,
105            },
106            Self::Url => FindingKindMeta {
107                label: "url",
108                token_label: "URL",
109                priority: 90,
110                containment_priority: 100,
111            },
112            Self::Email => FindingKindMeta {
113                label: "email",
114                token_label: "EMAIL",
115                priority: 85,
116                containment_priority: 95,
117            },
118            Self::Ip => FindingKindMeta {
119                label: "ip",
120                token_label: "IP",
121                priority: 75,
122                containment_priority: 85,
123            },
124            Self::Cidr => FindingKindMeta {
125                label: "cidr",
126                token_label: "CIDR",
127                priority: 80,
128                containment_priority: 90,
129            },
130            Self::Phone => FindingKindMeta {
131                label: "phone",
132                token_label: "PHONE",
133                priority: 60,
134                containment_priority: 70,
135            },
136            Self::Person => FindingKindMeta {
137                label: "person",
138                token_label: "PERSON",
139                priority: 50,
140                containment_priority: 50,
141            },
142            Self::Organization => FindingKindMeta {
143                label: "organization",
144                token_label: "ORG",
145                priority: 45,
146                containment_priority: 45,
147            },
148        }
149    }
150
151    pub fn label(self) -> &'static str {
152        self.meta().label
153    }
154
155    pub fn token_label(self) -> &'static str {
156        self.meta().token_label
157    }
158
159    pub fn priority(self) -> u8 {
160        self.meta().priority
161    }
162
163    pub fn containment_priority(self) -> u8 {
164        self.meta().containment_priority
165    }
166}
167
/// Where a finding came from.
///
/// Serialized with `snake_case` variant names (`"rule"` and `"llm"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FindingSource {
    Rule,
    Llm,
}
174
175impl FindingSource {
176    pub fn bonus(self) -> u8 {
177        match self {
178            Self::Rule => 10,
179            Self::Llm => 0,
180        }
181    }
182}
183
/// One finding: its classification, the text involved, and the
/// `start..end` span it covers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Finding {
    /// Category of the finding.
    pub kind: FindingKind,
    /// Whether the finding came from a rule or from the LLM.
    pub source: FindingSource,
    /// Presumably the text that was matched — confirm against the producer.
    pub match_text: String,
    /// Presumably a canonical form of `match_text` used as a key — TODO confirm.
    pub normalized_key: String,
    /// Confidence value; added unchanged into the total computed by `score()`.
    pub confidence: u8,
    /// Lower bound of the range returned by `range()`. The unit (bytes vs.
    /// characters) is not established in this file.
    pub start: usize,
    /// Exclusive upper bound of the range returned by `range()`.
    pub end: usize,
}
194
195impl Finding {
196    pub fn range(&self) -> Range<usize> {
197        self.start..self.end
198    }
199
200    pub fn score(&self) -> u16 {
201        u16::from(self.kind.priority())
202            + u16::from(self.source.bonus())
203            + u16::from(self.confidence)
204    }
205}
206
/// Strategy recorded for an applied replacement.
///
/// Only one strategy exists at present. Serialized with `snake_case` variant
/// names (`"structured_token"`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReplacementStrategy {
    StructuredToken,
}
212
213#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
214pub struct AppliedReplacement {
215    pub kind: FindingKind,
216    #[serde(skip_serializing)]
217    pub original: String,
218    pub replacement: String,
219    pub strategy: ReplacementStrategy,
220    pub display_value: Option<String>,
221}
222
/// Counters and LLM status flags carried in `RedactionResult::stats`.
///
/// Derives `Default`, so a fresh value has every counter at 0, every flag
/// `false`, and `llm_error` set to `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct RedactionStats {
    pub total_findings: usize,
    pub applied_replacements: usize,
    pub dropped_findings: usize,
    pub llm_configured: bool,
    pub llm_request_failed: bool,
    pub llm_candidates_accepted: usize,
    pub llm_candidates_rejected: usize,
    /// Presumably the error text of a failed LLM request — confirm with the
    /// code that sets `llm_request_failed`.
    pub llm_error: Option<String>,
}
234
/// Serializable result bundle: the redacted text together with the findings,
/// the replacements that were applied, and summary statistics.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RedactionResult {
    pub redacted_text: String,
    pub findings: Vec<Finding>,
    pub applied_replacements: Vec<AppliedReplacement>,
    pub stats: RedactionStats,
}
242
/// Pairs a [`RedactionResult`] with its [`RedactionSession`].
///
/// Unlike the two types it wraps, this struct does not derive
/// `Serialize`/`Deserialize`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RedactionArtifact {
    pub result: RedactionResult,
    pub session: RedactionSession,
}
248
/// One entry of a [`RedactionSession`].
///
/// Note that `original` is serialized here (no `skip` attribute), so a
/// serialized session contains the original values.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RestorationEntry {
    /// Presumably the placeholder token found in the redacted text — TODO confirm.
    pub token: String,
    /// Category of the value this entry refers to.
    pub kind: FindingKind,
    /// Presumably the value that `token` stands for — TODO confirm.
    pub original: String,
    pub replacement_hint: Option<String>,
    /// Presumably how many times `token` occurs — TODO confirm.
    pub occurrences: usize,
}
257
/// Serializable session record: identifiers, fingerprints, the redacted
/// text, and the list of [`RestorationEntry`] values.
///
/// How `fingerprint` and `redacted_fingerprint` are computed, and what
/// `version` numbers are valid, is not established in this file.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RedactionSession {
    pub version: u32,
    pub session_id: String,
    pub fingerprint: String,
    pub redacted_fingerprint: String,
    pub redacted_text: String,
    pub entries: Vec<RestorationEntry>,
}
267
/// Outcome of a restore operation.
///
/// `is_valid()` returns `true` only when both `unresolved_tokens` and
/// `validation_errors` are empty.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RestoreResult {
    pub restored_text: String,
    pub restored_count: usize,
    /// Any entry here makes `is_valid()` return `false`.
    pub unresolved_tokens: Vec<String>,
    /// Any entry here makes `is_valid()` return `false`.
    pub validation_errors: Vec<String>,
}
275
276impl RestoreResult {
277    pub fn is_valid(&self) -> bool {
278        self.validation_errors.is_empty() && self.unresolved_tokens.is_empty()
279    }
280}
281
/// Summary of one session entry: the same fields as [`RestorationEntry`]
/// except that `original` is left out.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SessionEntrySummary {
    pub token: String,
    pub kind: FindingKind,
    pub replacement_hint: Option<String>,
    pub occurrences: usize,
}
289
/// Summary of a session: the identifying fields of [`RedactionSession`]
/// without `redacted_text`, and with entries given as
/// [`SessionEntrySummary`] values.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SessionSummary {
    pub version: u32,
    pub session_id: String,
    pub fingerprint: String,
    pub redacted_fingerprint: String,
    /// Presumably equal to `entries.len()` — verify against the code that
    /// builds the summary.
    pub entry_count: usize,
    pub entries: Vec<SessionEntrySummary>,
}