sanitize_engine/processor/profile.rs
//! File-type profiles for structured processors.
//!
//! A [`FileTypeProfile`] tells the processing pipeline which processor
//! to use and which fields/keys within the file should be sanitized.
5
6use crate::category::Category;
7use serde::{Deserialize, Serialize};
8
// ---------------------------------------------------------------------------
// FieldRule
// ---------------------------------------------------------------------------
12
13/// A rule describing a single field/key to sanitize.
14///
15/// # Pattern Syntax
16///
17/// - Exact key: `"password"`, `"db_host"`.
18/// - Dotted path: `"database.password"`, `"smtp.user"`.
19/// - Glob suffix: `"*.password"` — matches any key ending in `.password`.
20/// - Glob prefix: `"db.*"` — matches any key starting with `db.`.
21/// - Wildcard: `"*"` — matches every field.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct FieldRule {
24 /// Key pattern to match (see Pattern Syntax above).
25 pub pattern: String,
26
27 /// Category for replacement generation. Defaults to `Custom("field")`
28 /// if not specified.
29 #[serde(default, skip_serializing_if = "Option::is_none")]
30 pub category: Option<Category>,
31
32 /// Optional human-readable label for reporting.
33 #[serde(default, skip_serializing_if = "Option::is_none")]
34 pub label: Option<String>,
35}
36
37impl FieldRule {
38 /// Create a new field rule with just a pattern.
39 #[must_use]
40 pub fn new(pattern: impl Into<String>) -> Self {
41 Self {
42 pattern: pattern.into(),
43 category: None,
44 label: None,
45 }
46 }
47
48 /// Set the category for this rule.
49 #[must_use]
50 pub fn with_category(mut self, category: Category) -> Self {
51 self.category = Some(category);
52 self
53 }
54
55 /// Set the label for this rule.
56 #[must_use]
57 pub fn with_label(mut self, label: impl Into<String>) -> Self {
58 self.label = Some(label.into());
59 self
60 }
61}
62
// ---------------------------------------------------------------------------
// FileTypeProfile
// ---------------------------------------------------------------------------
66
67/// Specifies which processor to use and what fields to sanitize.
68///
69/// # Example (serialized as JSON)
70///
71/// ```json
72/// {
73/// "processor": "key_value",
74/// "extensions": [".rb", ".conf"],
75/// "fields": [
76/// { "pattern": "*.password", "category": "custom:password" },
77/// { "pattern": "*.secret", "category": "custom:secret" },
78/// { "pattern": "smtp_address", "category": "hostname" }
79/// ],
80/// "options": {
81/// "delimiter": "=",
82/// "comment_prefix": "#"
83/// }
84/// }
85/// ```
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct FileTypeProfile {
88 /// Name of the processor to use (e.g. `"key_value"`, `"json"`).
89 pub processor: String,
90
91 /// File extensions this profile applies to (e.g. `[".rb", ".conf"]`).
92 #[serde(default)]
93 pub extensions: Vec<String>,
94
95 /// Field rules: which keys/paths to sanitize.
96 pub fields: Vec<FieldRule>,
97
98 /// Free-form options passed to the processor (e.g. delimiter, comment chars).
99 #[serde(default)]
100 pub options: std::collections::HashMap<String, String>,
101}
102
103impl FileTypeProfile {
104 /// Create a minimal profile for a given processor.
105 #[must_use]
106 pub fn new(processor: impl Into<String>, fields: Vec<FieldRule>) -> Self {
107 Self {
108 processor: processor.into(),
109 extensions: Vec::new(),
110 fields,
111 options: std::collections::HashMap::new(),
112 }
113 }
114
115 /// Add an extension to this profile.
116 #[must_use]
117 pub fn with_extension(mut self, ext: impl Into<String>) -> Self {
118 self.extensions.push(ext.into());
119 self
120 }
121
122 /// Add a free-form option.
123 #[must_use]
124 pub fn with_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
125 self.options.insert(key.into(), value.into());
126 self
127 }
128
129 /// Check whether a filename matches this profile's extensions.
130 ///
131 /// Returns `false` if the profile has no extensions.
132 ///
133 /// # Examples
134 ///
135 /// ```
136 /// use sanitize_engine::processor::profile::FieldRule;
137 /// use sanitize_engine::processor::profile::FileTypeProfile;
138 ///
139 /// let profile = FileTypeProfile::new("json", vec![])
140 /// .with_extension(".json")
141 /// .with_extension(".jsonc");
142 ///
143 /// assert!(profile.matches_filename("config.json"));
144 /// assert!(profile.matches_filename("deep/path/app.jsonc"));
145 /// assert!(!profile.matches_filename("config.yml"));
146 /// assert!(!FileTypeProfile::new("json", vec![]).matches_filename("any.json"));
147 /// ```
148 pub fn matches_filename(&self, filename: &str) -> bool {
149 if self.extensions.is_empty() {
150 return false;
151 }
152 self.extensions
153 .iter()
154 .any(|ext| filename.ends_with(ext.as_str()))
155 }
156}
157
// ---------------------------------------------------------------------------
// Serde support for Category (as string)
// ---------------------------------------------------------------------------
161
162impl Serialize for Category {
163 fn serialize<S: serde::Serializer>(
164 &self,
165 serializer: S,
166 ) -> std::result::Result<S::Ok, S::Error> {
167 serializer.serialize_str(&self.to_string())
168 }
169}
170
171impl<'de> Deserialize<'de> for Category {
172 fn deserialize<D: serde::Deserializer<'de>>(
173 deserializer: D,
174 ) -> std::result::Result<Self, D::Error> {
175 let s = String::deserialize(deserializer)?;
176 Ok(match s.as_str() {
177 "email" => Category::Email,
178 "name" => Category::Name,
179 "phone" => Category::Phone,
180 "ipv4" => Category::IpV4,
181 "ipv6" => Category::IpV6,
182 "credit_card" => Category::CreditCard,
183 "ssn" => Category::Ssn,
184 "hostname" => Category::Hostname,
185 "mac_address" => Category::MacAddress,
186 "container_id" => Category::ContainerId,
187 "uuid" => Category::Uuid,
188 "jwt" => Category::Jwt,
189 "auth_token" => Category::AuthToken,
190 "file_path" => Category::FilePath,
191 "windows_sid" => Category::WindowsSid,
192 "url" => Category::Url,
193 "aws_arn" => Category::AwsArn,
194 "azure_resource_id" => Category::AzureResourceId,
195 other => {
196 let tag = other.strip_prefix("custom:").unwrap_or(other);
197 Category::Custom(tag.into())
198 }
199 })
200 }
201}