Skip to main content

sanitize_engine/processor/
profile.rs

1//! File-type profiles for structured processors.
2//!
3//! A [`FileTypeProfile`] tells the processing pipeline which processor
4//! to use and which fields/keys within the file should be sanitized.
5
6use crate::category::Category;
7use serde::{Deserialize, Serialize};
8
9// ---------------------------------------------------------------------------
10// FieldRule
11// ---------------------------------------------------------------------------
12
13/// A rule describing a single field/key to sanitize.
14///
15/// # Pattern Syntax
16///
17/// - Exact key: `"password"`, `"db_host"`.
18/// - Dotted path: `"database.password"`, `"smtp.user"`.
19/// - Glob suffix: `"*.password"` — matches any key ending in `.password`.
20/// - Glob prefix: `"db.*"` — matches any key starting with `db.`.
21/// - Wildcard: `"*"` — matches every field.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct FieldRule {
24    /// Key pattern to match (see Pattern Syntax above).
25    pub pattern: String,
26
27    /// Category for replacement generation. Defaults to `Custom("field")`
28    /// if not specified.
29    #[serde(default, skip_serializing_if = "Option::is_none")]
30    pub category: Option<Category>,
31
32    /// Optional human-readable label for reporting.
33    #[serde(default, skip_serializing_if = "Option::is_none")]
34    pub label: Option<String>,
35}
36
37impl FieldRule {
38    /// Create a new field rule with just a pattern.
39    #[must_use]
40    pub fn new(pattern: impl Into<String>) -> Self {
41        Self {
42            pattern: pattern.into(),
43            category: None,
44            label: None,
45        }
46    }
47
48    /// Set the category for this rule.
49    #[must_use]
50    pub fn with_category(mut self, category: Category) -> Self {
51        self.category = Some(category);
52        self
53    }
54
55    /// Set the label for this rule.
56    #[must_use]
57    pub fn with_label(mut self, label: impl Into<String>) -> Self {
58        self.label = Some(label.into());
59        self
60    }
61}
62
63// ---------------------------------------------------------------------------
64// FileTypeProfile
65// ---------------------------------------------------------------------------
66
67/// Specifies which processor to use and what fields to sanitize.
68///
69/// # Example (serialized as JSON)
70///
71/// ```json
72/// {
73///   "processor": "key_value",
74///   "extensions": [".rb", ".conf"],
75///   "fields": [
76///     { "pattern": "*.password", "category": "custom:password" },
77///     { "pattern": "*.secret",   "category": "custom:secret"   },
78///     { "pattern": "smtp_address", "category": "hostname" }
79///   ],
80///   "options": {
81///     "delimiter": "=",
82///     "comment_prefix": "#"
83///   }
84/// }
85/// ```
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct FileTypeProfile {
88    /// Name of the processor to use (e.g. `"key_value"`, `"json"`).
89    pub processor: String,
90
91    /// File extensions this profile applies to (e.g. `[".rb", ".conf"]`).
92    #[serde(default)]
93    pub extensions: Vec<String>,
94
95    /// Field rules: which keys/paths to sanitize.
96    pub fields: Vec<FieldRule>,
97
98    /// Free-form options passed to the processor (e.g. delimiter, comment chars).
99    #[serde(default)]
100    pub options: std::collections::HashMap<String, String>,
101}
102
103impl FileTypeProfile {
104    /// Create a minimal profile for a given processor.
105    #[must_use]
106    pub fn new(processor: impl Into<String>, fields: Vec<FieldRule>) -> Self {
107        Self {
108            processor: processor.into(),
109            extensions: Vec::new(),
110            fields,
111            options: std::collections::HashMap::new(),
112        }
113    }
114
115    /// Add an extension to this profile.
116    #[must_use]
117    pub fn with_extension(mut self, ext: impl Into<String>) -> Self {
118        self.extensions.push(ext.into());
119        self
120    }
121
122    /// Add a free-form option.
123    #[must_use]
124    pub fn with_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
125        self.options.insert(key.into(), value.into());
126        self
127    }
128
129    /// Check whether a filename matches this profile's extensions.
130    ///
131    /// Returns `false` if the profile has no extensions.
132    ///
133    /// # Examples
134    ///
135    /// ```
136    /// use sanitize_engine::processor::profile::FieldRule;
137    /// use sanitize_engine::processor::profile::FileTypeProfile;
138    ///
139    /// let profile = FileTypeProfile::new("json", vec![])
140    ///     .with_extension(".json")
141    ///     .with_extension(".jsonc");
142    ///
143    /// assert!(profile.matches_filename("config.json"));
144    /// assert!(profile.matches_filename("deep/path/app.jsonc"));
145    /// assert!(!profile.matches_filename("config.yml"));
146    /// assert!(!FileTypeProfile::new("json", vec![]).matches_filename("any.json"));
147    /// ```
148    pub fn matches_filename(&self, filename: &str) -> bool {
149        if self.extensions.is_empty() {
150            return false;
151        }
152        self.extensions
153            .iter()
154            .any(|ext| filename.ends_with(ext.as_str()))
155    }
156}
157
158// ---------------------------------------------------------------------------
159// Serde support for Category (as string)
160// ---------------------------------------------------------------------------
161
162impl Serialize for Category {
163    fn serialize<S: serde::Serializer>(
164        &self,
165        serializer: S,
166    ) -> std::result::Result<S::Ok, S::Error> {
167        serializer.serialize_str(&self.to_string())
168    }
169}
170
171impl<'de> Deserialize<'de> for Category {
172    fn deserialize<D: serde::Deserializer<'de>>(
173        deserializer: D,
174    ) -> std::result::Result<Self, D::Error> {
175        let s = String::deserialize(deserializer)?;
176        Ok(match s.as_str() {
177            "email" => Category::Email,
178            "name" => Category::Name,
179            "phone" => Category::Phone,
180            "ipv4" => Category::IpV4,
181            "ipv6" => Category::IpV6,
182            "credit_card" => Category::CreditCard,
183            "ssn" => Category::Ssn,
184            "hostname" => Category::Hostname,
185            "mac_address" => Category::MacAddress,
186            "container_id" => Category::ContainerId,
187            "uuid" => Category::Uuid,
188            "jwt" => Category::Jwt,
189            "auth_token" => Category::AuthToken,
190            "file_path" => Category::FilePath,
191            "windows_sid" => Category::WindowsSid,
192            "url" => Category::Url,
193            "aws_arn" => Category::AwsArn,
194            "azure_resource_id" => Category::AzureResourceId,
195            other => {
196                let tag = other.strip_prefix("custom:").unwrap_or(other);
197                Category::Custom(tag.into())
198            }
199        })
200    }
201}