Skip to main content

cloakrs_patterns/
user_path.rs

1use crate::common::{compile_regex, confidence, context_boost};
2use cloakrs_core::{Confidence, EntityType, Locale, PiiEntity, Recognizer, Span};
3use once_cell::sync::Lazy;
4use regex::Regex;
5use std::collections::HashSet;
6
/// Matches Unix-style home paths rooted at `/home/` or `/Users/`.
///
/// Capture group 1 is the username: one ASCII letter followed by up to 31
/// ASCII letters, digits, `.`, `_` or `-`. Any number of `/`-prefixed segments
/// may follow; a segment ends at whitespace, a colon, a double or single
/// quote, a backtick, a comma, a semicolon, or a closing `)`, `}` or `]`.
static UNIX_HOME_REGEX: Lazy<Regex> = Lazy::new(|| {
    compile_regex(r#"(?:/home/|/Users/)([a-zA-Z][a-zA-Z0-9._-]{0,31})(/[^\s:"'`,;)}\]]*)*"#)
});
10
/// Matches Windows home paths of the form `<drive>:\Users\<user>\...`.
///
/// The whole pattern is case-insensitive (`(?i)`), so the drive letter and the
/// `Users` component match in any case. Capture group 1 is the username; it
/// follows the same shape as the Unix pattern but additionally allows spaces.
/// Trailing segments are `\`-prefixed and end at whitespace, a colon, a quote,
/// a backtick, a comma, a semicolon, or a closing `)`, `}` or `]`.
static WINDOWS_HOME_REGEX: Lazy<Regex> = Lazy::new(|| {
    compile_regex(r#"(?i)[A-Z]:\\Users\\([a-zA-Z][a-zA-Z0-9._\- ]{0,31})(\\[^\s:"'`,;)}\]]*)*"#)
});
14
/// Matches `/root` optionally followed by `/`-prefixed segments.
///
/// The pattern itself does not check what precedes or follows `/root`; callers
/// rely on `is_path_boundary` to reject hits such as `/rootfs`.
static ROOT_PATH_REGEX: Lazy<Regex> = Lazy::new(|| compile_regex(r#"/root(/[^\s:"'`,;)}\]]*)*"#));
16
/// Placeholder usernames that should not be reported as findings.
///
/// `is_ignored_username` lowercases the candidate before looking it up here,
/// so every entry must be lowercase ASCII to be reachable.
const IGNORE_USERNAMES: &[&str] = &[
    "user",
    "username",
    "example",
    "your_user",
    "your_username",
    "yourusername",
    "myuser",
    "testuser",
    "$user",
    "{user}",
    "xxx",
    "placeholder",
];
31
/// Keywords handed to `context_boost` when scoring a match.
///
/// NOTE(review): `context_boost` lives in `crate::common`; presumably it looks
/// for these words near the match and raises the confidence. The tests in
/// this file only establish that `FileNotFoundError` and `permission denied`
/// prefixes score higher than a neutral prefix.
const CONTEXT_WORDS: &[&str] = &[
    "file",
    "path",
    "directory",
    "folder",
    "config",
    "open",
    "read",
    "write",
    "permission",
    "denied",
    "not found",
    "no such file",
    "filenotfounderror",
    "enoent",
    "stack trace",
    "at /",
    "from /",
    "in /",
];
52
/// Path fragments that mark a match as pointing at secrets or credentials.
///
/// `contains_sensitive_subpath` does a plain substring search for these in the
/// lowercased candidate, so entries must be lowercase; a hit adds a small
/// boost in `compute_confidence`.
const SENSITIVE_SUBPATHS: &[&str] = &[
    ".ssh",
    ".aws",
    ".env",
    "credentials",
    ".gnupg",
    "id_rsa",
    "authorized_keys",
];
62
/// Recognizes home-directory paths that expose system usernames.
///
/// Covers Unix (`/home/<user>`), macOS (`/Users/<user>`), Windows
/// (`<drive>:\Users\<user>`) and the `/root` home directory. The recognizer is
/// a stateless unit struct, so the type name can be used directly as a value.
///
/// # Examples
///
/// ```
/// use cloakrs_core::{EntityType, Recognizer};
/// use cloakrs_patterns::UserPathRecognizer;
///
/// let findings = UserPathRecognizer.scan("open /home/kadir/.ssh/id_rsa");
/// assert_eq!(findings[0].entity_type, EntityType::UserPath);
/// assert_eq!(UserPathRecognizer::extract_username("/home/kadir/.ssh/id_rsa").as_deref(), Some("kadir"));
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct UserPathRecognizer;
77
78impl Recognizer for UserPathRecognizer {
79    fn id(&self) -> &str {
80        "user_path_home_v1"
81    }
82
83    fn entity_type(&self) -> EntityType {
84        EntityType::UserPath
85    }
86
87    fn supported_locales(&self) -> &[Locale] {
88        &[]
89    }
90
91    fn scan(&self, text: &str) -> Vec<PiiEntity> {
92        let mut seen = HashSet::new();
93        let mut findings = Vec::new();
94
95        for regex in [&*UNIX_HOME_REGEX, &*WINDOWS_HOME_REGEX, &*ROOT_PATH_REGEX] {
96            for matched in regex.find_iter(text) {
97                let span = trim_path_span(text, Span::new(matched.start(), matched.end()));
98                if span.is_empty() || !seen.insert((span.start, span.end)) {
99                    continue;
100                }
101                let candidate = &text[span.start..span.end];
102                if self.is_valid_path_match(text, span.start, span.end, candidate) {
103                    findings.push(PiiEntity {
104                        entity_type: self.entity_type(),
105                        span,
106                        text: candidate.to_string(),
107                        confidence: self.compute_confidence(text, span.start, candidate),
108                        recognizer_id: self.id().to_string(),
109                    });
110                }
111            }
112        }
113
114        findings.sort_by_key(|finding| finding.span.start);
115        findings
116    }
117
118    fn validate(&self, candidate: &str) -> bool {
119        Self::extract_username(candidate).is_some_and(|username| !is_ignored_username(&username))
120    }
121}
122
123impl UserPathRecognizer {
124    /// Extracts the username segment from a supported home-directory path.
125    ///
126    /// # Examples
127    ///
128    /// ```
129    /// use cloakrs_patterns::UserPathRecognizer;
130    ///
131    /// assert_eq!(UserPathRecognizer::extract_username("/Users/john/Documents").as_deref(), Some("john"));
132    /// assert_eq!(UserPathRecognizer::extract_username(r"C:\Users\john.doe\Desktop").as_deref(), Some("john.doe"));
133    /// ```
134    #[must_use]
135    pub fn extract_username(path: &str) -> Option<String> {
136        if let Some(rest) = path.strip_prefix("/home/") {
137            return first_unix_segment(rest);
138        }
139        if let Some(rest) = path.strip_prefix("/Users/") {
140            return first_unix_segment(rest);
141        }
142        let lower = path.to_ascii_lowercase();
143        if let Some(index) = lower.find(r"\users\") {
144            let after = &path[index + r"\Users\".len()..];
145            return after.split('\\').next().map(str::to_string);
146        }
147        if path == "/root" || path.starts_with("/root/") {
148            return Some("root".to_string());
149        }
150        None
151    }
152
153    fn is_valid_path_match(&self, text: &str, start: usize, end: usize, candidate: &str) -> bool {
154        self.validate(candidate) && is_path_boundary(text, start, end)
155    }
156
157    fn compute_confidence(&self, text: &str, start: usize, candidate: &str) -> Confidence {
158        let base = if candidate == "/root" || candidate.starts_with("/root/") {
159            0.75
160        } else {
161            0.85
162        };
163        let sensitive_boost = if contains_sensitive_subpath(candidate) {
164            0.05
165        } else {
166            0.0
167        };
168        confidence(base + sensitive_boost + context_boost(text, start, CONTEXT_WORDS))
169    }
170}
171
/// Returns the first `/`-separated segment of `rest` as an owned string.
///
/// `rest` is the text that follows a `/home/` or `/Users/` prefix. `None` is
/// returned when that first segment is empty (for example `""` or `"/x"`),
/// because an empty string is not a username.
fn first_unix_segment(rest: &str) -> Option<String> {
    rest.split('/')
        .next()
        .filter(|segment| !segment.is_empty())
        .map(str::to_string)
}
175
176fn is_ignored_username(username: &str) -> bool {
177    let lower = username.to_ascii_lowercase();
178    IGNORE_USERNAMES.contains(&lower.as_str())
179}
180
181fn contains_sensitive_subpath(path: &str) -> bool {
182    let lower = path.to_ascii_lowercase();
183    SENSITIVE_SUBPATHS
184        .iter()
185        .any(|subpath| lower.contains(subpath))
186}
187
188fn trim_path_span(text: &str, span: Span) -> Span {
189    let mut end = span.end;
190    while end > span.start {
191        let value = &text[span.start..end];
192        let Some(c) = value.chars().next_back() else {
193            break;
194        };
195        if !matches!(c, '.' | ',' | ':' | ';' | '!' | '?' | ')' | ']' | '}') {
196            break;
197        }
198        end -= c.len_utf8();
199    }
200    Span::new(span.start, end)
201}
202
/// Reports whether the span `start..end` of `text` stands alone as a path.
///
/// The span is rejected when the character right before it could belong to a
/// longer path, or when the text right after it continues the path. A run of
/// `.` immediately after the span is skipped before the following character is
/// inspected: `trim_path_span` strips a sentence-final period from the span,
/// and that period must not then disqualify the match. Inputs such as
/// `/root.bak` or `/rootfs` are still rejected because a path character
/// follows.
///
/// `start` and `end` must be byte offsets on `char` boundaries of `text`;
/// slicing panics otherwise.
fn is_path_boundary(text: &str, start: usize, end: usize) -> bool {
    let glued_before = text[..start]
        .chars()
        .next_back()
        .is_some_and(is_path_continuation);
    let glued_after = text[end..]
        .trim_start_matches('.')
        .chars()
        .next()
        .is_some_and(is_path_continuation);
    !glued_before && !glued_after
}

/// Reports whether `c` can appear inside a path: an ASCII letter or digit, or
/// one of `_`, `-`, `.`, `/`, `\`.
fn is_path_continuation(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.' | '/' | '\\')
}
212
// Unit tests for `UserPathRecognizer`: positive detections per platform,
// rejections of system/relative/placeholder paths, confidence boosts, username
// extraction, and registration in the default recognizer set.
#[cfg(test)]
mod tests {
    use super::*;
    use cloakrs_core::RecognizerRegistry;

    /// Scans `input` and returns only the matched text of each finding.
    fn texts(input: &str) -> Vec<String> {
        UserPathRecognizer
            .scan(input)
            .into_iter()
            .map(|finding| finding.text)
            .collect()
    }

    // --- Paths that must be detected ---

    #[test]
    fn test_user_path_home_project_detected() {
        assert_eq!(
            texts("open /home/kadir/projects/myapp/config.yml"),
            ["/home/kadir/projects/myapp/config.yml"]
        );
    }

    #[test]
    fn test_user_path_linux_ssh_key_detected() {
        assert_eq!(
            texts("read /home/ubuntu/.ssh/id_rsa"),
            ["/home/ubuntu/.ssh/id_rsa"]
        );
    }

    #[test]
    fn test_user_path_linux_env_file_detected() {
        assert_eq!(texts("config /home/deploy/.env"), ["/home/deploy/.env"]);
    }

    #[test]
    fn test_user_path_macos_document_detected() {
        assert_eq!(
            texts("file /Users/john/Documents/report.pdf"),
            ["/Users/john/Documents/report.pdf"]
        );
    }

    #[test]
    fn test_user_path_macos_aws_credentials_detected() {
        assert_eq!(
            texts("path /Users/admin/.aws/credentials"),
            ["/Users/admin/.aws/credentials"]
        );
    }

    #[test]
    fn test_user_path_windows_administrator_detected() {
        assert_eq!(
            texts(r"open C:\Users\Administrator\Desktop\secrets.txt"),
            [r"C:\Users\Administrator\Desktop\secrets.txt"]
        );
    }

    #[test]
    fn test_user_path_windows_dotted_username_detected() {
        assert_eq!(
            texts(r"temp C:\Users\john.doe\AppData\Local\Temp"),
            [r"C:\Users\john.doe\AppData\Local\Temp"]
        );
    }

    #[test]
    fn test_user_path_root_bashrc_detected() {
        assert_eq!(texts("file /root/.bashrc"), ["/root/.bashrc"]);
    }

    #[test]
    fn test_user_path_root_authorized_keys_detected() {
        assert_eq!(
            texts("read /root/.ssh/authorized_keys"),
            ["/root/.ssh/authorized_keys"]
        );
    }

    // --- Paths that must not be reported ---

    #[test]
    fn test_user_path_usr_local_rejected() {
        assert!(texts("path /usr/local/bin/python").is_empty());
    }

    #[test]
    fn test_user_path_var_log_rejected() {
        assert!(texts("path /var/log/syslog").is_empty());
    }

    #[test]
    fn test_user_path_etc_nginx_rejected() {
        assert!(texts("path /etc/nginx/nginx.conf").is_empty());
    }

    #[test]
    fn test_user_path_relative_config_rejected() {
        assert!(texts("path ./config/settings.yml").is_empty());
    }

    // `user` is listed in `IGNORE_USERNAMES`, so the match is discarded.
    #[test]
    fn test_user_path_placeholder_user_rejected() {
        assert!(texts("example /home/user/example").is_empty());
    }

    #[test]
    fn test_user_path_container_app_rejected() {
        assert!(texts("path /app/src/main.rs").is_empty());
    }

    // --- Confidence adjustments from surrounding context ---

    #[test]
    fn test_user_path_filenotfound_context_boosts_confidence() {
        let with_context =
            UserPathRecognizer.scan("FileNotFoundError: /home/kadir/.config/app.yml");
        let without_context = UserPathRecognizer.scan("value /home/kadir/.config/app.yml");
        assert!(with_context[0].confidence > without_context[0].confidence);
    }

    #[test]
    fn test_user_path_permission_denied_context_boosts_confidence() {
        let with_context = UserPathRecognizer.scan("permission denied: /Users/admin/private");
        let without_context = UserPathRecognizer.scan("value /Users/admin/private");
        assert!(with_context[0].confidence > without_context[0].confidence);
    }

    // --- Username extraction ---

    #[test]
    fn test_user_path_extract_username_from_linux_home() {
        assert_eq!(
            UserPathRecognizer::extract_username("/home/kadir/stuff").as_deref(),
            Some("kadir")
        );
    }

    #[test]
    fn test_user_path_extract_username_from_windows_home() {
        assert_eq!(
            UserPathRecognizer::extract_username(r"C:\Users\john.doe\Desktop").as_deref(),
            Some("john.doe")
        );
    }

    #[test]
    fn test_user_path_extract_username_from_root_home() {
        assert_eq!(
            UserPathRecognizer::extract_username("/root/.ssh").as_deref(),
            Some("root")
        );
    }

    // Both inputs share the neutral `value` prefix, so only the sub-path differs.
    #[test]
    fn test_user_path_sensitive_subpath_boosts_confidence() {
        let sensitive = UserPathRecognizer.scan("value /home/kadir/.ssh/id_rsa");
        let ordinary = UserPathRecognizer.scan("value /home/kadir/projects/app");
        assert!(sensitive[0].confidence > ordinary[0].confidence);
    }

    // --- Trait metadata and registry wiring ---

    #[test]
    fn test_user_path_supported_locales_are_universal() {
        assert!(UserPathRecognizer.supported_locales().is_empty());
    }

    #[test]
    fn test_user_path_registry_integration_detects_default_recognizer() {
        let mut registry = RecognizerRegistry::new();
        crate::register_default_recognizers(&mut registry);

        let findings = registry.scan_all("open /home/kadir/projects/app");

        assert!(findings
            .iter()
            .any(|finding| finding.entity_type == EntityType::UserPath));
    }
}