Skip to main content

gobby_code/index/
security.rs

1//! Security checks for code indexing.
2//! Ports logic from src/gobby/code_index/security.py.
3
4use std::path::Path;
5
/// Dotted file extensions treated as secret material.
///
/// Compared against the lowercased extension of a path with a leading `.`
/// prepended.
const SECRET_EXTENSIONS: &[&str] = &[
    ".env", ".pem", ".key", ".p12", ".pfx", ".jks", ".keystore", ".secret",
];

/// File-name prefixes treated as secret material.
///
/// Compared against the start of the lowercased file name.
const SECRET_PREFIXES: &[&str] = &[
    "credentials",
    ".env",
    "id_rsa",
    "id_ed25519",
    "token",
];

/// Fragments that mark a file as secret when they occur anywhere in the
/// lowercased file name.
const SECRET_SUBSTRINGS: &[&str] = &[
    "api_key",
    "apikey",
    "_secret.",
    "_token.",
];

/// Names of generated output directories. These are excluded only when they
/// are the first path component below the indexed root.
const ROOT_GENERATED_DIRS: &[&str] = &[
    "build",
    "dist",
];
24
/// Report whether `path` lies inside `root` once both are canonicalized.
///
/// Guards against directory traversal. Returns `false` when either path
/// cannot be canonicalized.
pub fn validate_path(path: &Path, root: &Path) -> bool {
    path.canonicalize()
        .and_then(|real_path| {
            root.canonicalize()
                .map(|real_root| real_path.starts_with(&real_root))
        })
        .unwrap_or(false)
}
32
33/// Check that a symlink target is still within root.
34pub fn is_symlink_safe(path: &Path, root: &Path) -> bool {
35    if !path.is_symlink() {
36        return true;
37    }
38    validate_path(path, root)
39}
40
/// Check if file appears to be binary (has null bytes in first 8KB).
///
/// Reads up to the first 8192 bytes of `path` and returns `true` if any of
/// them is a NUL byte. Files that cannot be opened or read are reported as
/// binary (`true`).
pub fn is_binary(path: &Path) -> bool {
    use std::io::Read;

    /// Number of leading bytes inspected for NUL.
    const SNIFF_LEN: u64 = 8192;

    let file = match std::fs::File::open(path) {
        Ok(f) => f,
        Err(_) => return true,
    };

    // A single `read` call may return fewer bytes than requested or fail with
    // `ErrorKind::Interrupted`. `take` + `read_to_end` keeps reading until the
    // limit or EOF and retries interrupted reads, so the whole prefix is
    // inspected.
    let mut head = Vec::new();
    match file.take(SNIFF_LEN).read_to_end(&mut head) {
        Ok(_) => head.contains(&0),
        Err(_) => true,
    }
}
55
56/// Check if a path should be excluded.
57///
58/// Patterns listed in `ROOT_GENERATED_DIRS` match only the first relative path
59/// component, so source paths like `src/package/build/mod.rs` remain indexable.
60/// Other exclude patterns match any component of the relative path.
61/// Root-generated directory names are literal component names; wildcard
62/// patterns such as `build*` do not get root-only special handling.
63pub fn should_exclude_path(root: &Path, path: &Path, patterns: &[impl AsRef<str>]) -> bool {
64    let rel = path.strip_prefix(root).unwrap_or(path);
65
66    for pattern in patterns {
67        let pattern = pattern.as_ref();
68        if is_root_generated_dir(pattern) {
69            if rel
70                .components()
71                .next()
72                .map(|component| glob_match(pattern, &component.as_os_str().to_string_lossy()))
73                .unwrap_or(false)
74            {
75                return true;
76            }
77            continue;
78        }
79
80        for component in rel.components() {
81            let name = component.as_os_str().to_string_lossy();
82            if glob_match(pattern, &name) {
83                return true;
84            }
85        }
86    }
87
88    false
89}
90
91fn is_root_generated_dir(pattern: &str) -> bool {
92    ROOT_GENERATED_DIRS.contains(&pattern)
93}
94
95/// Check if file extension suggests secret content.
96pub fn has_secret_extension(path: &Path) -> bool {
97    let name = path
98        .file_name()
99        .map(|n| n.to_string_lossy().to_lowercase())
100        .unwrap_or_default();
101    let suffix = path
102        .extension()
103        .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))
104        .unwrap_or_default();
105
106    if SECRET_EXTENSIONS.contains(&suffix.as_str()) {
107        return true;
108    }
109    for prefix in SECRET_PREFIXES {
110        if name.starts_with(prefix) {
111            return true;
112        }
113    }
114    for substring in SECRET_SUBSTRINGS {
115        if name.contains(substring) {
116            return true;
117        }
118    }
119    false
120}
121
/// Simple glob matching supporting `*` and `?` wildcards.
///
/// The whole of `text` must match `pattern`:
///
/// * `*` matches any run of characters, including the empty run;
/// * `?` matches exactly one character;
/// * every other character matches only itself.
///
/// Matching is done per `char`, so multi-byte characters count as one. There
/// is no escape syntax and there are no character classes.
pub fn glob_match(pattern: &str, text: &str) -> bool {
    let pattern_chars: Vec<char> = pattern.chars().collect();
    let text_chars: Vec<char> = text.chars().collect();
    glob_inner(&pattern_chars, &text_chars)
}

/// Match `text` against `pattern` using iterative backtracking.
///
/// Only the most recent `*` is ever revisited: once a later `*` is reached,
/// earlier ones never need to change how much they consumed. That bounds the
/// work to O(pattern × text) instead of the exponential blow-up of naive
/// recursion on patterns like `a*a*a*b`.
fn glob_inner(pattern: &[char], text: &[char]) -> bool {
    let mut p = 0;
    let mut t = 0;
    // (index of the last `*` in the pattern, text index where its run ends).
    let mut last_star: Option<(usize, usize)> = None;

    while t < text.len() {
        match pattern.get(p) {
            Some('*') => {
                // Start by letting the star match the empty run.
                last_star = Some((p, t));
                p += 1;
            }
            Some(&c) if c == '?' || c == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match last_star {
                // Mismatch: let the last star swallow one more character and
                // retry the remainder of the pattern from there.
                Some((star_p, star_t)) => {
                    let resume = star_t + 1;
                    last_star = Some((star_p, resume));
                    p = star_p + 1;
                    t = resume;
                }
                None => return false,
            },
        }
    }

    // Text exhausted: only trailing stars may remain in the pattern.
    pattern[p..].iter().all(|&c| c == '*')
}