Skip to main content

gobby_code/index/
security.rs

1//! Security checks for code indexing.
2//! Ports logic from src/gobby/code_index/security.py.
3
4use std::path::Path;
5
/// Dotted file extensions treated as secret material.
///
/// `has_secret_extension` compares these against the lowercased extension of
/// a path with a leading `.` prepended, so entries must be lowercase and
/// include the dot.
const SECRET_EXTENSIONS: &[&str] = &[
    ".env",
    ".pem",
    ".key",
    ".p12",
    ".pfx",
    ".jks",
    ".keystore",
    ".secret",
];

/// File-name prefixes treated as secret material.
///
/// Matched with `starts_with` against the lowercased file name, so any file
/// whose name merely begins with one of these entries is flagged.
const SECRET_PREFIXES: &[&str] = &["credentials", ".env", "id_rsa", "id_ed25519", "token"];

/// Fragments that flag a file as secret when they occur anywhere in the
/// lowercased file name.
const SECRET_SUBSTRINGS: &[&str] = &["api_key", "apikey", "_secret.", "_token."];

/// Generated output directories that are excluded only when they are the
/// first component under the indexed root.
const ROOT_GENERATED_DIRS: &[&str] = &["build", "dist"];
24
/// Report whether `path` lies inside `root` once both are canonicalized.
///
/// Canonicalization resolves symlinks and `..` segments, so this guards
/// against directory traversal. Returns `false` when either path cannot be
/// canonicalized (for example because it does not exist).
pub fn validate_path(path: &Path, root: &Path) -> bool {
    let resolved = path.canonicalize().ok();
    let root_resolved = root.canonicalize().ok();

    resolved
        .zip(root_resolved)
        .map(|(target, base)| target.starts_with(base))
        .unwrap_or(false)
}
32
33/// Check that a symlink target is still within root.
34pub fn is_symlink_safe(path: &Path, root: &Path) -> bool {
35    if !path.is_symlink() {
36        return true;
37    }
38    validate_path(path, root)
39}
40
/// Check if file appears to be binary (has null bytes in first 8KB).
///
/// Reads up to the first 8192 bytes of `path` and reports whether any of
/// them is a NUL byte. A file that cannot be opened or read is reported as
/// binary, so callers skip it rather than index unreadable content.
pub fn is_binary(path: &Path) -> bool {
    use std::io::Read;

    // Size of the leading window inspected for NUL bytes.
    const SNIFF_LEN: usize = 8192;

    let file = match std::fs::File::open(path) {
        Ok(f) => f,
        Err(_) => return true,
    };

    let mut head = Vec::with_capacity(SNIFF_LEN);
    // A single `read` call may return fewer bytes than requested, or fail
    // with `Interrupted`. `take` + `read_to_end` keeps reading until the
    // window is full or EOF is reached and retries interrupted reads, so the
    // whole window is always inspected. The usize -> u64 cast is lossless.
    match file.take(SNIFF_LEN as u64).read_to_end(&mut head) {
        Ok(_) => head.contains(&0),
        Err(_) => true,
    }
}
55
56/// Check if a path should be excluded.
57///
58/// Patterns listed in `ROOT_GENERATED_DIRS` match only the first relative path
59/// component, so source paths like `src/package/build/mod.rs` remain indexable.
60/// Other exclude patterns match any component of the relative path.
61/// Root-generated directory names are literal component names; wildcard
62/// patterns such as `build*` do not get root-only special handling.
63pub fn should_exclude_path(root: &Path, path: &Path, patterns: &[String]) -> bool {
64    let rel = path.strip_prefix(root).unwrap_or(path);
65
66    for pattern in patterns {
67        if is_root_generated_dir(pattern) {
68            if rel
69                .components()
70                .next()
71                .map(|component| glob_match(pattern, &component.as_os_str().to_string_lossy()))
72                .unwrap_or(false)
73            {
74                return true;
75            }
76            continue;
77        }
78
79        for component in rel.components() {
80            let name = component.as_os_str().to_string_lossy();
81            if glob_match(pattern, &name) {
82                return true;
83            }
84        }
85    }
86
87    false
88}
89
90fn is_root_generated_dir(pattern: &str) -> bool {
91    ROOT_GENERATED_DIRS.contains(&pattern)
92}
93
94/// Check if file extension suggests secret content.
95pub fn has_secret_extension(path: &Path) -> bool {
96    let name = path
97        .file_name()
98        .map(|n| n.to_string_lossy().to_lowercase())
99        .unwrap_or_default();
100    let suffix = path
101        .extension()
102        .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))
103        .unwrap_or_default();
104
105    if SECRET_EXTENSIONS.contains(&suffix.as_str()) {
106        return true;
107    }
108    for prefix in SECRET_PREFIXES {
109        if name.starts_with(prefix) {
110            return true;
111        }
112    }
113    for substring in SECRET_SUBSTRINGS {
114        if name.contains(substring) {
115            return true;
116        }
117    }
118    false
119}
120
/// Simple glob matching supporting `*` and `?` wildcards.
///
/// `*` matches any run of characters (including none), `?` matches exactly
/// one character, and every other character matches itself. The whole of
/// `text` must be consumed for the match to succeed. Matching is performed on
/// `char`s, so multi-byte characters count as a single position.
pub fn glob_match(pattern: &str, text: &str) -> bool {
    let pc: Vec<char> = pattern.chars().collect();
    let tc: Vec<char> = text.chars().collect();
    glob_inner(&pc, &tc)
}

/// Iterative matcher behind [`glob_match`].
///
/// Only the most recent `*` is remembered: on a mismatch that star is made
/// to absorb one more character of `text` and matching resumes just after
/// it. Earlier stars never need revisiting, because anything they could
/// absorb the latest star can absorb as well. This bounds the work to
/// `O(pattern.len() * text.len())`, whereas naive recursion is exponential in
/// the number of `*` wildcards for non-matching input.
fn glob_inner(pattern: &[char], text: &[char]) -> bool {
    let mut p = 0usize;
    let mut t = 0usize;
    // (pattern index just after the last `*`, text index that `*` extends to)
    let mut last_star: Option<(usize, usize)> = None;

    while t < text.len() {
        match pattern.get(p).copied() {
            Some('*') => {
                // Start by letting the star match the empty string.
                p += 1;
                last_star = Some((p, t));
            }
            Some(c) if c == '?' || c == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match last_star {
                Some((after_star, absorbed_to)) => {
                    // Grow the star by one character and retry from there.
                    p = after_star;
                    t = absorbed_to + 1;
                    last_star = Some((after_star, t));
                }
                None => return false,
            },
        }
    }

    // Text is exhausted: any leftover pattern must be able to match nothing.
    pattern[p..].iter().all(|&c| c == '*')
}