Skip to main content

gobby_code/index/
security.rs

1//! Security checks for code indexing.
2//! Ports logic from src/gobby/code_index/security.py.
3
4use std::path::Path;
5
/// Dotted, lowercase file extensions treated as secret material.
///
/// `has_secret_extension` compares these against the lowercased final
/// extension of a path with a leading `.` prepended.
const SECRET_EXTENSIONS: &[&str] = &[
    ".env",
    ".pem",
    ".key",
    ".p12",
    ".pfx",
    ".jks",
    ".keystore",
    ".secret",
];

/// Lowercase file-name prefixes treated as secret material.
///
/// `has_secret_extension` tests these with `starts_with` against the
/// lowercased file name.
// NOTE(review): prefix matching also flags names such as `tokenizer.rs` or
// `credentials_test.rs` — confirm that this conservatism is intended.
const SECRET_PREFIXES: &[&str] = &["credentials", ".env", "id_rsa", "id_ed25519", "token"];

/// Lowercase fragments that mark a file as secret when they occur anywhere in
/// its lowercased file name (`has_secret_extension` tests them with `contains`).
const SECRET_SUBSTRINGS: &[&str] = &["api_key", "apikey", "_secret.", "_token."];

/// Generated output directories that are excluded only when they are the
/// first component under the indexed root.
const ROOT_GENERATED_DIRS: &[&str] = &["build", "dist"];
24
/// Report whether `path`, once fully resolved, lies inside `root`.
///
/// Both arguments are canonicalized, so `..` segments and symlinks are
/// resolved before the containment test; this is what blocks directory
/// traversal. If either path cannot be canonicalized (for example because it
/// does not exist), the answer is `false`.
pub fn validate_path(path: &Path, root: &Path) -> bool {
    path.canonicalize()
        .and_then(|target| root.canonicalize().map(|base| target.starts_with(base)))
        .unwrap_or(false)
}
32
33/// Check that a symlink target is still within root.
34pub fn is_symlink_safe(path: &Path, root: &Path) -> bool {
35    if !path.is_symlink() {
36        return true;
37    }
38    validate_path(path, root)
39}
40
/// Check if a file appears to be binary (has a NUL byte in its first 8 KiB).
///
/// Returns `true` when a zero byte occurs within the first 8192 bytes, and
/// also when the file cannot be opened or read, so unreadable files are
/// treated as binary. An empty file is reported as not binary.
pub fn is_binary(path: &Path) -> bool {
    use std::io::Read;

    /// Number of leading bytes inspected for NUL bytes.
    const SNIFF_LEN: u64 = 8192;

    let file = match std::fs::File::open(path) {
        Ok(file) => file,
        Err(_) => return true,
    };

    // A single `read` call may return fewer bytes than requested and fails
    // with `Interrupted` on a signal. `take` + `read_to_end` keeps reading
    // until the limit or EOF is reached and retries interrupted reads, so the
    // whole prefix is always inspected.
    let mut head = Vec::new();
    match file.take(SNIFF_LEN).read_to_end(&mut head) {
        Ok(_) => head.contains(&0),
        Err(_) => true,
    }
}
55
56/// Check if a path should be excluded.
57///
58/// Patterns listed in `ROOT_GENERATED_DIRS` match only the first relative path
59/// component, so source paths like `src/package/build/mod.rs` remain indexable.
60/// Other exclude patterns match any component of the relative path.
61pub fn should_exclude_path(root: &Path, path: &Path, patterns: &[String]) -> bool {
62    let rel = path.strip_prefix(root).unwrap_or(path);
63
64    for pattern in patterns {
65        if is_root_generated_dir(pattern) {
66            if rel
67                .components()
68                .next()
69                .map(|component| glob_match(pattern, &component.as_os_str().to_string_lossy()))
70                .unwrap_or(false)
71            {
72                return true;
73            }
74            continue;
75        }
76
77        for component in rel.components() {
78            let name = component.as_os_str().to_string_lossy();
79            if glob_match(pattern, &name) {
80                return true;
81            }
82        }
83    }
84
85    false
86}
87
88fn is_root_generated_dir(pattern: &str) -> bool {
89    ROOT_GENERATED_DIRS.contains(&pattern)
90}
91
92/// Check if file extension suggests secret content.
93pub fn has_secret_extension(path: &Path) -> bool {
94    let name = path
95        .file_name()
96        .map(|n| n.to_string_lossy().to_lowercase())
97        .unwrap_or_default();
98    let suffix = path
99        .extension()
100        .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))
101        .unwrap_or_default();
102
103    if SECRET_EXTENSIONS.contains(&suffix.as_str()) {
104        return true;
105    }
106    for prefix in SECRET_PREFIXES {
107        if name.starts_with(prefix) {
108            return true;
109        }
110    }
111    for substring in SECRET_SUBSTRINGS {
112        if name.contains(substring) {
113            return true;
114        }
115    }
116    false
117}
118
/// Simple glob matching supporting `*` and `?` wildcards.
///
/// `*` matches any run of characters (including none) and `?` matches exactly
/// one character. Every other pattern character must equal the corresponding
/// text character. The whole of `text` must be matched, comparison is done per
/// `char`, and there is no escape syntax or character-class support.
pub fn glob_match(pattern: &str, text: &str) -> bool {
    let pc: Vec<char> = pattern.chars().collect();
    let tc: Vec<char> = text.chars().collect();
    glob_inner(&pc, &tc)
}

/// Match `text` against `pattern` iteratively, remembering only the most
/// recent `*`.
///
/// Backtracking to the last star is sufficient for `*`/`?` globs and bounds
/// the work at O(pattern × text). Trying every split at every star
/// recursively is exponential for patterns such as `a*a*a*b`.
fn glob_inner(pattern: &[char], text: &[char]) -> bool {
    let mut p = 0;
    let mut t = 0;
    // (pattern index just after the last `*`, text index that star has
    // absorbed up to). `None` until a star has been seen.
    let mut last_star: Option<(usize, usize)> = None;

    while t < text.len() {
        match pattern.get(p).copied() {
            Some('*') => {
                // Start by letting the star match the empty string.
                last_star = Some((p + 1, t));
                p += 1;
            }
            Some(c) if c == '?' || c == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match last_star {
                // Mismatch: let the last star swallow one more character and
                // resume right after it.
                Some((resume_p, absorbed)) => {
                    p = resume_p;
                    t = absorbed + 1;
                    last_star = Some((resume_p, absorbed + 1));
                }
                None => return false,
            },
        }
    }

    // Text is exhausted; only trailing stars may remain in the pattern.
    pattern[p..].iter().all(|&c| c == '*')
}