Skip to main content

gobby_code/index/
security.rs

1//! Security checks for code indexing.
2//! Ports logic from src/gobby/code_index/security.py.
3
4use std::path::Path;
5
/// File extensions (leading dot, lowercase) that mark a file as likely secret
/// material.
///
/// `has_secret_extension` compares these against the lowercased
/// `Path::extension()` of a file, so entries must stay lowercase. Note that
/// `.env` here only matches names such as `prod.env`: a bare `.env` file has
/// no extension according to `Path::extension()` and is caught by
/// `SECRET_PREFIXES` instead.
const SECRET_EXTENSIONS: &[&str] = &[
    ".env",
    ".pem",
    ".key",
    ".p12",
    ".pfx",
    ".jks",
    ".keystore",
    ".secret",
];

/// Lowercase file-name prefixes that mark a file as likely secret material.
///
/// `has_secret_extension` tests these with `starts_with` against the
/// lowercased file name, so any name beginning with one of these entries is
/// flagged, whatever follows the prefix.
const SECRET_PREFIXES: &[&str] = &["credentials", ".env", "id_rsa", "id_ed25519", "token"];

/// Lowercase fragments that mark a file as likely secret material when they
/// appear anywhere in the lowercased file name (tested with `contains` in
/// `has_secret_extension`).
const SECRET_SUBSTRINGS: &[&str] = &["api_key", "apikey", "_secret.", "_token."];

/// Generated output directories that are excluded only when they are the
/// first component under the indexed root.
///
/// `is_root_generated_dir` compares an exclude pattern against these entries
/// by exact string equality, so only the literal names listed here receive
/// the root-only treatment.
const ROOT_GENERATED_DIRS: &[&str] = &["build", "dist"];
24
/// Report whether `path` resolves to a location inside `root`.
///
/// Both arguments are canonicalized first, so symlinks and `..` segments are
/// resolved before the containment test (this is what prevents directory
/// traversal). If either path cannot be canonicalized the answer is `false`.
pub fn validate_path(path: &Path, root: &Path) -> bool {
    let canonical_path = path.canonicalize().ok();
    let canonical_root = root.canonicalize().ok();
    canonical_path
        .zip(canonical_root)
        .map_or(false, |(inside, base)| inside.starts_with(base))
}
32
33/// Check that a symlink target is still within root.
34pub fn is_symlink_safe(path: &Path, root: &Path) -> bool {
35    if !path.is_symlink() {
36        return true;
37    }
38    validate_path(path, root)
39}
40
/// Check if file appears to be binary (has null bytes anywhere in the stream).
///
/// Returns `true` as soon as a NUL byte is found, and `false` once the end of
/// the file is reached without seeing one (an empty file is therefore text).
/// A file that cannot be opened or read is reported as binary, so callers
/// fail closed on I/O errors.
pub fn is_binary(path: &Path) -> bool {
    use std::io::{ErrorKind, Read};
    let mut file = match std::fs::File::open(path) {
        Ok(f) => f,
        Err(_) => return true,
    };
    // Scan the whole stream, not just the first 8KB: NUL bytes can appear late
    // (a clean prefix followed by binary garbage corrupts the index — gobby-cli
    // #17356 / Gobby #17344). The read is bounded: `is_safe_text_file` already
    // rejects files larger than MAX_FILE_SIZE before this runs, so the loop reads
    // at most that cap. Do not narrow this back to a single read.
    let mut buf = [0u8; 8192];
    loop {
        let n = match file.read(&mut buf) {
            Ok(n) => n,
            // An interrupted read is transient and says nothing about the
            // file's content; retry instead of misreporting it as binary.
            Err(e) if e.kind() == ErrorKind::Interrupted => continue,
            Err(_) => return true,
        };
        if n == 0 {
            return false;
        }
        if buf[..n].contains(&0) {
            return true;
        }
    }
}
67
68/// Check if a path should be excluded.
69///
70/// Patterns listed in `ROOT_GENERATED_DIRS` match only the first relative path
71/// component, so source paths like `src/package/build/mod.rs` remain indexable.
72/// Other exclude patterns match any component of the relative path.
73/// Root-generated directory names are literal component names; wildcard
74/// patterns such as `build*` do not get root-only special handling.
75pub fn should_exclude_path(root: &Path, path: &Path, patterns: &[impl AsRef<str>]) -> bool {
76    let rel = path.strip_prefix(root).unwrap_or(path);
77
78    for pattern in patterns {
79        let pattern = pattern.as_ref();
80        if is_root_generated_dir(pattern) {
81            if rel
82                .components()
83                .next()
84                .map(|component| glob_match(pattern, &component.as_os_str().to_string_lossy()))
85                .unwrap_or(false)
86            {
87                return true;
88            }
89            continue;
90        }
91
92        for component in rel.components() {
93            let name = component.as_os_str().to_string_lossy();
94            if glob_match(pattern, &name) {
95                return true;
96            }
97        }
98    }
99
100    false
101}
102
103fn is_root_generated_dir(pattern: &str) -> bool {
104    ROOT_GENERATED_DIRS.contains(&pattern)
105}
106
107/// Check if file extension suggests secret content.
108pub fn has_secret_extension(path: &Path) -> bool {
109    let name = path
110        .file_name()
111        .map(|n| n.to_string_lossy().to_lowercase())
112        .unwrap_or_default();
113    let suffix = path
114        .extension()
115        .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))
116        .unwrap_or_default();
117
118    if SECRET_EXTENSIONS.contains(&suffix.as_str()) {
119        return true;
120    }
121    for prefix in SECRET_PREFIXES {
122        if name.starts_with(prefix) {
123            return true;
124        }
125    }
126    for substring in SECRET_SUBSTRINGS {
127        if name.contains(substring) {
128            return true;
129        }
130    }
131    false
132}
133
/// Simple glob matching supporting `*` and `?` wildcards.
///
/// `*` matches any run of characters (including none) and `?` matches exactly
/// one character; every other pattern character must match literally. The
/// whole of `text` must be consumed by `pattern`. Matching is case-sensitive
/// and works on Unicode scalar values (`char`), not bytes. There is no escape
/// syntax and there are no character classes.
pub fn glob_match(pattern: &str, text: &str) -> bool {
    let pc: Vec<char> = pattern.chars().collect();
    let tc: Vec<char> = text.chars().collect();
    glob_inner(&pc, &tc)
}

/// Match `text` against `pattern` over pre-split characters.
///
/// Iterative matcher with a single backtrack point: only the most recent `*`
/// ever needs to be revisited, because widening an earlier `*` cannot succeed
/// where widening the latest one fails. This keeps the work proportional to
/// `pattern.len() * text.len()` and uses constant stack, whereas naive
/// recursion is exponential for patterns with several `*` and recurses once
/// per text character.
fn glob_inner(pattern: &[char], text: &[char]) -> bool {
    let mut p = 0usize;
    let mut t = 0usize;
    // (pattern index just past the last `*`, text index that `*` currently ends at)
    let mut last_star: Option<(usize, usize)> = None;

    while t < text.len() {
        match pattern.get(p) {
            // `*` is always a wildcard; first try letting it match nothing.
            Some('*') => {
                last_star = Some((p + 1, t));
                p += 1;
            }
            Some(&c) if c == '?' || c == text[t] => {
                p += 1;
                t += 1;
            }
            // Mismatch or pattern exhausted: let the last `*` absorb one more
            // character and retry from just after it.
            _ => match last_star {
                Some((resume_p, star_end)) => {
                    p = resume_p;
                    t = star_end + 1;
                    last_star = Some((resume_p, star_end + 1));
                }
                None => return false,
            },
        }
    }

    // Text is consumed; any leftover pattern must be able to match nothing.
    pattern[p..].iter().all(|&c| c == '*')
}