Skip to main content

codebones_core/
api.rs

1use crate::cache::{CacheStore, SqliteCache, Symbol as CacheSymbol};
2use crate::indexer::{DefaultIndexer, Indexer, IndexerOptions};
3use crate::parser::{get_spec_for_extension, parse_file};
4use crate::plugin::{OutputFormat, Packer};
5use anyhow::Result;
6use std::collections::HashSet;
7use std::fs;
8use std::path::Path;
9
10/// Walks `dir`, hashes every eligible file, and upserts changed files and their symbols into the local SQLite cache.
11///
12/// Must be called before `get`, `outline`, or `search`; those functions read from the cache `index` populates.
13pub fn index(dir: &Path) -> Result<()> {
14    let db_path = dir.join("codebones.db");
15    let db_path_str = db_path
16        .to_str()
17        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
18    let cache = SqliteCache::new(db_path_str)?;
19    cache.init()?;
20
21    let indexer = DefaultIndexer;
22    let hashes = indexer.index(dir, &IndexerOptions::default())?;
23    let current_paths: HashSet<String> = hashes
24        .iter()
25        .map(|fh| fh.path.to_string_lossy().to_string())
26        .collect();
27
28    for cached_path in cache.list_file_paths()? {
29        if !current_paths.contains(&cached_path) {
30            cache.delete_file(&cached_path)?;
31        }
32    }
33
34    for fh in hashes {
35        let path_str = fh.path.to_string_lossy().to_string();
36        let existing_hash = cache.get_file_hash(&path_str)?;
37
38        if existing_hash.as_deref() != Some(fh.hash.as_str()) {
39            let full_path = dir.join(&fh.path);
40            let content = fs::read(&full_path).unwrap_or_else(|e| {
41                eprintln!("Warning: could not read {}: {}", full_path.display(), e);
42                vec![]
43            });
44
45            // Delete old file to trigger cascade delete of symbols.
46            // Ignoring the error here is intentional: if the file does not yet exist in
47            // the cache this is a no-op, which is the desired idempotent behaviour.
48            let _ = cache.delete_file(&path_str);
49
50            let file_id = cache.upsert_file(&path_str, &fh.hash, &content)?;
51
52            let ext = fh.path.extension().unwrap_or_default().to_string_lossy();
53            if let Some(spec) = get_spec_for_extension(&ext) {
54                if let Ok(source) = String::from_utf8(content) {
55                    let doc = parse_file(&source, &spec);
56                    for sym in doc.symbols {
57                        let kind_str = match sym.kind {
58                            crate::parser::SymbolKind::Function => "Function",
59                            crate::parser::SymbolKind::Method => "Method",
60                            crate::parser::SymbolKind::Class => "Class",
61                            crate::parser::SymbolKind::Struct => "Struct",
62                            crate::parser::SymbolKind::Impl => "Impl",
63                            crate::parser::SymbolKind::Interface => "Interface",
64                        }
65                        .to_string();
66
67                        let cache_sym = CacheSymbol {
68                            id: format!("{}::{}", path_str, sym.qualified_name),
69                            file_id,
70                            name: sym.qualified_name.clone(),
71                            kind: kind_str,
72                            byte_offset: sym.full_range.start,
73                            byte_length: sym.full_range.end - sym.full_range.start,
74                        };
75                        cache.insert_symbol(&cache_sym)?;
76                    }
77                }
78            }
79        }
80    }
81
82    Ok(())
83}
84
85/// Retrieves the raw source content of a symbol (using `::` notation) or a file path from the cache.
86///
87/// Returns an error if the symbol or path is not found; run `index` first to populate the cache.
88///
89/// # Security
90///
91/// Path lookup is performed against the SQLite cache only — no filesystem reads occur.
92/// `codebones.db` is a trust boundary: callers must ensure the database file has
93/// appropriate filesystem permissions and has not been tampered with.
94pub fn get(dir: &Path, symbol_or_path: &str) -> Result<String> {
95    let db_path = dir.join("codebones.db");
96    let db_path_str = db_path
97        .to_str()
98        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
99    let cache = SqliteCache::new(db_path_str)?;
100    cache.init()?;
101
102    // It's a symbol if it contains ::
103    if symbol_or_path.contains("::") {
104        if let Some(content) = cache.get_symbol_content(symbol_or_path)? {
105            return Ok(String::from_utf8_lossy(&content).to_string());
106        }
107    } else {
108        // Assume file path
109        if let Some(content) = cache.get_file_content(symbol_or_path)? {
110            return Ok(String::from_utf8_lossy(&content).to_string());
111        }
112    }
113
114    anyhow::bail!("Symbol or path not found: {}", symbol_or_path)
115}
116
117/// Returns a skeleton view of a source file by eliding function and class bodies with `...`.
118///
119/// Falls back to the full raw source if the file's language is not supported by the parser.
120///
121/// # Security
122///
123/// Path lookup is performed against the SQLite cache only — no filesystem reads occur.
124/// `codebones.db` is a trust boundary: callers must ensure the database file has
125/// appropriate filesystem permissions and has not been tampered with.
126pub fn outline(dir: &Path, path: &str) -> Result<String> {
127    let db_path = dir.join("codebones.db");
128    let db_path_str = db_path
129        .to_str()
130        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
131    let cache = SqliteCache::new(db_path_str)?;
132    cache.init()?;
133
134    if let Some(content) = cache.get_file_content(path)? {
135        let source = String::from_utf8_lossy(&content).to_string();
136
137        let ext = Path::new(path)
138            .extension()
139            .unwrap_or_default()
140            .to_string_lossy();
141        if let Some(spec) = get_spec_for_extension(&ext) {
142            let doc = parse_file(&source, &spec);
143
144            // elide document
145            let mut result = String::new();
146            let mut last_end = 0;
147
148            let mut indices: Vec<usize> = (0..doc.symbols.len()).collect();
149            indices.sort_by_key(|&i| doc.symbols[i].full_range.start);
150
151            for i in &indices {
152                let sym = &doc.symbols[*i];
153                if let Some(body_range) = &sym.body_range {
154                    if body_range.start >= last_end {
155                        result.push_str(&source[last_end..body_range.start]);
156                        result.push_str("...");
157                        last_end = body_range.end;
158                    }
159                }
160            }
161            result.push_str(&source[last_end..]);
162            return Ok(result);
163        }
164
165        return Ok(source);
166    }
167
168    anyhow::bail!("Path not found: {}", path)
169}
170
171/// Searches the cache for symbol IDs whose name contains `query` (substring match).
172///
173/// Returns a list of fully-qualified symbol ID strings; an empty vec means no matches.
174pub fn search(dir: &Path, query: &str) -> Result<Vec<String>> {
175    let db_path = dir.join("codebones.db");
176    let db_path_str = db_path
177        .to_str()
178        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
179    let cache = SqliteCache::new(db_path_str)?;
180    cache.init()?;
181
182    let escaped = query
183        .replace('\\', "\\\\")
184        .replace('%', "\\%")
185        .replace('_', "\\_");
186    let like_query = format!("%{}%", escaped);
187    cache.search_symbol_ids(&like_query).map_err(Into::into)
188}
189
/// Options that control how `pack` filters and transforms files before bundling them.
///
/// Set boolean flags to strip comments, empty lines, or long base64 blobs; use `include`/`ignore` glob lists to narrow the file set.
///
/// `Default` yields all flags `false` and no include/ignore filters, so callers can write
/// `PackOptions { remove_comments: true, ..Default::default() }`.
#[derive(Debug, Clone, Default)]
pub struct PackOptions {
    /// Forwarded to the packer; presumably omits the file summary section — confirm against `Packer`.
    pub no_file_summary: bool,
    /// Forwarded to the packer; presumably omits file contents — confirm against `Packer`.
    pub no_files: bool,
    /// Strip comments from packed files.
    pub remove_comments: bool,
    /// Strip empty lines from packed files.
    pub remove_empty_lines: bool,
    /// Truncate long base64 blobs in packed files.
    pub truncate_base64: bool,
    /// Glob patterns a cached path must match to be packed; `None` means no include filter.
    pub include: Option<Vec<String>>,
    /// Glob patterns that exclude a cached path from packing; `None` means no ignore filter.
    pub ignore: Option<Vec<String>>,
}
202
203/// Bundles all indexed files in `dir` into a single AI-friendly document in Markdown or XML format.
204///
205/// Automatically re-indexes `dir` before packing; pass `max_tokens` to enable token-budget degradation that drops file bodies when the limit is exceeded.
206pub fn pack(
207    dir: &Path,
208    format_str: &str,
209    max_tokens: Option<usize>,
210    options: PackOptions,
211) -> Result<String> {
212    // If the provided dir is actually a file, use its parent directory for the database
213    let base_dir = if dir.is_file() {
214        let parent = dir.parent().unwrap_or(Path::new("."));
215        if parent.as_os_str().is_empty() {
216            Path::new(".")
217        } else {
218            parent
219        }
220    } else {
221        dir
222    };
223
224    // Ensure the cache is up to date before packing
225    index(base_dir)?;
226
227    let db_path = base_dir.join("codebones.db");
228    let db_path_str = db_path
229        .to_str()
230        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
231    let cache = SqliteCache::new(db_path_str)?;
232    cache.init()?;
233
234    let format = match format_str.to_lowercase().as_str() {
235        "xml" => OutputFormat::Xml,
236        _ => OutputFormat::Markdown,
237    };
238
239    // Get all files
240    let mut paths = Vec::new();
241    {
242        let file_paths = cache.list_file_paths()?;
243
244        let mut include_builder = globset::GlobSetBuilder::new();
245        let mut has_includes = false;
246        if let Some(includes) = &options.include {
247            for pattern in includes {
248                if let Ok(glob) = globset::Glob::new(pattern) {
249                    include_builder.add(glob);
250                    has_includes = true;
251                }
252            }
253        }
254        let include_set = include_builder.build().unwrap_or(globset::GlobSet::empty());
255
256        let mut ignore_builder = globset::GlobSetBuilder::new();
257        let mut has_ignores = false;
258        if let Some(ignores) = &options.ignore {
259            for pattern in ignores {
260                if let Ok(glob) = globset::Glob::new(pattern) {
261                    ignore_builder.add(glob);
262                    has_ignores = true;
263                }
264            }
265        }
266        let ignore_set = ignore_builder.build().unwrap_or(globset::GlobSet::empty());
267
268        // Security: canonicalize the base directory once before iterating files.
269        // If this fails (e.g. the directory does not exist), propagate the error
270        // rather than silently allowing all paths through the traversal guard.
271        let base_canonical = base_dir.canonicalize().map_err(|e| {
272            anyhow::anyhow!(
273                "Cannot resolve base directory '{}': {}",
274                base_dir.display(),
275                e
276            )
277        })?;
278
279        for path_str in file_paths {
280            if has_includes && !include_set.is_match(&path_str) {
281                continue;
282            }
283            if has_ignores && ignore_set.is_match(&path_str) {
284                continue;
285            }
286
287            let file_path = base_dir.join(&path_str);
288
289            // Security: verify the DB-stored path doesn't escape the base directory.
290            // If canonicalize fails (e.g. broken symlink), skip the file to avoid
291            // bypassing the traversal guard.
292            let canonical = match file_path.canonicalize() {
293                Ok(c) => c,
294                Err(_) => continue,
295            };
296            if !canonical.starts_with(&base_canonical) {
297                eprintln!("Warning: skipping path that escapes base dir: {}", path_str);
298                continue;
299            }
300
301            // If the user specified a file rather than a directory, only include that specific file
302            if dir.is_file() {
303                let dir_canon = dir.canonicalize().unwrap_or_else(|_| dir.to_path_buf());
304                let file_canon = file_path
305                    .canonicalize()
306                    .unwrap_or_else(|_| file_path.clone());
307                if file_canon != dir_canon {
308                    continue;
309                }
310            }
311
312            if file_path.exists() {
313                paths.push(file_path);
314            }
315        }
316    }
317
318    let packer = Packer::new(
319        cache,
320        crate::parser::Parser {},
321        format,
322        max_tokens,
323        options.no_file_summary,
324        options.no_files,
325        options.remove_comments,
326        options.remove_empty_lines,
327        options.truncate_base64,
328    );
329
330    packer.pack(&paths)
331}