Skip to main content

codebones_core/
api.rs

1use crate::cache::{CacheStore, SqliteCache, Symbol as CacheSymbol};
2use crate::indexer::{DefaultIndexer, Indexer, IndexerOptions};
3use crate::parser::{get_spec_for_extension, parse_file};
4use crate::plugin::{OutputFormat, Packer};
5use anyhow::Result;
6use std::fs;
7use std::path::Path;
8
9/// Walks `dir`, hashes every eligible file, and upserts changed files and their symbols into the local SQLite cache.
10///
11/// Must be called before `get`, `outline`, or `search`; those functions read from the cache `index` populates.
12pub fn index(dir: &Path) -> Result<()> {
13    let db_path = dir.join("codebones.db");
14    let db_path_str = db_path
15        .to_str()
16        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
17    let cache = SqliteCache::new(db_path_str)?;
18    cache.init()?;
19
20    let indexer = DefaultIndexer;
21    let hashes = indexer.index(dir, &IndexerOptions::default())?;
22
23    for fh in hashes {
24        let path_str = fh.path.to_string_lossy().to_string();
25        let existing_hash = cache.get_file_hash(&path_str)?;
26
27        if existing_hash.as_deref() != Some(fh.hash.as_str()) {
28            let full_path = dir.join(&fh.path);
29            let content = fs::read(&full_path).unwrap_or_else(|e| {
30                eprintln!("Warning: could not read {}: {}", full_path.display(), e);
31                vec![]
32            });
33
34            // Delete old file to trigger cascade delete of symbols.
35            // Ignoring the error here is intentional: if the file does not yet exist in
36            // the cache this is a no-op, which is the desired idempotent behaviour.
37            let _ = cache.delete_file(&path_str);
38
39            let file_id = cache.upsert_file(&path_str, &fh.hash, &content)?;
40
41            let ext = fh.path.extension().unwrap_or_default().to_string_lossy();
42            if let Some(spec) = get_spec_for_extension(&ext) {
43                if let Ok(source) = String::from_utf8(content) {
44                    let doc = parse_file(&source, &spec);
45                    for sym in doc.symbols {
46                        let kind_str = match sym.kind {
47                            crate::parser::SymbolKind::Function => "Function",
48                            crate::parser::SymbolKind::Method => "Method",
49                            crate::parser::SymbolKind::Class => "Class",
50                            crate::parser::SymbolKind::Struct => "Struct",
51                            crate::parser::SymbolKind::Impl => "Impl",
52                            crate::parser::SymbolKind::Interface => "Interface",
53                        }
54                        .to_string();
55
56                        let cache_sym = CacheSymbol {
57                            id: format!("{}::{}", path_str, sym.qualified_name),
58                            file_id,
59                            name: sym.qualified_name.clone(),
60                            kind: kind_str,
61                            byte_offset: sym.full_range.start,
62                            byte_length: sym.full_range.end - sym.full_range.start,
63                        };
64                        cache.insert_symbol(&cache_sym)?;
65                    }
66                }
67            }
68        }
69    }
70
71    Ok(())
72}
73
74/// Retrieves the raw source content of a symbol (using `::` notation) or a file path from the cache.
75///
76/// Returns an error if the symbol or path is not found; run `index` first to populate the cache.
77///
78/// # Security
79///
80/// Path lookup is performed against the SQLite cache only — no filesystem reads occur.
81/// `codebones.db` is a trust boundary: callers must ensure the database file has
82/// appropriate filesystem permissions and has not been tampered with.
83pub fn get(dir: &Path, symbol_or_path: &str) -> Result<String> {
84    let db_path = dir.join("codebones.db");
85    let db_path_str = db_path
86        .to_str()
87        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
88    let cache = SqliteCache::new(db_path_str)?;
89    cache.init()?;
90
91    // It's a symbol if it contains ::
92    if symbol_or_path.contains("::") {
93        if let Some(content) = cache.get_symbol_content(symbol_or_path)? {
94            return Ok(String::from_utf8_lossy(&content).to_string());
95        }
96    } else {
97        // Assume file path
98        if let Some(content) = cache.get_file_content(symbol_or_path)? {
99            return Ok(String::from_utf8_lossy(&content).to_string());
100        }
101    }
102
103    anyhow::bail!("Symbol or path not found: {}", symbol_or_path)
104}
105
106/// Returns a skeleton view of a source file by eliding function and class bodies with `...`.
107///
108/// Falls back to the full raw source if the file's language is not supported by the parser.
109///
110/// # Security
111///
112/// Path lookup is performed against the SQLite cache only — no filesystem reads occur.
113/// `codebones.db` is a trust boundary: callers must ensure the database file has
114/// appropriate filesystem permissions and has not been tampered with.
115pub fn outline(dir: &Path, path: &str) -> Result<String> {
116    let db_path = dir.join("codebones.db");
117    let db_path_str = db_path
118        .to_str()
119        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
120    let cache = SqliteCache::new(db_path_str)?;
121    cache.init()?;
122
123    if let Some(content) = cache.get_file_content(path)? {
124        let source = String::from_utf8_lossy(&content).to_string();
125
126        let ext = Path::new(path)
127            .extension()
128            .unwrap_or_default()
129            .to_string_lossy();
130        if let Some(spec) = get_spec_for_extension(&ext) {
131            let doc = parse_file(&source, &spec);
132
133            // elide document
134            let mut result = String::new();
135            let mut last_end = 0;
136
137            let mut indices: Vec<usize> = (0..doc.symbols.len()).collect();
138            indices.sort_by_key(|&i| doc.symbols[i].full_range.start);
139
140            for i in &indices {
141                let sym = &doc.symbols[*i];
142                if let Some(body_range) = &sym.body_range {
143                    if body_range.start >= last_end {
144                        result.push_str(&source[last_end..body_range.start]);
145                        result.push_str("...");
146                        last_end = body_range.end;
147                    }
148                }
149            }
150            result.push_str(&source[last_end..]);
151            return Ok(result);
152        }
153
154        return Ok(source);
155    }
156
157    anyhow::bail!("Path not found: {}", path)
158}
159
160/// Searches the cache for symbol IDs whose name contains `query` (substring match).
161///
162/// Returns a list of fully-qualified symbol ID strings; an empty vec means no matches.
163pub fn search(dir: &Path, query: &str) -> Result<Vec<String>> {
164    let db_path = dir.join("codebones.db");
165    let db_path_str = db_path
166        .to_str()
167        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
168    let cache = SqliteCache::new(db_path_str)?;
169    cache.init()?;
170
171    let escaped = query
172        .replace('\\', "\\\\")
173        .replace('%', "\\%")
174        .replace('_', "\\_");
175    let like_query = format!("%{}%", escaped);
176    cache.search_symbol_ids(&like_query).map_err(Into::into)
177}
178
/// Options that control how `pack` filters and transforms files before bundling them.
///
/// Set boolean flags to strip comments, empty lines, or long base64 blobs; use `include`/`ignore` glob lists to narrow the file set.
///
/// `Default` yields all flags `false` and no include/ignore filters, so callers can write
/// `PackOptions { remove_comments: true, ..Default::default() }`.
#[derive(Debug, Clone, Default)]
pub struct PackOptions {
    /// Forwarded to the packer; presumably omits the file-summary section from the output.
    pub no_file_summary: bool,
    /// Forwarded to the packer; presumably omits the file contents from the output.
    pub no_files: bool,
    /// Forwarded to the packer; strips comments from packed files.
    pub remove_comments: bool,
    /// Forwarded to the packer; strips empty lines from packed files.
    pub remove_empty_lines: bool,
    /// Forwarded to the packer; truncates long base64 blobs in packed files.
    pub truncate_base64: bool,
    /// Glob patterns matched against cached file paths; when given, only matching files are packed.
    pub include: Option<Vec<String>>,
    /// Glob patterns matched against cached file paths; matching files are left out.
    pub ignore: Option<Vec<String>>,
}
191
192/// Bundles all indexed files in `dir` into a single AI-friendly document in Markdown or XML format.
193///
194/// Automatically re-indexes `dir` before packing; pass `max_tokens` to enable token-budget degradation that drops file bodies when the limit is exceeded.
195pub fn pack(
196    dir: &Path,
197    format_str: &str,
198    max_tokens: Option<usize>,
199    options: PackOptions,
200) -> Result<String> {
201    // If the provided dir is actually a file, use its parent directory for the database
202    let base_dir = if dir.is_file() {
203        let parent = dir.parent().unwrap_or(Path::new("."));
204        if parent.as_os_str().is_empty() {
205            Path::new(".")
206        } else {
207            parent
208        }
209    } else {
210        dir
211    };
212
213    // Ensure the cache is up to date before packing
214    index(base_dir)?;
215
216    let db_path = base_dir.join("codebones.db");
217    let db_path_str = db_path
218        .to_str()
219        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
220    let cache = SqliteCache::new(db_path_str)?;
221    cache.init()?;
222
223    let format = match format_str.to_lowercase().as_str() {
224        "xml" => OutputFormat::Xml,
225        _ => OutputFormat::Markdown,
226    };
227
228    // Get all files
229    let mut paths = Vec::new();
230    {
231        let file_paths = cache.list_file_paths()?;
232
233        let mut include_builder = globset::GlobSetBuilder::new();
234        let mut has_includes = false;
235        if let Some(includes) = &options.include {
236            for pattern in includes {
237                if let Ok(glob) = globset::Glob::new(pattern) {
238                    include_builder.add(glob);
239                    has_includes = true;
240                }
241            }
242        }
243        let include_set = include_builder.build().unwrap_or(globset::GlobSet::empty());
244
245        let mut ignore_builder = globset::GlobSetBuilder::new();
246        let mut has_ignores = false;
247        if let Some(ignores) = &options.ignore {
248            for pattern in ignores {
249                if let Ok(glob) = globset::Glob::new(pattern) {
250                    ignore_builder.add(glob);
251                    has_ignores = true;
252                }
253            }
254        }
255        let ignore_set = ignore_builder.build().unwrap_or(globset::GlobSet::empty());
256
257        // Security: canonicalize the base directory once before iterating files.
258        // If this fails (e.g. the directory does not exist), propagate the error
259        // rather than silently allowing all paths through the traversal guard.
260        let base_canonical = base_dir.canonicalize().map_err(|e| {
261            anyhow::anyhow!(
262                "Cannot resolve base directory '{}': {}",
263                base_dir.display(),
264                e
265            )
266        })?;
267
268        for path_str in file_paths {
269            if has_includes && !include_set.is_match(&path_str) {
270                continue;
271            }
272            if has_ignores && ignore_set.is_match(&path_str) {
273                continue;
274            }
275
276            let file_path = base_dir.join(&path_str);
277
278            // Security: verify the DB-stored path doesn't escape the base directory.
279            // If canonicalize fails (e.g. broken symlink), skip the file to avoid
280            // bypassing the traversal guard.
281            let canonical = match file_path.canonicalize() {
282                Ok(c) => c,
283                Err(_) => continue,
284            };
285            if !canonical.starts_with(&base_canonical) {
286                eprintln!("Warning: skipping path that escapes base dir: {}", path_str);
287                continue;
288            }
289
290            // If the user specified a file rather than a directory, only include that specific file
291            if dir.is_file() {
292                let dir_canon = dir.canonicalize().unwrap_or_else(|_| dir.to_path_buf());
293                let file_canon = file_path
294                    .canonicalize()
295                    .unwrap_or_else(|_| file_path.clone());
296                if file_canon != dir_canon {
297                    continue;
298                }
299            }
300
301            if file_path.exists() {
302                paths.push(file_path);
303            }
304        }
305    }
306
307    let packer = Packer::new(
308        cache,
309        crate::parser::Parser {},
310        format,
311        max_tokens,
312        options.no_file_summary,
313        options.no_files,
314        options.remove_comments,
315        options.remove_empty_lines,
316        options.truncate_base64,
317    );
318
319    packer.pack(&paths)
320}