Skip to main content

codebones_core/
api.rs

1use crate::cache::{CacheStore, SqliteCache, Symbol as CacheSymbol};
2use crate::indexer::{DefaultIndexer, Indexer, IndexerOptions};
3use crate::parser::{get_spec_for_extension, parse_file};
4use crate::plugin::{OutputFormat, Packer};
5use anyhow::Result;
6use std::collections::HashSet;
7use std::fs;
8use std::path::Path;
9
10/// Walks `dir`, hashes every eligible file, and upserts changed files and their symbols into the local SQLite cache.
11///
12/// Must be called before `get`, `outline`, or `search`; those functions read from the cache `index` populates.
13pub fn index(dir: &Path) -> Result<()> {
14    let db_path = dir.join("codebones.db");
15    let db_path_str = db_path
16        .to_str()
17        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
18    let cache = SqliteCache::new(db_path_str)?;
19    cache.init()?;
20
21    let indexer = DefaultIndexer;
22    let hashes = indexer.index(dir, &IndexerOptions::default())?;
23    let current_paths: HashSet<String> = hashes
24        .iter()
25        .map(|fh| fh.path.to_string_lossy().to_string())
26        .collect();
27
28    for cached_path in cache.list_file_paths()? {
29        if current_paths.contains(&cached_path) {
30            continue;
31        }
32
33        let full_path = dir.join(&cached_path);
34        match fs::symlink_metadata(&full_path) {
35            Ok(_) => {
36                // The file still exists on disk but was skipped by the indexer
37                // (for example due to a transient read/permission failure). Keep
38                // the last known cached content instead of treating it as deleted.
39            }
40            Err(error) if error.kind() == std::io::ErrorKind::NotFound => {
41                cache.delete_file(&cached_path)?;
42            }
43            Err(error) if error.kind() == std::io::ErrorKind::PermissionDenied => {
44                // Preserve the cached entry when the file still exists but is no
45                // longer readable.
46            }
47            Err(error) => return Err(error.into()),
48        }
49    }
50
51    for fh in hashes {
52        let path_str = fh.path.to_string_lossy().to_string();
53        let existing_hash = cache.get_file_hash(&path_str)?;
54
55        if existing_hash.as_deref() != Some(fh.hash.as_str()) {
56            let full_path = dir.join(&fh.path);
57            let content = match fs::read(&full_path) {
58                Ok(content) => content,
59                Err(e) => {
60                    eprintln!("Warning: could not read {}: {}", full_path.display(), e);
61                    continue;
62                }
63            };
64
65            // Delete old file to trigger cascade delete of symbols.
66            // Ignoring the error here is intentional: if the file does not yet exist in
67            // the cache this is a no-op, which is the desired idempotent behaviour.
68            let _ = cache.delete_file(&path_str);
69
70            let file_id = cache.upsert_file(&path_str, &fh.hash, &content)?;
71
72            let ext = fh.path.extension().unwrap_or_default().to_string_lossy();
73            if let Some(spec) = get_spec_for_extension(&ext) {
74                if let Ok(source) = String::from_utf8(content) {
75                    let doc = parse_file(&source, &spec);
76                    for sym in doc.symbols {
77                        let kind_str = match sym.kind {
78                            crate::parser::SymbolKind::Function => "Function",
79                            crate::parser::SymbolKind::Method => "Method",
80                            crate::parser::SymbolKind::Class => "Class",
81                            crate::parser::SymbolKind::Struct => "Struct",
82                            crate::parser::SymbolKind::Impl => "Impl",
83                            crate::parser::SymbolKind::Interface => "Interface",
84                        }
85                        .to_string();
86
87                        let cache_sym = CacheSymbol {
88                            id: format!("{}::{}", path_str, sym.qualified_name),
89                            file_id,
90                            name: sym.qualified_name.clone(),
91                            kind: kind_str,
92                            byte_offset: sym.full_range.start,
93                            byte_length: sym.full_range.end - sym.full_range.start,
94                        };
95                        cache.insert_symbol(&cache_sym)?;
96                    }
97                }
98            }
99        }
100    }
101
102    Ok(())
103}
104
105/// Retrieves the raw source content of a symbol (using `::` notation) or a file path from the cache.
106///
107/// Returns an error if the symbol or path is not found; run `index` first to populate the cache.
108///
109/// # Security
110///
111/// Path lookup is performed against the SQLite cache only — no filesystem reads occur.
112/// `codebones.db` is a trust boundary: callers must ensure the database file has
113/// appropriate filesystem permissions and has not been tampered with.
114pub fn get(dir: &Path, symbol_or_path: &str) -> Result<String> {
115    let db_path = dir.join("codebones.db");
116    let db_path_str = db_path
117        .to_str()
118        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
119    let cache = SqliteCache::new(db_path_str)?;
120    cache.init()?;
121
122    // It's a symbol if it contains ::
123    if symbol_or_path.contains("::") {
124        if let Some(content) = cache.get_symbol_content(symbol_or_path)? {
125            return Ok(String::from_utf8_lossy(&content).to_string());
126        }
127    } else {
128        // Assume file path
129        if let Some(content) = cache.get_file_content(symbol_or_path)? {
130            return Ok(String::from_utf8_lossy(&content).to_string());
131        }
132    }
133
134    anyhow::bail!("Symbol or path not found: {}", symbol_or_path)
135}
136
137/// Returns a skeleton view of a source file by eliding function and class bodies with `...`.
138///
139/// Falls back to the full raw source if the file's language is not supported by the parser.
140///
141/// # Security
142///
143/// Path lookup is performed against the SQLite cache only — no filesystem reads occur.
144/// `codebones.db` is a trust boundary: callers must ensure the database file has
145/// appropriate filesystem permissions and has not been tampered with.
146pub fn outline(dir: &Path, path: &str) -> Result<String> {
147    let db_path = dir.join("codebones.db");
148    let db_path_str = db_path
149        .to_str()
150        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
151    let cache = SqliteCache::new(db_path_str)?;
152    cache.init()?;
153
154    if let Some(content) = cache.get_file_content(path)? {
155        let source = String::from_utf8_lossy(&content).to_string();
156
157        let ext = Path::new(path)
158            .extension()
159            .unwrap_or_default()
160            .to_string_lossy();
161        if let Some(spec) = get_spec_for_extension(&ext) {
162            let doc = parse_file(&source, &spec);
163
164            // elide document
165            let mut result = String::new();
166            let mut last_end = 0;
167
168            let mut indices: Vec<usize> = (0..doc.symbols.len()).collect();
169            indices.sort_by_key(|&i| doc.symbols[i].full_range.start);
170
171            for i in &indices {
172                let sym = &doc.symbols[*i];
173                if let Some(body_range) = &sym.body_range {
174                    if body_range.start >= last_end {
175                        result.push_str(&source[last_end..body_range.start]);
176                        result.push_str("...");
177                        last_end = body_range.end;
178                    }
179                }
180            }
181            result.push_str(&source[last_end..]);
182            return Ok(result);
183        }
184
185        return Ok(source);
186    }
187
188    anyhow::bail!("Path not found: {}", path)
189}
190
191/// Searches the cache for symbol IDs whose name contains `query` (substring match).
192///
193/// Returns a list of fully-qualified symbol ID strings; an empty vec means no matches.
194pub fn search(dir: &Path, query: &str) -> Result<Vec<String>> {
195    let db_path = dir.join("codebones.db");
196    let db_path_str = db_path
197        .to_str()
198        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
199    let cache = SqliteCache::new(db_path_str)?;
200    cache.init()?;
201
202    let escaped = query
203        .replace('\\', "\\\\")
204        .replace('%', "\\%")
205        .replace('_', "\\_");
206    let like_query = format!("%{}%", escaped);
207    cache.search_symbol_ids(&like_query).map_err(Into::into)
208}
209
/// Options that control how `pack` filters and transforms files before bundling them.
///
/// Set boolean flags to strip comments, empty lines, or long base64 blobs; use
/// `include`/`ignore` glob lists to narrow the file set.
///
/// `Default` yields every flag `false` and no include/ignore filters, so callers can set
/// only what they need: `PackOptions { remove_comments: true, ..Default::default() }`.
#[derive(Debug, Clone, Default)]
pub struct PackOptions {
    /// Forwarded to the packer; presumably omits the file summary section — confirm against `Packer`.
    pub no_file_summary: bool,
    /// Forwarded to the packer; presumably omits file contents — confirm against `Packer`.
    pub no_files: bool,
    /// Strip comments from packed files.
    pub remove_comments: bool,
    /// Strip empty lines from packed files.
    pub remove_empty_lines: bool,
    /// Truncate long base64 blobs in packed files.
    pub truncate_base64: bool,
    /// Glob patterns; when at least one valid pattern is given, only matching paths are packed.
    pub include: Option<Vec<String>>,
    /// Glob patterns; paths matching any valid pattern are left out of the pack.
    pub ignore: Option<Vec<String>>,
}
222
223/// Bundles all indexed files in `dir` into a single AI-friendly document in Markdown or XML format.
224///
225/// Automatically re-indexes `dir` before packing; pass `max_tokens` to enable token-budget degradation that drops file bodies when the limit is exceeded.
226pub fn pack(
227    dir: &Path,
228    format_str: &str,
229    max_tokens: Option<usize>,
230    options: PackOptions,
231) -> Result<String> {
232    // If the provided dir is actually a file, use its parent directory for the database
233    let base_dir = if dir.is_file() {
234        let parent = dir.parent().unwrap_or(Path::new("."));
235        if parent.as_os_str().is_empty() {
236            Path::new(".")
237        } else {
238            parent
239        }
240    } else {
241        dir
242    };
243
244    // Ensure the cache is up to date before packing
245    index(base_dir)?;
246
247    let db_path = base_dir.join("codebones.db");
248    let db_path_str = db_path
249        .to_str()
250        .ok_or_else(|| anyhow::anyhow!("Database path contains invalid UTF-8: {:?}", db_path))?;
251    let cache = SqliteCache::new(db_path_str)?;
252    cache.init()?;
253
254    let format = OutputFormat::parse(format_str)?;
255
256    // Get all files
257    let mut paths = Vec::new();
258    {
259        let file_paths = cache.list_file_paths()?;
260
261        let mut include_builder = globset::GlobSetBuilder::new();
262        let mut has_includes = false;
263        if let Some(includes) = &options.include {
264            for pattern in includes {
265                if let Ok(glob) = globset::Glob::new(pattern) {
266                    include_builder.add(glob);
267                    has_includes = true;
268                }
269            }
270        }
271        let include_set = include_builder.build().unwrap_or(globset::GlobSet::empty());
272
273        let mut ignore_builder = globset::GlobSetBuilder::new();
274        let mut has_ignores = false;
275        if let Some(ignores) = &options.ignore {
276            for pattern in ignores {
277                if let Ok(glob) = globset::Glob::new(pattern) {
278                    ignore_builder.add(glob);
279                    has_ignores = true;
280                }
281            }
282        }
283        let ignore_set = ignore_builder.build().unwrap_or(globset::GlobSet::empty());
284
285        // Security: canonicalize the base directory once before iterating files.
286        // If this fails (e.g. the directory does not exist), propagate the error
287        // rather than silently allowing all paths through the traversal guard.
288        let base_canonical = base_dir.canonicalize().map_err(|e| {
289            anyhow::anyhow!(
290                "Cannot resolve base directory '{}': {}",
291                base_dir.display(),
292                e
293            )
294        })?;
295
296        for path_str in file_paths {
297            if has_includes && !include_set.is_match(&path_str) {
298                continue;
299            }
300            if has_ignores && ignore_set.is_match(&path_str) {
301                continue;
302            }
303
304            let file_path = base_dir.join(&path_str);
305
306            // Security: verify the DB-stored path doesn't escape the base directory.
307            // If canonicalize fails (e.g. broken symlink), skip the file to avoid
308            // bypassing the traversal guard.
309            let canonical = match file_path.canonicalize() {
310                Ok(c) => c,
311                Err(_) => continue,
312            };
313            if !canonical.starts_with(&base_canonical) {
314                eprintln!("Warning: skipping path that escapes base dir: {}", path_str);
315                continue;
316            }
317
318            // If the user specified a file rather than a directory, only include that specific file
319            if dir.is_file() {
320                let dir_canon = dir.canonicalize().unwrap_or_else(|_| dir.to_path_buf());
321                let file_canon = file_path
322                    .canonicalize()
323                    .unwrap_or_else(|_| file_path.clone());
324                if file_canon != dir_canon {
325                    continue;
326                }
327            }
328
329            if file_path.exists() {
330                paths.push(file_path);
331            }
332        }
333    }
334
335    let packer = Packer::with_workspace_root(
336        cache,
337        crate::parser::Parser {},
338        base_dir.to_path_buf(),
339        format,
340        max_tokens,
341        options.no_file_summary,
342        options.no_files,
343        options.remove_comments,
344        options.remove_empty_lines,
345        options.truncate_base64,
346    );
347
348    packer.pack(&paths)
349}