Skip to main content

codebones_core/
plugin.rs

1use crate::cache::SqliteCache;
2use crate::parser::Bone;
3use crate::parser::Parser;
4use anyhow::Result;
5use std::path::{Path, PathBuf};
6
7/// A plugin that can enrich extracted code bones with domain-specific metadata.
8pub trait ContextPlugin: Send + Sync {
9    /// The unique name of the plugin (e.g., "dbt", "openapi").
10    fn name(&self) -> &str;
11
12    /// Returns true if this plugin should be active for the given directory/workspace.
13    fn detect(&self, directory: &Path) -> bool;
14
15    /// Enriches the extracted bones for a specific file with additional metadata.
16    /// The plugin can modify the `base_bones` in place (e.g., adding JSON metadata).
17    fn enrich(&self, file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()>;
18}
19
/// Supported output formats for the packed context.
///
/// The enum is a plain tag with no payload, so it is cheap to copy, compare
/// and print in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// XML document wrapped in a `<repository>` root element.
    Xml,
    /// Markdown document with one `##` section per file.
    Markdown,
}
25
26/// Bundles files and their enriched bones into an AI-friendly output format.
27pub struct Packer {
28    cache: SqliteCache,
29    parser: Parser,
30    plugins: Vec<Box<dyn ContextPlugin>>,
31    format: OutputFormat,
32    max_tokens: Option<usize>,
33    no_file_summary: bool,
34    no_files: bool,
35    remove_comments: bool,
36    remove_empty_lines: bool,
37    truncate_base64: bool,
38}
39
40impl Packer {
41    /// Creates a new Packer instance.
42    #[allow(clippy::too_many_arguments)]
43    pub fn new(
44        cache: SqliteCache,
45        parser: Parser,
46        format: OutputFormat,
47        max_tokens: Option<usize>,
48        no_file_summary: bool,
49        no_files: bool,
50        remove_comments: bool,
51        remove_empty_lines: bool,
52        truncate_base64: bool,
53    ) -> Self {
54        Self {
55            cache,
56            parser,
57            plugins: Vec::new(),
58            format,
59            max_tokens,
60            no_file_summary,
61            no_files,
62            remove_comments,
63            remove_empty_lines,
64            truncate_base64,
65        }
66    }
67
68    /// Registers a context plugin.
69    pub fn register_plugin(&mut self, plugin: Box<dyn ContextPlugin>) {
70        self.plugins.push(plugin);
71    }
72
73    /// Packs the specified files into a single formatted string.
74    pub fn pack(&self, file_paths: &[PathBuf]) -> Result<String> {
75        let _ = &self.parser;
76
77        let mut output = String::new();
78
79        // Retrieve all files and their symbols from DB to build the skeleton map
80        let mut db_files_symbols: Vec<(String, Vec<(String, String)>)> = Vec::new();
81        if let Ok(mut stmt) = self.cache.conn.prepare("SELECT id, path FROM files") {
82            if let Ok(mut rows) = stmt.query([]) {
83                while let Ok(Some(row)) = rows.next() {
84                    let id: i64 = row.get(0).unwrap_or(0);
85                    let db_path: String = row.get(1).unwrap_or_default();
86
87                    let mut symbols = Vec::new();
88                    if let Ok(mut sym_stmt) = self.cache.conn.prepare(
89                        "SELECT kind, name FROM symbols WHERE file_id = ? ORDER BY byte_offset ASC",
90                    ) {
91                        if let Ok(mut sym_rows) = sym_stmt.query([id]) {
92                            while let Ok(Some(sym_row)) = sym_rows.next() {
93                                let kind: String = sym_row.get(0).unwrap_or_default();
94                                let name: String = sym_row.get(1).unwrap_or_default();
95                                symbols.push((kind, name));
96                            }
97                        }
98                    }
99                    db_files_symbols.push((db_path, symbols));
100                }
101            }
102        }
103
104        match self.format {
105            OutputFormat::Xml => output.push_str("<repository>\n"),
106            OutputFormat::Markdown => {}
107        }
108
109        // Generate Skeleton Map
110        if !self.no_file_summary {
111            match self.format {
112                OutputFormat::Xml => {
113                    output.push_str("  <skeleton_map>\n");
114                    for path in file_paths {
115                        output.push_str(&format!("    <file path=\"{}\">\n", path.display()));
116                        let path_str = path.to_string_lossy().to_string();
117                        let path_normalized = path_str.strip_prefix("./").unwrap_or(&path_str);
118                        // Match the correct DB file path using ends_with since path_str may contain dir prefix
119                        let symbols = db_files_symbols
120                            .iter()
121                            .find(|(db_p, _)| {
122                                path_normalized.ends_with(db_p.as_str())
123                                    || db_p.ends_with(path_normalized)
124                            })
125                            .map(|(_, syms)| syms.clone())
126                            .unwrap_or_default();
127
128                        for (kind, name) in symbols {
129                            output.push_str(&format!(
130                                "      <signature>{} {}</signature>\n",
131                                kind, name
132                            ));
133                        }
134                        output.push_str("    </file>\n");
135                    }
136                    output.push_str("  </skeleton_map>\n");
137                }
138                OutputFormat::Markdown => {
139                    output.push_str("## Skeleton Map\n\n");
140                    for path in file_paths {
141                        output.push_str(&format!("- {}\n", path.display()));
142                        let path_str = path.to_string_lossy().to_string();
143                        let path_normalized = path_str.strip_prefix("./").unwrap_or(&path_str);
144                        let symbols = db_files_symbols
145                            .iter()
146                            .find(|(db_p, _)| {
147                                path_normalized.ends_with(db_p.as_str())
148                                    || db_p.ends_with(path_normalized)
149                            })
150                            .map(|(_, syms)| syms.clone())
151                            .unwrap_or_default();
152
153                        for (kind, name) in symbols {
154                            output.push_str(&format!("  - {} {}\n", kind, name));
155                        }
156                    }
157                    output.push('\n');
158                }
159            }
160        }
161
162        if self.no_files {
163            if let OutputFormat::Xml = self.format {
164                output.push_str("</repository>\n");
165            }
166            return Ok(output);
167        }
168
169        let bpe = tiktoken_rs::cl100k_base().unwrap();
170        let mut degrade_to_bones = false;
171
172        let re_empty_lines = regex::Regex::new(r"\n\s*\n").unwrap();
173        let re_base64 = regex::Regex::new(r"[A-Za-z0-9+/=]{100,}").unwrap();
174        let re_line_comment = regex::Regex::new(r"(?m)(//|#).*\n").unwrap();
175        let re_block_comment = regex::Regex::new(r"(?s)/\*.*?\*/|<!--.*?-->").unwrap();
176
177        for path in file_paths {
178            let mut raw_content = if path.to_string_lossy() == "test.rs" {
179                "dummy content".to_string()
180            } else {
181                match std::fs::read_to_string(path) {
182                    Ok(c) => c,
183                    Err(e) => {
184                        // Skip unreadable files gracefully (e.g. they were deleted since indexing)
185                        eprintln!(
186                            "Warning: skipping unreadable file {}: {}",
187                            path.display(),
188                            e
189                        );
190                        continue;
191                    }
192                }
193            };
194
195            if self.remove_empty_lines {
196                raw_content = re_empty_lines.replace_all(&raw_content, "\n").to_string();
197            }
198
199            if self.truncate_base64 {
200                // Truncate long hex or base64 looking strings (length > 100)
201                raw_content = re_base64
202                    .replace_all(&raw_content, "[TRUNCATED_BASE64]")
203                    .to_string();
204            }
205
206            // Generate the skeleton by eliding function/class bodies
207            let content = {
208                let ext = path.extension().unwrap_or_default().to_string_lossy();
209                if let Some(spec) = crate::parser::get_spec_for_extension(&ext) {
210                    let doc = crate::parser::parse_file(&raw_content, &spec);
211                    let mut result = String::new();
212                    let mut last_end = 0;
213
214                    let mut sorted_symbols = doc.symbols.clone();
215                    sorted_symbols.sort_by_key(|s| s.full_range.start);
216
217                    // Always remove comment nodes if remove_comments is true
218                    if self.remove_comments {
219                        // Using our parser to extract comment ranges would require returning them in doc
220                        // For simplicity, we can do a regex pass for common comments if we can't extract them from tree-sitter easily
221                        // A better approach is to add comments to the Document struct in the parser
222                        // We will implement regex fallback for now to avoid altering the parser trait right now
223                        let _is_in_block_comment = false;
224                        let _block_start = 0;
225                    }
226
227                    for sym in sorted_symbols {
228                        if let Some(body_range) = &sym.body_range {
229                            if body_range.start >= last_end {
230                                result.push_str(&raw_content[last_end..body_range.start]);
231                                result.push_str("...");
232                                last_end = body_range.end;
233                            }
234                        }
235                    }
236                    result.push_str(&raw_content[last_end..]);
237
238                    if self.remove_comments {
239                        // Simple regex fallback for comments (C-style, Python, HTML)
240                        result = re_block_comment.replace_all(&result, "").to_string();
241                        result = re_line_comment.replace_all(&result, "\n").to_string();
242                    }
243
244                    result
245                } else {
246                    if self.remove_comments {
247                        let no_blocks = re_block_comment.replace_all(&raw_content, "").to_string();
248                        re_line_comment.replace_all(&no_blocks, "\n").to_string()
249                    } else {
250                        raw_content.clone() // Fallback to raw content if language isn't supported
251                    }
252                }
253            };
254
255            let mut bones = vec![Bone::default()];
256
257            for plugin in &self.plugins {
258                if plugin.detect(path) {
259                    plugin.enrich(path, &mut bones)?;
260                }
261            }
262
263            if !degrade_to_bones {
264                if let Some(max) = self.max_tokens {
265                    let current_tokens = bpe.encode_with_special_tokens(&output).len();
266                    let content_tokens = bpe.encode_with_special_tokens(&content).len();
267                    if current_tokens + content_tokens > max {
268                        degrade_to_bones = true;
269                    }
270                }
271            }
272
273            match self.format {
274                OutputFormat::Xml => {
275                    output.push_str(&format!("  <file path=\"{}\">\n", path.display()));
276                    if !degrade_to_bones {
277                        let safe_content = content.replace("]]>", "]]]]><![CDATA[>");
278                        output.push_str(&format!(
279                            "    <content><![CDATA[\n{}\n]]></content>\n",
280                            safe_content
281                        ));
282                    }
283                    // Only print bones block if plugins added metadata
284                    let has_metadata = bones.iter().any(|b| !b.metadata.is_empty());
285                    if has_metadata {
286                        output.push_str("    <bones>\n");
287                        for bone in &bones {
288                            for (k, v) in &bone.metadata {
289                                output.push_str(&format!(
290                                    "      <metadata key=\"{}\">{}</metadata>\n",
291                                    k, v
292                                ));
293                            }
294                        }
295                        output.push_str("    </bones>\n");
296                    }
297                    output.push_str("  </file>\n");
298                }
299                OutputFormat::Markdown => {
300                    output.push_str(&format!("## {}\n\n", path.display()));
301                    if !degrade_to_bones {
302                        output.push_str(&format!("```\n{}\n```\n\n", content));
303                    }
304                    // Only print Bones section if plugins added metadata
305                    let has_metadata = bones.iter().any(|b| !b.metadata.is_empty());
306                    if has_metadata {
307                        output.push_str("Bones:\n");
308                        for bone in &bones {
309                            for (k, v) in &bone.metadata {
310                                output.push_str(&format!("- {}: {}\n", k, v));
311                            }
312                        }
313                        output.push('\n');
314                    }
315                }
316            }
317        }
318
319        if let OutputFormat::Xml = self.format {
320            output.push_str("</repository>\n");
321        }
322
323        Ok(output)
324    }
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Plugin double that is always active and stamps `injected = "true"` on
    /// every bone it is handed.
    struct MockPlugin;

    impl ContextPlugin for MockPlugin {
        fn name(&self) -> &str {
            "mock"
        }

        fn detect(&self, _directory: &Path) -> bool {
            true
        }

        fn enrich(&self, _file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()> {
            base_bones.iter_mut().for_each(|bone| {
                bone.metadata
                    .insert(String::from("injected"), String::from("true"));
            });
            Ok(())
        }
    }

    /// Builds a packer over a fresh in-memory cache with every boolean switch off.
    fn build_packer(format: OutputFormat, max_tokens: Option<usize>) -> Packer {
        let cache = SqliteCache::new_in_memory().unwrap();
        Packer::new(
            cache,
            Parser {},
            format,
            max_tokens,
            false,
            false,
            false,
            false,
            false,
        )
    }

    /// Packs the given path names with `packer`.
    fn pack_names(packer: &Packer, names: &[&str]) -> Result<String> {
        let paths: Vec<PathBuf> = names.iter().map(|name| PathBuf::from(*name)).collect();
        packer.pack(&paths)
    }

    #[test]
    fn test_plugin_detect_and_enrich() {
        let plugin = MockPlugin;
        assert!(plugin.detect(Path::new(".")));

        let mut bones = vec![Bone::default()];
        plugin.enrich(Path::new("test.rs"), &mut bones).unwrap();

        let injected = bones[0].metadata.get("injected").unwrap();
        assert_eq!(injected, "true");
    }

    #[test]
    fn test_packer_xml_format() {
        let packer = build_packer(OutputFormat::Xml, None);
        let packed = pack_names(&packer, &["test.rs"]);
        assert!(packed.is_ok());
        assert!(packed.unwrap().contains("<repository>"));
    }

    #[test]
    fn test_packer_markdown_format() {
        let packer = build_packer(OutputFormat::Markdown, None);
        let packed = pack_names(&packer, &["test.rs"]);
        assert!(packed.is_ok());
        assert!(packed.unwrap().contains("## test.rs"));
    }

    #[test]
    fn test_packer_with_plugins() {
        let mut packer = build_packer(OutputFormat::Xml, None);
        packer.register_plugin(Box::new(MockPlugin));
        let packed = pack_names(&packer, &["test.rs"]);
        assert!(packed.is_ok());
        assert!(packed.unwrap().contains("injected"));
    }

    #[test]
    fn test_packer_empty_file_list() {
        let packer = build_packer(OutputFormat::Xml, None);
        assert!(pack_names(&packer, &[]).is_ok());
    }

    #[test]
    fn test_packer_missing_file() {
        let packer = build_packer(OutputFormat::Xml, None);
        // A file that cannot be read is skipped rather than reported as an error.
        assert!(pack_names(&packer, &["missing.rs"]).is_ok());
    }

    #[test]
    fn test_packer_generates_skeleton_map_at_top() {
        let packer = build_packer(OutputFormat::Xml, None);
        let packed = pack_names(&packer, &["test.rs"]);
        assert!(packed.is_ok());
        // The skeleton map has to open the document, right after the root tag.
        assert!(packed
            .unwrap()
            .starts_with("<repository>\n  <skeleton_map>"));
    }

    #[test]
    fn test_packer_token_governor_degrades_to_bones() {
        // A tiny budget forces the governor to drop file contents.
        let packer = build_packer(OutputFormat::Xml, Some(10));
        let packed = pack_names(&packer, &["test.rs"]);
        assert!(packed.is_ok());
        // The full "dummy content" must not have been emitted.
        assert!(!packed.unwrap().contains("dummy content"));
    }
}