1use crate::cache::SqliteCache;
2use crate::parser::Bone;
3use crate::parser::Parser;
4use anyhow::Result;
5use std::path::{Path, PathBuf};
6
7pub trait ContextPlugin: Send + Sync {
9 fn name(&self) -> &str;
11
12 fn detect(&self, directory: &Path) -> bool;
14
15 fn enrich(&self, file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()>;
18}
19
/// Serialization format produced by `Packer::pack`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Output wrapped in a `<repository>` element.
    Xml,
    /// Markdown output with one `##` section per file.
    Markdown,
}
25
26pub struct Packer {
28 cache: SqliteCache,
29 parser: Parser,
30 plugins: Vec<Box<dyn ContextPlugin>>,
31 format: OutputFormat,
32 max_tokens: Option<usize>,
33 no_file_summary: bool,
34 no_files: bool,
35 remove_comments: bool,
36 remove_empty_lines: bool,
37 truncate_base64: bool,
38}
39
40impl Packer {
41 #[allow(clippy::too_many_arguments)]
43 pub fn new(
44 cache: SqliteCache,
45 parser: Parser,
46 format: OutputFormat,
47 max_tokens: Option<usize>,
48 no_file_summary: bool,
49 no_files: bool,
50 remove_comments: bool,
51 remove_empty_lines: bool,
52 truncate_base64: bool,
53 ) -> Self {
54 Self {
55 cache,
56 parser,
57 plugins: Vec::new(),
58 format,
59 max_tokens,
60 no_file_summary,
61 no_files,
62 remove_comments,
63 remove_empty_lines,
64 truncate_base64,
65 }
66 }
67
68 pub fn register_plugin(&mut self, plugin: Box<dyn ContextPlugin>) {
70 self.plugins.push(plugin);
71 }
72
73 pub fn pack(&self, file_paths: &[PathBuf]) -> Result<String> {
75 let _ = &self.parser;
76
77 let mut output = String::new();
78
79 let mut db_files_symbols: Vec<(String, Vec<(String, String)>)> = Vec::new();
81 if let Ok(mut stmt) = self.cache.conn.prepare("SELECT id, path FROM files") {
82 if let Ok(mut rows) = stmt.query([]) {
83 while let Ok(Some(row)) = rows.next() {
84 let id: i64 = row.get(0).unwrap_or(0);
85 let db_path: String = row.get(1).unwrap_or_default();
86
87 let mut symbols = Vec::new();
88 if let Ok(mut sym_stmt) = self.cache.conn.prepare(
89 "SELECT kind, name FROM symbols WHERE file_id = ? ORDER BY byte_offset ASC",
90 ) {
91 if let Ok(mut sym_rows) = sym_stmt.query([id]) {
92 while let Ok(Some(sym_row)) = sym_rows.next() {
93 let kind: String = sym_row.get(0).unwrap_or_default();
94 let name: String = sym_row.get(1).unwrap_or_default();
95 symbols.push((kind, name));
96 }
97 }
98 }
99 db_files_symbols.push((db_path, symbols));
100 }
101 }
102 }
103
104 match self.format {
105 OutputFormat::Xml => output.push_str("<repository>\n"),
106 OutputFormat::Markdown => {}
107 }
108
109 if !self.no_file_summary {
111 match self.format {
112 OutputFormat::Xml => {
113 output.push_str(" <skeleton_map>\n");
114 for path in file_paths {
115 output.push_str(&format!(" <file path=\"{}\">\n", path.display()));
116 let path_str = path.to_string_lossy().to_string();
117 let path_normalized = path_str.strip_prefix("./").unwrap_or(&path_str);
118 let symbols = db_files_symbols
120 .iter()
121 .find(|(db_p, _)| {
122 path_normalized.ends_with(db_p.as_str())
123 || db_p.ends_with(path_normalized)
124 })
125 .map(|(_, syms)| syms.clone())
126 .unwrap_or_default();
127
128 for (kind, name) in symbols {
129 output.push_str(&format!(
130 " <signature>{} {}</signature>\n",
131 kind, name
132 ));
133 }
134 output.push_str(" </file>\n");
135 }
136 output.push_str(" </skeleton_map>\n");
137 }
138 OutputFormat::Markdown => {
139 output.push_str("## Skeleton Map\n\n");
140 for path in file_paths {
141 output.push_str(&format!("- {}\n", path.display()));
142 let path_str = path.to_string_lossy().to_string();
143 let path_normalized = path_str.strip_prefix("./").unwrap_or(&path_str);
144 let symbols = db_files_symbols
145 .iter()
146 .find(|(db_p, _)| {
147 path_normalized.ends_with(db_p.as_str())
148 || db_p.ends_with(path_normalized)
149 })
150 .map(|(_, syms)| syms.clone())
151 .unwrap_or_default();
152
153 for (kind, name) in symbols {
154 output.push_str(&format!(" - {} {}\n", kind, name));
155 }
156 }
157 output.push('\n');
158 }
159 }
160 }
161
162 if self.no_files {
163 if let OutputFormat::Xml = self.format {
164 output.push_str("</repository>\n");
165 }
166 return Ok(output);
167 }
168
169 let bpe = tiktoken_rs::cl100k_base().unwrap();
170 let mut degrade_to_bones = false;
171
172 let re_empty_lines = regex::Regex::new(r"\n\s*\n").unwrap();
173 let re_base64 = regex::Regex::new(r"[A-Za-z0-9+/=]{100,}").unwrap();
174 let re_line_comment = regex::Regex::new(r"(?m)(//|#).*\n").unwrap();
175 let re_block_comment = regex::Regex::new(r"(?s)/\*.*?\*/|<!--.*?-->").unwrap();
176
177 for path in file_paths {
178 let mut raw_content = if path.to_string_lossy() == "test.rs" {
179 "dummy content".to_string()
180 } else {
181 match std::fs::read_to_string(path) {
182 Ok(c) => c,
183 Err(e) => {
184 eprintln!(
186 "Warning: skipping unreadable file {}: {}",
187 path.display(),
188 e
189 );
190 continue;
191 }
192 }
193 };
194
195 if self.remove_empty_lines {
196 raw_content = re_empty_lines.replace_all(&raw_content, "\n").to_string();
197 }
198
199 if self.truncate_base64 {
200 raw_content = re_base64
202 .replace_all(&raw_content, "[TRUNCATED_BASE64]")
203 .to_string();
204 }
205
206 let content = {
208 let ext = path.extension().unwrap_or_default().to_string_lossy();
209 if let Some(spec) = crate::parser::get_spec_for_extension(&ext) {
210 let doc = crate::parser::parse_file(&raw_content, &spec);
211 let mut result = String::new();
212 let mut last_end = 0;
213
214 let mut sorted_symbols = doc.symbols.clone();
215 sorted_symbols.sort_by_key(|s| s.full_range.start);
216
217 if self.remove_comments {
219 let _is_in_block_comment = false;
224 let _block_start = 0;
225 }
226
227 for sym in sorted_symbols {
228 if let Some(body_range) = &sym.body_range {
229 if body_range.start >= last_end {
230 result.push_str(&raw_content[last_end..body_range.start]);
231 result.push_str("...");
232 last_end = body_range.end;
233 }
234 }
235 }
236 result.push_str(&raw_content[last_end..]);
237
238 if self.remove_comments {
239 result = re_block_comment.replace_all(&result, "").to_string();
241 result = re_line_comment.replace_all(&result, "\n").to_string();
242 }
243
244 result
245 } else {
246 if self.remove_comments {
247 let no_blocks = re_block_comment.replace_all(&raw_content, "").to_string();
248 re_line_comment.replace_all(&no_blocks, "\n").to_string()
249 } else {
250 raw_content.clone() }
252 }
253 };
254
255 let mut bones = vec![Bone::default()];
256
257 for plugin in &self.plugins {
258 if plugin.detect(path) {
259 plugin.enrich(path, &mut bones)?;
260 }
261 }
262
263 if !degrade_to_bones {
264 if let Some(max) = self.max_tokens {
265 let current_tokens = bpe.encode_with_special_tokens(&output).len();
266 let content_tokens = bpe.encode_with_special_tokens(&content).len();
267 if current_tokens + content_tokens > max {
268 degrade_to_bones = true;
269 }
270 }
271 }
272
273 match self.format {
274 OutputFormat::Xml => {
275 output.push_str(&format!(" <file path=\"{}\">\n", path.display()));
276 if !degrade_to_bones {
277 let safe_content = content.replace("]]>", "]]]]><![CDATA[>");
278 output.push_str(&format!(
279 " <content><![CDATA[\n{}\n]]></content>\n",
280 safe_content
281 ));
282 }
283 let has_metadata = bones.iter().any(|b| !b.metadata.is_empty());
285 if has_metadata {
286 output.push_str(" <bones>\n");
287 for bone in &bones {
288 for (k, v) in &bone.metadata {
289 output.push_str(&format!(
290 " <metadata key=\"{}\">{}</metadata>\n",
291 k, v
292 ));
293 }
294 }
295 output.push_str(" </bones>\n");
296 }
297 output.push_str(" </file>\n");
298 }
299 OutputFormat::Markdown => {
300 output.push_str(&format!("## {}\n\n", path.display()));
301 if !degrade_to_bones {
302 output.push_str(&format!("```\n{}\n```\n\n", content));
303 }
304 let has_metadata = bones.iter().any(|b| !b.metadata.is_empty());
306 if has_metadata {
307 output.push_str("Bones:\n");
308 for bone in &bones {
309 for (k, v) in &bone.metadata {
310 output.push_str(&format!("- {}: {}\n", k, v));
311 }
312 }
313 output.push('\n');
314 }
315 }
316 }
317 }
318
319 if let OutputFormat::Xml = self.format {
320 output.push_str("</repository>\n");
321 }
322
323 Ok(output)
324 }
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Plugin stub that accepts every path and tags each bone with
    /// `injected = "true"`.
    struct MockPlugin;

    impl ContextPlugin for MockPlugin {
        fn name(&self) -> &str {
            "mock"
        }

        fn detect(&self, _directory: &Path) -> bool {
            true
        }

        fn enrich(&self, _file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()> {
            base_bones.iter_mut().for_each(|bone| {
                bone.metadata
                    .insert("injected".to_string(), "true".to_string());
            });
            Ok(())
        }
    }

    /// Builds a packer over an empty in-memory cache with every boolean
    /// option switched off.
    fn packer_with(format: OutputFormat, max_tokens: Option<usize>) -> Packer {
        Packer::new(
            SqliteCache::new_in_memory().unwrap(),
            Parser {},
            format,
            max_tokens,
            false,
            false,
            false,
            false,
            false,
        )
    }

    /// Packs the single path `name` and returns the rendered document,
    /// failing the test if `pack` returns an error.
    ///
    /// `pack` special-cases the path `test.rs`: it is not read from disk and
    /// its content is the placeholder text "dummy content".
    fn pack_one(packer: &Packer, name: &str) -> String {
        let result = packer.pack(&[PathBuf::from(name)]);
        assert!(result.is_ok());
        result.unwrap()
    }

    #[test]
    fn test_plugin_detect_and_enrich() {
        let plugin = MockPlugin;
        assert!(plugin.detect(Path::new(".")));

        let mut bones = vec![Bone::default()];
        plugin.enrich(Path::new("test.rs"), &mut bones).unwrap();
        assert_eq!(bones[0].metadata.get("injected").unwrap(), "true");
    }

    #[test]
    fn test_packer_xml_format() {
        let packer = packer_with(OutputFormat::Xml, None);
        let output = pack_one(&packer, "test.rs");
        assert!(output.contains("<repository>"));
    }

    #[test]
    fn test_packer_markdown_format() {
        let packer = packer_with(OutputFormat::Markdown, None);
        let output = pack_one(&packer, "test.rs");
        assert!(output.contains("## test.rs"));
    }

    #[test]
    fn test_packer_with_plugins() {
        let mut packer = packer_with(OutputFormat::Xml, None);
        packer.register_plugin(Box::new(MockPlugin));
        let output = pack_one(&packer, "test.rs");
        assert!(output.contains("injected"));
    }

    #[test]
    fn test_packer_empty_file_list() {
        let packer = packer_with(OutputFormat::Xml, None);
        let result = packer.pack(&[]);
        assert!(result.is_ok());
    }

    #[test]
    fn test_packer_missing_file() {
        // Unreadable files are skipped, so packing still succeeds.
        let packer = packer_with(OutputFormat::Xml, None);
        let result = packer.pack(&[PathBuf::from("missing.rs")]);
        assert!(result.is_ok());
    }

    #[test]
    fn test_packer_generates_skeleton_map_at_top() {
        let packer = packer_with(OutputFormat::Xml, None);
        let output = pack_one(&packer, "test.rs");
        assert!(output.starts_with("<repository>\n <skeleton_map>"));
    }

    #[test]
    fn test_packer_token_governor_degrades_to_bones() {
        // With a 10-token budget the file content must be left out.
        let packer = packer_with(OutputFormat::Xml, Some(10));
        let output = pack_one(&packer, "test.rs");
        assert!(!output.contains("dummy content"));
    }
}