1use std::path::{Path, PathBuf};
30use std::process::Command;
31use std::time::Instant;
32
33use anyhow::{Context, Result};
34use serde::{Deserialize, Serialize};
35
/// Name of the per-project directory (relative to the project root) that holds bugbot's files.
const BUGBOT_DIR: &str = ".bugbot";

/// File name of the state file stored inside [`BUGBOT_DIR`].
///
/// Despite the `.db` extension, this module reads and writes the file as JSON.
const STATE_DB_FILENAME: &str = "state.db";

/// Schema version recorded in every [`BugbotState`] created by [`create_state_db`].
const STATE_VERSION: u32 = 1;
44
/// Persistent marker recording that the first-run baseline scan has taken place.
///
/// Serialized as JSON into the state file located by [`state_db_path`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct BugbotState {
    /// Schema version of the state file; [`create_state_db`] writes [`STATE_VERSION`].
    pub version: u32,
    /// Creation time as an RFC 3339 string ([`create_state_db`] uses the current UTC time).
    pub created_at: String,
    /// Whether the baselines were built; [`detect_first_run`] treats `false` as a first run.
    pub baseline_built: bool,
}
58
/// Outcome of [`detect_first_run`]: whether the baselines still have to be built.
#[derive(Debug, Clone, PartialEq)]
pub enum FirstRunStatus {
    /// No usable state file was found: it is missing, unreadable, does not parse as a
    /// [`BugbotState`], or has `baseline_built == false`.
    FirstRun,
    /// A state file with `baseline_built == true` was loaded.
    SubsequentRun {
        /// The state parsed from the state file.
        state: BugbotState,
    },
}
70
71impl FirstRunStatus {
72 pub fn is_first_run(&self) -> bool {
74 matches!(self, FirstRunStatus::FirstRun)
75 }
76}
77
78pub fn bugbot_dir(project_root: &Path) -> PathBuf {
80 project_root.join(BUGBOT_DIR)
81}
82
83pub fn state_db_path(project_root: &Path) -> PathBuf {
85 bugbot_dir(project_root).join(STATE_DB_FILENAME)
86}
87
88pub fn detect_first_run(project_root: &Path) -> FirstRunStatus {
94 let path = state_db_path(project_root);
95
96 if !path.exists() {
97 return FirstRunStatus::FirstRun;
98 }
99
100 match std::fs::read_to_string(&path) {
101 Ok(contents) => match serde_json::from_str::<BugbotState>(&contents) {
102 Ok(state) if state.baseline_built => FirstRunStatus::SubsequentRun { state },
103 Ok(_) => {
104 FirstRunStatus::FirstRun
107 }
108 Err(_) => {
109 FirstRunStatus::FirstRun
111 }
112 },
113 Err(_) => {
114 FirstRunStatus::FirstRun
116 }
117 }
118}
119
120pub fn create_state_db(project_root: &Path) -> Result<BugbotState> {
125 let dir = bugbot_dir(project_root);
126 std::fs::create_dir_all(&dir)?;
127
128 let state = BugbotState {
129 version: STATE_VERSION,
130 created_at: chrono::Utc::now().to_rfc3339(),
131 baseline_built: true,
132 };
133
134 let json = serde_json::to_string_pretty(&state)?;
135 std::fs::write(state_db_path(project_root), json)?;
136
137 Ok(state)
138}
139
140pub fn run_first_run_scan<F>(project_root: &Path, writer_fn: &F) -> Result<FirstRunResult>
154where
155 F: Fn(&str),
156{
157 let start = Instant::now();
158
159 writer_fn("Building initial baselines... (one-time, ~8s)");
160
161 let mut baselines_built: Vec<String> = Vec::new();
168 let mut baseline_errors: Vec<String> = Vec::new();
169
170 match build_call_graph_baseline(project_root) {
172 Ok(()) => baselines_built.push("call_graph".to_string()),
173 Err(e) => baseline_errors.push(format!("call_graph: {e}")),
174 }
175
176 match build_complexity_baseline(project_root) {
178 Ok(()) => baselines_built.push("complexity".to_string()),
179 Err(e) => baseline_errors.push(format!("complexity: {e}")),
180 }
181
182 match build_clone_baseline(project_root) {
184 Ok(()) => baselines_built.push("clones".to_string()),
185 Err(e) => baseline_errors.push(format!("clones: {e}")),
186 }
187
188 match build_temporal_baseline(project_root) {
190 Ok(()) => baselines_built.push("temporal".to_string()),
191 Err(e) => baseline_errors.push(format!("temporal: {e}")),
192 }
193
194 let state = create_state_db(project_root)?;
196
197 let elapsed_ms = start.elapsed().as_millis() as u64;
198
199 writer_fn(&format!(
200 "Baselines built in {}ms ({} succeeded, {} failed)",
201 elapsed_ms,
202 baselines_built.len(),
203 baseline_errors.len()
204 ));
205
206 Ok(FirstRunResult {
207 state,
208 elapsed_ms,
209 baselines_built,
210 baseline_errors,
211 })
212}
213
/// Summary returned by [`run_first_run_scan`].
#[derive(Debug, Clone)]
pub struct FirstRunResult {
    /// State written to the state file at the end of the scan.
    pub state: BugbotState,
    /// Wall-clock duration of the scan in milliseconds.
    pub elapsed_ms: u64,
    /// Names of the baselines whose build step returned `Ok`.
    pub baselines_built: Vec<String>,
    /// One `"<name>: <error>"` entry for each baseline whose build step failed.
    pub baseline_errors: Vec<String>,
}
226
/// File name, inside the bugbot directory, of the cached baseline call graph (JSON).
const BASELINE_CG_FILENAME: &str = "baseline_call_graph.json";
/// File name, inside the bugbot directory, of the metadata describing the cached call graph.
const BASELINE_CG_META_FILENAME: &str = "baseline_call_graph_meta.json";
237
/// Metadata stored next to the cached baseline call graph.
///
/// [`load_cached_baseline_call_graph`] compares `commit_hash` with the requested commit to
/// decide whether the cached graph may be used.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct BaselineCallGraphMeta {
    /// Commit hash the cached call graph was saved for.
    pub commit_hash: String,
    /// Language label passed to [`save_baseline_call_graph`].
    pub language: String,
    /// Save time as an RFC 3339 UTC timestamp.
    pub built_at: String,
}
248
249pub fn save_baseline_call_graph(
255 project_root: &Path,
256 call_graph: &serde_json::Value,
257 commit_hash: &str,
258 language: &str,
259) -> Result<()> {
260 let dir = bugbot_dir(project_root);
261 std::fs::create_dir_all(&dir)?;
262
263 let cg_path = dir.join(BASELINE_CG_FILENAME);
264 let meta_path = dir.join(BASELINE_CG_META_FILENAME);
265
266 let meta = BaselineCallGraphMeta {
267 commit_hash: commit_hash.to_string(),
268 language: language.to_string(),
269 built_at: chrono::Utc::now().to_rfc3339(),
270 };
271
272 std::fs::write(&cg_path, serde_json::to_string(call_graph)?)
273 .context("writing baseline call graph cache")?;
274 std::fs::write(&meta_path, serde_json::to_string_pretty(&meta)?)
275 .context("writing baseline call graph metadata")?;
276
277 Ok(())
278}
279
280pub fn load_cached_baseline_call_graph(
288 project_root: &Path,
289 expected_commit: &str,
290) -> Option<serde_json::Value> {
291 let dir = bugbot_dir(project_root);
292 let cg_path = dir.join(BASELINE_CG_FILENAME);
293 let meta_path = dir.join(BASELINE_CG_META_FILENAME);
294
295 let meta_str = std::fs::read_to_string(&meta_path).ok()?;
296 let meta: BaselineCallGraphMeta = serde_json::from_str(&meta_str).ok()?;
297
298 if meta.commit_hash != expected_commit {
299 return None;
300 }
301
302 let cg_str = std::fs::read_to_string(&cg_path).ok()?;
303 serde_json::from_str(&cg_str).ok()
304}
305
306pub fn resolve_git_ref(project_root: &Path, git_ref: &str) -> Result<String> {
311 let output = Command::new("git")
312 .args(["rev-parse", git_ref])
313 .current_dir(project_root)
314 .output()
315 .context("Failed to run git rev-parse")?;
316
317 if !output.status.success() {
318 let stderr = String::from_utf8_lossy(&output.stderr);
319 anyhow::bail!("git rev-parse {} failed: {}", git_ref, stderr.trim());
320 }
321
322 Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
323}
324
325fn build_call_graph_baseline(project_root: &Path) -> Result<()> {
340 let language = match tldr_core::Language::from_directory(project_root) {
342 Some(lang) => lang,
343 None => return Ok(()), };
345
346 let call_graph =
347 tldr_core::callgraph::build_project_call_graph(project_root, language, None, true)
348 .map_err(|e| anyhow::anyhow!("{e}"))?;
349
350 let call_graph_json = serde_json::to_value(&call_graph)
353 .map_err(|e| anyhow::anyhow!("serialize call graph: {e}"))?;
354
355 let commit_hash = resolve_git_ref(project_root, "HEAD").unwrap_or_default();
356 if !commit_hash.is_empty() {
357 if let Err(e) = save_baseline_call_graph(
358 project_root,
359 &call_graph_json,
360 &commit_hash,
361 language.as_str(),
362 ) {
363 eprintln!("Warning: failed to cache baseline call graph: {e}");
364 }
365 }
366
367 Ok(())
368}
369
370fn build_complexity_baseline(project_root: &Path) -> Result<()> {
376 let source_files = collect_source_files(project_root);
380 for file in &source_files {
381 if let Ok(contents) = std::fs::read_to_string(file) {
382 let lang = tldr_core::Language::from_path(file);
383 if let Some(language) = lang {
384 let _complexities =
386 tldr_core::metrics::calculate_all_complexities(&contents, language);
387 }
388 }
389 }
390 Ok(())
391}
392
393fn build_clone_baseline(project_root: &Path) -> Result<()> {
398 let options = tldr_core::analysis::clones::ClonesOptions::default();
399 let _clones = tldr_core::analysis::clones::detect_clones(project_root, &options)
400 .map_err(|e| anyhow::anyhow!("{e}"))?;
401 Ok(())
402}
403
404fn build_temporal_baseline(project_root: &Path) -> Result<()> {
410 let source_files = collect_source_files(project_root);
414 for file in &source_files {
415 let lang = tldr_core::Language::from_path(file);
416 if let Some(language) = lang {
417 let _structure = tldr_core::ast::get_code_structure(
418 file, language, 0, None,
420 );
421 }
422 }
423 Ok(())
424}
425
426fn collect_source_files(project_root: &Path) -> Vec<PathBuf> {
432 let mut files = Vec::new();
433 collect_source_files_recursive(project_root, &mut files);
434 files
435}
436
437fn collect_source_files_recursive(dir: &Path, files: &mut Vec<PathBuf>) {
439 let entries = match std::fs::read_dir(dir) {
440 Ok(e) => e,
441 Err(_) => return,
442 };
443
444 for entry in entries.flatten() {
445 let path = entry.path();
446 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
447 if name.starts_with('.')
449 || name == "target"
450 || name == "node_modules"
451 || name == "vendor"
452 || name == "__pycache__"
453 || name == "dist"
454 || name == "build"
455 {
456 continue;
457 }
458 }
459
460 if path.is_dir() {
461 collect_source_files_recursive(&path, files);
462 } else if is_source_file(&path) {
463 files.push(path);
464 }
465 }
466}
467
/// Returns `true` when the extension of `path` is one of the recognized source extensions.
///
/// The comparison is case-sensitive and looks only at the path, never at the file system.
/// Paths without an extension, or with an extension that is not valid UTF-8, are rejected.
fn is_source_file(path: &Path) -> bool {
    const SOURCE_EXTENSIONS: &[&str] = &[
        "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", "rb", "php",
        "kt", "swift", "cs", "scala", "ex", "exs", "lua",
    ];

    path.extension()
        .and_then(|ext| ext.to_str())
        .map_or(false, |ext| SOURCE_EXTENSIONS.contains(&ext))
}
499
/// Unit tests for first-run detection, state-file handling, source-file discovery, and the
/// baseline call graph cache. Every test works inside its own temporary directory.
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::RefCell;
    use std::fs;
    use tempfile::TempDir;

    // ---- detect_first_run ----

    /// A project without a state file is reported as a first run.
    #[test]
    fn test_first_run_detects_no_state_db() {
        let tmp = TempDir::new().unwrap();
        let status = detect_first_run(tmp.path());
        assert_eq!(status, FirstRunStatus::FirstRun);
        assert!(status.is_first_run());
    }

    /// A valid state file with `baseline_built: true` yields `SubsequentRun` with its contents.
    #[test]
    fn test_first_run_skips_if_state_exists() {
        let tmp = TempDir::new().unwrap();

        // Hand-write the state file instead of going through `create_state_db`.
        let dir = tmp.path().join(BUGBOT_DIR);
        fs::create_dir_all(&dir).unwrap();
        let state = BugbotState {
            version: 1,
            created_at: "2026-01-15T10:00:00Z".to_string(),
            baseline_built: true,
        };
        fs::write(
            dir.join(STATE_DB_FILENAME),
            serde_json::to_string_pretty(&state).unwrap(),
        )
        .unwrap();

        let status = detect_first_run(tmp.path());
        assert!(!status.is_first_run());
        match status {
            FirstRunStatus::SubsequentRun { state: s } => {
                assert_eq!(s.version, 1);
                assert!(s.baseline_built);
                assert_eq!(s.created_at, "2026-01-15T10:00:00Z");
            }
            _ => panic!("Expected SubsequentRun"),
        }
    }

    /// A state file that is not valid JSON is treated as a first run.
    #[test]
    fn test_first_run_treats_malformed_state_as_first_run() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join(BUGBOT_DIR);
        fs::create_dir_all(&dir).unwrap();
        fs::write(dir.join(STATE_DB_FILENAME), "not valid json {{{").unwrap();

        let status = detect_first_run(tmp.path());
        assert!(status.is_first_run());
    }

    /// A well-formed state file with `baseline_built: false` is treated as a first run.
    #[test]
    fn test_first_run_treats_incomplete_baseline_as_first_run() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join(BUGBOT_DIR);
        fs::create_dir_all(&dir).unwrap();
        let state = BugbotState {
            version: 1,
            created_at: "2026-01-15T10:00:00Z".to_string(),
            baseline_built: false,
        };
        fs::write(
            dir.join(STATE_DB_FILENAME),
            serde_json::to_string_pretty(&state).unwrap(),
        )
        .unwrap();

        let status = detect_first_run(tmp.path());
        assert!(
            status.is_first_run(),
            "baseline_built=false should be treated as first run"
        );
    }

    // ---- create_state_db ----

    /// `create_state_db` writes a state file whose contents round-trip to the returned state.
    #[test]
    fn test_first_run_creates_state_db() {
        let tmp = TempDir::new().unwrap();
        assert!(!state_db_path(tmp.path()).exists());

        let state = create_state_db(tmp.path()).unwrap();
        assert!(state.baseline_built);
        assert_eq!(state.version, STATE_VERSION);
        assert!(!state.created_at.is_empty());

        assert!(state_db_path(tmp.path()).exists());

        // What is on disk must match what was returned.
        let contents = fs::read_to_string(state_db_path(tmp.path())).unwrap();
        let parsed: BugbotState = serde_json::from_str(&contents).unwrap();
        assert_eq!(parsed, state);
    }

    /// After `create_state_db`, detection reports a subsequent run carrying the same state.
    #[test]
    fn test_subsequent_run_uses_existing_baselines() {
        let tmp = TempDir::new().unwrap();

        assert!(detect_first_run(tmp.path()).is_first_run());
        let state = create_state_db(tmp.path()).unwrap();

        let status = detect_first_run(tmp.path());
        assert!(!status.is_first_run());
        match status {
            FirstRunStatus::SubsequentRun { state: s } => {
                assert_eq!(s.version, state.version);
                assert_eq!(s.created_at, state.created_at);
                assert!(s.baseline_built);
            }
            _ => panic!("Expected SubsequentRun after create_state_db"),
        }
    }

    // ---- path helpers ----

    /// `bugbot_dir` appends `.bugbot` to the project root.
    #[test]
    fn test_bugbot_dir_path() {
        let root = Path::new("/projects/myapp");
        assert_eq!(bugbot_dir(root), PathBuf::from("/projects/myapp/.bugbot"));
    }

    /// `state_db_path` points at `state.db` inside the bugbot directory.
    #[test]
    fn test_state_db_path_correct() {
        let root = Path::new("/projects/myapp");
        assert_eq!(
            state_db_path(root),
            PathBuf::from("/projects/myapp/.bugbot/state.db")
        );
    }

    // ---- run_first_run_scan ----

    /// A scan over a small project writes the state file and accounts for all four baselines,
    /// each either built or recorded as an error.
    #[test]
    fn test_first_run_scan_creates_state_and_records_baselines() {
        let tmp = TempDir::new().unwrap();

        // Minimal project with a single Python source file.
        let src_dir = tmp.path().join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(src_dir.join("main.py"), "def hello():\n return 42\n").unwrap();

        let messages: RefCell<Vec<String>> = RefCell::new(Vec::new());
        let writer = |msg: &str| messages.borrow_mut().push(msg.to_string());

        let result = run_first_run_scan(tmp.path(), &writer).unwrap();

        assert!(state_db_path(tmp.path()).exists());
        assert!(result.state.baseline_built);

        let total = result.baselines_built.len() + result.baseline_errors.len();
        assert_eq!(total, 4, "Should attempt all 4 baseline categories");

        // Generous upper bound; guards against a runaway scan rather than measuring speed.
        assert!(
            result.elapsed_ms < 30_000,
            "Scan should complete in reasonable time"
        );
    }

    /// The scan reports a start message (mentioning the one-time cost) and a completion message.
    #[test]
    fn test_first_run_progress_indication() {
        let tmp = TempDir::new().unwrap();

        // `RefCell` lets the `Fn` closure record messages through a shared reference.
        let messages: RefCell<Vec<String>> = RefCell::new(Vec::new());
        let writer = |msg: &str| messages.borrow_mut().push(msg.to_string());

        let _result = run_first_run_scan(tmp.path(), &writer).unwrap();

        let messages = messages.into_inner();

        assert!(
            messages
                .iter()
                .any(|m| m.contains("Building initial baselines")),
            "Must print progress message containing 'Building initial baselines'. Got: {:?}",
            messages
        );

        assert!(
            messages.iter().any(|m| m.contains("one-time")),
            "Progress message must mention one-time cost. Got: {:?}",
            messages
        );

        assert!(
            messages.iter().any(|m| m.contains("Baselines built in")),
            "Must print completion message. Got: {:?}",
            messages
        );
    }

    // ---- source file discovery ----

    /// Source files are found in subdirectories; files with other extensions are ignored.
    #[test]
    fn test_collect_source_files_finds_source_files() {
        let tmp = TempDir::new().unwrap();
        let src = tmp.path().join("src");
        fs::create_dir_all(&src).unwrap();
        fs::write(src.join("main.rs"), "fn main() {}").unwrap();
        fs::write(src.join("lib.py"), "def f(): pass").unwrap();
        fs::write(src.join("notes.txt"), "not source").unwrap();

        let files = collect_source_files(tmp.path());
        assert_eq!(files.len(), 2);
        assert!(files.iter().any(|f| f.ends_with("main.rs")));
        assert!(files.iter().any(|f| f.ends_with("lib.py")));
    }

    /// Hidden directories and well-known build/dependency directories are not traversed.
    #[test]
    fn test_collect_source_files_skips_hidden_and_build_dirs() {
        let tmp = TempDir::new().unwrap();

        // Each skipped directory contains a source file that must not be reported.
        for dir_name in &[".git", "target", "node_modules", "__pycache__", "vendor"] {
            let dir = tmp.path().join(dir_name);
            fs::create_dir_all(&dir).unwrap();
            fs::write(dir.join("hidden.rs"), "fn f() {}").unwrap();
        }

        fs::write(tmp.path().join("visible.rs"), "fn main() {}").unwrap();

        let files = collect_source_files(tmp.path());
        assert_eq!(files.len(), 1);
        assert!(files[0].ends_with("visible.rs"));
    }

    /// Every supported extension is accepted and a sample of non-source extensions is rejected.
    #[test]
    fn test_is_source_file_recognizes_all_extensions() {
        let extensions = vec![
            "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", "rb",
            "php", "kt", "swift", "cs", "scala", "ex", "exs", "lua",
        ];

        for ext in &extensions {
            let path = PathBuf::from(format!("test.{ext}"));
            assert!(
                is_source_file(&path),
                "Extension .{ext} should be recognized as source"
            );
        }

        for ext in &["txt", "md", "json", "yaml", "toml", "lock", "png"] {
            let path = PathBuf::from(format!("test.{ext}"));
            assert!(
                !is_source_file(&path),
                "Extension .{ext} should NOT be recognized as source"
            );
        }
    }

    // ---- state file edge cases ----

    /// Calling `create_state_db` twice overwrites the file with the newer state.
    #[test]
    fn test_state_db_overwritten_on_second_first_run() {
        let tmp = TempDir::new().unwrap();

        let state1 = create_state_db(tmp.path()).unwrap();

        // Make sure the two timestamps cannot coincide.
        std::thread::sleep(std::time::Duration::from_millis(10));

        let state2 = create_state_db(tmp.path()).unwrap();

        assert_ne!(
            state1.created_at, state2.created_at,
            "Second creation should have a later timestamp"
        );

        let contents = fs::read_to_string(state_db_path(tmp.path())).unwrap();
        let parsed: BugbotState = serde_json::from_str(&contents).unwrap();
        assert_eq!(parsed, state2);
    }

    /// `BugbotState` survives a JSON serialize/deserialize round trip unchanged.
    #[test]
    fn test_bugbot_state_serialization_roundtrip() {
        let state = BugbotState {
            version: 1,
            created_at: "2026-03-02T12:00:00Z".to_string(),
            baseline_built: true,
        };

        let json = serde_json::to_string_pretty(&state).unwrap();
        let parsed: BugbotState = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, state);
    }

    /// The scan result carries the current state version and accounts for all four baselines.
    #[test]
    fn test_first_run_result_fields() {
        let tmp = TempDir::new().unwrap();

        let writer = |_msg: &str| {};
        let result = run_first_run_scan(tmp.path(), &writer).unwrap();

        assert_eq!(result.state.version, STATE_VERSION);
        assert!(result.state.baseline_built);
        let total = result.baselines_built.len() + result.baseline_errors.len();
        assert_eq!(total, 4);
    }

    /// The scan succeeds on a directory that contains no files at all.
    #[test]
    fn test_first_run_empty_project_succeeds() {
        let tmp = TempDir::new().unwrap();

        let writer = |_msg: &str| {};
        let result = run_first_run_scan(tmp.path(), &writer);

        assert!(
            result.is_ok(),
            "First-run scan should succeed even on an empty project: {:?}",
            result.err()
        );
    }

    // ---- baseline call graph cache ----

    /// A saved call graph is loaded back unchanged when the commit hash matches.
    #[test]
    fn test_save_load_baseline_call_graph_roundtrip() {
        let tmp = TempDir::new().unwrap();

        let cg = serde_json::json!({
            "edges": [
                {"src_file": "a.py", "src_func": "foo", "dst_file": "b.py", "dst_func": "bar"}
            ]
        });

        save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();

        let loaded = load_cached_baseline_call_graph(tmp.path(), "abc123");
        assert!(loaded.is_some(), "Cache should load with matching commit");
        assert_eq!(loaded.unwrap(), cg);
    }

    /// A cache saved for one commit is not returned when another commit is requested.
    #[test]
    fn test_load_baseline_rejects_stale_commit() {
        let tmp = TempDir::new().unwrap();

        let cg = serde_json::json!({"edges": []});
        save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();

        let loaded = load_cached_baseline_call_graph(tmp.path(), "def456");
        assert!(
            loaded.is_none(),
            "Cache should not load with different commit"
        );
    }

    /// Loading from a project without any cache files returns `None`.
    #[test]
    fn test_load_baseline_nonexistent_cache() {
        let tmp = TempDir::new().unwrap();
        let loaded = load_cached_baseline_call_graph(tmp.path(), "abc123");
        assert!(loaded.is_none(), "No cache should return None");
    }

    /// `BaselineCallGraphMeta` survives a JSON serialize/deserialize round trip unchanged.
    #[test]
    fn test_baseline_meta_serialization() {
        let meta = BaselineCallGraphMeta {
            commit_hash: "abc123".to_string(),
            language: "rust".to_string(),
            built_at: "2026-03-16T12:00:00Z".to_string(),
        };

        let json = serde_json::to_string_pretty(&meta).unwrap();
        let parsed: BaselineCallGraphMeta = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, meta);
    }

    /// Saving creates the bugbot directory and both cache files when they do not exist yet.
    #[test]
    fn test_save_creates_bugbot_dir() {
        let tmp = TempDir::new().unwrap();
        assert!(!bugbot_dir(tmp.path()).exists());

        let cg = serde_json::json!({"edges": []});
        save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();

        assert!(bugbot_dir(tmp.path()).exists());
        assert!(bugbot_dir(tmp.path()).join(BASELINE_CG_FILENAME).exists());
        assert!(bugbot_dir(tmp.path())
            .join(BASELINE_CG_META_FILENAME)
            .exists());
    }
}