1use std::path::{Path, PathBuf};
30use std::process::Command;
31use std::time::Instant;
32
33use anyhow::{Context, Result};
34use serde::{Deserialize, Serialize};
35
/// Name of the per-project directory, relative to the project root, that holds bugbot files.
const BUGBOT_DIR: &str = ".bugbot";

/// File name of the state file inside [`BUGBOT_DIR`].
///
/// Despite the `.db` extension, `create_state_db` writes pretty-printed JSON to it.
const STATE_DB_FILENAME: &str = "state.db";

/// Schema version stamped into every state file written by `create_state_db`.
const STATE_VERSION: u32 = 1;
44
45#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
50pub struct BugbotState {
51 pub version: u32,
53 pub created_at: String,
55 pub baseline_built: bool,
57}
58
59#[derive(Debug, Clone, PartialEq)]
61pub enum FirstRunStatus {
62 FirstRun,
64 SubsequentRun {
66 state: BugbotState,
68 },
69}
70
71impl FirstRunStatus {
72 pub fn is_first_run(&self) -> bool {
74 matches!(self, FirstRunStatus::FirstRun)
75 }
76}
77
78pub fn bugbot_dir(project_root: &Path) -> PathBuf {
80 project_root.join(BUGBOT_DIR)
81}
82
83pub fn state_db_path(project_root: &Path) -> PathBuf {
85 bugbot_dir(project_root).join(STATE_DB_FILENAME)
86}
87
88pub fn detect_first_run(project_root: &Path) -> FirstRunStatus {
94 let path = state_db_path(project_root);
95
96 if !path.exists() {
97 return FirstRunStatus::FirstRun;
98 }
99
100 match std::fs::read_to_string(&path) {
101 Ok(contents) => match serde_json::from_str::<BugbotState>(&contents) {
102 Ok(state) if state.baseline_built => FirstRunStatus::SubsequentRun { state },
103 Ok(_) => {
104 FirstRunStatus::FirstRun
107 }
108 Err(_) => {
109 FirstRunStatus::FirstRun
111 }
112 },
113 Err(_) => {
114 FirstRunStatus::FirstRun
116 }
117 }
118}
119
120pub fn create_state_db(project_root: &Path) -> Result<BugbotState> {
125 let dir = bugbot_dir(project_root);
126 std::fs::create_dir_all(&dir)?;
127
128 let state = BugbotState {
129 version: STATE_VERSION,
130 created_at: chrono::Utc::now().to_rfc3339(),
131 baseline_built: true,
132 };
133
134 let json = serde_json::to_string_pretty(&state)?;
135 std::fs::write(state_db_path(project_root), json)?;
136
137 Ok(state)
138}
139
140pub fn run_first_run_scan<F>(
154 project_root: &Path,
155 writer_fn: &F,
156) -> Result<FirstRunResult>
157where
158 F: Fn(&str),
159{
160 let start = Instant::now();
161
162 writer_fn("Building initial baselines... (one-time, ~8s)");
163
164 let mut baselines_built: Vec<String> = Vec::new();
171 let mut baseline_errors: Vec<String> = Vec::new();
172
173 match build_call_graph_baseline(project_root) {
175 Ok(()) => baselines_built.push("call_graph".to_string()),
176 Err(e) => baseline_errors.push(format!("call_graph: {e}")),
177 }
178
179 match build_complexity_baseline(project_root) {
181 Ok(()) => baselines_built.push("complexity".to_string()),
182 Err(e) => baseline_errors.push(format!("complexity: {e}")),
183 }
184
185 match build_clone_baseline(project_root) {
187 Ok(()) => baselines_built.push("clones".to_string()),
188 Err(e) => baseline_errors.push(format!("clones: {e}")),
189 }
190
191 match build_temporal_baseline(project_root) {
193 Ok(()) => baselines_built.push("temporal".to_string()),
194 Err(e) => baseline_errors.push(format!("temporal: {e}")),
195 }
196
197 let state = create_state_db(project_root)?;
199
200 let elapsed_ms = start.elapsed().as_millis() as u64;
201
202 writer_fn(&format!(
203 "Baselines built in {}ms ({} succeeded, {} failed)",
204 elapsed_ms,
205 baselines_built.len(),
206 baseline_errors.len()
207 ));
208
209 Ok(FirstRunResult {
210 state,
211 elapsed_ms,
212 baselines_built,
213 baseline_errors,
214 })
215}
216
217#[derive(Debug, Clone)]
219pub struct FirstRunResult {
220 pub state: BugbotState,
222 pub elapsed_ms: u64,
224 pub baselines_built: Vec<String>,
226 pub baseline_errors: Vec<String>,
228}
229
/// File name, inside the bugbot directory, of the cached baseline call graph (JSON).
const BASELINE_CG_FILENAME: &str = "baseline_call_graph.json";
/// File name, inside the bugbot directory, of the metadata describing the cached call graph.
const BASELINE_CG_META_FILENAME: &str = "baseline_call_graph_meta.json";
240
241#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
243pub struct BaselineCallGraphMeta {
244 pub commit_hash: String,
246 pub language: String,
248 pub built_at: String,
250}
251
252pub fn save_baseline_call_graph(
258 project_root: &Path,
259 call_graph: &serde_json::Value,
260 commit_hash: &str,
261 language: &str,
262) -> Result<()> {
263 let dir = bugbot_dir(project_root);
264 std::fs::create_dir_all(&dir)?;
265
266 let cg_path = dir.join(BASELINE_CG_FILENAME);
267 let meta_path = dir.join(BASELINE_CG_META_FILENAME);
268
269 let meta = BaselineCallGraphMeta {
270 commit_hash: commit_hash.to_string(),
271 language: language.to_string(),
272 built_at: chrono::Utc::now().to_rfc3339(),
273 };
274
275 std::fs::write(&cg_path, serde_json::to_string(call_graph)?)
276 .context("writing baseline call graph cache")?;
277 std::fs::write(&meta_path, serde_json::to_string_pretty(&meta)?)
278 .context("writing baseline call graph metadata")?;
279
280 Ok(())
281}
282
283pub fn load_cached_baseline_call_graph(
291 project_root: &Path,
292 expected_commit: &str,
293) -> Option<serde_json::Value> {
294 let dir = bugbot_dir(project_root);
295 let cg_path = dir.join(BASELINE_CG_FILENAME);
296 let meta_path = dir.join(BASELINE_CG_META_FILENAME);
297
298 let meta_str = std::fs::read_to_string(&meta_path).ok()?;
299 let meta: BaselineCallGraphMeta = serde_json::from_str(&meta_str).ok()?;
300
301 if meta.commit_hash != expected_commit {
302 return None;
303 }
304
305 let cg_str = std::fs::read_to_string(&cg_path).ok()?;
306 serde_json::from_str(&cg_str).ok()
307}
308
309pub fn resolve_git_ref(project_root: &Path, git_ref: &str) -> Result<String> {
314 let output = Command::new("git")
315 .args(["rev-parse", git_ref])
316 .current_dir(project_root)
317 .output()
318 .context("Failed to run git rev-parse")?;
319
320 if !output.status.success() {
321 let stderr = String::from_utf8_lossy(&output.stderr);
322 anyhow::bail!("git rev-parse {} failed: {}", git_ref, stderr.trim());
323 }
324
325 Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
326}
327
328fn build_call_graph_baseline(project_root: &Path) -> Result<()> {
343 let language = match tldr_core::Language::from_directory(project_root) {
345 Some(lang) => lang,
346 None => return Ok(()), };
348
349 let call_graph =
350 tldr_core::callgraph::build_project_call_graph(project_root, language, None, true)
351 .map_err(|e| anyhow::anyhow!("{e}"))?;
352
353 let call_graph_json = serde_json::to_value(&call_graph)
356 .map_err(|e| anyhow::anyhow!("serialize call graph: {e}"))?;
357
358 let commit_hash = resolve_git_ref(project_root, "HEAD").unwrap_or_default();
359 if !commit_hash.is_empty() {
360 if let Err(e) = save_baseline_call_graph(
361 project_root,
362 &call_graph_json,
363 &commit_hash,
364 language.as_str(),
365 ) {
366 eprintln!("Warning: failed to cache baseline call graph: {e}");
367 }
368 }
369
370 Ok(())
371}
372
373fn build_complexity_baseline(project_root: &Path) -> Result<()> {
379 let source_files = collect_source_files(project_root);
383 for file in &source_files {
384 if let Ok(contents) = std::fs::read_to_string(file) {
385 let lang = tldr_core::Language::from_path(file);
386 if let Some(language) = lang {
387 let _complexities =
389 tldr_core::metrics::calculate_all_complexities(&contents, language);
390 }
391 }
392 }
393 Ok(())
394}
395
396fn build_clone_baseline(project_root: &Path) -> Result<()> {
401 let options = tldr_core::analysis::clones::ClonesOptions::default();
402 let _clones = tldr_core::analysis::clones::detect_clones(project_root, &options)
403 .map_err(|e| anyhow::anyhow!("{e}"))?;
404 Ok(())
405}
406
407fn build_temporal_baseline(project_root: &Path) -> Result<()> {
413 let source_files = collect_source_files(project_root);
417 for file in &source_files {
418 let lang = tldr_core::Language::from_path(file);
419 if let Some(language) = lang {
420 let _structure = tldr_core::ast::get_code_structure(
421 file,
422 language,
423 0, None,
425 );
426 }
427 }
428 Ok(())
429}
430
431fn collect_source_files(project_root: &Path) -> Vec<PathBuf> {
437 let mut files = Vec::new();
438 collect_source_files_recursive(project_root, &mut files);
439 files
440}
441
442fn collect_source_files_recursive(dir: &Path, files: &mut Vec<PathBuf>) {
444 let entries = match std::fs::read_dir(dir) {
445 Ok(e) => e,
446 Err(_) => return,
447 };
448
449 for entry in entries.flatten() {
450 let path = entry.path();
451 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
452 if name.starts_with('.')
454 || name == "target"
455 || name == "node_modules"
456 || name == "vendor"
457 || name == "__pycache__"
458 || name == "dist"
459 || name == "build"
460 {
461 continue;
462 }
463 }
464
465 if path.is_dir() {
466 collect_source_files_recursive(&path, files);
467 } else if is_source_file(&path) {
468 files.push(path);
469 }
470 }
471}
472
/// Reports whether `path` has one of the recognised source-code extensions.
///
/// The comparison is case-sensitive and looks only at the extension; the file is not
/// opened. Paths without an extension, or whose extension is not valid UTF-8, return
/// `false`.
fn is_source_file(path: &Path) -> bool {
    const SOURCE_EXTENSIONS: [&str; 21] = [
        "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", "rb", "php",
        "kt", "swift", "cs", "scala", "ex", "exs", "lua",
    ];

    path.extension()
        .and_then(|raw| raw.to_str())
        .map_or(false, |ext| SOURCE_EXTENSIONS.contains(&ext))
}
504
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::RefCell;
    use std::fs;
    use tempfile::TempDir;

    // ----- shared fixtures -------------------------------------------------------------

    /// Fresh, empty temporary directory standing in for a project root.
    fn scratch_project() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Creates `.bugbot/` under `root` and stores `raw` verbatim as the state file.
    fn plant_state_file(root: &Path, raw: &str) {
        let bugbot = root.join(BUGBOT_DIR);
        fs::create_dir_all(&bugbot).unwrap();
        fs::write(bugbot.join(STATE_DB_FILENAME), raw).unwrap();
    }

    /// Hand-built version-1 state with a fixed timestamp.
    fn fixed_state(baseline_built: bool) -> BugbotState {
        BugbotState {
            version: 1,
            created_at: "2026-01-15T10:00:00Z".to_string(),
            baseline_built,
        }
    }

    // ----- first-run detection ---------------------------------------------------------

    #[test]
    fn test_first_run_detects_no_state_db() {
        let project = scratch_project();
        let status = detect_first_run(project.path());
        assert!(status.is_first_run());
        assert_eq!(status, FirstRunStatus::FirstRun);
    }

    #[test]
    fn test_first_run_skips_if_state_exists() {
        let project = scratch_project();
        let seeded = fixed_state(true);
        plant_state_file(
            project.path(),
            &serde_json::to_string_pretty(&seeded).unwrap(),
        );

        let status = detect_first_run(project.path());
        assert!(!status.is_first_run());
        if let FirstRunStatus::SubsequentRun { state: loaded } = status {
            assert_eq!(loaded.version, 1);
            assert!(loaded.baseline_built);
            assert_eq!(loaded.created_at, "2026-01-15T10:00:00Z");
        } else {
            panic!("Expected SubsequentRun");
        }
    }

    #[test]
    fn test_first_run_treats_malformed_state_as_first_run() {
        let project = scratch_project();
        plant_state_file(project.path(), "not valid json {{{");

        assert!(detect_first_run(project.path()).is_first_run());
    }

    #[test]
    fn test_first_run_treats_incomplete_baseline_as_first_run() {
        let project = scratch_project();
        let unfinished = fixed_state(false);
        plant_state_file(
            project.path(),
            &serde_json::to_string_pretty(&unfinished).unwrap(),
        );

        let status = detect_first_run(project.path());
        assert!(status.is_first_run(), "baseline_built=false should be treated as first run");
    }

    // ----- state file creation ---------------------------------------------------------

    #[test]
    fn test_first_run_creates_state_db() {
        let project = scratch_project();
        let db_file = state_db_path(project.path());
        assert!(!db_file.exists());

        let state = create_state_db(project.path()).unwrap();
        assert!(state.baseline_built);
        assert_eq!(state.version, STATE_VERSION);
        assert!(!state.created_at.is_empty());

        assert!(db_file.exists());

        // What landed on disk must round-trip to exactly what was returned.
        let raw = fs::read_to_string(&db_file).unwrap();
        let on_disk: BugbotState = serde_json::from_str(&raw).unwrap();
        assert_eq!(on_disk, state);
    }

    #[test]
    fn test_subsequent_run_uses_existing_baselines() {
        let project = scratch_project();

        assert!(detect_first_run(project.path()).is_first_run());
        let created = create_state_db(project.path()).unwrap();

        let status = detect_first_run(project.path());
        assert!(!status.is_first_run());
        if let FirstRunStatus::SubsequentRun { state: reloaded } = status {
            assert_eq!(reloaded.version, created.version);
            assert_eq!(reloaded.created_at, created.created_at);
            assert!(reloaded.baseline_built);
        } else {
            panic!("Expected SubsequentRun after create_state_db");
        }
    }

    // ----- path helpers ----------------------------------------------------------------

    #[test]
    fn test_bugbot_dir_path() {
        let expected = PathBuf::from("/projects/myapp/.bugbot");
        assert_eq!(bugbot_dir(Path::new("/projects/myapp")), expected);
    }

    #[test]
    fn test_state_db_path_correct() {
        let expected = PathBuf::from("/projects/myapp/.bugbot/state.db");
        assert_eq!(state_db_path(Path::new("/projects/myapp")), expected);
    }

    // ----- full first-run scan ---------------------------------------------------------

    #[test]
    fn test_first_run_scan_creates_state_and_records_baselines() {
        let project = scratch_project();

        // Give the scan one small source file to look at.
        let src_dir = project.path().join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(src_dir.join("main.py"), "def hello():\n return 42\n").unwrap();

        let log: RefCell<Vec<String>> = RefCell::new(Vec::new());
        let writer = |line: &str| log.borrow_mut().push(line.to_string());

        let outcome = run_first_run_scan(project.path(), &writer).unwrap();

        assert!(state_db_path(project.path()).exists());
        assert!(outcome.state.baseline_built);

        // Every builder must be accounted for, whether it succeeded or failed.
        let attempted = outcome.baselines_built.len() + outcome.baseline_errors.len();
        assert_eq!(attempted, 4, "Should attempt all 4 baseline categories");

        assert!(outcome.elapsed_ms < 30_000, "Scan should complete in reasonable time");
    }

    #[test]
    fn test_first_run_progress_indication() {
        let project = scratch_project();

        let messages: RefCell<Vec<String>> = RefCell::new(Vec::new());
        let writer = |line: &str| messages.borrow_mut().push(line.to_string());

        let _outcome = run_first_run_scan(project.path(), &writer).unwrap();

        let messages = messages.into_inner();
        let mentions = |needle: &str| messages.iter().any(|line| line.contains(needle));

        assert!(
            mentions("Building initial baselines"),
            "Must print progress message containing 'Building initial baselines'. Got: {:?}",
            messages
        );

        assert!(
            mentions("one-time"),
            "Progress message must mention one-time cost. Got: {:?}",
            messages
        );

        assert!(
            mentions("Baselines built in"),
            "Must print completion message. Got: {:?}",
            messages
        );
    }

    // ----- source file collection ------------------------------------------------------

    #[test]
    fn test_collect_source_files_finds_source_files() {
        let project = scratch_project();
        let src_dir = project.path().join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();
        fs::write(src_dir.join("lib.py"), "def f(): pass").unwrap();
        fs::write(src_dir.join("notes.txt"), "not source").unwrap();

        let found = collect_source_files(project.path());
        assert_eq!(found.len(), 2);
        assert!(found.iter().any(|file| file.ends_with("main.rs")));
        assert!(found.iter().any(|file| file.ends_with("lib.py")));
    }

    #[test]
    fn test_collect_source_files_skips_hidden_and_build_dirs() {
        let project = scratch_project();

        let ignored_dirs = [".git", "target", "node_modules", "__pycache__", "vendor"];
        for ignored in ignored_dirs.iter() {
            let dir = project.path().join(ignored);
            fs::create_dir_all(&dir).unwrap();
            fs::write(dir.join("hidden.rs"), "fn f() {}").unwrap();
        }

        fs::write(project.path().join("visible.rs"), "fn main() {}").unwrap();

        let found = collect_source_files(project.path());
        assert_eq!(found.len(), 1);
        assert!(found[0].ends_with("visible.rs"));
    }

    #[test]
    fn test_is_source_file_recognizes_all_extensions() {
        let recognised = [
            "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", "rb",
            "php", "kt", "swift", "cs", "scala", "ex", "exs", "lua",
        ];
        let rejected = ["txt", "md", "json", "yaml", "toml", "lock", "png"];

        for ext in recognised.iter() {
            let path = PathBuf::from(format!("test.{ext}"));
            assert!(
                is_source_file(&path),
                "Extension .{ext} should be recognized as source"
            );
        }

        for ext in rejected.iter() {
            let path = PathBuf::from(format!("test.{ext}"));
            assert!(
                !is_source_file(&path),
                "Extension .{ext} should NOT be recognized as source"
            );
        }
    }

    // ----- state file overwrite and serialization --------------------------------------

    #[test]
    fn test_state_db_overwritten_on_second_first_run() {
        let project = scratch_project();

        let first = create_state_db(project.path()).unwrap();

        // Let the clock advance so the two timestamps cannot collide.
        std::thread::sleep(std::time::Duration::from_millis(10));

        let second = create_state_db(project.path()).unwrap();

        assert_ne!(
            first.created_at, second.created_at,
            "Second creation should have a later timestamp"
        );

        let raw = fs::read_to_string(state_db_path(project.path())).unwrap();
        let on_disk: BugbotState = serde_json::from_str(&raw).unwrap();
        assert_eq!(on_disk, second);
    }

    #[test]
    fn test_bugbot_state_serialization_roundtrip() {
        let original = BugbotState {
            version: 1,
            created_at: "2026-03-02T12:00:00Z".to_string(),
            baseline_built: true,
        };

        let encoded = serde_json::to_string_pretty(&original).unwrap();
        let decoded: BugbotState = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_first_run_result_fields() {
        let project = scratch_project();

        let silent = |_line: &str| {};
        let outcome = run_first_run_scan(project.path(), &silent).unwrap();

        assert_eq!(outcome.state.version, STATE_VERSION);
        assert!(outcome.state.baseline_built);
        let attempted = outcome.baselines_built.len() + outcome.baseline_errors.len();
        assert_eq!(attempted, 4);
    }

    #[test]
    fn test_first_run_empty_project_succeeds() {
        let project = scratch_project();

        let silent = |_line: &str| {};
        let outcome = run_first_run_scan(project.path(), &silent);

        assert!(
            outcome.is_ok(),
            "First-run scan should succeed even on an empty project: {:?}",
            outcome.err()
        );
    }

    // ----- baseline call graph cache ---------------------------------------------------

    #[test]
    fn test_save_load_baseline_call_graph_roundtrip() {
        let project = scratch_project();

        let graph = serde_json::json!({
            "edges": [
                {"src_file": "a.py", "src_func": "foo", "dst_file": "b.py", "dst_func": "bar"}
            ]
        });

        save_baseline_call_graph(project.path(), &graph, "abc123", "python").unwrap();

        let cached = load_cached_baseline_call_graph(project.path(), "abc123");
        assert!(cached.is_some(), "Cache should load with matching commit");
        assert_eq!(cached.unwrap(), graph);
    }

    #[test]
    fn test_load_baseline_rejects_stale_commit() {
        let project = scratch_project();

        let graph = serde_json::json!({"edges": []});
        save_baseline_call_graph(project.path(), &graph, "abc123", "python").unwrap();

        let cached = load_cached_baseline_call_graph(project.path(), "def456");
        assert!(cached.is_none(), "Cache should not load with different commit");
    }

    #[test]
    fn test_load_baseline_nonexistent_cache() {
        let project = scratch_project();
        let cached = load_cached_baseline_call_graph(project.path(), "abc123");
        assert!(cached.is_none(), "No cache should return None");
    }

    #[test]
    fn test_baseline_meta_serialization() {
        let original = BaselineCallGraphMeta {
            commit_hash: "abc123".to_string(),
            language: "rust".to_string(),
            built_at: "2026-03-16T12:00:00Z".to_string(),
        };

        let encoded = serde_json::to_string_pretty(&original).unwrap();
        let decoded: BaselineCallGraphMeta = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_save_creates_bugbot_dir() {
        let project = scratch_project();
        let cache_dir = bugbot_dir(project.path());
        assert!(!cache_dir.exists());

        let graph = serde_json::json!({"edges": []});
        save_baseline_call_graph(project.path(), &graph, "abc123", "python").unwrap();

        assert!(cache_dir.exists());
        assert!(cache_dir.join(BASELINE_CG_FILENAME).exists());
        assert!(cache_dir.join(BASELINE_CG_META_FILENAME).exists());
    }
}