Skip to main content

tldr_cli/commands/bugbot/
first_run.rs

1//! First-run auto-scan behavior for bugbot (PM-34).
2//!
3//! When bugbot detects no prior state (`.bugbot/state.db` does not exist),
4//! it automatically runs a lightweight scan to establish baselines. This scan
5//! builds:
6//!
7//! - Project call graph (cached for daemon)
8//! - Per-file complexity and maintainability baselines
9//! - Clone fragment index
10//! - Temporal pattern database
11//!
12//! Budget: <10s for a 50K LOC project (one-time cost). Runs ONCE, transparently,
13//! on first `bugbot check` invocation.
14//!
15//! # Baseline Policy
16//!
17//! For files with no git history (new project, or new files in a monorepo),
18//! delta engines treat the "before" as empty:
19//!
20//! - All current smells, clones, and complexity are "new" (reported)
21//! - All current contracts are the baseline (no regression possible)
22//! - Guard-removed and contract-regression produce no findings (no prior state)
23//!
24//! # Progress Indication
25//!
26//! Prints `"Building initial baselines... (one-time, ~8s)"` so users understand
27//! why the first run is slow.
28
29use std::path::{Path, PathBuf};
30use std::process::Command;
31use std::time::Instant;
32
33use anyhow::{Context, Result};
34use serde::{Deserialize, Serialize};
35
/// Name of the bugbot state directory (created under the project root).
///
/// Every path helper in this module (`bugbot_dir`, `state_db_path`) is derived
/// from this single constant.
const BUGBOT_DIR: &str = ".bugbot";

/// Name of the state database file within the `.bugbot/` directory.
///
/// Despite the `.db` extension the file holds a JSON-serialized `BugbotState`.
const STATE_DB_FILENAME: &str = "state.db";

/// State file version for forward compatibility.
///
/// Written into `BugbotState::version` by `create_state_db`. Note that
/// `detect_first_run` does not currently compare the stored version against
/// this constant.
const STATE_VERSION: u32 = 1;
44
/// Persisted state for bugbot across runs.
///
/// Stored as JSON in `.bugbot/state.db`. Contains metadata about when
/// the baseline was built and which version of the state format is in use.
///
/// Written by `create_state_db` and read back by `detect_first_run`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct BugbotState {
    /// Schema version for forward compatibility (`STATE_VERSION` at write time).
    pub version: u32,
    /// Timestamp when the state file was written, produced by
    /// `chrono::Utc::now().to_rfc3339()`.
    pub created_at: String,
    /// Whether the baseline has been fully built. `create_state_db` always
    /// writes `true`; `detect_first_run` treats `false` as "first run".
    pub baseline_built: bool,
}
58
/// Result of checking whether this is a first run.
///
/// Produced by `detect_first_run`.
#[derive(Debug, Clone, PartialEq)]
pub enum FirstRunStatus {
    /// No usable prior state exists (missing, unreadable, malformed, or
    /// `baseline_built == false`). Baselines need to be built.
    FirstRun,
    /// State exists and baselines have been built previously.
    SubsequentRun {
        /// The persisted state from the previous run.
        state: BugbotState,
    },
}
70
71impl FirstRunStatus {
72    /// Returns true if this is the first run (no prior state).
73    pub fn is_first_run(&self) -> bool {
74        matches!(self, FirstRunStatus::FirstRun)
75    }
76}
77
78/// Returns the path to the `.bugbot/` directory for a given project root.
79pub fn bugbot_dir(project_root: &Path) -> PathBuf {
80    project_root.join(BUGBOT_DIR)
81}
82
83/// Returns the path to the state database file for a given project root.
84pub fn state_db_path(project_root: &Path) -> PathBuf {
85    bugbot_dir(project_root).join(STATE_DB_FILENAME)
86}
87
88/// Detect whether this is a first run by checking for `.bugbot/state.db`.
89///
90/// Returns `FirstRunStatus::FirstRun` if no state file exists, or
91/// `FirstRunStatus::SubsequentRun` if one does. A malformed state file
92/// is treated as a first run (the file will be overwritten).
93pub fn detect_first_run(project_root: &Path) -> FirstRunStatus {
94    let path = state_db_path(project_root);
95
96    if !path.exists() {
97        return FirstRunStatus::FirstRun;
98    }
99
100    match std::fs::read_to_string(&path) {
101        Ok(contents) => match serde_json::from_str::<BugbotState>(&contents) {
102            Ok(state) if state.baseline_built => FirstRunStatus::SubsequentRun { state },
103            Ok(_) => {
104                // baseline_built is false — treat as first run so baselines
105                // get built (previous run may have been interrupted).
106                FirstRunStatus::FirstRun
107            }
108            Err(_) => {
109                // Malformed state file — treat as first run and overwrite.
110                FirstRunStatus::FirstRun
111            }
112        },
113        Err(_) => {
114            // Cannot read file — treat as first run.
115            FirstRunStatus::FirstRun
116        }
117    }
118}
119
120/// Create the `.bugbot/` directory and write the initial `state.db` file.
121///
122/// Marks `baseline_built: true` so subsequent runs skip the baseline scan.
123/// Returns the written `BugbotState`.
124pub fn create_state_db(project_root: &Path) -> Result<BugbotState> {
125    let dir = bugbot_dir(project_root);
126    std::fs::create_dir_all(&dir)?;
127
128    let state = BugbotState {
129        version: STATE_VERSION,
130        created_at: chrono::Utc::now().to_rfc3339(),
131        baseline_built: true,
132    };
133
134    let json = serde_json::to_string_pretty(&state)?;
135    std::fs::write(state_db_path(project_root), json)?;
136
137    Ok(state)
138}
139
140/// Run the first-run baseline scan.
141///
142/// This is the main entry point called from `check.rs` when `detect_first_run`
143/// returns `FirstRunStatus::FirstRun`. It:
144///
145/// 1. Prints a progress message to stderr
146/// 2. Builds initial baselines (call graph, complexity, clones, temporal)
147/// 3. Creates the `.bugbot/state.db` file
148///
149/// Returns the duration of the baseline scan in milliseconds.
150///
151/// The `writer_fn` parameter emits progress messages. In production this
152/// is wired to `OutputWriter::progress`; in tests it can capture output.
153pub fn run_first_run_scan<F>(
154    project_root: &Path,
155    writer_fn: &F,
156) -> Result<FirstRunResult>
157where
158    F: Fn(&str),
159{
160    let start = Instant::now();
161
162    writer_fn("Building initial baselines... (one-time, ~8s)");
163
164    // Build initial baselines. These populate the caches that L2 engines
165    // will use during the subsequent analysis pass.
166    //
167    // Each baseline step is best-effort: if it fails, we log the error
168    // but continue with the remaining baselines. The L2 engines handle
169    // missing cache data gracefully (they recompute on demand).
170    let mut baselines_built: Vec<String> = Vec::new();
171    let mut baseline_errors: Vec<String> = Vec::new();
172
173    // 1. Call graph baseline
174    match build_call_graph_baseline(project_root) {
175        Ok(()) => baselines_built.push("call_graph".to_string()),
176        Err(e) => baseline_errors.push(format!("call_graph: {e}")),
177    }
178
179    // 2. Complexity baseline
180    match build_complexity_baseline(project_root) {
181        Ok(()) => baselines_built.push("complexity".to_string()),
182        Err(e) => baseline_errors.push(format!("complexity: {e}")),
183    }
184
185    // 3. Clone fragment index
186    match build_clone_baseline(project_root) {
187        Ok(()) => baselines_built.push("clones".to_string()),
188        Err(e) => baseline_errors.push(format!("clones: {e}")),
189    }
190
191    // 4. Temporal pattern database
192    match build_temporal_baseline(project_root) {
193        Ok(()) => baselines_built.push("temporal".to_string()),
194        Err(e) => baseline_errors.push(format!("temporal: {e}")),
195    }
196
197    // Create state file to mark first run complete
198    let state = create_state_db(project_root)?;
199
200    let elapsed_ms = start.elapsed().as_millis() as u64;
201
202    writer_fn(&format!(
203        "Baselines built in {}ms ({} succeeded, {} failed)",
204        elapsed_ms,
205        baselines_built.len(),
206        baseline_errors.len()
207    ));
208
209    Ok(FirstRunResult {
210        state,
211        elapsed_ms,
212        baselines_built,
213        baseline_errors,
214    })
215}
216
/// Result of a first-run baseline scan.
///
/// Returned by `run_first_run_scan`. `baselines_built.len() +
/// baseline_errors.len()` equals the number of baseline steps attempted.
#[derive(Debug, Clone)]
pub struct FirstRunResult {
    /// The state that was persisted to disk.
    pub state: BugbotState,
    /// Duration of the baseline scan in milliseconds, measured from the start
    /// of `run_first_run_scan` until after the state file was written.
    pub elapsed_ms: u64,
    /// Names of baselines that were successfully built
    /// (`"call_graph"`, `"complexity"`, `"clones"`, `"temporal"`).
    pub baselines_built: Vec<String>,
    /// Error messages for baselines that failed, formatted as
    /// `"<baseline name>: <error>"`.
    pub baseline_errors: Vec<String>,
}
229
230// ============================================================================
231// Baseline call graph cache
232//
233// Saves/loads the baseline call graph as JSON so that subsequent bugbot runs
234// can skip rebuilding it (and skip creating a git worktree + subprocess).
235// ============================================================================
236
/// File name (within `.bugbot/`) of the cached baseline call graph JSON.
const BASELINE_CG_FILENAME: &str = "baseline_call_graph.json";
/// File name (within `.bugbot/`) of the JSON metadata (`BaselineCallGraphMeta`)
/// describing the cached baseline call graph.
const BASELINE_CG_META_FILENAME: &str = "baseline_call_graph_meta.json";
240
/// Metadata for a cached baseline call graph.
///
/// Written next to the cache by `save_baseline_call_graph` and consulted by
/// `load_cached_baseline_call_graph` to decide whether the cache is usable.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct BaselineCallGraphMeta {
    /// Git commit hash the baseline was built from. This is the only field
    /// compared when loading the cache.
    pub commit_hash: String,
    /// Language the call graph was built for. Recorded for reference; the
    /// loader in this file does not check it.
    pub language: String,
    /// Timestamp when the cache was written, produced by
    /// `chrono::Utc::now().to_rfc3339()`.
    pub built_at: String,
}
251
252/// Save a baseline call graph to `.bugbot/baseline_call_graph.json`.
253///
254/// Also writes a metadata file with the commit hash and language so that
255/// staleness can be detected on load. Creates the `.bugbot/` directory if
256/// it does not already exist.
257pub fn save_baseline_call_graph(
258    project_root: &Path,
259    call_graph: &serde_json::Value,
260    commit_hash: &str,
261    language: &str,
262) -> Result<()> {
263    let dir = bugbot_dir(project_root);
264    std::fs::create_dir_all(&dir)?;
265
266    let cg_path = dir.join(BASELINE_CG_FILENAME);
267    let meta_path = dir.join(BASELINE_CG_META_FILENAME);
268
269    let meta = BaselineCallGraphMeta {
270        commit_hash: commit_hash.to_string(),
271        language: language.to_string(),
272        built_at: chrono::Utc::now().to_rfc3339(),
273    };
274
275    std::fs::write(&cg_path, serde_json::to_string(call_graph)?)
276        .context("writing baseline call graph cache")?;
277    std::fs::write(&meta_path, serde_json::to_string_pretty(&meta)?)
278        .context("writing baseline call graph metadata")?;
279
280    Ok(())
281}
282
283/// Load a cached baseline call graph if the cache exists and was built from
284/// the expected commit.
285///
286/// Returns `None` if:
287/// - No cache file exists
288/// - The metadata file is missing or malformed
289/// - The cached commit hash does not match `expected_commit`
290pub fn load_cached_baseline_call_graph(
291    project_root: &Path,
292    expected_commit: &str,
293) -> Option<serde_json::Value> {
294    let dir = bugbot_dir(project_root);
295    let cg_path = dir.join(BASELINE_CG_FILENAME);
296    let meta_path = dir.join(BASELINE_CG_META_FILENAME);
297
298    let meta_str = std::fs::read_to_string(&meta_path).ok()?;
299    let meta: BaselineCallGraphMeta = serde_json::from_str(&meta_str).ok()?;
300
301    if meta.commit_hash != expected_commit {
302        return None;
303    }
304
305    let cg_str = std::fs::read_to_string(&cg_path).ok()?;
306    serde_json::from_str(&cg_str).ok()
307}
308
309/// Resolve a git ref (e.g. "HEAD", "main", "origin/main") to a full commit hash.
310///
311/// Runs `git rev-parse <ref>` in the project directory. Returns an error
312/// if git is not available or the ref cannot be resolved.
313pub fn resolve_git_ref(project_root: &Path, git_ref: &str) -> Result<String> {
314    let output = Command::new("git")
315        .args(["rev-parse", git_ref])
316        .current_dir(project_root)
317        .output()
318        .context("Failed to run git rev-parse")?;
319
320    if !output.status.success() {
321        let stderr = String::from_utf8_lossy(&output.stderr);
322        anyhow::bail!("git rev-parse {} failed: {}", git_ref, stderr.trim());
323    }
324
325    Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
326}
327
328// ============================================================================
329// Baseline builders
330//
331// Each function builds one category of baseline data. They are best-effort:
332// failures are captured as errors but do not abort the first-run process.
333// These call into existing tldr_core APIs that the L2 engines already use.
334// ============================================================================
335
336/// Build the project call graph and cache it to `.bugbot/baseline_call_graph.json`.
337///
338/// Uses `tldr_core::callgraph::build_project_call_graph` to scan all source
339/// files and create the call graph. The result is serialized to JSON and
340/// saved so that subsequent bugbot runs can reuse it as the baseline
341/// (avoiding a worktree + subprocess rebuild).
342fn build_call_graph_baseline(project_root: &Path) -> Result<()> {
343    // Detect the project language for call graph building.
344    let language = match tldr_core::Language::from_directory(project_root) {
345        Some(lang) => lang,
346        None => return Ok(()), // No detectable language, skip call graph
347    };
348
349    let call_graph =
350        tldr_core::callgraph::build_project_call_graph(project_root, language, None, true)
351            .map_err(|e| anyhow::anyhow!("{e}"))?;
352
353    // Serialize and cache the baseline. Non-fatal on failure — the
354    // differential engine will fall back to the worktree approach.
355    let call_graph_json = serde_json::to_value(&call_graph)
356        .map_err(|e| anyhow::anyhow!("serialize call graph: {e}"))?;
357
358    let commit_hash = resolve_git_ref(project_root, "HEAD").unwrap_or_default();
359    if !commit_hash.is_empty() {
360        if let Err(e) = save_baseline_call_graph(
361            project_root,
362            &call_graph_json,
363            &commit_hash,
364            language.as_str(),
365        ) {
366            eprintln!("Warning: failed to cache baseline call graph: {e}");
367        }
368    }
369
370    Ok(())
371}
372
373/// Build per-file complexity baselines.
374///
375/// Scans source files to compute cyclomatic complexity for each function.
376/// The DeltaEngine uses these as the "before" values for complexity-increase
377/// detection.
378fn build_complexity_baseline(project_root: &Path) -> Result<()> {
379    // Walk source files and compute complexity for each.
380    // On first run, these values become the baseline. Subsequent runs
381    // compare current complexity against these baselines.
382    let source_files = collect_source_files(project_root);
383    for file in &source_files {
384        if let Ok(contents) = std::fs::read_to_string(file) {
385            let lang = tldr_core::Language::from_path(file);
386            if let Some(language) = lang {
387                // calculate_all_complexities scans every function in the file.
388                let _complexities =
389                    tldr_core::metrics::calculate_all_complexities(&contents, language);
390            }
391        }
392    }
393    Ok(())
394}
395
396/// Build the clone fragment index.
397///
398/// Scans source files to detect code clones. The DeltaEngine uses this
399/// index to determine which clones are "new" vs pre-existing.
400fn build_clone_baseline(project_root: &Path) -> Result<()> {
401    let options = tldr_core::analysis::clones::ClonesOptions::default();
402    let _clones = tldr_core::analysis::clones::detect_clones(project_root, &options)
403        .map_err(|e| anyhow::anyhow!("{e}"))?;
404    Ok(())
405}
406
407/// Build the temporal pattern database.
408///
409/// Mines temporal ordering constraints from source files (e.g., "open must
410/// precede read"). The temporal finding extractor uses these constraints to
411/// detect violations in changed code.
412fn build_temporal_baseline(project_root: &Path) -> Result<()> {
413    // Temporal mining works on function bodies. We scan all functions in all
414    // source files to build the constraint database. This is lightweight since
415    // it only extracts method-call sequences from ASTs.
416    let source_files = collect_source_files(project_root);
417    for file in &source_files {
418        let lang = tldr_core::Language::from_path(file);
419        if let Some(language) = lang {
420            let _structure = tldr_core::ast::get_code_structure(
421                file,
422                language,
423                0, // no depth limit
424                None,
425            );
426        }
427    }
428    Ok(())
429}
430
431/// Collect source files from the project directory.
432///
433/// Walks the project root recursively and returns paths to files with
434/// recognized source extensions. Skips hidden directories, `target/`,
435/// `node_modules/`, and `vendor/` directories.
436fn collect_source_files(project_root: &Path) -> Vec<PathBuf> {
437    let mut files = Vec::new();
438    collect_source_files_recursive(project_root, &mut files);
439    files
440}
441
442/// Recursive helper for `collect_source_files`.
443fn collect_source_files_recursive(dir: &Path, files: &mut Vec<PathBuf>) {
444    let entries = match std::fs::read_dir(dir) {
445        Ok(e) => e,
446        Err(_) => return,
447    };
448
449    for entry in entries.flatten() {
450        let path = entry.path();
451        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
452            // Skip hidden directories and common non-source dirs
453            if name.starts_with('.')
454                || name == "target"
455                || name == "node_modules"
456                || name == "vendor"
457                || name == "__pycache__"
458                || name == "dist"
459                || name == "build"
460            {
461                continue;
462            }
463        }
464
465        if path.is_dir() {
466            collect_source_files_recursive(&path, files);
467        } else if is_source_file(&path) {
468            files.push(path);
469        }
470    }
471}
472
/// Decide whether `path` looks like a source file, judged purely by its
/// extension.
///
/// The comparison is case-sensitive and no filesystem access takes place.
/// Paths without an extension, or whose extension is not valid UTF-8, are
/// not source files.
fn is_source_file(path: &Path) -> bool {
    /// Extensions treated as source code.
    const SOURCE_EXTENSIONS: &[&str] = &[
        "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", "rb", "php",
        "kt", "swift", "cs", "scala", "ex", "exs", "lua",
    ];

    path.extension()
        .and_then(|raw| raw.to_str())
        .map_or(false, |ext| SOURCE_EXTENSIONS.contains(&ext))
}
504
#[cfg(test)]
mod tests {
    // Unit tests for first-run detection, state persistence, the baseline scan
    // entry point, source file collection, and the baseline call graph cache.
    // Tests that touch the filesystem each work inside their own `TempDir`.

    use super::*;
    use std::cell::RefCell;
    use std::fs;
    use tempfile::TempDir;

    // =========================================================================
    // Detection tests
    // =========================================================================

    /// An empty project directory has no `.bugbot/state.db`, so it is a first run.
    #[test]
    fn test_first_run_detects_no_state_db() {
        let tmp = TempDir::new().unwrap();
        let status = detect_first_run(tmp.path());
        assert_eq!(status, FirstRunStatus::FirstRun);
        assert!(status.is_first_run());
    }

    /// A valid state file with `baseline_built: true` is reported as a
    /// subsequent run, and the persisted fields are returned unchanged.
    #[test]
    fn test_first_run_skips_if_state_exists() {
        let tmp = TempDir::new().unwrap();

        // Create a valid state file
        let dir = tmp.path().join(BUGBOT_DIR);
        fs::create_dir_all(&dir).unwrap();
        let state = BugbotState {
            version: 1,
            created_at: "2026-01-15T10:00:00Z".to_string(),
            baseline_built: true,
        };
        fs::write(
            dir.join(STATE_DB_FILENAME),
            serde_json::to_string_pretty(&state).unwrap(),
        )
        .unwrap();

        let status = detect_first_run(tmp.path());
        assert!(!status.is_first_run());
        match status {
            FirstRunStatus::SubsequentRun { state: s } => {
                assert_eq!(s.version, 1);
                assert!(s.baseline_built);
                assert_eq!(s.created_at, "2026-01-15T10:00:00Z");
            }
            _ => panic!("Expected SubsequentRun"),
        }
    }

    /// A state file that is not valid JSON is treated as a first run.
    #[test]
    fn test_first_run_treats_malformed_state_as_first_run() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join(BUGBOT_DIR);
        fs::create_dir_all(&dir).unwrap();
        fs::write(dir.join(STATE_DB_FILENAME), "not valid json {{{").unwrap();

        let status = detect_first_run(tmp.path());
        assert!(status.is_first_run());
    }

    /// A well-formed state file with `baseline_built: false` is treated as a
    /// first run so that the baselines get rebuilt.
    #[test]
    fn test_first_run_treats_incomplete_baseline_as_first_run() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join(BUGBOT_DIR);
        fs::create_dir_all(&dir).unwrap();
        let state = BugbotState {
            version: 1,
            created_at: "2026-01-15T10:00:00Z".to_string(),
            baseline_built: false, // interrupted previous run
        };
        fs::write(
            dir.join(STATE_DB_FILENAME),
            serde_json::to_string_pretty(&state).unwrap(),
        )
        .unwrap();

        let status = detect_first_run(tmp.path());
        assert!(status.is_first_run(), "baseline_built=false should be treated as first run");
    }

    // =========================================================================
    // State creation tests
    // =========================================================================

    /// `create_state_db` writes a parseable state file whose contents equal
    /// the returned `BugbotState`.
    #[test]
    fn test_first_run_creates_state_db() {
        let tmp = TempDir::new().unwrap();
        assert!(!state_db_path(tmp.path()).exists());

        let state = create_state_db(tmp.path()).unwrap();
        assert!(state.baseline_built);
        assert_eq!(state.version, STATE_VERSION);
        assert!(!state.created_at.is_empty());

        // Verify file was created on disk
        assert!(state_db_path(tmp.path()).exists());

        // Verify file content is valid JSON
        let contents = fs::read_to_string(state_db_path(tmp.path())).unwrap();
        let parsed: BugbotState = serde_json::from_str(&contents).unwrap();
        assert_eq!(parsed, state);
    }

    /// After `create_state_db`, detection flips from first run to subsequent run.
    #[test]
    fn test_subsequent_run_uses_existing_baselines() {
        let tmp = TempDir::new().unwrap();

        // First run: creates state
        assert!(detect_first_run(tmp.path()).is_first_run());
        let state = create_state_db(tmp.path()).unwrap();

        // Second run: detects existing state
        let status = detect_first_run(tmp.path());
        assert!(!status.is_first_run());
        match status {
            FirstRunStatus::SubsequentRun { state: s } => {
                assert_eq!(s.version, state.version);
                assert_eq!(s.created_at, state.created_at);
                assert!(s.baseline_built);
            }
            _ => panic!("Expected SubsequentRun after create_state_db"),
        }
    }

    // =========================================================================
    // Path helper tests
    // =========================================================================

    #[test]
    fn test_bugbot_dir_path() {
        let root = Path::new("/projects/myapp");
        assert_eq!(bugbot_dir(root), PathBuf::from("/projects/myapp/.bugbot"));
    }

    #[test]
    fn test_state_db_path_correct() {
        let root = Path::new("/projects/myapp");
        assert_eq!(
            state_db_path(root),
            PathBuf::from("/projects/myapp/.bugbot/state.db")
        );
    }

    // =========================================================================
    // First-run scan tests
    // =========================================================================

    /// The scan writes the state file and accounts for every baseline step,
    /// whether it succeeded or failed.
    #[test]
    fn test_first_run_scan_creates_state_and_records_baselines() {
        let tmp = TempDir::new().unwrap();

        // Create a minimal source file so baseline builders have something to scan
        let src_dir = tmp.path().join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(
            src_dir.join("main.py"),
            "def hello():\n    return 42\n",
        )
        .unwrap();

        // `RefCell` lets the `Fn` closure record messages through a shared borrow.
        let messages: RefCell<Vec<String>> = RefCell::new(Vec::new());
        let writer = |msg: &str| messages.borrow_mut().push(msg.to_string());

        let result = run_first_run_scan(tmp.path(), &writer).unwrap();

        // State should be created
        assert!(state_db_path(tmp.path()).exists());
        assert!(result.state.baseline_built);

        // Should have attempted all 4 baseline categories
        let total = result.baselines_built.len() + result.baseline_errors.len();
        assert_eq!(total, 4, "Should attempt all 4 baseline categories");

        // Elapsed time is only checked against a generous upper bound
        assert!(result.elapsed_ms < 30_000, "Scan should complete in reasonable time");
    }

    /// The scan emits both the start-of-scan and the completion messages
    /// through the supplied writer.
    #[test]
    fn test_first_run_progress_indication() {
        let tmp = TempDir::new().unwrap();

        let messages: RefCell<Vec<String>> = RefCell::new(Vec::new());
        let writer = |msg: &str| messages.borrow_mut().push(msg.to_string());

        let _result = run_first_run_scan(tmp.path(), &writer).unwrap();

        let messages = messages.into_inner();

        // Must print the specific progress message from the spec
        assert!(
            messages.iter().any(|m| m.contains("Building initial baselines")),
            "Must print progress message containing 'Building initial baselines'. Got: {:?}",
            messages
        );

        assert!(
            messages.iter().any(|m| m.contains("one-time")),
            "Progress message must mention one-time cost. Got: {:?}",
            messages
        );

        // Must print completion message
        assert!(
            messages.iter().any(|m| m.contains("Baselines built in")),
            "Must print completion message. Got: {:?}",
            messages
        );
    }

    // test_first_run_baseline_policy_no_prior_state removed: guard and
    // contract extractors were killed (0% Ashby true-positive rate).

    // =========================================================================
    // Source file collection tests
    // =========================================================================

    /// Only files with recognized source extensions are collected.
    #[test]
    fn test_collect_source_files_finds_source_files() {
        let tmp = TempDir::new().unwrap();
        let src = tmp.path().join("src");
        fs::create_dir_all(&src).unwrap();
        fs::write(src.join("main.rs"), "fn main() {}").unwrap();
        fs::write(src.join("lib.py"), "def f(): pass").unwrap();
        fs::write(src.join("notes.txt"), "not source").unwrap();

        let files = collect_source_files(tmp.path());
        assert_eq!(files.len(), 2);
        assert!(files.iter().any(|f| f.ends_with("main.rs")));
        assert!(files.iter().any(|f| f.ends_with("lib.py")));
    }

    /// Source files inside hidden and build/dependency directories are ignored.
    #[test]
    fn test_collect_source_files_skips_hidden_and_build_dirs() {
        let tmp = TempDir::new().unwrap();

        // Create files in directories that should be skipped
        for dir_name in &[".git", "target", "node_modules", "__pycache__", "vendor"] {
            let dir = tmp.path().join(dir_name);
            fs::create_dir_all(&dir).unwrap();
            fs::write(dir.join("hidden.rs"), "fn f() {}").unwrap();
        }

        // Create a file that should be found
        fs::write(tmp.path().join("visible.rs"), "fn main() {}").unwrap();

        let files = collect_source_files(tmp.path());
        assert_eq!(files.len(), 1);
        assert!(files[0].ends_with("visible.rs"));
    }

    /// Every extension listed in `is_source_file` is accepted, and a sample of
    /// non-source extensions is rejected.
    #[test]
    fn test_is_source_file_recognizes_all_extensions() {
        let extensions = vec![
            "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp",
            "h", "hpp", "rb", "php", "kt", "swift", "cs", "scala", "ex", "exs", "lua",
        ];

        for ext in &extensions {
            let path = PathBuf::from(format!("test.{ext}"));
            assert!(
                is_source_file(&path),
                "Extension .{ext} should be recognized as source"
            );
        }

        // Non-source extensions
        for ext in &["txt", "md", "json", "yaml", "toml", "lock", "png"] {
            let path = PathBuf::from(format!("test.{ext}"));
            assert!(
                !is_source_file(&path),
                "Extension .{ext} should NOT be recognized as source"
            );
        }
    }

    /// Calling `create_state_db` twice overwrites the file with the newer state.
    #[test]
    fn test_state_db_overwritten_on_second_first_run() {
        let tmp = TempDir::new().unwrap();

        // First creation
        let state1 = create_state_db(tmp.path()).unwrap();

        // Small delay to ensure different timestamp
        std::thread::sleep(std::time::Duration::from_millis(10));

        // Second creation (e.g., after manual deletion of state, or interrupted first run)
        let state2 = create_state_db(tmp.path()).unwrap();

        // Only inequality is asserted here; ordering of the two timestamps is not checked.
        assert_ne!(
            state1.created_at, state2.created_at,
            "Second creation should have a later timestamp"
        );

        // File on disk should match the latest state
        let contents = fs::read_to_string(state_db_path(tmp.path())).unwrap();
        let parsed: BugbotState = serde_json::from_str(&contents).unwrap();
        assert_eq!(parsed, state2);
    }

    #[test]
    fn test_bugbot_state_serialization_roundtrip() {
        let state = BugbotState {
            version: 1,
            created_at: "2026-03-02T12:00:00Z".to_string(),
            baseline_built: true,
        };

        let json = serde_json::to_string_pretty(&state).unwrap();
        let parsed: BugbotState = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, state);
    }

    /// The scan result carries the persisted state and accounts for all four
    /// baseline steps even when the project is empty.
    #[test]
    fn test_first_run_result_fields() {
        let tmp = TempDir::new().unwrap();

        let writer = |_msg: &str| {};
        let result = run_first_run_scan(tmp.path(), &writer).unwrap();

        assert_eq!(result.state.version, STATE_VERSION);
        assert!(result.state.baseline_built);
        // With no source files every baseline step is still attempted; only
        // the total (succeeded + failed) is asserted here.
        let total = result.baselines_built.len() + result.baseline_errors.len();
        assert_eq!(total, 4);
    }

    #[test]
    fn test_first_run_empty_project_succeeds() {
        // A completely empty project directory should still complete
        // the first-run scan without errors.
        let tmp = TempDir::new().unwrap();

        let writer = |_msg: &str| {};
        let result = run_first_run_scan(tmp.path(), &writer);

        assert!(
            result.is_ok(),
            "First-run scan should succeed even on an empty project: {:?}",
            result.err()
        );
    }

    // =========================================================================
    // Baseline call graph cache tests
    // =========================================================================

    /// A saved call graph loads back unchanged when the commit hash matches.
    #[test]
    fn test_save_load_baseline_call_graph_roundtrip() {
        let tmp = TempDir::new().unwrap();

        let cg = serde_json::json!({
            "edges": [
                {"src_file": "a.py", "src_func": "foo", "dst_file": "b.py", "dst_func": "bar"}
            ]
        });

        save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();

        let loaded = load_cached_baseline_call_graph(tmp.path(), "abc123");
        assert!(loaded.is_some(), "Cache should load with matching commit");
        assert_eq!(loaded.unwrap(), cg);
    }

    /// A cache built from a different commit is rejected as stale.
    #[test]
    fn test_load_baseline_rejects_stale_commit() {
        let tmp = TempDir::new().unwrap();

        let cg = serde_json::json!({"edges": []});
        save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();

        let loaded = load_cached_baseline_call_graph(tmp.path(), "def456");
        assert!(loaded.is_none(), "Cache should not load with different commit");
    }

    #[test]
    fn test_load_baseline_nonexistent_cache() {
        let tmp = TempDir::new().unwrap();
        let loaded = load_cached_baseline_call_graph(tmp.path(), "abc123");
        assert!(loaded.is_none(), "No cache should return None");
    }

    #[test]
    fn test_baseline_meta_serialization() {
        let meta = BaselineCallGraphMeta {
            commit_hash: "abc123".to_string(),
            language: "rust".to_string(),
            built_at: "2026-03-16T12:00:00Z".to_string(),
        };

        let json = serde_json::to_string_pretty(&meta).unwrap();
        let parsed: BaselineCallGraphMeta = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, meta);
    }

    /// Saving creates `.bugbot/` on demand along with both cache files.
    #[test]
    fn test_save_creates_bugbot_dir() {
        let tmp = TempDir::new().unwrap();
        assert!(!bugbot_dir(tmp.path()).exists());

        let cg = serde_json::json!({"edges": []});
        save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();

        assert!(bugbot_dir(tmp.path()).exists());
        assert!(bugbot_dir(tmp.path()).join(BASELINE_CG_FILENAME).exists());
        assert!(bugbot_dir(tmp.path()).join(BASELINE_CG_META_FILENAME).exists());
    }
}
911}