Skip to main content

tldr_cli/commands/bugbot/
first_run.rs

1//! First-run auto-scan behavior for bugbot (PM-34).
2//!
3//! When bugbot detects no prior state (`.bugbot/state.db` does not exist),
4//! it automatically runs a lightweight scan to establish baselines. This scan
5//! builds:
6//!
7//! - Project call graph (cached for daemon)
8//! - Per-file complexity and maintainability baselines
9//! - Clone fragment index
10//! - Temporal pattern database
11//!
12//! Budget: <10s for a 50K LOC project (one-time cost). Runs ONCE, transparently,
13//! on first `bugbot check` invocation.
14//!
15//! # Baseline Policy
16//!
17//! For files with no git history (new project, or new files in a monorepo),
18//! delta engines treat the "before" as empty:
19//!
20//! - All current smells, clones, and complexity are "new" (reported)
21//! - All current contracts are the baseline (no regression possible)
22//! - Guard-removed and contract-regression produce no findings (no prior state)
23//!
24//! # Progress Indication
25//!
26//! Prints `"Building initial baselines... (one-time, ~8s)"` so users understand
27//! why the first run is slow.
28
29use std::path::{Path, PathBuf};
30use std::process::Command;
31use std::time::Instant;
32
33use anyhow::{Context, Result};
34use serde::{Deserialize, Serialize};
35
/// Name of the bugbot state directory (created under the project root).
///
/// Joined onto the project root by [`bugbot_dir`].
const BUGBOT_DIR: &str = ".bugbot";

/// Name of the state database file within the `.bugbot/` directory.
///
/// Despite the `.db` extension, the file holds a JSON-serialized
/// [`BugbotState`] (see [`create_state_db`]).
const STATE_DB_FILENAME: &str = "state.db";

/// State file version for forward compatibility.
///
/// Stored in [`BugbotState::version`] whenever [`create_state_db`] writes
/// the state file.
const STATE_VERSION: u32 = 1;
44
/// Persisted state for bugbot across runs.
///
/// Stored as JSON in `.bugbot/state.db`. Contains metadata about when
/// the baseline was built and which version of the state format is in use.
///
/// Written by [`create_state_db`] and read back by [`detect_first_run`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct BugbotState {
    /// Schema version for forward compatibility.
    pub version: u32,
    /// Timestamp when the baseline was first established, as an RFC 3339
    /// (ISO 8601 profile) string in UTC.
    pub created_at: String,
    /// Whether the baseline has been fully built. A persisted `false` makes
    /// [`detect_first_run`] report a first run again.
    pub baseline_built: bool,
}
58
/// Result of checking whether this is a first run.
///
/// Produced by [`detect_first_run`].
#[derive(Debug, Clone, PartialEq)]
pub enum FirstRunStatus {
    /// No usable prior state exists. Baselines need to be built.
    ///
    /// Also reported when the state file is unreadable, malformed, or has
    /// `baseline_built == false`.
    FirstRun,
    /// State exists and baselines have been built previously.
    SubsequentRun {
        /// The persisted state from the previous run.
        state: BugbotState,
    },
}
70
71impl FirstRunStatus {
72    /// Returns true if this is the first run (no prior state).
73    pub fn is_first_run(&self) -> bool {
74        matches!(self, FirstRunStatus::FirstRun)
75    }
76}
77
78/// Returns the path to the `.bugbot/` directory for a given project root.
79pub fn bugbot_dir(project_root: &Path) -> PathBuf {
80    project_root.join(BUGBOT_DIR)
81}
82
83/// Returns the path to the state database file for a given project root.
84pub fn state_db_path(project_root: &Path) -> PathBuf {
85    bugbot_dir(project_root).join(STATE_DB_FILENAME)
86}
87
88/// Detect whether this is a first run by checking for `.bugbot/state.db`.
89///
90/// Returns `FirstRunStatus::FirstRun` if no state file exists, or
91/// `FirstRunStatus::SubsequentRun` if one does. A malformed state file
92/// is treated as a first run (the file will be overwritten).
93pub fn detect_first_run(project_root: &Path) -> FirstRunStatus {
94    let path = state_db_path(project_root);
95
96    if !path.exists() {
97        return FirstRunStatus::FirstRun;
98    }
99
100    match std::fs::read_to_string(&path) {
101        Ok(contents) => match serde_json::from_str::<BugbotState>(&contents) {
102            Ok(state) if state.baseline_built => FirstRunStatus::SubsequentRun { state },
103            Ok(_) => {
104                // baseline_built is false — treat as first run so baselines
105                // get built (previous run may have been interrupted).
106                FirstRunStatus::FirstRun
107            }
108            Err(_) => {
109                // Malformed state file — treat as first run and overwrite.
110                FirstRunStatus::FirstRun
111            }
112        },
113        Err(_) => {
114            // Cannot read file — treat as first run.
115            FirstRunStatus::FirstRun
116        }
117    }
118}
119
120/// Create the `.bugbot/` directory and write the initial `state.db` file.
121///
122/// Marks `baseline_built: true` so subsequent runs skip the baseline scan.
123/// Returns the written `BugbotState`.
124pub fn create_state_db(project_root: &Path) -> Result<BugbotState> {
125    let dir = bugbot_dir(project_root);
126    std::fs::create_dir_all(&dir)?;
127
128    let state = BugbotState {
129        version: STATE_VERSION,
130        created_at: chrono::Utc::now().to_rfc3339(),
131        baseline_built: true,
132    };
133
134    let json = serde_json::to_string_pretty(&state)?;
135    std::fs::write(state_db_path(project_root), json)?;
136
137    Ok(state)
138}
139
140/// Run the first-run baseline scan.
141///
142/// This is the main entry point called from `check.rs` when `detect_first_run`
143/// returns `FirstRunStatus::FirstRun`. It:
144///
145/// 1. Prints a progress message to stderr
146/// 2. Builds initial baselines (call graph, complexity, clones, temporal)
147/// 3. Creates the `.bugbot/state.db` file
148///
149/// Returns the duration of the baseline scan in milliseconds.
150///
151/// The `writer_fn` parameter emits progress messages. In production this
152/// is wired to `OutputWriter::progress`; in tests it can capture output.
153pub fn run_first_run_scan<F>(project_root: &Path, writer_fn: &F) -> Result<FirstRunResult>
154where
155    F: Fn(&str),
156{
157    let start = Instant::now();
158
159    writer_fn("Building initial baselines... (one-time, ~8s)");
160
161    // Build initial baselines. These populate the caches that L2 engines
162    // will use during the subsequent analysis pass.
163    //
164    // Each baseline step is best-effort: if it fails, we log the error
165    // but continue with the remaining baselines. The L2 engines handle
166    // missing cache data gracefully (they recompute on demand).
167    let mut baselines_built: Vec<String> = Vec::new();
168    let mut baseline_errors: Vec<String> = Vec::new();
169
170    // 1. Call graph baseline
171    match build_call_graph_baseline(project_root) {
172        Ok(()) => baselines_built.push("call_graph".to_string()),
173        Err(e) => baseline_errors.push(format!("call_graph: {e}")),
174    }
175
176    // 2. Complexity baseline
177    match build_complexity_baseline(project_root) {
178        Ok(()) => baselines_built.push("complexity".to_string()),
179        Err(e) => baseline_errors.push(format!("complexity: {e}")),
180    }
181
182    // 3. Clone fragment index
183    match build_clone_baseline(project_root) {
184        Ok(()) => baselines_built.push("clones".to_string()),
185        Err(e) => baseline_errors.push(format!("clones: {e}")),
186    }
187
188    // 4. Temporal pattern database
189    match build_temporal_baseline(project_root) {
190        Ok(()) => baselines_built.push("temporal".to_string()),
191        Err(e) => baseline_errors.push(format!("temporal: {e}")),
192    }
193
194    // Create state file to mark first run complete
195    let state = create_state_db(project_root)?;
196
197    let elapsed_ms = start.elapsed().as_millis() as u64;
198
199    writer_fn(&format!(
200        "Baselines built in {}ms ({} succeeded, {} failed)",
201        elapsed_ms,
202        baselines_built.len(),
203        baseline_errors.len()
204    ));
205
206    Ok(FirstRunResult {
207        state,
208        elapsed_ms,
209        baselines_built,
210        baseline_errors,
211    })
212}
213
/// Result of a first-run baseline scan.
///
/// Returned by [`run_first_run_scan`]. Every baseline category appears in
/// exactly one of `baselines_built` or `baseline_errors`.
#[derive(Debug, Clone)]
pub struct FirstRunResult {
    /// The state that was persisted to disk.
    pub state: BugbotState,
    /// Duration of the baseline scan in milliseconds (includes writing the
    /// state file).
    pub elapsed_ms: u64,
    /// Names of baselines that were successfully built
    /// (`"call_graph"`, `"complexity"`, `"clones"`, `"temporal"`).
    pub baselines_built: Vec<String>,
    /// Error messages for baselines that failed, each formatted as
    /// `"<baseline name>: <error>"`.
    pub baseline_errors: Vec<String>,
}
226
227// ============================================================================
228// Baseline call graph cache
229//
230// Saves/loads the baseline call graph as JSON so that subsequent bugbot runs
231// can skip rebuilding it (and skip creating a git worktree + subprocess).
232// ============================================================================
233
/// Cache file names within `.bugbot/`.
///
/// This one holds the baseline call graph itself, serialized as JSON.
const BASELINE_CG_FILENAME: &str = "baseline_call_graph.json";
/// Holds the [`BaselineCallGraphMeta`] record (commit hash, language,
/// timestamp) used to decide whether the cached call graph is still valid.
const BASELINE_CG_META_FILENAME: &str = "baseline_call_graph_meta.json";
237
/// Metadata for a cached baseline call graph.
///
/// Written next to the cached graph by [`save_baseline_call_graph`] and
/// checked by [`load_cached_baseline_call_graph`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct BaselineCallGraphMeta {
    /// Git commit hash the baseline was built from. The loader compares this
    /// against the expected commit and rejects the cache on mismatch.
    pub commit_hash: String,
    /// Language the call graph was built for. Stored for reference; the
    /// loader in this file does not check it.
    pub language: String,
    /// Timestamp when the cache was written, as an RFC 3339 (ISO 8601
    /// profile) string in UTC.
    pub built_at: String,
}
248
249/// Save a baseline call graph to `.bugbot/baseline_call_graph.json`.
250///
251/// Also writes a metadata file with the commit hash and language so that
252/// staleness can be detected on load. Creates the `.bugbot/` directory if
253/// it does not already exist.
254pub fn save_baseline_call_graph(
255    project_root: &Path,
256    call_graph: &serde_json::Value,
257    commit_hash: &str,
258    language: &str,
259) -> Result<()> {
260    let dir = bugbot_dir(project_root);
261    std::fs::create_dir_all(&dir)?;
262
263    let cg_path = dir.join(BASELINE_CG_FILENAME);
264    let meta_path = dir.join(BASELINE_CG_META_FILENAME);
265
266    let meta = BaselineCallGraphMeta {
267        commit_hash: commit_hash.to_string(),
268        language: language.to_string(),
269        built_at: chrono::Utc::now().to_rfc3339(),
270    };
271
272    std::fs::write(&cg_path, serde_json::to_string(call_graph)?)
273        .context("writing baseline call graph cache")?;
274    std::fs::write(&meta_path, serde_json::to_string_pretty(&meta)?)
275        .context("writing baseline call graph metadata")?;
276
277    Ok(())
278}
279
280/// Load a cached baseline call graph if the cache exists and was built from
281/// the expected commit.
282///
283/// Returns `None` if:
284/// - No cache file exists
285/// - The metadata file is missing or malformed
286/// - The cached commit hash does not match `expected_commit`
287pub fn load_cached_baseline_call_graph(
288    project_root: &Path,
289    expected_commit: &str,
290) -> Option<serde_json::Value> {
291    let dir = bugbot_dir(project_root);
292    let cg_path = dir.join(BASELINE_CG_FILENAME);
293    let meta_path = dir.join(BASELINE_CG_META_FILENAME);
294
295    let meta_str = std::fs::read_to_string(&meta_path).ok()?;
296    let meta: BaselineCallGraphMeta = serde_json::from_str(&meta_str).ok()?;
297
298    if meta.commit_hash != expected_commit {
299        return None;
300    }
301
302    let cg_str = std::fs::read_to_string(&cg_path).ok()?;
303    serde_json::from_str(&cg_str).ok()
304}
305
306/// Resolve a git ref (e.g. "HEAD", "main", "origin/main") to a full commit hash.
307///
308/// Runs `git rev-parse <ref>` in the project directory. Returns an error
309/// if git is not available or the ref cannot be resolved.
310pub fn resolve_git_ref(project_root: &Path, git_ref: &str) -> Result<String> {
311    let output = Command::new("git")
312        .args(["rev-parse", git_ref])
313        .current_dir(project_root)
314        .output()
315        .context("Failed to run git rev-parse")?;
316
317    if !output.status.success() {
318        let stderr = String::from_utf8_lossy(&output.stderr);
319        anyhow::bail!("git rev-parse {} failed: {}", git_ref, stderr.trim());
320    }
321
322    Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
323}
324
325// ============================================================================
326// Baseline builders
327//
328// Each function builds one category of baseline data. They are best-effort:
329// failures are captured as errors but do not abort the first-run process.
330// These call into existing tldr_core APIs that the L2 engines already use.
331// ============================================================================
332
333/// Build the project call graph and cache it to `.bugbot/baseline_call_graph.json`.
334///
335/// Uses `tldr_core::callgraph::build_project_call_graph` to scan all source
336/// files and create the call graph. The result is serialized to JSON and
337/// saved so that subsequent bugbot runs can reuse it as the baseline
338/// (avoiding a worktree + subprocess rebuild).
339fn build_call_graph_baseline(project_root: &Path) -> Result<()> {
340    // Detect the project language for call graph building.
341    let language = match tldr_core::Language::from_directory(project_root) {
342        Some(lang) => lang,
343        None => return Ok(()), // No detectable language, skip call graph
344    };
345
346    let call_graph =
347        tldr_core::callgraph::build_project_call_graph(project_root, language, None, true)
348            .map_err(|e| anyhow::anyhow!("{e}"))?;
349
350    // Serialize and cache the baseline. Non-fatal on failure — the
351    // differential engine will fall back to the worktree approach.
352    let call_graph_json = serde_json::to_value(&call_graph)
353        .map_err(|e| anyhow::anyhow!("serialize call graph: {e}"))?;
354
355    let commit_hash = resolve_git_ref(project_root, "HEAD").unwrap_or_default();
356    if !commit_hash.is_empty() {
357        if let Err(e) = save_baseline_call_graph(
358            project_root,
359            &call_graph_json,
360            &commit_hash,
361            language.as_str(),
362        ) {
363            eprintln!("Warning: failed to cache baseline call graph: {e}");
364        }
365    }
366
367    Ok(())
368}
369
370/// Build per-file complexity baselines.
371///
372/// Scans source files to compute cyclomatic complexity for each function.
373/// The DeltaEngine uses these as the "before" values for complexity-increase
374/// detection.
375fn build_complexity_baseline(project_root: &Path) -> Result<()> {
376    // Walk source files and compute complexity for each.
377    // On first run, these values become the baseline. Subsequent runs
378    // compare current complexity against these baselines.
379    let source_files = collect_source_files(project_root);
380    for file in &source_files {
381        if let Ok(contents) = std::fs::read_to_string(file) {
382            let lang = tldr_core::Language::from_path(file);
383            if let Some(language) = lang {
384                // calculate_all_complexities scans every function in the file.
385                let _complexities =
386                    tldr_core::metrics::calculate_all_complexities(&contents, language);
387            }
388        }
389    }
390    Ok(())
391}
392
393/// Build the clone fragment index.
394///
395/// Scans source files to detect code clones. The DeltaEngine uses this
396/// index to determine which clones are "new" vs pre-existing.
397fn build_clone_baseline(project_root: &Path) -> Result<()> {
398    let options = tldr_core::analysis::clones::ClonesOptions::default();
399    let _clones = tldr_core::analysis::clones::detect_clones(project_root, &options)
400        .map_err(|e| anyhow::anyhow!("{e}"))?;
401    Ok(())
402}
403
404/// Build the temporal pattern database.
405///
406/// Mines temporal ordering constraints from source files (e.g., "open must
407/// precede read"). The temporal finding extractor uses these constraints to
408/// detect violations in changed code.
409fn build_temporal_baseline(project_root: &Path) -> Result<()> {
410    // Temporal mining works on function bodies. We scan all functions in all
411    // source files to build the constraint database. This is lightweight since
412    // it only extracts method-call sequences from ASTs.
413    let source_files = collect_source_files(project_root);
414    for file in &source_files {
415        let lang = tldr_core::Language::from_path(file);
416        if let Some(language) = lang {
417            let _structure = tldr_core::ast::get_code_structure(
418                file, language, 0, // no depth limit
419                None,
420            );
421        }
422    }
423    Ok(())
424}
425
426/// Collect source files from the project directory.
427///
428/// Walks the project root recursively and returns paths to files with
429/// recognized source extensions. Skips hidden directories, `target/`,
430/// `node_modules/`, and `vendor/` directories.
431fn collect_source_files(project_root: &Path) -> Vec<PathBuf> {
432    let mut files = Vec::new();
433    collect_source_files_recursive(project_root, &mut files);
434    files
435}
436
437/// Recursive helper for `collect_source_files`.
438fn collect_source_files_recursive(dir: &Path, files: &mut Vec<PathBuf>) {
439    let entries = match std::fs::read_dir(dir) {
440        Ok(e) => e,
441        Err(_) => return,
442    };
443
444    for entry in entries.flatten() {
445        let path = entry.path();
446        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
447            // Skip hidden directories and common non-source dirs
448            if name.starts_with('.')
449                || name == "target"
450                || name == "node_modules"
451                || name == "vendor"
452                || name == "__pycache__"
453                || name == "dist"
454                || name == "build"
455            {
456                continue;
457            }
458        }
459
460        if path.is_dir() {
461            collect_source_files_recursive(&path, files);
462        } else if is_source_file(&path) {
463            files.push(path);
464        }
465    }
466}
467
/// Tell whether `path` ends in one of the source-code extensions bugbot
/// scans.
///
/// The comparison is case-sensitive and looks only at the final extension.
/// Paths without an extension, or whose extension is not valid UTF-8,
/// yield `false`. The file system is not consulted.
fn is_source_file(path: &Path) -> bool {
    const SOURCE_EXTENSIONS: &[&str] = &[
        "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", "rb", "php",
        "kt", "swift", "cs", "scala", "ex", "exs", "lua",
    ];

    path.extension()
        .and_then(|raw| raw.to_str())
        .map_or(false, |ext| SOURCE_EXTENSIONS.contains(&ext))
}
499
// Unit tests for first-run detection, state persistence, the baseline scan,
// source-file collection, and the baseline call graph cache. Every test that
// touches the file system works inside its own `TempDir`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::RefCell;
    use std::fs;
    use tempfile::TempDir;

    // =========================================================================
    // Detection tests
    // =========================================================================

    // An empty project directory has no state file, so it is a first run.
    #[test]
    fn test_first_run_detects_no_state_db() {
        let tmp = TempDir::new().unwrap();
        let status = detect_first_run(tmp.path());
        assert_eq!(status, FirstRunStatus::FirstRun);
        assert!(status.is_first_run());
    }

    // A valid state file with `baseline_built: true` is reported as a
    // subsequent run, and the persisted fields are handed back unchanged.
    #[test]
    fn test_first_run_skips_if_state_exists() {
        let tmp = TempDir::new().unwrap();

        // Create a valid state file
        let dir = tmp.path().join(BUGBOT_DIR);
        fs::create_dir_all(&dir).unwrap();
        let state = BugbotState {
            version: 1,
            created_at: "2026-01-15T10:00:00Z".to_string(),
            baseline_built: true,
        };
        fs::write(
            dir.join(STATE_DB_FILENAME),
            serde_json::to_string_pretty(&state).unwrap(),
        )
        .unwrap();

        let status = detect_first_run(tmp.path());
        assert!(!status.is_first_run());
        match status {
            FirstRunStatus::SubsequentRun { state: s } => {
                assert_eq!(s.version, 1);
                assert!(s.baseline_built);
                assert_eq!(s.created_at, "2026-01-15T10:00:00Z");
            }
            _ => panic!("Expected SubsequentRun"),
        }
    }

    // A state file that is not valid JSON is treated as a first run.
    #[test]
    fn test_first_run_treats_malformed_state_as_first_run() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join(BUGBOT_DIR);
        fs::create_dir_all(&dir).unwrap();
        fs::write(dir.join(STATE_DB_FILENAME), "not valid json {{{").unwrap();

        let status = detect_first_run(tmp.path());
        assert!(status.is_first_run());
    }

    // A well-formed state file with `baseline_built: false` still counts as
    // a first run, so the baselines get rebuilt.
    #[test]
    fn test_first_run_treats_incomplete_baseline_as_first_run() {
        let tmp = TempDir::new().unwrap();
        let dir = tmp.path().join(BUGBOT_DIR);
        fs::create_dir_all(&dir).unwrap();
        let state = BugbotState {
            version: 1,
            created_at: "2026-01-15T10:00:00Z".to_string(),
            baseline_built: false, // interrupted previous run
        };
        fs::write(
            dir.join(STATE_DB_FILENAME),
            serde_json::to_string_pretty(&state).unwrap(),
        )
        .unwrap();

        let status = detect_first_run(tmp.path());
        assert!(
            status.is_first_run(),
            "baseline_built=false should be treated as first run"
        );
    }

    // =========================================================================
    // State creation tests
    // =========================================================================

    // `create_state_db` writes a file whose contents deserialize back to the
    // state it returned.
    #[test]
    fn test_first_run_creates_state_db() {
        let tmp = TempDir::new().unwrap();
        assert!(!state_db_path(tmp.path()).exists());

        let state = create_state_db(tmp.path()).unwrap();
        assert!(state.baseline_built);
        assert_eq!(state.version, STATE_VERSION);
        assert!(!state.created_at.is_empty());

        // Verify file was created on disk
        assert!(state_db_path(tmp.path()).exists());

        // Verify file content is valid JSON
        let contents = fs::read_to_string(state_db_path(tmp.path())).unwrap();
        let parsed: BugbotState = serde_json::from_str(&contents).unwrap();
        assert_eq!(parsed, state);
    }

    // End-to-end: first run, then state creation, then detection flips to
    // `SubsequentRun`.
    #[test]
    fn test_subsequent_run_uses_existing_baselines() {
        let tmp = TempDir::new().unwrap();

        // First run: creates state
        assert!(detect_first_run(tmp.path()).is_first_run());
        let state = create_state_db(tmp.path()).unwrap();

        // Second run: detects existing state
        let status = detect_first_run(tmp.path());
        assert!(!status.is_first_run());
        match status {
            FirstRunStatus::SubsequentRun { state: s } => {
                assert_eq!(s.version, state.version);
                assert_eq!(s.created_at, state.created_at);
                assert!(s.baseline_built);
            }
            _ => panic!("Expected SubsequentRun after create_state_db"),
        }
    }

    // =========================================================================
    // Path helper tests
    // =========================================================================

    #[test]
    fn test_bugbot_dir_path() {
        let root = Path::new("/projects/myapp");
        assert_eq!(bugbot_dir(root), PathBuf::from("/projects/myapp/.bugbot"));
    }

    #[test]
    fn test_state_db_path_correct() {
        let root = Path::new("/projects/myapp");
        assert_eq!(
            state_db_path(root),
            PathBuf::from("/projects/myapp/.bugbot/state.db")
        );
    }

    // =========================================================================
    // First-run scan tests
    // =========================================================================

    // The scan writes the state file and accounts for each of the four
    // baseline categories as either built or failed.
    #[test]
    fn test_first_run_scan_creates_state_and_records_baselines() {
        let tmp = TempDir::new().unwrap();

        // Create a minimal source file so baseline builders have something to scan
        let src_dir = tmp.path().join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(src_dir.join("main.py"), "def hello():\n    return 42\n").unwrap();

        // `RefCell` lets the `Fn` closure record messages through a shared
        // reference.
        let messages: RefCell<Vec<String>> = RefCell::new(Vec::new());
        let writer = |msg: &str| messages.borrow_mut().push(msg.to_string());

        let result = run_first_run_scan(tmp.path(), &writer).unwrap();

        // State should be created
        assert!(state_db_path(tmp.path()).exists());
        assert!(result.state.baseline_built);

        // Should have attempted all 4 baseline categories
        let total = result.baselines_built.len() + result.baseline_errors.len();
        assert_eq!(total, 4, "Should attempt all 4 baseline categories");

        // Elapsed time should be populated
        assert!(
            result.elapsed_ms < 30_000,
            "Scan should complete in reasonable time"
        );
    }

    // The scan emits both the start-of-scan and the completion message.
    #[test]
    fn test_first_run_progress_indication() {
        let tmp = TempDir::new().unwrap();

        let messages: RefCell<Vec<String>> = RefCell::new(Vec::new());
        let writer = |msg: &str| messages.borrow_mut().push(msg.to_string());

        let _result = run_first_run_scan(tmp.path(), &writer).unwrap();

        let messages = messages.into_inner();

        // Must print the specific progress message from the spec
        assert!(
            messages
                .iter()
                .any(|m| m.contains("Building initial baselines")),
            "Must print progress message containing 'Building initial baselines'. Got: {:?}",
            messages
        );

        assert!(
            messages.iter().any(|m| m.contains("one-time")),
            "Progress message must mention one-time cost. Got: {:?}",
            messages
        );

        // Must print completion message
        assert!(
            messages.iter().any(|m| m.contains("Baselines built in")),
            "Must print completion message. Got: {:?}",
            messages
        );
    }

    // test_first_run_baseline_policy_no_prior_state removed: guard and
    // contract extractors were killed (0% Ashby true-positive rate).

    // =========================================================================
    // Source file collection tests
    // =========================================================================

    // Only files with recognized source extensions are returned.
    #[test]
    fn test_collect_source_files_finds_source_files() {
        let tmp = TempDir::new().unwrap();
        let src = tmp.path().join("src");
        fs::create_dir_all(&src).unwrap();
        fs::write(src.join("main.rs"), "fn main() {}").unwrap();
        fs::write(src.join("lib.py"), "def f(): pass").unwrap();
        fs::write(src.join("notes.txt"), "not source").unwrap();

        let files = collect_source_files(tmp.path());
        assert_eq!(files.len(), 2);
        assert!(files.iter().any(|f| f.ends_with("main.rs")));
        assert!(files.iter().any(|f| f.ends_with("lib.py")));
    }

    // Source files inside excluded directories are ignored.
    // NOTE(review): `dist` and `build` are also skipped by the walker but
    // are not exercised here.
    #[test]
    fn test_collect_source_files_skips_hidden_and_build_dirs() {
        let tmp = TempDir::new().unwrap();

        // Create files in directories that should be skipped
        for dir_name in &[".git", "target", "node_modules", "__pycache__", "vendor"] {
            let dir = tmp.path().join(dir_name);
            fs::create_dir_all(&dir).unwrap();
            fs::write(dir.join("hidden.rs"), "fn f() {}").unwrap();
        }

        // Create a file that should be found
        fs::write(tmp.path().join("visible.rs"), "fn main() {}").unwrap();

        let files = collect_source_files(tmp.path());
        assert_eq!(files.len(), 1);
        assert!(files[0].ends_with("visible.rs"));
    }

    // Keeps the extension list in `is_source_file` and this test in sync.
    #[test]
    fn test_is_source_file_recognizes_all_extensions() {
        let extensions = vec![
            "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", "rb",
            "php", "kt", "swift", "cs", "scala", "ex", "exs", "lua",
        ];

        for ext in &extensions {
            let path = PathBuf::from(format!("test.{ext}"));
            assert!(
                is_source_file(&path),
                "Extension .{ext} should be recognized as source"
            );
        }

        // Non-source extensions
        for ext in &["txt", "md", "json", "yaml", "toml", "lock", "png"] {
            let path = PathBuf::from(format!("test.{ext}"));
            assert!(
                !is_source_file(&path),
                "Extension .{ext} should NOT be recognized as source"
            );
        }
    }

    // Calling `create_state_db` twice replaces the file; the second
    // timestamp wins.
    #[test]
    fn test_state_db_overwritten_on_second_first_run() {
        let tmp = TempDir::new().unwrap();

        // First creation
        let state1 = create_state_db(tmp.path()).unwrap();

        // Small delay to ensure different timestamp
        std::thread::sleep(std::time::Duration::from_millis(10));

        // Second creation (e.g., after manual deletion of state, or interrupted first run)
        let state2 = create_state_db(tmp.path()).unwrap();

        assert_ne!(
            state1.created_at, state2.created_at,
            "Second creation should have a later timestamp"
        );

        // File on disk should match the latest state
        let contents = fs::read_to_string(state_db_path(tmp.path())).unwrap();
        let parsed: BugbotState = serde_json::from_str(&contents).unwrap();
        assert_eq!(parsed, state2);
    }

    // Pure serde round-trip; no file system involved.
    #[test]
    fn test_bugbot_state_serialization_roundtrip() {
        let state = BugbotState {
            version: 1,
            created_at: "2026-03-02T12:00:00Z".to_string(),
            baseline_built: true,
        };

        let json = serde_json::to_string_pretty(&state).unwrap();
        let parsed: BugbotState = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, state);
    }

    // The returned result carries the persisted state and accounts for all
    // four baseline categories.
    #[test]
    fn test_first_run_result_fields() {
        let tmp = TempDir::new().unwrap();

        let writer = |_msg: &str| {};
        let result = run_first_run_scan(tmp.path(), &writer).unwrap();

        assert_eq!(result.state.version, STATE_VERSION);
        assert!(result.state.baseline_built);
        // With no source files, baselines still attempt and succeed (no-op)
        let total = result.baselines_built.len() + result.baseline_errors.len();
        assert_eq!(total, 4);
    }

    #[test]
    fn test_first_run_empty_project_succeeds() {
        // A completely empty project directory should still complete
        // the first-run scan without errors.
        let tmp = TempDir::new().unwrap();

        let writer = |_msg: &str| {};
        let result = run_first_run_scan(tmp.path(), &writer);

        assert!(
            result.is_ok(),
            "First-run scan should succeed even on an empty project: {:?}",
            result.err()
        );
    }

    // =========================================================================
    // Baseline call graph cache tests
    // =========================================================================

    // A graph saved for a commit loads back unchanged when the same commit
    // is requested.
    #[test]
    fn test_save_load_baseline_call_graph_roundtrip() {
        let tmp = TempDir::new().unwrap();

        let cg = serde_json::json!({
            "edges": [
                {"src_file": "a.py", "src_func": "foo", "dst_file": "b.py", "dst_func": "bar"}
            ]
        });

        save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();

        let loaded = load_cached_baseline_call_graph(tmp.path(), "abc123");
        assert!(loaded.is_some(), "Cache should load with matching commit");
        assert_eq!(loaded.unwrap(), cg);
    }

    // A cache built from one commit is rejected when another commit is
    // requested.
    #[test]
    fn test_load_baseline_rejects_stale_commit() {
        let tmp = TempDir::new().unwrap();

        let cg = serde_json::json!({"edges": []});
        save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();

        let loaded = load_cached_baseline_call_graph(tmp.path(), "def456");
        assert!(
            loaded.is_none(),
            "Cache should not load with different commit"
        );
    }

    #[test]
    fn test_load_baseline_nonexistent_cache() {
        let tmp = TempDir::new().unwrap();
        let loaded = load_cached_baseline_call_graph(tmp.path(), "abc123");
        assert!(loaded.is_none(), "No cache should return None");
    }

    // Pure serde round-trip for the cache metadata record.
    #[test]
    fn test_baseline_meta_serialization() {
        let meta = BaselineCallGraphMeta {
            commit_hash: "abc123".to_string(),
            language: "rust".to_string(),
            built_at: "2026-03-16T12:00:00Z".to_string(),
        };

        let json = serde_json::to_string_pretty(&meta).unwrap();
        let parsed: BaselineCallGraphMeta = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, meta);
    }

    // Saving into a project without a `.bugbot/` directory creates it along
    // with both cache files.
    #[test]
    fn test_save_creates_bugbot_dir() {
        let tmp = TempDir::new().unwrap();
        assert!(!bugbot_dir(tmp.path()).exists());

        let cg = serde_json::json!({"edges": []});
        save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();

        assert!(bugbot_dir(tmp.path()).exists());
        assert!(bugbot_dir(tmp.path()).join(BASELINE_CG_FILENAME).exists());
        assert!(bugbot_dir(tmp.path())
            .join(BASELINE_CG_META_FILENAME)
            .exists());
    }
}
915}