use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::Instant;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
/// Name of the per-project directory (joined onto the project root) that holds bugbot files.
const BUGBOT_DIR: &str = ".bugbot";
/// File name of the state file inside the bugbot directory; `create_state_db` writes JSON into it.
const STATE_DB_FILENAME: &str = "state.db";
/// Version number stored in every `BugbotState` produced by `create_state_db`.
const STATE_VERSION: u32 = 1;
/// Persistent bugbot state, serialized to and from JSON via serde.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct BugbotState {
/// Version of this record; `create_state_db` sets it to `STATE_VERSION`.
pub version: u32,
/// Creation timestamp as a string; `create_state_db` stores the current UTC time in RFC 3339 form.
pub created_at: String,
/// Whether baselines are considered built; `detect_first_run` treats `false` as a first run.
pub baseline_built: bool,
}
/// Result of `detect_first_run`: either nothing usable is on disk yet, or a prior state was loaded.
#[derive(Debug, Clone, PartialEq)]
pub enum FirstRunStatus {
/// No state file exists, it could not be read or parsed, or it has `baseline_built == false`.
FirstRun,
/// A state file was read and parsed successfully and has `baseline_built == true`.
SubsequentRun {
/// The state that was loaded from the state file.
state: BugbotState,
},
}
impl FirstRunStatus {
    /// Returns `true` for [`FirstRunStatus::FirstRun`] and `false` for every other variant.
    pub fn is_first_run(&self) -> bool {
        match self {
            FirstRunStatus::FirstRun => true,
            FirstRunStatus::SubsequentRun { .. } => false,
        }
    }
}
/// Returns the bugbot directory for a project: `project_root` with `BUGBOT_DIR` appended.
///
/// The path is only computed; nothing is created or checked on disk.
pub fn bugbot_dir(project_root: &Path) -> PathBuf {
    let mut dir = project_root.to_path_buf();
    dir.push(BUGBOT_DIR);
    dir
}
/// Returns the path of the state file: `STATE_DB_FILENAME` inside the project's bugbot directory.
///
/// The path is only computed; nothing is created or checked on disk.
pub fn state_db_path(project_root: &Path) -> PathBuf {
    let mut db_path = bugbot_dir(project_root);
    db_path.push(STATE_DB_FILENAME);
    db_path
}
pub fn detect_first_run(project_root: &Path) -> FirstRunStatus {
let path = state_db_path(project_root);
if !path.exists() {
return FirstRunStatus::FirstRun;
}
match std::fs::read_to_string(&path) {
Ok(contents) => match serde_json::from_str::<BugbotState>(&contents) {
Ok(state) if state.baseline_built => FirstRunStatus::SubsequentRun { state },
Ok(_) => {
FirstRunStatus::FirstRun
}
Err(_) => {
FirstRunStatus::FirstRun
}
},
Err(_) => {
FirstRunStatus::FirstRun
}
}
}
pub fn create_state_db(project_root: &Path) -> Result<BugbotState> {
let dir = bugbot_dir(project_root);
std::fs::create_dir_all(&dir)?;
let state = BugbotState {
version: STATE_VERSION,
created_at: chrono::Utc::now().to_rfc3339(),
baseline_built: true,
};
let json = serde_json::to_string_pretty(&state)?;
std::fs::write(state_db_path(project_root), json)?;
Ok(state)
}
pub fn run_first_run_scan<F>(
project_root: &Path,
writer_fn: &F,
) -> Result<FirstRunResult>
where
F: Fn(&str),
{
let start = Instant::now();
writer_fn("Building initial baselines... (one-time, ~8s)");
let mut baselines_built: Vec<String> = Vec::new();
let mut baseline_errors: Vec<String> = Vec::new();
match build_call_graph_baseline(project_root) {
Ok(()) => baselines_built.push("call_graph".to_string()),
Err(e) => baseline_errors.push(format!("call_graph: {e}")),
}
match build_complexity_baseline(project_root) {
Ok(()) => baselines_built.push("complexity".to_string()),
Err(e) => baseline_errors.push(format!("complexity: {e}")),
}
match build_clone_baseline(project_root) {
Ok(()) => baselines_built.push("clones".to_string()),
Err(e) => baseline_errors.push(format!("clones: {e}")),
}
match build_temporal_baseline(project_root) {
Ok(()) => baselines_built.push("temporal".to_string()),
Err(e) => baseline_errors.push(format!("temporal: {e}")),
}
let state = create_state_db(project_root)?;
let elapsed_ms = start.elapsed().as_millis() as u64;
writer_fn(&format!(
"Baselines built in {}ms ({} succeeded, {} failed)",
elapsed_ms,
baselines_built.len(),
baseline_errors.len()
));
Ok(FirstRunResult {
state,
elapsed_ms,
baselines_built,
baseline_errors,
})
}
/// Summary returned by `run_first_run_scan`.
#[derive(Debug, Clone)]
pub struct FirstRunResult {
/// The state that `create_state_db` wrote at the end of the scan.
pub state: BugbotState,
/// Milliseconds elapsed from the start of the scan until after the state file was written.
pub elapsed_ms: u64,
/// Names of the baseline steps that returned `Ok` (e.g. `"call_graph"`, `"complexity"`).
pub baselines_built: Vec<String>,
/// One `"<step>: <error>"` entry for each baseline step that returned an error.
pub baseline_errors: Vec<String>,
}
/// File name (inside the bugbot directory) of the cached baseline call graph JSON.
const BASELINE_CG_FILENAME: &str = "baseline_call_graph.json";
/// File name (inside the bugbot directory) of the metadata JSON describing the cached call graph.
const BASELINE_CG_META_FILENAME: &str = "baseline_call_graph_meta.json";
/// Metadata stored next to the cached baseline call graph.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct BaselineCallGraphMeta {
/// Commit hash the cache was saved for; `load_cached_baseline_call_graph` compares it to the expected commit.
pub commit_hash: String,
/// Language string passed to `save_baseline_call_graph` when the cache was written.
pub language: String,
/// Time the cache was saved, as an RFC 3339 string in UTC.
pub built_at: String,
}
pub fn save_baseline_call_graph(
project_root: &Path,
call_graph: &serde_json::Value,
commit_hash: &str,
language: &str,
) -> Result<()> {
let dir = bugbot_dir(project_root);
std::fs::create_dir_all(&dir)?;
let cg_path = dir.join(BASELINE_CG_FILENAME);
let meta_path = dir.join(BASELINE_CG_META_FILENAME);
let meta = BaselineCallGraphMeta {
commit_hash: commit_hash.to_string(),
language: language.to_string(),
built_at: chrono::Utc::now().to_rfc3339(),
};
std::fs::write(&cg_path, serde_json::to_string(call_graph)?)
.context("writing baseline call graph cache")?;
std::fs::write(&meta_path, serde_json::to_string_pretty(&meta)?)
.context("writing baseline call graph metadata")?;
Ok(())
}
/// Loads the cached baseline call graph if it was saved for `expected_commit`.
///
/// Returns `None` when the metadata file is missing, unreadable, or unparsable, when its
/// `commit_hash` differs from `expected_commit`, or when the call graph file itself cannot be
/// read or parsed. The call graph file is only read after the metadata check passes.
pub fn load_cached_baseline_call_graph(
    project_root: &Path,
    expected_commit: &str,
) -> Option<serde_json::Value> {
    let cache_dir = bugbot_dir(project_root);
    let cg_path = cache_dir.join(BASELINE_CG_FILENAME);
    let meta_path = cache_dir.join(BASELINE_CG_META_FILENAME);

    let meta = std::fs::read_to_string(&meta_path)
        .ok()
        .and_then(|raw| serde_json::from_str::<BaselineCallGraphMeta>(&raw).ok())?;

    if meta.commit_hash == expected_commit {
        let raw_graph = std::fs::read_to_string(&cg_path).ok()?;
        serde_json::from_str(&raw_graph).ok()
    } else {
        None
    }
}
/// Resolves `git_ref` to a single object name by running `git rev-parse --verify` in
/// `project_root`, and returns git's trimmed standard output.
///
/// `--verify` makes git fail unless the argument names exactly one object, so ranges and
/// other multi-result expressions are rejected instead of being returned as a multi-line
/// "hash".
///
/// # Errors
///
/// Returns an error if `git_ref` starts with `-` (it would be parsed as a command-line
/// option), if the `git` process cannot be started, or if git exits unsuccessfully; in the
/// last case the message includes git's standard error output.
pub fn resolve_git_ref(project_root: &Path, git_ref: &str) -> Result<String> {
    // Refuse option-like input such as `--show-toplevel`, whose output is not an object name.
    if git_ref.starts_with('-') {
        anyhow::bail!(
            "git rev-parse {} failed: ref must not start with '-'",
            git_ref
        );
    }

    let output = Command::new("git")
        .args(["rev-parse", "--verify", git_ref])
        .current_dir(project_root)
        .output()
        .context("Failed to run git rev-parse")?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        anyhow::bail!("git rev-parse {} failed: {}", git_ref, stderr.trim());
    }

    Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
}
/// Builds the project call graph and, when the current `HEAD` commit can be resolved, caches
/// it with `save_baseline_call_graph`.
///
/// Returns `Ok(())` without doing anything when no language is detected for the directory.
/// Failing to resolve `HEAD` silently skips the caching step, and a failure to write the
/// cache is only reported as a warning on stderr; neither makes this function fail.
///
/// # Errors
///
/// Returns an error if building the call graph or converting it to JSON fails.
fn build_call_graph_baseline(project_root: &Path) -> Result<()> {
    let detected = tldr_core::Language::from_directory(project_root);
    let language = if let Some(lang) = detected {
        lang
    } else {
        return Ok(());
    };

    let call_graph = match tldr_core::callgraph::build_project_call_graph(
        project_root,
        language,
        None,
        true,
    ) {
        Ok(graph) => graph,
        Err(err) => return Err(anyhow::anyhow!("{err}")),
    };

    let call_graph_json = match serde_json::to_value(&call_graph) {
        Ok(value) => value,
        Err(err) => return Err(anyhow::anyhow!("serialize call graph: {err}")),
    };

    // Caching is best-effort: it needs a non-empty commit hash to key the cache on.
    match resolve_git_ref(project_root, "HEAD") {
        Ok(commit_hash) if !commit_hash.is_empty() => {
            let saved = save_baseline_call_graph(
                project_root,
                &call_graph_json,
                &commit_hash,
                language.as_str(),
            );
            if let Err(e) = saved {
                eprintln!("Warning: failed to cache baseline call graph: {e}");
            }
        }
        _ => {}
    }

    Ok(())
}
/// Runs the complexity calculation over every collected source file.
///
/// Files that cannot be read as UTF-8 text, or whose language is not recognised from the
/// path, are skipped. The computed complexities are discarded here, and the function always
/// returns `Ok(())`.
fn build_complexity_baseline(project_root: &Path) -> Result<()> {
    let candidates = collect_source_files(project_root);
    for source_path in candidates.iter() {
        let text = match std::fs::read_to_string(source_path) {
            Ok(text) => text,
            Err(_) => continue,
        };
        match tldr_core::Language::from_path(source_path) {
            Some(language) => {
                let _ = tldr_core::metrics::calculate_all_complexities(&text, language);
            }
            None => {}
        }
    }
    Ok(())
}
/// Runs clone detection over `project_root` with default options.
///
/// The detection result is discarded here; only success or failure is reported.
///
/// # Errors
///
/// Returns the detector's error, rendered through its `Display` text, if detection fails.
fn build_clone_baseline(project_root: &Path) -> Result<()> {
    let default_options = tldr_core::analysis::clones::ClonesOptions::default();
    match tldr_core::analysis::clones::detect_clones(project_root, &default_options) {
        Ok(_) => Ok(()),
        Err(err) => Err(anyhow::anyhow!("{err}")),
    }
}
/// Extracts the code structure of every collected source file whose language is recognised
/// from its path.
///
/// The extraction result (including any failure it reports) is discarded here, and the
/// function always returns `Ok(())`.
fn build_temporal_baseline(project_root: &Path) -> Result<()> {
    let candidates = collect_source_files(project_root);

    // Lazily pair each file with its language; files without one are dropped.
    let recognised = candidates.iter().filter_map(|source_path| {
        tldr_core::Language::from_path(source_path).map(|language| (source_path, language))
    });

    for (source_path, language) in recognised {
        let _ = tldr_core::ast::get_code_structure(source_path, language, 0, None);
    }
    Ok(())
}
/// Returns every source file found under `project_root`.
///
/// This is a thin wrapper that starts the recursive walk with an empty result list; which
/// entries are skipped or accepted is decided by `collect_source_files_recursive`.
fn collect_source_files(project_root: &Path) -> Vec<PathBuf> {
    let mut found: Vec<PathBuf> = Vec::new();
    collect_source_files_recursive(project_root, &mut found);
    found
}
/// Walks `dir` recursively and appends every source file (as decided by `is_source_file`) to
/// `files`.
///
/// Entries whose name starts with `.` or equals one of the well-known build/dependency
/// directory names are skipped, whether they are files or directories. Directories that
/// cannot be listed and entries that cannot be read are silently ignored.
///
/// Symbolic links that point at directories are not followed: a link to an ancestor would
/// otherwise be walked again and again until the OS gives up resolving the path, producing
/// duplicate entries and, with several such links, an exponential amount of work. Symbolic
/// links to anything else are still treated like ordinary files.
fn collect_source_files_recursive(dir: &Path, files: &mut Vec<PathBuf>) {
    const SKIPPED_NAMES: [&str; 6] = [
        "target",
        "node_modules",
        "vendor",
        "__pycache__",
        "dist",
        "build",
    ];

    let entries = match std::fs::read_dir(dir) {
        Ok(e) => e,
        Err(_) => return,
    };

    for entry in entries.flatten() {
        let path = entry.path();
        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
            if name.starts_with('.') || SKIPPED_NAMES.contains(&name) {
                continue;
            }
        }

        // `DirEntry::file_type` describes the entry itself and does not follow symlinks.
        let file_type = entry.file_type().ok();
        let is_real_dir = file_type.map_or(false, |t| t.is_dir());
        let is_symlink = file_type.map_or(false, |t| t.is_symlink());

        if is_real_dir {
            collect_source_files_recursive(&path, files);
        } else if is_symlink && path.is_dir() {
            // Symlinked directory: skip it to avoid cycles and duplicate traversal.
            continue;
        } else if is_source_file(&path) {
            files.push(path);
        }
    }
}
/// Returns `true` when the extension of `path` is one of the recognised source-code
/// extensions.
///
/// The comparison is exact (case-sensitive). Paths without an extension, or whose extension
/// is not valid UTF-8, are not source files.
fn is_source_file(path: &Path) -> bool {
    const SOURCE_EXTENSIONS: &[&str] = &[
        "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", "rb", "php",
        "kt", "swift", "cs", "scala", "ex", "exs", "lua",
    ];

    path.extension()
        .and_then(|raw| raw.to_str())
        .map_or(false, |ext| SOURCE_EXTENSIONS.contains(&ext))
}
// Unit tests for first-run detection, state file creation, the first-run scan,
// source file collection, and the baseline call graph cache.
#[cfg(test)]
mod tests {
use super::*;
use std::cell::RefCell;
use std::fs;
use tempfile::TempDir;
// A fresh directory with no state file is a first run.
#[test]
fn test_first_run_detects_no_state_db() {
let tmp = TempDir::new().unwrap();
let status = detect_first_run(tmp.path());
assert_eq!(status, FirstRunStatus::FirstRun);
assert!(status.is_first_run());
}
// A valid state file with baseline_built=true is loaded and reported as a subsequent run.
#[test]
fn test_first_run_skips_if_state_exists() {
let tmp = TempDir::new().unwrap();
let dir = tmp.path().join(BUGBOT_DIR);
fs::create_dir_all(&dir).unwrap();
let state = BugbotState {
version: 1,
created_at: "2026-01-15T10:00:00Z".to_string(),
baseline_built: true,
};
fs::write(
dir.join(STATE_DB_FILENAME),
serde_json::to_string_pretty(&state).unwrap(),
)
.unwrap();
let status = detect_first_run(tmp.path());
assert!(!status.is_first_run());
match status {
FirstRunStatus::SubsequentRun { state: s } => {
assert_eq!(s.version, 1);
assert!(s.baseline_built);
assert_eq!(s.created_at, "2026-01-15T10:00:00Z");
}
_ => panic!("Expected SubsequentRun"),
}
}
// A state file that is not valid JSON is treated as a first run rather than an error.
#[test]
fn test_first_run_treats_malformed_state_as_first_run() {
let tmp = TempDir::new().unwrap();
let dir = tmp.path().join(BUGBOT_DIR);
fs::create_dir_all(&dir).unwrap();
fs::write(dir.join(STATE_DB_FILENAME), "not valid json {{{").unwrap();
let status = detect_first_run(tmp.path());
assert!(status.is_first_run());
}
// A parsable state file with baseline_built=false is still a first run.
#[test]
fn test_first_run_treats_incomplete_baseline_as_first_run() {
let tmp = TempDir::new().unwrap();
let dir = tmp.path().join(BUGBOT_DIR);
fs::create_dir_all(&dir).unwrap();
let state = BugbotState {
version: 1,
created_at: "2026-01-15T10:00:00Z".to_string(),
baseline_built: false, };
fs::write(
dir.join(STATE_DB_FILENAME),
serde_json::to_string_pretty(&state).unwrap(),
)
.unwrap();
let status = detect_first_run(tmp.path());
assert!(status.is_first_run(), "baseline_built=false should be treated as first run");
}
// create_state_db writes a state file whose contents round-trip to the returned state.
#[test]
fn test_first_run_creates_state_db() {
let tmp = TempDir::new().unwrap();
assert!(!state_db_path(tmp.path()).exists());
let state = create_state_db(tmp.path()).unwrap();
assert!(state.baseline_built);
assert_eq!(state.version, STATE_VERSION);
assert!(!state.created_at.is_empty());
assert!(state_db_path(tmp.path()).exists());
let contents = fs::read_to_string(state_db_path(tmp.path())).unwrap();
let parsed: BugbotState = serde_json::from_str(&contents).unwrap();
assert_eq!(parsed, state);
}
// After create_state_db, detect_first_run reports a subsequent run carrying the same state.
#[test]
fn test_subsequent_run_uses_existing_baselines() {
let tmp = TempDir::new().unwrap();
assert!(detect_first_run(tmp.path()).is_first_run());
let state = create_state_db(tmp.path()).unwrap();
let status = detect_first_run(tmp.path());
assert!(!status.is_first_run());
match status {
FirstRunStatus::SubsequentRun { state: s } => {
assert_eq!(s.version, state.version);
assert_eq!(s.created_at, state.created_at);
assert!(s.baseline_built);
}
_ => panic!("Expected SubsequentRun after create_state_db"),
}
}
// bugbot_dir appends ".bugbot" to the project root.
#[test]
fn test_bugbot_dir_path() {
let root = Path::new("/projects/myapp");
assert_eq!(bugbot_dir(root), PathBuf::from("/projects/myapp/.bugbot"));
}
// state_db_path points at "state.db" inside the bugbot directory.
#[test]
fn test_state_db_path_correct() {
let root = Path::new("/projects/myapp");
assert_eq!(
state_db_path(root),
PathBuf::from("/projects/myapp/.bugbot/state.db")
);
}
// The scan writes the state file and accounts for all four baseline steps,
// each one counted either as built or as an error.
#[test]
fn test_first_run_scan_creates_state_and_records_baselines() {
let tmp = TempDir::new().unwrap();
let src_dir = tmp.path().join("src");
fs::create_dir_all(&src_dir).unwrap();
fs::write(
src_dir.join("main.py"),
"def hello():\n return 42\n",
)
.unwrap();
// RefCell lets the `Fn` closure record messages through a shared reference.
let messages: RefCell<Vec<String>> = RefCell::new(Vec::new());
let writer = |msg: &str| messages.borrow_mut().push(msg.to_string());
let result = run_first_run_scan(tmp.path(), &writer).unwrap();
assert!(state_db_path(tmp.path()).exists());
assert!(result.state.baseline_built);
let total = result.baselines_built.len() + result.baseline_errors.len();
assert_eq!(total, 4, "Should attempt all 4 baseline categories");
assert!(result.elapsed_ms < 30_000, "Scan should complete in reasonable time");
}
// The scan emits both the start message and the completion summary through the writer.
#[test]
fn test_first_run_progress_indication() {
let tmp = TempDir::new().unwrap();
let messages: RefCell<Vec<String>> = RefCell::new(Vec::new());
let writer = |msg: &str| messages.borrow_mut().push(msg.to_string());
let _result = run_first_run_scan(tmp.path(), &writer).unwrap();
let messages = messages.into_inner();
assert!(
messages.iter().any(|m| m.contains("Building initial baselines")),
"Must print progress message containing 'Building initial baselines'. Got: {:?}",
messages
);
assert!(
messages.iter().any(|m| m.contains("one-time")),
"Progress message must mention one-time cost. Got: {:?}",
messages
);
assert!(
messages.iter().any(|m| m.contains("Baselines built in")),
"Must print completion message. Got: {:?}",
messages
);
}
// Only files with recognised source extensions are collected.
#[test]
fn test_collect_source_files_finds_source_files() {
let tmp = TempDir::new().unwrap();
let src = tmp.path().join("src");
fs::create_dir_all(&src).unwrap();
fs::write(src.join("main.rs"), "fn main() {}").unwrap();
fs::write(src.join("lib.py"), "def f(): pass").unwrap();
fs::write(src.join("notes.txt"), "not source").unwrap();
let files = collect_source_files(tmp.path());
assert_eq!(files.len(), 2);
assert!(files.iter().any(|f| f.ends_with("main.rs")));
assert!(files.iter().any(|f| f.ends_with("lib.py")));
}
// Hidden directories and well-known build/dependency directories are not descended into.
#[test]
fn test_collect_source_files_skips_hidden_and_build_dirs() {
let tmp = TempDir::new().unwrap();
for dir_name in &[".git", "target", "node_modules", "__pycache__", "vendor"] {
let dir = tmp.path().join(dir_name);
fs::create_dir_all(&dir).unwrap();
fs::write(dir.join("hidden.rs"), "fn f() {}").unwrap();
}
fs::write(tmp.path().join("visible.rs"), "fn main() {}").unwrap();
let files = collect_source_files(tmp.path());
assert_eq!(files.len(), 1);
assert!(files[0].ends_with("visible.rs"));
}
// Every listed extension is accepted; common non-source extensions are rejected.
#[test]
fn test_is_source_file_recognizes_all_extensions() {
let extensions = vec![
"rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp",
"h", "hpp", "rb", "php", "kt", "swift", "cs", "scala", "ex", "exs", "lua",
];
for ext in &extensions {
let path = PathBuf::from(format!("test.{ext}"));
assert!(
is_source_file(&path),
"Extension .{ext} should be recognized as source"
);
}
for ext in &["txt", "md", "json", "yaml", "toml", "lock", "png"] {
let path = PathBuf::from(format!("test.{ext}"));
assert!(
!is_source_file(&path),
"Extension .{ext} should NOT be recognized as source"
);
}
}
// Calling create_state_db twice overwrites the file with the newer state.
#[test]
fn test_state_db_overwritten_on_second_first_run() {
let tmp = TempDir::new().unwrap();
let state1 = create_state_db(tmp.path()).unwrap();
// Short pause so the two RFC 3339 timestamps differ.
std::thread::sleep(std::time::Duration::from_millis(10));
let state2 = create_state_db(tmp.path()).unwrap();
assert_ne!(
state1.created_at, state2.created_at,
"Second creation should have a later timestamp"
);
let contents = fs::read_to_string(state_db_path(tmp.path())).unwrap();
let parsed: BugbotState = serde_json::from_str(&contents).unwrap();
assert_eq!(parsed, state2);
}
// BugbotState survives a JSON serialize/deserialize round trip unchanged.
#[test]
fn test_bugbot_state_serialization_roundtrip() {
let state = BugbotState {
version: 1,
created_at: "2026-03-02T12:00:00Z".to_string(),
baseline_built: true,
};
let json = serde_json::to_string_pretty(&state).unwrap();
let parsed: BugbotState = serde_json::from_str(&json).unwrap();
assert_eq!(parsed, state);
}
// The scan result carries the written state and one outcome per baseline step.
#[test]
fn test_first_run_result_fields() {
let tmp = TempDir::new().unwrap();
let writer = |_msg: &str| {};
let result = run_first_run_scan(tmp.path(), &writer).unwrap();
assert_eq!(result.state.version, STATE_VERSION);
assert!(result.state.baseline_built);
let total = result.baselines_built.len() + result.baseline_errors.len();
assert_eq!(total, 4);
}
// The scan returns Ok even when the project directory contains no files.
#[test]
fn test_first_run_empty_project_succeeds() {
let tmp = TempDir::new().unwrap();
let writer = |_msg: &str| {};
let result = run_first_run_scan(tmp.path(), &writer);
assert!(
result.is_ok(),
"First-run scan should succeed even on an empty project: {:?}",
result.err()
);
}
// A saved call graph is returned unchanged when loaded with the same commit hash.
#[test]
fn test_save_load_baseline_call_graph_roundtrip() {
let tmp = TempDir::new().unwrap();
let cg = serde_json::json!({
"edges": [
{"src_file": "a.py", "src_func": "foo", "dst_file": "b.py", "dst_func": "bar"}
]
});
save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();
let loaded = load_cached_baseline_call_graph(tmp.path(), "abc123");
assert!(loaded.is_some(), "Cache should load with matching commit");
assert_eq!(loaded.unwrap(), cg);
}
// Loading with a different commit hash than the one saved yields no cache.
#[test]
fn test_load_baseline_rejects_stale_commit() {
let tmp = TempDir::new().unwrap();
let cg = serde_json::json!({"edges": []});
save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();
let loaded = load_cached_baseline_call_graph(tmp.path(), "def456");
assert!(loaded.is_none(), "Cache should not load with different commit");
}
// Loading when nothing was ever saved yields no cache.
#[test]
fn test_load_baseline_nonexistent_cache() {
let tmp = TempDir::new().unwrap();
let loaded = load_cached_baseline_call_graph(tmp.path(), "abc123");
assert!(loaded.is_none(), "No cache should return None");
}
// BaselineCallGraphMeta survives a JSON serialize/deserialize round trip unchanged.
#[test]
fn test_baseline_meta_serialization() {
let meta = BaselineCallGraphMeta {
commit_hash: "abc123".to_string(),
language: "rust".to_string(),
built_at: "2026-03-16T12:00:00Z".to_string(),
};
let json = serde_json::to_string_pretty(&meta).unwrap();
let parsed: BaselineCallGraphMeta = serde_json::from_str(&json).unwrap();
assert_eq!(parsed, meta);
}
// Saving creates the bugbot directory and both cache files when they do not exist yet.
#[test]
fn test_save_creates_bugbot_dir() {
let tmp = TempDir::new().unwrap();
assert!(!bugbot_dir(tmp.path()).exists());
let cg = serde_json::json!({"edges": []});
save_baseline_call_graph(tmp.path(), &cg, "abc123", "python").unwrap();
assert!(bugbot_dir(tmp.path()).exists());
assert!(bugbot_dir(tmp.path()).join(BASELINE_CG_FILENAME).exists());
assert!(bugbot_dir(tmp.path()).join(BASELINE_CG_META_FILENAME).exists());
}
}