Skip to main content

tldr_cli/commands/bugbot/
baseline.rs

//! Git baseline extraction for bugbot.
//!
//! Retrieves the "before" version of changed files from git so the analysis
//! pipeline can compare baseline vs. current and detect regressions.
5
6use std::io::Write;
7use std::path::Path;
8use std::process::Command;
9
10use anyhow::{Context, Result};
11use tempfile::NamedTempFile;
12
/// Result of checking baseline status for a file.
///
/// The type derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so
/// that callers and tests can copy a status and compare it directly (for
/// example with `assert_eq!`) instead of destructuring it by hand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BaselineStatus {
    /// File exists at the baseline ref; contains the original content.
    Exists(String),
    /// File is new -- it did not exist at the baseline ref.
    NewFile,
    /// `git show` failed for an unexpected reason (stderr captured).
    GitShowFailed(String),
}
23
24/// Get the content of a file at the given git ref.
25///
26/// # Arguments
27/// * `project` - Project root directory (must be inside a git repo).
28/// * `file`    - Path to the file (absolute or relative to `project`).
29/// * `base_ref`- Git ref to read from, e.g. `"HEAD"`, `"main"`.
30///
31/// # Returns
32/// * `BaselineStatus::Exists(content)` when the file existed at `base_ref`.
33/// * `BaselineStatus::NewFile` when `git show` reports the path does not exist.
34/// * `BaselineStatus::GitShowFailed(stderr)` on other git failures.
35pub fn get_baseline_content(
36    project: &Path,
37    file: &Path,
38    base_ref: &str,
39) -> Result<BaselineStatus> {
40    // Compute relative path from project root.
41    // If the file is already relative (or outside the project) we fall through.
42    let relative = file.strip_prefix(project).unwrap_or(file);
43
44    // On all platforms git expects forward-slash separators in `ref:path`.
45    let relative_str = relative
46        .components()
47        .map(|c| c.as_os_str().to_string_lossy().to_string())
48        .collect::<Vec<_>>()
49        .join("/");
50
51    let output = Command::new("git")
52        .args(["show", &format!("{}:{}", base_ref, relative_str)])
53        .current_dir(project)
54        .output()
55        .context("Failed to run git show")?;
56
57    if output.status.success() {
58        let content = String::from_utf8(output.stdout)
59            .context("git show output is not valid UTF-8")?;
60        Ok(BaselineStatus::Exists(content))
61    } else {
62        let stderr = String::from_utf8_lossy(&output.stderr);
63        if stderr.contains("does not exist")
64            || stderr.contains("not exist in")
65            || stderr.contains("exists on disk, but not in")
66            || stderr.contains("did not match any")
67        {
68            Ok(BaselineStatus::NewFile)
69        } else {
70            Ok(BaselineStatus::GitShowFailed(stderr.to_string()))
71        }
72    }
73}
74
75/// Write baseline content to a temporary file with the correct extension.
76///
77/// The extension is preserved so that tree-sitter can detect the language
78/// when parsing the temporary file.  The caller must keep the returned
79/// `NamedTempFile` handle alive -- dropping it deletes the file.
80pub fn write_baseline_tmpfile(content: &str, file_path: &Path) -> Result<NamedTempFile> {
81    let extension = file_path
82        .extension()
83        .and_then(|e| e.to_str())
84        .unwrap_or("txt");
85
86    let mut tmpfile = tempfile::Builder::new()
87        .prefix("bugbot_baseline_")
88        .suffix(&format!(".{}", extension))
89        .tempfile()
90        .context("Failed to create temp file for baseline")?;
91
92    tmpfile
93        .write_all(content.as_bytes())
94        .context("Failed to write baseline content to temp file")?;
95    tmpfile.flush()?;
96
97    Ok(tmpfile)
98}
99
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Helper: run `git <args>` inside `dir`.
    ///
    /// Panics with git's own output when the command cannot be spawned or
    /// exits unsuccessfully, so a broken test setup is reported where it
    /// happens rather than as a confusing assertion failure later on.
    fn run_git(dir: &Path, args: &[&str]) {
        let output = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .unwrap_or_else(|err| panic!("failed to spawn `git {}`: {}", args.join(" "), err));

        assert!(
            output.status.success(),
            "`git {}` failed\nstderr: {}\nstdout: {}",
            args.join(" "),
            String::from_utf8_lossy(&output.stderr),
            String::from_utf8_lossy(&output.stdout)
        );
    }

    /// Helper: write `content` to `name` inside `dir`, then stage and commit it.
    fn commit_file(dir: &Path, name: &str, content: &str, message: &str) {
        std::fs::write(dir.join(name), content)
            .unwrap_or_else(|err| panic!("write {}: {}", name, err));
        run_git(dir, &["add", name]);
        run_git(dir, &["commit", "-m", message]);
    }

    /// Helper: initialize a git repo with an initial commit in a temp directory.
    fn init_git_repo() -> tempfile::TempDir {
        let tmp = tempfile::TempDir::new().expect("create temp dir");
        let dir = tmp.path();

        run_git(dir, &["init"]);
        run_git(dir, &["config", "user.email", "test@test.com"]);
        run_git(dir, &["config", "user.name", "Test"]);
        // A developer's global config may demand signed commits; the
        // throwaway repo must not depend on a signing key being available.
        run_git(dir, &["config", "commit.gpgsign", "false"]);

        // Create an initial commit so HEAD exists.
        commit_file(dir, "README.md", "# test\n", "init");

        tmp
    }

    #[test]
    fn test_get_baseline_existing_file() {
        let tmp = init_git_repo();
        let dir = tmp.path();

        // Commit a file with known content.
        let original = "fn original() {}\n";
        commit_file(dir, "lib.rs", original, "add lib.rs");

        // Modify the file (uncommitted).
        std::fs::write(dir.join("lib.rs"), "fn modified() {}\n").expect("overwrite lib.rs");

        // Baseline at HEAD should return the original content.
        let status =
            get_baseline_content(dir, &dir.join("lib.rs"), "HEAD").expect("get_baseline_content");

        match status {
            BaselineStatus::Exists(content) => {
                assert_eq!(content, original, "Baseline should return the committed content");
            }
            other => panic!("Expected BaselineStatus::Exists, got: {:?}", other),
        }
    }

    #[test]
    fn test_get_baseline_new_file() {
        let tmp = init_git_repo();
        let dir = tmp.path();

        // Create a file that has never been committed.
        std::fs::write(dir.join("brand_new.rs"), "fn new() {}\n").expect("write new file");

        let status = get_baseline_content(dir, &dir.join("brand_new.rs"), "HEAD")
            .expect("get_baseline_content");

        match status {
            BaselineStatus::NewFile => {} // expected
            other => panic!("Expected BaselineStatus::NewFile, got: {:?}", other),
        }
    }

    #[test]
    fn test_get_baseline_deleted_file() {
        let tmp = init_git_repo();
        let dir = tmp.path();

        // Commit a file.
        let original = "fn to_delete() {}\n";
        commit_file(dir, "doomed.rs", original, "add doomed.rs");

        // Delete the file from the working tree.
        std::fs::remove_file(dir.join("doomed.rs")).expect("delete doomed.rs");

        // Baseline at HEAD should still return the committed content.
        let status = get_baseline_content(dir, &dir.join("doomed.rs"), "HEAD")
            .expect("get_baseline_content");

        match status {
            BaselineStatus::Exists(content) => {
                assert_eq!(
                    content, original,
                    "Baseline should return the committed content even after deletion"
                );
            }
            other => panic!("Expected BaselineStatus::Exists, got: {:?}", other),
        }
    }

    #[test]
    fn test_tmpfile_has_correct_extension() {
        let tmpfile =
            write_baseline_tmpfile("content", &PathBuf::from("src/lib.rs")).expect("write tmpfile");

        let path = tmpfile.path();
        let ext = path.extension().and_then(|e| e.to_str());
        assert_eq!(ext, Some("rs"), "Temp file should have .rs extension");
    }

    #[test]
    fn test_tmpfile_content_matches() {
        let content = "fn hello() { println!(\"world\"); }\n";
        let tmpfile =
            write_baseline_tmpfile(content, &PathBuf::from("example.py")).expect("write tmpfile");

        let read_back = std::fs::read_to_string(tmpfile.path()).expect("read tmpfile");
        assert_eq!(
            read_back, content,
            "Content read back from temp file should match what was written"
        );
    }
}