use super::lockfile_parser::parse_lockfile_content;
use crate::ports::outbound::{DiffLockfileReader, DiffSource};
use crate::sbom_generation::domain::Package;
use crate::shared::error::SbomError;
use crate::shared::security::{read_file_with_security, MAX_FILE_SIZE};
use crate::shared::Result;
use std::path::{Path, PathBuf};
use std::process::Command;
/// Stateless reader for the base `uv.lock` of a dependency diff.
///
/// Its `DiffLockfileReader` implementation obtains the lockfile either from a git ref
/// (by running `git show`) or from a file on disk.
#[derive(Debug, Clone, Copy)]
pub struct GitLockfileReader;
impl GitLockfileReader {
pub fn new() -> Self {
Self
}
}
impl Default for GitLockfileReader {
fn default() -> Self {
Self::new()
}
}
impl DiffLockfileReader for GitLockfileReader {
fn read_base_packages(&self, source: &DiffSource, project_path: &Path) -> Result<Vec<Package>> {
match source {
DiffSource::GitRef(ref_name) => {
validate_git_ref(ref_name)?;
let output = Command::new("git")
.args(["show", &format!("{}:./uv.lock", ref_name)])
.current_dir(project_path)
.output()
.map_err(|e| SbomError::FileReadError {
path: project_path.join("uv.lock"),
details: format!(
"Failed to invoke git: {}. Ensure `git` is installed and on PATH.",
e
),
})?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(SbomError::FileReadError {
path: project_path.join("uv.lock"),
details: format!(
"git show {}:./uv.lock failed: {}",
ref_name,
stderr.trim()
),
}
.into());
}
let content = String::from_utf8(output.stdout).map_err(|_| {
SbomError::LockfileParseError {
path: project_path.join("uv.lock"),
details: "git show output is not valid UTF-8".to_string(),
}
})?;
let (packages, _) = parse_lockfile_content(&content, project_path)?;
Ok(packages)
}
DiffSource::FilePath(path) => {
let content = read_file_with_security(path, "uv.lock (diff base)", MAX_FILE_SIZE)?;
let (packages, _) = parse_lockfile_content(&content, project_path)?;
Ok(packages)
}
}
}
}
/// Checks that `ref_name` is safe to pass to `git` as a revision argument.
///
/// A ref is accepted only if it is non-empty, does not begin with `-`, and consists
/// solely of ASCII letters, ASCII digits, `.`, `_`, `/` and `-`.
///
/// # Errors
///
/// Returns `SbomError::SecurityError` describing the first rule that was violated; for
/// the character rule, the first offending character is named in the message.
fn validate_git_ref(ref_name: &str) -> Result<()> {
    // Single place that assembles the error so the three checks below stay short.
    let reject = |path: PathBuf, reason: String, hint: &str| -> Result<()> {
        Err(SbomError::SecurityError {
            path,
            reason,
            hint: hint.to_string(),
        }
        .into())
    };

    if ref_name.is_empty() {
        return reject(
            PathBuf::from("<git-ref>"),
            "Git ref must not be empty".to_string(),
            "Provide a valid git ref such as a branch name, tag, or commit SHA.",
        );
    }

    // A leading dash would let the value be parsed by git as an option.
    if ref_name.starts_with('-') {
        return reject(
            PathBuf::from(ref_name),
            format!("Git ref '{}' must not start with '-'", ref_name),
            "Git refs starting with '-' could be interpreted as command options. Use a valid ref name.",
        );
    }

    let is_allowed = |c: char| c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '/' | '-');
    match ref_name.chars().find(|&c| !is_allowed(c)) {
        Some(bad) => reject(
            PathBuf::from(ref_name),
            format!("Git ref '{}' contains invalid character '{}'", ref_name, bad),
            "Only alphanumeric characters and '.', '_', '/', '-' are allowed in git refs.",
        ),
        None => Ok(()),
    }
}
/// Classifies a user-supplied diff argument as either a lockfile path or a git ref.
///
/// If `arg` names an existing regular file, `DiffSource::FilePath` is returned; anything
/// else (including directories and paths that do not exist) is treated as
/// `DiffSource::GitRef`. The ref is not validated here.
pub fn determine_diff_source(arg: &str) -> DiffSource {
    let candidate = Path::new(arg);
    // `is_file` is already false for missing or unreadable paths, so a separate
    // `exists` call would only add a second metadata lookup.
    if candidate.is_file() {
        DiffSource::FilePath(candidate.to_path_buf())
    } else {
        DiffSource::GitRef(arg.to_string())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Returns `true` when `git --version` can be spawned and exits successfully.
    fn git_available() -> bool {
        Command::new("git")
            .arg("--version")
            .output()
            .map(|o| o.status.success())
            .unwrap_or(false)
    }

    /// Smallest lockfile text used by these tests: one `certifi` package.
    fn minimal_uv_lock() -> &'static str {
        "version = 1\nrevision = 1\nrequires-python = \">=3.11\"\n\n\
         [[package]]\nname = \"certifi\"\nversion = \"2024.8.30\"\n"
    }

    /// Runs `git <args>` in `repo_root` with global/system config disabled and a fixed
    /// author/committer identity, panicking if the command cannot run or fails.
    fn run_git(repo_root: &Path, args: &[&str]) {
        let status = Command::new("git")
            .args(args)
            .current_dir(repo_root)
            .env("GIT_CONFIG_GLOBAL", "/dev/null")
            .env("GIT_CONFIG_SYSTEM", "/dev/null")
            .env("GIT_AUTHOR_NAME", "Test")
            .env("GIT_AUTHOR_EMAIL", "test@example.com")
            .env("GIT_COMMITTER_NAME", "Test")
            .env("GIT_COMMITTER_EMAIL", "test@example.com")
            .status()
            .expect("git invocation failed");
        assert!(status.success(), "git {:?} failed", args);
    }

    /// Initialises a repository on branch `main` with signing disabled and a test identity.
    fn init_git_repo(repo_root: &Path) {
        run_git(repo_root, &["init", "-q", "-b", "main"]);
        run_git(repo_root, &["config", "commit.gpgsign", "false"]);
        run_git(repo_root, &["config", "tag.gpgsign", "false"]);
        run_git(repo_root, &["config", "user.email", "test@example.com"]);
        run_git(repo_root, &["config", "user.name", "Test"]);
    }

    /// Stages `pathspec`, commits it with `message`, and tags the commit as `tag`.
    fn commit_and_tag(repo_root: &Path, pathspec: &str, message: &str, tag: &str) {
        run_git(repo_root, &["add", pathspec]);
        run_git(repo_root, &["commit", "-q", "-m", message]);
        run_git(repo_root, &["tag", tag]);
    }

    /// Writes `lock_content` to `relative_lock_path` under `repo_root`, then creates a
    /// repository there containing a single tagged commit with that file.
    fn init_repo_with_lockfile_at(
        repo_root: &Path,
        relative_lock_path: &Path,
        lock_content: &str,
        tag: &str,
    ) {
        let abs_lock = repo_root.join(relative_lock_path);
        if let Some(parent) = abs_lock.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&abs_lock, lock_content).unwrap();

        init_git_repo(repo_root);
        commit_and_tag(
            repo_root,
            relative_lock_path.to_str().unwrap(),
            "add lockfile",
            tag,
        );
    }

    /// Asserts that `ref_name` is rejected and returns the error's display text.
    fn rejection_message(ref_name: &str) -> String {
        validate_git_ref(ref_name)
            .expect_err("ref should have been rejected")
            .to_string()
    }

    #[test]
    fn test_validate_git_ref_accepts_main() {
        assert!(validate_git_ref("main").is_ok());
    }

    #[test]
    fn test_validate_git_ref_accepts_version_tag() {
        assert!(validate_git_ref("v1.0.0").is_ok());
    }

    #[test]
    fn test_validate_git_ref_accepts_short_commit_sha() {
        assert!(validate_git_ref("abc1234").is_ok());
    }

    #[test]
    fn test_validate_git_ref_accepts_feature_branch_with_slash() {
        assert!(validate_git_ref("feature/my-branch").is_ok());
    }

    #[test]
    fn test_validate_git_ref_accepts_remote_tracking_ref() {
        assert!(validate_git_ref("origin/main").is_ok());
    }

    #[test]
    fn test_validate_git_ref_rejects_empty_string() {
        assert!(rejection_message("").contains("must not be empty"));
    }

    #[test]
    fn test_validate_git_ref_rejects_leading_dash() {
        assert!(rejection_message("-rf").contains("must not start with '-'"));
    }

    #[test]
    fn test_validate_git_ref_rejects_option_injection() {
        let result = validate_git_ref("--upload-pack=malicious");
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_git_ref_rejects_semicolon() {
        assert!(rejection_message("main;rm").contains("invalid character"));
    }

    #[test]
    fn test_validate_git_ref_rejects_space() {
        assert!(rejection_message("feature branch").contains("invalid character"));
    }

    #[test]
    fn test_determine_diff_source_returns_file_path_for_existing_file() {
        let temp_dir = TempDir::new().unwrap();
        let lock_path = temp_dir.path().join("uv.lock");
        fs::write(&lock_path, "version = 1").unwrap();
        let source = determine_diff_source(lock_path.to_str().unwrap());
        assert_eq!(source, DiffSource::FilePath(lock_path));
    }

    #[test]
    fn test_determine_diff_source_returns_git_ref_for_branch_name() {
        let source = determine_diff_source("main");
        assert_eq!(source, DiffSource::GitRef("main".to_string()));
    }

    #[test]
    fn test_determine_diff_source_returns_git_ref_for_nonexistent_path() {
        let source = determine_diff_source("/nonexistent/path/uv.lock");
        assert_eq!(
            source,
            DiffSource::GitRef("/nonexistent/path/uv.lock".to_string())
        );
    }

    #[test]
    fn test_read_base_packages_reads_subdirectory_lockfile_not_repo_root() {
        if !git_available() {
            eprintln!("skipping: git binary not available on PATH");
            return;
        }
        let repo = TempDir::new().unwrap();
        let sub_rel = Path::new("examples/sample-project");
        let root_lock = "version = 1\nrevision = 1\nrequires-python = \">=3.11\"\n\n\
                         [[package]]\nname = \"root-pkg\"\nversion = \"0.0.0\"\n";
        let sub_lock = "version = 1\nrevision = 1\nrequires-python = \">=3.11\"\n\n\
                        [[package]]\nname = \"sub-pkg\"\nversion = \"9.9.9\"\n";

        // Two different lockfiles in one commit: one at the repository root and one in
        // the nested project directory the reader is pointed at.
        fs::create_dir_all(repo.path().join(sub_rel)).unwrap();
        fs::write(repo.path().join("uv.lock"), root_lock).unwrap();
        fs::write(repo.path().join(sub_rel).join("uv.lock"), sub_lock).unwrap();
        init_git_repo(repo.path());
        commit_and_tag(repo.path(), ".", "init", "v-test");

        let reader = GitLockfileReader::new();
        let packages = reader
            .read_base_packages(
                &DiffSource::GitRef("v-test".to_string()),
                &repo.path().join(sub_rel),
            )
            .expect("read_base_packages should succeed for subdirectory project");
        assert_eq!(packages.len(), 1);
        assert_eq!(
            packages[0].name(),
            "sub-pkg",
            "expected the subdirectory lockfile to be read, not the repo-root lockfile"
        );
    }

    #[test]
    fn test_read_base_packages_still_works_at_repo_root() {
        if !git_available() {
            eprintln!("skipping: git binary not available on PATH");
            return;
        }
        let repo = TempDir::new().unwrap();
        init_repo_with_lockfile_at(
            repo.path(),
            Path::new("uv.lock"),
            minimal_uv_lock(),
            "v-root",
        );
        let reader = GitLockfileReader::new();
        let packages = reader
            .read_base_packages(&DiffSource::GitRef("v-root".to_string()), repo.path())
            .expect("repo-root project should still work");
        // Check the length first so an empty result fails with a clear message
        // instead of an index-out-of-bounds panic.
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].name(), "certifi");
    }

    #[test]
    fn test_determine_diff_source_returns_git_ref_for_directory() {
        let temp_dir = TempDir::new().unwrap();
        let source = determine_diff_source(temp_dir.path().to_str().unwrap());
        assert_eq!(
            source,
            DiffSource::GitRef(temp_dir.path().to_str().unwrap().to_string())
        );
    }
}