use anyhow::{Context, Result};
use std::collections::HashMap;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;
use std::process::{Command, Stdio};
use std::thread;
use super::git_interop::{create_consolidated_read_branch, get_repository_root};
/// Size attributed to a single measurement name across all notes.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MeasurementSizeInfo {
    /// Sum of the per-note byte shares attributed to this measurement name.
    pub total_bytes: u64,
    /// Number of occurrences of this measurement name across the notes.
    pub count: usize,
}

/// Aggregate size information for the notes inspected by [`get_notes_size`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct NotesSizeInfo {
    /// Sum of the sizes reported by git for every note object.
    pub total_bytes: u64,
    /// Number of note objects that were listed.
    pub note_count: usize,
    /// Per-measurement-name breakdown; `Some` only when a detailed report was
    /// requested, `None` otherwise.
    pub by_measurement: Option<HashMap<String, MeasurementSizeInfo>>,
}
/// Computes the total size of the git notes on the consolidated read branch.
///
/// Two git processes are chained together:
///
/// 1. `git notes --ref <read-branch> list` prints one line per note; the
///    first whitespace-separated field of each line is taken as the note
///    object's OID.
/// 2. `git cat-file --batch-check=<format>` receives those OIDs on stdin and
///    prints one size per line.
///
/// The OIDs are forwarded on a helper thread while this thread reads the
/// sizes, so both pipes are serviced concurrently.
///
/// # Arguments
///
/// * `detailed` - when `true`, every note is additionally read and its size
///   is split across the measurements it contains; the result is returned in
///   `by_measurement`. When `false`, `by_measurement` is `None`.
/// * `disk_size` - when `true`, sizes are queried with `%(objectsize:disk)`;
///   otherwise with `%(objectsize)`.
///
/// # Errors
///
/// Fails if the repository root or read branch cannot be obtained, if either
/// git process cannot be spawned or waited for, if any pipe read/write/flush
/// fails, if a size line is not a valid `u64`, if `git cat-file` exits
/// unsuccessfully, or if the number of sizes differs from the number of OIDs.
pub fn get_notes_size(detailed: bool, disk_size: bool) -> Result<NotesSizeInfo> {
    let repo_root =
        get_repository_root().map_err(|e| anyhow::anyhow!("Failed to get repo root: {}", e))?;
    let read_branch = create_consolidated_read_branch()?;

    let batch_format = if disk_size {
        "%(objectsize:disk)"
    } else {
        "%(objectsize)"
    };

    let mut list_notes = Command::new("git")
        .args(["notes", "--ref", read_branch.ref_name(), "list"])
        .current_dir(&repo_root)
        .stdout(Stdio::piped())
        .spawn()
        .context("Failed to spawn git notes list")?;
    let notes_out = list_notes
        .stdout
        .take()
        .context("Failed to take stdout from git notes list")?;

    let mut cat_file = Command::new("git")
        .args(["cat-file", &format!("--batch-check={}", batch_format)])
        .current_dir(&repo_root)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
        .context("Failed to spawn git cat-file")?;
    let cat_file_in = cat_file
        .stdin
        .take()
        .context("Failed to take stdin from git cat-file")?;
    let cat_file_out = cat_file
        .stdout
        .take()
        .context("Failed to take stdout from git cat-file")?;

    // Feeder thread: copies note OIDs from `git notes list` into
    // `git cat-file` and remembers them in listing order.
    let note_oids_handle = thread::spawn(move || -> Result<Vec<String>> {
        let reader = BufReader::new(notes_out);
        let mut writer = BufWriter::new(cat_file_in);
        let mut note_oids = Vec::new();
        for line in reader.lines() {
            let line = line.context("Failed to read line from git notes list")?;
            if let Some(note_oid) = line.split_whitespace().next() {
                writeln!(writer, "{}", note_oid).context("Failed to write OID to git cat-file")?;
                note_oids.push(note_oid.to_string());
            }
        }
        // Dropping a BufWriter also flushes, but any error from that flush is
        // discarded; flush explicitly so a failed write of the final buffered
        // OIDs is reported instead of silently lost.
        writer
            .flush()
            .context("Failed to flush OIDs to git cat-file")?;
        // `writer` is dropped when the closure returns, which closes
        // cat-file's stdin.
        Ok(note_oids)
    });

    // Read the sizes on this thread while the feeder thread is still writing.
    let reader = BufReader::new(cat_file_out);
    let mut sizes = Vec::new();
    for line in reader.lines() {
        let line = line.context("Failed to read line from git cat-file")?;
        let size = line
            .trim()
            .parse::<u64>()
            .with_context(|| format!("Failed to parse size from: {}", line))?;
        sizes.push(size);
    }

    let note_oids = note_oids_handle
        .join()
        .map_err(|_| anyhow::anyhow!("Thread panicked"))?
        .context("Failed to collect note OIDs")?;

    // Only a failure to wait is reported here; the exit status of
    // `git notes list` itself is not inspected.
    list_notes
        .wait()
        .context("Failed to wait for git notes list")?;
    let cat_file_status = cat_file.wait().context("Failed to wait for git cat-file")?;
    if !cat_file_status.success() {
        anyhow::bail!("git cat-file process failed");
    }

    let note_count = note_oids.len();
    if note_count == 0 {
        return Ok(NotesSizeInfo {
            total_bytes: 0,
            note_count: 0,
            by_measurement: if detailed { Some(HashMap::new()) } else { None },
        });
    }

    // Sizes are matched to OIDs purely by position, so the counts must agree.
    if sizes.len() != note_count {
        anyhow::bail!("Expected {} sizes but got {}", note_count, sizes.len());
    }

    let total_bytes: u64 = sizes.iter().sum();

    let mut by_measurement = if detailed { Some(HashMap::new()) } else { None };
    if let Some(ref mut by_name) = by_measurement {
        for (note_oid, &size) in note_oids.iter().zip(sizes.iter()) {
            accumulate_measurement_sizes(Path::new(&repo_root), note_oid, size, by_name)?;
        }
    }

    Ok(NotesSizeInfo {
        total_bytes,
        note_count,
        by_measurement,
    })
}
/// Attributes the size of one note to the measurements stored in it.
///
/// The note object is printed with `git cat-file -p` (run in `repo_root`) and
/// its content is deserialized. `note_size` is divided evenly, using integer
/// division, by the number of measurements found; each measurement then adds
/// that share to `total_bytes` and one to `count` of the `by_name` entry for
/// its name. A note that yields no measurements leaves `by_name` untouched.
///
/// # Errors
///
/// Fails if git cannot be executed or if `git cat-file -p` exits
/// unsuccessfully for `note_oid`.
fn accumulate_measurement_sizes(
    repo_root: &std::path::Path,
    note_oid: &str,
    note_size: u64,
    by_name: &mut HashMap<String, MeasurementSizeInfo>,
) -> Result<()> {
    use crate::serialization::deserialize;

    let cat_file = Command::new("git")
        .args(["cat-file", "-p", note_oid])
        .current_dir(repo_root)
        .output()
        .context("Failed to execute git cat-file -p")?;
    if !cat_file.status.success() {
        anyhow::bail!("git cat-file -p failed for {}", note_oid);
    }

    let note_text = String::from_utf8_lossy(&cat_file.stdout);
    let parsed = deserialize(&note_text);

    // An empty note has nothing to attribute (and would divide by zero).
    let share = match parsed.len() as u64 {
        0 => return Ok(()),
        measurement_count => note_size / measurement_count,
    };

    for item in parsed {
        let slot = by_name
            .entry(item.name.clone())
            .or_insert_with(|| MeasurementSizeInfo {
                total_bytes: 0,
                count: 0,
            });
        slot.count += 1;
        slot.total_bytes += share;
    }

    Ok(())
}
/// Object counts and sizes of the repository, as produced by
/// [`get_repo_stats`] from `git count-objects -v`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct RepoStats {
    /// Value of the `count` field: number of loose objects.
    pub loose_objects: u64,
    /// Value of the `size` field multiplied by 1024 (KiB converted to bytes).
    pub loose_size: u64,
    /// Value of the `in-pack` field: number of packed objects.
    pub packed_objects: u64,
    /// Value of the `size-pack` field multiplied by 1024 (KiB converted to bytes).
    pub pack_size: u64,
}
/// Gathers object counts and sizes for the repository from
/// `git count-objects -v`.
///
/// The `count`, `size`, `in-pack` and `size-pack` fields of the command
/// output are read; the two size fields are multiplied by 1024 before being
/// returned. Lines that do not consist of exactly two `:`-separated parts and
/// lines with any other key are ignored. A value that does not parse as `u64`
/// is treated as `0`.
///
/// # Errors
///
/// Fails if the repository root cannot be determined, if git cannot be
/// executed, or if `git count-objects` exits unsuccessfully (the error then
/// carries git's stderr).
pub fn get_repo_stats() -> Result<RepoStats> {
    let repo_root =
        get_repository_root().map_err(|e| anyhow::anyhow!("Failed to get repo root: {}", e))?;

    let count_objects = Command::new("git")
        .args(["count-objects", "-v"])
        .current_dir(&repo_root)
        .output()
        .context("Failed to execute git count-objects")?;
    if !count_objects.status.success() {
        anyhow::bail!(
            "git count-objects failed: {}",
            String::from_utf8_lossy(&count_objects.stderr)
        );
    }

    let mut stats = RepoStats {
        loose_objects: 0,
        loose_size: 0,
        packed_objects: 0,
        pack_size: 0,
    };

    let report = String::from_utf8_lossy(&count_objects.stdout);
    for entry in report.lines() {
        // Accept only lines made of exactly two `:`-separated fields.
        let mut fields = entry.split(':');
        let (raw_key, raw_value) = match (fields.next(), fields.next(), fields.next()) {
            (Some(k), Some(v), None) => (k, v),
            _ => continue,
        };

        let number = raw_value.trim().parse::<u64>().unwrap_or(0);
        let target = match raw_key.trim() {
            "count" => &mut stats.loose_objects,
            "size" => &mut stats.loose_size,
            "in-pack" => &mut stats.packed_objects,
            "size-pack" => &mut stats.pack_size,
            _ => continue,
        };
        *target = number;
    }

    // The size fields are scaled by 1024 to turn KiB into bytes.
    stats.loose_size *= 1024;
    stats.pack_size *= 1024;
    Ok(stats)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::with_isolated_cwd_git;

    /// The repository stats report at least one object, and every non-zero
    /// size is a whole multiple of 1024.
    #[test]
    fn test_get_repo_stats_basic() {
        with_isolated_cwd_git(|_git_dir| {
            let stats = get_repo_stats().unwrap();
            assert!(stats.loose_objects > 0 || stats.packed_objects > 0);

            let size_checks = [
                (stats.loose_size, "loose_size should be multiple of 1024"),
                (stats.pack_size, "pack_size should be multiple of 1024"),
            ];
            for (bytes, message) in size_checks {
                if bytes > 0 {
                    assert_eq!(bytes % 1024, 0, "{}", message);
                }
            }
        });
    }

    /// Without any measurements there are no notes, no bytes, and no
    /// breakdown when `detailed` is off.
    #[test]
    fn test_get_notes_size_empty_repo() {
        with_isolated_cwd_git(|_git_dir| {
            let result = get_notes_size(false, false).unwrap();
            assert_eq!(result.note_count, 0);
            assert_eq!(result.total_bytes, 0);
            assert!(result.by_measurement.is_none());
        });
    }

    /// Both size fields are multiples of 1024, and loose objects imply a
    /// positive, bounded loose size.
    #[test]
    fn test_get_repo_stats_conversion_factors() {
        with_isolated_cwd_git(|_git_dir| {
            let RepoStats {
                loose_objects,
                loose_size,
                pack_size,
                ..
            } = get_repo_stats().unwrap();

            let loose_remainder = loose_size % 1024;
            assert_eq!(
                loose_remainder, 0,
                "loose_size must be multiple of 1024 (bytes conversion from KiB)"
            );
            let pack_remainder = pack_size % 1024;
            assert_eq!(
                pack_remainder, 0,
                "pack_size must be multiple of 1024 (bytes conversion from KiB)"
            );

            if loose_objects > 0 {
                assert!(
                    loose_size > 0,
                    "loose_size should be > 0 if loose_objects > 0"
                );
                assert!(
                    loose_size < 1_000_000_000,
                    "loose_size should be reasonable"
                );
            }
        });
    }

    /// All four fields are populated as `u64` values and at least one object
    /// is counted.
    #[test]
    fn test_get_repo_stats_field_assignments() {
        with_isolated_cwd_git(|_git_dir| {
            let stats = get_repo_stats().unwrap();

            let object_total = stats.loose_objects + stats.packed_objects;
            assert!(
                object_total > 0,
                "Should have at least one object from initial commit"
            );

            let field_sum: u64 = [
                stats.loose_objects,
                stats.loose_size,
                stats.packed_objects,
                stats.pack_size,
            ]
            .iter()
            .sum();
            assert!(
                field_sum >= stats.loose_objects,
                "Arithmetic should work on u64 fields"
            );
        });
    }

    /// Three measurements under two names end up in one note, and the
    /// detailed breakdown accounts for each name.
    #[test]
    fn test_get_notes_size_with_measurements() {
        use crate::measurement_storage;

        with_isolated_cwd_git(|_git_dir| {
            let samples = [
                ("test_metric_1", 42.0),
                ("test_metric_2", 100.0),
                ("test_metric_1", 84.0),
            ];
            for (name, value) in samples {
                measurement_storage::add(name, value, &[]).unwrap();
            }

            let result = get_notes_size(true, false).unwrap();
            let total = result.total_bytes;
            assert!(
                total > 0,
                "total_bytes should be > 0 after adding measurements"
            );
            assert_eq!(
                result.note_count, 1,
                "Should have 1 note (all measurements on HEAD)"
            );

            let breakdown = result
                .by_measurement
                .expect("Should have detailed breakdown");
            for name in ["test_metric_1", "test_metric_2"] {
                assert!(
                    breakdown.contains_key(name),
                    "Should have {} in breakdown",
                    name
                );
            }

            let expectations = [
                (
                    "test_metric_1",
                    2,
                    "test_metric_1 should have 2 occurrences",
                    "test_metric_1 should have non-zero size",
                ),
                (
                    "test_metric_2",
                    1,
                    "test_metric_2 should have 1 occurrence",
                    "test_metric_2 should have non-zero size",
                ),
            ];
            for (name, expected_count, count_message, size_message) in expectations {
                let info = &breakdown[name];
                assert_eq!(info.count, expected_count, "{}", count_message);
                assert!(info.total_bytes > 0, "{}", size_message);
            }

            // Integer division may drop up to one byte per measurement.
            let breakdown_total = breakdown
                .values()
                .fold(0u64, |acc, info| acc + info.total_bytes);
            let num_measurements = 3u64;
            assert!(
                total.abs_diff(breakdown_total) < num_measurements,
                "Sum of breakdown ({}) should be within {} bytes of total_bytes ({}) due to integer division",
                breakdown_total,
                num_measurements,
                total
            );

            let expected_per_measurement = total / num_measurements;
            assert!(
                breakdown["test_metric_1"].total_bytes >= expected_per_measurement,
                "test_metric_1 appears twice, should have at least 1/3 of total (appears 2/3 times)"
            );
        });
    }
}