git_perf/git/
size_ops.rs

1use anyhow::{Context, Result};
2use std::collections::HashMap;
3use std::io::{BufRead, BufReader, BufWriter, Write};
4use std::path::Path;
5use std::process::{Command, Stdio};
6use std::thread;
7
8use super::git_interop::{create_consolidated_read_branch, get_repository_root};
9
/// Size information accumulated for a single measurement name.
///
/// A note's size is split evenly among the measurements it contains, so
/// `total_bytes` is an attribution rather than an exact per-measurement size.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MeasurementSizeInfo {
    /// Total bytes attributed to this measurement
    pub total_bytes: u64,
    /// Number of occurrences
    pub count: usize,
}

/// Information about measurement storage size
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct NotesSizeInfo {
    /// Total size in bytes
    pub total_bytes: u64,
    /// Number of commits with measurements
    pub note_count: usize,
    /// Optional breakdown by measurement name (`None` when no breakdown was requested)
    pub by_measurement: Option<HashMap<String, MeasurementSizeInfo>>,
}
27
28/// Get size information for all measurement notes
29pub fn get_notes_size(detailed: bool, disk_size: bool) -> Result<NotesSizeInfo> {
30    let repo_root =
31        get_repository_root().map_err(|e| anyhow::anyhow!("Failed to get repo root: {}", e))?;
32
33    // Create a consolidated read branch to include pending writes
34    let read_branch = create_consolidated_read_branch()?;
35
36    let batch_format = if disk_size {
37        "%(objectsize:disk)"
38    } else {
39        "%(objectsize)"
40    };
41
42    // Spawn git notes list process using the temporary read branch
43    let mut list_notes = Command::new("git")
44        .args(["notes", "--ref", read_branch.ref_name(), "list"])
45        .current_dir(&repo_root)
46        .stdout(Stdio::piped())
47        .spawn()
48        .context("Failed to spawn git notes list")?;
49
50    let notes_out = list_notes
51        .stdout
52        .take()
53        .context("Failed to take stdout from git notes list")?;
54
55    // Spawn git cat-file process
56    let mut cat_file = Command::new("git")
57        .args(["cat-file", &format!("--batch-check={}", batch_format)])
58        .current_dir(&repo_root)
59        .stdin(Stdio::piped())
60        .stdout(Stdio::piped())
61        .spawn()
62        .context("Failed to spawn git cat-file")?;
63
64    let cat_file_in = cat_file
65        .stdin
66        .take()
67        .context("Failed to take stdin from git cat-file")?;
68    let cat_file_out = cat_file
69        .stdout
70        .take()
71        .context("Failed to take stdout from git cat-file")?;
72
73    // Spawn a thread to pipe note OIDs from git notes list to git cat-file
74    // Also collect the note OIDs for later use in detailed breakdown
75    let note_oids_handle = thread::spawn(move || -> Result<Vec<String>> {
76        let reader = BufReader::new(notes_out);
77        let mut writer = BufWriter::new(cat_file_in);
78        let mut note_oids = Vec::new();
79
80        for line in reader.lines() {
81            let line = line.context("Failed to read line from git notes list")?;
82            if let Some(note_oid) = line.split_whitespace().next() {
83                writeln!(writer, "{}", note_oid).context("Failed to write OID to git cat-file")?;
84                note_oids.push(note_oid.to_string());
85            }
86        }
87        // writer is dropped here, closing stdin to cat-file
88        Ok(note_oids)
89    });
90
91    // Read sizes from git cat-file output
92    let reader = BufReader::new(cat_file_out);
93    let mut sizes = Vec::new();
94
95    for line in reader.lines() {
96        let line = line.context("Failed to read line from git cat-file")?;
97        let size = line
98            .trim()
99            .parse::<u64>()
100            .with_context(|| format!("Failed to parse size from: {}", line))?;
101        sizes.push(size);
102    }
103
104    // Wait for processes to complete
105    let note_oids = note_oids_handle
106        .join()
107        .map_err(|_| anyhow::anyhow!("Thread panicked"))?
108        .context("Failed to collect note OIDs")?;
109
110    list_notes
111        .wait()
112        .context("Failed to wait for git notes list")?;
113    let cat_file_status = cat_file.wait().context("Failed to wait for git cat-file")?;
114
115    if !cat_file_status.success() {
116        anyhow::bail!("git cat-file process failed");
117    }
118
119    let note_count = note_oids.len();
120    if note_count == 0 {
121        return Ok(NotesSizeInfo {
122            total_bytes: 0,
123            note_count: 0,
124            by_measurement: if detailed { Some(HashMap::new()) } else { None },
125        });
126    }
127
128    if sizes.len() != note_count {
129        anyhow::bail!("Expected {} sizes but got {}", note_count, sizes.len());
130    }
131
132    let total_bytes: u64 = sizes.iter().sum();
133
134    let mut by_measurement = if detailed { Some(HashMap::new()) } else { None };
135
136    // If detailed breakdown requested, parse measurement names
137    if let Some(ref mut by_name) = by_measurement {
138        for (note_oid, &size) in note_oids.iter().zip(sizes.iter()) {
139            accumulate_measurement_sizes(Path::new(&repo_root), note_oid, size, by_name)?;
140        }
141    }
142
143    Ok(NotesSizeInfo {
144        total_bytes,
145        note_count,
146        by_measurement,
147    })
148}
149
150/// Parse note contents and accumulate sizes by measurement name
151fn accumulate_measurement_sizes(
152    repo_root: &std::path::Path,
153    note_oid: &str,
154    note_size: u64,
155    by_name: &mut HashMap<String, MeasurementSizeInfo>,
156) -> Result<()> {
157    use crate::serialization::deserialize;
158
159    // Get note content
160    let output = Command::new("git")
161        .args(["cat-file", "-p", note_oid])
162        .current_dir(repo_root)
163        .output()
164        .context("Failed to execute git cat-file -p")?;
165
166    if !output.status.success() {
167        anyhow::bail!("git cat-file -p failed for {}", note_oid);
168    }
169
170    let content = String::from_utf8_lossy(&output.stdout);
171
172    // Parse measurements from note
173    let measurements = deserialize(&content);
174
175    if measurements.is_empty() {
176        return Ok(());
177    }
178
179    // Distribute note size evenly among measurements in this note
180    // (Each measurement contributes roughly equally to the note size)
181    let size_per_measurement = note_size / measurements.len() as u64;
182
183    for measurement in measurements {
184        let entry = by_name
185            .entry(measurement.name.clone())
186            .or_insert(MeasurementSizeInfo {
187                total_bytes: 0,
188                count: 0,
189            });
190
191        entry.total_bytes += size_per_measurement;
192        entry.count += 1;
193    }
194
195    Ok(())
196}
197
/// Git repository statistics from count-objects
///
/// Sizes are expressed in bytes; `git count-objects -v` reports them in KiB
/// and they are converted when this struct is filled in.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RepoStats {
    /// Number of loose objects
    pub loose_objects: u64,
    /// Size of loose objects in bytes
    pub loose_size: u64,
    /// Number of packed objects
    pub packed_objects: u64,
    /// Size of pack files in bytes
    pub pack_size: u64,
}
209
210/// Get git repository statistics
211pub fn get_repo_stats() -> Result<RepoStats> {
212    let repo_root =
213        get_repository_root().map_err(|e| anyhow::anyhow!("Failed to get repo root: {}", e))?;
214
215    let output = Command::new("git")
216        .args(["count-objects", "-v"])
217        .current_dir(&repo_root)
218        .output()
219        .context("Failed to execute git count-objects")?;
220
221    if !output.status.success() {
222        let stderr = String::from_utf8_lossy(&output.stderr);
223        anyhow::bail!("git count-objects failed: {}", stderr);
224    }
225
226    let stdout = String::from_utf8_lossy(&output.stdout);
227
228    let mut loose_objects = 0;
229    let mut loose_size = 0; // in KiB from git
230    let mut packed_objects = 0;
231    let mut pack_size = 0; // in KiB from git
232
233    for line in stdout.lines() {
234        let parts: Vec<&str> = line.split(':').collect();
235        if parts.len() != 2 {
236            continue;
237        }
238
239        let key = parts[0].trim();
240        let value = parts[1].trim().parse::<u64>().unwrap_or(0);
241
242        match key {
243            "count" => loose_objects = value,
244            "size" => loose_size = value,
245            "in-pack" => packed_objects = value,
246            "size-pack" => pack_size = value,
247            _ => {}
248        }
249    }
250
251    Ok(RepoStats {
252        loose_objects,
253        loose_size: loose_size * 1024, // Convert KiB to bytes
254        packed_objects,
255        pack_size: pack_size * 1024, // Convert KiB to bytes
256    })
257}
258
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::dir_with_repo;

    /// `get_repo_stats` succeeds in a freshly prepared repository and reports
    /// sizes that went through the KiB -> bytes conversion.
    #[test]
    fn test_get_repo_stats_basic() {
        // Keep the repository handle alive for the whole test
        let repo_dir = dir_with_repo();
        std::env::set_current_dir(repo_dir.path()).unwrap();

        let RepoStats {
            loose_objects,
            loose_size,
            packed_objects,
            pack_size,
        } = get_repo_stats().unwrap();

        // Should have some objects after initial commit
        assert!(loose_objects > 0 || packed_objects > 0);

        // Byte sizes are KiB * 1024, so they are always multiples of 1024
        // (zero trivially satisfies this, so no guard is needed)
        assert_eq!(
            loose_size % 1024,
            0,
            "loose_size should be multiple of 1024"
        );
        assert_eq!(pack_size % 1024, 0, "pack_size should be multiple of 1024");
    }

    /// A repository without notes yields zero totals and, when no breakdown
    /// is requested, no per-measurement map.
    #[test]
    fn test_get_notes_size_empty_repo() {
        let repo_dir = dir_with_repo();
        std::env::set_current_dir(repo_dir.path()).unwrap();

        let NotesSizeInfo {
            total_bytes,
            note_count,
            by_measurement,
        } = get_notes_size(false, false).unwrap();

        assert_eq!(total_bytes, 0);
        assert_eq!(note_count, 0);
        assert!(by_measurement.is_none());
    }
}
303}