git_perf/git/
size_ops.rs

1use anyhow::{Context, Result};
2use std::collections::HashMap;
3use std::io::{BufRead, BufReader, BufWriter, Write};
4use std::path::Path;
5use std::process::{Command, Stdio};
6use std::thread;
7
8use super::git_interop::{create_consolidated_read_branch, get_repository_root};
9
/// Information about the size of a specific measurement
///
/// Derives the common value-type traits so entries can be logged, compared,
/// cloned and zero-initialized.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct MeasurementSizeInfo {
    /// Total bytes for this measurement
    pub total_bytes: u64,
    /// Number of occurrences
    pub count: usize,
}
17
18/// Information about measurement storage size
19pub struct NotesSizeInfo {
20    /// Total size in bytes
21    pub total_bytes: u64,
22    /// Number of commits with measurements
23    pub note_count: usize,
24    /// Optional breakdown by measurement name
25    pub by_measurement: Option<HashMap<String, MeasurementSizeInfo>>,
26}
27
28/// Get size information for all measurement notes
29pub fn get_notes_size(detailed: bool, disk_size: bool) -> Result<NotesSizeInfo> {
30    let repo_root =
31        get_repository_root().map_err(|e| anyhow::anyhow!("Failed to get repo root: {}", e))?;
32
33    // Create a consolidated read branch to include pending writes
34    let read_branch = create_consolidated_read_branch()?;
35
36    let batch_format = if disk_size {
37        "%(objectsize:disk)"
38    } else {
39        "%(objectsize)"
40    };
41
42    // Spawn git notes list process using the temporary read branch
43    let mut list_notes = Command::new("git")
44        .args(["notes", "--ref", read_branch.ref_name(), "list"])
45        .current_dir(&repo_root)
46        .stdout(Stdio::piped())
47        .spawn()
48        .context("Failed to spawn git notes list")?;
49
50    let notes_out = list_notes
51        .stdout
52        .take()
53        .context("Failed to take stdout from git notes list")?;
54
55    // Spawn git cat-file process
56    let mut cat_file = Command::new("git")
57        .args(["cat-file", &format!("--batch-check={}", batch_format)])
58        .current_dir(&repo_root)
59        .stdin(Stdio::piped())
60        .stdout(Stdio::piped())
61        .spawn()
62        .context("Failed to spawn git cat-file")?;
63
64    let cat_file_in = cat_file
65        .stdin
66        .take()
67        .context("Failed to take stdin from git cat-file")?;
68    let cat_file_out = cat_file
69        .stdout
70        .take()
71        .context("Failed to take stdout from git cat-file")?;
72
73    // Spawn a thread to pipe note OIDs from git notes list to git cat-file
74    // Also collect the note OIDs for later use in detailed breakdown
75    let note_oids_handle = thread::spawn(move || -> Result<Vec<String>> {
76        let reader = BufReader::new(notes_out);
77        let mut writer = BufWriter::new(cat_file_in);
78        let mut note_oids = Vec::new();
79
80        for line in reader.lines() {
81            let line = line.context("Failed to read line from git notes list")?;
82            if let Some(note_oid) = line.split_whitespace().next() {
83                writeln!(writer, "{}", note_oid).context("Failed to write OID to git cat-file")?;
84                note_oids.push(note_oid.to_string());
85            }
86        }
87        // writer is dropped here, closing stdin to cat-file
88        Ok(note_oids)
89    });
90
91    // Read sizes from git cat-file output
92    let reader = BufReader::new(cat_file_out);
93    let mut sizes = Vec::new();
94
95    for line in reader.lines() {
96        let line = line.context("Failed to read line from git cat-file")?;
97        let size = line
98            .trim()
99            .parse::<u64>()
100            .with_context(|| format!("Failed to parse size from: {}", line))?;
101        sizes.push(size);
102    }
103
104    // Wait for processes to complete
105    let note_oids = note_oids_handle
106        .join()
107        .map_err(|_| anyhow::anyhow!("Thread panicked"))?
108        .context("Failed to collect note OIDs")?;
109
110    list_notes
111        .wait()
112        .context("Failed to wait for git notes list")?;
113    let cat_file_status = cat_file.wait().context("Failed to wait for git cat-file")?;
114
115    if !cat_file_status.success() {
116        anyhow::bail!("git cat-file process failed");
117    }
118
119    let note_count = note_oids.len();
120    if note_count == 0 {
121        return Ok(NotesSizeInfo {
122            total_bytes: 0,
123            note_count: 0,
124            by_measurement: if detailed { Some(HashMap::new()) } else { None },
125        });
126    }
127
128    if sizes.len() != note_count {
129        anyhow::bail!("Expected {} sizes but got {}", note_count, sizes.len());
130    }
131
132    let total_bytes: u64 = sizes.iter().sum();
133
134    let mut by_measurement = if detailed { Some(HashMap::new()) } else { None };
135
136    // If detailed breakdown requested, parse measurement names
137    if let Some(ref mut by_name) = by_measurement {
138        for (note_oid, &size) in note_oids.iter().zip(sizes.iter()) {
139            accumulate_measurement_sizes(Path::new(&repo_root), note_oid, size, by_name)?;
140        }
141    }
142
143    Ok(NotesSizeInfo {
144        total_bytes,
145        note_count,
146        by_measurement,
147    })
148}
149
150/// Parse note contents and accumulate sizes by measurement name
151fn accumulate_measurement_sizes(
152    repo_root: &std::path::Path,
153    note_oid: &str,
154    note_size: u64,
155    by_name: &mut HashMap<String, MeasurementSizeInfo>,
156) -> Result<()> {
157    use crate::serialization::deserialize;
158
159    // Get note content
160    let output = Command::new("git")
161        .args(["cat-file", "-p", note_oid])
162        .current_dir(repo_root)
163        .output()
164        .context("Failed to execute git cat-file -p")?;
165
166    if !output.status.success() {
167        anyhow::bail!("git cat-file -p failed for {}", note_oid);
168    }
169
170    let content = String::from_utf8_lossy(&output.stdout);
171
172    // Parse measurements from note
173    let measurements = deserialize(&content);
174
175    if measurements.is_empty() {
176        return Ok(());
177    }
178
179    // Distribute note size evenly among measurements in this note
180    // (Each measurement contributes roughly equally to the note size)
181    let size_per_measurement = note_size / measurements.len() as u64;
182
183    for measurement in measurements {
184        let entry = by_name
185            .entry(measurement.name.clone())
186            .or_insert(MeasurementSizeInfo {
187                total_bytes: 0,
188                count: 0,
189            });
190
191        entry.total_bytes += size_per_measurement;
192        entry.count += 1;
193    }
194
195    Ok(())
196}
197
/// Git repository statistics from count-objects
///
/// Derives the common value-type traits so the stats can be logged, compared,
/// cloned and zero-initialized.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct RepoStats {
    /// Number of loose objects
    pub loose_objects: u64,
    /// Size of loose objects in bytes
    pub loose_size: u64,
    /// Number of packed objects
    pub packed_objects: u64,
    /// Size of pack files in bytes
    pub pack_size: u64,
}
209
210/// Get git repository statistics
211pub fn get_repo_stats() -> Result<RepoStats> {
212    let repo_root =
213        get_repository_root().map_err(|e| anyhow::anyhow!("Failed to get repo root: {}", e))?;
214
215    let output = Command::new("git")
216        .args(["count-objects", "-v"])
217        .current_dir(&repo_root)
218        .output()
219        .context("Failed to execute git count-objects")?;
220
221    if !output.status.success() {
222        let stderr = String::from_utf8_lossy(&output.stderr);
223        anyhow::bail!("git count-objects failed: {}", stderr);
224    }
225
226    let stdout = String::from_utf8_lossy(&output.stdout);
227
228    let mut loose_objects = 0;
229    let mut loose_size = 0; // in KiB from git
230    let mut packed_objects = 0;
231    let mut pack_size = 0; // in KiB from git
232
233    for line in stdout.lines() {
234        let parts: Vec<&str> = line.split(':').collect();
235        if parts.len() != 2 {
236            continue;
237        }
238
239        let key = parts[0].trim();
240        let value = parts[1].trim().parse::<u64>().unwrap_or(0);
241
242        match key {
243            "count" => loose_objects = value,
244            "size" => loose_size = value,
245            "in-pack" => packed_objects = value,
246            "size-pack" => pack_size = value,
247            _ => {}
248        }
249    }
250
251    Ok(RepoStats {
252        loose_objects,
253        loose_size: loose_size * 1024, // Convert KiB to bytes
254        packed_objects,
255        pack_size: pack_size * 1024, // Convert KiB to bytes
256    })
257}
258
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::with_isolated_cwd_git;

    #[test]
    fn test_get_repo_stats_basic() {
        // Test that get_repo_stats works and returns proper values
        with_isolated_cwd_git(|_git_dir| {
            let stats = get_repo_stats().unwrap();

            // Should have some objects after initial commit
            assert!(stats.loose_objects > 0 || stats.packed_objects > 0);

            // Sizes should be multiples of 1024 (tests * 1024 conversion)
            if stats.loose_size > 0 {
                assert_eq!(
                    stats.loose_size % 1024,
                    0,
                    "loose_size should be multiple of 1024"
                );
            }
            if stats.pack_size > 0 {
                assert_eq!(
                    stats.pack_size % 1024,
                    0,
                    "pack_size should be multiple of 1024"
                );
            }
        });
    }

    #[test]
    fn test_get_notes_size_empty_repo() {
        // Test with a repo that has no notes - exercises the empty case
        with_isolated_cwd_git(|_git_dir| {
            let result = get_notes_size(false, false).unwrap();
            assert_eq!(result.total_bytes, 0);
            assert_eq!(result.note_count, 0);
            assert!(result.by_measurement.is_none());
        });
    }

    #[test]
    fn test_get_notes_size_empty_repo_detailed() {
        // Same empty repo, but with the breakdown requested: the map must be
        // present and empty rather than absent.
        with_isolated_cwd_git(|_git_dir| {
            let result = get_notes_size(true, false).unwrap();
            assert_eq!(result.total_bytes, 0);
            assert_eq!(result.note_count, 0);

            let by_measurement = result
                .by_measurement
                .expect("detailed request should yield a breakdown map");
            assert!(
                by_measurement.is_empty(),
                "breakdown should be empty when there are no notes"
            );
        });
    }

    #[test]
    fn test_get_repo_stats_conversion_factors() {
        // Test that the * 1024 conversion is correctly applied
        with_isolated_cwd_git(|_git_dir| {
            let stats = get_repo_stats().unwrap();

            // Test that loose_size and pack_size are properly converted from KiB to bytes
            // Both should be multiples of 1024
            assert_eq!(
                stats.loose_size % 1024,
                0,
                "loose_size must be multiple of 1024 (bytes conversion from KiB)"
            );
            assert_eq!(
                stats.pack_size % 1024,
                0,
                "pack_size must be multiple of 1024 (bytes conversion from KiB)"
            );

            // If there are loose objects, the size should be reasonable (not zero, not absurdly large)
            if stats.loose_objects > 0 {
                assert!(
                    stats.loose_size > 0,
                    "loose_size should be > 0 if loose_objects > 0"
                );
                assert!(
                    stats.loose_size < 1_000_000_000,
                    "loose_size should be reasonable"
                );
            }
        });
    }

    #[test]
    fn test_get_repo_stats_field_assignments() {
        // Test that all fields are properly assigned from git output
        with_isolated_cwd_git(|_git_dir| {
            let stats = get_repo_stats().unwrap();

            // Verify that fields are assigned (not just defaulted to 0)
            // After creating a repo with an initial commit, we should have objects
            let total_objects = stats.loose_objects + stats.packed_objects;
            assert!(
                total_objects > 0,
                "Should have at least one object from initial commit"
            );

            // `count`/`size` both describe loose objects and `in-pack`/`size-pack`
            // both describe packs, so a zero count must come with a zero size.
            // A mismatch would point at a key being routed to the wrong field.
            if stats.loose_objects == 0 {
                assert_eq!(
                    stats.loose_size, 0,
                    "loose_size should be 0 when there are no loose objects"
                );
            }
            if stats.packed_objects == 0 {
                assert_eq!(
                    stats.pack_size, 0,
                    "pack_size should be 0 when there are no packed objects"
                );
            }
        });
    }

    #[test]
    fn test_get_notes_size_with_measurements() {
        use crate::measurement_storage;

        // Test the full flow: add measurements -> get size with detailed breakdown
        with_isolated_cwd_git(|_git_dir| {
            // Add measurements using the public API
            measurement_storage::add("test_metric_1", 42.0, &[]).unwrap();
            measurement_storage::add("test_metric_2", 100.0, &[]).unwrap();
            measurement_storage::add("test_metric_1", 84.0, &[]).unwrap();

            // Get size information with detailed breakdown
            let result = get_notes_size(true, false).unwrap();

            // Should have measurements now
            assert!(
                result.total_bytes > 0,
                "total_bytes should be > 0 after adding measurements"
            );
            assert_eq!(
                result.note_count, 1,
                "Should have 1 note (all measurements on HEAD)"
            );

            // Verify detailed breakdown
            let by_measurement = result
                .by_measurement
                .expect("Should have detailed breakdown");

            // Should have entries for both metrics
            assert!(
                by_measurement.contains_key("test_metric_1"),
                "Should have test_metric_1 in breakdown"
            );
            assert!(
                by_measurement.contains_key("test_metric_2"),
                "Should have test_metric_2 in breakdown"
            );

            // Test metric 1 should have count of 2
            let metric1_info = &by_measurement["test_metric_1"];
            assert_eq!(
                metric1_info.count, 2,
                "test_metric_1 should have 2 occurrences"
            );
            assert!(
                metric1_info.total_bytes > 0,
                "test_metric_1 should have non-zero size"
            );

            // Test metric 2 should have count of 1
            let metric2_info = &by_measurement["test_metric_2"];
            assert_eq!(
                metric2_info.count, 1,
                "test_metric_2 should have 1 occurrence"
            );
            assert!(
                metric2_info.total_bytes > 0,
                "test_metric_2 should have non-zero size"
            );

            // Verify that the note size is split across the 3 measurements.
            let total_from_breakdown: u64 =
                by_measurement.values().map(|info| info.total_bytes).sum();

            // Integer division may drop up to (number of measurements - 1) bytes,
            // e.g. 121 / 3 = 40 per measurement and 40 * 3 = 120, so allow that slack.
            let num_measurements = 3u64;
            assert!(
                result.total_bytes.abs_diff(total_from_breakdown) < num_measurements,
                "Sum of breakdown ({}) should be within {} bytes of total_bytes ({}) due to integer division",
                total_from_breakdown,
                num_measurements,
                result.total_bytes
            );

            // Each measurement gets at least total_bytes / 3, and test_metric_1 occurs twice.
            let expected_per_measurement = result.total_bytes / num_measurements;
            assert!(
                metric1_info.total_bytes >= expected_per_measurement,
                "test_metric_1 appears twice, should have at least 1/3 of total (appears 2/3 times)"
            );
        });
    }
}