// perfgate_ingest/pytest.rs

//! Parser for pytest-benchmark JSON output.
//!
//! pytest-benchmark stores results in `.benchmarks/*.json` with a structure
//! containing `benchmarks[]` with `name`, `stats` (min, max, mean, median,
//! stddev, rounds, iterations), etc. All timing values are in seconds.
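//!
//! Abridged input shape (the test fixture below shows a complete file):
//!
//! ```json
//! {
//!   "benchmarks": [
//!     {
//!       "name": "test_sort",
//!       "stats": { "min": 0.0234, "max": 0.0312, "mean": 0.0256,
//!                  "median": 0.0250, "stddev": 0.0021, "rounds": 10 }
//!     }
//!   ]
//! }
//! ```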

use anyhow::Context;
use perfgate_types::{RunReceipt, Sample, Stats};
use serde::Deserialize;

use crate::{compute_u64_summary, make_receipt};

/// Statistics from a single pytest-benchmark entry.
#[derive(Debug, Deserialize)]
struct PytestStats {
    min: f64,
    max: f64,
    mean: f64,
    median: f64,
    stddev: f64,
    rounds: u64,
}

/// A single benchmark entry from pytest-benchmark JSON.
#[derive(Debug, Deserialize)]
struct PytestBenchmark {
    name: String,
    stats: PytestStats,
}

/// Top-level pytest-benchmark JSON structure.
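///
/// Extra fields in the file (`machine_info`, `commit_info`, per-benchmark
/// `options`, the extended `stats` entries, ...) are ignored: serde's derived
/// `Deserialize` skips unknown fields by default.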
#[derive(Debug, Deserialize)]
struct PytestOutput {
    benchmarks: Vec<PytestBenchmark>,
}

/// Parse a pytest-benchmark JSON file into a `RunReceipt`.
///
/// If the JSON contains multiple benchmarks, only the first is used.
/// All timing values in the input are in seconds and are converted
/// to milliseconds for the `wall_ms` metric.
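///
/// A usage sketch (the path is illustrative; pytest-benchmark saves runs
/// under a machine-specific subdirectory of `.benchmarks/`):
///
/// ```ignore
/// let json = std::fs::read_to_string(".benchmarks/0001_run.json").unwrap();
/// let receipt = parse_pytest_benchmark(&json, Some("sort-bench")).unwrap();
/// assert_eq!(receipt.bench.name, "sort-bench");
/// ```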
pub fn parse_pytest_benchmark(input: &str, name: Option<&str>) -> anyhow::Result<RunReceipt> {
    let output: PytestOutput =
        serde_json::from_str(input).context("failed to parse pytest-benchmark JSON")?;

    let bench = output
        .benchmarks
        .first()
        .context("pytest-benchmark JSON contains no benchmarks")?;

    let bench_name = name
        .map(|n| n.to_string())
        .unwrap_or_else(|| bench.name.clone());

    let stats = &bench.stats;

    // pytest-benchmark times are in seconds; convert to milliseconds.
    // The JSON carries only aggregate statistics, so reconstruct synthetic
    // samples: `rounds` values evenly spaced between min and max, capped at
    // 30 samples (and at least 1).
    let num_samples = stats.rounds.min(30) as usize;
    let num_samples = num_samples.max(1);

    let mut wall_values = Vec::new();
    let mut samples = Vec::new();

    if num_samples == 1 {
        let ms = seconds_to_ms(stats.mean);
        wall_values.push(ms);
        samples.push(make_sample(ms));
    } else {
        // Generate evenly-spaced samples between min and max.
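        // Worked example (matching the PYTEST_JSON fixture in the tests):
        // min = 23.4 ms, max = 31.2 ms, rounds = 10 gives a 0.867 ms step,
        // rounding to 23, 24, 25, 26, 27, 28, 29, 29, 30, 31 ms.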
        for i in 0..num_samples {
            // num_samples >= 2 in this branch, so the division is safe.
            let frac = i as f64 / (num_samples - 1) as f64;
            let t = stats.min + frac * (stats.max - stats.min);
            let ms = seconds_to_ms(t);
            wall_values.push(ms);
            samples.push(make_sample(ms));
        }
    }

    let mut computed = compute_u64_summary(&wall_values);
    // Override with the actual pytest-benchmark statistics.
    // median/min/max are u64, so the integer seconds_to_ms() is fine.
    computed.median = seconds_to_ms(stats.median);
    computed.min = seconds_to_ms(stats.min);
    computed.max = seconds_to_ms(stats.max);
    // IMPORTANT: use f64 arithmetic here, NOT seconds_to_ms(). See the GOTCHA
    // on seconds_to_ms: rounding to whole milliseconds would lose the sub-ms
    // precision that budget evaluation and significance testing rely on.
    computed.mean = Some(stats.mean * 1000.0);
    computed.stddev = Some(stats.stddev * 1000.0);
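    // e.g. a mean of 0.0256 s stays 25.6 ms here, where seconds_to_ms()
    // would collapse it to 26.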

    let full_stats = Stats {
        wall_ms: computed,
        cpu_ms: None,
        page_faults: None,
        ctx_switches: None,
        max_rss_kb: None,
        io_read_bytes: None,
        io_write_bytes: None,
        network_packets: None,
        energy_uj: None,
        binary_bytes: None,
        throughput_per_s: None,
    };

    Ok(make_receipt(&bench_name, samples, full_stats))
}

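/// Build a wall-clock-only `Sample`. pytest-benchmark reports no per-round
/// CPU, memory, or I/O figures, so every other field stays `None` or its
/// neutral default.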
fn make_sample(wall_ms: u64) -> Sample {
    Sample {
        wall_ms,
        exit_code: 0,
        warmup: false,
        timed_out: false,
        cpu_ms: None,
        page_faults: None,
        ctx_switches: None,
        max_rss_kb: None,
        io_read_bytes: None,
        io_write_bytes: None,
        network_packets: None,
        energy_uj: None,
        binary_bytes: None,
        stdout: None,
        stderr: None,
    }
}

/// Integer seconds-to-ms conversion for sample `wall_ms` values (u64).
///
/// GOTCHA: This intentionally rounds to whole milliseconds (clamping positive
/// sub-ms values up to 1), so it is only appropriate for per-sample u64 fields
/// where sub-ms precision is not needed. For stats fields (mean, stddev) use
/// direct `f64` arithmetic (`value * 1000.0`) to preserve sub-millisecond
/// precision. Using this function for stats would silently destroy the
/// fractional component that downstream budget evaluation and significance
/// testing depend on.
fn seconds_to_ms(s: f64) -> u64 {
    let ms = s * 1000.0;
    if ms < 1.0 && ms > 0.0 {
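        // Clamp positive sub-ms values up to 1 so a real (nonzero) timing
        // never rounds down to 0.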
        1
    } else {
        ms.round() as u64
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use perfgate_types::RUN_SCHEMA_V1;

    const PYTEST_JSON: &str = r#"{
        "machine_info": {
            "node": "test-host",
            "processor": "x86_64",
            "machine": "x86_64",
            "python_implementation": "CPython",
            "python_version": "3.11.0",
            "python_compiler": "GCC 12.2.0",
            "release": "6.1.0",
            "system": "Linux"
        },
        "commit_info": {
            "id": "abc123"
        },
        "benchmarks": [
            {
                "group": null,
                "name": "test_sort",
                "fullname": "tests/test_perf.py::test_sort",
                "params": null,
                "param": null,
                "extra_info": {},
                "options": {
                    "disable_gc": false,
                    "timer": "perf_counter",
                    "min_rounds": 5,
                    "max_time": 1.0,
                    "min_time": 0.000005,
                    "warmup": false
                },
                "stats": {
                    "min": 0.0234,
                    "max": 0.0312,
                    "mean": 0.0256,
                    "stddev": 0.0021,
                    "rounds": 10,
                    "iterations": 1,
                    "median": 0.0250,
                    "iqr": 0.0030,
                    "q1": 0.0240,
                    "q3": 0.0270,
                    "iqr_outliers": 0,
                    "stddev_outliers": 1,
                    "outliers": "1;0",
                    "ld15iqr": 0.0234,
                    "hd15iqr": 0.0312,
                    "ops": 39.0625,
                    "total": 0.256
                }
            }
        ],
        "datetime": "2024-01-15T10:30:00.000000",
        "version": "4.0.0"
    }"#;

    #[test]
    fn parse_pytest_basic() {
        let receipt = parse_pytest_benchmark(PYTEST_JSON, Some("sort-bench")).unwrap();
        assert_eq!(receipt.schema, RUN_SCHEMA_V1);
        assert_eq!(receipt.bench.name, "sort-bench");
        // 10 rounds -> 10 samples
        assert_eq!(receipt.samples.len(), 10);
        // median 0.0250 s -> 25 ms; min 23.4 ms and max 31.2 ms round to 23 and 31
        assert_eq!(receipt.stats.wall_ms.median, 25);
        assert_eq!(receipt.stats.wall_ms.min, 23);
        assert_eq!(receipt.stats.wall_ms.max, 31);
    }

    #[test]
    fn parse_pytest_default_name() {
        let receipt = parse_pytest_benchmark(PYTEST_JSON, None).unwrap();
        assert_eq!(receipt.bench.name, "test_sort");
    }

    #[test]
    fn parse_pytest_sample_count_capped() {
        // If rounds is very large, the sample count should cap at 30.
        let input = r#"{
            "benchmarks": [
                {
                    "name": "test_big",
                    "stats": {
                        "min": 0.010,
                        "max": 0.020,
                        "mean": 0.015,
                        "stddev": 0.002,
                        "rounds": 1000,
                        "iterations": 1,
                        "median": 0.015
                    }
                }
            ]
        }"#;
        let receipt = parse_pytest_benchmark(input, None).unwrap();
        assert_eq!(receipt.samples.len(), 30);
    }

    #[test]
    fn parse_pytest_single_round() {
        let input = r#"{
            "benchmarks": [
                {
                    "name": "test_single",
                    "stats": {
                        "min": 0.100,
                        "max": 0.100,
                        "mean": 0.100,
                        "stddev": 0.0,
                        "rounds": 1,
                        "iterations": 1,
                        "median": 0.100
                    }
                }
            ]
        }"#;
        let receipt = parse_pytest_benchmark(input, None).unwrap();
        assert_eq!(receipt.samples.len(), 1);
        assert_eq!(receipt.samples[0].wall_ms, 100);
    }

    #[test]
    fn parse_pytest_empty_benchmarks() {
        let input = r#"{"benchmarks": []}"#;
        let result = parse_pytest_benchmark(input, None);
        assert!(result.is_err());
    }

    #[test]
    fn parse_pytest_invalid_json() {
        let result = parse_pytest_benchmark("not json", None);
        assert!(result.is_err());
    }

    #[test]
    fn parse_pytest_submillisecond() {
        let input = r#"{
            "benchmarks": [
                {
                    "name": "test_fast",
                    "stats": {
                        "min": 0.0001,
                        "max": 0.0003,
                        "mean": 0.0002,
                        "stddev": 0.00005,
                        "rounds": 5,
                        "iterations": 100,
                        "median": 0.0002
                    }
                }
            ]
        }"#;
        let receipt = parse_pytest_benchmark(input, None).unwrap();
        // All sub-millisecond values should clamp to 1.
        for sample in &receipt.samples {
            assert!(sample.wall_ms >= 1, "wall_ms was {} < 1", sample.wall_ms);
        }
    }
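
    // Illustrative addition pinning down the rounding/clamp behavior that
    // the GOTCHA on `seconds_to_ms` describes.
    #[test]
    fn seconds_to_ms_rounds_and_clamps() {
        assert_eq!(seconds_to_ms(0.0256), 26); // 25.6 ms rounds to 26
        assert_eq!(seconds_to_ms(0.0234), 23); // 23.4 ms rounds to 23
        assert_eq!(seconds_to_ms(0.0005), 1); // positive sub-ms clamps to 1
        assert_eq!(seconds_to_ms(0.0), 0); // zero stays zero
    }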
312    }
313}