Skip to main content

shift_preflight/
stats.rs

//! Persistent run statistics for cumulative token savings tracking.
//!
//! Stores one JSON line per SHIFT invocation in `~/.shift/stats.jsonl`.
//! Inspired by RTK's `rtk gain` analytics system.
5
6use anyhow::{Context, Result};
7use serde::{Deserialize, Serialize};
8use std::fs;
9use std::io::{BufRead, BufReader, Write};
10use std::path::PathBuf;
11
/// Maximum number of records to load from the stats file.
/// Prevents unbounded memory allocation from huge/malicious files:
/// the loader stops reading once this many records have been parsed.
const MAX_STATS_RECORDS: usize = 100_000;

/// Maximum line length (bytes) to accept when reading the stats file.
/// Lines longer than this are skipped as likely corrupt.
/// The length is measured on the whitespace-trimmed line.
const MAX_LINE_LENGTH: usize = 65_536;
19
20use crate::cost::TokenSavings;
21
/// A single run record persisted to the stats file.
///
/// Each record is serialized with serde as one JSON object per line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RunRecord {
    /// ISO 8601 timestamp
    pub timestamp: String,
    /// Date portion (YYYY-MM-DD) for daily aggregation
    pub date: String,
    /// Provider used
    pub provider: String,
    /// Number of images processed
    pub images: usize,
    /// Number of images modified
    pub modified: usize,
    /// Number of images dropped (economy mode excess, SVG source mode).
    /// Falls back to the type's default when the field is absent from a stored record.
    #[serde(default)]
    pub dropped: usize,
    /// Number of SVGs rasterized.
    /// Falls back to the type's default when the field is absent from a stored record.
    #[serde(default)]
    pub svgs_rasterized: usize,
    /// Payload size in bytes before transformation
    pub bytes_before: usize,
    /// Payload size in bytes after transformation
    pub bytes_after: usize,
    /// Token savings
    pub token_savings: TokenSavings,
}
47
/// Aggregated gain summary.
///
/// Every field is a running total accumulated over a set of run records;
/// the derived `Default` starts all totals at zero.
#[derive(Debug, Clone, Default)]
pub struct GainSummary {
    /// Number of runs aggregated
    pub total_runs: usize,
    /// Sum of images processed across runs
    pub total_images: usize,
    /// Sum of images modified across runs
    pub total_modified: usize,
    /// Sum of byte sizes before transformation
    pub total_bytes_before: u64,
    /// Sum of byte sizes after transformation
    pub total_bytes_after: u64,
    /// Sum of OpenAI token counts before transformation
    pub total_openai_before: u64,
    /// Sum of OpenAI token counts after transformation
    pub total_openai_after: u64,
    /// Sum of Anthropic token counts before transformation
    pub total_anthropic_before: u64,
    /// Sum of Anthropic token counts after transformation
    pub total_anthropic_after: u64,
}
61
/// Daily aggregation bucket.
#[derive(Debug, Clone)]
pub struct DailyGain {
    /// Date key of the bucket, copied from the records' `date` field
    pub date: String,
    /// Number of runs recorded on this date
    pub runs: usize,
    /// Sum of images processed on this date
    pub images: usize,
    /// Sum of per-run OpenAI token savings (before minus after, floored at zero)
    pub openai_saved: u64,
    /// Sum of per-run Anthropic token savings (before minus after, floored at zero)
    pub anthropic_saved: u64,
}
71
72impl GainSummary {
73    pub fn openai_saved(&self) -> u64 {
74        self.total_openai_before
75            .saturating_sub(self.total_openai_after)
76    }
77
78    pub fn anthropic_saved(&self) -> u64 {
79        self.total_anthropic_before
80            .saturating_sub(self.total_anthropic_after)
81    }
82
83    pub fn openai_pct(&self) -> f64 {
84        if self.total_openai_before == 0 {
85            return 0.0;
86        }
87        (self.openai_saved() as f64 / self.total_openai_before as f64) * 100.0
88    }
89
90    pub fn anthropic_pct(&self) -> f64 {
91        if self.total_anthropic_before == 0 {
92            return 0.0;
93        }
94        (self.anthropic_saved() as f64 / self.total_anthropic_before as f64) * 100.0
95    }
96
97    pub fn bytes_saved(&self) -> u64 {
98        self.total_bytes_before
99            .saturating_sub(self.total_bytes_after)
100    }
101}
102
103/// Get the default stats file path: `~/.shift/stats.jsonl`.
104pub fn default_stats_path() -> Result<PathBuf> {
105    let home = std::env::var("HOME")
106        .or_else(|_| std::env::var("USERPROFILE"))
107        .context("could not determine home directory")?;
108    Ok(PathBuf::from(home).join(".shift").join("stats.jsonl"))
109}
110
111/// Append a run record to the stats file.
112pub fn record_run(record: &RunRecord, path: Option<&PathBuf>) -> Result<()> {
113    let stats_path = match path {
114        Some(p) => p.clone(),
115        None => default_stats_path()?,
116    };
117
118    // Ensure parent directory exists
119    if let Some(parent) = stats_path.parent() {
120        fs::create_dir_all(parent).context("failed to create ~/.shift directory")?;
121
122        // Reject symlinks on the directory (consistent with pipeline.rs profile path validation)
123        let dir_meta = fs::symlink_metadata(parent)
124            .with_context(|| format!("failed to stat {}", parent.display()))?;
125        if dir_meta.file_type().is_symlink() {
126            anyhow::bail!(
127                "stats directory {} is a symlink (possible symlink attack)",
128                parent.display()
129            );
130        }
131    }
132
133    // Open the file with O_NOFOLLOW on Unix to atomically reject symlinks
134    // (avoids TOCTOU race between stat and open).
135    #[cfg(unix)]
136    let mut file = {
137        use std::os::unix::fs::OpenOptionsExt;
138        fs::OpenOptions::new()
139            .create(true)
140            .append(true)
141            .custom_flags(libc::O_NOFOLLOW)
142            .open(&stats_path)
143            .with_context(|| {
144                format!(
145                    "failed to open stats file: {} (symlinks are rejected)",
146                    stats_path.display()
147                )
148            })?
149    };
150
151    #[cfg(not(unix))]
152    let mut file = {
153        // Fallback: stat-then-open (TOCTOU risk, but best we can do on non-Unix)
154        if stats_path.exists() {
155            let file_meta = fs::symlink_metadata(&stats_path)
156                .with_context(|| format!("failed to stat {}", stats_path.display()))?;
157            if file_meta.file_type().is_symlink() {
158                anyhow::bail!(
159                    "stats file {} is a symlink (possible symlink attack)",
160                    stats_path.display()
161                );
162            }
163        }
164        fs::OpenOptions::new()
165            .create(true)
166            .append(true)
167            .open(&stats_path)
168            .with_context(|| format!("failed to open stats file: {}", stats_path.display()))?
169    };
170
171    // Serialize to a single buffer and write atomically to reduce interleave risk
172    let mut line = serde_json::to_string(record).context("failed to serialize run record")?;
173    line.push('\n');
174    file.write_all(line.as_bytes())
175        .context("failed to write to stats file")?;
176    file.flush().context("failed to flush stats file")?;
177
178    Ok(())
179}
180
181/// Result of loading stats records, including count of skipped malformed lines.
182pub struct LoadResult {
183    pub records: Vec<RunRecord>,
184    pub skipped_lines: usize,
185}
186
187/// Load all run records from the stats file.
188pub fn load_records(path: Option<&PathBuf>) -> Result<LoadResult> {
189    let stats_path = match path {
190        Some(p) => p.clone(),
191        None => default_stats_path()?,
192    };
193
194    if !stats_path.exists() {
195        return Ok(LoadResult {
196            records: Vec::new(),
197            skipped_lines: 0,
198        });
199    }
200
201    let file = fs::File::open(&stats_path)
202        .with_context(|| format!("failed to open stats file: {}", stats_path.display()))?;
203    let reader = BufReader::new(file);
204    let mut records = Vec::new();
205    let mut skipped_lines = 0;
206
207    for (i, line) in reader.lines().enumerate() {
208        let line = line.with_context(|| format!("failed to read line {} of stats file", i + 1))?;
209        let trimmed = line.trim();
210        if trimmed.is_empty() {
211            continue;
212        }
213        // Skip excessively long lines (likely corrupt)
214        if trimmed.len() > MAX_LINE_LENGTH {
215            eprintln!(
216                "shift-ai: warning: skipping oversized stats line {} ({} bytes)",
217                i + 1,
218                trimmed.len()
219            );
220            skipped_lines += 1;
221            continue;
222        }
223        match serde_json::from_str::<RunRecord>(trimmed) {
224            Ok(record) => records.push(record),
225            Err(e) => {
226                // Skip malformed lines rather than failing
227                eprintln!(
228                    "shift-ai: warning: skipping malformed stats line {}: {}",
229                    i + 1,
230                    e
231                );
232                skipped_lines += 1;
233            }
234        }
235        // Cap total records to prevent unbounded memory allocation
236        if records.len() >= MAX_STATS_RECORDS {
237            eprintln!(
238                "shift-ai: warning: stats file has >{} entries, loading only the first {}",
239                MAX_STATS_RECORDS, MAX_STATS_RECORDS
240            );
241            break;
242        }
243    }
244
245    Ok(LoadResult {
246        records,
247        skipped_lines,
248    })
249}
250
251/// Compute aggregate gain summary from records.
252pub fn summarize(records: &[RunRecord]) -> GainSummary {
253    let mut s = GainSummary::default();
254    for r in records {
255        s.total_runs += 1;
256        s.total_images += r.images;
257        s.total_modified += r.modified;
258        s.total_bytes_before += r.bytes_before as u64;
259        s.total_bytes_after += r.bytes_after as u64;
260        s.total_openai_before += r.token_savings.openai_before;
261        s.total_openai_after += r.token_savings.openai_after;
262        s.total_anthropic_before += r.token_savings.anthropic_before;
263        s.total_anthropic_after += r.token_savings.anthropic_after;
264    }
265    s
266}
267
268/// Compute daily breakdown from records.
269pub fn daily_breakdown(records: &[RunRecord]) -> Vec<DailyGain> {
270    use std::collections::BTreeMap;
271
272    let mut days: BTreeMap<String, DailyGain> = BTreeMap::new();
273
274    for r in records {
275        let entry = days.entry(r.date.clone()).or_insert_with(|| DailyGain {
276            date: r.date.clone(),
277            runs: 0,
278            images: 0,
279            openai_saved: 0,
280            anthropic_saved: 0,
281        });
282        entry.runs += 1;
283        entry.images += r.images;
284        entry.openai_saved += r
285            .token_savings
286            .openai_before
287            .saturating_sub(r.token_savings.openai_after);
288        entry.anthropic_saved += r
289            .token_savings
290            .anthropic_before
291            .saturating_sub(r.token_savings.anthropic_after);
292    }
293
294    days.into_values().collect()
295}
296
297/// Build a RunRecord from a completed Report.
298pub fn record_from_report(report: &crate::report::Report, provider: &str) -> RunRecord {
299    // Get current timestamp
300    let now = std::time::SystemTime::now()
301        .duration_since(std::time::UNIX_EPOCH)
302        .unwrap_or_default()
303        .as_secs();
304
305    // Format as ISO 8601 (basic — no chrono dependency)
306    let secs_per_day = 86400;
307    let days_since_epoch = now / secs_per_day;
308    let secs_today = now % secs_per_day;
309    let hours = secs_today / 3600;
310    let minutes = (secs_today % 3600) / 60;
311    let seconds = secs_today % 60;
312
313    // Civil date calculation (Hinnant algorithm, exact for proleptic Gregorian calendar)
314    let (year, month, day) = days_to_ymd(days_since_epoch);
315
316    let timestamp = format!(
317        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
318        year, month, day, hours, minutes, seconds
319    );
320    let date = format!("{:04}-{:02}-{:02}", year, month, day);
321
322    RunRecord {
323        timestamp,
324        date,
325        provider: provider.to_string(),
326        images: report.images_found,
327        modified: report.images_modified,
328        dropped: report.images_dropped,
329        svgs_rasterized: report.svgs_rasterized,
330        bytes_before: report.original_size,
331        bytes_after: report.transformed_size,
332        token_savings: report.token_savings.clone(),
333    }
334}
335
/// Convert days since Unix epoch to (year, month, day).
///
/// Civil-date conversion for the proleptic Gregorian calendar. Internally the
/// year is treated as starting on 1 March, so a leap day falls at the very
/// end of the year and month lengths follow a regular pattern.
fn days_to_ymd(days: u64) -> (u64, u64, u64) {
    // Length of one 400-year Gregorian cycle ("era") in days.
    const DAYS_PER_ERA: u64 = 146097;

    // Re-base the day count from 1970-01-01 to 0000-03-01.
    let shifted = days + 719468;
    let era = shifted / DAYS_PER_ERA;
    let day_of_era = shifted % DAYS_PER_ERA;

    // Year within the era, correcting for the leap days already elapsed.
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let march_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_month + 2) / 5 + 1;

    // January and February belong to the following calendar year.
    let march_year = year_of_era + era * 400;
    let (year, month) = if march_month < 10 {
        (march_year, march_month + 3)
    } else {
        (march_year + 1, march_month - 9)
    };

    (year, month, day)
}
351
/// Unit tests for stats persistence, aggregation, and date conversion.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cost::TokenSavings;
    use tempfile::NamedTempFile;

    /// Build a record for `date` with the given OpenAI token counts and fixed
    /// values for every other field.
    fn make_record(date: &str, openai_before: u64, openai_after: u64) -> RunRecord {
        RunRecord {
            timestamp: format!("{}T12:00:00Z", date),
            date: date.to_string(),
            provider: "openai".to_string(),
            images: 3,
            modified: 2,
            dropped: 0,
            svgs_rasterized: 0,
            bytes_before: 5_000_000,
            bytes_after: 1_000_000,
            token_savings: TokenSavings {
                openai_before,
                openai_after,
                anthropic_before: 3000,
                anthropic_after: 1000,
            },
        }
    }

    #[test]
    fn test_record_and_load_roundtrip() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path().to_path_buf();

        let r1 = make_record("2026-04-20", 1000, 300);
        let r2 = make_record("2026-04-21", 2000, 500);

        record_run(&r1, Some(&path)).unwrap();
        record_run(&r2, Some(&path)).unwrap();

        let result = load_records(Some(&path)).unwrap();
        assert_eq!(result.records.len(), 2);
        assert_eq!(result.skipped_lines, 0);
        assert_eq!(result.records[0].date, "2026-04-20");
        assert_eq!(result.records[1].date, "2026-04-21");
    }

    #[test]
    fn test_load_empty_file() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path().to_path_buf();
        let result = load_records(Some(&path)).unwrap();
        assert!(result.records.is_empty());
        assert_eq!(result.skipped_lines, 0);
    }

    #[test]
    fn test_load_nonexistent_file() {
        // Use a path inside a fresh temporary directory: it is guaranteed not to
        // exist and works on platforms without `/tmp`.
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("shift-test-nonexistent-stats.jsonl");
        let result = load_records(Some(&path)).unwrap();
        assert!(result.records.is_empty());
        assert_eq!(result.skipped_lines, 0);
    }

    #[test]
    fn test_summarize() {
        let records = vec![
            make_record("2026-04-20", 1000, 300),
            make_record("2026-04-21", 2000, 500),
        ];
        let summary = summarize(&records);
        assert_eq!(summary.total_runs, 2);
        assert_eq!(summary.total_images, 6);
        assert_eq!(summary.total_modified, 4);
        assert_eq!(summary.total_openai_before, 3000);
        assert_eq!(summary.total_openai_after, 800);
        assert_eq!(summary.openai_saved(), 2200);
    }

    #[test]
    fn test_daily_breakdown() {
        let records = vec![
            make_record("2026-04-20", 1000, 300),
            make_record("2026-04-20", 500, 200),
            make_record("2026-04-21", 2000, 500),
        ];
        let daily = daily_breakdown(&records);
        assert_eq!(daily.len(), 2);
        assert_eq!(daily[0].date, "2026-04-20");
        assert_eq!(daily[0].runs, 2);
        assert_eq!(daily[0].openai_saved, 1000); // (1000-300) + (500-200)
        assert_eq!(daily[1].date, "2026-04-21");
        assert_eq!(daily[1].runs, 1);
    }

    #[test]
    fn test_summary_percentages() {
        let summary = GainSummary {
            total_openai_before: 10000,
            total_openai_after: 3000,
            total_anthropic_before: 5000,
            total_anthropic_after: 1000,
            ..Default::default()
        };
        assert!((summary.openai_pct() - 70.0).abs() < 0.1);
        assert!((summary.anthropic_pct() - 80.0).abs() < 0.1);
    }

    #[test]
    fn test_summary_zero_division() {
        let summary = GainSummary::default();
        assert_eq!(summary.openai_pct(), 0.0);
        assert_eq!(summary.anthropic_pct(), 0.0);
    }

    #[test]
    fn test_malformed_lines_skipped() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path().to_path_buf();

        // Write valid + invalid lines
        let r = make_record("2026-04-20", 1000, 300);
        record_run(&r, Some(&path)).unwrap();
        // Append garbage
        let mut f = fs::OpenOptions::new().append(true).open(&path).unwrap();
        writeln!(f, "not json at all").unwrap();
        writeln!(f, "{{\"partial\": true}}").unwrap();
        // Write another valid record
        record_run(&r, Some(&path)).unwrap();

        let result = load_records(Some(&path)).unwrap();
        assert_eq!(result.records.len(), 2); // only the 2 valid records
        assert_eq!(result.skipped_lines, 2); // 2 malformed lines skipped
    }

    #[test]
    fn test_record_from_report() {
        let mut report = crate::report::Report::new();
        report.images_found = 3;
        report.images_modified = 2;
        report.original_size = 5_000_000;
        report.transformed_size = 1_000_000;
        report.token_savings = TokenSavings {
            openai_before: 2000,
            openai_after: 500,
            anthropic_before: 3000,
            anthropic_after: 800,
        };

        let record = record_from_report(&report, "openai");
        assert_eq!(record.provider, "openai");
        assert_eq!(record.images, 3);
        assert_eq!(record.modified, 2);
        assert!(!record.timestamp.is_empty());
        assert!(!record.date.is_empty());
    }

    #[test]
    fn test_days_to_ymd() {
        // Unix epoch
        let (y, m, d) = days_to_ymd(0);
        assert_eq!((y, m, d), (1970, 1, 1));

        // Leap year: 2000-02-29 = day 11016
        let (y, m, d) = days_to_ymd(11016);
        assert_eq!((y, m, d), (2000, 2, 29));

        // Day after leap day: 2000-03-01 = day 11017
        let (y, m, d) = days_to_ymd(11017);
        assert_eq!((y, m, d), (2000, 3, 1));

        // Non-leap century year: 2100-02-28 = day 47540
        let (y, m, d) = days_to_ymd(47540);
        assert_eq!((y, m, d), (2100, 2, 28));

        // 2100-03-01 = day 47541 (no Feb 29 in 2100)
        let (y, m, d) = days_to_ymd(47541);
        assert_eq!((y, m, d), (2100, 3, 1));

        // Year boundary: 2025-12-31 = day 20453
        let (y, m, d) = days_to_ymd(20453);
        assert_eq!((y, m, d), (2025, 12, 31));

        // 2026-01-01 = day 20454
        let (y, m, d) = days_to_ymd(20454);
        assert_eq!((y, m, d), (2026, 1, 1));
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_directory_rejected() {
        use std::os::unix::fs as unix_fs;

        let real_dir = tempfile::tempdir().unwrap();
        let symlink_dir = tempfile::tempdir().unwrap();
        let symlink_path = symlink_dir.path().join("symlinked-shift");

        // Create symlink to real directory
        unix_fs::symlink(real_dir.path(), &symlink_path).unwrap();

        let stats_file = symlink_path.join("stats.jsonl");
        let r = make_record("2026-04-22", 100, 50);
        let result = record_run(&r, Some(&stats_file));

        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("symlink"),
            "expected symlink error, got: {}",
            err_msg
        );
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_file_rejected() {
        use std::os::unix::fs as unix_fs;

        let tmp_dir = tempfile::tempdir().unwrap();
        let real_file = tmp_dir.path().join("real-stats.jsonl");
        let symlink_file = tmp_dir.path().join("stats.jsonl");

        // Create the real file
        fs::write(&real_file, "").unwrap();
        // Create symlink pointing to real file
        unix_fs::symlink(&real_file, &symlink_file).unwrap();

        let r = make_record("2026-04-22", 100, 50);
        let result = record_run(&r, Some(&symlink_file));

        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("symlink"),
            "expected symlink error, got: {}",
            err_msg
        );
    }

    #[test]
    fn test_skipped_lines_counted() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path().to_path_buf();

        let r = make_record("2026-04-22", 500, 200);
        record_run(&r, Some(&path)).unwrap();

        // Append 3 garbage lines
        let mut f = fs::OpenOptions::new().append(true).open(&path).unwrap();
        writeln!(f, "garbage1").unwrap();
        writeln!(f, "garbage2").unwrap();
        writeln!(f, "garbage3").unwrap();

        record_run(&r, Some(&path)).unwrap();

        let result = load_records(Some(&path)).unwrap();
        assert_eq!(result.records.len(), 2);
        assert_eq!(result.skipped_lines, 3);
    }
}