Skip to main content

shift_preflight/
stats.rs

1//! Persistent run statistics for cumulative token savings tracking.
2//!
3//! Stores one JSON line per SHIFT invocation in `~/.shift/stats.jsonl`.
4//! Inspired by RTK's `rtk gain` analytics system.
5
6use anyhow::{Context, Result};
7use serde::{Deserialize, Serialize};
8use std::fs;
9use std::io::{BufRead, BufReader, Write};
10use std::path::PathBuf;
11
12use crate::cost::TokenSavings;
13
14/// A single run record persisted to the stats file.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct RunRecord {
17    /// ISO 8601 timestamp
18    pub timestamp: String,
19    /// Date portion (YYYY-MM-DD) for daily aggregation
20    pub date: String,
21    /// Provider used
22    pub provider: String,
23    /// Number of images processed
24    pub images: usize,
25    /// Number of images modified
26    pub modified: usize,
27    /// Byte sizes
28    pub bytes_before: usize,
29    pub bytes_after: usize,
30    /// Token savings
31    pub token_savings: TokenSavings,
32}
33
/// Totals accumulated over a set of run records.
///
/// `Default` yields an all-zero summary, which is the starting point for
/// aggregation.
#[derive(Clone, Debug, Default)]
pub struct GainSummary {
    /// Number of runs that were aggregated.
    pub total_runs: usize,
    /// Sum of images processed across all runs.
    pub total_images: usize,
    /// Sum of images modified across all runs.
    pub total_modified: usize,
    /// Sum of byte sizes before the runs.
    pub total_bytes_before: u64,
    /// Sum of byte sizes after the runs.
    pub total_bytes_after: u64,
    /// Sum of OpenAI token counts before the runs.
    pub total_openai_before: u64,
    /// Sum of OpenAI token counts after the runs.
    pub total_openai_after: u64,
    /// Sum of Anthropic token counts before the runs.
    pub total_anthropic_before: u64,
    /// Sum of Anthropic token counts after the runs.
    pub total_anthropic_after: u64,
}
47
/// Totals for all runs that share one calendar date.
#[derive(Clone, Debug)]
pub struct DailyGain {
    /// The date (`YYYY-MM-DD`) this bucket covers.
    pub date: String,
    /// Number of runs recorded on that date.
    pub runs: usize,
    /// Number of images processed on that date.
    pub images: usize,
    /// OpenAI tokens saved on that date.
    pub openai_saved: u64,
    /// Anthropic tokens saved on that date.
    pub anthropic_saved: u64,
}
57
58impl GainSummary {
59    pub fn openai_saved(&self) -> u64 {
60        self.total_openai_before
61            .saturating_sub(self.total_openai_after)
62    }
63
64    pub fn anthropic_saved(&self) -> u64 {
65        self.total_anthropic_before
66            .saturating_sub(self.total_anthropic_after)
67    }
68
69    pub fn openai_pct(&self) -> f64 {
70        if self.total_openai_before == 0 {
71            return 0.0;
72        }
73        (self.openai_saved() as f64 / self.total_openai_before as f64) * 100.0
74    }
75
76    pub fn anthropic_pct(&self) -> f64 {
77        if self.total_anthropic_before == 0 {
78            return 0.0;
79        }
80        (self.anthropic_saved() as f64 / self.total_anthropic_before as f64) * 100.0
81    }
82
83    pub fn bytes_saved(&self) -> u64 {
84        self.total_bytes_before
85            .saturating_sub(self.total_bytes_after)
86    }
87}
88
89/// Get the default stats file path: `~/.shift/stats.jsonl`.
90pub fn default_stats_path() -> Result<PathBuf> {
91    let home = std::env::var("HOME")
92        .or_else(|_| std::env::var("USERPROFILE"))
93        .context("could not determine home directory")?;
94    Ok(PathBuf::from(home).join(".shift").join("stats.jsonl"))
95}
96
97/// Append a run record to the stats file.
98pub fn record_run(record: &RunRecord, path: Option<&PathBuf>) -> Result<()> {
99    let stats_path = match path {
100        Some(p) => p.clone(),
101        None => default_stats_path()?,
102    };
103
104    // Ensure parent directory exists
105    if let Some(parent) = stats_path.parent() {
106        fs::create_dir_all(parent).context("failed to create ~/.shift directory")?;
107
108        // Reject symlinks on the directory (consistent with pipeline.rs profile path validation)
109        let dir_meta = fs::symlink_metadata(parent)
110            .with_context(|| format!("failed to stat {}", parent.display()))?;
111        if dir_meta.file_type().is_symlink() {
112            anyhow::bail!(
113                "stats directory {} is a symlink (possible symlink attack)",
114                parent.display()
115            );
116        }
117    }
118
119    // Reject symlinks on the stats file itself
120    if stats_path.exists() {
121        let file_meta = fs::symlink_metadata(&stats_path)
122            .with_context(|| format!("failed to stat {}", stats_path.display()))?;
123        if file_meta.file_type().is_symlink() {
124            anyhow::bail!(
125                "stats file {} is a symlink (possible symlink attack)",
126                stats_path.display()
127            );
128        }
129    }
130
131    let mut file = fs::OpenOptions::new()
132        .create(true)
133        .append(true)
134        .open(&stats_path)
135        .with_context(|| format!("failed to open stats file: {}", stats_path.display()))?;
136
137    // Serialize to a single buffer and write atomically to reduce interleave risk
138    let mut line = serde_json::to_string(record).context("failed to serialize run record")?;
139    line.push('\n');
140    file.write_all(line.as_bytes())
141        .context("failed to write to stats file")?;
142    file.flush().context("failed to flush stats file")?;
143
144    Ok(())
145}
146
147/// Result of loading stats records, including count of skipped malformed lines.
148pub struct LoadResult {
149    pub records: Vec<RunRecord>,
150    pub skipped_lines: usize,
151}
152
153/// Load all run records from the stats file.
154pub fn load_records(path: Option<&PathBuf>) -> Result<LoadResult> {
155    let stats_path = match path {
156        Some(p) => p.clone(),
157        None => default_stats_path()?,
158    };
159
160    if !stats_path.exists() {
161        return Ok(LoadResult {
162            records: Vec::new(),
163            skipped_lines: 0,
164        });
165    }
166
167    let file = fs::File::open(&stats_path)
168        .with_context(|| format!("failed to open stats file: {}", stats_path.display()))?;
169    let reader = BufReader::new(file);
170    let mut records = Vec::new();
171    let mut skipped_lines = 0;
172
173    for (i, line) in reader.lines().enumerate() {
174        let line = line.with_context(|| format!("failed to read line {} of stats file", i + 1))?;
175        let trimmed = line.trim();
176        if trimmed.is_empty() {
177            continue;
178        }
179        match serde_json::from_str::<RunRecord>(trimmed) {
180            Ok(record) => records.push(record),
181            Err(e) => {
182                // Skip malformed lines rather than failing
183                eprintln!(
184                    "shift: warning: skipping malformed stats line {}: {}",
185                    i + 1,
186                    e
187                );
188                skipped_lines += 1;
189            }
190        }
191    }
192
193    Ok(LoadResult {
194        records,
195        skipped_lines,
196    })
197}
198
199/// Compute aggregate gain summary from records.
200pub fn summarize(records: &[RunRecord]) -> GainSummary {
201    let mut s = GainSummary::default();
202    for r in records {
203        s.total_runs += 1;
204        s.total_images += r.images;
205        s.total_modified += r.modified;
206        s.total_bytes_before += r.bytes_before as u64;
207        s.total_bytes_after += r.bytes_after as u64;
208        s.total_openai_before += r.token_savings.openai_before;
209        s.total_openai_after += r.token_savings.openai_after;
210        s.total_anthropic_before += r.token_savings.anthropic_before;
211        s.total_anthropic_after += r.token_savings.anthropic_after;
212    }
213    s
214}
215
216/// Compute daily breakdown from records.
217pub fn daily_breakdown(records: &[RunRecord]) -> Vec<DailyGain> {
218    use std::collections::BTreeMap;
219
220    let mut days: BTreeMap<String, DailyGain> = BTreeMap::new();
221
222    for r in records {
223        let entry = days.entry(r.date.clone()).or_insert_with(|| DailyGain {
224            date: r.date.clone(),
225            runs: 0,
226            images: 0,
227            openai_saved: 0,
228            anthropic_saved: 0,
229        });
230        entry.runs += 1;
231        entry.images += r.images;
232        entry.openai_saved += r
233            .token_savings
234            .openai_before
235            .saturating_sub(r.token_savings.openai_after);
236        entry.anthropic_saved += r
237            .token_savings
238            .anthropic_before
239            .saturating_sub(r.token_savings.anthropic_after);
240    }
241
242    days.into_values().collect()
243}
244
245/// Build a RunRecord from a completed Report.
246pub fn record_from_report(report: &crate::report::Report, provider: &str) -> RunRecord {
247    // Get current timestamp
248    let now = std::time::SystemTime::now()
249        .duration_since(std::time::UNIX_EPOCH)
250        .unwrap_or_default()
251        .as_secs();
252
253    // Format as ISO 8601 (basic — no chrono dependency)
254    let secs_per_day = 86400;
255    let days_since_epoch = now / secs_per_day;
256    let secs_today = now % secs_per_day;
257    let hours = secs_today / 3600;
258    let minutes = (secs_today % 3600) / 60;
259    let seconds = secs_today % 60;
260
261    // Simple date calculation (approximate, good enough for stats)
262    let (year, month, day) = days_to_ymd(days_since_epoch);
263
264    let timestamp = format!(
265        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
266        year, month, day, hours, minutes, seconds
267    );
268    let date = format!("{:04}-{:02}-{:02}", year, month, day);
269
270    RunRecord {
271        timestamp,
272        date,
273        provider: provider.to_string(),
274        images: report.images_found,
275        modified: report.images_modified,
276        bytes_before: report.original_size,
277        bytes_after: report.transformed_size,
278        token_savings: report.token_savings.clone(),
279    }
280}
281
/// Convert a count of days since the Unix epoch (1970-01-01) into a
/// `(year, month, day)` triple, with `month` in `1..=12` and `day` in `1..=31`.
///
/// Works on 400-year blocks of 146097 days, with the year internally starting
/// in March so that the leap day falls at the end of the year.
fn days_to_ymd(days: u64) -> (u64, u64, u64) {
    const DAYS_PER_ERA: u64 = 146097;
    // Offset that moves day 0 from 1970-01-01 onto the start of a 400-year block.
    const EPOCH_SHIFT: u64 = 719468;

    let shifted = days + EPOCH_SHIFT;
    let era = shifted / DAYS_PER_ERA;
    let day_of_era = shifted % DAYS_PER_ERA;

    // Year within the era, correcting for 4-, 100- and 400-year leap rules.
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let march_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_month + 2) / 5 + 1;

    // January and February (indices 10 and 11) belong to the next civil year.
    let (month, year_carry) = if march_month < 10 {
        (march_month + 3, 0)
    } else {
        (march_month - 9, 1)
    };

    (year_of_era + era * 400 + year_carry, month, day)
}
297
/// Unit tests for stats persistence, aggregation, and date conversion.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cost::TokenSavings;
    use tempfile::NamedTempFile;

    /// Build a record for `date` with the given OpenAI token counts; every
    /// other field uses fixed sample values.
    fn make_record(date: &str, openai_before: u64, openai_after: u64) -> RunRecord {
        RunRecord {
            timestamp: format!("{}T12:00:00Z", date),
            date: date.to_string(),
            provider: "openai".to_string(),
            images: 3,
            modified: 2,
            bytes_before: 5_000_000,
            bytes_after: 1_000_000,
            token_savings: TokenSavings {
                openai_before,
                openai_after,
                anthropic_before: 3000,
                anthropic_after: 1000,
            },
        }
    }

    #[test]
    fn test_record_and_load_roundtrip() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path().to_path_buf();

        let r1 = make_record("2026-04-20", 1000, 300);
        let r2 = make_record("2026-04-21", 2000, 500);

        record_run(&r1, Some(&path)).unwrap();
        record_run(&r2, Some(&path)).unwrap();

        let result = load_records(Some(&path)).unwrap();
        assert_eq!(result.records.len(), 2);
        assert_eq!(result.skipped_lines, 0);
        assert_eq!(result.records[0].date, "2026-04-20");
        assert_eq!(result.records[1].date, "2026-04-21");
    }

    #[test]
    fn test_load_empty_file() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path().to_path_buf();
        let result = load_records(Some(&path)).unwrap();
        assert!(result.records.is_empty());
        assert_eq!(result.skipped_lines, 0);
    }

    #[test]
    fn test_load_nonexistent_file() {
        // A path inside a fresh temp dir is guaranteed not to exist and works
        // on every platform, unlike a hard-coded `/tmp/...` path.
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("does-not-exist.jsonl");
        let result = load_records(Some(&path)).unwrap();
        assert!(result.records.is_empty());
        assert_eq!(result.skipped_lines, 0);
    }

    #[test]
    fn test_summarize() {
        let records = vec![
            make_record("2026-04-20", 1000, 300),
            make_record("2026-04-21", 2000, 500),
        ];
        let summary = summarize(&records);
        assert_eq!(summary.total_runs, 2);
        assert_eq!(summary.total_images, 6);
        assert_eq!(summary.total_modified, 4);
        assert_eq!(summary.total_openai_before, 3000);
        assert_eq!(summary.total_openai_after, 800);
        assert_eq!(summary.openai_saved(), 2200);
        assert_eq!(summary.bytes_saved(), 8_000_000);
    }

    #[test]
    fn test_daily_breakdown() {
        let records = vec![
            make_record("2026-04-20", 1000, 300),
            make_record("2026-04-20", 500, 200),
            make_record("2026-04-21", 2000, 500),
        ];
        let daily = daily_breakdown(&records);
        assert_eq!(daily.len(), 2);
        assert_eq!(daily[0].date, "2026-04-20");
        assert_eq!(daily[0].runs, 2);
        assert_eq!(daily[0].openai_saved, 1000); // (1000-300) + (500-200)
        assert_eq!(daily[1].date, "2026-04-21");
        assert_eq!(daily[1].runs, 1);
        assert_eq!(daily[1].openai_saved, 1500); // 2000-500
    }

    #[test]
    fn test_summary_percentages() {
        let summary = GainSummary {
            total_openai_before: 10000,
            total_openai_after: 3000,
            total_anthropic_before: 5000,
            total_anthropic_after: 1000,
            ..Default::default()
        };
        assert!((summary.openai_pct() - 70.0).abs() < 0.1);
        assert!((summary.anthropic_pct() - 80.0).abs() < 0.1);
    }

    #[test]
    fn test_summary_zero_division() {
        let summary = GainSummary::default();
        assert_eq!(summary.openai_pct(), 0.0);
        assert_eq!(summary.anthropic_pct(), 0.0);
    }

    #[test]
    fn test_malformed_lines_skipped() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path().to_path_buf();

        // Write valid + invalid lines
        let r = make_record("2026-04-20", 1000, 300);
        record_run(&r, Some(&path)).unwrap();
        // Append garbage: one non-JSON line and one JSON object missing the
        // record's fields.
        let mut f = fs::OpenOptions::new().append(true).open(&path).unwrap();
        writeln!(f, "not json at all").unwrap();
        writeln!(f, "{{\"partial\": true}}").unwrap();
        // Write another valid record
        record_run(&r, Some(&path)).unwrap();

        let result = load_records(Some(&path)).unwrap();
        assert_eq!(result.records.len(), 2); // only the 2 valid records
        assert_eq!(result.skipped_lines, 2); // 2 malformed lines skipped
    }

    #[test]
    fn test_record_from_report() {
        let mut report = crate::report::Report::new();
        report.images_found = 3;
        report.images_modified = 2;
        report.original_size = 5_000_000;
        report.transformed_size = 1_000_000;
        report.token_savings = TokenSavings {
            openai_before: 2000,
            openai_after: 500,
            anthropic_before: 3000,
            anthropic_after: 800,
        };

        let record = record_from_report(&report, "openai");
        assert_eq!(record.provider, "openai");
        assert_eq!(record.images, 3);
        assert_eq!(record.modified, 2);
        assert!(!record.timestamp.is_empty());
        assert!(!record.date.is_empty());
        // The timestamp is the date followed by a UTC time of day.
        assert!(record.timestamp.starts_with(&record.date));
        assert!(record.timestamp.ends_with('Z'));
    }

    #[test]
    fn test_days_to_ymd() {
        // Unix epoch
        let (y, m, d) = days_to_ymd(0);
        assert_eq!((y, m, d), (1970, 1, 1));

        // Leap year: 2000-02-29 = day 11016
        let (y, m, d) = days_to_ymd(11016);
        assert_eq!((y, m, d), (2000, 2, 29));

        // Day after leap day: 2000-03-01 = day 11017
        let (y, m, d) = days_to_ymd(11017);
        assert_eq!((y, m, d), (2000, 3, 1));

        // Non-leap century year: 2100-02-28 = day 47540
        let (y, m, d) = days_to_ymd(47540);
        assert_eq!((y, m, d), (2100, 2, 28));

        // 2100-03-01 = day 47541 (no Feb 29 in 2100)
        let (y, m, d) = days_to_ymd(47541);
        assert_eq!((y, m, d), (2100, 3, 1));

        // Year boundary: 2025-12-31 = day 20453
        let (y, m, d) = days_to_ymd(20453);
        assert_eq!((y, m, d), (2025, 12, 31));

        // 2026-01-01 = day 20454
        let (y, m, d) = days_to_ymd(20454);
        assert_eq!((y, m, d), (2026, 1, 1));
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_directory_rejected() {
        use std::os::unix::fs as unix_fs;

        let real_dir = tempfile::tempdir().unwrap();
        let symlink_dir = tempfile::tempdir().unwrap();
        let symlink_path = symlink_dir.path().join("symlinked-shift");

        // Create symlink to real directory
        unix_fs::symlink(real_dir.path(), &symlink_path).unwrap();

        let stats_file = symlink_path.join("stats.jsonl");
        let r = make_record("2026-04-22", 100, 50);
        let result = record_run(&r, Some(&stats_file));

        assert!(result.is_err());
        let err_msg = format!("{}", result.unwrap_err());
        assert!(
            err_msg.contains("symlink"),
            "expected symlink error, got: {}",
            err_msg
        );
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_file_rejected() {
        use std::os::unix::fs as unix_fs;

        let tmp_dir = tempfile::tempdir().unwrap();
        let real_file = tmp_dir.path().join("real-stats.jsonl");
        let symlink_file = tmp_dir.path().join("stats.jsonl");

        // Create the real file
        fs::write(&real_file, "").unwrap();
        // Create symlink pointing to real file
        unix_fs::symlink(&real_file, &symlink_file).unwrap();

        let r = make_record("2026-04-22", 100, 50);
        let result = record_run(&r, Some(&symlink_file));

        assert!(result.is_err());
        let err_msg = format!("{}", result.unwrap_err());
        assert!(
            err_msg.contains("symlink"),
            "expected symlink error, got: {}",
            err_msg
        );
    }

    #[test]
    fn test_skipped_lines_counted() {
        let tmp = NamedTempFile::new().unwrap();
        let path = tmp.path().to_path_buf();

        let r = make_record("2026-04-22", 500, 200);
        record_run(&r, Some(&path)).unwrap();

        // Append 3 garbage lines
        let mut f = fs::OpenOptions::new().append(true).open(&path).unwrap();
        writeln!(f, "garbage1").unwrap();
        writeln!(f, "garbage2").unwrap();
        writeln!(f, "garbage3").unwrap();

        record_run(&r, Some(&path)).unwrap();

        let result = load_records(Some(&path)).unwrap();
        assert_eq!(result.records.len(), 2);
        assert_eq!(result.skipped_lines, 3);
    }
}