Skip to main content

shift_preflight/
stats.rs

1//! Persistent run statistics for cumulative token savings tracking.
2//!
3//! Stores one JSON line per SHIFT invocation in `~/.shift/stats.jsonl`.
4//! Inspired by RTK's `rtk gain` analytics system.
5
6use anyhow::{Context, Result};
7use serde::{Deserialize, Serialize};
8use std::fs;
9use std::io::{BufRead, BufReader, Write};
10use std::path::PathBuf;
11
/// Upper bound on how many records a single load keeps in memory.
/// Guards against unbounded allocation when the stats file is huge or hostile.
const MAX_STATS_RECORDS: usize = 100_000;

/// Longest stats line (in bytes) that is still parsed.
/// Anything longer is treated as corrupt and skipped.
const MAX_LINE_LENGTH: usize = 64 * 1024;

/// Retention window in days; older records are purged automatically.
const RETENTION_DAYS: u64 = 90;
22
23use crate::cost::TokenSavings;
24
25/// A single run record persisted to the stats file.
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct RunRecord {
28    /// ISO 8601 timestamp
29    pub timestamp: String,
30    /// Date portion (YYYY-MM-DD) for daily aggregation
31    pub date: String,
32    /// Provider used
33    pub provider: String,
34    /// Number of images processed
35    pub images: usize,
36    /// Number of images modified
37    pub modified: usize,
38    /// Number of images dropped (economy mode excess, SVG source mode)
39    #[serde(default)]
40    pub dropped: usize,
41    /// Number of SVGs rasterized
42    #[serde(default)]
43    pub svgs_rasterized: usize,
44    /// Byte sizes
45    pub bytes_before: usize,
46    pub bytes_after: usize,
47    /// Token savings
48    pub token_savings: TokenSavings,
49    /// Pipeline execution time in milliseconds
50    #[serde(default)]
51    pub duration_ms: u64,
52    /// Per-action counts: (action_name, count)
53    #[serde(default)]
54    pub action_counts: Vec<(String, usize)>,
55}
56
57/// Aggregated gain summary.
58#[derive(Debug, Clone, Default)]
59pub struct GainSummary {
60    pub total_runs: usize,
61    pub total_images: usize,
62    pub total_modified: usize,
63    pub total_bytes_before: u64,
64    pub total_bytes_after: u64,
65    pub total_openai_before: u64,
66    pub total_openai_after: u64,
67    pub total_anthropic_before: u64,
68    pub total_anthropic_after: u64,
69    pub total_duration_ms: u64,
70    /// Per-provider breakdown sorted by tokens saved descending.
71    pub by_provider: Vec<ProviderGain>,
72    /// Per-action breakdown sorted by count descending.
73    pub by_action: Vec<ActionGain>,
74}
75
/// Aggregated statistics for a single provider.
///
/// Derives `PartialEq` so values can be compared directly (e.g. in tests);
/// `Eq` is not available because `overall_pct` is a float.
#[derive(Debug, Clone, PartialEq)]
pub struct ProviderGain {
    /// Provider name exactly as it appeared in the records.
    pub provider: String,
    /// Number of runs recorded for this provider.
    pub runs: usize,
    /// Total images processed across those runs.
    pub images: usize,
    /// Total tokens saved (before minus after, never negative).
    pub tokens_saved: u64,
    /// Aggregate savings percentage: (total_saved / total_before) * 100.
    pub overall_pct: f64,
    /// Mean pipeline duration per run, in milliseconds.
    pub avg_duration_ms: u64,
}
87
/// Aggregated count for a single pipeline action.
///
/// Derives `PartialEq`/`Eq` so values can be compared directly (e.g. in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ActionGain {
    /// Action name as recorded in `RunRecord::action_counts`.
    pub action: String,
    /// Total number of times the action was applied.
    pub count: usize,
}
94
/// Aggregation bucket for one calendar day.
///
/// Derives `PartialEq`/`Eq` so values can be compared directly (e.g. in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DailyGain {
    /// The day this bucket covers, taken from `RunRecord::date`.
    pub date: String,
    /// Number of runs recorded on that day.
    pub runs: usize,
    /// Total images processed on that day.
    pub images: usize,
    /// OpenAI tokens saved on that day.
    pub openai_saved: u64,
    /// Anthropic tokens saved on that day.
    pub anthropic_saved: u64,
}
104
105impl GainSummary {
106    pub fn openai_saved(&self) -> u64 {
107        self.total_openai_before
108            .saturating_sub(self.total_openai_after)
109    }
110
111    pub fn anthropic_saved(&self) -> u64 {
112        self.total_anthropic_before
113            .saturating_sub(self.total_anthropic_after)
114    }
115
116    pub fn openai_pct(&self) -> f64 {
117        if self.total_openai_before == 0 {
118            return 0.0;
119        }
120        (self.openai_saved() as f64 / self.total_openai_before as f64) * 100.0
121    }
122
123    pub fn anthropic_pct(&self) -> f64 {
124        if self.total_anthropic_before == 0 {
125            return 0.0;
126        }
127        (self.anthropic_saved() as f64 / self.total_anthropic_before as f64) * 100.0
128    }
129
130    pub fn bytes_saved(&self) -> u64 {
131        self.total_bytes_before
132            .saturating_sub(self.total_bytes_after)
133    }
134}
135
136/// Get the default stats file path: `~/.shift/stats.jsonl`.
137pub fn default_stats_path() -> Result<PathBuf> {
138    let home = std::env::var("HOME")
139        .or_else(|_| std::env::var("USERPROFILE"))
140        .context("could not determine home directory")?;
141    Ok(PathBuf::from(home).join(".shift").join("stats.jsonl"))
142}
143
144/// Acquire an advisory file lock on `<stats_path>.lock`.
145///
146/// Uses `flock(LOCK_EX)` on Unix to serialize concurrent writes and purges.
147/// The lock is released when the returned `File` handle is dropped.
148///
149/// On non-Unix platforms, returns `None` (best-effort — no locking).
150/// Errors acquiring the lock are silently ignored (stats recording is
151/// fire-and-forget; we never want to fail an API request because of stats).
152#[cfg(unix)]
153fn acquire_stats_lock(stats_path: &std::path::Path) -> Option<fs::File> {
154    let lock_path = stats_path.with_extension("lock");
155    let file = fs::OpenOptions::new()
156        .create(true)
157        .truncate(false)
158        .write(true)
159        .open(&lock_path)
160        .ok()?;
161
162    // LOCK_EX blocks until the lock is available (serializes concurrent callers).
163    // We use a short timeout via LOCK_NB first, falling back to blocking.
164    use std::os::unix::io::AsRawFd;
165    let fd = file.as_raw_fd();
166    let ret = unsafe { libc::flock(fd, libc::LOCK_EX) };
167    if ret != 0 {
168        // Lock failed — proceed without it (fire-and-forget)
169        return None;
170    }
171
172    Some(file)
173}
174
175#[cfg(not(unix))]
176fn acquire_stats_lock(_stats_path: &std::path::Path) -> Option<fs::File> {
177    None // Advisory locking not available on non-Unix
178}
179
180/// Append a run record to the stats file.
181pub fn record_run(record: &RunRecord, path: Option<&PathBuf>) -> Result<()> {
182    let stats_path = match path {
183        Some(p) => p.clone(),
184        None => default_stats_path()?,
185    };
186
187    // Ensure parent directory exists
188    if let Some(parent) = stats_path.parent() {
189        fs::create_dir_all(parent).context("failed to create ~/.shift directory")?;
190
191        // Reject symlinks on the directory (consistent with pipeline.rs profile path validation)
192        let dir_meta = fs::symlink_metadata(parent)
193            .with_context(|| format!("failed to stat {}", parent.display()))?;
194        if dir_meta.file_type().is_symlink() {
195            anyhow::bail!(
196                "stats directory {} is a symlink (possible symlink attack)",
197                parent.display()
198            );
199        }
200    }
201
202    // Acquire an advisory lock to serialize concurrent writes and purges.
203    // This prevents the race where a purge (read + atomic rename) loses
204    // records that were appended between the read and rename.
205    let _lock = acquire_stats_lock(&stats_path);
206
207    // Open the file with O_NOFOLLOW on Unix to atomically reject symlinks
208    // (avoids TOCTOU race between stat and open).
209    #[cfg(unix)]
210    let mut file = {
211        use std::os::unix::fs::OpenOptionsExt;
212        fs::OpenOptions::new()
213            .create(true)
214            .append(true)
215            .custom_flags(libc::O_NOFOLLOW)
216            .open(&stats_path)
217            .with_context(|| {
218                format!(
219                    "failed to open stats file: {} (symlinks are rejected)",
220                    stats_path.display()
221                )
222            })?
223    };
224
225    #[cfg(not(unix))]
226    let mut file = {
227        // Fallback: stat-then-open (TOCTOU risk, but best we can do on non-Unix)
228        if stats_path.exists() {
229            let file_meta = fs::symlink_metadata(&stats_path)
230                .with_context(|| format!("failed to stat {}", stats_path.display()))?;
231            if file_meta.file_type().is_symlink() {
232                anyhow::bail!(
233                    "stats file {} is a symlink (possible symlink attack)",
234                    stats_path.display()
235                );
236            }
237        }
238        fs::OpenOptions::new()
239            .create(true)
240            .append(true)
241            .open(&stats_path)
242            .with_context(|| format!("failed to open stats file: {}", stats_path.display()))?
243    };
244
245    // Serialize to a single buffer and write atomically to reduce interleave risk
246    let mut line = serde_json::to_string(record).context("failed to serialize run record")?;
247    line.push('\n');
248    file.write_all(line.as_bytes())
249        .context("failed to write to stats file")?;
250    file.flush().context("failed to flush stats file")?;
251
252    // Drop the file handle before purging (purge re-opens the file)
253    drop(file);
254
255    // Auto-purge records older than RETENTION_DAYS.
256    // Only run when the file exceeds 50KB to amortize the cost.
257    // The advisory lock is still held, so purge is safe from concurrent appends.
258    if let Ok(meta) = fs::metadata(&stats_path) {
259        if meta.len() > 50_000 {
260            if let Err(e) = purge_old_records(&stats_path) {
261                eprintln!("shift-ai: warning: auto-purge failed: {}", e);
262            }
263        }
264    }
265
266    // _lock dropped here — releases the advisory lock
267    Ok(())
268}
269
270/// Remove records older than RETENTION_DAYS from the stats file.
271///
272/// Reads all records, filters to those within the retention window,
273/// and rewrites the file atomically via a temp file + rename.
274///
275/// Uses `tempfile::NamedTempFile` for a unique temp filename (safe under
276/// concurrent invocations) and auto-cleanup on failure. Calls `sync_all()`
277/// before `persist()` to ensure data durability before the rename.
278pub fn purge_old_records(path: &PathBuf) -> Result<usize> {
279    let cutoff_date = {
280        let now_secs = std::time::SystemTime::now()
281            .duration_since(std::time::UNIX_EPOCH)
282            .unwrap_or_default()
283            .as_secs();
284        let cutoff_secs = now_secs.saturating_sub(RETENTION_DAYS * 86400);
285        let (y, m, d) = days_to_ymd(cutoff_secs / 86400);
286        format!("{:04}-{:02}-{:02}", y, m, d)
287    };
288
289    let load_result = load_records(Some(path))?;
290    let total = load_result.records.len();
291    let kept: Vec<&RunRecord> = load_result
292        .records
293        .iter()
294        .filter(|r| r.date >= cutoff_date)
295        .collect();
296    let purged = total - kept.len();
297
298    if purged == 0 {
299        return Ok(0);
300    }
301
302    // Write to a unique temp file in the same directory. NamedTempFile
303    // auto-deletes on drop if persist() is never called, so crashes and
304    // errors never leave orphaned files behind.
305    let parent = path
306        .parent()
307        .context("stats file has no parent directory")?;
308    let mut tmp_file =
309        tempfile::NamedTempFile::new_in(parent).context("failed to create temp file for purge")?;
310
311    for record in &kept {
312        let mut line = serde_json::to_string(record)?;
313        line.push('\n');
314        tmp_file.write_all(line.as_bytes())?;
315    }
316    tmp_file.flush()?;
317
318    // Ensure data reaches persistent storage before the atomic rename
319    tmp_file
320        .as_file()
321        .sync_all()
322        .context("failed to sync temp file")?;
323
324    // persist() atomically renames the temp file over the target path.
325    // On failure the temp file is still auto-cleaned up by Drop.
326    tmp_file
327        .persist(path)
328        .context("failed to rename purged stats file")?;
329
330    Ok(purged)
331}
332
333/// Result of loading stats records, including count of skipped malformed lines.
334pub struct LoadResult {
335    pub records: Vec<RunRecord>,
336    pub skipped_lines: usize,
337}
338
339/// Load all run records from the stats file.
340pub fn load_records(path: Option<&PathBuf>) -> Result<LoadResult> {
341    let stats_path = match path {
342        Some(p) => p.clone(),
343        None => default_stats_path()?,
344    };
345
346    if !stats_path.exists() {
347        return Ok(LoadResult {
348            records: Vec::new(),
349            skipped_lines: 0,
350        });
351    }
352
353    let file = fs::File::open(&stats_path)
354        .with_context(|| format!("failed to open stats file: {}", stats_path.display()))?;
355    let reader = BufReader::new(file);
356    let mut records = Vec::new();
357    let mut skipped_lines = 0;
358
359    for (i, line) in reader.lines().enumerate() {
360        let line = line.with_context(|| format!("failed to read line {} of stats file", i + 1))?;
361        let trimmed = line.trim();
362        if trimmed.is_empty() {
363            continue;
364        }
365        // Skip excessively long lines (likely corrupt)
366        if trimmed.len() > MAX_LINE_LENGTH {
367            eprintln!(
368                "shift-ai: warning: skipping oversized stats line {} ({} bytes)",
369                i + 1,
370                trimmed.len()
371            );
372            skipped_lines += 1;
373            continue;
374        }
375        match serde_json::from_str::<RunRecord>(trimmed) {
376            Ok(record) => records.push(record),
377            Err(e) => {
378                // Skip malformed lines rather than failing
379                eprintln!(
380                    "shift-ai: warning: skipping malformed stats line {}: {}",
381                    i + 1,
382                    e
383                );
384                skipped_lines += 1;
385            }
386        }
387        // Cap total records to prevent unbounded memory allocation
388        if records.len() >= MAX_STATS_RECORDS {
389            eprintln!(
390                "shift-ai: warning: stats file has >{} entries, loading only the first {}",
391                MAX_STATS_RECORDS, MAX_STATS_RECORDS
392            );
393            break;
394        }
395    }
396
397    Ok(LoadResult {
398        records,
399        skipped_lines,
400    })
401}
402
403/// Compute aggregate gain summary from records.
404pub fn summarize(records: &[RunRecord]) -> GainSummary {
405    use std::collections::BTreeMap;
406
407    let mut s = GainSummary::default();
408
409    // Per-provider accumulators: (runs, images, tokens_before, tokens_after, total_duration_ms)
410    let mut providers: BTreeMap<String, (usize, usize, u64, u64, u64)> = BTreeMap::new();
411    // Per-action accumulators
412    let mut actions: BTreeMap<String, usize> = BTreeMap::new();
413
414    for r in records {
415        s.total_runs += 1;
416        s.total_images += r.images;
417        s.total_modified += r.modified;
418        s.total_bytes_before += r.bytes_before as u64;
419        s.total_bytes_after += r.bytes_after as u64;
420        s.total_openai_before += r.token_savings.openai_before;
421        s.total_openai_after += r.token_savings.openai_after;
422        s.total_anthropic_before += r.token_savings.anthropic_before;
423        s.total_anthropic_after += r.token_savings.anthropic_after;
424        s.total_duration_ms += r.duration_ms;
425
426        // Per-provider
427        let entry = providers.entry(r.provider.clone()).or_default();
428        entry.0 += 1; // runs
429        entry.1 += r.images; // images
430                             // Use the matching provider's tokens (case-insensitive)
431        let provider_lower = r.provider.to_ascii_lowercase();
432        let (before, after) = if provider_lower == "anthropic" {
433            (
434                r.token_savings.anthropic_before,
435                r.token_savings.anthropic_after,
436            )
437        } else {
438            // Default to OpenAI tokens for "openai" and any unknown providers
439            (r.token_savings.openai_before, r.token_savings.openai_after)
440        };
441        entry.2 += before;
442        entry.3 += after;
443        entry.4 += r.duration_ms;
444
445        // Per-action
446        for (action, count) in &r.action_counts {
447            *actions.entry(action.clone()).or_default() += count;
448        }
449    }
450
451    // Build per-provider vec sorted by tokens saved descending
452    let mut by_provider: Vec<ProviderGain> = providers
453        .into_iter()
454        .map(|(name, (runs, images, before, after, dur))| {
455            let saved = before.saturating_sub(after);
456            let overall_pct = if before > 0 {
457                (saved as f64 / before as f64) * 100.0
458            } else {
459                0.0
460            };
461            let avg_dur = if runs > 0 { dur / runs as u64 } else { 0 };
462            ProviderGain {
463                provider: name,
464                runs,
465                images,
466                tokens_saved: saved,
467                overall_pct,
468                avg_duration_ms: avg_dur,
469            }
470        })
471        .collect();
472    by_provider.sort_by_key(|b| std::cmp::Reverse(b.tokens_saved));
473    s.by_provider = by_provider;
474
475    // Build per-action vec sorted by count descending
476    let mut by_action: Vec<ActionGain> = actions
477        .into_iter()
478        .map(|(action, count)| ActionGain { action, count })
479        .collect();
480    by_action.sort_by_key(|b| std::cmp::Reverse(b.count));
481    s.by_action = by_action;
482
483    s
484}
485
486/// Compute daily breakdown from records.
487pub fn daily_breakdown(records: &[RunRecord]) -> Vec<DailyGain> {
488    use std::collections::BTreeMap;
489
490    let mut days: BTreeMap<String, DailyGain> = BTreeMap::new();
491
492    for r in records {
493        let entry = days.entry(r.date.clone()).or_insert_with(|| DailyGain {
494            date: r.date.clone(),
495            runs: 0,
496            images: 0,
497            openai_saved: 0,
498            anthropic_saved: 0,
499        });
500        entry.runs += 1;
501        entry.images += r.images;
502        entry.openai_saved += r
503            .token_savings
504            .openai_before
505            .saturating_sub(r.token_savings.openai_after);
506        entry.anthropic_saved += r
507            .token_savings
508            .anthropic_before
509            .saturating_sub(r.token_savings.anthropic_after);
510    }
511
512    days.into_values().collect()
513}
514
515/// Build a RunRecord from a completed Report.
516pub fn record_from_report(
517    report: &crate::report::Report,
518    provider: &str,
519    duration_ms: u64,
520) -> RunRecord {
521    // Get current timestamp
522    let now = std::time::SystemTime::now()
523        .duration_since(std::time::UNIX_EPOCH)
524        .unwrap_or_default()
525        .as_secs();
526
527    // Format as ISO 8601 (basic — no chrono dependency)
528    let secs_per_day = 86400;
529    let days_since_epoch = now / secs_per_day;
530    let secs_today = now % secs_per_day;
531    let hours = secs_today / 3600;
532    let minutes = (secs_today % 3600) / 60;
533    let seconds = secs_today % 60;
534
535    // Civil date calculation (Hinnant algorithm, exact for proleptic Gregorian calendar)
536    let (year, month, day) = days_to_ymd(days_since_epoch);
537
538    let timestamp = format!(
539        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
540        year, month, day, hours, minutes, seconds
541    );
542    let date = format!("{:04}-{:02}-{:02}", year, month, day);
543
544    // Compute per-action counts from action records
545    let mut action_map = std::collections::BTreeMap::new();
546    for a in &report.actions {
547        *action_map.entry(a.action.clone()).or_insert(0usize) += 1;
548    }
549    let action_counts: Vec<(String, usize)> = action_map.into_iter().collect();
550
551    RunRecord {
552        timestamp,
553        date,
554        provider: provider.to_string(),
555        images: report.images_found,
556        modified: report.images_modified,
557        dropped: report.images_dropped,
558        svgs_rasterized: report.svgs_rasterized,
559        bytes_before: report.original_size,
560        bytes_after: report.transformed_size,
561        token_savings: report.token_savings.clone(),
562        duration_ms,
563        action_counts,
564    }
565}
566
/// Convert a day count since the Unix epoch (1970-01-01 = day 0) into a
/// proleptic Gregorian `(year, month, day)` triple.
///
/// Works on 400-year "eras" of 146 097 days, with the year internally
/// starting on 1 March so that the leap day falls at the end of the year.
fn days_to_ymd(days: u64) -> (u64, u64, u64) {
    const DAYS_PER_ERA: u64 = 146097;
    // Days from 0000-03-01 to 1970-01-01.
    const EPOCH_SHIFT: u64 = 719468;

    let shifted = days + EPOCH_SHIFT;
    let era = shifted / DAYS_PER_ERA;
    let day_of_era = shifted % DAYS_PER_ERA;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;

    // January and February belong to the following calendar year.
    let (month, year_carry) = if march_based_month < 10 {
        (march_based_month + 3, 0)
    } else {
        (march_based_month - 9, 1)
    };

    (year_of_era + era * 400 + year_carry, month, day)
}
582
583#[cfg(test)]
584mod tests {
585    use super::*;
586    use crate::cost::TokenSavings;
587    use tempfile::NamedTempFile;
588
589    fn make_record(date: &str, openai_before: u64, openai_after: u64) -> RunRecord {
590        RunRecord {
591            timestamp: format!("{}T12:00:00Z", date),
592            date: date.to_string(),
593            provider: "openai".to_string(),
594            images: 3,
595            modified: 2,
596            dropped: 0,
597            svgs_rasterized: 0,
598            bytes_before: 5_000_000,
599            bytes_after: 1_000_000,
600            token_savings: TokenSavings {
601                openai_before,
602                openai_after,
603                anthropic_before: 3000,
604                anthropic_after: 1000,
605            },
606            duration_ms: 500,
607            action_counts: vec![("resize".to_string(), 2)],
608        }
609    }
610
611    #[test]
612    fn test_record_and_load_roundtrip() {
613        let tmp = NamedTempFile::new().unwrap();
614        let path = tmp.path().to_path_buf();
615
616        let r1 = make_record("2026-04-20", 1000, 300);
617        let r2 = make_record("2026-04-21", 2000, 500);
618
619        record_run(&r1, Some(&path)).unwrap();
620        record_run(&r2, Some(&path)).unwrap();
621
622        let result = load_records(Some(&path)).unwrap();
623        assert_eq!(result.records.len(), 2);
624        assert_eq!(result.skipped_lines, 0);
625        assert_eq!(result.records[0].date, "2026-04-20");
626        assert_eq!(result.records[1].date, "2026-04-21");
627    }
628
629    #[test]
630    fn test_load_empty_file() {
631        let tmp = NamedTempFile::new().unwrap();
632        let path = tmp.path().to_path_buf();
633        let result = load_records(Some(&path)).unwrap();
634        assert!(result.records.is_empty());
635        assert_eq!(result.skipped_lines, 0);
636    }
637
638    #[test]
639    fn test_load_nonexistent_file() {
640        let path = PathBuf::from("/tmp/shift-test-nonexistent-stats.jsonl");
641        let result = load_records(Some(&path)).unwrap();
642        assert!(result.records.is_empty());
643        assert_eq!(result.skipped_lines, 0);
644    }
645
646    #[test]
647    fn test_summarize() {
648        let records = vec![
649            make_record("2026-04-20", 1000, 300),
650            make_record("2026-04-21", 2000, 500),
651        ];
652        let summary = summarize(&records);
653        assert_eq!(summary.total_runs, 2);
654        assert_eq!(summary.total_images, 6);
655        assert_eq!(summary.total_modified, 4);
656        assert_eq!(summary.total_openai_before, 3000);
657        assert_eq!(summary.total_openai_after, 800);
658        assert_eq!(summary.openai_saved(), 2200);
659    }
660
661    #[test]
662    fn test_daily_breakdown() {
663        let records = vec![
664            make_record("2026-04-20", 1000, 300),
665            make_record("2026-04-20", 500, 200),
666            make_record("2026-04-21", 2000, 500),
667        ];
668        let daily = daily_breakdown(&records);
669        assert_eq!(daily.len(), 2);
670        assert_eq!(daily[0].date, "2026-04-20");
671        assert_eq!(daily[0].runs, 2);
672        assert_eq!(daily[0].openai_saved, 1000); // (1000-300) + (500-200)
673        assert_eq!(daily[1].date, "2026-04-21");
674        assert_eq!(daily[1].runs, 1);
675    }
676
677    #[test]
678    fn test_summary_percentages() {
679        let summary = GainSummary {
680            total_openai_before: 10000,
681            total_openai_after: 3000,
682            total_anthropic_before: 5000,
683            total_anthropic_after: 1000,
684            ..Default::default()
685        };
686        assert!((summary.openai_pct() - 70.0).abs() < 0.1);
687        assert!((summary.anthropic_pct() - 80.0).abs() < 0.1);
688    }
689
690    #[test]
691    fn test_summary_zero_division() {
692        let summary = GainSummary::default();
693        assert_eq!(summary.openai_pct(), 0.0);
694        assert_eq!(summary.anthropic_pct(), 0.0);
695    }
696
697    #[test]
698    fn test_malformed_lines_skipped() {
699        let tmp = NamedTempFile::new().unwrap();
700        let path = tmp.path().to_path_buf();
701
702        // Write valid + invalid lines
703        let r = make_record("2026-04-20", 1000, 300);
704        record_run(&r, Some(&path)).unwrap();
705        // Append garbage
706        let mut f = fs::OpenOptions::new().append(true).open(&path).unwrap();
707        writeln!(f, "not json at all").unwrap();
708        writeln!(f, "{{\"partial\": true}}").unwrap();
709        // Write another valid record
710        record_run(&r, Some(&path)).unwrap();
711
712        let result = load_records(Some(&path)).unwrap();
713        assert_eq!(result.records.len(), 2); // only the 2 valid records
714        assert_eq!(result.skipped_lines, 2); // 2 malformed lines skipped
715    }
716
717    #[test]
718    fn test_record_from_report() {
719        let mut report = crate::report::Report::new();
720        report.images_found = 3;
721        report.images_modified = 2;
722        report.original_size = 5_000_000;
723        report.transformed_size = 1_000_000;
724        report.token_savings = TokenSavings {
725            openai_before: 2000,
726            openai_after: 500,
727            anthropic_before: 3000,
728            anthropic_after: 800,
729        };
730
731        let record = record_from_report(&report, "openai", 1234);
732        assert_eq!(record.provider, "openai");
733        assert_eq!(record.images, 3);
734        assert_eq!(record.modified, 2);
735        assert_eq!(record.duration_ms, 1234);
736        assert!(!record.timestamp.is_empty());
737        assert!(!record.date.is_empty());
738    }
739
740    #[test]
741    fn test_days_to_ymd() {
742        // Unix epoch
743        let (y, m, d) = days_to_ymd(0);
744        assert_eq!((y, m, d), (1970, 1, 1));
745
746        // Leap year: 2000-02-29 = day 11016
747        let (y, m, d) = days_to_ymd(11016);
748        assert_eq!((y, m, d), (2000, 2, 29));
749
750        // Day after leap day: 2000-03-01 = day 11017
751        let (y, m, d) = days_to_ymd(11017);
752        assert_eq!((y, m, d), (2000, 3, 1));
753
754        // Non-leap century year: 2100-02-28 = day 47540
755        let (y, m, d) = days_to_ymd(47540);
756        assert_eq!((y, m, d), (2100, 2, 28));
757
758        // 2100-03-01 = day 47541 (no Feb 29 in 2100)
759        let (y, m, d) = days_to_ymd(47541);
760        assert_eq!((y, m, d), (2100, 3, 1));
761
762        // Year boundary: 2025-12-31 = day 20453
763        let (y, m, d) = days_to_ymd(20453);
764        assert_eq!((y, m, d), (2025, 12, 31));
765
766        // 2026-01-01 = day 20454
767        let (y, m, d) = days_to_ymd(20454);
768        assert_eq!((y, m, d), (2026, 1, 1));
769    }
770
771    #[cfg(unix)]
772    #[test]
773    fn test_symlink_directory_rejected() {
774        use std::os::unix::fs as unix_fs;
775
776        let real_dir = tempfile::tempdir().unwrap();
777        let symlink_dir = tempfile::tempdir().unwrap();
778        let symlink_path = symlink_dir.path().join("symlinked-shift");
779
780        // Create symlink to real directory
781        unix_fs::symlink(real_dir.path(), &symlink_path).unwrap();
782
783        let stats_file = symlink_path.join("stats.jsonl");
784        let r = make_record("2026-04-22", 100, 50);
785        let result = record_run(&r, Some(&stats_file));
786
787        assert!(result.is_err());
788        let err_msg = format!("{}", result.unwrap_err());
789        assert!(
790            err_msg.contains("symlink"),
791            "expected symlink error, got: {}",
792            err_msg
793        );
794    }
795
796    #[cfg(unix)]
797    #[test]
798    fn test_symlink_file_rejected() {
799        use std::os::unix::fs as unix_fs;
800
801        let tmp_dir = tempfile::tempdir().unwrap();
802        let real_file = tmp_dir.path().join("real-stats.jsonl");
803        let symlink_file = tmp_dir.path().join("stats.jsonl");
804
805        // Create the real file
806        fs::write(&real_file, "").unwrap();
807        // Create symlink pointing to real file
808        unix_fs::symlink(&real_file, &symlink_file).unwrap();
809
810        let r = make_record("2026-04-22", 100, 50);
811        let result = record_run(&r, Some(&symlink_file));
812
813        assert!(result.is_err());
814        let err_msg = format!("{}", result.unwrap_err());
815        assert!(
816            err_msg.contains("symlink"),
817            "expected symlink error, got: {}",
818            err_msg
819        );
820    }
821
822    #[test]
823    fn test_skipped_lines_counted() {
824        let tmp = NamedTempFile::new().unwrap();
825        let path = tmp.path().to_path_buf();
826
827        let r = make_record("2026-04-22", 500, 200);
828        record_run(&r, Some(&path)).unwrap();
829
830        // Append 3 garbage lines
831        let mut f = fs::OpenOptions::new().append(true).open(&path).unwrap();
832        writeln!(f, "garbage1").unwrap();
833        writeln!(f, "garbage2").unwrap();
834        writeln!(f, "garbage3").unwrap();
835
836        record_run(&r, Some(&path)).unwrap();
837
838        let result = load_records(Some(&path)).unwrap();
839        assert_eq!(result.records.len(), 2);
840        assert_eq!(result.skipped_lines, 3);
841    }
842
843    // ── Helpers for multi-provider tests ─────────────────────────────
844
845    fn make_anthropic_record(date: &str, anthropic_before: u64, anthropic_after: u64) -> RunRecord {
846        RunRecord {
847            timestamp: format!("{}T12:00:00Z", date),
848            date: date.to_string(),
849            provider: "anthropic".to_string(),
850            images: 2,
851            modified: 1,
852            dropped: 0,
853            svgs_rasterized: 0,
854            bytes_before: 3_000_000,
855            bytes_after: 800_000,
856            token_savings: TokenSavings {
857                openai_before: 500,
858                openai_after: 200,
859                anthropic_before,
860                anthropic_after,
861            },
862            duration_ms: 300,
863            action_counts: vec![("recompress".to_string(), 1)],
864        }
865    }
866
867    fn make_record_with_actions(date: &str, actions: Vec<(String, usize)>) -> RunRecord {
868        RunRecord {
869            action_counts: actions,
870            ..make_record(date, 1000, 300)
871        }
872    }
873
874    // ── Purge tests ──────────────────────────────────────────────────
875
876    #[test]
877    fn test_purge_removes_old_records() {
878        let tmp = NamedTempFile::new().unwrap();
879        let path = tmp.path().to_path_buf();
880
881        // Write a record with a very old date (should be purged)
882        let old = make_record("2020-01-01", 1000, 300);
883        record_run(&old, Some(&path)).unwrap();
884
885        // Write a record with today's date (should be kept)
886        let now_secs = std::time::SystemTime::now()
887            .duration_since(std::time::UNIX_EPOCH)
888            .unwrap()
889            .as_secs();
890        let (y, m, d) = days_to_ymd(now_secs / 86400);
891        let today = format!("{:04}-{:02}-{:02}", y, m, d);
892        let recent = make_record(&today, 2000, 500);
893        record_run(&recent, Some(&path)).unwrap();
894
895        let purged = purge_old_records(&path).unwrap();
896        assert_eq!(purged, 1);
897
898        let result = load_records(Some(&path)).unwrap();
899        assert_eq!(result.records.len(), 1);
900        assert_eq!(result.records[0].date, today);
901    }
902
903    #[test]
904    fn test_purge_no_op_when_nothing_to_purge() {
905        let tmp = NamedTempFile::new().unwrap();
906        let path = tmp.path().to_path_buf();
907
908        let now_secs = std::time::SystemTime::now()
909            .duration_since(std::time::UNIX_EPOCH)
910            .unwrap()
911            .as_secs();
912        let (y, m, d) = days_to_ymd(now_secs / 86400);
913        let today = format!("{:04}-{:02}-{:02}", y, m, d);
914
915        let r = make_record(&today, 1000, 300);
916        record_run(&r, Some(&path)).unwrap();
917
918        let purged = purge_old_records(&path).unwrap();
919        assert_eq!(purged, 0);
920
921        let result = load_records(Some(&path)).unwrap();
922        assert_eq!(result.records.len(), 1);
923    }
924
925    #[test]
926    fn test_purge_all_records_expired() {
927        let tmp = NamedTempFile::new().unwrap();
928        let path = tmp.path().to_path_buf();
929
930        let old1 = make_record("2019-01-01", 1000, 300);
931        let old2 = make_record("2019-06-15", 2000, 500);
932        record_run(&old1, Some(&path)).unwrap();
933        record_run(&old2, Some(&path)).unwrap();
934
935        let purged = purge_old_records(&path).unwrap();
936        assert_eq!(purged, 2);
937
938        let result = load_records(Some(&path)).unwrap();
939        assert_eq!(result.records.len(), 0);
940    }
941
942    #[test]
943    fn test_purge_preserves_record_data() {
944        let tmp = NamedTempFile::new().unwrap();
945        let path = tmp.path().to_path_buf();
946
947        let now_secs = std::time::SystemTime::now()
948            .duration_since(std::time::UNIX_EPOCH)
949            .unwrap()
950            .as_secs();
951        let (y, m, d) = days_to_ymd(now_secs / 86400);
952        let today = format!("{:04}-{:02}-{:02}", y, m, d);
953
954        let r = RunRecord {
955            provider: "anthropic".to_string(),
956            images: 7,
957            modified: 5,
958            duration_ms: 1234,
959            action_counts: vec![("resize".to_string(), 3), ("convert".to_string(), 2)],
960            ..make_record(&today, 5000, 1500)
961        };
962        // Also write an old record to force purge to actually rewrite
963        let old = make_record("2020-01-01", 100, 50);
964        record_run(&old, Some(&path)).unwrap();
965        record_run(&r, Some(&path)).unwrap();
966
967        let purged = purge_old_records(&path).unwrap();
968        assert_eq!(purged, 1);
969
970        let result = load_records(Some(&path)).unwrap();
971        assert_eq!(result.records.len(), 1);
972        let kept = &result.records[0];
973        assert_eq!(kept.provider, "anthropic");
974        assert_eq!(kept.images, 7);
975        assert_eq!(kept.modified, 5);
976        assert_eq!(kept.duration_ms, 1234);
977        assert_eq!(kept.action_counts.len(), 2);
978    }
979
980    // ── Per-provider summarization tests ─────────────────────────────
981
982    #[test]
983    fn test_summarize_by_provider() {
984        let records = vec![
985            make_record("2026-04-20", 1000, 300),            // openai
986            make_record("2026-04-21", 2000, 500),            // openai
987            make_anthropic_record("2026-04-20", 4000, 1000), // anthropic
988        ];
989        let summary = summarize(&records);
990
991        assert_eq!(summary.by_provider.len(), 2);
992
993        // Sorted by tokens_saved descending
994        // anthropic: 4000-1000 = 3000
995        // openai:    (1000-300) + (2000-500) = 700 + 1500 = 2200
996        assert_eq!(summary.by_provider[0].provider, "anthropic");
997        assert_eq!(summary.by_provider[0].tokens_saved, 3000);
998        assert_eq!(summary.by_provider[0].runs, 1);
999        assert_eq!(summary.by_provider[0].images, 2);
1000        assert!((summary.by_provider[0].overall_pct - 75.0).abs() < 0.1);
1001
1002        assert_eq!(summary.by_provider[1].provider, "openai");
1003        assert_eq!(summary.by_provider[1].tokens_saved, 2200);
1004        assert_eq!(summary.by_provider[1].runs, 2);
1005        assert_eq!(summary.by_provider[1].images, 6);
1006    }
1007
1008    #[test]
1009    fn test_summarize_single_provider() {
1010        let records = vec![make_record("2026-04-20", 1000, 300)];
1011        let summary = summarize(&records);
1012        assert_eq!(summary.by_provider.len(), 1);
1013        assert_eq!(summary.by_provider[0].provider, "openai");
1014        assert_eq!(summary.by_provider[0].tokens_saved, 700);
1015    }
1016
1017    #[test]
1018    fn test_summarize_provider_duration() {
1019        let records = vec![
1020            make_record("2026-04-20", 1000, 300), // duration_ms = 500
1021            make_record("2026-04-21", 2000, 500), // duration_ms = 500
1022        ];
1023        let summary = summarize(&records);
1024        assert_eq!(summary.by_provider[0].avg_duration_ms, 500); // 1000 total / 2 runs
1025        assert_eq!(summary.total_duration_ms, 1000);
1026    }
1027
1028    // ── Per-action summarization tests ───────────────────────────────
1029
1030    #[test]
1031    fn test_summarize_by_action() {
1032        let records = vec![
1033            make_record_with_actions(
1034                "2026-04-20",
1035                vec![("resize".to_string(), 3), ("convert".to_string(), 1)],
1036            ),
1037            make_record_with_actions(
1038                "2026-04-21",
1039                vec![("resize".to_string(), 2), ("recompress".to_string(), 4)],
1040            ),
1041        ];
1042        let summary = summarize(&records);
1043
1044        assert_eq!(summary.by_action.len(), 3);
1045        // Sorted by count descending: resize=5, recompress=4, convert=1
1046        assert_eq!(summary.by_action[0].action, "resize");
1047        assert_eq!(summary.by_action[0].count, 5);
1048        assert_eq!(summary.by_action[1].action, "recompress");
1049        assert_eq!(summary.by_action[1].count, 4);
1050        assert_eq!(summary.by_action[2].action, "convert");
1051        assert_eq!(summary.by_action[2].count, 1);
1052    }
1053
1054    #[test]
1055    fn test_summarize_empty_actions() {
1056        let mut r = make_record("2026-04-20", 1000, 300);
1057        r.action_counts = vec![];
1058        let summary = summarize(&[r]);
1059        assert!(summary.by_action.is_empty());
1060    }
1061
1062    #[test]
1063    fn test_summarize_empty_records() {
1064        let summary = summarize(&[]);
1065        assert_eq!(summary.total_runs, 0);
1066        assert!(summary.by_provider.is_empty());
1067        assert!(summary.by_action.is_empty());
1068    }
1069}