Skip to main content

shift_preflight/
stats.rs

1//! Persistent run statistics for cumulative token savings tracking.
2//!
3//! Stores one JSON line per SHIFT invocation in `~/.shift/stats.jsonl`.
4//! Inspired by RTK's `rtk gain` analytics system.
5
6use anyhow::{Context, Result};
7use serde::{Deserialize, Serialize};
8use std::fs;
9use std::io::{BufRead, BufReader, Write};
10use std::path::PathBuf;
11
/// Maximum number of records to load from the stats file.
///
/// `load_records` stops reading once this many records have been parsed,
/// which prevents unbounded memory allocation from huge/malicious files.
const MAX_STATS_RECORDS: usize = 100_000;

/// Maximum line length (bytes) to accept when reading the stats file.
///
/// The limit is applied to the whitespace-trimmed line. Lines longer than
/// this are skipped (and counted as skipped) as likely corrupt.
const MAX_LINE_LENGTH: usize = 65_536;

/// Records whose `date` is older than this many days are automatically purged.
const RETENTION_DAYS: u64 = 90;
22
23use crate::cost::TokenSavings;
24
/// A single run record persisted to the stats file.
///
/// One record is serialized as one JSON object per line. Fields marked
/// `#[serde(default)]` fall back to their zero/empty value when the key is
/// missing from a line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RunRecord {
    /// ISO 8601 timestamp (`YYYY-MM-DDTHH:MM:SSZ` as produced by `record_from_report`)
    pub timestamp: String,
    /// Date portion (YYYY-MM-DD) for daily aggregation and retention purging
    pub date: String,
    /// Provider used
    pub provider: String,
    /// Number of images processed
    pub images: usize,
    /// Number of images modified
    pub modified: usize,
    /// Number of images dropped (economy mode excess, SVG source mode)
    #[serde(default)]
    pub dropped: usize,
    /// Number of SVGs rasterized
    #[serde(default)]
    pub svgs_rasterized: usize,
    /// Size in bytes before the run (taken from the report's `original_size`)
    pub bytes_before: usize,
    /// Size in bytes after the run (taken from the report's `transformed_size`)
    pub bytes_after: usize,
    /// Token savings
    pub token_savings: TokenSavings,
    /// Pipeline execution time in milliseconds
    #[serde(default)]
    pub duration_ms: u64,
    /// Per-action counts: (action_name, count)
    #[serde(default)]
    pub action_counts: Vec<(String, usize)>,
}
56
/// Aggregated gain summary.
///
/// Produced by `summarize`; every `total_*` field is the sum of the
/// corresponding per-record value over all records passed in.
#[derive(Debug, Clone, Default)]
pub struct GainSummary {
    /// Number of records aggregated.
    pub total_runs: usize,
    /// Sum of `RunRecord::images`.
    pub total_images: usize,
    /// Sum of `RunRecord::modified`.
    pub total_modified: usize,
    /// Sum of `RunRecord::bytes_before`.
    pub total_bytes_before: u64,
    /// Sum of `RunRecord::bytes_after`.
    pub total_bytes_after: u64,
    /// Sum of the OpenAI "before" token counts.
    pub total_openai_before: u64,
    /// Sum of the OpenAI "after" token counts.
    pub total_openai_after: u64,
    /// Sum of the Anthropic "before" token counts.
    pub total_anthropic_before: u64,
    /// Sum of the Anthropic "after" token counts.
    pub total_anthropic_after: u64,
    /// Sum of `RunRecord::duration_ms`.
    pub total_duration_ms: u64,
    /// Per-provider breakdown sorted by tokens saved descending.
    pub by_provider: Vec<ProviderGain>,
    /// Per-action breakdown sorted by count descending.
    pub by_action: Vec<ActionGain>,
}
75
/// Per-provider aggregated stats.
///
/// Built by `summarize`, one entry per distinct `RunRecord::provider` string.
#[derive(Debug, Clone)]
pub struct ProviderGain {
    /// Provider name exactly as stored in the records.
    pub provider: String,
    /// Number of records for this provider.
    pub runs: usize,
    /// Total images across those records.
    pub images: usize,
    /// Tokens saved (`before - after`, floored at zero). Anthropic token
    /// counts are used for the "anthropic" provider (case-insensitive);
    /// every other provider uses the OpenAI counts.
    pub tokens_saved: u64,
    /// Aggregate savings percentage: (total_saved / total_before) * 100.
    pub overall_pct: f64,
    /// Mean `duration_ms` per run (integer division).
    pub avg_duration_ms: u64,
}
87
/// Per-action aggregated stats.
#[derive(Debug, Clone)]
pub struct ActionGain {
    /// Action name as it appears in `RunRecord::action_counts`.
    pub action: String,
    /// Sum of the counts recorded for this action across all records.
    pub count: usize,
}
94
/// Daily aggregation bucket.
///
/// Built by `daily_breakdown`, one bucket per distinct `RunRecord::date`.
#[derive(Debug, Clone)]
pub struct DailyGain {
    /// The `YYYY-MM-DD` date string shared by the records in this bucket.
    pub date: String,
    /// Number of records on this date.
    pub runs: usize,
    /// Total images across those records.
    pub images: usize,
    /// Sum of per-record OpenAI savings (`before - after`, floored at zero).
    pub openai_saved: u64,
    /// Sum of per-record Anthropic savings (`before - after`, floored at zero).
    pub anthropic_saved: u64,
}
104
105impl GainSummary {
106    pub fn openai_saved(&self) -> u64 {
107        self.total_openai_before
108            .saturating_sub(self.total_openai_after)
109    }
110
111    pub fn anthropic_saved(&self) -> u64 {
112        self.total_anthropic_before
113            .saturating_sub(self.total_anthropic_after)
114    }
115
116    pub fn openai_pct(&self) -> f64 {
117        if self.total_openai_before == 0 {
118            return 0.0;
119        }
120        (self.openai_saved() as f64 / self.total_openai_before as f64) * 100.0
121    }
122
123    pub fn anthropic_pct(&self) -> f64 {
124        if self.total_anthropic_before == 0 {
125            return 0.0;
126        }
127        (self.anthropic_saved() as f64 / self.total_anthropic_before as f64) * 100.0
128    }
129
130    pub fn bytes_saved(&self) -> u64 {
131        self.total_bytes_before
132            .saturating_sub(self.total_bytes_after)
133    }
134}
135
136/// Get the default stats file path: `~/.shift/stats.jsonl`.
137pub fn default_stats_path() -> Result<PathBuf> {
138    let home = std::env::var("HOME")
139        .or_else(|_| std::env::var("USERPROFILE"))
140        .context("could not determine home directory")?;
141    Ok(PathBuf::from(home).join(".shift").join("stats.jsonl"))
142}
143
144/// Append a run record to the stats file.
145pub fn record_run(record: &RunRecord, path: Option<&PathBuf>) -> Result<()> {
146    let stats_path = match path {
147        Some(p) => p.clone(),
148        None => default_stats_path()?,
149    };
150
151    // Ensure parent directory exists
152    if let Some(parent) = stats_path.parent() {
153        fs::create_dir_all(parent).context("failed to create ~/.shift directory")?;
154
155        // Reject symlinks on the directory (consistent with pipeline.rs profile path validation)
156        let dir_meta = fs::symlink_metadata(parent)
157            .with_context(|| format!("failed to stat {}", parent.display()))?;
158        if dir_meta.file_type().is_symlink() {
159            anyhow::bail!(
160                "stats directory {} is a symlink (possible symlink attack)",
161                parent.display()
162            );
163        }
164    }
165
166    // Open the file with O_NOFOLLOW on Unix to atomically reject symlinks
167    // (avoids TOCTOU race between stat and open).
168    #[cfg(unix)]
169    let mut file = {
170        use std::os::unix::fs::OpenOptionsExt;
171        fs::OpenOptions::new()
172            .create(true)
173            .append(true)
174            .custom_flags(libc::O_NOFOLLOW)
175            .open(&stats_path)
176            .with_context(|| {
177                format!(
178                    "failed to open stats file: {} (symlinks are rejected)",
179                    stats_path.display()
180                )
181            })?
182    };
183
184    #[cfg(not(unix))]
185    let mut file = {
186        // Fallback: stat-then-open (TOCTOU risk, but best we can do on non-Unix)
187        if stats_path.exists() {
188            let file_meta = fs::symlink_metadata(&stats_path)
189                .with_context(|| format!("failed to stat {}", stats_path.display()))?;
190            if file_meta.file_type().is_symlink() {
191                anyhow::bail!(
192                    "stats file {} is a symlink (possible symlink attack)",
193                    stats_path.display()
194                );
195            }
196        }
197        fs::OpenOptions::new()
198            .create(true)
199            .append(true)
200            .open(&stats_path)
201            .with_context(|| format!("failed to open stats file: {}", stats_path.display()))?
202    };
203
204    // Serialize to a single buffer and write atomically to reduce interleave risk
205    let mut line = serde_json::to_string(record).context("failed to serialize run record")?;
206    line.push('\n');
207    file.write_all(line.as_bytes())
208        .context("failed to write to stats file")?;
209    file.flush().context("failed to flush stats file")?;
210
211    // Drop the file handle before purging (purge needs exclusive access)
212    drop(file);
213
214    // Auto-purge records older than RETENTION_DAYS.
215    // Only run when the file exceeds 50KB to amortize the cost.
216    if let Ok(meta) = fs::metadata(&stats_path) {
217        if meta.len() > 50_000 {
218            if let Err(e) = purge_old_records(&stats_path) {
219                eprintln!("shift-ai: warning: auto-purge failed: {}", e);
220            }
221        }
222    }
223
224    Ok(())
225}
226
227/// Remove records older than RETENTION_DAYS from the stats file.
228///
229/// Reads all records, filters to those within the retention window,
230/// and rewrites the file atomically via a temp file + rename.
231///
232/// Uses `tempfile::NamedTempFile` for a unique temp filename (safe under
233/// concurrent invocations) and auto-cleanup on failure. Calls `sync_all()`
234/// before `persist()` to ensure data durability before the rename.
235pub fn purge_old_records(path: &PathBuf) -> Result<usize> {
236    let cutoff_date = {
237        let now_secs = std::time::SystemTime::now()
238            .duration_since(std::time::UNIX_EPOCH)
239            .unwrap_or_default()
240            .as_secs();
241        let cutoff_secs = now_secs.saturating_sub(RETENTION_DAYS * 86400);
242        let (y, m, d) = days_to_ymd(cutoff_secs / 86400);
243        format!("{:04}-{:02}-{:02}", y, m, d)
244    };
245
246    let load_result = load_records(Some(path))?;
247    let total = load_result.records.len();
248    let kept: Vec<&RunRecord> = load_result
249        .records
250        .iter()
251        .filter(|r| r.date >= cutoff_date)
252        .collect();
253    let purged = total - kept.len();
254
255    if purged == 0 {
256        return Ok(0);
257    }
258
259    // Write to a unique temp file in the same directory. NamedTempFile
260    // auto-deletes on drop if persist() is never called, so crashes and
261    // errors never leave orphaned files behind.
262    let parent = path
263        .parent()
264        .context("stats file has no parent directory")?;
265    let mut tmp_file =
266        tempfile::NamedTempFile::new_in(parent).context("failed to create temp file for purge")?;
267
268    for record in &kept {
269        let mut line = serde_json::to_string(record)?;
270        line.push('\n');
271        tmp_file.write_all(line.as_bytes())?;
272    }
273    tmp_file.flush()?;
274
275    // Ensure data reaches persistent storage before the atomic rename
276    tmp_file
277        .as_file()
278        .sync_all()
279        .context("failed to sync temp file")?;
280
281    // persist() atomically renames the temp file over the target path.
282    // On failure the temp file is still auto-cleaned up by Drop.
283    tmp_file
284        .persist(path)
285        .context("failed to rename purged stats file")?;
286
287    Ok(purged)
288}
289
290/// Result of loading stats records, including count of skipped malformed lines.
291pub struct LoadResult {
292    pub records: Vec<RunRecord>,
293    pub skipped_lines: usize,
294}
295
296/// Load all run records from the stats file.
297pub fn load_records(path: Option<&PathBuf>) -> Result<LoadResult> {
298    let stats_path = match path {
299        Some(p) => p.clone(),
300        None => default_stats_path()?,
301    };
302
303    if !stats_path.exists() {
304        return Ok(LoadResult {
305            records: Vec::new(),
306            skipped_lines: 0,
307        });
308    }
309
310    let file = fs::File::open(&stats_path)
311        .with_context(|| format!("failed to open stats file: {}", stats_path.display()))?;
312    let reader = BufReader::new(file);
313    let mut records = Vec::new();
314    let mut skipped_lines = 0;
315
316    for (i, line) in reader.lines().enumerate() {
317        let line = line.with_context(|| format!("failed to read line {} of stats file", i + 1))?;
318        let trimmed = line.trim();
319        if trimmed.is_empty() {
320            continue;
321        }
322        // Skip excessively long lines (likely corrupt)
323        if trimmed.len() > MAX_LINE_LENGTH {
324            eprintln!(
325                "shift-ai: warning: skipping oversized stats line {} ({} bytes)",
326                i + 1,
327                trimmed.len()
328            );
329            skipped_lines += 1;
330            continue;
331        }
332        match serde_json::from_str::<RunRecord>(trimmed) {
333            Ok(record) => records.push(record),
334            Err(e) => {
335                // Skip malformed lines rather than failing
336                eprintln!(
337                    "shift-ai: warning: skipping malformed stats line {}: {}",
338                    i + 1,
339                    e
340                );
341                skipped_lines += 1;
342            }
343        }
344        // Cap total records to prevent unbounded memory allocation
345        if records.len() >= MAX_STATS_RECORDS {
346            eprintln!(
347                "shift-ai: warning: stats file has >{} entries, loading only the first {}",
348                MAX_STATS_RECORDS, MAX_STATS_RECORDS
349            );
350            break;
351        }
352    }
353
354    Ok(LoadResult {
355        records,
356        skipped_lines,
357    })
358}
359
360/// Compute aggregate gain summary from records.
361pub fn summarize(records: &[RunRecord]) -> GainSummary {
362    use std::collections::BTreeMap;
363
364    let mut s = GainSummary::default();
365
366    // Per-provider accumulators: (runs, images, tokens_before, tokens_after, total_duration_ms)
367    let mut providers: BTreeMap<String, (usize, usize, u64, u64, u64)> = BTreeMap::new();
368    // Per-action accumulators
369    let mut actions: BTreeMap<String, usize> = BTreeMap::new();
370
371    for r in records {
372        s.total_runs += 1;
373        s.total_images += r.images;
374        s.total_modified += r.modified;
375        s.total_bytes_before += r.bytes_before as u64;
376        s.total_bytes_after += r.bytes_after as u64;
377        s.total_openai_before += r.token_savings.openai_before;
378        s.total_openai_after += r.token_savings.openai_after;
379        s.total_anthropic_before += r.token_savings.anthropic_before;
380        s.total_anthropic_after += r.token_savings.anthropic_after;
381        s.total_duration_ms += r.duration_ms;
382
383        // Per-provider
384        let entry = providers.entry(r.provider.clone()).or_default();
385        entry.0 += 1; // runs
386        entry.1 += r.images; // images
387                             // Use the matching provider's tokens (case-insensitive)
388        let provider_lower = r.provider.to_ascii_lowercase();
389        let (before, after) = if provider_lower == "anthropic" {
390            (
391                r.token_savings.anthropic_before,
392                r.token_savings.anthropic_after,
393            )
394        } else {
395            // Default to OpenAI tokens for "openai" and any unknown providers
396            (r.token_savings.openai_before, r.token_savings.openai_after)
397        };
398        entry.2 += before;
399        entry.3 += after;
400        entry.4 += r.duration_ms;
401
402        // Per-action
403        for (action, count) in &r.action_counts {
404            *actions.entry(action.clone()).or_default() += count;
405        }
406    }
407
408    // Build per-provider vec sorted by tokens saved descending
409    let mut by_provider: Vec<ProviderGain> = providers
410        .into_iter()
411        .map(|(name, (runs, images, before, after, dur))| {
412            let saved = before.saturating_sub(after);
413            let overall_pct = if before > 0 {
414                (saved as f64 / before as f64) * 100.0
415            } else {
416                0.0
417            };
418            let avg_dur = if runs > 0 { dur / runs as u64 } else { 0 };
419            ProviderGain {
420                provider: name,
421                runs,
422                images,
423                tokens_saved: saved,
424                overall_pct,
425                avg_duration_ms: avg_dur,
426            }
427        })
428        .collect();
429    by_provider.sort_by_key(|b| std::cmp::Reverse(b.tokens_saved));
430    s.by_provider = by_provider;
431
432    // Build per-action vec sorted by count descending
433    let mut by_action: Vec<ActionGain> = actions
434        .into_iter()
435        .map(|(action, count)| ActionGain { action, count })
436        .collect();
437    by_action.sort_by_key(|b| std::cmp::Reverse(b.count));
438    s.by_action = by_action;
439
440    s
441}
442
443/// Compute daily breakdown from records.
444pub fn daily_breakdown(records: &[RunRecord]) -> Vec<DailyGain> {
445    use std::collections::BTreeMap;
446
447    let mut days: BTreeMap<String, DailyGain> = BTreeMap::new();
448
449    for r in records {
450        let entry = days.entry(r.date.clone()).or_insert_with(|| DailyGain {
451            date: r.date.clone(),
452            runs: 0,
453            images: 0,
454            openai_saved: 0,
455            anthropic_saved: 0,
456        });
457        entry.runs += 1;
458        entry.images += r.images;
459        entry.openai_saved += r
460            .token_savings
461            .openai_before
462            .saturating_sub(r.token_savings.openai_after);
463        entry.anthropic_saved += r
464            .token_savings
465            .anthropic_before
466            .saturating_sub(r.token_savings.anthropic_after);
467    }
468
469    days.into_values().collect()
470}
471
472/// Build a RunRecord from a completed Report.
473pub fn record_from_report(
474    report: &crate::report::Report,
475    provider: &str,
476    duration_ms: u64,
477) -> RunRecord {
478    // Get current timestamp
479    let now = std::time::SystemTime::now()
480        .duration_since(std::time::UNIX_EPOCH)
481        .unwrap_or_default()
482        .as_secs();
483
484    // Format as ISO 8601 (basic — no chrono dependency)
485    let secs_per_day = 86400;
486    let days_since_epoch = now / secs_per_day;
487    let secs_today = now % secs_per_day;
488    let hours = secs_today / 3600;
489    let minutes = (secs_today % 3600) / 60;
490    let seconds = secs_today % 60;
491
492    // Civil date calculation (Hinnant algorithm, exact for proleptic Gregorian calendar)
493    let (year, month, day) = days_to_ymd(days_since_epoch);
494
495    let timestamp = format!(
496        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
497        year, month, day, hours, minutes, seconds
498    );
499    let date = format!("{:04}-{:02}-{:02}", year, month, day);
500
501    // Compute per-action counts from action records
502    let mut action_map = std::collections::BTreeMap::new();
503    for a in &report.actions {
504        *action_map.entry(a.action.clone()).or_insert(0usize) += 1;
505    }
506    let action_counts: Vec<(String, usize)> = action_map.into_iter().collect();
507
508    RunRecord {
509        timestamp,
510        date,
511        provider: provider.to_string(),
512        images: report.images_found,
513        modified: report.images_modified,
514        dropped: report.images_dropped,
515        svgs_rasterized: report.svgs_rasterized,
516        bytes_before: report.original_size,
517        bytes_after: report.transformed_size,
518        token_savings: report.token_savings.clone(),
519        duration_ms,
520        action_counts,
521    }
522}
523
/// Convert days since Unix epoch to (year, month, day).
///
/// Civil-date arithmetic over 400-year eras of 146 097 days, with the
/// internal year starting on 1 March so the leap day falls at year end.
/// `month` is 1-12 and `day` is 1-31.
fn days_to_ymd(days: u64) -> (u64, u64, u64) {
    const DAYS_PER_ERA: u64 = 146097;
    // Days from 0000-03-01 to 1970-01-01.
    const EPOCH_SHIFT: u64 = 719468;

    let shifted = days + EPOCH_SHIFT;
    let era = shifted / DAYS_PER_ERA;
    let day_of_era = shifted % DAYS_PER_ERA;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March ... 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;
    let month = if march_based_month < 10 {
        march_based_month + 3
    } else {
        march_based_month - 9
    };

    // January and February belong to the following civil year.
    let year = year_of_era + era * 400 + u64::from(month <= 2);
    (year, month, day)
}
539
540#[cfg(test)]
541mod tests {
542    use super::*;
543    use crate::cost::TokenSavings;
544    use tempfile::NamedTempFile;
545
546    fn make_record(date: &str, openai_before: u64, openai_after: u64) -> RunRecord {
547        RunRecord {
548            timestamp: format!("{}T12:00:00Z", date),
549            date: date.to_string(),
550            provider: "openai".to_string(),
551            images: 3,
552            modified: 2,
553            dropped: 0,
554            svgs_rasterized: 0,
555            bytes_before: 5_000_000,
556            bytes_after: 1_000_000,
557            token_savings: TokenSavings {
558                openai_before,
559                openai_after,
560                anthropic_before: 3000,
561                anthropic_after: 1000,
562            },
563            duration_ms: 500,
564            action_counts: vec![("resize".to_string(), 2)],
565        }
566    }
567
568    #[test]
569    fn test_record_and_load_roundtrip() {
570        let tmp = NamedTempFile::new().unwrap();
571        let path = tmp.path().to_path_buf();
572
573        let r1 = make_record("2026-04-20", 1000, 300);
574        let r2 = make_record("2026-04-21", 2000, 500);
575
576        record_run(&r1, Some(&path)).unwrap();
577        record_run(&r2, Some(&path)).unwrap();
578
579        let result = load_records(Some(&path)).unwrap();
580        assert_eq!(result.records.len(), 2);
581        assert_eq!(result.skipped_lines, 0);
582        assert_eq!(result.records[0].date, "2026-04-20");
583        assert_eq!(result.records[1].date, "2026-04-21");
584    }
585
586    #[test]
587    fn test_load_empty_file() {
588        let tmp = NamedTempFile::new().unwrap();
589        let path = tmp.path().to_path_buf();
590        let result = load_records(Some(&path)).unwrap();
591        assert!(result.records.is_empty());
592        assert_eq!(result.skipped_lines, 0);
593    }
594
595    #[test]
596    fn test_load_nonexistent_file() {
597        let path = PathBuf::from("/tmp/shift-test-nonexistent-stats.jsonl");
598        let result = load_records(Some(&path)).unwrap();
599        assert!(result.records.is_empty());
600        assert_eq!(result.skipped_lines, 0);
601    }
602
603    #[test]
604    fn test_summarize() {
605        let records = vec![
606            make_record("2026-04-20", 1000, 300),
607            make_record("2026-04-21", 2000, 500),
608        ];
609        let summary = summarize(&records);
610        assert_eq!(summary.total_runs, 2);
611        assert_eq!(summary.total_images, 6);
612        assert_eq!(summary.total_modified, 4);
613        assert_eq!(summary.total_openai_before, 3000);
614        assert_eq!(summary.total_openai_after, 800);
615        assert_eq!(summary.openai_saved(), 2200);
616    }
617
618    #[test]
619    fn test_daily_breakdown() {
620        let records = vec![
621            make_record("2026-04-20", 1000, 300),
622            make_record("2026-04-20", 500, 200),
623            make_record("2026-04-21", 2000, 500),
624        ];
625        let daily = daily_breakdown(&records);
626        assert_eq!(daily.len(), 2);
627        assert_eq!(daily[0].date, "2026-04-20");
628        assert_eq!(daily[0].runs, 2);
629        assert_eq!(daily[0].openai_saved, 1000); // (1000-300) + (500-200)
630        assert_eq!(daily[1].date, "2026-04-21");
631        assert_eq!(daily[1].runs, 1);
632    }
633
634    #[test]
635    fn test_summary_percentages() {
636        let summary = GainSummary {
637            total_openai_before: 10000,
638            total_openai_after: 3000,
639            total_anthropic_before: 5000,
640            total_anthropic_after: 1000,
641            ..Default::default()
642        };
643        assert!((summary.openai_pct() - 70.0).abs() < 0.1);
644        assert!((summary.anthropic_pct() - 80.0).abs() < 0.1);
645    }
646
647    #[test]
648    fn test_summary_zero_division() {
649        let summary = GainSummary::default();
650        assert_eq!(summary.openai_pct(), 0.0);
651        assert_eq!(summary.anthropic_pct(), 0.0);
652    }
653
654    #[test]
655    fn test_malformed_lines_skipped() {
656        let tmp = NamedTempFile::new().unwrap();
657        let path = tmp.path().to_path_buf();
658
659        // Write valid + invalid lines
660        let r = make_record("2026-04-20", 1000, 300);
661        record_run(&r, Some(&path)).unwrap();
662        // Append garbage
663        let mut f = fs::OpenOptions::new().append(true).open(&path).unwrap();
664        writeln!(f, "not json at all").unwrap();
665        writeln!(f, "{{\"partial\": true}}").unwrap();
666        // Write another valid record
667        record_run(&r, Some(&path)).unwrap();
668
669        let result = load_records(Some(&path)).unwrap();
670        assert_eq!(result.records.len(), 2); // only the 2 valid records
671        assert_eq!(result.skipped_lines, 2); // 2 malformed lines skipped
672    }
673
674    #[test]
675    fn test_record_from_report() {
676        let mut report = crate::report::Report::new();
677        report.images_found = 3;
678        report.images_modified = 2;
679        report.original_size = 5_000_000;
680        report.transformed_size = 1_000_000;
681        report.token_savings = TokenSavings {
682            openai_before: 2000,
683            openai_after: 500,
684            anthropic_before: 3000,
685            anthropic_after: 800,
686        };
687
688        let record = record_from_report(&report, "openai", 1234);
689        assert_eq!(record.provider, "openai");
690        assert_eq!(record.images, 3);
691        assert_eq!(record.modified, 2);
692        assert_eq!(record.duration_ms, 1234);
693        assert!(!record.timestamp.is_empty());
694        assert!(!record.date.is_empty());
695    }
696
697    #[test]
698    fn test_days_to_ymd() {
699        // Unix epoch
700        let (y, m, d) = days_to_ymd(0);
701        assert_eq!((y, m, d), (1970, 1, 1));
702
703        // Leap year: 2000-02-29 = day 11016
704        let (y, m, d) = days_to_ymd(11016);
705        assert_eq!((y, m, d), (2000, 2, 29));
706
707        // Day after leap day: 2000-03-01 = day 11017
708        let (y, m, d) = days_to_ymd(11017);
709        assert_eq!((y, m, d), (2000, 3, 1));
710
711        // Non-leap century year: 2100-02-28 = day 47540
712        let (y, m, d) = days_to_ymd(47540);
713        assert_eq!((y, m, d), (2100, 2, 28));
714
715        // 2100-03-01 = day 47541 (no Feb 29 in 2100)
716        let (y, m, d) = days_to_ymd(47541);
717        assert_eq!((y, m, d), (2100, 3, 1));
718
719        // Year boundary: 2025-12-31 = day 20453
720        let (y, m, d) = days_to_ymd(20453);
721        assert_eq!((y, m, d), (2025, 12, 31));
722
723        // 2026-01-01 = day 20454
724        let (y, m, d) = days_to_ymd(20454);
725        assert_eq!((y, m, d), (2026, 1, 1));
726    }
727
728    #[cfg(unix)]
729    #[test]
730    fn test_symlink_directory_rejected() {
731        use std::os::unix::fs as unix_fs;
732
733        let real_dir = tempfile::tempdir().unwrap();
734        let symlink_dir = tempfile::tempdir().unwrap();
735        let symlink_path = symlink_dir.path().join("symlinked-shift");
736
737        // Create symlink to real directory
738        unix_fs::symlink(real_dir.path(), &symlink_path).unwrap();
739
740        let stats_file = symlink_path.join("stats.jsonl");
741        let r = make_record("2026-04-22", 100, 50);
742        let result = record_run(&r, Some(&stats_file));
743
744        assert!(result.is_err());
745        let err_msg = format!("{}", result.unwrap_err());
746        assert!(
747            err_msg.contains("symlink"),
748            "expected symlink error, got: {}",
749            err_msg
750        );
751    }
752
753    #[cfg(unix)]
754    #[test]
755    fn test_symlink_file_rejected() {
756        use std::os::unix::fs as unix_fs;
757
758        let tmp_dir = tempfile::tempdir().unwrap();
759        let real_file = tmp_dir.path().join("real-stats.jsonl");
760        let symlink_file = tmp_dir.path().join("stats.jsonl");
761
762        // Create the real file
763        fs::write(&real_file, "").unwrap();
764        // Create symlink pointing to real file
765        unix_fs::symlink(&real_file, &symlink_file).unwrap();
766
767        let r = make_record("2026-04-22", 100, 50);
768        let result = record_run(&r, Some(&symlink_file));
769
770        assert!(result.is_err());
771        let err_msg = format!("{}", result.unwrap_err());
772        assert!(
773            err_msg.contains("symlink"),
774            "expected symlink error, got: {}",
775            err_msg
776        );
777    }
778
779    #[test]
780    fn test_skipped_lines_counted() {
781        let tmp = NamedTempFile::new().unwrap();
782        let path = tmp.path().to_path_buf();
783
784        let r = make_record("2026-04-22", 500, 200);
785        record_run(&r, Some(&path)).unwrap();
786
787        // Append 3 garbage lines
788        let mut f = fs::OpenOptions::new().append(true).open(&path).unwrap();
789        writeln!(f, "garbage1").unwrap();
790        writeln!(f, "garbage2").unwrap();
791        writeln!(f, "garbage3").unwrap();
792
793        record_run(&r, Some(&path)).unwrap();
794
795        let result = load_records(Some(&path)).unwrap();
796        assert_eq!(result.records.len(), 2);
797        assert_eq!(result.skipped_lines, 3);
798    }
799
800    // ── Helpers for multi-provider tests ─────────────────────────────
801
802    fn make_anthropic_record(date: &str, anthropic_before: u64, anthropic_after: u64) -> RunRecord {
803        RunRecord {
804            timestamp: format!("{}T12:00:00Z", date),
805            date: date.to_string(),
806            provider: "anthropic".to_string(),
807            images: 2,
808            modified: 1,
809            dropped: 0,
810            svgs_rasterized: 0,
811            bytes_before: 3_000_000,
812            bytes_after: 800_000,
813            token_savings: TokenSavings {
814                openai_before: 500,
815                openai_after: 200,
816                anthropic_before,
817                anthropic_after,
818            },
819            duration_ms: 300,
820            action_counts: vec![("recompress".to_string(), 1)],
821        }
822    }
823
824    fn make_record_with_actions(date: &str, actions: Vec<(String, usize)>) -> RunRecord {
825        RunRecord {
826            action_counts: actions,
827            ..make_record(date, 1000, 300)
828        }
829    }
830
831    // ── Purge tests ──────────────────────────────────────────────────
832
833    #[test]
834    fn test_purge_removes_old_records() {
835        let tmp = NamedTempFile::new().unwrap();
836        let path = tmp.path().to_path_buf();
837
838        // Write a record with a very old date (should be purged)
839        let old = make_record("2020-01-01", 1000, 300);
840        record_run(&old, Some(&path)).unwrap();
841
842        // Write a record with today's date (should be kept)
843        let now_secs = std::time::SystemTime::now()
844            .duration_since(std::time::UNIX_EPOCH)
845            .unwrap()
846            .as_secs();
847        let (y, m, d) = days_to_ymd(now_secs / 86400);
848        let today = format!("{:04}-{:02}-{:02}", y, m, d);
849        let recent = make_record(&today, 2000, 500);
850        record_run(&recent, Some(&path)).unwrap();
851
852        let purged = purge_old_records(&path).unwrap();
853        assert_eq!(purged, 1);
854
855        let result = load_records(Some(&path)).unwrap();
856        assert_eq!(result.records.len(), 1);
857        assert_eq!(result.records[0].date, today);
858    }
859
860    #[test]
861    fn test_purge_no_op_when_nothing_to_purge() {
862        let tmp = NamedTempFile::new().unwrap();
863        let path = tmp.path().to_path_buf();
864
865        let now_secs = std::time::SystemTime::now()
866            .duration_since(std::time::UNIX_EPOCH)
867            .unwrap()
868            .as_secs();
869        let (y, m, d) = days_to_ymd(now_secs / 86400);
870        let today = format!("{:04}-{:02}-{:02}", y, m, d);
871
872        let r = make_record(&today, 1000, 300);
873        record_run(&r, Some(&path)).unwrap();
874
875        let purged = purge_old_records(&path).unwrap();
876        assert_eq!(purged, 0);
877
878        let result = load_records(Some(&path)).unwrap();
879        assert_eq!(result.records.len(), 1);
880    }
881
/// When every stored record is from 2019, a purge removes both of them and
/// leaves no loadable records behind.
#[test]
fn test_purge_all_records_expired() {
    let scratch = NamedTempFile::new().unwrap();
    let stats_file = scratch.path().to_path_buf();

    // Two long-expired runs, written in chronological order.
    for (date, before, after) in [("2019-01-01", 1000, 300), ("2019-06-15", 2000, 500)] {
        let expired = make_record(date, before, after);
        record_run(&expired, Some(&stats_file)).unwrap();
    }

    let removed = purge_old_records(&stats_file).unwrap();
    assert_eq!(removed, 2);

    let loaded = load_records(Some(&stats_file)).unwrap();
    assert_eq!(loaded.records.len(), 0);
}
898
/// A record that survives a purge must come back with its fields intact.
///
/// An old record is written alongside the current one so that the purge has
/// something to remove and therefore actually rewrites the file. The surviving
/// record is then compared field by field, including the full contents and
/// order of `action_counts` rather than just its length.
#[test]
fn test_purge_preserves_record_data() {
    let tmp = NamedTempFile::new().unwrap();
    let path = tmp.path().to_path_buf();

    // Today's date as YYYY-MM-DD, derived from whole days since the Unix epoch.
    let now_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let (y, m, d) = days_to_ymd(now_secs / 86400);
    let today = format!("{:04}-{:02}-{:02}", y, m, d);

    // Kept separately so the exact names and counts can be compared after the
    // round trip through the stats file.
    let expected_actions = vec![("resize".to_string(), 3), ("convert".to_string(), 2)];

    let r = RunRecord {
        provider: "anthropic".to_string(),
        images: 7,
        modified: 5,
        duration_ms: 1234,
        action_counts: expected_actions.clone(),
        ..make_record(&today, 5000, 1500)
    };
    // Also write an old record to force purge to actually rewrite
    let old = make_record("2020-01-01", 100, 50);
    record_run(&old, Some(&path)).unwrap();
    record_run(&r, Some(&path)).unwrap();

    let purged = purge_old_records(&path).unwrap();
    assert_eq!(purged, 1);

    let result = load_records(Some(&path)).unwrap();
    assert_eq!(result.records.len(), 1);
    let kept = &result.records[0];
    // The survivor must be today's record, not the 2020 one.
    assert_eq!(kept.date, today);
    assert_eq!(kept.provider, "anthropic");
    assert_eq!(kept.images, 7);
    assert_eq!(kept.modified, 5);
    assert_eq!(kept.duration_ms, 1234);
    // Full comparison: a length-only check would miss altered names or counts.
    assert_eq!(kept.action_counts, expected_actions);
}
936
937    // ── Per-provider summarization tests ─────────────────────────────
938
/// Summarizing runs from two providers yields one entry per provider, with
/// the provider that saved more tokens listed first.
#[test]
fn test_summarize_by_provider() {
    let runs = [
        make_record("2026-04-20", 1000, 300),            // openai
        make_record("2026-04-21", 2000, 500),            // openai
        make_anthropic_record("2026-04-20", 4000, 1000), // anthropic
    ];
    let summary = summarize(&runs);
    let providers = &summary.by_provider;

    assert_eq!(providers.len(), 2);

    // Expected order is by tokens_saved, largest first:
    //   anthropic: 4000 - 1000               = 3000
    //   openai:    (1000 - 300) + (2000 - 500) = 2200
    let top = &providers[0];
    assert_eq!(top.provider, "anthropic");
    assert_eq!(top.tokens_saved, 3000);
    assert_eq!(top.runs, 1);
    assert_eq!(top.images, 2);
    // 3000 saved out of 4000 is 75%; compared with a small tolerance.
    assert!((top.overall_pct - 75.0).abs() < 0.1);

    let runner_up = &providers[1];
    assert_eq!(runner_up.provider, "openai");
    assert_eq!(runner_up.tokens_saved, 2200);
    assert_eq!(runner_up.runs, 2);
    assert_eq!(runner_up.images, 6);
}
964
/// A single run produces exactly one provider entry carrying that run's
/// savings (1000 - 300 = 700 tokens).
#[test]
fn test_summarize_single_provider() {
    let only_run = [make_record("2026-04-20", 1000, 300)];
    let summary = summarize(&only_run);

    assert_eq!(summary.by_provider.len(), 1);
    let entry = &summary.by_provider[0];
    assert_eq!(entry.provider, "openai");
    assert_eq!(entry.tokens_saved, 700);
}
973
/// Durations are totalled across runs and averaged per provider.
#[test]
fn test_summarize_provider_duration() {
    // Each record from `make_record` is expected to carry duration_ms = 500.
    let first_run = make_record("2026-04-20", 1000, 300);
    let second_run = make_record("2026-04-21", 2000, 500);
    let runs = [first_run, second_run];

    let summary = summarize(&runs);
    let provider = &summary.by_provider[0];

    // 1000 ms total over 2 runs gives a 500 ms average.
    assert_eq!(provider.avg_duration_ms, 500);
    assert_eq!(summary.total_duration_ms, 1000);
}
984
985    // ── Per-action summarization tests ───────────────────────────────
986
/// Action counts from several runs are merged per action name and reported
/// in descending order of total count.
#[test]
fn test_summarize_by_action() {
    let day_one = make_record_with_actions(
        "2026-04-20",
        vec![("resize".to_string(), 3), ("convert".to_string(), 1)],
    );
    let day_two = make_record_with_actions(
        "2026-04-21",
        vec![("resize".to_string(), 2), ("recompress".to_string(), 4)],
    );
    let runs = [day_one, day_two];
    let summary = summarize(&runs);

    assert_eq!(summary.by_action.len(), 3);

    // Expected order, largest count first: resize=5, recompress=4, convert=1.
    let expected = [("resize", 5), ("recompress", 4), ("convert", 1)];
    for (entry, (action, count)) in summary.by_action.iter().zip(expected) {
        assert_eq!(entry.action, action);
        assert_eq!(entry.count, count);
    }
}
1010
/// A run with no recorded actions contributes nothing to the per-action
/// breakdown.
#[test]
fn test_summarize_empty_actions() {
    let actionless = RunRecord {
        action_counts: Vec::new(),
        ..make_record("2026-04-20", 1000, 300)
    };
    let runs = [actionless];

    let summary = summarize(&runs);
    assert!(summary.by_action.is_empty());
}
1018
/// Summarizing zero records gives a zero run count and empty breakdowns.
#[test]
fn test_summarize_empty_records() {
    let no_runs: [RunRecord; 0] = [];
    let summary = summarize(&no_runs);

    assert_eq!(summary.total_runs, 0);
    assert!(summary.by_provider.is_empty());
    assert!(summary.by_action.is_empty());
}
1026}