Skip to main content

shunt/
state.rs

//! Runtime state: per-account cooldowns/disabling + conversation stickiness.
//!
//! Thread-safe via `Arc<Mutex<_>>`. Cooldowns and disables are persisted to disk;
//! stickiness is meant to be ephemeral (losing it on restart is acceptable).
5use anyhow::Result;
6use serde::{Deserialize, Serialize};
7use std::collections::{HashMap, VecDeque};
8use std::path::{Path, PathBuf};
9use std::sync::atomic::{AtomicBool, Ordering};
10use std::sync::{Arc, Mutex};
11use std::time::{SystemTime, UNIX_EPOCH};
12use tracing::warn;
13
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields 0 if the system clock reports a time before the epoch.
fn now_ms() -> u64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or(std::time::Duration::ZERO);
    since_epoch.as_millis() as u64
}
20
21// ---------------------------------------------------------------------------
22// On-disk data
23// ---------------------------------------------------------------------------
24
/// Per-account health flags, stored in the state map keyed by account name.
///
/// Every field carries `#[serde(default)]`, so a state file that lacks a field
/// still deserializes (the field takes its zero/false default).
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
pub struct AccountState {
    /// Epoch-ms timestamp after which this account is usable again (0 = not cooling).
    #[serde(default)]
    pub cooldown_until_ms: u64,
    /// Account is excluded by `is_available`. Set by `disable_account` and
    /// `set_auth_failed`; reset to false by `clear_auth_failed`.
    #[serde(default)]
    pub disabled: bool,
    /// OAuth credentials are expired and need re-authorization via `shunt add-account`.
    #[serde(default)]
    pub auth_failed: bool,
}
37
/// Binding of one conversation fingerprint to the account that should serve it.
#[derive(Serialize, Deserialize, Default, Clone)]
struct StickyEntry {
    /// Name of the account the conversation is bound to.
    account_name: String,
    /// Epoch-ms timestamp from which `get_sticky` stops returning this binding.
    expires_at_ms: u64,
}
43
/// Rolling 5-hour quota window per account.
///
/// `record_usage` starts a new window (and zeroes the counters) on the first
/// recorded usage and again once `WINDOW_MS` has elapsed since `window_start_ms`.
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
pub struct QuotaWindow {
    /// Epoch-ms when this window started (0 = never used).
    #[serde(default)]
    pub window_start_ms: u64,
    /// Input tokens accumulated since `window_start_ms`.
    #[serde(default)]
    pub input_tokens: u64,
    /// Output tokens accumulated since `window_start_ms`.
    #[serde(default)]
    pub output_tokens: u64,
}
55
56impl QuotaWindow {
57    pub fn total_tokens(&self) -> u64 {
58        self.input_tokens + self.output_tokens
59    }
60    pub fn window_expires_ms(&self) -> Option<u64> {
61        if self.window_start_ms == 0 { None } else { Some(self.window_start_ms + WINDOW_MS) }
62    }
63}
64
/// Length of one quota window in milliseconds.
pub const WINDOW_MS: u64 = 5 * 60 * 60 * 1000; // 5 hours
66
67// ---------------------------------------------------------------------------
68// Request log
69// ---------------------------------------------------------------------------
70
/// A single proxied request recorded for the live monitor.
///
/// Entries are kept only in the in-memory ring buffer managed by
/// `record_request`; the buffer itself is not written to the state file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RequestLog {
    /// Timestamp of the request in milliseconds.
    /// NOTE(review): presumably Unix-epoch ms supplied by the caller — confirm.
    pub ts_ms: u64,
    /// Name of the account the request is attributed to.
    pub account: String,
    /// Model identifier of the request.
    pub model: String,
    /// Status code of the request.
    /// NOTE(review): presumably the upstream HTTP status — confirm against caller.
    pub status: u16,
    /// Input tokens reported for the request.
    pub input_tokens: u64,
    /// Output tokens reported for the request.
    pub output_tokens: u64,
    /// Duration of the request in milliseconds.
    pub duration_ms: u64,
}
82
/// Capacity of the in-memory recent-request log; `record_request` evicts the
/// oldest entry once this many are stored.
const MAX_RECENT: usize = 200;
84
/// Rate-limit info extracted from `anthropic-ratelimit-unified-*` response headers.
///
/// Every header-derived field is optional; `None` means no value was recorded
/// for it. One snapshot is stored per account and replaced wholesale by
/// `update_rate_limits`.
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
pub struct RateLimitInfo {
    /// 5-hour window utilization 0.0–1.0
    pub utilization_5h: Option<f64>,
    /// Unix epoch seconds when 5h window resets
    pub reset_5h: Option<u64>,
    /// "allowed" | "exhausted"
    pub status_5h: Option<String>,
    /// 7-day window utilization 0.0–1.0
    pub utilization_7d: Option<f64>,
    /// Unix epoch seconds when 7d window resets
    pub reset_7d: Option<u64>,
    /// Status of the 7-day window.
    /// NOTE(review): presumably the same value set as `status_5h` — confirm.
    pub status_7d: Option<String>,
    /// Extra usage (overage) status: "allowed" | "rejected"
    pub overage_status: Option<String>,
    /// Reason attached to a disabled overage.
    /// NOTE(review): looks like a free-form upstream string — confirm.
    pub overage_disabled_reason: Option<String>,
    /// Which claim is currently representative ("five_hour" | "seven_day")
    pub representative_claim: Option<String>,
    /// Timestamp of this snapshot in milliseconds.
    /// NOTE(review): presumably epoch-ms set by whoever builds the struct — confirm.
    pub updated_ms: u64,
}
106
/// Per-day token and API-cost accumulator (all accounts combined).
///
/// Buckets live in a map keyed by a "YYYY-MM-DD" UTC date string and are
/// incremented by `record_global`.
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
pub struct DailyBucket {
    /// Input tokens recorded on this day.
    pub input_tokens: u64,
    /// Output tokens recorded on this day.
    pub output_tokens: u64,
    /// What those tokens would have cost on the public API (USD).
    pub api_cost_usd: f64,
}
115
/// Snapshot returned by `savings_snapshot()` for the status endpoint + CLI.
///
/// Three aggregation levels over the same counters: today's bucket, the
/// buckets from the trailing week, and the all-time totals.
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
pub struct SavingsSnapshot {
    /// Input tokens in today's (UTC) bucket.
    pub today_input: u64,
    /// Output tokens in today's (UTC) bucket.
    pub today_output: u64,
    /// API-equivalent cost (USD) of today's bucket.
    pub today_cost_usd: f64,
    /// Input tokens summed over the buckets in the weekly range.
    pub week_input: u64,
    /// Output tokens summed over the buckets in the weekly range.
    pub week_output: u64,
    /// API-equivalent cost (USD) summed over the buckets in the weekly range.
    pub week_cost_usd: f64,
    /// All-time input tokens.
    pub all_time_input: u64,
    /// All-time output tokens.
    pub all_time_output: u64,
    /// All-time API-equivalent cost (USD).
    pub all_time_cost_usd: f64,
}
129
130#[derive(Serialize, Deserialize, Default, Clone)]
131struct StateData {
132    #[serde(default)]
133    accounts: HashMap<String, AccountState>,
134    #[serde(default)]
135    sticky: HashMap<String, StickyEntry>,
136    #[serde(default)]
137    quota: HashMap<String, QuotaWindow>,
138    #[serde(default)]
139    rate_limits: HashMap<String, RateLimitInfo>,
140    /// If set, all requests are forced to this account (overrides routing).
141    #[serde(default)]
142    pinned_account: Option<String>,
143    /// The most recent account that successfully handled a proxied request.
144    #[serde(default)]
145    last_used_account: Option<String>,
146    /// Recent request log (ephemeral — not persisted to disk).
147    #[serde(skip)]
148    recent_requests: VecDeque<RequestLog>,
149    /// Daily token + cost buckets keyed by "YYYY-MM-DD" (all accounts combined).
150    #[serde(default)]
151    global_daily: HashMap<String, DailyBucket>,
152    /// All-time totals.
153    #[serde(default)]
154    all_time_input: u64,
155    #[serde(default)]
156    all_time_output: u64,
157    #[serde(default)]
158    all_time_cost_usd: f64,
159}
160
161// ---------------------------------------------------------------------------
162// Store
163// ---------------------------------------------------------------------------
164
/// Handle to the shared runtime state.
///
/// Cloning is cheap: clones share the same state and dirty flag through the
/// `Arc`s below, so a change made through one handle is visible through all.
#[derive(Clone)]
pub struct StateStore {
    /// File the background writer serializes the state to.
    path: PathBuf,
    /// The shared state; every accessor locks this mutex for the duration of the call.
    inner: Arc<Mutex<StateData>>,
    /// Set to true when a write is needed; the background writer thread clears it.
    pending: Arc<AtomicBool>,
}
172
173impl StateStore {
174    /// Create a fresh in-memory store with no backing file (useful for tests).
175    pub fn new_empty() -> Self {
176        // No background writer thread for the null store — writes are no-ops.
177        Self {
178            path: PathBuf::from("/dev/null"),
179            inner: Arc::new(Mutex::new(StateData::default())),
180            pending: Arc::new(AtomicBool::new(false)),
181        }
182    }
183
184    pub fn load(path: &Path) -> Self {
185        let data: StateData = if path.exists() {
186            match std::fs::read_to_string(path) {
187                Ok(text) => serde_json::from_str(&text).unwrap_or_else(|e| {
188                    warn!("State file unreadable ({e}), starting fresh");
189                    StateData::default()
190                }),
191                Err(e) => {
192                    warn!("Cannot read state file ({e}), starting fresh");
193                    StateData::default()
194                }
195            }
196        } else {
197            StateData::default()
198        };
199
200        let store = Self {
201            path: path.to_owned(),
202            inner: Arc::new(Mutex::new(data)),
203            pending: Arc::new(AtomicBool::new(false)),
204        };
205        store.start_writer_thread();
206        store
207    }
208
209    /// Spawn a single background thread that flushes state to disk at most every 100 ms.
210    /// This prevents unbounded thread spawning when many requests fire in rapid succession.
211    fn start_writer_thread(&self) {
212        let pending = Arc::clone(&self.pending);
213        let inner   = Arc::clone(&self.inner);
214        let path    = self.path.clone();
215        std::thread::spawn(move || {
216            loop {
217                std::thread::sleep(std::time::Duration::from_millis(100));
218                if pending.compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed).is_ok() {
219                    let data = inner.lock().unwrap().clone();
220                    if let Err(e) = write_to_disk(&data, &path) {
221                        warn!("Failed to persist state: {e}");
222                    }
223                }
224            }
225        });
226    }
227
228    // -----------------------------------------------------------------------
229    // Availability
230    // -----------------------------------------------------------------------
231
232    pub fn is_available(&self, name: &str) -> bool {
233        let data = self.inner.lock().unwrap();
234        match data.accounts.get(name) {
235            None => true,
236            Some(s) => !s.disabled && now_ms() >= s.cooldown_until_ms,
237        }
238    }
239
240    /// Returns a snapshot of all account states for the status endpoint.
241    pub fn account_states(&self) -> HashMap<String, AccountState> {
242        self.inner.lock().unwrap().accounts.clone()
243    }
244
245    // -----------------------------------------------------------------------
246    // Cooldown / disable
247    // -----------------------------------------------------------------------
248
249    pub fn set_cooldown(&self, name: &str, duration_ms: u64) {
250        {
251            let mut data = self.inner.lock().unwrap();
252            let acc = data.accounts.entry(name.to_owned()).or_default();
253            acc.cooldown_until_ms = now_ms() + duration_ms;
254        }
255        self.persist();
256    }
257
258    pub fn disable_account(&self, name: &str) {
259        {
260            let mut data = self.inner.lock().unwrap();
261            data.accounts.entry(name.to_owned()).or_default().disabled = true;
262        }
263        self.persist();
264    }
265
266    pub fn set_auth_failed(&self, name: &str) {
267        {
268            let mut data = self.inner.lock().unwrap();
269            let acc = data.accounts.entry(name.to_owned()).or_default();
270            acc.auth_failed = true;
271            acc.disabled = true; // also disable so it's skipped in routing
272        }
273        self.persist();
274    }
275
276    /// Clear auth_failed + disabled for an account after a successful token refresh.
277    pub fn clear_auth_failed(&self, name: &str) {
278        {
279            let mut data = self.inner.lock().unwrap();
280            if let Some(acc) = data.accounts.get_mut(name) {
281                acc.auth_failed = false;
282                acc.disabled = false;
283            }
284        }
285        self.persist();
286    }
287
288    /// Returns names of accounts (from the given list) that have auth_failed set.
289    pub fn auth_failed_accounts<'a>(&self, names: &[&'a str]) -> Vec<&'a str> {
290        let data = self.inner.lock().unwrap();
291        names.iter()
292            .filter(|&&n| data.accounts.get(n).map(|s| s.auth_failed).unwrap_or(false))
293            .copied()
294            .collect()
295    }
296
297    // -----------------------------------------------------------------------
298    // Stickiness (ephemeral — not persisted)
299    // -----------------------------------------------------------------------
300
301    pub fn get_sticky(&self, fingerprint: &str) -> Option<String> {
302        let data = self.inner.lock().unwrap();
303        let entry = data.sticky.get(fingerprint)?;
304        if now_ms() < entry.expires_at_ms {
305            Some(entry.account_name.clone())
306        } else {
307            None
308        }
309    }
310
311    pub fn set_sticky(&self, fingerprint: &str, account_name: &str, ttl_ms: u64) {
312        let mut data = self.inner.lock().unwrap();
313        data.sticky.insert(
314            fingerprint.to_owned(),
315            StickyEntry { account_name: account_name.to_owned(), expires_at_ms: now_ms() + ttl_ms },
316        );
317    }
318
319    // -----------------------------------------------------------------------
320    // Quota tracking
321    // -----------------------------------------------------------------------
322
323    /// Epoch-ms when the account's current window started.
324    /// Returns u64::MAX for accounts with no window (sorts last in earliest-expiry).
325    pub fn window_start_ms(&self, name: &str) -> u64 {
326        let data = self.inner.lock().unwrap();
327        data.quota.get(name).map(|q| q.window_start_ms).unwrap_or(u64::MAX)
328    }
329
330    /// Unix epoch seconds when this account's 5h window resets.
331    /// Returns None if unknown or already past.
332    pub fn reset_5h_secs(&self, name: &str) -> Option<u64> {
333        let now_secs = SystemTime::now()
334            .duration_since(UNIX_EPOCH)
335            .unwrap_or_default()
336            .as_secs();
337        let data = self.inner.lock().unwrap();
338        let reset = data.rate_limits.get(name)?.reset_5h?;
339        if reset > now_secs { Some(reset) } else { None }
340    }
341
342    /// 5-hour utilization 0.0–1.0 from the last upstream response headers.
343    /// Returns 0.0 for fresh accounts or when the reset window has already passed.
344    pub fn utilization_5h(&self, name: &str) -> f64 {
345        let now_secs = SystemTime::now()
346            .duration_since(UNIX_EPOCH)
347            .unwrap_or_default()
348            .as_secs();
349        let data = self.inner.lock().unwrap();
350        let Some(rl) = data.rate_limits.get(name) else { return 0.0 };
351        // If the reset time is in the past, the window has rolled over — treat as fresh
352        if rl.reset_5h.map(|t| t <= now_secs).unwrap_or(false) {
353            return 0.0;
354        }
355        rl.utilization_5h.unwrap_or(0.0)
356    }
357
358    /// Record token usage from a completed request.
359    /// Lazily resets the window if the 5-hour period has elapsed.
360    pub fn record_usage(&self, name: &str, input_tokens: u64, output_tokens: u64) {
361        if input_tokens == 0 && output_tokens == 0 {
362            return;
363        }
364        {
365            let mut data = self.inner.lock().unwrap();
366            let quota = data.quota.entry(name.to_owned()).or_default();
367            let now = now_ms();
368            if quota.window_start_ms == 0 || now >= quota.window_start_ms + WINDOW_MS {
369                quota.window_start_ms = now;
370                quota.input_tokens = 0;
371                quota.output_tokens = 0;
372            }
373            quota.input_tokens += input_tokens;
374            quota.output_tokens += output_tokens;
375        }
376        self.persist();
377    }
378
379    /// Snapshot of all quota windows for the status endpoint.
380    pub fn quota_snapshot(&self) -> HashMap<String, QuotaWindow> {
381        self.inner.lock().unwrap().quota.clone()
382    }
383
384    // -----------------------------------------------------------------------
385    // Rate limit header tracking
386    // -----------------------------------------------------------------------
387
388    pub fn update_rate_limits(&self, name: &str, info: RateLimitInfo) {
389        {
390            let mut data = self.inner.lock().unwrap();
391            data.rate_limits.insert(name.to_owned(), info);
392        }
393        self.persist();
394    }
395
396    pub fn rate_limit_snapshot(&self) -> HashMap<String, RateLimitInfo> {
397        self.inner.lock().unwrap().rate_limits.clone()
398    }
399
400    // -----------------------------------------------------------------------
401    // Account pinning
402    // -----------------------------------------------------------------------
403
404    pub fn get_pinned(&self) -> Option<String> {
405        self.inner.lock().unwrap().pinned_account.clone()
406    }
407
408    pub fn set_pinned(&self, name: Option<String>) {
409        {
410            let mut data = self.inner.lock().unwrap();
411            data.pinned_account = name;
412        }
413        self.persist();
414    }
415
416    // -----------------------------------------------------------------------
417    // Last-used tracking
418    // -----------------------------------------------------------------------
419
420    pub fn get_last_used(&self) -> Option<String> {
421        self.inner.lock().unwrap().last_used_account.clone()
422    }
423
424    pub fn set_last_used(&self, name: &str) {
425        {
426            let mut data = self.inner.lock().unwrap();
427            data.last_used_account = Some(name.to_owned());
428        }
429        self.persist();
430    }
431
432    // -----------------------------------------------------------------------
433    // Request log
434    // -----------------------------------------------------------------------
435
436    pub fn record_request(&self, log: RequestLog) {
437        let mut data = self.inner.lock().unwrap();
438        if data.recent_requests.len() >= MAX_RECENT {
439            data.recent_requests.pop_front();
440        }
441        data.recent_requests.push_back(log);
442    }
443
444    /// Most-recent first snapshot for the monitor / status endpoint.
445    pub fn recent_requests_snapshot(&self) -> Vec<RequestLog> {
446        let data = self.inner.lock().unwrap();
447        data.recent_requests.iter().rev().cloned().collect()
448    }
449
450    // -----------------------------------------------------------------------
451    // Global savings tracking
452    // -----------------------------------------------------------------------
453
454    /// Record tokens + API cost globally (across all accounts) for the savings display.
455    pub fn record_global(&self, model: &str, input_tokens: u64, output_tokens: u64) {
456        if input_tokens == 0 && output_tokens == 0 {
457            return;
458        }
459        let cost = crate::pricing::api_cost_usd(model, input_tokens, output_tokens);
460        let key = today_key();
461        {
462            let mut data = self.inner.lock().unwrap();
463            let bucket = data.global_daily.entry(key).or_default();
464            bucket.input_tokens  += input_tokens;
465            bucket.output_tokens += output_tokens;
466            bucket.api_cost_usd  += cost;
467            data.all_time_input      += input_tokens;
468            data.all_time_output     += output_tokens;
469            data.all_time_cost_usd   += cost;
470
471            // Prune buckets older than 90 days to prevent unbounded growth.
472            if data.global_daily.len() > 100 {
473                let cutoff = epoch_to_ymd(
474                    SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs()
475                        .saturating_sub(90 * 86400)
476                );
477                data.global_daily.retain(|k, _| k.as_str() >= cutoff.as_str());
478            }
479        }
480        self.persist();
481    }
482
483    /// Snapshot of daily and all-time savings for the status endpoint and CLI.
484    pub fn savings_snapshot(&self) -> SavingsSnapshot {
485        let now_secs = SystemTime::now()
486            .duration_since(UNIX_EPOCH)
487            .unwrap_or_default()
488            .as_secs();
489        let today   = today_key();
490        let week_ago = epoch_to_ymd(now_secs.saturating_sub(7 * 86400));
491
492        let data = self.inner.lock().unwrap();
493
494        let today_bucket = data.global_daily.get(&today).cloned().unwrap_or_default();
495
496        let (week_input, week_output, week_cost) = data.global_daily.iter()
497            .filter(|(k, _)| k.as_str() >= week_ago.as_str())
498            .fold((0u64, 0u64, 0f64), |(i, o, c), (_, b)| {
499                (i + b.input_tokens, o + b.output_tokens, c + b.api_cost_usd)
500            });
501
502        SavingsSnapshot {
503            today_input:      today_bucket.input_tokens,
504            today_output:     today_bucket.output_tokens,
505            today_cost_usd:   today_bucket.api_cost_usd,
506            week_input,
507            week_output,
508            week_cost_usd:    week_cost,
509            all_time_input:   data.all_time_input,
510            all_time_output:  data.all_time_output,
511            all_time_cost_usd: data.all_time_cost_usd,
512        }
513    }
514
515    // -----------------------------------------------------------------------
516    // Persistence
517    // -----------------------------------------------------------------------
518
519    fn persist(&self) {
520        // Signal the background writer thread; it will flush within ~100 ms.
521        self.pending.store(true, Ordering::Release);
522    }
523}
524
525#[cfg(test)]
526mod tests {
527    use super::*;
528
529    #[test]
530    fn test_sticky_ttl_expiry() {
531        let store = StateStore::new_empty();
532        let fp = "conv-fp-ttl";
533        store.set_sticky(fp, "account1", 1); // 1 ms TTL
534        assert_eq!(store.get_sticky(fp).as_deref(), Some("account1"),
535            "sticky should be available immediately");
536        std::thread::sleep(std::time::Duration::from_millis(10));
537        assert!(store.get_sticky(fp).is_none(),
538            "sticky must expire after TTL elapses");
539    }
540
541    #[test]
542    fn test_cooldown_blocks_availability() {
543        let store = StateStore::new_empty();
544        store.set_cooldown("acc", 5_000); // 5s cooldown
545        assert!(!store.is_available("acc"), "account should be unavailable during cooldown");
546    }
547
548    #[test]
549    fn test_disable_blocks_availability() {
550        let store = StateStore::new_empty();
551        store.disable_account("acc");
552        assert!(!store.is_available("acc"), "disabled account must be unavailable");
553    }
554
555    #[test]
556    fn test_quota_accumulates() {
557        let store = StateStore::new_empty();
558        store.record_usage("acc", 100, 50);
559        store.record_usage("acc", 200, 75);
560        let snap = store.quota_snapshot();
561        let q = &snap["acc"];
562        assert_eq!(q.input_tokens, 300);
563        assert_eq!(q.output_tokens, 125);
564        assert_eq!(q.total_tokens(), 425);
565    }
566
567    #[test]
568    fn test_pinned_account_round_trip() {
569        let store = StateStore::new_empty();
570        assert!(store.get_pinned().is_none());
571        store.set_pinned(Some("myaccount".into()));
572        assert_eq!(store.get_pinned().as_deref(), Some("myaccount"));
573        store.set_pinned(None);
574        assert!(store.get_pinned().is_none());
575    }
576
577    #[test]
578    fn test_last_used_round_trip() {
579        let store = StateStore::new_empty();
580        assert!(store.get_last_used().is_none());
581        store.set_last_used("acc1");
582        assert_eq!(store.get_last_used().as_deref(), Some("acc1"));
583    }
584
585    #[test]
586    fn test_recent_requests_ring_buffer() {
587        let store = StateStore::new_empty();
588        // Fill past MAX_RECENT
589        for i in 0..=(MAX_RECENT + 5) {
590            store.record_request(RequestLog {
591                ts_ms: i as u64,
592                account: "acc".into(),
593                model: "m".into(),
594                status: 200,
595                input_tokens: 1,
596                output_tokens: 1,
597                duration_ms: 1,
598            });
599        }
600        let snap = store.recent_requests_snapshot();
601        assert_eq!(snap.len(), MAX_RECENT, "buffer must not grow beyond MAX_RECENT");
602        // Most recent first
603        assert!(snap[0].ts_ms > snap[snap.len() - 1].ts_ms, "snapshot must be newest-first");
604    }
605
606    #[test]
607    fn test_state_persistence_roundtrip() {
608        // Use a unique temp path so parallel tests don't collide
609        let path = std::env::temp_dir().join(format!(
610            "shunt_test_state_{}.json",
611            std::time::SystemTime::now()
612                .duration_since(std::time::UNIX_EPOCH)
613                .unwrap()
614                .as_nanos()
615        ));
616
617        {
618            let store = StateStore::load(&path);
619            store.set_cooldown("acc", 999_999_000); // far-future cooldown
620            store.record_usage("acc", 111, 222);
621            store.set_last_used("acc");
622            // Wait for the background writer (polls every 100 ms) to flush
623            std::thread::sleep(std::time::Duration::from_millis(300));
624        }
625
626        // Load a fresh store from the persisted file
627        let store2 = StateStore::load(&path);
628        assert!(!store2.is_available("acc"), "cooldown must survive restart");
629        let snap = store2.quota_snapshot();
630        assert_eq!(snap["acc"].input_tokens, 111, "quota must survive restart");
631        assert_eq!(snap["acc"].output_tokens, 222);
632        assert_eq!(store2.get_last_used().as_deref(), Some("acc"),
633            "last_used_account must survive restart");
634
635        let _ = std::fs::remove_file(&path);
636    }
637}
638
639/// "YYYY-MM-DD" string for today in UTC.
640fn today_key() -> String {
641    let secs = SystemTime::now()
642        .duration_since(UNIX_EPOCH)
643        .unwrap_or_default()
644        .as_secs();
645    epoch_to_ymd(secs)
646}
647
/// Format Unix epoch seconds as a "YYYY-MM-DD" UTC date.
///
/// Implements Hinnant's `civil_from_days`: the day count is shifted so each
/// 400-year era starts on March 1st, which places the leap day at the end of
/// the shifted year and keeps the month arithmetic uniform.
fn epoch_to_ymd(secs: u64) -> String {
    const DAYS_PER_ERA: i64 = 146_097;

    // Days since 0000-03-01 rather than since 1970-01-01.
    let shifted = (secs / 86400) as i64 + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA);
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;

    // January and February belong to the following civil year.
    let (month, year_bump) = if month_index < 10 {
        (month_index + 3, 0)
    } else {
        (month_index - 9, 1)
    };
    let year = year_of_era + era * 400 + year_bump;

    format!("{year:04}-{month:02}-{day:02}")
}
663
664fn write_to_disk(data: &StateData, path: &Path) -> Result<()> {
665    if let Some(parent) = path.parent() {
666        std::fs::create_dir_all(parent)?;
667    }
668    let tmp = path.with_extension("tmp");
669    std::fs::write(&tmp, serde_json::to_string_pretty(data)?)?;
670    std::fs::rename(&tmp, path)?;
671    Ok(())
672}