Skip to main content

linesmith_core/data_context/
cascade.rs

1//! OAuth usage fallback cascade.
2//!
3//! Glues the slice modules (`cache`, `credentials`, `fetcher`, `jsonl`,
4//! `usage`) into the full cascade from `docs/specs/data-fetching.md`
5//! §OAuth fallback cascade. The orchestrator is a pure function keyed
6//! on injected dependencies so every branch is exercised without real
7//! I/O, network, or Keychain access.
8//!
9//! The lock-active short-circuit runs before the credentials read so
10//! a process observing another's backoff window can answer from disk
11//! (or the JSONL fallback) without paying the Keychain subprocess.
12//! `NoCredentials`-vs-`Timeout` masking is preserved because
13//! credentials are still resolved before any endpoint call that could
14//! time out.
15
16use std::sync::Arc;
17use std::time::Duration;
18
19use chrono::{DateTime, Utc};
20
21use super::cache::{CacheError, CacheStore, CachedUsage, Lock, LockStore};
22use super::credentials::Credentials;
23use super::errors::{CredentialError, JsonlError, UsageError};
24use super::fetcher::{self, UsageTransport};
25use super::jsonl::{self, JsonlAggregate};
26use super::usage::{FiveHourWindow, JsonlUsage, SevenDayWindow, UsageApiResponse, UsageData};
27
/// How long a fetched usage payload counts as fresh (three minutes),
/// per `docs/specs/data-fetching.md` §OAuth usage cache stack.
pub const DEFAULT_CACHE_DURATION: Duration = Duration::from_secs(3 * 60);

/// Backoff applied to error responses and lock windows for non-429
/// failures. Deliberately shorter than the cache window, per
/// `docs/specs/data-fetching.md` §OAuth usage cache stack ("Error
/// cache uses a shorter TTL (30s default)").
pub const DEFAULT_ERROR_TTL: Duration = Duration::from_secs(30);

/// Backoff used when a `429` carries no parseable `Retry-After`
/// (five minutes). Mirrors `DEFAULT_RATE_LIMIT_BACKOFF` in
/// `fetcher.rs` (300s per ADR-0011 §Cache stack).
pub const DEFAULT_RATE_LIMIT_BACKOFF: Duration = Duration::from_secs(5 * 60);

/// Endpoint base URL used when the caller does not override it, per
/// ADR-0011 §Endpoint contract.
pub const DEFAULT_API_BASE_URL: &str = "https://api.anthropic.com";
44
/// Tunables threaded into [`resolve_usage`]. Out-of-box defaults match
/// `docs/specs/data-fetching.md` §OAuth usage cache stack.
#[derive(Debug, Clone)]
pub struct UsageCascadeConfig {
    /// Base URL handed to `fetcher::fetch_usage`; the `Default` impl
    /// uses [`DEFAULT_API_BASE_URL`].
    pub api_base_url: String,
    /// Timeout handed to `fetcher::fetch_usage`; the `Default` impl
    /// uses `fetcher::DEFAULT_TIMEOUT`.
    pub timeout: Duration,
    /// Freshness window for cached payloads, and the length of the
    /// lock written after a successful fetch; the `Default` impl uses
    /// [`DEFAULT_CACHE_DURATION`].
    pub cache_duration: Duration,
}
53
54impl Default for UsageCascadeConfig {
55    fn default() -> Self {
56        Self {
57            api_base_url: DEFAULT_API_BASE_URL.into(),
58            timeout: fetcher::DEFAULT_TIMEOUT,
59            cache_duration: DEFAULT_CACHE_DURATION,
60        }
61    }
62}
63
64/// Resolve OAuth usage data using the full fallback cascade.
65///
66/// `credentials` and `jsonl` are lazily evaluated: the cascade does
67/// NOT invoke either on a fresh-cache or stale-lock-serve path,
68/// preserving the "no Keychain subprocess on cache hits" guarantee.
69/// `cache` and `lock` being `None` is equivalent to pointing at paths
70/// that don't exist: reads degrade to "miss" and writes are skipped.
71/// Write failures fall into two classes. Real bugs (disk full,
72/// missing parent dir, EACCES) log via `lsm_error!` (bypasses the
73/// level gate). The documented Windows MoveFileEx race-loser case
74/// logs via `lsm_debug!` (suppressible) so multi-terminal Windows
75/// users don't get persistent stderr noise on healthy runs. Either
76/// way the cascade still returns fetched data.
77pub fn resolve_usage(
78    cache: Option<&CacheStore>,
79    lock: Option<&LockStore>,
80    transport: &dyn UsageTransport,
81    credentials: &dyn Fn() -> Arc<Result<Credentials, CredentialError>>,
82    jsonl: &dyn Fn() -> Result<JsonlAggregate, JsonlError>,
83    now: &dyn Fn() -> DateTime<Utc>,
84    config: &UsageCascadeConfig,
85) -> Result<UsageData, UsageError> {
86    let cache_entry = read_cache(cache);
87    let lock_entry = read_lock(lock);
88    let now_ts = now();
89
90    if let Some(entry) = &cache_entry {
91        if is_fresh(entry, now_ts, config.cache_duration) {
92            if let Some(data) = entry.data.clone() {
93                return Ok(cached_to_usage_data(data));
94            }
95        }
96    }
97
98    let lock_active = lock_entry
99        .as_ref()
100        .is_some_and(|l| l.blocked_until > now_ts.timestamp());
101    if lock_active {
102        // Serve whatever we have without touching credentials: another
103        // process is in backoff and we must honor it.
104        let lock_error = lock_entry.as_ref().and_then(|l| l.error.as_deref());
105        let lock_from_401 = lock_error == Some("Unauthorized");
106        if let Some(entry) = &cache_entry {
107            // A lock from a 401 means the cached `data` was fetched
108            // with a now-revoked token; skip the stale-serve so
109            // invocation B (after A's 401) doesn't return the pre-401
110            // payload through this branch. Other lock errors (429,
111            // timeout) still serve stale — those are transient and
112            // the cached data was legitimately valid when fetched.
113            if !lock_from_401 {
114                if let Some(data) = entry.data.clone() {
115                    return Ok(cached_to_usage_data(data));
116                }
117            }
118            if let Some(cached) = &entry.error {
119                return jsonl_or(jsonl, now_ts, usage_error_from_code(&cached.code));
120            }
121        }
122        // No cache content (or a 401-lock bypassed the data entry):
123        // try JSONL before surfacing the lock's own error hint.
124        // Crucially, we still do NOT reach the endpoint — that would
125        // defeat the cross-process spam guard on cold-cache starts.
126        let lock_err = lock_error
127            .map(usage_error_from_code)
128            .unwrap_or(UsageError::RateLimited { retry_after: None });
129        return jsonl_or(jsonl, now_ts, lock_err);
130    }
131
132    let creds_arc = credentials();
133    let creds = match &*creds_arc {
134        Ok(c) => c.clone(),
135        // INVARIANT: credential failures never write a failure-lock.
136        // They're not network transients — the same error will recur
137        // on every invocation until the user fixes their creds file /
138        // Keychain ACL, so a lock would just replay the error and
139        // delay recovery. If a future CredentialError variant becomes
140        // genuinely retry-stable, add a matching `write_failure_lock`
141        // here and update the test suite accordingly.
142        Err(CredentialError::NoCredentials) => {
143            return jsonl_or(jsonl, now_ts, UsageError::NoCredentials)
144        }
145        Err(other) => {
146            // Preserve the specific variant so `rate-limit-segments.md`
147            // §Error message table can render `[Keychain error]` /
148            // `[Credentials unreadable]` etc. The `Clone` impl on
149            // `CredentialError` is lossy for io/serde inner errors but
150            // keeps the variant tag (all segments key off) intact.
151            return jsonl_or(jsonl, now_ts, UsageError::Credentials(other.clone()));
152        }
153    };
154
155    match fetcher::fetch_usage(transport, &config.api_base_url, &creds, config.timeout) {
156        Ok(response) => {
157            write_cache(cache, CachedUsage::with_data(response.clone()));
158            write_lock(
159                lock,
160                Lock {
161                    blocked_until: add_secs(now_ts.timestamp(), config.cache_duration),
162                    error: None,
163                },
164            );
165            Ok(UsageData::Endpoint(response.into_endpoint_usage()))
166        }
167        // 401 is the sole failure-path exception to "serve stale on
168        // error": the cached payload is tied to a no-longer-valid
169        // token, so reusing it would mislead the user. JSONL, however,
170        // is independent of token validity — fall through to it before
171        // surfacing the error so a user with a revoked token still
172        // sees their local transcript totals. The next invocation's
173        // lock-active branch refuses the stale data via the
174        // `lock_from_401` guard; we deliberately do NOT write an
175        // "Unauthorized" error into the cache here, because the
176        // cached error would then outlive the lock and mask a
177        // subsequent unrelated lock (e.g. a 429 after token refresh).
178        Err(UsageError::Unauthorized) => {
179            write_failure_lock(lock, now_ts, &UsageError::Unauthorized);
180            jsonl_or(jsonl, now_ts, UsageError::Unauthorized)
181        }
182        Err(err) => {
183            // Persist the backoff so concurrent processes honor it —
184            // without this, every statusline invocation during a 429
185            // or outage re-hits the endpoint.
186            write_failure_lock(lock, now_ts, &err);
187            if let Some(entry) = &cache_entry {
188                if let Some(data) = entry.data.clone() {
189                    return Ok(cached_to_usage_data(data));
190                }
191            }
192            jsonl_or(jsonl, now_ts, err)
193        }
194    }
195}
196
197/// Build a [`UsageData::Jsonl`] from the aggregator if it produced any
198/// data; otherwise surface `fallback` unchanged. Callers pass the
199/// endpoint-path error they would have returned, so a JSONL miss
200/// preserves the original failure reason the user sees. `now` is
201/// threaded through so the mapping can clamp future-dated block
202/// starts (clock skew) to a sane bound — see [`build_jsonl_usage`].
203fn jsonl_or(
204    jsonl: &dyn Fn() -> Result<JsonlAggregate, JsonlError>,
205    now: DateTime<Utc>,
206    fallback: UsageError,
207) -> Result<UsageData, UsageError> {
208    match build_jsonl_usage(jsonl(), now) {
209        Some(data) => Ok(UsageData::Jsonl(data)),
210        None => Err(fallback),
211    }
212}
213
214fn build_jsonl_usage(
215    result: Result<JsonlAggregate, JsonlError>,
216    now: DateTime<Utc>,
217) -> Option<JsonlUsage> {
218    let agg = match result {
219        Ok(agg) => agg,
220        Err(JsonlError::NoEntries | JsonlError::DirectoryMissing) => return None,
221        Err(other) => {
222            // `DataContext::resolve_usage_default` already collapses
223            // IoError / ParseError to NoEntries with a warn trace, so
224            // this arm is only reachable from direct test callers. Warn
225            // anyway so any future cascade caller that threads the real
226            // aggregator error through leaves a stderr breadcrumb.
227            crate::lsm_warn!(
228                "cascade: JSONL fallback unavailable ({other}); surfacing endpoint error"
229            );
230            return None;
231        }
232    };
233    // Clamp `block.start` to `floor_to_hour(now)` so a future-dated
234    // entry (clock skew) can't produce an `ends_at` further out than
235    // the current window's nominal close. The aggregator deliberately
236    // keeps token counts intact under mild skew so users don't lose
237    // their current session; this clamp normalizes the reset-timer
238    // surface without corrupting those totals.
239    let now_floor = jsonl::floor_to_hour(now);
240    let five_hour = agg.five_hour.as_ref().map(|block| {
241        let start = block.start.min(now_floor);
242        FiveHourWindow::new(block.token_counts, start)
243    });
244    let seven_day = SevenDayWindow::new(agg.seven_day.token_counts);
245    // Reaching here implies the aggregator returned `Ok(...)`; any
246    // aggregator failure (including the mod.rs-collapsed variants)
247    // kept us out of this branch. Token counts may still be zero —
248    // a parseable record can lie outside the 7d window or outside
249    // any active 5h block — so `five_hour: None` and/or a
250    // zero-valued `seven_day` are valid post-conditions here.
251    Some(JsonlUsage::new(five_hour, seven_day))
252}
253
254fn read_cache(cache: Option<&CacheStore>) -> Option<CachedUsage> {
255    cache.and_then(|c| match c.read() {
256        Ok(hit) => hit,
257        Err(e) => {
258            log_cache_read_failure("cache", &e);
259            None
260        }
261    })
262}
263
264fn read_lock(lock: Option<&LockStore>) -> Option<Lock> {
265    lock.and_then(|l| match l.read() {
266        Ok(hit) => hit,
267        Err(e) => {
268            log_cache_read_failure("lock", &e);
269            None
270        }
271    })
272}
273
274/// A cache/lock read error always collapses to "miss" so the cascade
275/// keeps serving the user, but not every error is equivalent. Ephemeral
276/// kinds (`NotFound`, truncated-read) are normal cold-start / partial-
277/// write symptoms and stay at debug. Persistent kinds (permission,
278/// ENOSPC, corrupt payload) are config defects that won't self-heal and
279/// silently force every invocation back onto the endpoint — escalate
280/// those so a user chasing "why does my statusline hammer the API"
281/// finds the cause without `LINESMITH_LOG=debug`.
282fn log_cache_read_failure(kind: &str, err: &super::cache::CacheError) {
283    use std::io::ErrorKind;
284    let io_kind = match err {
285        super::cache::CacheError::Io { cause, .. }
286        | super::cache::CacheError::Persist { cause, .. } => cause.kind(),
287    };
288    match io_kind {
289        ErrorKind::NotFound | ErrorKind::UnexpectedEof => {
290            crate::lsm_debug!("cascade: {kind} read failed: {err}; treating as miss");
291        }
292        _ => {
293            crate::lsm_warn!("cascade: {kind} read failed: {err}");
294        }
295    }
296}
297
298fn write_cache(cache: Option<&CacheStore>, entry: CachedUsage) {
299    if let Some(c) = cache {
300        if let Err(e) = c.write(&entry) {
301            log_persist_error("cache", &e);
302        }
303    }
304}
305
306fn write_lock(lock: Option<&LockStore>, entry: Lock) {
307    if let Some(l) = lock {
308        if let Err(e) = l.write(&entry) {
309            log_persist_error("lock", &e);
310        }
311    }
312}
313
/// Routing tag for [`classify_persist_error`]. `Error` bypasses the
/// log level gate (real bugs surface even with `LINESMITH_LOG=off`);
/// `Debug` is gated (Windows MoveFileEx race losers are expected and
/// shouldn't pollute stderr on healthy multi-terminal runs).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PersistLogClass {
    /// A persistence failure that is not the transient race; routed to
    /// the error sink.
    Error,
    /// The transient write race; routed to the debug sink.
    Debug,
}
323
324/// Pure classification of a persistence failure. Returns the routing
325/// class plus the formatted message. Split from [`log_persist_error`]
326/// so tests can lock in the contract (route + message format) without
327/// touching global log state or capturing process stderr.
328fn classify_persist_error(kind: &str, err: &CacheError) -> (PersistLogClass, String) {
329    if is_transient_persist_race(err) {
330        (
331            PersistLogClass::Debug,
332            format!("cascade: {kind} write race-loser (Windows MoveFileEx): {err}"),
333        )
334    } else {
335        (
336            PersistLogClass::Error,
337            format!("cascade: {kind} write failed: {err}"),
338        )
339    }
340}
341
342/// Dispatch a classified persist error to the right severity sink.
343/// Generic over the emit closures so production callers pass the
344/// `lsm_debug!`/`lsm_error!` macros while tests pass capturing
345/// closures — the match arms themselves are shared, so a future
346/// arm-swap regression fails loud in the routing tests below.
347fn route_persist_error<D, E>(class: PersistLogClass, msg: &str, on_debug: D, on_error: E)
348where
349    D: FnOnce(&str),
350    E: FnOnce(&str),
351{
352    match class {
353        PersistLogClass::Debug => on_debug(msg),
354        PersistLogClass::Error => on_error(msg),
355    }
356}
357
358/// Real bugs (disk full, missing parent dir, EACCES) route through
359/// `lsm_error!`, which bypasses the level gate so a user with
360/// `LINESMITH_LOG=off` still sees the "statusline hammers the API"
361/// class of defect. The documented Windows MoveFileEx race-loser case
362/// (concurrent processes both calling `atomic_write_json`, the loser
363/// gets `PermissionDenied`) is expected per the cache.rs contract;
364/// route it through `lsm_debug!` so multi-terminal Windows users
365/// don't get persistent stderr noise on otherwise-healthy runs.
366fn log_persist_error(kind: &str, err: &CacheError) {
367    let (class, msg) = classify_persist_error(kind, err);
368    route_persist_error(
369        class,
370        &msg,
371        |s| crate::lsm_debug!("{s}"),
372        |s| crate::lsm_error!("{s}"),
373    );
374}
375
376#[cfg(windows)]
377fn is_transient_persist_race(err: &CacheError) -> bool {
378    matches!(
379        err,
380        CacheError::Persist { cause, .. }
381            if cause.kind() == std::io::ErrorKind::PermissionDenied
382    )
383}
384
385#[cfg(not(windows))]
386fn is_transient_persist_race(_err: &CacheError) -> bool {
387    // Unix `rename(2)` doesn't expose this race; PermissionDenied on
388    // Unix is always a real perm bug and stays loud.
389    false
390}
391
392fn write_failure_lock(lock: Option<&LockStore>, now_ts: DateTime<Utc>, err: &UsageError) {
393    let backoff = backoff_for_error(err);
394    write_lock(
395        lock,
396        Lock {
397            blocked_until: add_secs(now_ts.timestamp(), backoff),
398            error: Some(err.code().to_string()),
399        },
400    );
401}
402
403fn backoff_for_error(err: &UsageError) -> Duration {
404    match err {
405        UsageError::RateLimited {
406            retry_after: Some(d),
407        } => *d,
408        UsageError::RateLimited { retry_after: None } => DEFAULT_RATE_LIMIT_BACKOFF,
409        _ => DEFAULT_ERROR_TTL,
410    }
411}
412
/// Add `secs` (whole seconds only) to a Unix timestamp, saturating at
/// `i64::MAX` instead of overflowing.
fn add_secs(base_ts: i64, secs: Duration) -> i64 {
    // Saturation is safe here: `LockStore::read` applies the
    // MAX_LOCK_DURATION ceiling from cache.rs on the read side, so a
    // pathological config is sanitized on the next read.
    match i64::try_from(secs.as_secs()) {
        Ok(offset) => base_ts.saturating_add(offset),
        Err(_) => base_ts.saturating_add(i64::MAX),
    }
}
420
421/// Reconstruct a `UsageError` from a cached `.code()` tag. Used when
422/// an active lock or error-cached entry tells us "another process
423/// just saw X" and we want to honor that semantic downstream without
424/// having the full error payload. Unknown codes fall back to
425/// `NetworkError` — the most generic transient failure.
426///
427/// INVARIANT: credential-layer codes (`SubprocessFailed`, `MissingField`,
428/// `EmptyToken`, `IoError`) and JSONL-layer codes (`NoEntries`,
429/// `DirectoryMissing`) are intentionally NOT matched here and collapse
430/// to `NetworkError`. They're unreachable today because the credential
431/// arm at `resolve_usage` returns before any `write_failure_lock` call
432/// (see the matching "credential failures never write a failure-lock"
433/// invariant in `resolve_usage`), and JSONL errors never enter the
434/// cache's error-code path. If a future change persists one of those
435/// codes to the cache or lock, extend this match — the lsm-50fs bead
436/// tracks the structural fix.
437fn usage_error_from_code(code: &str) -> UsageError {
438    match code {
439        "NoCredentials" => UsageError::NoCredentials,
440        "Timeout" => UsageError::Timeout,
441        "RateLimited" => UsageError::RateLimited { retry_after: None },
442        "Unauthorized" => UsageError::Unauthorized,
443        "ParseError" => UsageError::ParseError,
444        _ => UsageError::NetworkError,
445    }
446}
447
448fn is_fresh(entry: &CachedUsage, now: DateTime<Utc>, ttl: Duration) -> bool {
449    // `cached_at > now` (clock skew) is filtered out by
450    // `CacheStore::read`, so a normal `age < ttl` check is enough.
451    match now.signed_duration_since(entry.cached_at).to_std() {
452        Ok(elapsed) => elapsed < ttl,
453        Err(_) => false,
454    }
455}
456
457fn cached_to_usage_data(data: super::cache::CachedData) -> UsageData {
458    let response: UsageApiResponse = data.into();
459    UsageData::Endpoint(response.into_endpoint_usage())
460}
461
462#[cfg(test)]
463mod tests {
464    use super::*;
465    use std::cell::{Cell, RefCell};
466    use std::io;
467
468    use chrono::Duration as ChronoDuration;
469    use tempfile::TempDir;
470
471    use crate::data_context::cache::{CacheStore, CachedUsage, Lock, LockStore};
472    use crate::data_context::credentials::Credentials;
473    use crate::data_context::errors::CredentialError;
474    use crate::data_context::fetcher::{HttpResponse, UsageTransport};
475    use crate::data_context::jsonl::{
476        FiveHourBlock, JsonlAggregate, SevenDayWindow as JsonlSevenDayWindow, TokenCounts,
477    };
478
479    struct FakeTransport {
480        response: RefCell<io::Result<HttpResponse>>,
481        calls: Cell<u32>,
482    }
483
484    impl FakeTransport {
485        fn ok(status: u16, body: &str, retry_after: Option<&str>) -> Self {
486            Self {
487                response: RefCell::new(Ok(HttpResponse {
488                    status,
489                    body: body.as_bytes().to_vec(),
490                    retry_after: retry_after.map(String::from),
491                })),
492                calls: Cell::new(0),
493            }
494        }
495
496        fn err(kind: io::ErrorKind) -> Self {
497            Self {
498                response: RefCell::new(Err(io::Error::new(kind, "fake"))),
499                calls: Cell::new(0),
500            }
501        }
502    }
503
504    impl UsageTransport for FakeTransport {
505        fn get(&self, _url: &str, _token: &str, _timeout: Duration) -> io::Result<HttpResponse> {
506            self.calls.set(self.calls.get() + 1);
507            match &*self.response.borrow() {
508                Ok(r) => Ok(HttpResponse {
509                    status: r.status,
510                    body: r.body.clone(),
511                    retry_after: r.retry_after.clone(),
512                }),
513                Err(e) => Err(io::Error::new(e.kind(), e.to_string())),
514            }
515        }
516    }
517
    /// Endpoint payload shared by the fixtures: a `five_hour` window
    /// with utilization 42.0 and a `seven_day` window with utilization
    /// 33.0, each with a fixed `resets_at` timestamp.
    const SAMPLE_BODY: &str = r#"{
        "five_hour":  { "utilization": 42.0, "resets_at": "2026-04-19T05:00:00Z" },
        "seven_day":  { "utilization": 33.0, "resets_at": "2026-04-23T19:00:00Z" }
    }"#;
522
523    fn sample_response() -> UsageApiResponse {
524        serde_json::from_str(SAMPLE_BODY).unwrap()
525    }
526
527    fn config() -> UsageCascadeConfig {
528        UsageCascadeConfig::default()
529    }
530
531    fn now_fn() -> impl Fn() -> DateTime<Utc> {
532        let ts = Utc::now();
533        move || ts
534    }
535
536    fn ok_creds() -> Arc<Result<Credentials, CredentialError>> {
537        Arc::new(Ok(Credentials::for_testing("test-token")))
538    }
539
540    fn no_creds() -> Arc<Result<Credentials, CredentialError>> {
541        Arc::new(Err(CredentialError::NoCredentials))
542    }
543
544    fn jsonl_empty() -> Result<JsonlAggregate, JsonlError> {
545        Err(JsonlError::NoEntries)
546    }
547
548    /// 7d-only JSONL aggregate. Exercises the case where the cascade
549    /// falls back to JSONL and the 7d window is populated but no 5h
550    /// block is active (e.g. the user hasn't coded in the last 5h).
551    fn jsonl_ok() -> Result<JsonlAggregate, JsonlError> {
552        Ok(JsonlAggregate {
553            five_hour: None,
554            seven_day: JsonlSevenDayWindow {
555                window_start: Utc::now() - ChronoDuration::days(7),
556                token_counts: TokenCounts::from_parts(1_000_000, 200_000, 0, 0),
557            },
558            source_paths: Vec::new(),
559        })
560    }
561
562    /// JSONL aggregate with an active 5h block. Start is `now - 1h` so
563    /// the block's `end()` (= start + 5h) lies ~4h in the future, a
564    /// realistic reset-timer window for the 5h-reset segment tests.
565    fn jsonl_ok_with_active_block() -> Result<JsonlAggregate, JsonlError> {
566        let now = Utc::now();
567        let start = now - ChronoDuration::hours(1);
568        Ok(JsonlAggregate {
569            five_hour: Some(FiveHourBlock {
570                start,
571                actual_last_activity: now,
572                token_counts: TokenCounts::from_parts(400_000, 20_000, 0, 0),
573                models: vec!["claude-opus-4-7".into()],
574                usage_limit_reset: None,
575            }),
576            seven_day: JsonlSevenDayWindow {
577                window_start: now - ChronoDuration::days(7),
578                token_counts: TokenCounts::from_parts(1_000_000, 200_000, 0, 0),
579            },
580            source_paths: Vec::new(),
581        })
582    }
583
584    fn stale_cache_entry(age: ChronoDuration) -> CachedUsage {
585        let mut entry = CachedUsage::with_data(sample_response());
586        entry.cached_at = Utc::now() - age;
587        entry
588    }
589
590    /// Assert that `data` is the `Jsonl` variant built from the
591    /// [`jsonl_ok`] fixture (no active 5h block, 7d window
592    /// populated with `1_000_000 + 200_000` tokens).
593    ///
594    /// Fallthrough tests use this instead of `matches!(data, UsageData::Jsonl(_))`
595    /// so that a cascade bug serving `SevenDayWindow::default()` or
596    /// dropping the window entirely gets caught.
597    fn assert_jsonl_matches_ok_fixture(data: &UsageData) {
598        let UsageData::Jsonl(j) = data else {
599            panic!("expected UsageData::Jsonl, got {data:?}");
600        };
601        assert!(
602            j.five_hour.is_none(),
603            "jsonl_ok fixture has no active 5h block",
604        );
605        assert_eq!(
606            j.seven_day.tokens.total(),
607            1_200_000,
608            "7d total must match jsonl_ok fixture (1M input + 200k output)",
609        );
610    }
611
612    #[test]
613    fn fresh_disk_cache_short_circuits_without_reading_credentials() {
614        let tmp = TempDir::new().unwrap();
615        let cache = CacheStore::new(tmp.path().to_path_buf());
616        cache
617            .write(&CachedUsage::with_data(sample_response()))
618            .unwrap();
619
620        let cred_calls = Cell::new(0u32);
621        let jsonl_calls = Cell::new(0u32);
622        let credentials = || {
623            cred_calls.set(cred_calls.get() + 1);
624            ok_creds()
625        };
626        let jsonl = || {
627            jsonl_calls.set(jsonl_calls.get() + 1);
628            jsonl_empty()
629        };
630        let transport = FakeTransport::ok(200, "", None);
631
632        let data = resolve_usage(
633            Some(&cache),
634            None,
635            &transport,
636            &credentials,
637            &jsonl,
638            &now_fn(),
639            &config(),
640        )
641        .expect("ok");
642
643        let UsageData::Endpoint(endpoint) = &data else {
644            panic!("expected endpoint variant, got {data:?}");
645        };
646        assert_eq!(endpoint.five_hour.unwrap().utilization.value(), 42.0);
647        assert_eq!(cred_calls.get(), 0, "credentials must not be called");
648        assert_eq!(jsonl_calls.get(), 0, "jsonl must not be called");
649        assert_eq!(transport.calls.get(), 0, "no HTTP on cache hit");
650    }
651
652    #[test]
653    fn stale_cache_without_lock_triggers_fetch_and_overwrites() {
654        let tmp = TempDir::new().unwrap();
655        let cache = CacheStore::new(tmp.path().to_path_buf());
656        cache
657            .write(&stale_cache_entry(ChronoDuration::minutes(10)))
658            .unwrap();
659        let lock = LockStore::new(tmp.path().to_path_buf());
660
661        let transport = FakeTransport::ok(200, SAMPLE_BODY, None);
662        let data = resolve_usage(
663            Some(&cache),
664            Some(&lock),
665            &transport,
666            &ok_creds,
667            &jsonl_empty,
668            &now_fn(),
669            &config(),
670        )
671        .expect("ok");
672
673        assert!(matches!(data, UsageData::Endpoint(_)));
674        assert_eq!(transport.calls.get(), 1);
675        let refreshed = cache.read().unwrap().unwrap();
676        let age = Utc::now().signed_duration_since(refreshed.cached_at);
677        assert!(age.num_seconds() < 5, "cache must be re-stamped on success");
678    }
679
680    #[test]
681    fn stale_cache_with_active_lock_serves_stale_without_credentials() {
682        let tmp = TempDir::new().unwrap();
683        let cache = CacheStore::new(tmp.path().to_path_buf());
684        cache
685            .write(&stale_cache_entry(ChronoDuration::minutes(10)))
686            .unwrap();
687        let lock = LockStore::new(tmp.path().to_path_buf());
688        lock.write(&Lock {
689            blocked_until: Utc::now().timestamp() + 60,
690            error: Some("rate-limited".into()),
691        })
692        .unwrap();
693
694        let cred_calls = Cell::new(0u32);
695        let credentials = || {
696            cred_calls.set(cred_calls.get() + 1);
697            ok_creds()
698        };
699        let transport = FakeTransport::ok(200, "", None);
700
701        let data = resolve_usage(
702            Some(&cache),
703            Some(&lock),
704            &transport,
705            &credentials,
706            &jsonl_empty,
707            &now_fn(),
708            &config(),
709        )
710        .expect("ok");
711
712        assert!(matches!(data, UsageData::Endpoint(_)));
713        assert_eq!(
714            cred_calls.get(),
715            0,
716            "active lock must short-circuit before credentials read",
717        );
718        assert_eq!(transport.calls.get(), 0, "no HTTP when lock + stale cache");
719    }
720
721    #[test]
722    fn no_credentials_surfaces_nocredentials_not_timeout() {
723        let transport = FakeTransport::err(io::ErrorKind::TimedOut);
724        let err = resolve_usage(
725            None,
726            None,
727            &transport,
728            &no_creds,
729            &jsonl_empty,
730            &now_fn(),
731            &config(),
732        )
733        .unwrap_err();
734        assert!(matches!(err, UsageError::NoCredentials));
735        assert_eq!(transport.calls.get(), 0, "no HTTP when credentials missing",);
736    }
737
738    #[test]
739    fn no_credentials_falls_through_to_jsonl_when_available() {
740        // ADR-0013: JSONL aggregation is the terminal fallback. A
741        // user with no OAuth credentials who still has Claude Code
742        // transcript history should see their local token totals
743        // rather than `[No credentials]`.
744        let data = resolve_usage(
745            None,
746            None,
747            &FakeTransport::ok(200, "", None),
748            &no_creds,
749            &jsonl_ok,
750            &now_fn(),
751            &config(),
752        )
753        .expect("ok");
754        assert_jsonl_matches_ok_fixture(&data);
755    }
756
757    #[test]
758    fn no_credentials_with_empty_jsonl_still_surfaces_nocredentials() {
759        // JSONL unavailable → original endpoint-path error wins so
760        // users on a clean machine see the actionable `[No credentials]`
761        // rather than a silent hide.
762        let err = resolve_usage(
763            None,
764            None,
765            &FakeTransport::ok(200, "", None),
766            &no_creds,
767            &jsonl_empty,
768            &now_fn(),
769            &config(),
770        )
771        .unwrap_err();
772        assert!(matches!(err, UsageError::NoCredentials));
773    }
774
775    #[test]
776    fn endpoint_200_writes_cache_and_lock() {
777        let tmp = TempDir::new().unwrap();
778        let cache = CacheStore::new(tmp.path().to_path_buf());
779        let lock = LockStore::new(tmp.path().to_path_buf());
780        let transport = FakeTransport::ok(200, SAMPLE_BODY, None);
781
782        let data = resolve_usage(
783            Some(&cache),
784            Some(&lock),
785            &transport,
786            &ok_creds,
787            &jsonl_empty,
788            &now_fn(),
789            &config(),
790        )
791        .expect("ok");
792
793        assert!(matches!(data, UsageData::Endpoint(_)));
794        assert!(cache.read().unwrap().is_some(), "cache must be populated");
795        let persisted_lock = lock.read().unwrap().unwrap();
796        let expected_blocked_until =
797            Utc::now().timestamp() + config().cache_duration.as_secs() as i64;
798        assert!(
799            (persisted_lock.blocked_until - expected_blocked_until).abs() < 5,
800            "lock blocked_until = {}, expected near {}",
801            persisted_lock.blocked_until,
802            expected_blocked_until,
803        );
804    }
805
806    #[test]
807    fn endpoint_401_falls_through_to_jsonl_when_available() {
808        // ADR-0013: a revoked/expired token invalidates the endpoint
809        // response but not the local transcript. JSONL has to kick in
810        // on 401 too, otherwise a user who rotates their token but
811        // hasn't re-auth'd sees `[Unauthorized]` instead of real data
812        // they could otherwise surface locally.
813        let transport = FakeTransport::ok(401, "", None);
814        let data = resolve_usage(
815            None,
816            None,
817            &transport,
818            &ok_creds,
819            &jsonl_ok,
820            &now_fn(),
821            &config(),
822        )
823        .expect("ok");
824        assert_jsonl_matches_ok_fixture(&data);
825        // Endpoint is still hit first — JSONL is a fallback, not a
826        // short-circuit. Regression guard against a future refactor
827        // that inverts the ordering.
828        assert_eq!(transport.calls.get(), 1);
829    }
830
831    #[test]
832    fn jsonl_fallback_clamps_future_dated_block_start_to_now() {
833        // Clock-skew regression (Codex P2, 2026-04-22): a future-dated
834        // entry makes `block.start = floor_to_hour(future_timestamp)`,
835        // which lies beyond `now`. Without clamping, `FiveHourWindow`
836        // would derive an `ends_at` further in the future than 5h,
837        // inflating the reset countdown and distorting `rate_limit_5h`
838        // tokens. The aggregator keeps the skewed block so mild-skew
839        // users don't lose their session; the cascade clamps
840        // `block.start` to `floor_to_hour(now)` before surfacing.
841        let now = Utc::now();
842        // Build a skewed block at +2h so `block.start` starts in the
843        // future and `ends_at = start + 5h` would land ~7h out.
844        let skewed_start = now + ChronoDuration::hours(2);
845        let skewed: Result<JsonlAggregate, JsonlError> = Ok(JsonlAggregate {
846            five_hour: Some(FiveHourBlock {
847                start: skewed_start,
848                actual_last_activity: now + ChronoDuration::minutes(30),
849                token_counts: TokenCounts::from_parts(100, 0, 0, 0),
850                models: vec!["claude-opus-4-7".into()],
851                usage_limit_reset: None,
852            }),
853            seven_day: JsonlSevenDayWindow {
854                window_start: now - ChronoDuration::days(7),
855                token_counts: TokenCounts::from_parts(100, 0, 0, 0),
856            },
857            source_paths: Vec::new(),
858        });
859        let skewed_closure = || match &skewed {
860            Ok(agg) => Ok(agg.clone()),
861            Err(_) => Err(JsonlError::NoEntries),
862        };
863        let now_clock = move || now;
864        let data = resolve_usage(
865            None,
866            None,
867            &FakeTransport::err(io::ErrorKind::TimedOut),
868            &ok_creds,
869            &skewed_closure,
870            &now_clock,
871            &config(),
872        )
873        .expect("ok");
874        let UsageData::Jsonl(j) = &data else {
875            panic!("expected jsonl variant, got {data:?}");
876        };
877        let window = j
878            .five_hour
879            .as_ref()
880            .expect("active block should populate five_hour window");
881        // Clamped: start cannot exceed floor_to_hour(now), so
882        // ends_at <= floor_to_hour(now) + 5h <= now + 5h.
883        assert!(
884            window.ends_at() <= now + ChronoDuration::hours(5),
885            "ends_at={:?} must be clamped at/before now + 5h ({:?})",
886            window.ends_at(),
887            now + ChronoDuration::hours(5),
888        );
889    }
890
891    #[test]
892    fn jsonl_fallback_surfaces_five_hour_window_with_ends_at() {
893        // End-to-end: under endpoint failure + active JSONL block, the
894        // cascade wraps `block.end()` as `FiveHourWindow.ends_at` so
895        // `rate_limit_5h_reset` can derive its countdown without a
896        // tier-aware `resets_at`.
897        let data = resolve_usage(
898            None,
899            None,
900            &FakeTransport::err(io::ErrorKind::TimedOut),
901            &ok_creds,
902            &jsonl_ok_with_active_block,
903            &now_fn(),
904            &config(),
905        )
906        .expect("ok");
907        let UsageData::Jsonl(j) = &data else {
908            panic!("expected jsonl variant, got {data:?}");
909        };
910        let window = j
911            .five_hour
912            .as_ref()
913            .expect("active block should populate five_hour window");
914        let expected_ends_at = Utc::now() + ChronoDuration::hours(4);
915        let drift = (window.ends_at() - expected_ends_at).num_seconds().abs();
916        assert!(
917            drift < 5,
918            "ends_at={:?} drifted {drift}s from expected",
919            window.ends_at(),
920        );
921        // Total from the active-block fixture (400_000 + 20_000 input+output).
922        assert_eq!(window.tokens.total(), 420_000);
923    }
924
925    #[test]
926    fn endpoint_401_with_empty_jsonl_surfaces_unauthorized() {
927        let err = resolve_usage(
928            None,
929            None,
930            &FakeTransport::ok(401, "", None),
931            &ok_creds,
932            &jsonl_empty,
933            &now_fn(),
934            &config(),
935        )
936        .unwrap_err();
937        assert!(matches!(err, UsageError::Unauthorized));
938    }
939
940    #[test]
941    fn endpoint_401_does_not_serve_stale_cache() {
942        let tmp = TempDir::new().unwrap();
943        let cache = CacheStore::new(tmp.path().to_path_buf());
944        cache
945            .write(&stale_cache_entry(ChronoDuration::minutes(10)))
946            .unwrap();
947        let err = resolve_usage(
948            Some(&cache),
949            None,
950            &FakeTransport::ok(401, "", None),
951            &ok_creds,
952            &jsonl_empty,
953            &now_fn(),
954            &config(),
955        )
956        .unwrap_err();
957        assert!(matches!(err, UsageError::Unauthorized));
958    }
959
960    #[test]
961    fn invocation_after_401_does_not_serve_stale_cache_via_lock_active() {
962        // A→B sequence: invocation A gets a 401 that wrote a failure-
963        // lock with error="Unauthorized". Invocation B within the
964        // lock TTL must NOT serve the pre-401 cached `data: Some(...)`
965        // through the lock-active branch — the `lock_from_401` guard
966        // catches it. Same "401 does not serve stale" contract as
967        // endpoint_401_does_not_serve_stale_cache, but via the A→B
968        // code path that test doesn't exercise.
969        let tmp = TempDir::new().unwrap();
970        let cache = CacheStore::new(tmp.path().to_path_buf());
971        cache
972            .write(&stale_cache_entry(ChronoDuration::minutes(10)))
973            .unwrap();
974        let lock = LockStore::new(tmp.path().to_path_buf());
975
976        // Invocation A: 401.
977        let transport_a = FakeTransport::ok(401, "", None);
978        let err_a = resolve_usage(
979            Some(&cache),
980            Some(&lock),
981            &transport_a,
982            &ok_creds,
983            &jsonl_empty,
984            &now_fn(),
985            &config(),
986        )
987        .unwrap_err();
988        assert!(matches!(err_a, UsageError::Unauthorized));
989
990        // Invocation B: lock active, cache still holds pre-401 data.
991        // Transport returns fresh 200 data if hit; we assert it isn't.
992        let transport_b = FakeTransport::ok(200, SAMPLE_BODY, None);
993        let err_b = resolve_usage(
994            Some(&cache),
995            Some(&lock),
996            &transport_b,
997            &ok_creds,
998            &jsonl_empty,
999            &now_fn(),
1000            &config(),
1001        )
1002        .unwrap_err();
1003        assert!(matches!(err_b, UsageError::Unauthorized));
1004        assert_eq!(
1005            transport_b.calls.get(),
1006            0,
1007            "active lock must still gate the endpoint on invocation B",
1008        );
1009    }
1010
1011    #[test]
1012    fn invocation_after_401_falls_through_to_jsonl_when_available() {
1013        // ADR-0013 parity for the A→B sequence: when invocation A
1014        // 401'd and invocation B has JSONL data, B gets local
1015        // transcript totals instead of either the stale cache or the
1016        // Unauthorized error.
1017        let tmp = TempDir::new().unwrap();
1018        let cache = CacheStore::new(tmp.path().to_path_buf());
1019        cache
1020            .write(&stale_cache_entry(ChronoDuration::minutes(10)))
1021            .unwrap();
1022        let lock = LockStore::new(tmp.path().to_path_buf());
1023
1024        let data_a = resolve_usage(
1025            Some(&cache),
1026            Some(&lock),
1027            &FakeTransport::ok(401, "", None),
1028            &ok_creds,
1029            &jsonl_ok,
1030            &now_fn(),
1031            &config(),
1032        )
1033        .expect("A falls through to JSONL with jsonl_ok");
1034        assert_jsonl_matches_ok_fixture(&data_a);
1035
1036        let transport_b = FakeTransport::ok(200, SAMPLE_BODY, None);
1037        let data_b = resolve_usage(
1038            Some(&cache),
1039            Some(&lock),
1040            &transport_b,
1041            &ok_creds,
1042            &jsonl_ok,
1043            &now_fn(),
1044            &config(),
1045        )
1046        .expect("B returns JSONL on lock-active path");
1047        assert_jsonl_matches_ok_fixture(&data_b);
1048        assert_eq!(transport_b.calls.get(), 0);
1049    }
1050
1051    #[test]
1052    fn active_unauthorized_lock_rejects_stale_cached_data() {
1053        // Isolates the `lock_from_401` guard: seeds `cache.data =
1054        // Some(stale)` + `lock.error = Some("Unauthorized")` directly,
1055        // bypassing the A→B integration path. The seeded state is
1056        // realistic because a different process could have run the
1057        // 401 (writing the lock) while leaving our cache untouched.
1058        // Verifies the guard refuses to serve the stale data without
1059        // depending on the 401 handler's own write ordering.
1060        let tmp = TempDir::new().unwrap();
1061        let cache = CacheStore::new(tmp.path().to_path_buf());
1062        cache
1063            .write(&stale_cache_entry(ChronoDuration::minutes(10)))
1064            .unwrap();
1065        let lock = LockStore::new(tmp.path().to_path_buf());
1066        lock.write(&Lock {
1067            blocked_until: Utc::now().timestamp() + 30,
1068            error: Some("Unauthorized".into()),
1069        })
1070        .unwrap();
1071
1072        let transport = FakeTransport::ok(200, SAMPLE_BODY, None);
1073        let err = resolve_usage(
1074            Some(&cache),
1075            Some(&lock),
1076            &transport,
1077            &ok_creds,
1078            &jsonl_empty,
1079            &now_fn(),
1080            &config(),
1081        )
1082        .unwrap_err();
1083        assert!(matches!(err, UsageError::Unauthorized));
1084        assert_eq!(transport.calls.get(), 0);
1085    }
1086
1087    #[test]
1088    fn endpoint_429_writes_lock_with_retry_after_backoff() {
1089        // Codex P1: without this, every concurrent process re-hits
1090        // the endpoint during a rate-limit window.
1091        let tmp = TempDir::new().unwrap();
1092        let cache = CacheStore::new(tmp.path().to_path_buf());
1093        let lock = LockStore::new(tmp.path().to_path_buf());
1094
1095        let _ = resolve_usage(
1096            Some(&cache),
1097            Some(&lock),
1098            &FakeTransport::ok(429, "", Some("120")),
1099            &ok_creds,
1100            &jsonl_empty,
1101            &now_fn(),
1102            &config(),
1103        );
1104
1105        let persisted = lock.read().unwrap().expect("lock must be written");
1106        let expected = Utc::now().timestamp() + 120;
1107        assert!(
1108            (persisted.blocked_until - expected).abs() < 5,
1109            "blocked_until={}, expected near {}",
1110            persisted.blocked_until,
1111            expected,
1112        );
1113        assert_eq!(persisted.error.as_deref(), Some("RateLimited"));
1114    }
1115
1116    #[test]
1117    fn endpoint_timeout_writes_lock_with_error_ttl() {
1118        let tmp = TempDir::new().unwrap();
1119        let lock = LockStore::new(tmp.path().to_path_buf());
1120
1121        let _ = resolve_usage(
1122            None,
1123            Some(&lock),
1124            &FakeTransport::err(io::ErrorKind::TimedOut),
1125            &ok_creds,
1126            &jsonl_empty,
1127            &now_fn(),
1128            &config(),
1129        );
1130
1131        let persisted = lock.read().unwrap().expect("lock must be written");
1132        let expected = Utc::now().timestamp() + DEFAULT_ERROR_TTL.as_secs() as i64;
1133        assert!(
1134            (persisted.blocked_until - expected).abs() < 5,
1135            "blocked_until={}, expected near {}",
1136            persisted.blocked_until,
1137            expected,
1138        );
1139        assert_eq!(persisted.error.as_deref(), Some("Timeout"));
1140    }
1141
1142    #[test]
1143    fn lock_written_on_429_blocks_next_process_from_hitting_endpoint() {
1144        // End-to-end P1a+P1b: process A gets a 429 and writes the
1145        // lock; process B observes the lock and skips the endpoint.
1146        // Without either half of the fix, B stampedes the rate-limited
1147        // endpoint.
1148        let tmp = TempDir::new().unwrap();
1149        let cache = CacheStore::new(tmp.path().to_path_buf());
1150        let lock = LockStore::new(tmp.path().to_path_buf());
1151
1152        let transport_a = FakeTransport::ok(429, "", Some("120"));
1153        let _ = resolve_usage(
1154            Some(&cache),
1155            Some(&lock),
1156            &transport_a,
1157            &ok_creds,
1158            &jsonl_empty,
1159            &now_fn(),
1160            &config(),
1161        );
1162
1163        let transport_b = FakeTransport::ok(200, SAMPLE_BODY, None);
1164        let result_b = resolve_usage(
1165            Some(&cache),
1166            Some(&lock),
1167            &transport_b,
1168            &ok_creds,
1169            &jsonl_empty,
1170            &now_fn(),
1171            &config(),
1172        );
1173        assert!(matches!(result_b, Err(UsageError::RateLimited { .. })));
1174        assert_eq!(
1175            transport_b.calls.get(),
1176            0,
1177            "process B must not hit endpoint"
1178        );
1179    }
1180
1181    #[test]
1182    fn endpoint_401_writes_lock_so_peers_skip_the_stale_token() {
1183        let tmp = TempDir::new().unwrap();
1184        let lock = LockStore::new(tmp.path().to_path_buf());
1185
1186        let _ = resolve_usage(
1187            None,
1188            Some(&lock),
1189            &FakeTransport::ok(401, "", None),
1190            &ok_creds,
1191            &jsonl_empty,
1192            &now_fn(),
1193            &config(),
1194        );
1195
1196        let persisted = lock.read().unwrap().expect("lock must be written");
1197        assert_eq!(persisted.error.as_deref(), Some("Unauthorized"));
1198    }
1199
1200    #[test]
1201    fn endpoint_429_with_stale_cache_serves_stale() {
1202        let tmp = TempDir::new().unwrap();
1203        let cache = CacheStore::new(tmp.path().to_path_buf());
1204        cache
1205            .write(&stale_cache_entry(ChronoDuration::minutes(10)))
1206            .unwrap();
1207        let data = resolve_usage(
1208            Some(&cache),
1209            None,
1210            &FakeTransport::ok(429, "", Some("120")),
1211            &ok_creds,
1212            &jsonl_empty,
1213            &now_fn(),
1214            &config(),
1215        )
1216        .expect("ok");
1217        let UsageData::Endpoint(endpoint) = &data else {
1218            panic!("expected endpoint variant, got {data:?}");
1219        };
1220        assert_eq!(endpoint.five_hour.unwrap().utilization.value(), 42.0);
1221    }
1222
1223    #[test]
1224    fn endpoint_429_with_empty_jsonl_surfaces_ratelimited() {
1225        // Endpoint + JSONL both empty → original rate-limit error wins
1226        // so the user sees `[Rate limited]` rather than a silent hide.
1227        let err = resolve_usage(
1228            None,
1229            None,
1230            &FakeTransport::ok(429, "", None),
1231            &ok_creds,
1232            &jsonl_empty,
1233            &now_fn(),
1234            &config(),
1235        )
1236        .unwrap_err();
1237        assert!(matches!(err, UsageError::RateLimited { .. }));
1238    }
1239
1240    #[test]
1241    fn endpoint_429_falls_through_to_jsonl_when_available() {
1242        // ADR-0013: rate-limited users with a local transcript see
1243        // `~5h: ...` / `~7d: ...` rather than `[Rate limited]`.
1244        let transport = FakeTransport::ok(429, "", None);
1245        let data = resolve_usage(
1246            None,
1247            None,
1248            &transport,
1249            &ok_creds,
1250            &jsonl_ok,
1251            &now_fn(),
1252            &config(),
1253        )
1254        .expect("ok");
1255        assert_jsonl_matches_ok_fixture(&data);
1256        assert_eq!(transport.calls.get(), 1);
1257    }
1258
1259    #[test]
1260    fn endpoint_timeout_with_stale_cache_serves_stale() {
1261        let tmp = TempDir::new().unwrap();
1262        let cache = CacheStore::new(tmp.path().to_path_buf());
1263        cache
1264            .write(&stale_cache_entry(ChronoDuration::minutes(10)))
1265            .unwrap();
1266        let data = resolve_usage(
1267            Some(&cache),
1268            None,
1269            &FakeTransport::err(io::ErrorKind::TimedOut),
1270            &ok_creds,
1271            &jsonl_empty,
1272            &now_fn(),
1273            &config(),
1274        )
1275        .expect("ok");
1276        assert!(matches!(data, UsageData::Endpoint(_)));
1277    }
1278
1279    #[test]
1280    fn endpoint_timeout_without_stale_falls_through_to_jsonl() {
1281        // ADR-0013: Timeout / NetworkError falls through to JSONL so
1282        // an offline user still sees their local token totals.
1283        let transport = FakeTransport::err(io::ErrorKind::TimedOut);
1284        let data = resolve_usage(
1285            None,
1286            None,
1287            &transport,
1288            &ok_creds,
1289            &jsonl_ok,
1290            &now_fn(),
1291            &config(),
1292        )
1293        .expect("ok");
1294        assert_jsonl_matches_ok_fixture(&data);
1295        assert_eq!(
1296            transport.calls.get(),
1297            1,
1298            "endpoint must be attempted before JSONL fallback",
1299        );
1300    }
1301
1302    #[test]
1303    fn endpoint_timeout_without_stale_or_jsonl_surfaces_original_error() {
1304        let err = resolve_usage(
1305            None,
1306            None,
1307            &FakeTransport::err(io::ErrorKind::TimedOut),
1308            &ok_creds,
1309            &jsonl_empty,
1310            &now_fn(),
1311            &config(),
1312        )
1313        .unwrap_err();
1314        assert!(matches!(err, UsageError::Timeout));
1315    }
1316
1317    #[test]
1318    fn endpoint_network_error_falls_through_same_as_timeout() {
1319        let err = resolve_usage(
1320            None,
1321            None,
1322            &FakeTransport::err(io::ErrorKind::ConnectionRefused),
1323            &ok_creds,
1324            &jsonl_empty,
1325            &now_fn(),
1326            &config(),
1327        )
1328        .unwrap_err();
1329        assert!(matches!(err, UsageError::NetworkError));
1330    }
1331
1332    #[test]
1333    fn endpoint_malformed_response_falls_through_to_jsonl() {
1334        let err = resolve_usage(
1335            None,
1336            None,
1337            &FakeTransport::ok(200, "{ not valid", None),
1338            &ok_creds,
1339            &jsonl_empty,
1340            &now_fn(),
1341            &config(),
1342        )
1343        .unwrap_err();
1344        assert!(matches!(err, UsageError::ParseError));
1345    }
1346
1347    #[test]
1348    fn cascade_tolerates_missing_cache_and_lock_stores() {
1349        // Mirrors the no-cache-root branch (HOME and XDG both unset):
1350        // cascade must still reach credentials + endpoint instead of
1351        // hard-erroring on cache I/O.
1352        let data = resolve_usage(
1353            None,
1354            None,
1355            &FakeTransport::ok(200, SAMPLE_BODY, None),
1356            &ok_creds,
1357            &jsonl_empty,
1358            &now_fn(),
1359            &config(),
1360        )
1361        .expect("ok");
1362        assert!(matches!(data, UsageData::Endpoint(_)));
1363    }
1364
1365    #[test]
1366    fn expired_lock_does_not_gate_fetch() {
1367        let tmp = TempDir::new().unwrap();
1368        let cache = CacheStore::new(tmp.path().to_path_buf());
1369        cache
1370            .write(&stale_cache_entry(ChronoDuration::minutes(10)))
1371            .unwrap();
1372        let lock = LockStore::new(tmp.path().to_path_buf());
1373        lock.write(&Lock {
1374            blocked_until: Utc::now().timestamp() - 60,
1375            error: None,
1376        })
1377        .unwrap();
1378
1379        let transport = FakeTransport::ok(200, SAMPLE_BODY, None);
1380        let _ = resolve_usage(
1381            Some(&cache),
1382            Some(&lock),
1383            &transport,
1384            &ok_creds,
1385            &jsonl_empty,
1386            &now_fn(),
1387            &config(),
1388        )
1389        .expect("ok");
1390        assert_eq!(
1391            transport.calls.get(),
1392            1,
1393            "expired lock must not block fetch"
1394        );
1395    }
1396
1397    #[test]
1398    fn active_lock_with_no_cached_data_does_not_hit_endpoint() {
1399        // Cold-cache start during another process's backoff window:
1400        // the lock must block the fetch even without stale data to
1401        // serve, else every concurrent statusline invocation stampedes
1402        // `/api/oauth/usage`. Flagged P1 by Codex.
1403        let tmp = TempDir::new().unwrap();
1404        let cache = CacheStore::new(tmp.path().to_path_buf());
1405        let lock = LockStore::new(tmp.path().to_path_buf());
1406        lock.write(&Lock {
1407            blocked_until: Utc::now().timestamp() + 60,
1408            error: Some("RateLimited".into()),
1409        })
1410        .unwrap();
1411
1412        let cred_calls = Cell::new(0u32);
1413        let credentials = || {
1414            cred_calls.set(cred_calls.get() + 1);
1415            ok_creds()
1416        };
1417        let transport = FakeTransport::ok(200, SAMPLE_BODY, None);
1418        let err = resolve_usage(
1419            Some(&cache),
1420            Some(&lock),
1421            &transport,
1422            &credentials,
1423            &jsonl_empty,
1424            &now_fn(),
1425            &config(),
1426        )
1427        .unwrap_err();
1428        assert!(matches!(err, UsageError::RateLimited { .. }));
1429        assert_eq!(cred_calls.get(), 0, "must not resolve credentials");
1430        assert_eq!(transport.calls.get(), 0, "must not hit endpoint");
1431    }
1432
1433    #[test]
1434    fn active_lock_falls_through_to_jsonl_when_available() {
1435        // ADR-0013: even when gated by another process's backoff lock,
1436        // a populated JSONL aggregate wins over the lock-hint error so
1437        // rate-limited users with local transcripts see `~5h: ...`.
1438        // The lock still gates the endpoint — no HTTP call may happen.
1439        let tmp = TempDir::new().unwrap();
1440        let cache = CacheStore::new(tmp.path().to_path_buf());
1441        let lock = LockStore::new(tmp.path().to_path_buf());
1442        lock.write(&Lock {
1443            blocked_until: Utc::now().timestamp() + 60,
1444            error: Some("RateLimited".into()),
1445        })
1446        .unwrap();
1447
1448        let transport = FakeTransport::ok(200, SAMPLE_BODY, None);
1449        let data = resolve_usage(
1450            Some(&cache),
1451            Some(&lock),
1452            &transport,
1453            &ok_creds,
1454            &jsonl_ok,
1455            &now_fn(),
1456            &config(),
1457        )
1458        .expect("ok");
1459        assert_jsonl_matches_ok_fixture(&data);
1460        assert_eq!(
1461            transport.calls.get(),
1462            0,
1463            "active lock must still gate the endpoint even with JSONL data"
1464        );
1465    }
1466
1467    #[test]
1468    fn active_lock_serves_cached_error_without_hitting_endpoint() {
1469        // When the cache carries a specific error tag (e.g. Unauthorized
1470        // from a prior 401), the lock-active path must surface that
1471        // code — not the generic lock-hint — so plugins/segments see
1472        // the real reason.
1473        let tmp = TempDir::new().unwrap();
1474        let cache = CacheStore::new(tmp.path().to_path_buf());
1475        cache
1476            .write(&CachedUsage::with_error("Unauthorized"))
1477            .unwrap();
1478        let lock = LockStore::new(tmp.path().to_path_buf());
1479        lock.write(&Lock {
1480            blocked_until: Utc::now().timestamp() + 60,
1481            error: Some("RateLimited".into()),
1482        })
1483        .unwrap();
1484
1485        let transport = FakeTransport::ok(200, "", None);
1486        let err = resolve_usage(
1487            Some(&cache),
1488            Some(&lock),
1489            &transport,
1490            &ok_creds,
1491            &jsonl_empty,
1492            &now_fn(),
1493            &config(),
1494        )
1495        .unwrap_err();
1496        assert!(matches!(err, UsageError::Unauthorized));
1497        assert_eq!(transport.calls.get(), 0);
1498    }
1499
1500    #[test]
1501    fn active_lock_with_cached_error_falls_through_to_jsonl_when_available() {
1502        // ADR-0013 + silent-failure review: when the cache carries a
1503        // specific error code AND the lock is active AND JSONL has
1504        // data, the JSONL fallback wins. Otherwise users with a
1505        // cached `Unauthorized` plus a valid transcript would see
1506        // `[Unauthorized]` instead of their local totals — the exact
1507        // failure mode the ADR rejects.
1508        let tmp = TempDir::new().unwrap();
1509        let cache = CacheStore::new(tmp.path().to_path_buf());
1510        cache
1511            .write(&CachedUsage::with_error("Unauthorized"))
1512            .unwrap();
1513        let lock = LockStore::new(tmp.path().to_path_buf());
1514        lock.write(&Lock {
1515            blocked_until: Utc::now().timestamp() + 60,
1516            error: Some("RateLimited".into()),
1517        })
1518        .unwrap();
1519
1520        let transport = FakeTransport::ok(200, "", None);
1521        let data = resolve_usage(
1522            Some(&cache),
1523            Some(&lock),
1524            &transport,
1525            &ok_creds,
1526            &jsonl_ok,
1527            &now_fn(),
1528            &config(),
1529        )
1530        .expect("ok");
1531        assert_jsonl_matches_ok_fixture(&data);
1532        assert_eq!(transport.calls.get(), 0);
1533    }
1534
1535    #[test]
1536    fn credential_failure_other_than_missing_preserves_variant_tag() {
1537        // `rate-limit-segments.md` §Error message table distinguishes
1538        // `[Keychain error]` from `[No credentials]`, so the cascade
1539        // must preserve the specific CredentialError flavor. Only
1540        // `NoCredentials` maps to the flat `UsageError::NoCredentials`;
1541        // everything else wraps.
1542        let creds_err: Arc<Result<Credentials, CredentialError>> =
1543            Arc::new(Err(CredentialError::MissingField {
1544                path: std::path::PathBuf::from("/x"),
1545            }));
1546        let credentials = || creds_err.clone();
1547        let err = resolve_usage(
1548            None,
1549            None,
1550            &FakeTransport::err(io::ErrorKind::TimedOut),
1551            &credentials,
1552            &jsonl_empty,
1553            &now_fn(),
1554            &config(),
1555        )
1556        .unwrap_err();
1557        assert!(
1558            matches!(
1559                err,
1560                UsageError::Credentials(CredentialError::MissingField { .. })
1561            ),
1562            "expected Credentials(MissingField), got {err:?}",
1563        );
1564        assert_eq!(err.code(), "MissingField", "variant tag must round-trip");
1565    }
1566
1567    #[test]
1568    fn subprocess_failed_cred_preserves_subprocess_tag() {
1569        // `SubprocessFailed` carries a non-Clone `io::Error`; the
1570        // lossy Clone impl on CredentialError must still preserve the
1571        // variant so segments can render `[Keychain error]`.
1572        let creds_err: Arc<Result<Credentials, CredentialError>> = Arc::new(Err(
1573            CredentialError::SubprocessFailed(io::Error::new(io::ErrorKind::PermissionDenied, "x")),
1574        ));
1575        let credentials = || creds_err.clone();
1576        let err = resolve_usage(
1577            None,
1578            None,
1579            &FakeTransport::err(io::ErrorKind::TimedOut),
1580            &credentials,
1581            &jsonl_empty,
1582            &now_fn(),
1583            &config(),
1584        )
1585        .unwrap_err();
1586        assert_eq!(err.code(), "SubprocessFailed");
1587    }
1588
#[test]
fn credential_variant_falls_through_to_jsonl_when_available() {
    // ADR-0013: a credential failure other than `NoCredentials`
    // (broken Keychain, malformed credentials.json) must not
    // hard-return the cred error variant while the transcript is
    // still readable; the cascade falls through to JSONL instead.
    // This is the common degraded-environment scenario.
    let keychain_failure = io::Error::new(io::ErrorKind::PermissionDenied, "x");
    let shared: Arc<Result<Credentials, CredentialError>> =
        Arc::new(Err(CredentialError::SubprocessFailed(keychain_failure)));
    let credentials = || shared.clone();
    let transport = FakeTransport::err(io::ErrorKind::TimedOut);
    let clock = now_fn();
    let cfg = config();

    let outcome = resolve_usage(
        None,
        None,
        &transport,
        &credentials,
        &jsonl_ok,
        &clock,
        &cfg,
    );

    let data = outcome.expect("ok");
    assert_jsonl_matches_ok_fixture(&data);
}
1611
// A broken persistence layer must not cost the caller the data that
// was just fetched. The write-side helpers log via `lsm_error!` and
// carry on (the cache.rs contract permits per-call failures), which
// makes this contract observable in both debug and release builds.
#[test]
fn cache_write_failure_does_not_block_returned_data() {
    let tmp = TempDir::new().unwrap();
    // Root the cache beneath a regular file: a directory can never be
    // created under it, so the cache location is unusable.
    let blocker = tmp.path().join("blocked");
    std::fs::write(&blocker, "x").unwrap();
    let cache = CacheStore::new(blocker.join("nested"));

    let transport = FakeTransport::ok(200, SAMPLE_BODY, None);
    let clock = now_fn();
    let cfg = config();
    let outcome = resolve_usage(
        Some(&cache),
        None,
        &transport,
        &ok_creds,
        &jsonl_empty,
        &clock,
        &cfg,
    );

    let data = outcome.expect("ok");
    assert!(matches!(data, UsageData::Endpoint(_)));
}
1635
#[test]
fn fresh_cache_is_source_endpoint_not_jsonl() {
    // Regression guard. Tagging cached data as `Jsonl` to signal
    // "stale" is tempting, but what the cache holds is the original
    // endpoint payload, so `Endpoint` is the correct tag. Staleness
    // is decided by segments via TTL, never via the tag.
    let tmp = TempDir::new().unwrap();
    let cache_root = tmp.path().to_path_buf();
    let cache = CacheStore::new(cache_root);
    let fresh_entry = CachedUsage::with_data(sample_response());
    cache.write(&fresh_entry).unwrap();

    // NOTE(review): the transport is handed an empty body; presumably
    // a cascade that wrongly bypassed the cache could not turn that
    // into endpoint data — confirm against `FakeTransport`.
    let transport = FakeTransport::ok(200, "", None);
    let clock = now_fn();
    let cfg = config();
    let outcome = resolve_usage(
        Some(&cache),
        None,
        &transport,
        &ok_creds,
        &jsonl_empty,
        &clock,
        &cfg,
    );

    let data = outcome.expect("ok");
    assert!(matches!(data, UsageData::Endpoint(_)));
}
1660
#[test]
fn clock_skew_future_cached_at_treats_entry_as_stale() {
    // `CacheStore::read` already discards entries whose `cached_at`
    // lies after `now`, so the cascade sees no entry and goes on to
    // the endpoint. The behavior is pinned here so that relaxing
    // `CacheStore::read` later cannot silently let the cascade serve
    // future-stamped junk.
    let tmp = TempDir::new().unwrap();

    // Hand-write a cache file stamped one hour into the future.
    let an_hour_ahead = Utc::now() + ChronoDuration::hours(1);
    let mut entry = CachedUsage::with_data(sample_response());
    entry.cached_at = an_hour_ahead;
    let serialized = serde_json::to_string(&entry).unwrap();
    std::fs::write(tmp.path().join("usage.json"), serialized).unwrap();

    let cache = CacheStore::new(tmp.path().to_path_buf());
    let transport = FakeTransport::ok(200, SAMPLE_BODY, None);
    let clock = now_fn();
    let cfg = config();
    let _ = resolve_usage(
        Some(&cache),
        None,
        &transport,
        &ok_creds,
        &jsonl_empty,
        &clock,
        &cfg,
    )
    .expect("ok");

    // Exactly one endpoint call: the skewed entry was not served.
    assert_eq!(transport.calls.get(), 1);
}
1688
1689    // --- classify_persist_error contract ---
1690    //
1691    // log_persist_error routes via macros (lsm_debug! / lsm_error!) which
1692    // write to stderr. classify_persist_error is the pure half. A refactor
1693    // that silently dropped EITHER emission path would still pass the
1694    // surviving cache_write_failure_does_not_block_* happy-path test
1695    // (which only asserts the cascade returns endpoint data); these
1696    // tests fail loud on the route + message format so the regression
1697    // can't sneak through.
1698
1699    fn make_io_error(kind: io::ErrorKind) -> CacheError {
1700        CacheError::Io {
1701            path: std::path::PathBuf::from("/test/path"),
1702            cause: io::Error::new(kind, "test"),
1703        }
1704    }
1705
1706    fn make_persist_error(kind: io::ErrorKind) -> CacheError {
1707        CacheError::Persist {
1708            path: std::path::PathBuf::from("/test/path"),
1709            cause: io::Error::new(kind, "test"),
1710        }
1711    }
1712
#[test]
fn classify_persist_error_routes_io_failure_to_error() {
    // An `Io` failure on a cache write is classified as `Error` and
    // carries the "cache write failed" prefix.
    let failure = make_io_error(io::ErrorKind::NotFound);
    let (class, msg) = classify_persist_error("cache", &failure);

    assert_eq!(class, PersistLogClass::Error);
    assert!(
        msg.contains("cascade: cache write failed:"),
        "expected loud-signal prefix, got {msg:?}"
    );
}
1722
#[test]
fn classify_persist_error_routes_lock_kind_into_message() {
    // The kind label passed in ("lock") has to show up in the
    // rendered message, not a hard-coded "cache".
    let failure = make_persist_error(io::ErrorKind::OutOfMemory);
    let (class, msg) = classify_persist_error("lock", &failure);

    assert_eq!(class, PersistLogClass::Error);
    assert!(
        msg.contains("cascade: lock write failed:"),
        "kind label must thread through, got {msg:?}"
    );
}
1733
#[cfg(unix)]
#[test]
fn classify_persist_error_routes_permission_denied_to_error_on_unix() {
    // On unix a PermissionDenied is a genuine permissions bug
    // (EACCES) rather than a transient race:
    // `is_transient_persist_race` returns false on cfg(not(windows)),
    // so the failure stays loud.
    let denied = make_persist_error(io::ErrorKind::PermissionDenied);
    let (class, msg) = classify_persist_error("cache", &denied);

    assert_eq!(class, PersistLogClass::Error);
    assert!(msg.contains("cascade: cache write failed:"));
}
1747
#[cfg(windows)]
#[test]
fn classify_persist_error_routes_persist_permission_denied_to_debug_on_windows() {
    // `Persist` variant plus a PermissionDenied cause is the
    // documented MoveFileEx race-loser signature. It is routed to
    // Debug so multi-terminal Windows users are spared stderr noise.
    let race_loser = make_persist_error(io::ErrorKind::PermissionDenied);
    let (class, msg) = classify_persist_error("cache", &race_loser);

    assert_eq!(class, PersistLogClass::Debug);
    assert!(
        ["race-loser", "Windows MoveFileEx"]
            .iter()
            .all(|needle| msg.contains(*needle)),
        "expected race-loser framing, got {msg:?}"
    );
}
1764
#[cfg(windows)]
#[test]
fn classify_persist_error_routes_io_permission_denied_to_error_on_windows() {
    // Only the Persist + PermissionDenied pairing is the MoveFileEx
    // race signature. A PermissionDenied arriving through the `Io`
    // variant is a real bug, Windows included.
    let denied = make_io_error(io::ErrorKind::PermissionDenied);
    let (class, _msg) = classify_persist_error("cache", &denied);

    assert_eq!(class, PersistLogClass::Error);
}
1775
1776    // Production `log_persist_error` and these tests share the SAME
1777    // `route_persist_error` match block, so a future arm-swap (Debug
1778    // routing to Error or vice versa) fails loud here.
1779
#[test]
fn route_persist_error_dispatches_debug_class_to_debug_closure_only() {
    // Each sink bumps its own counter; a `Debug` class must touch the
    // debug sink exactly once and the error sink not at all.
    let (mut debug_hits, mut error_hits) = (0, 0);

    route_persist_error(
        PersistLogClass::Debug,
        "msg",
        |_| {
            debug_hits += 1;
        },
        |_| {
            error_hits += 1;
        },
    );

    assert_eq!((debug_hits, error_hits), (1, 0));
}
1792
#[test]
fn route_persist_error_dispatches_error_class_to_error_closure_only() {
    // Each sink bumps its own counter; an `Error` class must touch
    // the error sink exactly once and the debug sink not at all.
    let (mut debug_hits, mut error_hits) = (0, 0);

    route_persist_error(
        PersistLogClass::Error,
        "msg",
        |_| {
            debug_hits += 1;
        },
        |_| {
            error_hits += 1;
        },
    );

    assert_eq!((debug_hits, error_hits), (0, 1));
}
1805
#[test]
fn route_persist_error_passes_msg_through_unchanged() {
    // Whatever text goes in must reach the selected sink verbatim.
    let mut captured: Option<String> = None;

    route_persist_error(
        PersistLogClass::Error,
        "cascade: cache write failed: disk full",
        |_| {},
        |text| {
            captured = Some(text.to_string());
        },
    );

    let expected = Some("cascade: cache write failed: disk full");
    assert_eq!(captured.as_deref(), expected);
}
1820}