oxi-ai 0.43.0

Unified LLM API — multi-provider streaming interface for AI coding assistants
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
//! models.dev live enrichment (Layer 2.5 of the catalog).
//!
//! Fetches the community-maintained model catalog from
//! <https://models.dev/api.json> (MIT, also used by opencode) and enriches
//! the built-in Layer 1 TOML entries with up-to-date pricing, context
//! windows, max output tokens, and reasoning flags.
//!
//! # Layering
//!
//! ```text
//! Layer 1   built-in TOML (compiled in)           fallback
//! Layer 2   user overrides (~/.oxi/catalog/...)    wins
//! Layer 2.5 models.dev enrichment (this module)   fills gaps / refreshes
//! Layer 3   /v1/models runtime discovery          local servers
//! ```
//!
//! Enrichment runs inside [`crate::model_db::get_all_models`] after
//! Layer 2 overrides are applied. Only fields that are missing or
//! unverifiable in Layer 1 are overwritten — see the precedence rules below.
//!
//! # Precedence (highest wins)
//!
//! 1. Layer 2 user override
//! 2. models.dev enrichment (this module) — only positive prices / known
//!    limits; never overwrites a verified Layer 1 value with a worse one
//! 3. Layer 1 built-in TOML
//!
//! # Offline behavior
//!
//! If the cache is fresh, enrichment is near-instant (file read). If the
//! cache is stale or absent, a live fetch is attempted (10s timeout, 2
//! retries). On total failure, [`get`] returns `None` and Layer 1 is used
//! unchanged — the application still works, only cost accuracy degrades.
//!
//! # Attribution
//!
//! Model data © [models.dev](https://models.dev) (MIT). See
//! <https://github.com/sst/models.dev>.

use std::collections::BTreeMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::OnceLock;
use std::time::Duration;
use std::time::SystemTime;

use serde::{Deserialize, Serialize};

use crate::Api;
use crate::catalog::provider::AuthMethod;

// ---------------------------------------------------------------------------
// Tunables
// ---------------------------------------------------------------------------

/// Local-only freshness window: if the cache file's mtime is within this
/// window, no HTTP request is made at all (zero-cost). Default 1 hour;
/// overridable at runtime via `OXI_MODELS_DEV_MTIME_WINDOW` (seconds).
const DEFAULT_MTIME_WINDOW: Duration = Duration::from_secs(60 * 60);

/// Per-request timeout for the live fetch.
const FETCH_TIMEOUT: Duration = Duration::from_secs(10);

/// Retry budget for the live fetch; bounds the attempt loop in
/// `live_fetch_conditional`.
const FETCH_RETRIES: u32 = 2;

/// Fixed pause between consecutive fetch attempts (the same delay is used
/// before every retry; there is no escalation).
const RETRY_BACKOFF: Duration = Duration::from_millis(200);

/// Default models.dev base URL. `/api.json` is appended when fetching;
/// overridable via `OXI_MODELS_DEV_URL`.
const DEFAULT_URL: &str = "https://models.dev";

/// User-Agent sent to models.dev: `oxi/<crate version>`.
const USER_AGENT: &str = concat!("oxi/", env!("CARGO_PKG_VERSION"));

// ---------------------------------------------------------------------------
// Schema (mirrors models.dev `api.json`, see opencode `packages/core/src/models-dev.ts`)
// ---------------------------------------------------------------------------

/// Top-level catalog: provider id → provider.
///
/// A newtype over a `BTreeMap`, so it (de)serializes as a plain JSON object
/// keyed by provider id and iterates in sorted key order.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct MdCatalog(pub BTreeMap<String, MdProvider>);

/// A single provider entry.
///
/// `name`, `env` and `models` carry no `#[serde(default)]`, so a provider
/// object lacking any of them fails deserialization.
#[derive(Debug, Serialize, Deserialize)]
pub struct MdProvider {
    /// Display name.
    #[allow(dead_code)]
    pub name: String,
    /// Environment variables that hold the API key.
    #[allow(dead_code)]
    pub env: Vec<String>,
    /// AI SDK npm package identifying the API protocol
    /// (the kind of string `protocol_for` matches on).
    #[serde(default)]
    #[allow(dead_code)]
    pub npm: Option<String>,
    /// Native API base URL for OpenAI-compatible providers.
    #[serde(default)]
    #[allow(dead_code)]
    pub api: Option<String>,
    /// Link to provider documentation.
    #[serde(default)]
    #[allow(dead_code)]
    pub doc: Option<String>,
    /// Models served by this provider, keyed by model id.
    pub models: BTreeMap<String, MdModel>,
}

/// A single model entry — deserialized from models.dev `api.json`.
///
/// `name`, `reasoning` and `limit` are required (no `#[serde(default)]`);
/// every other field falls back to its default when absent.
#[derive(Debug, Serialize, Deserialize)]
pub struct MdModel {
    /// Display name.
    #[allow(dead_code)]
    pub name: String,
    /// Model family (e.g. "claude-sonnet", "gpt-4").
    #[serde(default)]
    #[allow(dead_code)]
    pub family: Option<String>,
    /// Whether the model supports reasoning / chain-of-thought.
    /// Required: an entry without this key fails to deserialize.
    pub reasoning: bool,
    /// Whether the model supports tool calling (`false` when absent).
    #[serde(default)]
    pub tool_call: bool,
    /// Whether the model supports file attachments (images, PDFs);
    /// `false` when absent.
    #[serde(default)]
    pub attachment: bool,
    /// Whether the model supports temperature control.
    #[serde(default)]
    #[allow(dead_code)]
    pub temperature: Option<bool>,
    /// Whether the model supports structured output / JSON mode.
    #[serde(default)]
    #[allow(dead_code)]
    pub structured_output: Option<bool>,
    /// Knowledge cutoff date.
    #[serde(default)]
    #[allow(dead_code)]
    pub knowledge: Option<String>,
    /// Release date of the model.
    #[serde(default)]
    #[allow(dead_code)]
    pub release_date: Option<String>,
    /// Last update time of this entry.
    #[serde(default)]
    #[allow(dead_code)]
    pub last_updated: Option<String>,
    /// Whether the model uses open weights.
    #[serde(default)]
    #[allow(dead_code)]
    pub open_weights: Option<bool>,
    /// Whether the model supports interleaved thinking + tool calls.
    /// Kept as raw JSON because the value is not given a fixed shape here.
    #[serde(default)]
    #[allow(dead_code)]
    pub interleaved: Option<serde_json::Value>,
    /// Reasoning options (effort levels, budget tokens).
    #[serde(default)]
    #[allow(dead_code)]
    pub reasoning_options: Option<Vec<MdReasoningOption>>,
    /// Token limits. Required.
    pub limit: MdLimit,
    /// Pricing (USD per million tokens). Optional — some are free.
    #[serde(default)]
    pub cost: Option<MdCost>,
    /// Supported input/output modalities.
    #[serde(default)]
    #[allow(dead_code)]
    pub modalities: Option<MdModalities>,
    /// Model status (alpha, beta, deprecated).
    #[serde(default)]
    #[allow(dead_code)]
    pub status: Option<String>,
    /// Per-model provider override (npm + api).
    #[serde(default)]
    pub provider: Option<MdModelProvider>,
}

/// Per-model provider override — lets a specific model use a different
/// API protocol or endpoint than its parent provider.
///
/// Both fields are optional and default to `None` when absent.
#[derive(Debug, Serialize, Deserialize)]
pub struct MdModelProvider {
    /// Override npm package (API protocol).
    #[serde(default)]
    pub npm: Option<String>,
    /// Override API base URL (empty = inherit from parent).
    #[serde(default)]
    pub api: Option<String>,
}

/// Token limits.
///
/// Counts are stored as `f64`, i.e. exactly as the JSON numbers are parsed;
/// `context` and `output` are required, `input` is optional.
#[derive(Debug, Serialize, Deserialize)]
pub struct MdLimit {
    /// Maximum context window (total tokens).
    pub context: f64,
    /// Max input tokens (optional, for reasoning models with input budget).
    #[serde(default)]
    pub input: Option<f64>,
    /// Maximum output tokens (maps to oxi `max_tokens`).
    pub output: f64,
}

/// Pricing. All values are USD per million tokens.
///
/// Only `input` and `output` are required; every other rate defaults to
/// `None` when the key is absent.
#[derive(Debug, Serialize, Deserialize)]
#[allow(missing_docs)]
pub struct MdCost {
    /// Cost per million input tokens.
    pub input: f64,
    /// Cost per million output tokens.
    pub output: f64,
    /// Cost per million cached read tokens, if billed separately.
    #[serde(default)]
    pub cache_read: Option<f64>,
    /// Cost per million cached write tokens, if billed separately.
    #[serde(default)]
    pub cache_write: Option<f64>,
    /// Tiered pricing (e.g. context-length-based tiers).
    #[serde(default)]
    pub tiers: Option<Vec<MdCostTier>>,
    /// Context >200K pricing (Anthropic-specific extended pricing tier).
    #[serde(default)]
    pub context_over_200k: Option<MdCostTierData>,
    /// Separate pricing for reasoning/thinking tokens.
    #[serde(default)]
    pub reasoning: Option<f64>,
    /// Audio modality input pricing.
    #[serde(default)]
    pub input_audio: Option<f64>,
    /// Audio modality output pricing.
    #[serde(default)]
    pub output_audio: Option<f64>,
}

/// A single pricing tier (used within `tiers` array).
///
/// Carries the same rate fields as the base `MdCost` plus the `tier`
/// descriptor saying which tier the rates belong to.
#[derive(Debug, Serialize, Deserialize)]
#[allow(missing_docs)]
pub struct MdCostTier {
    /// Input-token rate for this tier (required).
    pub input: f64,
    /// Output-token rate for this tier (required).
    pub output: f64,
    /// Cached-read rate for this tier, if present.
    #[serde(default)]
    pub cache_read: Option<f64>,
    /// Cached-write rate for this tier, if present.
    #[serde(default)]
    pub cache_write: Option<f64>,
    /// Descriptor of the tier these rates apply to (required).
    pub tier: MdTierSpec,
}

/// Descriptor attached to each [`MdCostTier`].
///
/// NOTE(review): presumably `kind` names the tiering dimension and `size`
/// its threshold — confirm against the models.dev schema.
#[derive(Debug, Serialize, Deserialize)]
#[allow(missing_docs)]
pub struct MdTierSpec {
    /// Tier discriminator; stored under the JSON key `type`
    /// (`type` is a Rust keyword, hence the rename).
    #[serde(rename = "type")]
    pub kind: String,
    /// Numeric size associated with the tier (required).
    pub size: f64,
}

/// Context-over-200K pricing tier data (Anthropic-specific).
///
/// Same rate fields as [`MdCostTier`] but without a tier descriptor.
#[derive(Debug, Serialize, Deserialize)]
#[allow(missing_docs)]
pub struct MdCostTierData {
    /// Input-token rate in this tier (required).
    pub input: f64,
    /// Output-token rate in this tier (required).
    pub output: f64,
    /// Cached-read rate in this tier, if present.
    #[serde(default)]
    pub cache_read: Option<f64>,
    /// Cached-write rate in this tier, if present.
    #[serde(default)]
    pub cache_write: Option<f64>,
}

/// Supported input/output modalities.
///
/// Both lists are optional and default to `None` when absent.
#[derive(Debug, Serialize, Deserialize)]
#[allow(missing_docs)]
pub struct MdModalities {
    /// Modality names accepted as input.
    #[serde(default)]
    #[allow(dead_code)]
    pub input: Option<Vec<String>>,
    /// Modality names the model can produce.
    #[serde(default)]
    #[allow(dead_code)]
    pub output: Option<Vec<String>>,
}

/// Reasoning options (effort levels, budget tokens).
///
/// NOTE(review): presumably `values` applies to effort-level options and
/// `min` to token-budget options — confirm against the models.dev schema.
#[derive(Debug, Serialize, Deserialize)]
#[allow(missing_docs)]
pub struct MdReasoningOption {
    /// Option discriminator; stored under the JSON key `type` (required).
    #[serde(rename = "type")]
    pub kind: String,
    /// Allowed values, if listed; individual entries may be JSON `null`.
    #[serde(default)]
    #[allow(dead_code)]
    pub values: Option<Vec<Option<String>>>,
    /// Lower bound, if given.
    #[serde(default)]
    #[allow(dead_code)]
    pub min: Option<f64>,
}

// ---------------------------------------------------------------------------
// Protocol resolver — npm → (Api + AuthMethod), 7 lines (the core of this design)
// ---------------------------------------------------------------------------

/// Translate a models.dev `npm` package name into the oxi API flavour and
/// the authentication scheme used to talk to it.
///
/// This table is the **only** protocol knowledge oxi has. Packages with a
/// dedicated wire protocol are listed explicitly; everything else —
/// `@ai-sdk/openai`, `@ai-sdk/openai-compatible`, groq, xai, togetherai,
/// vercel, perplexity, cerebras, deepinfra, cohere, gateway, and any npm
/// value not known here — is treated as OpenAI-compatible
/// (`OpenAiCompletions`) with Bearer auth. For those providers the base URL
/// from `MdProvider.api` is used at materialize time.
pub fn protocol_for(npm: &str) -> (Api, AuthMethod) {
    // Packages that do NOT speak the OpenAI-compatible protocol.
    let dedicated = match npm {
        "@ai-sdk/anthropic" => Some((Api::AnthropicMessages, AuthMethod::XApiKey)),
        "@ai-sdk/google" => Some((Api::GoogleGenerativeAi, AuthMethod::None)),
        "@ai-sdk/google-vertex" => Some((Api::GoogleVertex, AuthMethod::None)),
        "@ai-sdk/google-vertex/anthropic" => Some((Api::GoogleVertex, AuthMethod::None)),
        "@ai-sdk/mistral" => Some((Api::MistralConversations, AuthMethod::Bearer)),
        "@ai-sdk/azure" => Some((Api::AzureOpenAiResponses, AuthMethod::ApiKey)),
        "@ai-sdk/amazon-bedrock" => Some((Api::BedrockConverseStream, AuthMethod::None)),
        _ => None,
    };

    // Fallback: OpenAI-compatible with Bearer auth.
    dedicated.unwrap_or((Api::OpenAiCompletions, AuthMethod::Bearer))
}

// ---------------------------------------------------------------------------
// NOTE: provider_map, reasoning_preserve, and enrich() were removed.
// These were used by the legacy TOML enrichment path. With the materialize
// approach (materialize.rs), models.dev data flows directly into
// BuiltinProviderEntry/BuiltinModelEntry without per-entry enrichment.
// ---------------------------------------------------------------------------

// ---------------------------------------------------------------------------
// Global state
// ---------------------------------------------------------------------------

/// Global enriched catalog, populated by [`init_models_dev`].
///
/// `Some(None)` after init means "init ran but no data was available"
/// (offline + no cache); the inner `Option` distinguishes that from
/// "init has not run yet" (`MODELS_DEV.get() == None`).
///
/// Being a `OnceLock`, the value is written at most once per process; a
/// later [`refresh`] only updates the on-disk cache, not this value.
static MODELS_DEV: OnceLock<Option<Arc<MdCatalog>>> = OnceLock::new();

/// Load the models.dev catalog into the process-wide slot.
///
/// Resolves the catalog through the cache-or-live fallback chain and
/// stores the outcome (including "no data available") for later lookups
/// via [`get`]. Idempotent: once the slot is filled, further calls return
/// immediately without touching disk or network. Intended to be called
/// once at bootstrap ([`crate`] consumers wire it in the CLI).
pub async fn init_models_dev() {
    if MODELS_DEV.get().is_none() {
        let catalog = fetch_with_fallback().await.map(Arc::new);
        // If another caller filled the slot while we were fetching, `set`
        // fails and our copy is dropped — the first stored value wins.
        let _ = MODELS_DEV.set(catalog);
    }
}

/// Borrow the catalog stored by [`init_models_dev`], if there is one.
///
/// Yields `None` when init has not run yet, when it ran but produced no
/// data (offline, no cache), or when enrichment is disabled. Callers fall
/// back to Layer 1 in all of those cases.
pub fn get() -> Option<&'static MdCatalog> {
    // Outer `?`: slot not initialised yet. `as_deref`: peel the `Arc`.
    MODELS_DEV.get()?.as_deref()
}

/// Force-refresh the models.dev cache.
///
/// Performs a conditional GET (ETag) regardless of the mtime window.
/// The result is written to the cache file. The in-memory catalog is
/// **not** updated (OnceLock is immutable) — the refreshed data takes
/// effect on the next process start.
///
/// Returns `true` if the cache was updated (200), `false` if unchanged
/// (304) or on error.
pub async fn refresh() -> bool {
    if !enabled() || fetch_disabled() {
        return false;
    }
    let etag = read_etag();
    match live_fetch_conditional(etag.as_deref()).await {
        Some(ConditionalResult::NotModified) => {
            tracing::info!("models.dev: already up to date (304)");
            touch_cache_mtime();
            false
        }
        Some(ConditionalResult::Updated(c, new_etag)) => {
            write_cache_atomic(&c);
            if let Some(e) = new_etag {
                write_etag(&e);
            }
            tracing::info!("models.dev: cache refreshed");
            true
        }
        None => {
            tracing::warn!("models.dev: refresh failed");
            false
        }
    }
}

/// Placeholder for resetting the cached catalog. Test-only.
///
/// This function intentionally does nothing: the global slot is a
/// `OnceLock`, which cannot be cleared once set.
#[cfg(test)]
pub fn reset_for_tests() {
    // OnceLock cannot be reset; tests instead construct `MdCatalog` values
    // directly rather than going through the global. (The `enrich` entry
    // point this stub originally referred to has been removed from this
    // module.) The stub only documents that intent.
}

// ---------------------------------------------------------------------------
// Fetch / cache
// ---------------------------------------------------------------------------

/// Resolve the cache path.
///
/// - `OXI_MODELS_DEV_CACHE_PATH` overrides the location (test/enterprise use)
/// - otherwise `~/.oxi/cache/models-dev.json`
fn cache_path() -> Option<PathBuf> {
    if let Ok(custom) = std::env::var("OXI_MODELS_DEV_CACHE_PATH")
        && !custom.is_empty()
    {
        return Some(PathBuf::from(custom));
    }
    Some(
        dirs::home_dir()?
            .join(".oxi")
            .join("cache")
            .join("models-dev.json"),
    )
}

/// Whether enrichment is enabled at all.
///
/// - `OXI_MODELS_DEV=off` (also `0` or `false`) → disabled
/// - `OXI_MODELS_DEV=on` or `auto` (or unset, or any other value) → enabled
///
/// The comparison ignores ASCII case and surrounding whitespace, so
/// spellings such as `Off` or `False` disable enrichment too instead of
/// being silently ignored.
fn enabled() -> bool {
    match std::env::var("OXI_MODELS_DEV") {
        Ok(raw) => {
            let value = raw.trim();
            !["off", "0", "false"]
                .iter()
                .any(|kw| value.eq_ignore_ascii_case(kw))
        }
        // Unset (or not valid Unicode) → default to enabled.
        Err(_) => true,
    }
}

/// Whether live network fetch is forbidden (air-gapped mode).
///
/// True when `OXI_MODELS_DEV_DISABLE_FETCH` is `1` or `true`. The
/// comparison ignores ASCII case and surrounding whitespace, so `True`
/// also keeps the process off the network instead of being silently
/// ignored. Unset or any other value → fetching is allowed.
fn fetch_disabled() -> bool {
    match std::env::var("OXI_MODELS_DEV_DISABLE_FETCH") {
        Ok(raw) => {
            let value = raw.trim();
            ["1", "true"].iter().any(|kw| value.eq_ignore_ascii_case(kw))
        }
        Err(_) => false,
    }
}

/// Configured models.dev endpoint (base URL, without `/api.json`).
///
/// `OXI_MODELS_DEV_URL` overrides the default. An empty or
/// whitespace-only value is treated as unset — consistent with how
/// `OXI_MODELS_DEV_CACHE_PATH` is handled — because an empty base would
/// otherwise produce the unusable request URL `/api.json`.
fn models_url() -> String {
    match std::env::var("OXI_MODELS_DEV_URL") {
        Ok(url) if !url.trim().is_empty() => url,
        _ => DEFAULT_URL.to_string(),
    }
}

/// Configured mtime window (local-only freshness check).
///
/// `OXI_MODELS_DEV_MTIME_WINDOW` (seconds) overrides the default (1 hour).
/// Within this window, no HTTP request is made — zero-cost cache hit.
fn mtime_window() -> Duration {
    std::env::var("OXI_MODELS_DEV_MTIME_WINDOW")
        .ok()
        .and_then(|s| s.parse().ok())
        .map(Duration::from_secs)
        .unwrap_or(DEFAULT_MTIME_WINDOW)
}

/// Whether to force a conditional GET regardless of mtime window.
/// Set by `oxi models refresh` or `OXI_MODELS_DEV_FORCE_REFRESH=1`.
///
/// Accepts `1` or `true`; the comparison ignores ASCII case and
/// surrounding whitespace so `True` works as well. Unset or any other
/// value → no forced refresh.
fn force_refresh() -> bool {
    match std::env::var("OXI_MODELS_DEV_FORCE_REFRESH") {
        Ok(raw) => {
            let value = raw.trim();
            ["1", "true"].iter().any(|kw| value.eq_ignore_ascii_case(kw))
        }
        Err(_) => false,
    }
}

/// Cache-or-live fallback chain with conditional GET (ETag).
///
/// Resolution order:
/// 1. If cache mtime is within `mtime_window()` (default 1h) and not forced →
///    use cache, no HTTP (zero-cost).
/// 2. Otherwise, conditional GET with `If-None-Match` (stored ETag).
///    - `304 Not Modified` → cache is still valid, touch mtime, use cache.
///    - `200 OK` → write new cache + ETag, use new data.
/// 3. On fetch failure, use stale cache (any age) if available.
///
/// Returns `None` when enrichment is disabled, or when neither a live
/// fetch nor any cached copy yields a catalog. Step 2 is skipped entirely
/// when live fetching is disabled (air-gapped mode).
async fn fetch_with_fallback() -> Option<MdCatalog> {
    if !enabled() {
        return None;
    }

    // 1) Fresh disk cache within mtime window (unless force_refresh).
    if !force_refresh()
        && let Some(c) = read_cache_if_fresh()
    {
        tracing::debug!("models.dev: using cache within mtime window");
        return Some(c);
    }

    // 2) Conditional GET (unless air-gapped).
    if !fetch_disabled() {
        let etag = read_etag();
        match live_fetch_conditional(etag.as_deref()).await {
            Some(ConditionalResult::NotModified) => {
                // 304 means our cached data is still valid. But if the cache
                // file is missing/corrupt, we have the ETag but no data —
                // fall through to a non-conditional fetch to recover.
                if let Some(c) = read_cache_any() {
                    tracing::debug!("models.dev: 304 Not Modified, touching cache mtime");
                    // Restart the freshness window so the next run within it
                    // skips the network entirely.
                    touch_cache_mtime();
                    return Some(c);
                }
                tracing::warn!("models.dev: 304 received but cache missing — refetching");
                // Remove stale ETag and retry without conditional.
                clear_etag();
                if let Some(ConditionalResult::Updated(c, new_etag)) =
                    live_fetch_conditional(None).await
                {
                    write_cache_atomic(&c);
                    if let Some(e) = new_etag {
                        write_etag(&e);
                    }
                    return Some(c);
                }
                // Recovery fetch failed too: drop through to step 3.
            }
            Some(ConditionalResult::Updated(c, new_etag)) => {
                // Persist the fresh data first, then the ETag that describes it.
                write_cache_atomic(&c);
                if let Some(e) = new_etag {
                    write_etag(&e);
                }
                return Some(c);
            }
            None => { /* fetch failed, fall through to stale */ }
        }
    }

    // 3) Stale cache is better than nothing.
    if let Some(c) = read_cache_any() {
        tracing::debug!("models.dev: using stale cache (live fetch unavailable)");
        return Some(c);
    }

    None
}

/// Result of a conditional GET.
///
/// Produced by `live_fetch_conditional`; a failed fetch is represented by
/// that function returning `None` rather than by a variant here.
enum ConditionalResult {
    /// Server returned 304 — data unchanged.
    NotModified,
    /// Server returned 200 — new data + optional new ETag.
    Updated(MdCatalog, Option<String>),
}

/// Load the cached catalog, but only while the cache file is still inside
/// the mtime freshness window.
///
/// Returns `None` if the path cannot be resolved, the file's metadata or
/// mtime is unavailable, the file is older than the window, or the file
/// cannot be read and parsed.
fn read_cache_if_fresh() -> Option<MdCatalog> {
    let path = cache_path()?;
    let modified = std::fs::metadata(&path).and_then(|m| m.modified()).ok()?;
    // `duration_since` fails when the mtime lies in the future; such a file
    // is treated as not fresh.
    let age = SystemTime::now().duration_since(modified).ok()?;
    if age <= mtime_window() {
        read_cache(&path)
    } else {
        None
    }
}

/// Load the cached catalog no matter how old the file is.
///
/// `None` when the cache path cannot be resolved or the file cannot be
/// read and parsed.
fn read_cache_any() -> Option<MdCatalog> {
    cache_path().and_then(|path| read_cache(&path))
}

/// Read and parse the catalog stored at `path`.
///
/// Returns `None` when the file cannot be read or is not valid catalog
/// JSON. In the latter case a warning is logged and the file is deleted
/// (best effort) so that the next run refetches cleanly.
fn read_cache(path: &std::path::Path) -> Option<MdCatalog> {
    let raw = std::fs::read_to_string(path).ok()?;
    serde_json::from_str::<MdCatalog>(&raw)
        .map_err(|e| {
            tracing::warn!(error = %e, "models.dev: cache corrupt, ignoring");
            // Corrupt cache: drop it rather than tripping over it again.
            let _ = std::fs::remove_file(path);
        })
        .ok()
}

/// Bump the cache file's mtime to "now", restarting the freshness window
/// (used after a 304 confirms the cached data is still current).
///
/// Best effort: does nothing if the cache path cannot be resolved, and a
/// failure to update the timestamp is ignored.
fn touch_cache_mtime() {
    if let Some(path) = cache_path() {
        // The timestamp is applied through the `filetime` crate.
        let stamp = filetime::FileTime::from_system_time(std::time::SystemTime::now());
        let _ = filetime::set_file_mtime(&path, stamp);
    }
}

/// Location of the ETag sidecar file.
///
/// Derived from the cache path by replacing its extension with
/// `json.etag` (so the default cache `models-dev.json` gets
/// `models-dev.json.etag`). `None` when the cache path is unavailable.
fn etag_path() -> Option<PathBuf> {
    cache_path().map(|cache| cache.with_extension("json.etag"))
}

/// Fetch the ETag saved alongside the cache, for use in a conditional GET.
///
/// Surrounding whitespace is stripped. A missing, unreadable, or blank
/// sidecar file yields `None`.
fn read_etag() -> Option<String> {
    let raw = std::fs::read_to_string(etag_path()?).ok()?;
    let tag = raw.trim();
    (!tag.is_empty()).then(|| tag.to_string())
}

/// Write the ETag sidecar atomically.
fn write_etag(etag: &str) {
    let Some(path) = etag_path() else { return };
    let tmp = path.with_extension("json.etag.tmp");
    if std::fs::write(&tmp, etag).is_ok() {
        let _ = std::fs::rename(&tmp, &path);
    }
}

/// Delete the ETag sidecar file, if any.
///
/// Used when recovering from a stale-ETag state (an ETag without the data
/// it describes). Best effort: errors, including "file not found", are
/// ignored.
fn clear_etag() {
    if let Some(sidecar) = etag_path() {
        let _ = std::fs::remove_file(&sidecar);
    }
}

/// Write the catalog atomically (temp + rename), per AGENTS.md I/O rules.
fn write_cache_atomic(catalog: &MdCatalog) {
    let Some(path) = cache_path() else {
        return;
    };
    let Some(parent) = path.parent() else {
        return;
    };
    if std::fs::create_dir_all(parent).is_err() {
        return;
    }
    let Ok(body) = serde_json::to_string(catalog) else {
        return;
    };
    // PID-suffixed temp name avoids concurrent-writer collisions.
    let tmp = path.with_file_name(format!("models-dev.json.{}.tmp", std::process::id()));
    if std::fs::write(&tmp, &body).is_err() {
        return;
    }
    if let Err(e) = std::fs::rename(&tmp, &path) {
        tracing::debug!(error = %e, "models.dev: cache rename failed");
        let _ = std::fs::remove_file(&tmp);
    }
}

/// Live fetch with bounded retries and conditional GET (ETag) support.
///
/// - If `etag` is `Some`, sends `If-None-Match` header.
/// - Returns `NotModified` on 304, `Updated` on 200, `None` on failure.
///
/// Makes one initial attempt plus up to `FETCH_RETRIES` retries, pausing
/// `RETRY_BACKOFF` between attempts. Transport errors, non-success
/// statuses, and body-read failures are retried; a body that was received
/// but does not parse is not retried and yields `None` immediately.
async fn live_fetch_conditional(etag: Option<&str>) -> Option<ConditionalResult> {
    let client = reqwest::Client::builder()
        .timeout(FETCH_TIMEOUT)
        .build()
        .ok()?;
    let url = format!("{}/api.json", models_url().trim_end_matches('/'));

    // Inclusive range: attempt 0 is the initial try, 1..=FETCH_RETRIES are
    // the retries, so "N retries" really means N + 1 attempts.
    for attempt in 0..=FETCH_RETRIES {
        let mut req = client.get(&url).header("User-Agent", USER_AGENT);
        if let Some(e) = etag {
            req = req.header("If-None-Match", e);
        }
        match req.send().await {
            Ok(resp) => {
                let status = resp.status();
                if status.as_u16() == 304 {
                    tracing::debug!("models.dev: 304 Not Modified");
                    return Some(ConditionalResult::NotModified);
                }
                if status.is_success() {
                    // Capture the new ETag (if any) before consuming the body.
                    let new_etag = resp
                        .headers()
                        .get(reqwest::header::ETAG)
                        .and_then(|v| v.to_str().ok())
                        .map(|s| s.to_string());
                    match resp.text().await {
                        Ok(body) => match serde_json::from_str::<MdCatalog>(&body) {
                            Ok(c) => {
                                tracing::debug!(
                                    models = c.0.values().map(|p| p.models.len()).sum::<usize>(),
                                    "models.dev: fetched"
                                );
                                return Some(ConditionalResult::Updated(c, new_etag));
                            }
                            Err(e) => {
                                // A complete but unparsable body will not
                                // improve on retry — give up right away.
                                tracing::warn!(error = %e, "models.dev: parse failed");
                                return None;
                            }
                        },
                        Err(e) => {
                            tracing::warn!(error = %e, "models.dev: body read failed");
                        }
                    }
                } else {
                    tracing::warn!(status = %status, "models.dev: non-success status");
                }
            }
            Err(e) => {
                tracing::warn!(error = %e, attempt, "models.dev: fetch failed");
            }
        }
        // Back off only if another attempt follows.
        if attempt < FETCH_RETRIES {
            tokio::time::sleep(RETRY_BACKOFF).await;
        }
    }
    None
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    /// Build a one-provider, one-model catalog fixture.
    ///
    /// `cost` is `(input, output)`; every optional field not covered by a
    /// parameter is left at `None` / `false` / empty.
    fn md(
        provider: &str,
        model_id: &str,
        cost: Option<(f64, f64)>,
        ctx: f64,
        output: f64,
        reasoning: bool,
    ) -> MdCatalog {
        let mut cat = MdCatalog::default();
        let m = MdModel {
            name: model_id.to_string(),
            family: None,
            reasoning,
            tool_call: false,
            attachment: false,
            temperature: None,
            structured_output: None,
            knowledge: None,
            release_date: None,
            last_updated: None,
            open_weights: None,
            interleaved: None,
            reasoning_options: None,
            limit: MdLimit {
                context: ctx,
                input: None,
                output,
            },
            cost: cost.map(|(i, o)| MdCost {
                input: i,
                output: o,
                cache_read: None,
                cache_write: None,
                tiers: None,
                context_over_200k: None,
                reasoning: None,
                input_audio: None,
                output_audio: None,
            }),
            modalities: None,
            status: None,
            provider: None,
        };
        let mut models = BTreeMap::new();
        models.insert(model_id.to_string(), m);
        cat.0.insert(
            provider.to_string(),
            MdProvider {
                // The provider id doubles as its display name in the fixture.
                name: provider.to_string(),
                env: vec![],
                npm: None,
                api: None,
                doc: None,
                models,
            },
        );
        cat
    }

    /// The schema accepts a minimal `api.json`-shaped document, including
    /// keys (`id`) that the structs do not declare.
    #[test]
    fn schema_parses_snapshot() {
        // Minimal valid api.json shape.
        let json = r#"{
            "deepseek": {
                "id": "deepseek",
                "name": "DeepSeek",
                "env": ["DEEPSEEK_API_KEY"],
                "npm": "@ai-sdk/openai-compatible",
                "api": "https://api.deepseek.com",
                "models": {
                    "deepseek-chat": {
                        "id": "deepseek-chat",
                        "name": "DeepSeek Chat",
                        "release_date": "2025-12-01",
                        "attachment": true,
                        "reasoning": false,
                        "tool_call": true,
                        "temperature": true,
                        "limit": { "context": 1000000, "output": 384000 },
                        "cost": { "input": 0.14, "output": 0.28, "cache_read": 0.0028 }
                    }
                }
            }
        }"#;
        let cat: MdCatalog = serde_json::from_str(json).unwrap();
        let m = &cat.0["deepseek"].models["deepseek-chat"];
        // Tolerance compare for the fractional price; the limits are whole
        // numbers and are compared exactly.
        assert!((m.cost.as_ref().unwrap().input - 0.14).abs() < 1e-9);
        assert_eq!(m.limit.context, 1000000.0);
        assert_eq!(m.limit.output, 384000.0);
    }

    /// A catalog survives serialize → file → deserialize.
    ///
    /// Note: this writes the file directly with `std::fs::write`; it does
    /// not go through `write_cache_atomic`.
    #[test]
    fn write_cache_roundtrips() {
        let cat = md(
            "deepseek",
            "deepseek-chat",
            Some((0.14, 0.28)),
            1000000.0,
            384000.0,
            false,
        );
        // PID in the file name keeps concurrently running test processes apart.
        let tmp = std::env::temp_dir().join(format!("oxi-md-test-{}.json", std::process::id()));
        let body = serde_json::to_string(&cat).unwrap();
        std::fs::write(&tmp, &body).unwrap();
        let back: MdCatalog =
            serde_json::from_str(&std::fs::read_to_string(&tmp).unwrap()).unwrap();
        // Clean up before asserting so a failed assertion does not leak the file.
        let _ = std::fs::remove_file(&tmp);
        assert!(back.0.contains_key("deepseek"));
    }
}