Skip to main content

tiy_core/catalog/
mod.rs

1//! Model catalog fetching, normalization, and metadata enrichment.
2//!
3//! This module provides a display-oriented model listing flow:
4//! 1. Fetch a provider's native model list
5//! 2. Extract a shared intermediate model shape from heterogeneous payloads
6//! 3. Enrich from an external catalog metadata store
7//! 4. Return unified model information while preserving the provider raw ID
8
9use async_trait::async_trait;
10use chrono::{DateTime, Utc};
11use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, AUTHORIZATION, CONTENT_TYPE};
12use reqwest::Client;
13use serde::{Deserialize, Serialize};
14use serde_json::{json, Value};
15use std::collections::{HashMap, HashSet};
16use std::fs;
17use std::path::{Path, PathBuf};
18
19use crate::protocol::common::apply_custom_headers;
20use crate::types::{HeaderPolicy, Provider};
21use sha2::{Digest, Sha256};
22use url::Url;
23
// Built-in API base URLs, one per provider.
// NOTE(review): presumably used as defaults when a request carries no
// `base_url` override — the resolver is not visible in this part of the file.
const OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
const XAI_BASE_URL: &str = "https://api.x.ai/v1";
const GROQ_BASE_URL: &str = "https://api.groq.com/openai/v1";
const OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1";
const ZAI_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4";
const DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com";
const ZENMUX_BASE_URL: &str = "https://zenmux.ai/api/v1";
const OLLAMA_BASE_URL: &str = "http://localhost:11434/v1";
const ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com/v1";
const MINIMAX_BASE_URL: &str = "https://api.minimax.io/anthropic";
const MINIMAX_CN_BASE_URL: &str = "https://api.minimaxi.com/anthropic";
const KIMI_CODING_BASE_URL: &str = "https://api.kimi.com/coding";
// NOTE(review): presumably the value sent in the Anthropic API version
// header — confirm against the header builder.
const ANTHROPIC_VERSION: &str = "2023-06-01";
// Manifest URL used by `CatalogRemoteConfig::default()`.
const DEFAULT_CATALOG_MANIFEST_URL: &str =
    "https://tiyagents.github.io/tiy-core/catalog/manifest.json";
39
40/// Request to fetch models from a provider.
41#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
42pub struct FetchModelsRequest {
43    /// Provider to query.
44    pub provider: Provider,
45    /// API key override.
46    #[serde(skip_serializing_if = "Option::is_none")]
47    pub api_key: Option<String>,
48    /// Base URL override. Should point at the provider API base, such as `/v1`.
49    #[serde(skip_serializing_if = "Option::is_none")]
50    pub base_url: Option<String>,
51    /// Custom headers to add to the request.
52    #[serde(skip_serializing_if = "Option::is_none")]
53    pub headers: Option<HashMap<String, String>>,
54}
55
56impl FetchModelsRequest {
57    /// Create a request for the given provider.
58    pub fn new(provider: Provider) -> Self {
59        Self {
60            provider,
61            api_key: None,
62            base_url: None,
63            headers: None,
64        }
65    }
66}
67
/// Provider-native extracted model fields.
///
/// This is the shared intermediate shape produced from a provider's payload
/// before catalog enrichment. Optional fields are omitted from serialized
/// output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ProviderExtractedModel {
    /// Provider this model was listed by.
    pub provider: Provider,
    /// Model identifier as supplied by the provider (or by the caller for
    /// manually entered models); it is carried through enrichment unchanged.
    pub raw_id: String,
    /// Human-readable name, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub display_name: Option<String>,
    /// Free-form description, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Context window size (presumably in tokens — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context_window: Option<u64>,
    /// Maximum output size (presumably in tokens — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<u64>,
    /// Maximum input size (presumably in tokens — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_input_tokens: Option<u64>,
    /// Creation time reported by the provider.
    /// NOTE(review): presumably a Unix timestamp — confirm unit against the extractors.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub created_at: Option<i64>,
    /// Modalities reported for the model, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modalities: Option<Vec<String>>,
    /// Capabilities reported for the model, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub capabilities: Option<Vec<String>>,
    /// Raw JSON associated with this model; an empty object for manually
    /// entered models.
    pub raw: Value,
}
91
/// External metadata used to enrich native provider models.
///
/// During enrichment the provider's own values take precedence; fields from
/// this record only fill in what the provider left unset.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CatalogModelMetadata {
    /// Canonical key for the model; also indexed (after normalization) for lookups.
    pub canonical_model_key: String,
    /// Additional identifiers that should resolve to this entry. Defaults to
    /// empty when missing and is omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub aliases: Vec<String>,
    /// Human-readable name; its normalized form is also indexed for lookups.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub display_name: Option<String>,
    /// Free-form description.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Context window size (presumably in tokens — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context_window: Option<u64>,
    /// Maximum output size (presumably in tokens — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<u64>,
    /// Maximum input size (presumably in tokens — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_input_tokens: Option<u64>,
    /// Modalities recorded for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modalities: Option<Vec<String>>,
    /// Capabilities recorded for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub capabilities: Option<Vec<String>>,
    /// Pricing data as untyped JSON; copied verbatim into the unified model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pricing: Option<Value>,
    /// Label of where this record came from; reported in
    /// `UnifiedModelInfo::metadata_sources` when the record is used.
    pub source: String,
    /// Raw JSON associated with this record.
    pub raw: Value,
}
117
/// Metadata match result from a store.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CatalogModelMatch {
    /// The catalog record that matched.
    pub metadata: CatalogModelMetadata,
    /// Confidence reported by the store; the in-memory store always reports `1.0`.
    pub confidence: f32,
    /// The normalized alias candidate that produced the match, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_alias: Option<String>,
}
126
/// Snapshot manifest published to a remote catalog endpoint.
///
/// A copy of the manifest is also written as a sidecar file next to the local
/// snapshot so later refreshes can detect that nothing changed.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CatalogSnapshotManifest {
    /// Version label; a downloaded snapshot must carry the same version.
    pub version: String,
    /// Generation time as an opaque string (format not enforced here).
    pub generated_at: String,
    /// Location of the snapshot document; resolved relative to the manifest
    /// URL during refresh.
    pub snapshot_url: String,
    /// Expected SHA-256 of the snapshot bytes as a hex string; verified during
    /// refresh when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha256: Option<String>,
    /// Expected snapshot length in bytes; verified during refresh when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size_bytes: Option<u64>,
}
138
/// Catalog snapshot containing normalized model metadata.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CatalogSnapshot {
    /// Version label; compared against the manifest version during refresh.
    pub version: String,
    /// Generation time as an opaque string (format not enforced here).
    pub generated_at: String,
    /// Metadata records. Defaults to empty when missing and is omitted from
    /// output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub models: Vec<CatalogModelMetadata>,
}
147
148/// Remote configuration for refreshing a snapshot from a published catalog.
149#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
150pub struct CatalogRemoteConfig {
151    pub manifest_url: String,
152    #[serde(skip_serializing_if = "Option::is_none")]
153    pub headers: Option<HashMap<String, String>>,
154}
155
156impl Default for CatalogRemoteConfig {
157    fn default() -> Self {
158        Self {
159            manifest_url: DEFAULT_CATALOG_MANIFEST_URL.to_string(),
160            headers: None,
161        }
162    }
163}
164
165impl CatalogRemoteConfig {
166    pub fn new(manifest_url: impl Into<String>) -> Self {
167        Self {
168            manifest_url: manifest_url.into(),
169            headers: None,
170        }
171    }
172}
173
/// Result of refreshing a local catalog snapshot.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum CatalogRefreshResult {
    /// A snapshot was downloaded, validated, and written to disk.
    Updated {
        /// The remote manifest that describes the written snapshot.
        manifest: CatalogSnapshotManifest,
        /// Number of snapshot bytes written.
        bytes_written: u64,
        /// `true` when no snapshot file existed at the target path beforehand.
        created: bool,
    },
    /// The local snapshot already matches the remote manifest (same version
    /// and checksum); nothing was downloaded.
    Unchanged {
        /// The remote manifest that was compared against.
        manifest: CatalogSnapshotManifest,
    },
}
186
/// Unified model data returned to applications.
///
/// Combines provider-reported fields with catalog metadata. For fields that
/// exist in both, the provider value wins and the catalog value is the
/// fallback.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct UnifiedModelInfo {
    /// Provider the model belongs to.
    pub provider: Provider,
    /// Model identifier exactly as supplied by the provider or caller.
    pub raw_id: String,
    /// Canonical catalog key; set only when a catalog record matched.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub canonical_model_key: Option<String>,
    /// Human-readable name.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub display_name: Option<String>,
    /// Free-form description.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Context window size (presumably in tokens — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context_window: Option<u64>,
    /// Maximum output size (presumably in tokens — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<u64>,
    /// Maximum input size (presumably in tokens — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_input_tokens: Option<u64>,
    /// Creation time as reported by the provider; never filled from the catalog.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub created_at: Option<i64>,
    /// Modalities for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modalities: Option<Vec<String>>,
    /// Capabilities for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub capabilities: Option<Vec<String>>,
    /// Pricing data; comes only from the catalog record.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pricing: Option<Value>,
    /// Confidence of the catalog match; `None` when nothing matched.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub match_confidence: Option<f32>,
    /// `source` of the matched catalog record; empty when nothing matched.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub metadata_sources: Vec<String>,
    /// Raw JSON carried over from the extracted model.
    pub raw: Value,
}
218
/// Result of listing models for a provider.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ListModelsResult {
    /// Models extracted from the response, after enrichment.
    pub models: Vec<UnifiedModelInfo>,
    /// The raw JSON the models were extracted from.
    pub raw_response: Value,
}
225
/// Error returned by model catalog operations.
#[derive(Debug, thiserror::Error)]
pub enum ModelCatalogError {
    /// No list-models adapter is registered for the provider.
    #[error("provider {provider} does not expose a supported list-models adapter yet")]
    UnsupportedProvider { provider: Provider },
    /// The provider needs an explicit `base_url` override and none was given.
    #[error("provider {provider} requires a base_url override for list-models requests")]
    MissingBaseUrl { provider: Provider },
    /// The response was received but its shape was not what the adapter expects.
    #[error("provider {provider} returned an invalid models payload: {message}")]
    InvalidResponse { provider: Provider, message: String },
    /// A paginated listing returned a cursor that had already been seen.
    #[error("provider {provider} returned a repeating pagination cursor `{cursor}` while listing models")]
    PaginationLoop { provider: Provider, cursor: String },
    /// The provider answered with an HTTP error status.
    #[error("provider {provider} returned HTTP {status}: {body}")]
    Http {
        provider: Provider,
        status: reqwest::StatusCode,
        body: String,
    },
    /// The request to the provider failed; `source` carries the `reqwest` error.
    #[error("request to provider {provider} failed: {source}")]
    Request {
        provider: Provider,
        #[source]
        source: reqwest::Error,
    },
}
250
/// Error returned by snapshot load, save, or refresh operations.
#[derive(Debug, thiserror::Error)]
pub enum CatalogSnapshotError {
    /// Reading a file from disk failed.
    #[error("failed to read snapshot file {path}: {source}")]
    ReadFile {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// Writing a file to disk failed.
    #[error("failed to write snapshot file {path}: {source}")]
    WriteFile {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// Snapshot bytes were not valid snapshot JSON. Also used during refresh
    /// for freshly downloaded bytes, in which case `path` is the local target
    /// path rather than the file the bytes came from.
    #[error("failed to parse snapshot file {path}: {source}")]
    ParseSnapshot {
        path: PathBuf,
        #[source]
        source: serde_json::Error,
    },
    /// Serializing a snapshot or manifest to JSON failed.
    #[error("failed to serialize snapshot data: {source}")]
    SerializeSnapshot {
        #[source]
        source: serde_json::Error,
    },
    /// The manifest request failed; `source` carries the `reqwest` error.
    #[error("failed to fetch catalog manifest from {url}: {source}")]
    FetchManifest {
        url: String,
        #[source]
        source: reqwest::Error,
    },
    /// The manifest endpoint answered with an HTTP error status.
    #[error("catalog manifest request to {url} returned HTTP {status}: {body}")]
    FetchManifestHttp {
        url: String,
        status: reqwest::StatusCode,
        body: String,
    },
    /// The manifest body was not valid manifest JSON.
    #[error("failed to parse catalog manifest from {url}: {source}")]
    ParseManifest {
        url: String,
        #[source]
        source: serde_json::Error,
    },
    /// The snapshot request failed; `source` carries the `reqwest` error.
    #[error("failed to fetch catalog snapshot from {url}: {source}")]
    FetchSnapshot {
        url: String,
        #[source]
        source: reqwest::Error,
    },
    /// The snapshot endpoint answered with an HTTP error status.
    #[error("catalog snapshot request to {url} returned HTTP {status}: {body}")]
    FetchSnapshotHttp {
        url: String,
        status: reqwest::StatusCode,
        body: String,
    },
    /// The configured manifest URL could not be used.
    #[error("manifest URL is invalid: {url}")]
    InvalidManifestUrl { url: String },
    /// The snapshot URL from the manifest could not be used.
    #[error("snapshot URL is invalid: {url}")]
    InvalidSnapshotUrl { url: String },
    /// The downloaded snapshot's SHA-256 differs from the manifest's `sha256`.
    #[error("snapshot checksum mismatch: expected {expected}, got {actual}")]
    ChecksumMismatch { expected: String, actual: String },
    /// The downloaded snapshot's length differs from the manifest's `size_bytes`.
    #[error("snapshot size mismatch: expected {expected} bytes, got {actual} bytes")]
    SizeMismatch { expected: u64, actual: u64 },
    /// The downloaded snapshot's `version` differs from the manifest's `version`.
    #[error("snapshot version mismatch: manifest has {manifest_version}, snapshot has {snapshot_version}")]
    VersionMismatch {
        manifest_version: String,
        snapshot_version: String,
    },
}
321
/// Read-only source of catalog metadata.
///
/// Implementations must be `Send + Sync` so a store can be shared across
/// threads and held across `.await` points.
pub trait CatalogMetadataStore: Send + Sync {
    /// Look up the catalog record for a provider model.
    ///
    /// * `provider` - provider the model was listed by.
    /// * `raw_id` - model identifier as supplied by the provider or caller.
    /// * `normalized_aliases` - alias candidates already normalized by the
    ///   caller, in the order they should be tried.
    ///
    /// Returns `None` when the store has no matching record.
    fn find_by_raw_or_alias(
        &self,
        provider: &Provider,
        raw_id: &str,
        normalized_aliases: &[String],
    ) -> Option<CatalogModelMatch>;
}
331
/// Metadata store that never returns enrichment data.
///
/// Used by `list_models` to run the listing flow without any catalog.
#[derive(Debug, Default)]
pub struct EmptyCatalogMetadataStore;

impl CatalogMetadataStore for EmptyCatalogMetadataStore {
    /// Always returns `None`; all arguments are ignored.
    fn find_by_raw_or_alias(
        &self,
        _provider: &Provider,
        _raw_id: &str,
        _normalized_aliases: &[String],
    ) -> Option<CatalogModelMatch> {
        None
    }
}
346
347/// Simple in-memory metadata store for tests or embedded snapshots.
348#[derive(Debug, Clone, Default)]
349pub struct InMemoryCatalogMetadataStore {
350    entries: Vec<CatalogModelMetadata>,
351    alias_index: HashMap<String, usize>,
352}
353
354impl InMemoryCatalogMetadataStore {
355    pub fn new(entries: Vec<CatalogModelMetadata>) -> Self {
356        let mut alias_index = HashMap::new();
357
358        for (idx, entry) in entries.iter().enumerate() {
359            for alias in metadata_aliases(entry) {
360                alias_index.entry(alias).or_insert(idx);
361            }
362        }
363
364        Self {
365            entries,
366            alias_index,
367        }
368    }
369}
370
371impl CatalogMetadataStore for InMemoryCatalogMetadataStore {
372    fn find_by_raw_or_alias(
373        &self,
374        _provider: &Provider,
375        raw_id: &str,
376        normalized_aliases: &[String],
377    ) -> Option<CatalogModelMatch> {
378        let mut candidates = Vec::with_capacity(normalized_aliases.len() + 1);
379        candidates.extend(normalized_aliases.iter().cloned());
380        candidates.extend(normalized_alias_candidates(raw_id, None));
381
382        for candidate in candidates {
383            if let Some(idx) = self.alias_index.get(&candidate) {
384                let metadata = self.entries[*idx].clone();
385                return Some(CatalogModelMatch {
386                    metadata,
387                    confidence: 1.0,
388                    matched_alias: Some(candidate),
389                });
390            }
391        }
392
393        None
394    }
395}
396
397/// File-backed metadata store loaded from a local snapshot file.
398#[derive(Debug, Clone)]
399pub struct FileCatalogMetadataStore {
400    snapshot: CatalogSnapshot,
401    inner: InMemoryCatalogMetadataStore,
402}
403
404impl FileCatalogMetadataStore {
405    pub fn load(path: impl AsRef<Path>) -> Result<Self, CatalogSnapshotError> {
406        let path = path.as_ref();
407        let bytes = fs::read(path).map_err(|source| CatalogSnapshotError::ReadFile {
408            path: path.to_path_buf(),
409            source,
410        })?;
411        let snapshot = serde_json::from_slice::<CatalogSnapshot>(&bytes).map_err(|source| {
412            CatalogSnapshotError::ParseSnapshot {
413                path: path.to_path_buf(),
414                source,
415            }
416        })?;
417        Ok(Self::from_snapshot(snapshot))
418    }
419
420    pub fn try_load(path: impl AsRef<Path>) -> Result<Option<Self>, CatalogSnapshotError> {
421        let path = path.as_ref();
422        if !path.exists() {
423            return Ok(None);
424        }
425        Self::load(path).map(Some)
426    }
427
428    pub fn from_snapshot(snapshot: CatalogSnapshot) -> Self {
429        let inner = InMemoryCatalogMetadataStore::new(snapshot.models.clone());
430        Self { snapshot, inner }
431    }
432
433    pub fn snapshot(&self) -> &CatalogSnapshot {
434        &self.snapshot
435    }
436}
437
438impl CatalogMetadataStore for FileCatalogMetadataStore {
439    fn find_by_raw_or_alias(
440        &self,
441        provider: &Provider,
442        raw_id: &str,
443        normalized_aliases: &[String],
444    ) -> Option<CatalogModelMatch> {
445        self.inner
446            .find_by_raw_or_alias(provider, raw_id, normalized_aliases)
447    }
448}
449
450/// Fetch native models without metadata enrichment.
451pub async fn list_models(
452    request: FetchModelsRequest,
453) -> Result<ListModelsResult, ModelCatalogError> {
454    list_models_with_enrichment(request, &EmptyCatalogMetadataStore).await
455}
456
457/// Fetch native models and enrich them from an external metadata store.
458pub async fn list_models_with_enrichment(
459    request: FetchModelsRequest,
460    metadata_store: &dyn CatalogMetadataStore,
461) -> Result<ListModelsResult, ModelCatalogError> {
462    let adapter = adapter_for(&request.provider)?;
463    let raw_response = adapter.fetch_raw(&request).await?;
464    let extracted = adapter.extract_models(&raw_response)?;
465    let models = extracted
466        .into_iter()
467        .map(|model| enrich_model(model, metadata_store))
468        .collect();
469
470    Ok(ListModelsResult {
471        models,
472        raw_response,
473    })
474}
475
476/// Enrich a manually provided model ID using the same metadata snapshot used for
477/// fetched provider models.
478///
479/// This is useful when an application allows users to type a model ID directly,
480/// or when the upstream provider does not expose a list-models endpoint.
481///
482/// The returned [`UnifiedModelInfo`] preserves the caller-supplied `raw_id`.
483/// Metadata fields are filled from the provided [`CatalogMetadataStore`] when a
484/// matching snapshot entry is found.
485pub fn enrich_manual_model(
486    provider: Provider,
487    raw_id: impl Into<String>,
488    display_name: Option<String>,
489    metadata_store: &dyn CatalogMetadataStore,
490) -> UnifiedModelInfo {
491    let raw_id = raw_id.into();
492    enrich_model(
493        ProviderExtractedModel {
494            provider,
495            raw_id,
496            display_name,
497            description: None,
498            context_window: None,
499            max_output_tokens: None,
500            max_input_tokens: None,
501            created_at: None,
502            modalities: None,
503            capabilities: None,
504            raw: json!({}),
505        },
506        metadata_store,
507    )
508}
509
510/// Load a local snapshot path into a file-backed metadata store, if it exists.
511pub fn load_catalog_metadata_store(
512    snapshot_path: impl AsRef<Path>,
513) -> Result<Option<FileCatalogMetadataStore>, CatalogSnapshotError> {
514    FileCatalogMetadataStore::try_load(snapshot_path)
515}
516
517/// Refresh a local snapshot file from a remote manifest and snapshot endpoint.
518///
519/// Applications can call this in the background while continuing to use an
520/// already loaded local snapshot.
521pub async fn refresh_catalog_snapshot(
522    snapshot_path: impl AsRef<Path>,
523    config: &CatalogRemoteConfig,
524) -> Result<CatalogRefreshResult, CatalogSnapshotError> {
525    let snapshot_path = snapshot_path.as_ref();
526    let local_manifest_path = catalog_manifest_sidecar_path(snapshot_path);
527    let client = build_client();
528
529    let remote_manifest = fetch_remote_manifest(&client, config).await?;
530    let local_manifest = read_local_manifest(&local_manifest_path)?;
531
532    if snapshot_path.exists() {
533        if let Some(local_manifest) = local_manifest.as_ref() {
534            let same_version = local_manifest.version == remote_manifest.version;
535            let same_checksum = local_manifest.sha256 == remote_manifest.sha256;
536            if same_version && same_checksum {
537                return Ok(CatalogRefreshResult::Unchanged {
538                    manifest: remote_manifest,
539                });
540            }
541        }
542    }
543
544    let snapshot_url = resolve_snapshot_url(&config.manifest_url, &remote_manifest.snapshot_url)?;
545    let snapshot_bytes = fetch_remote_snapshot(&client, &snapshot_url, config).await?;
546
547    if let Some(expected_size) = remote_manifest.size_bytes {
548        let actual_size = snapshot_bytes.len() as u64;
549        if actual_size != expected_size {
550            return Err(CatalogSnapshotError::SizeMismatch {
551                expected: expected_size,
552                actual: actual_size,
553            });
554        }
555    }
556
557    if let Some(expected_sha) = remote_manifest.sha256.as_deref() {
558        let actual_sha = sha256_hex(&snapshot_bytes);
559        if actual_sha != expected_sha {
560            return Err(CatalogSnapshotError::ChecksumMismatch {
561                expected: expected_sha.to_string(),
562                actual: actual_sha,
563            });
564        }
565    }
566
567    let snapshot: CatalogSnapshot = serde_json::from_slice(&snapshot_bytes).map_err(|source| {
568        CatalogSnapshotError::ParseSnapshot {
569            path: snapshot_path.to_path_buf(),
570            source,
571        }
572    })?;
573
574    if snapshot.version != remote_manifest.version {
575        return Err(CatalogSnapshotError::VersionMismatch {
576            manifest_version: remote_manifest.version.clone(),
577            snapshot_version: snapshot.version,
578        });
579    }
580
581    let manifest_bytes = serde_json::to_vec_pretty(&remote_manifest)
582        .map_err(|source| CatalogSnapshotError::SerializeSnapshot { source })?;
583
584    let created = !snapshot_path.exists();
585    atomic_write(snapshot_path, &snapshot_bytes)?;
586    atomic_write(&local_manifest_path, &manifest_bytes)?;
587
588    Ok(CatalogRefreshResult::Updated {
589        manifest: remote_manifest,
590        bytes_written: snapshot_bytes.len() as u64,
591        created,
592    })
593}
594
595/// Build a snapshot document from metadata records.
596pub fn build_catalog_snapshot(
597    version: impl Into<String>,
598    generated_at: impl Into<String>,
599    models: Vec<CatalogModelMetadata>,
600) -> CatalogSnapshot {
601    CatalogSnapshot {
602        version: version.into(),
603        generated_at: generated_at.into(),
604        models,
605    }
606}
607
608/// Build a manifest document for a snapshot payload.
609pub fn build_catalog_snapshot_manifest(
610    version: impl Into<String>,
611    generated_at: impl Into<String>,
612    snapshot_url: impl Into<String>,
613    snapshot_bytes: &[u8],
614) -> CatalogSnapshotManifest {
615    CatalogSnapshotManifest {
616        version: version.into(),
617        generated_at: generated_at.into(),
618        snapshot_url: snapshot_url.into(),
619        sha256: Some(sha256_hex(snapshot_bytes)),
620        size_bytes: Some(snapshot_bytes.len() as u64),
621    }
622}
623
624/// Save a snapshot and its sidecar manifest to disk.
625pub fn save_catalog_snapshot(
626    snapshot_path: impl AsRef<Path>,
627    snapshot: &CatalogSnapshot,
628    manifest: &CatalogSnapshotManifest,
629) -> Result<(), CatalogSnapshotError> {
630    let snapshot_path = snapshot_path.as_ref();
631    let snapshot_bytes = serde_json::to_vec_pretty(snapshot)
632        .map_err(|source| CatalogSnapshotError::SerializeSnapshot { source })?;
633    let manifest_bytes = serde_json::to_vec_pretty(manifest)
634        .map_err(|source| CatalogSnapshotError::SerializeSnapshot { source })?;
635
636    atomic_write(snapshot_path, &snapshot_bytes)?;
637    atomic_write(
638        &catalog_manifest_sidecar_path(snapshot_path),
639        &manifest_bytes,
640    )?;
641    Ok(())
642}
643
644fn enrich_model(
645    model: ProviderExtractedModel,
646    metadata_store: &dyn CatalogMetadataStore,
647) -> UnifiedModelInfo {
648    let alias_candidates =
649        normalized_alias_candidates(&model.raw_id, model.display_name.as_deref());
650    let metadata_match =
651        metadata_store.find_by_raw_or_alias(&model.provider, &model.raw_id, &alias_candidates);
652
653    let metadata = metadata_match.as_ref().map(|m| &m.metadata);
654
655    UnifiedModelInfo {
656        provider: model.provider,
657        raw_id: model.raw_id,
658        canonical_model_key: metadata.map(|m| m.canonical_model_key.clone()),
659        display_name: prefer_option(
660            model.display_name,
661            metadata.and_then(|m| m.display_name.clone()),
662        ),
663        description: prefer_option(
664            model.description,
665            metadata.and_then(|m| m.description.clone()),
666        ),
667        context_window: prefer_option(
668            model.context_window,
669            metadata.and_then(|m| m.context_window),
670        ),
671        max_output_tokens: prefer_option(
672            model.max_output_tokens,
673            metadata.and_then(|m| m.max_output_tokens),
674        ),
675        max_input_tokens: prefer_option(
676            model.max_input_tokens,
677            metadata.and_then(|m| m.max_input_tokens),
678        ),
679        created_at: model.created_at,
680        modalities: prefer_option(
681            model.modalities,
682            metadata.and_then(|m| m.modalities.clone()),
683        ),
684        capabilities: prefer_option(
685            model.capabilities,
686            metadata.and_then(|m| m.capabilities.clone()),
687        ),
688        pricing: metadata.and_then(|m| m.pricing.clone()),
689        match_confidence: metadata_match.as_ref().map(|m| m.confidence),
690        metadata_sources: metadata.map(|m| vec![m.source.clone()]).unwrap_or_default(),
691        raw: model.raw,
692    }
693}
694
/// Return `primary` when it holds a value, otherwise `fallback`.
fn prefer_option<T>(primary: Option<T>, fallback: Option<T>) -> Option<T> {
    match primary {
        Some(value) => Some(value),
        None => fallback,
    }
}
698
699fn metadata_aliases(metadata: &CatalogModelMetadata) -> Vec<String> {
700    let mut aliases = Vec::new();
701    aliases.extend(normalized_alias_candidates(
702        &metadata.canonical_model_key,
703        metadata.display_name.as_deref(),
704    ));
705    for alias in &metadata.aliases {
706        aliases.extend(normalized_alias_candidates(
707            alias,
708            metadata.display_name.as_deref(),
709        ));
710    }
711    dedupe_strings(aliases)
712}
713
/// Remove duplicate strings, keeping the first occurrence of each value and
/// preserving the original order.
fn dedupe_strings(values: Vec<String>) -> Vec<String> {
    let mut already_seen: HashSet<String> = HashSet::new();
    let mut unique = values;
    // `insert` returns `false` for a value that is already present, which
    // tells `retain` to drop the repeat.
    unique.retain(|value| already_seen.insert(value.clone()));
    unique
}
724
725fn normalized_alias_candidates(raw_id: &str, display_name: Option<&str>) -> Vec<String> {
726    let mut values = Vec::new();
727    let raw_variants = [raw_id.to_string(), strip_vendor_prefix(raw_id)];
728
729    for variant in raw_variants {
730        let base = normalize_token(&variant);
731        if base.is_empty() {
732            continue;
733        }
734        values.push(base.clone());
735        let dotted = collapse_separators(base.replace('.', "-"));
736        if !dotted.is_empty() {
737            values.push(dotted);
738        }
739    }
740
741    if let Some(name) = display_name {
742        let normalized_name = normalize_token(name);
743        if !normalized_name.is_empty() {
744            values.push(normalized_name);
745        }
746    }
747
748    dedupe_strings(values)
749}
750
751fn normalize_token(input: &str) -> String {
752    let lowered = input.trim().to_lowercase();
753    let mut out = String::with_capacity(lowered.len());
754    let mut last_dash = false;
755
756    for ch in lowered.chars() {
757        let mapped = match ch {
758            'a'..='z' | '0'..='9' | '.' => Some(ch),
759            '/' | '_' | ' ' | ':' => Some('-'),
760            '-' => Some('-'),
761            _ => None,
762        };
763
764        if let Some(ch) = mapped {
765            if ch == '-' {
766                if last_dash {
767                    continue;
768                }
769                last_dash = true;
770            } else {
771                last_dash = false;
772            }
773            out.push(ch);
774        }
775    }
776
777    collapse_separators(out)
778}
779
/// Squash every run of `-` into a single `-` and strip dashes from both ends.
fn collapse_separators(value: String) -> String {
    let mut squashed = String::with_capacity(value.len());
    for ch in value.chars() {
        // Skip a dash that would sit directly after another dash.
        if ch == '-' && squashed.ends_with('-') {
            continue;
        }
        squashed.push(ch);
    }
    squashed.trim_matches('-').to_string()
}
786
/// Remove one leading `vendor/` or `vendor:` prefix for a known vendor.
///
/// Matching is case-sensitive and only a single prefix is removed. Values
/// without a recognized prefix are returned unchanged.
fn strip_vendor_prefix(value: &str) -> String {
    const VENDORS: [&str; 11] = [
        "anthropic",
        "openai",
        "google",
        "groq",
        "xai",
        "deepseek",
        "openrouter",
        "zai",
        "zenmux",
        "minimax",
        "kimi",
    ];

    VENDORS
        .iter()
        .find_map(|vendor| {
            // The vendor name must be followed directly by `/` or `:`.
            let after_vendor = value.strip_prefix(*vendor)?;
            after_vendor
                .strip_prefix('/')
                .or_else(|| after_vendor.strip_prefix(':'))
        })
        .unwrap_or(value)
        .to_string()
}
818
/// Provider-specific strategy for listing models.
///
/// The listing flow calls `fetch_raw` first and then hands the returned JSON
/// to `extract_models`.
#[async_trait]
trait ModelListAdapter: Send + Sync {
    /// Fetch the provider's model listing as raw JSON.
    async fn fetch_raw(&self, request: &FetchModelsRequest) -> Result<Value, ModelCatalogError>;

    /// Convert the raw JSON returned by `fetch_raw` into extracted models.
    fn extract_models(&self, raw: &Value)
        -> Result<Vec<ProviderExtractedModel>, ModelCatalogError>;
}
826
827fn adapter_for(provider: &Provider) -> Result<Box<dyn ModelListAdapter>, ModelCatalogError> {
828    match provider {
829        Provider::OpenAI
830        | Provider::OpenAICompatible
831        | Provider::XAI
832        | Provider::Groq
833        | Provider::OpenRouter
834        | Provider::ZAI
835        | Provider::DeepSeek
836        | Provider::Zenmux
837        | Provider::Ollama => Ok(Box::new(OpenAIModelsAdapter::new(provider.clone()))),
838        Provider::Anthropic | Provider::MiniMax | Provider::MiniMaxCN | Provider::KimiCoding => {
839            Ok(Box::new(AnthropicModelsAdapter::new(provider.clone())))
840        }
841        _ => Err(ModelCatalogError::UnsupportedProvider {
842            provider: provider.clone(),
843        }),
844    }
845}
846
847#[derive(Debug, Clone)]
848struct OpenAIModelsAdapter {
849    provider: Provider,
850    client: Client,
851}
852
853impl OpenAIModelsAdapter {
854    fn new(provider: Provider) -> Self {
855        Self {
856            provider,
857            client: build_client(),
858        }
859    }
860}
861
862#[async_trait]
863impl ModelListAdapter for OpenAIModelsAdapter {
864    async fn fetch_raw(&self, request: &FetchModelsRequest) -> Result<Value, ModelCatalogError> {
865        let url = join_url(&resolve_base_url(request)?, "models");
866        let headers = build_openai_headers(&self.provider, request);
867        send_json_request(&self.client, self.provider.clone(), &url, headers).await
868    }
869
870    fn extract_models(
871        &self,
872        raw: &Value,
873    ) -> Result<Vec<ProviderExtractedModel>, ModelCatalogError> {
874        extract_openai_models(&self.provider, raw)
875    }
876}
877
878#[derive(Debug, Clone)]
879struct AnthropicModelsAdapter {
880    provider: Provider,
881    client: Client,
882}
883
884impl AnthropicModelsAdapter {
885    fn new(provider: Provider) -> Self {
886        Self {
887            provider,
888            client: build_client(),
889        }
890    }
891}
892
893#[async_trait]
894impl ModelListAdapter for AnthropicModelsAdapter {
895    async fn fetch_raw(&self, request: &FetchModelsRequest) -> Result<Value, ModelCatalogError> {
896        let mut after_id: Option<String> = None;
897        let mut combined_pages = Vec::new();
898        let mut combined_data = Vec::new();
899        let mut seen_cursors = HashSet::new();
900
901        loop {
902            let url = join_url(&resolve_base_url(request)?, "models");
903            let headers = build_anthropic_headers(&self.provider, request);
904            let mut query = vec![("limit", "1000".to_string())];
905            if let Some(ref cursor) = after_id {
906                query.push(("after_id", cursor.clone()));
907            }
908
909            let response = send_json_request_with_query(
910                &self.client,
911                self.provider.clone(),
912                &url,
913                headers,
914                &query,
915            )
916            .await?;
917
918            combined_data.extend(
919                value_array(&self.provider, &response, "data")?
920                    .iter()
921                    .cloned(),
922            );
923            let has_more = response
924                .get("has_more")
925                .and_then(Value::as_bool)
926                .unwrap_or(false);
927            let next_after_id = response
928                .get("last_id")
929                .and_then(Value::as_str)
930                .map(ToString::to_string);
931            combined_pages.push(response);
932
933            if !has_more {
934                break;
935            }
936
937            let cursor = next_after_id.ok_or_else(|| ModelCatalogError::InvalidResponse {
938                provider: self.provider.clone(),
939                message: "paginated response is missing `last_id`".to_string(),
940            })?;
941
942            if !seen_cursors.insert(cursor.clone()) {
943                return Err(ModelCatalogError::PaginationLoop {
944                    provider: self.provider.clone(),
945                    cursor,
946                });
947            }
948
949            after_id = Some(cursor);
950        }
951
952        Ok(json!({
953            "data": combined_data,
954            "pages": combined_pages,
955        }))
956    }
957
958    fn extract_models(
959        &self,
960        raw: &Value,
961    ) -> Result<Vec<ProviderExtractedModel>, ModelCatalogError> {
962        extract_anthropic_models(&self.provider, raw)
963    }
964}
965
966fn build_client() -> Client {
967    Client::builder()
968        .connect_timeout(std::time::Duration::from_secs(30))
969        .build()
970        .unwrap_or_else(|_| Client::new())
971}
972
973fn resolve_base_url(request: &FetchModelsRequest) -> Result<String, ModelCatalogError> {
974    if let Some(base_url) = request.base_url.as_ref() {
975        return Ok(base_url.clone());
976    }
977
978    let base_url = match request.provider {
979        Provider::OpenAI => OPENAI_BASE_URL,
980        Provider::OpenAICompatible => {
981            return Err(ModelCatalogError::MissingBaseUrl {
982                provider: request.provider.clone(),
983            })
984        }
985        Provider::XAI => XAI_BASE_URL,
986        Provider::Groq => GROQ_BASE_URL,
987        Provider::OpenRouter => OPENROUTER_BASE_URL,
988        Provider::ZAI => ZAI_BASE_URL,
989        Provider::DeepSeek => DEEPSEEK_BASE_URL,
990        Provider::Zenmux => ZENMUX_BASE_URL,
991        Provider::Ollama => OLLAMA_BASE_URL,
992        Provider::Anthropic => ANTHROPIC_BASE_URL,
993        Provider::MiniMax => MINIMAX_BASE_URL,
994        Provider::MiniMaxCN => MINIMAX_CN_BASE_URL,
995        Provider::KimiCoding => KIMI_CODING_BASE_URL,
996        _ => {
997            return Err(ModelCatalogError::UnsupportedProvider {
998                provider: request.provider.clone(),
999            })
1000        }
1001    };
1002
1003    Ok(base_url.to_string())
1004}
1005
/// Joins `base_url` and `path` with exactly one `/` between them.
///
/// All trailing slashes of `base_url` and all leading slashes of `path` are
/// dropped before joining.
fn join_url(base_url: &str, path: &str) -> String {
    let prefix = base_url.trim_end_matches('/');
    let suffix = path.trim_start_matches('/');

    let mut joined = String::with_capacity(prefix.len() + 1 + suffix.len());
    joined.push_str(prefix);
    joined.push('/');
    joined.push_str(suffix);
    joined
}
1013
1014fn build_openai_headers(provider: &Provider, request: &FetchModelsRequest) -> HeaderMap {
1015    let mut headers = HeaderMap::new();
1016    headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
1017    headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
1018
1019    if let Some(api_key) = resolve_api_key(provider, request).filter(|key| !key.is_empty()) {
1020        let bearer = format!("Bearer {}", api_key);
1021        if let Ok(value) = HeaderValue::from_str(&bearer) {
1022            headers.insert(AUTHORIZATION, value);
1023        }
1024    }
1025
1026    apply_custom_headers(&mut headers, &request.headers, &HeaderPolicy::default());
1027
1028    headers
1029}
1030
1031fn build_anthropic_headers(provider: &Provider, request: &FetchModelsRequest) -> HeaderMap {
1032    let mut headers = HeaderMap::new();
1033    headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
1034    headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
1035    headers.insert(
1036        "anthropic-version",
1037        HeaderValue::from_static(ANTHROPIC_VERSION),
1038    );
1039
1040    if let Some(api_key) = resolve_api_key(provider, request).filter(|key| !key.is_empty()) {
1041        if let Ok(value) = HeaderValue::from_str(&api_key) {
1042            headers.insert("x-api-key", value);
1043        }
1044    }
1045
1046    apply_custom_headers(&mut headers, &request.headers, &HeaderPolicy::default());
1047
1048    headers
1049}
1050
1051fn resolve_api_key(provider: &Provider, request: &FetchModelsRequest) -> Option<String> {
1052    if let Some(api_key) = request.api_key.as_ref() {
1053        return Some(api_key.clone());
1054    }
1055
1056    let env_var = match provider {
1057        Provider::OpenAI => Some("OPENAI_API_KEY"),
1058        Provider::OpenAICompatible => Some("OPENAI_API_KEY"),
1059        Provider::XAI => Some("XAI_API_KEY"),
1060        Provider::Groq => Some("GROQ_API_KEY"),
1061        Provider::OpenRouter => Some("OPENROUTER_API_KEY"),
1062        Provider::ZAI => Some("ZAI_API_KEY"),
1063        Provider::DeepSeek => Some("DEEPSEEK_API_KEY"),
1064        Provider::Zenmux => Some("ZENMUX_API_KEY"),
1065        Provider::Anthropic => Some("ANTHROPIC_API_KEY"),
1066        Provider::MiniMax => Some("MINIMAX_API_KEY"),
1067        Provider::MiniMaxCN => Some("MINIMAX_CN_API_KEY"),
1068        Provider::KimiCoding => Some("KIMI_API_KEY"),
1069        Provider::Ollama => None,
1070        _ => None,
1071    };
1072
1073    env_var.and_then(|name| std::env::var(name).ok())
1074}
1075
1076fn read_local_manifest(
1077    manifest_path: &Path,
1078) -> Result<Option<CatalogSnapshotManifest>, CatalogSnapshotError> {
1079    if !manifest_path.exists() {
1080        return Ok(None);
1081    }
1082
1083    let bytes = fs::read(manifest_path).map_err(|source| CatalogSnapshotError::ReadFile {
1084        path: manifest_path.to_path_buf(),
1085        source,
1086    })?;
1087    let manifest = serde_json::from_slice::<CatalogSnapshotManifest>(&bytes).map_err(|source| {
1088        CatalogSnapshotError::ParseManifest {
1089            url: manifest_path.display().to_string(),
1090            source,
1091        }
1092    })?;
1093    Ok(Some(manifest))
1094}
1095
1096async fn fetch_remote_manifest(
1097    client: &Client,
1098    config: &CatalogRemoteConfig,
1099) -> Result<CatalogSnapshotManifest, CatalogSnapshotError> {
1100    let mut headers = HeaderMap::new();
1101    headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
1102    apply_custom_headers(&mut headers, &config.headers, &HeaderPolicy::default());
1103
1104    let response = client
1105        .get(&config.manifest_url)
1106        .headers(headers)
1107        .send()
1108        .await
1109        .map_err(|source| CatalogSnapshotError::FetchManifest {
1110            url: config.manifest_url.clone(),
1111            source,
1112        })?;
1113
1114    let status = response.status();
1115    if !status.is_success() {
1116        let body = response.text().await.unwrap_or_default();
1117        return Err(CatalogSnapshotError::FetchManifestHttp {
1118            url: config.manifest_url.clone(),
1119            status,
1120            body,
1121        });
1122    }
1123
1124    let bytes = response
1125        .bytes()
1126        .await
1127        .map_err(|source| CatalogSnapshotError::FetchManifest {
1128            url: config.manifest_url.clone(),
1129            source,
1130        })?;
1131
1132    serde_json::from_slice::<CatalogSnapshotManifest>(&bytes).map_err(|source| {
1133        CatalogSnapshotError::ParseManifest {
1134            url: config.manifest_url.clone(),
1135            source,
1136        }
1137    })
1138}
1139
1140async fn fetch_remote_snapshot(
1141    client: &Client,
1142    snapshot_url: &str,
1143    config: &CatalogRemoteConfig,
1144) -> Result<Vec<u8>, CatalogSnapshotError> {
1145    let mut headers = HeaderMap::new();
1146    headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
1147    apply_custom_headers(&mut headers, &config.headers, &HeaderPolicy::default());
1148
1149    let response = client
1150        .get(snapshot_url)
1151        .headers(headers)
1152        .send()
1153        .await
1154        .map_err(|source| CatalogSnapshotError::FetchSnapshot {
1155            url: snapshot_url.to_string(),
1156            source,
1157        })?;
1158
1159    let status = response.status();
1160    if !status.is_success() {
1161        let body = response.text().await.unwrap_or_default();
1162        return Err(CatalogSnapshotError::FetchSnapshotHttp {
1163            url: snapshot_url.to_string(),
1164            status,
1165            body,
1166        });
1167    }
1168
1169    response
1170        .bytes()
1171        .await
1172        .map(|bytes| bytes.to_vec())
1173        .map_err(|source| CatalogSnapshotError::FetchSnapshot {
1174            url: snapshot_url.to_string(),
1175            source,
1176        })
1177}
1178
1179fn resolve_snapshot_url(
1180    manifest_url: &str,
1181    snapshot_url: &str,
1182) -> Result<String, CatalogSnapshotError> {
1183    if let Ok(url) = Url::parse(snapshot_url) {
1184        return Ok(url.to_string());
1185    }
1186
1187    let base = Url::parse(manifest_url).map_err(|_| CatalogSnapshotError::InvalidManifestUrl {
1188        url: manifest_url.to_string(),
1189    })?;
1190    let joined = base
1191        .join(snapshot_url)
1192        .map_err(|_| CatalogSnapshotError::InvalidSnapshotUrl {
1193            url: snapshot_url.to_string(),
1194        })?;
1195    Ok(joined.to_string())
1196}
1197
1198fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), CatalogSnapshotError> {
1199    if let Some(parent) = path.parent() {
1200        fs::create_dir_all(parent).map_err(|source| CatalogSnapshotError::WriteFile {
1201            path: parent.to_path_buf(),
1202            source,
1203        })?;
1204    }
1205
1206    let temp_path = temporary_path_for(path);
1207    fs::write(&temp_path, bytes).map_err(|source| CatalogSnapshotError::WriteFile {
1208        path: temp_path.clone(),
1209        source,
1210    })?;
1211
1212    if path.exists() {
1213        fs::remove_file(path).map_err(|source| CatalogSnapshotError::WriteFile {
1214            path: path.to_path_buf(),
1215            source,
1216        })?;
1217    }
1218
1219    fs::rename(&temp_path, path).map_err(|source| CatalogSnapshotError::WriteFile {
1220        path: path.to_path_buf(),
1221        source,
1222    })?;
1223    Ok(())
1224}
1225
1226fn temporary_path_for(path: &Path) -> PathBuf {
1227    let file_name = path
1228        .file_name()
1229        .and_then(|name| name.to_str())
1230        .unwrap_or("catalog");
1231    path.with_file_name(format!("{}.tmp-{}", file_name, uuid::Uuid::new_v4()))
1232}
1233
1234fn sha256_hex(bytes: &[u8]) -> String {
1235    let mut hasher = Sha256::new();
1236    hasher.update(bytes);
1237    let digest = hasher.finalize();
1238    digest.iter().map(|byte| format!("{byte:02x}")).collect()
1239}
1240
/// Derive the local sidecar manifest path for a snapshot file.
///
/// The sidecar sits next to the snapshot and is named
/// `<stem>.manifest.<extension>`. `catalog` is used when the stem is missing or
/// not valid UTF-8, and `json` when the extension is missing or not valid UTF-8.
pub fn catalog_manifest_sidecar_path(snapshot_path: impl AsRef<Path>) -> PathBuf {
    let snapshot = snapshot_path.as_ref();

    let stem = snapshot
        .file_stem()
        .and_then(|value| value.to_str())
        .unwrap_or("catalog");
    let extension = snapshot
        .extension()
        .and_then(|value| value.to_str())
        .unwrap_or("json");

    snapshot.with_file_name(format!("{stem}.manifest.{extension}"))
}
1256
1257async fn send_json_request(
1258    client: &Client,
1259    provider: Provider,
1260    url: &str,
1261    headers: HeaderMap,
1262) -> Result<Value, ModelCatalogError> {
1263    send_json_request_with_query(client, provider, url, headers, &[]).await
1264}
1265
1266async fn send_json_request_with_query(
1267    client: &Client,
1268    provider: Provider,
1269    url: &str,
1270    headers: HeaderMap,
1271    query: &[(&str, String)],
1272) -> Result<Value, ModelCatalogError> {
1273    let response = client
1274        .get(url)
1275        .headers(headers)
1276        .query(query)
1277        .send()
1278        .await
1279        .map_err(|source| ModelCatalogError::Request {
1280            provider: provider.clone(),
1281            source,
1282        })?;
1283
1284    let status = response.status();
1285    if !status.is_success() {
1286        let body = response.text().await.unwrap_or_default();
1287        return Err(ModelCatalogError::Http {
1288            provider,
1289            status,
1290            body,
1291        });
1292    }
1293
1294    response
1295        .json::<Value>()
1296        .await
1297        .map_err(|source| ModelCatalogError::Request { provider, source })
1298}
1299
1300fn extract_openai_models(
1301    provider: &Provider,
1302    raw: &Value,
1303) -> Result<Vec<ProviderExtractedModel>, ModelCatalogError> {
1304    let data = value_array(provider, raw, "data")?;
1305    data.iter()
1306        .map(|item| extract_model_record(provider, item))
1307        .collect()
1308}
1309
1310fn extract_anthropic_models(
1311    provider: &Provider,
1312    raw: &Value,
1313) -> Result<Vec<ProviderExtractedModel>, ModelCatalogError> {
1314    let data = value_array(provider, raw, "data")?;
1315    data.iter()
1316        .map(|item| extract_model_record(provider, item))
1317        .collect()
1318}
1319
1320fn extract_model_record(
1321    provider: &Provider,
1322    item: &Value,
1323) -> Result<ProviderExtractedModel, ModelCatalogError> {
1324    let raw_id = item.get("id").and_then(Value::as_str).ok_or_else(|| {
1325        ModelCatalogError::InvalidResponse {
1326            provider: provider.clone(),
1327            message: "model entry is missing string field `id`".to_string(),
1328        }
1329    })?;
1330
1331    Ok(ProviderExtractedModel {
1332        provider: provider.clone(),
1333        raw_id: raw_id.to_string(),
1334        display_name: optional_string(item, &["display_name", "name"]),
1335        description: optional_string(item, &["description"]),
1336        context_window: optional_u64(
1337            item,
1338            &["context_window", "context_length", "max_context_length"],
1339        )
1340        .or_else(|| {
1341            item.get("top_provider")
1342                .and_then(|v| optional_u64(v, &["context_length"]))
1343        }),
1344        max_output_tokens: optional_u64(
1345            item,
1346            &[
1347                "max_output_tokens",
1348                "max_completion_tokens",
1349                "max_tokens",
1350                "output_token_limit",
1351            ],
1352        )
1353        .or_else(|| {
1354            item.get("top_provider")
1355                .and_then(|v| optional_u64(v, &["max_completion_tokens", "max_output_tokens"]))
1356        }),
1357        max_input_tokens: optional_u64(item, &["max_input_tokens", "input_token_limit"]),
1358        created_at: optional_timestamp(item, &["created_at", "created"]),
1359        modalities: optional_string_array(item, &["modalities", "supported_modalities"])
1360            .or_else(|| collect_architecture_modalities(item))
1361            .or_else(|| collect_bool_keys(item.get("capabilities"), "supports_")),
1362        capabilities: optional_string_array(item, &["capabilities"])
1363            .or_else(|| collect_capabilities_object(item.get("capabilities")))
1364            .or_else(|| collect_supported_parameter_capabilities(item)),
1365        raw: item.clone(),
1366    })
1367}
1368
1369fn value_array<'a>(
1370    provider: &Provider,
1371    raw: &'a Value,
1372    field: &str,
1373) -> Result<&'a Vec<Value>, ModelCatalogError> {
1374    raw.get(field)
1375        .and_then(Value::as_array)
1376        .ok_or_else(|| ModelCatalogError::InvalidResponse {
1377            provider: provider.clone(),
1378            message: format!("response is missing array field `{}`", field),
1379        })
1380}
1381
1382fn optional_string(item: &Value, keys: &[&str]) -> Option<String> {
1383    keys.iter()
1384        .find_map(|key| item.get(*key).and_then(Value::as_str))
1385        .map(ToString::to_string)
1386}
1387
1388fn optional_u64(item: &Value, keys: &[&str]) -> Option<u64> {
1389    keys.iter().find_map(|key| parse_u64(item.get(*key)?))
1390}
1391
1392fn parse_u64(value: &Value) -> Option<u64> {
1393    match value {
1394        Value::Number(number) => number.as_u64(),
1395        Value::String(text) => text.parse::<u64>().ok(),
1396        _ => None,
1397    }
1398}
1399
1400fn optional_timestamp(item: &Value, keys: &[&str]) -> Option<i64> {
1401    keys.iter().find_map(|key| parse_timestamp(item.get(*key)?))
1402}
1403
1404fn parse_timestamp(value: &Value) -> Option<i64> {
1405    match value {
1406        Value::Number(number) => number.as_i64(),
1407        Value::String(text) => DateTime::parse_from_rfc3339(text)
1408            .map(|ts| ts.with_timezone(&Utc).timestamp_millis())
1409            .ok()
1410            .or_else(|| text.parse::<i64>().ok()),
1411        _ => None,
1412    }
1413}
1414
1415fn optional_string_array(item: &Value, keys: &[&str]) -> Option<Vec<String>> {
1416    keys.iter()
1417        .find_map(|key| parse_string_array(item.get(*key)?))
1418}
1419
1420fn parse_string_array(value: &Value) -> Option<Vec<String>> {
1421    match value {
1422        Value::Array(values) => {
1423            let items: Vec<String> = values
1424                .iter()
1425                .filter_map(Value::as_str)
1426                .map(ToString::to_string)
1427                .collect();
1428            if items.is_empty() {
1429                None
1430            } else {
1431                Some(items)
1432            }
1433        }
1434        Value::String(text) => Some(vec![text.to_string()]),
1435        _ => None,
1436    }
1437}
1438
1439fn collect_architecture_modalities(item: &Value) -> Option<Vec<String>> {
1440    let architecture = item.get("architecture")?;
1441    let mut items = Vec::new();
1442    if let Some(inputs) = architecture
1443        .get("input_modalities")
1444        .and_then(parse_string_array)
1445    {
1446        items.extend(inputs);
1447    }
1448    if let Some(outputs) = architecture
1449        .get("output_modalities")
1450        .and_then(parse_string_array)
1451    {
1452        items.extend(outputs);
1453    }
1454
1455    let items = dedupe_strings(items);
1456    if items.is_empty() {
1457        None
1458    } else {
1459        Some(items)
1460    }
1461}
1462
1463fn collect_bool_keys(value: Option<&Value>, prefix_to_strip: &str) -> Option<Vec<String>> {
1464    let object = value?.as_object()?;
1465    let mut items = Vec::new();
1466    for (key, value) in object {
1467        if value.as_bool() == Some(true) {
1468            items.push(key.trim_start_matches(prefix_to_strip).to_string());
1469        }
1470    }
1471    if items.is_empty() {
1472        None
1473    } else {
1474        Some(items)
1475    }
1476}
1477
1478fn collect_capabilities_object(value: Option<&Value>) -> Option<Vec<String>> {
1479    let object = value?.as_object()?;
1480    let mut items = Vec::new();
1481    for (key, value) in object {
1482        if value.as_bool() == Some(true) {
1483            items.push(key.to_string());
1484        }
1485    }
1486    if items.is_empty() {
1487        None
1488    } else {
1489        Some(items)
1490    }
1491}
1492
1493fn collect_supported_parameter_capabilities(item: &Value) -> Option<Vec<String>> {
1494    let parameters = item
1495        .get("supported_parameters")
1496        .and_then(parse_string_array)?;
1497
1498    let mut items = Vec::new();
1499    for parameter in parameters {
1500        match parameter.as_str() {
1501            "reasoning" | "include_reasoning" => items.push("reasoning".to_string()),
1502            "tools" | "tool_choice" | "parallel_tool_calls" => items.push("tools".to_string()),
1503            "response_format" | "structured_outputs" => {
1504                items.push("structured_outputs".to_string())
1505            }
1506            _ => {}
1507        }
1508    }
1509
1510    let items = dedupe_strings(items);
1511    if items.is_empty() {
1512        None
1513    } else {
1514        Some(items)
1515    }
1516}
1517
#[cfg(test)]
mod tests {
    use super::*;

    /// A hyphenated provider ID must resolve to catalog metadata whose alias
    /// spells the version with a dot.
    #[test]
    fn normalizes_alias_candidates_across_provider_variants() {
        let raw_id = "claude-opus-4-6";
        let candidates = normalized_alias_candidates(raw_id, None);
        assert!(candidates.contains(&raw_id.to_string()));

        let store = InMemoryCatalogMetadataStore::new(vec![CatalogModelMetadata {
            canonical_model_key: "anthropic:claude-opus:4.6".to_string(),
            aliases: vec!["anthropic/claude-opus-4.6".to_string()],
            display_name: Some("Claude Opus 4.6".to_string()),
            description: None,
            context_window: None,
            max_output_tokens: None,
            max_input_tokens: None,
            modalities: None,
            capabilities: None,
            pricing: None,
            source: "openrouter".to_string(),
            raw: json!({}),
        }]);

        let hit = store
            .find_by_raw_or_alias(&Provider::Anthropic, raw_id, &candidates)
            .expect("should match normalized alias");

        assert_eq!(
            hit.metadata.canonical_model_key,
            "anthropic:claude-opus:4.6"
        );
    }

    /// Recognized parameters collapse into capability names and unrelated
    /// parameters are dropped.
    #[test]
    fn extracts_capabilities_from_supported_parameters() {
        let payload = json!({
            "supported_parameters": [
                "max_tokens",
                "include_reasoning",
                "reasoning",
                "tool_choice",
                "tools",
                "response_format",
                "seed"
            ]
        });

        let expected: Vec<String> = ["reasoning", "tools", "structured_outputs"]
            .iter()
            .map(|name| name.to_string())
            .collect();

        assert_eq!(
            collect_supported_parameter_capabilities(&payload),
            Some(expected)
        );
    }
}