use std::collections::{BTreeMap, BTreeSet};
use super::*;
/// Validates a provider catalog artifact and reports every problem found.
///
/// No check short-circuits: each violation is appended to the returned
/// [`ProviderCatalogValidation`] (`errors` for hard failures, `warnings` for
/// advisory findings), so a single call surfaces all findings at once.
///
/// Checks run in this order:
///
/// 1. catalog level — schema version, at least one provider, at least one model;
/// 2. providers — non-blank id / display name / chat endpoint, auth env keys
///    when auth is required (except the `aws_sigv4` style), plus the
///    rate-limit, extra-header, healthcheck and local-runtime helpers;
/// 3. aliases — non-blank unique names that reference a known provider;
/// 4. models — unique ids, non-blank names, known provider, positive context
///    window, deprecation note for deprecated rows, fast-mode status, plus the
///    token-field, reviewer-selector, pricing, rate-limit, architecture and
///    local-memory helpers;
/// 5. `superseded_by` targets that match no model id (warning);
/// 6. equivalence groups — active rows must share one tier, and rows served by
///    a provider with a local runtime must not claim strengths outside the
///    intersection of the whole group's strengths;
/// 7. alias targets — catalog row exists, pinned `tool_format` is recognised
///    and supported by the target, tier aliases on dedicated models (warning);
/// 8. variants — non-blank id, known provider, existing catalog row.
pub fn validate_artifact(artifact: &ProviderCatalogArtifact) -> ProviderCatalogValidation {
    /// Collects a model's declared strengths into an ordered set of borrowed strs.
    fn strength_set(model: &CatalogModel) -> BTreeSet<&str> {
        model.strengths.iter().map(String::as_str).collect()
    }

    let mut report = ProviderCatalogValidation::default();

    // Shorthands for appending a formatted finding. They expand in place, so
    // `report` stays free to be lent to the `validate_*` helpers in between.
    macro_rules! push_error {
        ($($arg:tt)*) => {
            report.errors.push(format!($($arg)*))
        };
    }
    macro_rules! push_warning {
        ($($arg:tt)*) => {
            report.warnings.push(format!($($arg)*))
        };
    }

    // --- 1. Catalog-level checks -------------------------------------------
    if artifact.schema_version != PROVIDER_CATALOG_SCHEMA_VERSION {
        push_error!(
            "schema_version must be {}, got {}",
            PROVIDER_CATALOG_SCHEMA_VERSION,
            artifact.schema_version
        );
    }
    if artifact.providers.is_empty() {
        report
            .errors
            .push(String::from("catalog has no providers"));
    }
    if artifact.models.is_empty() {
        report.errors.push(String::from("catalog has no models"));
    }

    // --- 2. Providers ------------------------------------------------------
    let known_providers: BTreeSet<&str> = artifact
        .providers
        .iter()
        .map(|provider| provider.id.as_str())
        .collect();

    for provider in &artifact.providers {
        let id = &provider.id;
        if id.trim().is_empty() {
            report
                .errors
                .push(String::from("provider id cannot be empty"));
        }
        if provider.display_name.trim().is_empty() {
            push_error!("provider {} display_name cannot be empty", id);
        }
        if provider.endpoint.chat_endpoint.trim().is_empty() {
            push_error!("provider {} chat_endpoint cannot be empty", id);
        }

        // The `aws_sigv4` auth style is exempt from the env-key requirement.
        let auth = &provider.auth;
        if auth.required && auth.env.is_empty() && auth.style != "aws_sigv4" {
            push_error!(
                "provider {} requires auth but declares no auth env keys",
                id
            );
        }

        if let Some(limits) = &provider.rate_limits {
            let scope = format!("provider {}", id);
            validate_rate_limits(&scope, limits, &mut report);
        }
        validate_extra_headers(provider, &mut report);
        if let Some(healthcheck) = &provider.healthcheck {
            validate_provider_healthcheck(provider, healthcheck, &mut report);
        }
        if let Some(runtime) = &provider.local_runtime {
            validate_local_runtime(id, runtime, &mut report);
        }
    }

    // --- 3. Aliases: names and provider references -------------------------
    let mut seen_aliases: BTreeSet<&str> = BTreeSet::new();
    for alias in &artifact.aliases {
        if alias.name.trim().is_empty() {
            report
                .errors
                .push(String::from("alias name cannot be empty"));
        }
        let first_use = seen_aliases.insert(alias.name.as_str());
        if !first_use {
            push_error!("duplicate alias name {}", alias.name);
        }
        if !known_providers.contains(alias.provider.as_str()) {
            push_error!(
                "alias {} references unknown provider {}",
                alias.name,
                alias.provider
            );
        }
    }

    // --- 4. Models: per-row checks -----------------------------------------
    let mut seen_model_ids: BTreeSet<&str> = BTreeSet::new();
    for model in &artifact.models {
        let first_use = seen_model_ids.insert(model.id.as_str());
        if !first_use {
            push_error!("duplicate model id {}", model.id);
        }
        if model.name.trim().is_empty() {
            push_error!("model {} name cannot be empty", model.id);
        }
        if !known_providers.contains(model.provider.as_str()) {
            push_error!(
                "model {} references unknown provider {}",
                model.id,
                model.provider
            );
        }

        validate_token_field(model, "family", &model.family, &mut report);
        validate_token_field(model, "lineage", &model.lineage, &mut report);
        for peer_family in &model.complementary_with {
            validate_token_field(model, "complementary_with", peer_family, &mut report);
        }
        for selector in &model.avoid_as_reviewer_for {
            validate_reviewer_selector(model, selector, &mut report);
        }

        if model.context_window == 0 {
            push_error!("model {} context_window must be positive", model.id);
        }

        if let Some(pricing) = &model.pricing {
            validate_pricing(model, pricing, &mut report);
        }
        if let Some(limits) = &model.rate_limits {
            let scope = format!("model {}", model.id);
            validate_rate_limits(&scope, limits, &mut report);
        }
        if let Some(architecture) = &model.architecture {
            validate_architecture(model, architecture, &mut report);
        }
        if let Some(memory) = &model.local_memory {
            validate_local_memory(model, memory, &mut report);
        }

        // A deprecated row needs a note that is present and not just whitespace.
        let deprecation = &model.deprecation;
        if deprecation.status == DeprecationStatus::Deprecated {
            let note_is_blank = deprecation
                .note
                .as_deref()
                .map_or(true, |note| note.trim().is_empty());
            if note_is_blank {
                push_error!(
                    "deprecated model {} must include deprecation.note",
                    model.id
                );
            }
        }

        if let Some(fast) = &model.fast_mode {
            if let Some(pricing) = &fast.pricing {
                validate_pricing(model, pricing, &mut report);
            }
            // An unrecognised status is only advisory, hence a warning.
            match fast.status.as_deref() {
                None | Some("ga" | "research_preview" | "deprecated") => {}
                Some(other) => {
                    push_warning!(
                        "model {} fast_mode.status {:?} is not one of ga|research_preview|deprecated",
                        model.id,
                        other
                    );
                }
            }
        }
    }

    // --- 5. superseded_by must name a model id present in the catalog ------
    // Runs after the model loop so forward references resolve.
    for model in &artifact.models {
        let Some(successor) = model.deprecation.superseded_by.as_deref() else {
            continue;
        };
        if !seen_model_ids.contains(successor) {
            push_warning!(
                "model {} declares superseded_by {} with no matching catalog row",
                model.id,
                successor
            );
        }
    }

    // --- 6. Equivalence groups ---------------------------------------------
    // Index the non-deprecated rows that carry a non-blank equivalence_group.
    // Both group checks below read from this single index.
    let mut active_rows_by_group: BTreeMap<&str, Vec<&CatalogModel>> = BTreeMap::new();
    let grouped_rows = artifact
        .models
        .iter()
        .filter(|model| model.deprecation.status != DeprecationStatus::Deprecated)
        .filter_map(|model| {
            let group = model.equivalence_group.as_deref()?;
            (!group.trim().is_empty()).then_some((group, model))
        });
    for (group, model) in grouped_rows {
        active_rows_by_group.entry(group).or_default().push(model);
    }

    // 6a. Every active row of a group must declare the same tier.
    for (group, rows) in &active_rows_by_group {
        let mut ids_by_tier: BTreeMap<&str, BTreeSet<&str>> = BTreeMap::new();
        for model in rows {
            ids_by_tier
                .entry(model.tier.as_str())
                .or_default()
                .insert(model.id.as_str());
        }
        if ids_by_tier.len() <= 1 {
            continue;
        }
        let detail = ids_by_tier
            .iter()
            .map(|(tier, ids)| {
                let members: Vec<&str> = ids.iter().copied().collect();
                format!("{tier} ({})", members.join(", "))
            })
            .collect::<Vec<_>>()
            .join("; ");
        push_error!(
            "equivalence_group {group} declares conflicting tiers across its \
             provider rows: {detail}. tier is a capability of the logical model — \
             give every active row in the group the same tier (the conservative \
             least-capable host baseline), not a per-provider value."
        );
    }

    // 6b. Rows served by a provider with a local runtime may only claim
    // strengths that every row of the group shares.
    let local_providers: BTreeSet<&str> = artifact
        .providers
        .iter()
        .filter(|provider| provider.local_runtime.is_some())
        .map(|provider| provider.id.as_str())
        .collect();
    for (group, rows) in &active_rows_by_group {
        // Baseline = intersection of the strengths of all rows in the group.
        let baseline: BTreeSet<&str> = rows
            .iter()
            .copied()
            .map(strength_set)
            .reduce(|shared, row| shared.intersection(&row).copied().collect())
            .unwrap_or_default();

        let local_rows = rows
            .iter()
            .copied()
            .filter(|model| local_providers.contains(model.provider.as_str()));
        for model in local_rows {
            let claimed = strength_set(model);
            let extras: Vec<&str> = claimed.difference(&baseline).copied().collect();
            if extras.is_empty() {
                continue;
            }
            push_error!(
                "local-runtime row {}/{} in equivalence_group {group} claims strengths \
                 [{}] beyond the group's conservative baseline [{}]. A local route must \
                 not inherit a cloud peer's decoration — strengths must be the \
                 least-capable host baseline (a subset of every co-grouped row), or the \
                 local route reads as already-capable and suppresses real escalations.",
                model.provider,
                model.id,
                extras.join(", "),
                baseline.iter().copied().collect::<Vec<_>>().join(", "),
            );
        }
    }

    // --- 7. Alias targets --------------------------------------------------
    // Keyed by (provider, model id); its key set doubles as the set of
    // existing catalog rows for the alias and variant target checks.
    let model_by_pair: BTreeMap<(&str, &str), &CatalogModel> = artifact
        .models
        .iter()
        .map(|model| ((model.provider.as_str(), model.id.as_str()), model))
        .collect();
    let dedicated_pairs: BTreeSet<(&str, &str)> = artifact
        .models
        .iter()
        .filter(|model| model.availability == ModelAvailabilityStatus::Dedicated)
        .map(|model| (model.provider.as_str(), model.id.as_str()))
        .collect();

    for alias in &artifact.aliases {
        let target = (alias.provider.as_str(), alias.model_id.as_str());

        if !model_by_pair.contains_key(&target) {
            push_error!(
                "alias {} targets {}/{} without a catalog row",
                alias.name,
                alias.provider,
                alias.model_id
            );
        }

        if let Some(pinned) = alias.tool_format.as_deref() {
            let is_native = pinned == "native";
            let is_text_channel = pinned == "text" || pinned == "json";

            if !(is_native || is_text_channel) {
                push_error!(
                    "alias {} declares tool_format {:?}; must be \"native\", \"text\", or \"json\"",
                    alias.name,
                    pinned
                );
            } else if let Some(model) = model_by_pair.get(&target) {
                // Capability checks only apply when the target row exists.
                if is_native && !model.tool_support.native {
                    push_error!(
                        "alias {} pins tool_format \"native\" but model {}/{} does not support native tool calling",
                        alias.name,
                        alias.provider,
                        alias.model_id
                    );
                }
                if is_text_channel && !model.tool_support.text {
                    push_error!(
                        "alias {} pins tool_format {:?} (a text-channel format) but model {}/{} does not support text tool calling",
                        alias.name,
                        pinned,
                        alias.provider,
                        alias.model_id
                    );
                }
            }
        }

        if is_tier_alias(&alias.name) && dedicated_pairs.contains(&target) {
            push_warning!(
                "tier alias {} targets dedicated-only model {}/{}; serverless callers will fail until the dedicated endpoint is provisioned",
                alias.name,
                alias.provider,
                alias.model_id
            );
        }
    }

    // --- 8. Variants -------------------------------------------------------
    for variant in &artifact.variants {
        if variant.id.trim().is_empty() {
            report
                .errors
                .push(String::from("variant id cannot be empty"));
        }
        if !known_providers.contains(variant.provider.as_str()) {
            push_error!(
                "variant {} references unknown provider {}",
                variant.id,
                variant.provider
            );
        }
        let target = (variant.provider.as_str(), variant.model_id.as_str());
        if !model_by_pair.contains_key(&target) {
            push_error!(
                "variant {} targets {}/{} without a catalog row",
                variant.id,
                variant.provider,
                variant.model_id
            );
        }
    }

    report
}
/// Validates the catalog produced by `artifact()` and returns the findings.
///
/// Convenience wrapper around [`validate_artifact`]; `artifact()` is defined
/// outside this block (presumably the currently loaded catalog — confirm
/// against its definition).
pub fn validate_current() -> ProviderCatalogValidation {
    let current = artifact();
    validate_artifact(&current)
}