#![doc = include_str!("../README.md")]
use serde::Deserialize;
use std::collections::{BTreeMap, HashMap};
use std::fmt::Write;
use std::path::Path;
type ModelsDevData = HashMap<String, ProviderData>;
type ContextWindowOverride = fn(&str, u32) -> u32;
#[derive(Debug, Deserialize)]
struct ProviderData {
#[allow(dead_code)]
id: String,
#[allow(dead_code)]
name: String,
#[serde(default)]
#[allow(dead_code)]
env: Vec<String>,
#[serde(default)]
models: HashMap<String, ModelData>,
}
#[derive(Debug, Deserialize)]
struct ModelData {
id: String,
name: String,
#[serde(default)]
tool_call: Option<bool>,
#[serde(default)]
reasoning: Option<bool>,
#[serde(default)]
#[allow(dead_code)]
cost: Option<CostData>,
#[serde(default)]
limit: Option<LimitData>,
#[serde(default)]
modalities: Option<ModalitiesData>,
}
#[derive(Debug, Deserialize, Default)]
struct ModalitiesData {
#[serde(default)]
input: Vec<String>,
}
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct CostData {
#[serde(default)]
input: f64,
#[serde(default)]
output: f64,
}
#[derive(Debug, Deserialize)]
struct LimitData {
#[serde(default)]
context: u32,
#[serde(default)]
#[allow(dead_code)]
output: u32,
}
struct ProviderConfig {
dev_id: &'static str,
source_dev_id: Option<&'static str>,
extra_source_ids: &'static [&'static str],
model_filter: Option<fn(&str) -> bool>,
context_window_override: Option<ContextWindowOverride>,
enum_name: &'static str,
parser_name: &'static str,
display_name: &'static str,
env_var: Option<&'static str>,
oauth_provider_id: Option<&'static str>,
default_reasoning_levels: &'static [&'static str],
}
impl ProviderConfig {
const fn standard(
dev_id: &'static str,
enum_name: &'static str,
parser_name: &'static str,
display_name: &'static str,
env_var: Option<&'static str>,
) -> Self {
Self {
dev_id,
source_dev_id: None,
extra_source_ids: &[],
model_filter: None,
context_window_override: None,
enum_name,
parser_name,
display_name,
env_var,
oauth_provider_id: None,
default_reasoning_levels: &["low", "medium", "high"],
}
}
fn json_key(&self) -> &'static str {
self.source_dev_id.unwrap_or(self.dev_id)
}
}
#[allow(clippy::struct_field_names)]
struct DynamicProviderConfig {
enum_name: &'static str,
parser_name: &'static str,
display_name: &'static str,
}
const PROVIDERS: &[ProviderConfig] = &[
ProviderConfig::standard("anthropic", "Anthropic", "anthropic", "Anthropic", Some("ANTHROPIC_API_KEY")),
ProviderConfig {
dev_id: "codex",
source_dev_id: Some("openai"),
extra_source_ids: &[],
model_filter: Some(|id| id.contains("codex") || id.starts_with("gpt-5.") || id == "gpt-5"),
context_window_override: Some(codex_subscription_context_window),
enum_name: "Codex",
parser_name: "codex",
display_name: "Codex",
env_var: None,
oauth_provider_id: Some("codex"),
default_reasoning_levels: &["low", "medium", "high", "xhigh"],
},
ProviderConfig::standard("deepseek", "DeepSeek", "deepseek", "DeepSeek", Some("DEEPSEEK_API_KEY")),
ProviderConfig::standard("google", "Gemini", "gemini", "Gemini", Some("GEMINI_API_KEY")),
ProviderConfig::standard("moonshotai", "Moonshot", "moonshot", "Moonshot", Some("MOONSHOT_API_KEY")),
ProviderConfig::standard("openai", "Openai", "openai", "OpenAI", Some("OPENAI_API_KEY")),
ProviderConfig::standard("openrouter", "OpenRouter", "openrouter", "OpenRouter", Some("OPENROUTER_API_KEY")),
ProviderConfig {
extra_source_ids: &["zai-coding-plan"],
..ProviderConfig::standard("zai", "ZAi", "zai", "ZAI", Some("ZAI_API_KEY"))
},
ProviderConfig::standard("amazon-bedrock", "Bedrock", "bedrock", "AWS Bedrock", None),
];
const DYNAMIC_PROVIDERS: &[DynamicProviderConfig] = &[
DynamicProviderConfig { enum_name: "Ollama", parser_name: "ollama", display_name: "Ollama" },
DynamicProviderConfig { enum_name: "LlamaCpp", parser_name: "llamacpp", display_name: "LlamaCpp" },
];
const CODEX_SUBSCRIPTION_CONTEXT_WINDOW: u32 = 272_000;
fn codex_subscription_context_window(model_id: &str, default_context_window: u32) -> u32 {
match model_id {
"gpt-5.5" | "gpt-5.4" | "gpt-5.4-mini" | "gpt-5.3-codex" | "gpt-5.2" | "codex-auto-review" => {
CODEX_SUBSCRIPTION_CONTEXT_WINDOW
}
_ => default_context_window,
}
}
#[derive(Debug, Clone)]
struct ModelInfo {
variant_name: String,
model_id: String,
display_name: String,
context_window: u32,
reasoning_levels: Vec<String>,
input_modalities: Vec<String>,
}
type ProviderModels = BTreeMap<&'static str, Vec<ModelInfo>>;
struct CodegenCtx {
provider_models: ProviderModels,
}
pub struct GeneratedOutput {
pub rust_source: String,
pub provider_docs: HashMap<String, String>,
}
pub fn generate(models_json_path: &Path) -> Result<GeneratedOutput, String> {
let json_bytes = std::fs::read_to_string(models_json_path).map_err(|e| format!("read: {e}"))?;
let data: ModelsDevData = serde_json::from_str(&json_bytes).map_err(|e| format!("parse: {e}"))?;
let provider_models = build_provider_models(&data)?;
let ctx = CodegenCtx { provider_models };
Ok(GeneratedOutput { rust_source: emit_generated_source(&ctx), provider_docs: emit_provider_docs(&ctx) })
}
fn build_provider_models(data: &ModelsDevData) -> Result<ProviderModels, String> {
let mut provider_models = ProviderModels::new();
for cfg in PROVIDERS {
let json_key = cfg.json_key();
let provider_data =
data.get(json_key).ok_or_else(|| format!("Provider '{json_key}' not found in models.dev data"))?;
let mut models: Vec<ModelInfo> = collect_models_from(cfg, &provider_data.models);
for &extra_key in cfg.extra_source_ids {
if let Some(extra_data) = data.get(extra_key) {
let extra = collect_models_from(cfg, &extra_data.models);
let existing_ids: std::collections::HashSet<String> =
models.iter().map(|m| m.model_id.clone()).collect();
models.extend(extra.into_iter().filter(|m| !existing_ids.contains(&m.model_id)));
}
}
models.sort_by(|a, b| a.model_id.cmp(&b.model_id));
provider_models.insert(cfg.dev_id, models);
}
Ok(provider_models)
}
fn collect_models_from(cfg: &ProviderConfig, models: &HashMap<String, ModelData>) -> Vec<ModelInfo> {
models
.values()
.filter(|m| m.tool_call == Some(true))
.filter(|m| !is_alias(&m.id))
.filter(|m| cfg.model_filter.is_none_or(|f| f(&m.id)))
.map(|m| {
let reasoning_levels = if m.reasoning.unwrap_or(false) {
cfg.default_reasoning_levels.iter().map(|s| (*s).to_string()).collect()
} else {
Vec::new()
};
let input_modalities =
m.modalities.as_ref().map_or_else(|| vec!["text".to_string()], |md| md.input.clone());
let source_context_window = m.limit.as_ref().map_or(0, |l| l.context);
let context_window = cfg.context_window_override.map_or(source_context_window, |override_context_window| {
override_context_window(&m.id, source_context_window)
});
ModelInfo {
variant_name: model_id_to_variant(&m.id),
model_id: m.id.clone(),
display_name: m.name.clone(),
context_window,
reasoning_levels,
input_modalities,
}
})
.collect()
}
fn is_alias(id: &str) -> bool {
id.ends_with("-latest")
}
fn model_id_to_variant(id: &str) -> String {
let mut result = String::new();
let mut capitalize_next = true;
for ch in id.chars() {
if ch == '-' || ch == '.' || ch == '/' || ch == ':' {
capitalize_next = true;
} else if capitalize_next {
result.push(ch.to_ascii_uppercase());
capitalize_next = false;
} else {
result.push(ch);
}
}
if result.starts_with(|c: char| c.is_ascii_digit()) {
result.insert(0, '_');
}
result
}
fn emit_generated_source(ctx: &CodegenCtx) -> String {
let mut out = String::with_capacity(64_000);
emit_header(&mut out);
emit_provider_enums(&mut out, &ctx.provider_models);
emit_provider_impls(&mut out, &ctx.provider_models);
emit_llm_model_enum(&mut out);
emit_from_impls(&mut out);
emit_llm_model_impl(&mut out);
emit_display_impl(&mut out);
emit_fromstr_impl(&mut out);
out
}
fn emit_header(out: &mut String) {
pushln(out, "// Auto-generated from models.dev — do not edit manually");
pushln(out, "// Regenerated automatically by build.rs");
blank(out);
pushln(out, "use std::borrow::Cow;");
pushln(out, "use std::sync::LazyLock;");
pushln(out, "use crate::ReasoningEffort;");
blank(out);
}
fn emit_provider_enums(out: &mut String, provider_models: &ProviderModels) {
for cfg in PROVIDERS {
pushln(out, "#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]");
pushln(out, format!("pub enum {}Model {{", cfg.enum_name));
for model in &provider_models[cfg.dev_id] {
pushln(out, format!(" {},", model.variant_name));
}
pushln(out, "}");
blank(out);
}
}
fn emit_provider_impls(out: &mut String, provider_models: &ProviderModels) {
for cfg in PROVIDERS {
let models = &provider_models[cfg.dev_id];
let enum_name = format!("{}Model", cfg.enum_name);
pushln(out, format!("impl {enum_name} {{"));
pushln(out, " #[allow(clippy::too_many_lines)]");
pushln(out, " fn model_id(self) -> &'static str {");
pushln(out, " match self {");
for model in models {
pushln(out, format!(" Self::{} => \"{}\",", model.variant_name, model.model_id));
}
pushln(out, " }");
pushln(out, " }");
blank(out);
pushln(out, " #[allow(clippy::too_many_lines)]");
pushln(out, " fn display_name(self) -> &'static str {");
pushln(out, " match self {");
emit_grouped_arms(out, models, |m| escape_rust_string(&m.display_name), |name| format!("\"{name}\""));
pushln(out, " }");
pushln(out, " }");
blank(out);
pushln(out, " fn context_window(self) -> u32 {");
pushln(out, " match self {");
emit_grouped_arms(
out,
models,
|m| m.context_window.to_string(),
|val| format_number(val.parse::<u32>().unwrap()),
);
pushln(out, " }");
pushln(out, " }");
blank(out);
pushln(out, " pub fn reasoning_levels(self) -> &'static [ReasoningEffort] {");
pushln(out, " match self {");
emit_grouped_arms(out, models, |m| m.reasoning_levels.join(","), format_reasoning_levels_rhs);
pushln(out, " }");
pushln(out, " }");
blank(out);
pushln(out, " pub fn supports_reasoning(self) -> bool {");
pushln(out, " !self.reasoning_levels().is_empty()");
pushln(out, " }");
blank(out);
for modality in ["image", "audio"] {
pushln(out, format!(" pub fn supports_{modality}(self) -> bool {{"));
pushln(out, " match self {");
let mod_owned = modality.to_string();
emit_grouped_arms(
out,
models,
|m| m.input_modalities.contains(&mod_owned).to_string(),
std::string::ToString::to_string,
);
pushln(out, " }");
pushln(out, " }");
blank(out);
}
pushln(out, format!(" const ALL: &[{enum_name}] = &["));
for model in models {
pushln(out, format!(" Self::{},", model.variant_name));
}
pushln(out, " ];");
pushln(out, "}");
blank(out);
emit_from_str_impl(out, &enum_name, cfg.parser_name, models);
}
}
fn emit_from_str_impl(out: &mut String, enum_name: &str, parser_name: &str, models: &[ModelInfo]) {
pushln(out, format!("impl std::str::FromStr for {enum_name} {{"));
pushln(out, " type Err = String;");
blank(out);
pushln(out, " #[allow(clippy::too_many_lines)]");
pushln(out, " fn from_str(s: &str) -> Result<Self, Self::Err> {");
pushln(out, " match s {");
for model in models {
pushln(out, format!(" \"{}\" => Ok(Self::{}),", model.model_id, model.variant_name));
}
pushln(out, format!(" _ => Err(format!(\"Unknown {parser_name} model: '{{s}}'\")),"));
pushln(out, " }");
pushln(out, " }");
pushln(out, "}");
blank(out);
}
fn emit_grouped_arms(
out: &mut String,
models: &[ModelInfo],
key_fn: impl Fn(&ModelInfo) -> String,
fmt_val: impl Fn(&str) -> String,
) {
let mut groups: BTreeMap<String, Vec<&str>> = BTreeMap::new();
for model in models {
groups.entry(key_fn(model)).or_default().push(&model.variant_name);
}
for (key, variants) in &groups {
let rhs = fmt_val(key);
if variants.len() == 1 {
pushln(out, format!(" Self::{} => {rhs},", variants[0]));
} else {
let patterns: Vec<String> = variants.iter().map(|v| format!("Self::{v}")).collect();
pushln(out, format!(" {} => {rhs},", patterns.join(" | ")));
}
}
}
fn format_reasoning_levels_rhs(key: &str) -> String {
if key.is_empty() {
return "&[]".to_string();
}
let items: Vec<String> = key
.split(',')
.map(|l| {
let variant = level_str_to_variant(l);
format!("ReasoningEffort::{variant}")
})
.collect();
format!("&[{}]", items.join(", "))
}
fn level_str_to_variant(level: &str) -> &'static str {
match level {
"low" => "Low",
"medium" => "Medium",
"high" => "High",
"xhigh" => "Xhigh",
other => panic!("Unknown reasoning level: {other}"),
}
}
fn emit_llm_model_enum(out: &mut String) {
pushln(out, "/// A model from a specific provider");
pushln(out, "#[derive(Debug, Clone, PartialEq, Eq, Hash)]");
pushln(out, "pub enum LlmModel {");
for cfg in PROVIDERS {
pushln(out, format!(" {provider}({provider}Model),", provider = cfg.enum_name));
}
for dyn_cfg in DYNAMIC_PROVIDERS {
pushln(out, format!(" {}(String),", dyn_cfg.enum_name));
}
pushln(out, "}");
blank(out);
}
fn emit_from_impls(out: &mut String) {
for cfg in PROVIDERS {
pushln(out, format!("impl From<{}Model> for LlmModel {{", cfg.enum_name));
pushln(out, format!(" fn from(m: {}Model) -> Self {{ LlmModel::{}(m) }}", cfg.enum_name, cfg.enum_name));
pushln(out, "}");
blank(out);
}
}
fn emit_llm_model_impl(out: &mut String) {
pushln(out, "impl LlmModel {");
emit_llm_model_id(out);
emit_llm_display_name(out);
emit_llm_provider(out);
emit_llm_provider_display_name(out);
emit_llm_context_window(out);
emit_llm_required_env_var(out);
emit_llm_all_required_env_vars(out);
emit_llm_oauth_provider_id(out);
emit_llm_reasoning_levels(out);
emit_llm_supports_reasoning(out);
for modality in ["image", "audio"] {
emit_llm_supports_modality(out, modality);
}
emit_llm_all(out);
pushln(out, "}");
blank(out);
}
fn emit_llm_model_id(out: &mut String) {
pushln(out, " /// Raw model ID (e.g. `claude-opus-4-6`, `llama3.2`)");
pushln(out, " pub fn model_id(&self) -> Cow<'static, str> {");
pushln(out, " match self {");
for cfg in PROVIDERS {
pushln(out, format!(" Self::{}(m) => Cow::Borrowed(m.model_id()),", cfg.enum_name));
}
pushln(out, format!(" {} => Cow::Owned(s.clone()),", dynamic_pattern_with_binding("s")));
pushln(out, " }");
pushln(out, " }");
blank(out);
}
fn emit_llm_display_name(out: &mut String) {
pushln(out, " /// Human-readable display name (e.g. `Claude Opus 4.6`)");
pushln(out, " pub fn display_name(&self) -> Cow<'static, str> {");
pushln(out, " match self {");
for cfg in PROVIDERS {
pushln(out, format!(" Self::{}(m) => Cow::Borrowed(m.display_name()),", cfg.enum_name));
}
for dyn_cfg in DYNAMIC_PROVIDERS {
pushln(
out,
format!(
" Self::{}(s) => Cow::Owned(format!(\"{} {{s}}\")),",
dyn_cfg.enum_name, dyn_cfg.enum_name
),
);
}
pushln(out, " }");
pushln(out, " }");
blank(out);
}
fn emit_llm_provider(out: &mut String) {
pushln(out, " /// Provider identifier (e.g. `anthropic`)");
pushln(out, " pub fn provider(&self) -> &'static str {");
pushln(out, " match self {");
for cfg in PROVIDERS {
pushln(out, format!(" Self::{}(_) => \"{}\",", cfg.enum_name, cfg.parser_name));
}
for dyn_cfg in DYNAMIC_PROVIDERS {
pushln(out, format!(" Self::{}(_) => \"{}\",", dyn_cfg.enum_name, dyn_cfg.parser_name));
}
pushln(out, " }");
pushln(out, " }");
blank(out);
}
fn emit_llm_provider_display_name(out: &mut String) {
pushln(out, " /// Human-readable provider name (e.g. `AWS Bedrock`)");
pushln(out, " pub fn provider_display_name(&self) -> &'static str {");
pushln(out, " match self {");
for cfg in PROVIDERS {
pushln(out, format!(" Self::{}(_) => \"{}\",", cfg.enum_name, cfg.display_name));
}
for dyn_cfg in DYNAMIC_PROVIDERS {
pushln(out, format!(" Self::{}(_) => \"{}\",", dyn_cfg.enum_name, dyn_cfg.display_name));
}
pushln(out, " }");
pushln(out, " }");
blank(out);
}
fn emit_llm_context_window(out: &mut String) {
pushln(out, " /// Context window size in tokens (None for dynamic providers)");
pushln(out, " pub fn context_window(&self) -> Option<u32> {");
pushln(out, " match self {");
for cfg in PROVIDERS {
pushln(out, format!(" Self::{}(m) => Some(m.context_window()),", cfg.enum_name));
}
pushln(out, format!(" {} => None,", dynamic_pattern_with_binding("_")));
pushln(out, " }");
pushln(out, " }");
blank(out);
}
fn emit_llm_required_env_var(out: &mut String) {
pushln(out, " /// Required env var for this model's provider (None for local providers)");
pushln(out, " pub fn required_env_var(&self) -> Option<&'static str> {");
pushln(out, " match self {");
let mut none_arms: Vec<String> = Vec::new();
for cfg in PROVIDERS {
match cfg.env_var {
Some(var) => pushln(out, format!(" Self::{}(_) => Some(\"{}\"),", cfg.enum_name, var)),
None => none_arms.push(format!("Self::{}(_)", cfg.enum_name)),
}
}
for dyn_cfg in DYNAMIC_PROVIDERS {
none_arms.push(format!("Self::{}(_)", dyn_cfg.enum_name));
}
pushln(out, format!(" {} => None,", none_arms.join(" | ")));
pushln(out, " }");
pushln(out, " }");
blank(out);
}
fn emit_llm_all_required_env_vars(out: &mut String) {
let vars: Vec<&str> = PROVIDERS.iter().filter_map(|cfg| cfg.env_var).collect();
pushln(out, " /// All provider API key env var names (deduplicated, static)");
pushln(
out,
format!(
" pub const ALL_REQUIRED_ENV_VARS: &[&str] = &[{}];",
vars.iter().map(|v| format!("\"{v}\"")).collect::<Vec<_>>().join(", ")
),
);
blank(out);
}
fn emit_llm_oauth_provider_id(out: &mut String) {
pushln(out, " /// OAuth provider ID if this model requires OAuth login (e.g. `\"codex\"`)");
pushln(out, " pub fn oauth_provider_id(&self) -> Option<&'static str> {");
pushln(out, " match self {");
let mut none_arms: Vec<String> = Vec::new();
for cfg in PROVIDERS {
match cfg.oauth_provider_id {
Some(id) => pushln(out, format!(" Self::{}(_) => Some(\"{}\"),", cfg.enum_name, id)),
None => none_arms.push(format!("Self::{}(_)", cfg.enum_name)),
}
}
for dyn_cfg in DYNAMIC_PROVIDERS {
none_arms.push(format!("Self::{}(_)", dyn_cfg.enum_name));
}
pushln(out, format!(" {} => None,", none_arms.join(" | ")));
pushln(out, " }");
pushln(out, " }");
blank(out);
}
fn emit_llm_reasoning_levels(out: &mut String) {
pushln(out, " /// Reasoning levels supported by this model (empty if not a reasoning model)");
pushln(out, " pub fn reasoning_levels(&self) -> &'static [ReasoningEffort] {");
pushln(out, " match self {");
for cfg in PROVIDERS {
pushln(out, format!(" Self::{}(m) => m.reasoning_levels(),", cfg.enum_name));
}
pushln(out, format!(" {} => &[],", dynamic_pattern_with_binding("_")));
pushln(out, " }");
pushln(out, " }");
blank(out);
}
fn emit_llm_supports_reasoning(out: &mut String) {
pushln(out, " /// Whether this model supports reasoning/extended thinking");
pushln(out, " pub fn supports_reasoning(&self) -> bool {");
pushln(out, " !self.reasoning_levels().is_empty()");
pushln(out, " }");
blank(out);
}
fn emit_llm_supports_modality(out: &mut String, modality: &str) {
pushln(out, format!(" /// Whether this model supports {modality} input"));
pushln(out, format!(" pub fn supports_{modality}(&self) -> bool {{"));
pushln(out, " match self {");
for cfg in PROVIDERS {
pushln(out, format!(" Self::{}(m) => m.supports_{modality}(),", cfg.enum_name));
}
pushln(out, format!(" {} => false,", dynamic_pattern_with_binding("_")));
pushln(out, " }");
pushln(out, " }");
blank(out);
}
fn emit_llm_all(out: &mut String) {
pushln(out, " /// All catalog models (excludes dynamic providers)");
pushln(out, " pub fn all() -> &'static [LlmModel] {");
pushln(out, " static ALL: LazyLock<Vec<LlmModel>> = LazyLock::new(|| {");
pushln(out, " let mut v = Vec::new();");
for cfg in PROVIDERS {
pushln(
out,
format!(
" v.extend({}Model::ALL.iter().copied().map(LlmModel::{}));",
cfg.enum_name, cfg.enum_name
),
);
}
pushln(out, " v");
pushln(out, " });");
pushln(out, " &ALL");
pushln(out, " }");
}
fn emit_display_impl(out: &mut String) {
pushln(out, "impl std::fmt::Display for LlmModel {");
pushln(out, " fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {");
pushln(out, " write!(f, \"{}:{}\", self.provider(), self.model_id())");
pushln(out, " }");
pushln(out, "}");
blank(out);
}
fn emit_fromstr_impl(out: &mut String) {
pushln(out, "impl std::str::FromStr for LlmModel {");
pushln(out, " type Err = String;");
blank(out);
pushln(out, " /// Parse a `provider:model` string into an `LlmModel`");
pushln(out, " fn from_str(s: &str) -> Result<Self, Self::Err> {");
pushln(out, " let (provider_str, model_str) = s.split_once(':').unwrap_or((s, \"\"));");
pushln(out, " match provider_str {");
for cfg in PROVIDERS {
pushln(
out,
format!(
" \"{}\" => model_str.parse::<{}Model>().map(Self::{}),",
cfg.parser_name, cfg.enum_name, cfg.enum_name
),
);
}
for dyn_cfg in DYNAMIC_PROVIDERS {
pushln(
out,
format!(
" \"{}\" => Ok(Self::{}(model_str.to_string())),",
dyn_cfg.parser_name, dyn_cfg.enum_name
),
);
}
pushln(out, " _ => Err(format!(\"Unknown provider: '{provider_str}'\")),");
pushln(out, " }");
pushln(out, " }");
pushln(out, "}");
}
fn dynamic_pattern_with_binding(binding: &str) -> String {
DYNAMIC_PROVIDERS.iter().map(|d| format!("Self::{}({binding})", d.enum_name)).collect::<Vec<_>>().join(" | ")
}
fn format_number(n: u32) -> String {
let s = n.to_string();
if s.len() <= 4 {
return s;
}
let mut result = String::with_capacity(s.len() + s.len() / 3);
for (i, ch) in s.chars().enumerate() {
if i > 0 && (s.len() - i).is_multiple_of(3) {
result.push('_');
}
result.push(ch);
}
result
}
fn escape_rust_string(raw: &str) -> String {
raw.replace('\\', "\\\\").replace('"', "\\\"")
}
fn emit_provider_docs(ctx: &CodegenCtx) -> HashMap<String, String> {
let mut docs = HashMap::new();
for cfg in PROVIDERS {
let models = &ctx.provider_models[cfg.dev_id];
let mut doc = String::new();
pushln(&mut doc, format!("`{}` LLM provider.", cfg.display_name));
blank(&mut doc);
pushln(&mut doc, "# Authentication");
blank(&mut doc);
match cfg.env_var {
Some(var) => pushln(&mut doc, format!("Set the `{var}` environment variable.")),
None if cfg.oauth_provider_id.is_some() => {
pushln(&mut doc, "This provider uses OAuth authentication.");
}
None => {
pushln(
&mut doc,
"Uses the default AWS credential chain (environment variables, config files, IAM roles).",
);
}
}
blank(&mut doc);
pushln(&mut doc, "# Supported models");
blank(&mut doc);
pushln(&mut doc, "| Model ID | Name | Context | Reasoning | Image | Audio |");
pushln(&mut doc, "|----------|------|---------|-----------|-------|-------|");
for model in models {
let ctx_str = format_context_window(model.context_window);
let reasoning = if model.reasoning_levels.is_empty() { "" } else { "yes" };
let image = if model.input_modalities.contains(&"image".to_string()) { "yes" } else { "" };
let audio = if model.input_modalities.contains(&"audio".to_string()) { "yes" } else { "" };
pushln(
&mut doc,
format!(
"| `{}` | `{}` | `{}` | {} | {} | {} |",
model.model_id, model.display_name, ctx_str, reasoning, image, audio
),
);
}
docs.insert(cfg.dev_id.to_string(), doc);
}
for dyn_cfg in DYNAMIC_PROVIDERS {
let mut doc = String::new();
pushln(&mut doc, format!("`{}` LLM provider.", dyn_cfg.display_name));
blank(&mut doc);
pushln(
&mut doc,
format!("This provider accepts any model name at runtime (e.g. `{}:my-model`).", dyn_cfg.parser_name),
);
pushln(&mut doc, "No API key is required.");
docs.insert(dyn_cfg.parser_name.to_string(), doc);
}
docs
}
fn format_context_window(tokens: u32) -> String {
if tokens == 0 {
return "unknown".to_string();
}
if tokens >= 1_000_000 && tokens.is_multiple_of(1_000_000) {
format!("{}M", tokens / 1_000_000)
} else if tokens >= 1_000 && tokens.is_multiple_of(1_000) {
format!("{}k", tokens / 1_000)
} else {
format_number(tokens)
}
}
fn pushln(out: &mut String, line: impl AsRef<str>) {
writeln!(out, "{}", line.as_ref()).expect("writing to String should not fail");
}
fn blank(out: &mut String) {
pushln(out, "");
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::Value;
use serde_json::json;
use tempfile::NamedTempFile;
#[test]
fn generate_sorts_and_filters_models() {
let mut data = minimal_models_dev_json();
let root = data.as_object_mut().expect("root object");
let anthropic = root.get_mut("anthropic").and_then(Value::as_object_mut).expect("anthropic provider");
anthropic.insert(
"models".to_string(),
json!({
"b-model": {
"id": "b-model",
"name": "B Model",
"tool_call": true,
"limit": {"context": 2000, "output": 0}
},
"a-model": {
"id": "a-model",
"name": "A Model",
"tool_call": true,
"limit": {"context": 1000, "output": 0}
},
"alpha-latest": {
"id": "alpha-latest",
"name": "Alias",
"tool_call": true,
"limit": {"context": 500, "output": 0}
},
"no-tools": {
"id": "no-tools",
"name": "No Tools",
"tool_call": false,
"limit": {"context": 500, "output": 0}
}
}),
);
let source = generate_from_value(&data);
let a_model = "\"a-model\" => Ok(Self::AModel),";
let b_model = "\"b-model\" => Ok(Self::BModel),";
let a_pos = source.find(a_model).expect("a-model parse arm");
let b_pos = source.find(b_model).expect("b-model parse arm");
assert!(a_pos < b_pos);
assert!(!source.contains("AlphaLatest"));
assert!(!source.contains("NoTools"));
}
#[test]
fn generate_contains_core_sections() {
let source = generate_from_value(&minimal_models_dev_json());
assert!(source.contains("pub enum LlmModel {"));
assert!(source.contains("impl std::str::FromStr for LlmModel {"));
assert!(source.contains("impl std::fmt::Display for LlmModel {"));
assert!(source.contains("pub fn required_env_var(&self) -> Option<&'static str> {"));
}
#[test]
fn generate_contains_dynamic_provider_arms() {
let source = generate_from_value(&minimal_models_dev_json());
assert!(source.contains("\"ollama\" => Ok(Self::Ollama(model_str.to_string())),"));
assert!(source.contains("\"llamacpp\" => Ok(Self::LlamaCpp(model_str.to_string())),"));
assert!(source.contains("Self::Ollama(_) | Self::LlamaCpp(_) => None,"));
}
#[test]
fn generate_codex_is_catalog_provider() {
let source = generate_from_value(&minimal_models_dev_json());
assert!(source.contains("pub enum CodexModel {"));
assert!(source.contains("\"codex\" => model_str.parse::<CodexModel>().map(Self::Codex),"));
assert!(source.contains("Self::Codex(m) => Some(m.context_window()),"));
}
#[test]
fn generate_oauth_provider_id_for_codex() {
let source = generate_from_value(&minimal_models_dev_json());
assert!(source.contains("Self::Codex(_) => Some(\"codex\"),"));
assert!(source.contains("pub fn oauth_provider_id(&self) -> Option<&'static str>"));
}
#[test]
fn generate_delegates_to_provider_impls() {
let source = generate_from_value(&minimal_models_dev_json());
assert!(source.contains("Self::Anthropic(m) => Cow::Borrowed(m.model_id()),"));
assert!(source.contains("Self::Anthropic(m) => Some(m.context_window()),"));
assert!(source.contains("\"anthropic\" => model_str.parse::<AnthropicModel>().map(Self::Anthropic),"));
}
#[test]
fn generate_formats_large_numbers_with_separators() {
let mut data = minimal_models_dev_json();
let root = data.as_object_mut().expect("root object");
let anthropic = root.get_mut("anthropic").and_then(Value::as_object_mut).expect("anthropic provider");
anthropic.insert(
"models".to_string(),
json!({
"big-model": {
"id": "big-model",
"name": "Big Model",
"tool_call": true,
"limit": {"context": 200_000, "output": 0}
}
}),
);
let source = generate_from_value(&data);
assert!(source.contains("200_000"));
assert!(!source.contains("200000"));
}
#[test]
fn generate_groups_identical_match_arms() {
let mut data = minimal_models_dev_json();
let root = data.as_object_mut().expect("root object");
let anthropic = root.get_mut("anthropic").and_then(Value::as_object_mut).expect("anthropic provider");
anthropic.insert(
"models".to_string(),
json!({
"model-a": {
"id": "model-a",
"name": "Same Name",
"tool_call": true,
"limit": {"context": 100_000, "output": 0}
},
"model-b": {
"id": "model-b",
"name": "Same Name",
"tool_call": true,
"limit": {"context": 100_000, "output": 0}
}
}),
);
let source = generate_from_value(&data);
assert!(source.contains("Self::ModelA | Self::ModelB => 100_000,"));
assert!(source.contains("Self::ModelA | Self::ModelB => \"Same Name\","));
}
#[test]
fn test_model_id_to_variant() {
assert_eq!(model_id_to_variant("claude-sonnet-4-5-20250929"), "ClaudeSonnet4520250929");
assert_eq!(model_id_to_variant("gemini-2.5-flash"), "Gemini25Flash");
assert_eq!(model_id_to_variant("deepseek-chat"), "DeepseekChat");
assert_eq!(model_id_to_variant("glm-4.5"), "Glm45");
}
#[test]
fn test_model_id_to_variant_with_slash_and_colon() {
assert_eq!(model_id_to_variant("anthropic/claude-opus-4.6"), "AnthropicClaudeOpus46");
assert_eq!(model_id_to_variant("openai/gpt-5.1-codex-max"), "OpenaiGpt51CodexMax");
assert_eq!(model_id_to_variant("deepseek/deepseek-r1:free"), "DeepseekDeepseekR1Free");
}
#[test]
fn test_is_alias() {
assert!(is_alias("claude-sonnet-4-5-latest"));
assert!(is_alias("claude-3-7-sonnet-latest"));
assert!(!is_alias("claude-sonnet-4-5-20250929"));
}
#[test]
fn generate_contains_reasoning_levels_and_supports_reasoning() {
let mut data = minimal_models_dev_json();
let root = data.as_object_mut().expect("root object");
let anthropic = root.get_mut("anthropic").and_then(Value::as_object_mut).expect("anthropic provider");
anthropic.insert(
"models".to_string(),
json!({
"thinker": {
"id": "thinker",
"name": "Thinker",
"tool_call": true,
"reasoning": true,
"limit": {"context": 1000, "output": 0}
},
"fast": {
"id": "fast",
"name": "Fast",
"tool_call": true,
"reasoning": false,
"limit": {"context": 1000, "output": 0}
}
}),
);
let source = generate_from_value(&data);
assert!(source.contains("pub fn reasoning_levels(self) -> &'static [ReasoningEffort] {"));
assert!(
source
.contains("Self::Thinker => &[ReasoningEffort::Low, ReasoningEffort::Medium, ReasoningEffort::High],")
);
assert!(source.contains("Self::Fast => &[],"));
assert!(source.contains("pub fn supports_reasoning(self) -> bool {"));
assert!(source.contains("!self.reasoning_levels().is_empty()"));
assert!(source.contains("pub fn reasoning_levels(&self) -> &'static [ReasoningEffort] {"));
assert!(source.contains("Self::Ollama(_) | Self::LlamaCpp(_) => &[],"));
}
#[test]
fn generate_codex_gets_four_reasoning_levels() {
let mut data = minimal_models_dev_json();
let root = data.as_object_mut().expect("root object");
let openai = root.get_mut("openai").and_then(Value::as_object_mut).expect("openai provider");
openai.insert(
"models".to_string(),
json!({
"gpt-5.4-codex": {
"id": "gpt-5.4-codex",
"name": "GPT-5.4 Codex",
"tool_call": true,
"reasoning": true,
"limit": {"context": 200_000, "output": 0}
}
}),
);
let source = generate_from_value(&data);
assert!(
source.contains(
"ReasoningEffort::Low, ReasoningEffort::Medium, ReasoningEffort::High, ReasoningEffort::Xhigh"
),
"Codex reasoning model should have 4 levels including Xhigh"
);
}
#[test]
fn generate_codex_overrides_gpt55_subscription_context_window() {
let mut data = minimal_models_dev_json();
let root = data.as_object_mut().expect("root object");
let openai = root.get_mut("openai").and_then(Value::as_object_mut).expect("openai provider");
openai.insert(
"models".to_string(),
json!({
"gpt-5.5": {
"id": "gpt-5.5",
"name": "GPT-5.5",
"tool_call": true,
"reasoning": true,
"limit": {"context": 1_050_000, "output": 128_000}
}
}),
);
let source = generate_from_value(&data);
let codex_impl = generated_impl_block(&source, "CodexModel");
let openai_impl = generated_impl_block(&source, "OpenaiModel");
assert!(codex_impl.contains("Self::Gpt55 => 272_000,"));
assert!(openai_impl.contains("Self::Gpt55 => 1_050_000,"));
}
#[test]
fn codex_subscription_context_window_overrides_known_codex_models() {
for model_id in ["gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.2", "codex-auto-review"] {
assert_eq!(codex_subscription_context_window(model_id, 1_050_000), 272_000);
}
}
#[test]
fn codex_subscription_context_window_leaves_unknown_models_unchanged() {
assert_eq!(codex_subscription_context_window("gpt-5.3-codex-spark", 128_000), 128_000);
assert_eq!(codex_subscription_context_window("some-future-model", 400_000), 400_000);
}
fn generate_from_value(data: &Value) -> String {
let tmp = NamedTempFile::new().expect("temp file");
let json = serde_json::to_string(data).expect("serialize fixture");
std::fs::write(tmp.path(), json).expect("write fixture");
generate(tmp.path()).expect("codegen succeeds").rust_source
}
fn generated_impl_block<'a>(source: &'a str, enum_name: &str) -> &'a str {
let start_marker = format!("impl {enum_name} {{");
let start = source.find(&start_marker).expect("provider impl block start");
let rest = &source[start..];
let end_marker = format!("impl std::str::FromStr for {enum_name}");
let end = rest.find(&end_marker).expect("provider impl block end");
&rest[..end]
}
fn minimal_models_dev_json() -> Value {
let mut root = serde_json::Map::new();
for cfg in PROVIDERS {
let json_key = cfg.json_key();
root.entry(json_key.to_string()).or_insert_with(|| {
json!({
"id": json_key,
"name": json_key,
"env": [],
"models": {}
})
});
for &extra in cfg.extra_source_ids {
root.entry(extra.to_string()).or_insert_with(|| {
json!({
"id": extra,
"name": extra,
"env": [],
"models": {}
})
});
}
}
Value::Object(root)
}
#[test]
fn extra_source_ids_merges_models_into_provider() {
let mut data = minimal_models_dev_json();
let root = data.as_object_mut().unwrap();
let extra = root.get_mut("zai-coding-plan").unwrap().as_object_mut().unwrap();
extra.insert(
"models".to_string(),
json!({
"extra-model": {
"id": "extra-model",
"name": "Extra Model",
"tool_call": true,
"limit": {"context": 4000, "output": 0}
}
}),
);
let source = generate_from_value(&data);
assert!(source.contains("\"extra-model\" => Ok(Self::ExtraModel),"));
}
#[test]
fn extra_source_ids_does_not_duplicate_existing_models() {
let mut data = minimal_models_dev_json();
let root = data.as_object_mut().unwrap();
let zai = root.get_mut("zai").unwrap().as_object_mut().unwrap();
zai.insert(
"models".to_string(),
json!({
"shared-model": {
"id": "shared-model",
"name": "Shared Model",
"tool_call": true,
"limit": {"context": 1000, "output": 0}
}
}),
);
let extra = root.get_mut("zai-coding-plan").unwrap().as_object_mut().unwrap();
extra.insert(
"models".to_string(),
json!({
"shared-model": {
"id": "shared-model",
"name": "Shared Model Duplicate",
"tool_call": true,
"limit": {"context": 2000, "output": 0}
}
}),
);
let source = generate_from_value(&data);
let from_str_matches = source.matches("\"shared-model\" => Ok(Self::SharedModel),").count();
assert_eq!(from_str_matches, 1);
}
#[test]
fn generate_emits_provider_docs() {
let mut data = minimal_models_dev_json();
let root = data.as_object_mut().unwrap();
let anthropic = root.get_mut("anthropic").and_then(Value::as_object_mut).unwrap();
anthropic.insert(
"models".to_string(),
json!({
"claude-test": {
"id": "claude-test",
"name": "Claude Test",
"tool_call": true,
"reasoning": true,
"limit": {"context": 200_000, "output": 0},
"modalities": {"input": ["text", "image"]}
}
}),
);
let tmp = NamedTempFile::new().unwrap();
std::fs::write(tmp.path(), serde_json::to_string(&data).unwrap()).unwrap();
let output = generate(tmp.path()).unwrap();
let anthropic_doc = &output.provider_docs["anthropic"];
assert!(anthropic_doc.contains("`Anthropic` LLM provider."));
assert!(anthropic_doc.contains("`ANTHROPIC_API_KEY`"));
assert!(anthropic_doc.contains("| `claude-test` | `Claude Test` | `200k` | yes | yes | |"));
let ollama_doc = &output.provider_docs["ollama"];
assert!(ollama_doc.contains("`Ollama` LLM provider."));
assert!(ollama_doc.contains("any model name at runtime"));
}
#[test]
fn format_context_window_formats_correctly() {
assert_eq!(format_context_window(1_000_000), "1M");
assert_eq!(format_context_window(200_000), "200k");
assert_eq!(format_context_window(8_000), "8k");
assert_eq!(format_context_window(0), "unknown");
}
#[test]
fn level_str_to_variant_covers_all_reasoning_efforts() {
for effort in utils::ReasoningEffort::all() {
let _ = level_str_to_variant(effort.as_str());
}
}
}