use std::{collections::HashSet, sync::Arc};
use anyhow::{Context, Ok, Result};
use minijinja::Environment;
use crate::model_card::{ModelDeploymentCard, PromptContextMixin, PromptFormatterArtifact};
mod context;
mod formatters;
mod oai;
mod tokcfg;
use super::{OAIChatLikeRequest, OAIPromptFormatter, PromptFormatter};
pub use tokcfg::ChatTemplate;
use tokcfg::ChatTemplateValue;
impl PromptFormatter {
pub fn from_mdc(mdc: &ModelDeploymentCard) -> Result<PromptFormatter> {
let name_lower = mdc.display_name.to_lowercase();
if name_lower.contains("deepseek")
&& name_lower.contains("v3.2")
&& !name_lower.contains("exp")
{
tracing::info!("Detected DeepSeek V3.2 model (non-Exp), using native Rust formatter");
return Ok(Self::OAI(Arc::new(
super::deepseek_v32::DeepSeekV32Formatter::new_thinking(),
)));
}
match mdc
.prompt_formatter
.as_ref()
.ok_or(anyhow::anyhow!("MDC does not contain a prompt formatter"))?
{
PromptFormatterArtifact::HfTokenizerConfigJson(checked_file) => {
let Some(file) = checked_file.path() else {
anyhow::bail!(
"HfTokenizerConfigJson for {} is a URL, cannot load",
mdc.display_name
);
};
let contents = std::fs::read_to_string(file).with_context(|| {
format!(
"PromptFormatter.from_mdc fs:read_to_string '{}'",
file.display()
)
})?;
let mut config: ChatTemplate =
serde_json::from_str(&contents).inspect_err(|err| {
crate::log_json_err(&file.display().to_string(), &contents, err)
})?;
if let Some(PromptFormatterArtifact::HfChatTemplate {
file: checked_file, ..
}) = mdc.chat_template_file.as_ref()
{
let Some(chat_template_file) = checked_file.path() else {
anyhow::bail!(
"HfChatTemplate for {} is a URL, cannot load",
mdc.display_name
);
};
let chat_template =
std::fs::read_to_string(chat_template_file).with_context(|| {
format!("fs:read_to_string '{}'", chat_template_file.display())
})?;
config.chat_template = Some(ChatTemplateValue(either::Left(chat_template)));
}
Self::from_parts(
config,
mdc.prompt_context
.clone()
.map_or(ContextMixins::default(), |x| ContextMixins::new(&x)),
)
}
PromptFormatterArtifact::HfChatTemplate { .. } => Err(anyhow::anyhow!(
"prompt_formatter should not have type HfChatTemplate"
)),
}
}
pub fn from_parts(config: ChatTemplate, context: ContextMixins) -> Result<PromptFormatter> {
let formatter = HfTokenizerConfigJsonFormatter::new(config, context)?;
Ok(Self::OAI(Arc::new(formatter)))
}
}
/// Thin wrapper around a `'static` minijinja [`Environment`].
/// NOTE(review): no constructor or usage is visible in this chunk —
/// presumably built/consumed in a sibling module (`formatters`?); confirm.
struct JinjaEnvironment {
    // Owns the template environment for the lifetime of the process.
    env: Environment<'static>,
}
/// Prompt formatter backed by a HuggingFace `tokenizer_config.json` chat
/// template, rendered through minijinja (see [`PromptFormatter::from_parts`]).
#[derive(Debug)]
struct HfTokenizerConfigJsonFormatter {
    // Jinja environment holding the compiled chat template(s).
    env: Environment<'static>,
    // Parsed tokenizer_config.json (template text, special tokens, etc.).
    config: ChatTemplate,
    // Context mixins applied when rendering; shared via Arc.
    mixins: Arc<ContextMixins>,
    // Whether the template honors `add_generation_prompt` —
    // presumably detected from the template source in `new`; confirm there.
    supports_add_generation_prompt: bool,
    // Whether message content must be passed as arrays rather than plain
    // strings — TODO confirm against the constructor, which is out of view.
    requires_content_arrays: bool,
}
/// Set of [`PromptContextMixin`]s to inject into the template render context.
/// `Default` yields an empty set (no mixins applied).
#[derive(Debug, Clone, Default)]
pub struct ContextMixins {
    // Deduplicated mixin set; order is irrelevant to rendering.
    context_mixins: HashSet<PromptContextMixin>,
}