dynamo-llm 1.2.0

// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Prompt Formatting Module
//!
//! Handles formatting of LLM request prompts, including:
//! - Chat template rendering
//! - Tool usage formatting
//! - Generation prompt handling
//!
//! The module supports different prompt formatting strategies through the
//! PromptFormatter

// TODO:
// 1. Query if `add_generation_prompt` is present in the prompt template
// 2. Support for models with add_generation_prompt:
//    - PALS (Prefix-Assisted Language Sampling)
//    - Continuation - Detected on user turns, where we can return
//      partial assistant responses without add_generation_prompt

use anyhow::Result;
use minijinja::value::Value;
use std::collections::HashMap;
use std::sync::Arc;

use crate::preprocessor::media::MediaDecoder;

pub mod deepseek_common;
pub mod deepseek_v32;
pub mod deepseek_v4;
mod template;

pub use template::{ChatTemplate, ContextMixins};

/// Shared helper: extract a boolean thinking toggle from `chat_template_args`.
///
/// Reads the two equivalent keys (`thinking`, `enable_thinking` — vLLM's
/// canonical kwarg) in order and returns the first bool value found, or `None`
/// if neither key is present (or neither carries a bool). Used by the V4
/// formatter's `resolve_thinking_mode` and by the reasoning-parser gate in
/// `OpenAIPreprocessor::is_reasoning_disabled_by_request` so both paths agree
/// on the signal interpretation.
pub(crate) fn thinking_bool_from_args(
    args: Option<&HashMap<String, serde_json::Value>>,
) -> Option<bool> {
    let args = args?;
    for key in ["thinking", "enable_thinking"] {
        if let Some(v) = args.get(key).and_then(|x| x.as_bool()) {
            return Some(v);
        }
    }
    None
}

#[derive(Debug)]
pub enum TokenInput {
    Single(Vec<u32>),
    Batch(Vec<Vec<u32>>),
}

#[derive(Debug)]
pub enum TextInput {
    Single(String),
    Batch(Vec<String>),
}

#[derive(Debug)]
pub enum PromptInput {
    Tokens(TokenInput),
    Text(TextInput),
}

/// Trait that defines a request that can map to an OpenAI-like request.
pub trait OAIChatLikeRequest {
    fn model(&self) -> String;
    fn messages(&self) -> Value;
    fn typed_messages(&self) -> Option<&[dynamo_protocols::types::ChatCompletionRequestMessage]> {
        None
    }
    fn tools(&self) -> Option<Value> {
        None
    }
    fn tool_choice(&self) -> Option<Value> {
        None
    }
    fn response_format(&self) -> Option<Value> {
        None
    }

    fn should_add_generation_prompt(&self) -> bool;

    /// Optional additional args to merge into the chat template context
    fn chat_template_args(&self) -> Option<&HashMap<String, serde_json::Value>> {
        None
    }

    /// Returns the type of input for the prompt. Default is Text.
    fn prompt_input_type(&self) -> PromptInput {
        PromptInput::Text(TextInput::Single(String::new()))
    }

    /// Extract tokens if the input is pre-tokenized
    fn extract_tokens(&self) -> Option<TokenInput> {
        None
    }

    fn extract_text(&self) -> Option<TextInput> {
        None
    }

    fn media_io_kwargs(&self) -> Option<&MediaDecoder> {
        None
    }

    fn mm_processor_kwargs(&self) -> Option<&serde_json::Value> {
        None
    }
}

pub trait OAIPromptFormatter: Send + Sync + 'static {
    fn supports_add_generation_prompt(&self) -> bool;
    fn render(&self, req: &dyn OAIChatLikeRequest) -> Result<String>;

    /// Per-family image-placeholder template used when the chat template
    /// requires string content and the request contains images. `{n}` in
    /// the template is the 1-based image index. `None` when the
    /// formatter has no flatten strategy — MM-aware routing falls back
    /// to text-prefix routing for those families.
    fn image_placeholder_template(&self) -> Option<&'static str> {
        None
    }
}

#[derive(Clone)]
pub enum PromptFormatter {
    OAI(Arc<dyn OAIPromptFormatter>),
}

// No-op formatter: used for models without chat_template
#[derive(Debug, Default)]
pub struct NoOpFormatter;

impl OAIPromptFormatter for NoOpFormatter {
    fn supports_add_generation_prompt(&self) -> bool {
        false
    }

    fn render(&self, req: &dyn OAIChatLikeRequest) -> Result<String> {
        let messages = req.messages();

        let first_message = messages
            .get_item_by_index(0)
            .map_err(|_| anyhow::Error::msg("No message at index 0 or messages array is empty"))?;

        let content = first_message
            .get_attr("content")
            .map_err(|_| anyhow::Error::msg("First message has no 'content' field"))?;

        let content_str = content
            .as_str()
            .ok_or_else(|| anyhow::Error::msg("Message content is not a string"))?
            .to_string();
        Ok(content_str)
    }
}

impl PromptFormatter {
    pub fn no_op() -> Self {
        Self::OAI(Arc::new(NoOpFormatter))
    }
}