dynamo_llm/preprocessor/prompt.rs

// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Prompt Formatting Module
//!
//! Handles formatting of LLM request prompts, including:
//! - Chat template rendering
//! - Tool usage formatting
//! - Generation prompt handling
//!
//! The module supports different prompt formatting strategies through the
//! [`PromptFormatter`] enum.
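//!
//! A minimal sketch of the intended flow (assuming `my_request` is some type
//! implementing [`OAIChatLikeRequest`]):
//!
//! ```ignore
//! let PromptFormatter::OAI(formatter) = PromptFormatter::no_op();
//! let prompt = formatter.render(&my_request)?;
//! ```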

// TODO:
// 1. Query if `add_generation_prompt` is present in the prompt template
// 2. Support for models with add_generation_prompt:
//    - PALS (Prefix-Assisted Language Sampling)
//    - Continuation - detected on user turns, where we can return
//      partial assistant responses without add_generation_prompt

use anyhow::Result;
use minijinja::value::Value;
use std::collections::HashMap;
use std::sync::Arc;

mod template;

pub use template::ContextMixins;

/// Pre-tokenized prompt input: a single token sequence or a batch of sequences.
#[derive(Debug)]
pub enum TokenInput {
    Single(Vec<u32>),
    Batch(Vec<Vec<u32>>),
}

/// Raw text prompt input: a single string or a batch of strings.
#[derive(Debug)]
pub enum TextInput {
    Single(String),
    Batch(Vec<String>),
}

/// The kind of prompt carried by a request: pre-tokenized tokens or raw text.
#[derive(Debug)]
pub enum PromptInput {
    Tokens(TokenInput),
    Text(TextInput),
}

/// Trait for requests that can be mapped to an OpenAI-like chat request.
pub trait OAIChatLikeRequest {
    fn model(&self) -> String;
    fn messages(&self) -> Value;
    fn tools(&self) -> Option<Value> {
        None
    }
    fn tool_choice(&self) -> Option<Value> {
        None
    }

    fn should_add_generation_prompt(&self) -> bool;

    /// Optional additional args to merge into the chat template context.
    fn chat_template_args(&self) -> Option<&HashMap<String, serde_json::Value>> {
        None
    }

    /// Returns the type of input for the prompt. Defaults to text.
    fn prompt_input_type(&self) -> PromptInput {
        PromptInput::Text(TextInput::Single(String::new()))
    }

    /// Extracts the tokens if the input is pre-tokenized.
    fn extract_tokens(&self) -> Option<TokenInput> {
        None
    }

    /// Extracts the raw text if the input is plain text.
    fn extract_text(&self) -> Option<TextInput> {
        None
    }
}
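
// A minimal illustrative implementor of `OAIChatLikeRequest` (hypothetical,
// used only by the sketch in the tests at the bottom of this file): it wraps
// a single user message so it can flow through any `OAIPromptFormatter`.
#[cfg(test)]
struct SingleUserMessage {
    model: String,
    text: String,
}

#[cfg(test)]
impl OAIChatLikeRequest for SingleUserMessage {
    fn model(&self) -> String {
        self.model.clone()
    }

    fn messages(&self) -> Value {
        // Build a `[{"role": "user", "content": ...}]`-shaped template value.
        Value::from_serialize(&serde_json::json!([
            { "role": "user", "content": self.text }
        ]))
    }

    fn should_add_generation_prompt(&self) -> bool {
        true
    }
}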

/// Trait for formatters that render an OpenAI-like request into a prompt string.
pub trait OAIPromptFormatter: Send + Sync + 'static {
    fn supports_add_generation_prompt(&self) -> bool;
    fn render(&self, req: &dyn OAIChatLikeRequest) -> Result<String>;
}

/// Dispatch enum over the supported prompt formatting strategies.
pub enum PromptFormatter {
    OAI(Arc<dyn OAIPromptFormatter>),
}

/// No-op formatter, used for models without a chat template: rendering returns
/// the first message's content verbatim instead of applying a template.
#[derive(Debug, Default)]
pub struct NoOpFormatter;

impl OAIPromptFormatter for NoOpFormatter {
    fn supports_add_generation_prompt(&self) -> bool {
        false
    }

    fn render(&self, req: &dyn OAIChatLikeRequest) -> Result<String> {
        let messages = req.messages();

        // Without a chat template, the prompt is just the content of the
        // first message, which must be a plain string.
        let first_message = messages
            .get_item_by_index(0)
            .map_err(|_| anyhow::Error::msg("No message at index 0 or messages array is empty"))?;

        let content = first_message
            .get_attr("content")
            .map_err(|_| anyhow::Error::msg("First message has no 'content' field"))?;

        let content_str = content
            .as_str()
            .ok_or_else(|| anyhow::Error::msg("Message content is not a string"))?
            .to_string();

        Ok(content_str)
    }
}

impl PromptFormatter {
    /// Builds the no-op formatter for models that ship without a chat template.
    pub fn no_op() -> Self {
        Self::OAI(Arc::new(NoOpFormatter))
    }
}
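
// A sketch of how the pieces fit together, exercising `NoOpFormatter` through
// the hypothetical `SingleUserMessage` implementor defined above.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn no_op_formatter_returns_first_message_content() {
        let request = SingleUserMessage {
            model: "example-model".to_string(),
            text: "Hello, world!".to_string(),
        };

        // `PromptFormatter` currently has a single `OAI` variant, so the
        // pattern below is irrefutable.
        let PromptFormatter::OAI(formatter) = PromptFormatter::no_op();
        assert!(!formatter.supports_add_generation_prompt());
        assert_eq!(formatter.render(&request).unwrap(), "Hello, world!");
    }
}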