dynamo_llm/protocols/common.rs

// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Engine Protocols
//! ================
//!
//! This module contains the protocols in the public API for the LLM Engine and AsyncEngine facades.
//!
//! The core components are the `CompletionRequest` and `StreamingCompletionResponse` objects.
//!
//! The `StreamingCompletionResponse` objects are the outputs of the LLM Engine; however, we
//! need some additional information to propagate intermediate results for improved observability.
//! This metadata is transferred via the other arms of the `StreamingResponse` enum.
//!

use anyhow::Result;
use derive_builder::Builder;
use serde::{Deserialize, Serialize};

use super::TokenIdType;

pub mod llm_backend;
pub mod postprocessor;
pub mod preprocessor;

/// SamplingOptionsProvider is a trait that allows the caller to extract the sampling options from
/// the object that implements it. This will mutate the object.
pub trait SamplingOptionsProvider {
    fn extract_sampling_options(&self) -> Result<SamplingOptions>;
}

pub trait StopConditionsProvider {
    fn extract_stop_conditions(&self) -> Result<StopConditions>;
}

pub trait OutputOptionsProvider {
    fn extract_output_options(&self) -> Result<OutputOptions>;
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum FinishReason {
    #[serde(rename = "eos")]
    EoS,

    #[serde(rename = "length")]
    Length,

    #[serde(rename = "stop")]
    Stop,

    #[serde(rename = "error")]
    Error(String),

    #[serde(rename = "cancelled")]
    Cancelled,

    #[serde(rename = "content_filter")]
    ContentFilter,
}

impl std::fmt::Display for FinishReason {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            FinishReason::EoS => write!(f, "eos"),
            FinishReason::Length => write!(f, "length"),
            FinishReason::Stop => write!(f, "stop"),
            FinishReason::Error(msg) => write!(f, "error: {}", msg),
            FinishReason::Cancelled => write!(f, "cancelled"),
            FinishReason::ContentFilter => write!(f, "content_filter"),
        }
    }
}

impl std::str::FromStr for FinishReason {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "eos" => Ok(FinishReason::EoS),
            "length" => Ok(FinishReason::Length),
            "stop" => Ok(FinishReason::Stop),
            "cancelled" => Ok(FinishReason::Cancelled),
            // Accept the string produced by `Display` for `ContentFilter` so parsing round-trips.
            "content_filter" => Ok(FinishReason::ContentFilter),
            s if s.starts_with("error: ") => Ok(FinishReason::Error(s[7..].to_string())),
            _ => Err(anyhow::anyhow!("Invalid FinishReason variant: '{}'", s)),
        }
    }
}

impl From<FinishReason> for dynamo_async_openai::types::CompletionFinishReason {
    fn from(reason: FinishReason) -> Self {
        match reason {
            FinishReason::EoS | FinishReason::Stop | FinishReason::Cancelled => {
                dynamo_async_openai::types::CompletionFinishReason::Stop
            }
            FinishReason::ContentFilter => {
                dynamo_async_openai::types::CompletionFinishReason::ContentFilter
            }
            FinishReason::Length => dynamo_async_openai::types::CompletionFinishReason::Length,
            FinishReason::Error(_) => dynamo_async_openai::types::CompletionFinishReason::Stop,
        }
    }
}

impl From<dynamo_async_openai::types::CompletionFinishReason> for FinishReason {
    fn from(reason: dynamo_async_openai::types::CompletionFinishReason) -> Self {
        match reason {
            dynamo_async_openai::types::CompletionFinishReason::Stop => FinishReason::Stop,
            dynamo_async_openai::types::CompletionFinishReason::Length => FinishReason::Length,
            dynamo_async_openai::types::CompletionFinishReason::ContentFilter => {
                FinishReason::ContentFilter
            }
        }
    }
}
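
// Hedged example: a small illustrative test (added here, not part of the original file)
// showing how `FinishReason` round-trips through its serde rename, `Display`, and `FromStr`
// implementations above, and how it maps (lossily) onto the OpenAI-style finish reason.
#[cfg(test)]
mod finish_reason_examples {
    use super::*;

    #[test]
    fn finish_reason_round_trips() {
        // Unit variants serialize to their `#[serde(rename = "...")]` strings.
        assert_eq!(serde_json::to_string(&FinishReason::EoS).unwrap(), "\"eos\"");

        // Display and FromStr agree on the plain variants.
        assert_eq!("length".parse::<FinishReason>().unwrap(), FinishReason::Length);
        assert_eq!(FinishReason::Stop.to_string(), "stop");

        // Cancelled collapses to `Stop` in the OpenAI completion finish reason.
        let mapped =
            dynamo_async_openai::types::CompletionFinishReason::from(FinishReason::Cancelled);
        assert!(matches!(
            mapped,
            dynamo_async_openai::types::CompletionFinishReason::Stop
        ));
    }
}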

/// LLM Inference Engines can accept a variety of input types. Not all Engines will support all
/// input types. For example, the trtllm::AsyncEngine only supports `PromptType::TokenIds` as an
/// input type. The higher-level `Backend` class is a general wrapper around Engines that will
/// enable many of the input options that require pre/postprocessing.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub enum PromptType {
    /// If allowed, this input type lets the caller pass a list of token_ids directly to the
    /// inference engine. This is an advanced feature that requires the caller to handle all of the
    /// necessary prompt formatting and tokenization.
    #[serde(rename = "token_ids")]
    TokenIds(Vec<TokenIdType>),

    /// If allowed, the raw text will be tokenized and converted to token_ids without any additional
    /// preprocessing. This is an advanced feature that requires the caller to correctly format the
    /// prompt as defined by the model.
    #[serde(rename = "raw")]
    Raw(String),

    /// If allowed, the `CompletionContext` will be preprocessed server-side. If the `Model` trait's
    /// `requires_prompt_template` returns true, the `CompletionContext` will be used to
    /// render the formatted prompt from the template. `Completion` is the preferred `PromptType`
    /// for single-turn completions.
    #[serde(rename = "completion")]
    Completion(CompletionContext),

    /// If allowed, the `ChatContext` will be preprocessed server-side. Most chat models will have
    /// a predefined prompt format/structure. If the `Model` trait's `requires_prompt_template` returns
    /// true, the `ChatContext` will be used to render the formatted prompt from the template.
    /// `ChatCompletion` is the preferred `PromptType` for multi-turn completions.
    #[serde(rename = "chat_completion")]
    ChatCompletion(ChatContext),

    /// If allowed, then `Model::requires_prompt_template()` must also return true. The `serde_json::Value`
    /// will be passed directly to the prompt template. This allows a completely generic data model and
    /// prompt template to be defined and used by the server.
    #[serde(rename = "custom_json")]
    CustomJson(serde_json::Value),
}
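
// Hedged example: an illustrative test (added for this writeup) showing the externally tagged
// JSON shape produced by the `#[serde(rename = "...")]` attributes on `PromptType`.
#[cfg(test)]
mod prompt_type_examples {
    use super::*;

    #[test]
    fn prompt_type_serde_shape() {
        let tokens = PromptType::TokenIds(vec![1, 2, 3]);
        assert_eq!(
            serde_json::to_value(&tokens).unwrap(),
            serde_json::json!({ "token_ids": [1, 2, 3] })
        );

        let raw: PromptType =
            serde_json::from_value(serde_json::json!({ "raw": "Hello" })).unwrap();
        assert_eq!(raw, PromptType::Raw("Hello".to_string()));
    }
}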

/// TensorRT LLM does not perform preprocessing or postprocessing. The input_ids / token_ids
/// are expected to be preprocessed by the client. The client is responsible for constructing
/// the model-specific prompt template and applying the tokenizer.
///
/// TensorRT LLM will perform some server-side postprocessing to ensure that generation is
/// efficiently stopped. See `StopConditions` below.
#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
pub struct CompletionRequest {
    /// Type of prompt
    pub prompt: PromptType,

    /// StopConditions are conditions that the inference engine will use to stop generation.
    pub stop_conditions: StopConditions,

    /// SamplingOptions directs the inference engine to use sampling instead of greedy decoding.
    /// More documentation is needed on how, and in what order, sampling options are applied.
    pub sampling_options: SamplingOptions,

    #[builder(default)]
    pub output_options: OutputOptions,

    /// The computed checksum of the Model Deployment Card (MDC).
    #[builder(default)]
    pub mdc_sum: Option<String>,

    /// User-requested annotations for the request
    #[builder(default)]
    pub annotations: Option<Vec<String>>,
}

impl CompletionRequest {
    pub fn builder() -> CompletionRequestBuilder {
        CompletionRequestBuilder::default()
    }
}
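
// Hedged example: a sketch (added for illustration) of building a `CompletionRequest` with the
// derive_builder-generated `CompletionRequestBuilder`. Fields marked `#[builder(default)]` can
// be omitted; the remaining three fields are required for `build()` to succeed.
#[cfg(test)]
mod completion_request_examples {
    use super::*;

    #[test]
    fn build_minimal_request() {
        let request = CompletionRequest::builder()
            .prompt(PromptType::Completion(CompletionContext::from_prompt(
                "Hello, world!".to_string(),
            )))
            .stop_conditions(StopConditions {
                max_tokens: Some(16),
                ..Default::default()
            })
            .sampling_options(SamplingOptions::default())
            .build()
            .expect("all required fields were provided");

        // Defaulted fields come back as their `Default` values.
        assert!(request.mdc_sum.is_none());
        assert!(request.annotations.is_none());
    }
}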

#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
/// Defines the prompt template and system prompt for a completion request.
/// If the model does not support prompt templates, the system_prompt will be ignored.
pub struct CompletionContext {
    /// Prompt sent by the user
    pub prompt: String,

    /// Optional system_prompt for models that support prompt templates with system_prompts.
    pub system_prompt: Option<String>,
}

/// ChatTurn is a struct that contains the user and assistant messages in a chat.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct ChatTurn {
    /// The user message
    pub user: String,

    /// The assistant response
    pub assistant: String,
}

/// ChatContext is a struct that contains the role and context of a chat message
/// along with a flattened CompletionContext.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct ChatContext {
    /// CompletionContext for this chat turn
    #[serde(flatten)]
    pub completion: CompletionContext,

    /// The history/context of the user and assistant messages in the chat context
    pub context: Vec<ChatTurn>,
}
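
// Hedged example: an illustrative test (not in the original file) showing the effect of
// `#[serde(flatten)]` on `ChatContext`: the wrapped `CompletionContext` fields appear at the
// top level of the serialized object alongside `context`.
#[cfg(test)]
mod chat_context_examples {
    use super::*;

    #[test]
    fn chat_context_flattens_completion_fields() {
        let ctx = ChatContext {
            completion: CompletionContext::with_system_prompt(
                "How about Rust?".to_string(),
                "Be brief.".to_string(),
            ),
            context: vec![ChatTurn {
                user: "Hi".to_string(),
                assistant: "Hello!".to_string(),
            }],
        };

        let value = serde_json::to_value(&ctx).unwrap();
        assert_eq!(value["prompt"], "How about Rust?");
        assert_eq!(value["system_prompt"], "Be brief.");
        assert_eq!(value["context"][0]["assistant"], "Hello!");
    }
}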

/// TensorRT LLM server-side stop conditions. These options allow the server to evaluate
/// the generated sequence and stop generation if the sequence meets a stop condition.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct StopConditions {
    /// The maximum number of tokens to generate
    pub max_tokens: Option<u32>,

    /// List of strings that stop the generation when they are generated.
    /// The returned output will not contain the stop strings.
    pub stop: Option<Vec<String>>,

    /// List of tokens that stop the generation when they are
    /// generated. The returned output will NOT contain the stop tokens.
    pub stop_token_ids_hidden: Option<Vec<TokenIdType>>,

    /// The minimum number of tokens to generate.
    /// To ignore_eos, set min_tokens to max_tokens.
    pub min_tokens: Option<u32>,

    /// Whether to ignore the EOS token and continue generating
    /// tokens after the EOS token is generated.
    // TODO(ignore_eos) - improve this by masking the EOS token with a logit bias
    pub ignore_eos: Option<bool>,

    /// Maximum number of thinking tokens allowed.
    /// NOTE: Currently a passthrough - no enforcement logic implemented.
    pub max_thinking_tokens: Option<u32>,
}

impl StopConditions {
    pub fn apply_ignore_eos(&mut self) {
        if self.ignore_eos.unwrap_or(false) {
            self.min_tokens = self.max_tokens;
            self.stop = None;
            self.stop_token_ids_hidden = None;
        }
    }
}
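
// Hedged example: a small test sketch (added for illustration) of `apply_ignore_eos`: when
// `ignore_eos` is set, `min_tokens` is pinned to `max_tokens` and the stop lists are cleared,
// matching the implementation above.
#[cfg(test)]
mod stop_conditions_examples {
    use super::*;

    #[test]
    fn apply_ignore_eos_clears_stops() {
        let mut conditions = StopConditions {
            max_tokens: Some(64),
            stop: Some(vec!["\n\n".to_string()]),
            stop_token_ids_hidden: Some(vec![2]),
            ignore_eos: Some(true),
            ..Default::default()
        };

        conditions.apply_ignore_eos();

        assert_eq!(conditions.min_tokens, Some(64));
        assert!(conditions.stop.is_none());
        assert!(conditions.stop_token_ids_hidden.is_none());
    }
}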

/// Temperature range for sampling.
pub const TEMPERATURE_RANGE: (f32, f32) = (0.0, 1.0);

/// Top P range for sampling.
pub const TOP_P_RANGE: (f32, f32) = (0.0, 1.0);

/// Frequency Penalty range for sampling.
pub const FREQUENCY_PENALTY_RANGE: (f32, f32) = (-1.0, 1.0);

/// Collection of options that control the sampling behavior of the inference engine.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct SamplingOptions {
    /// Number of output sequences to return for the given prompt
    pub n: Option<u8>,

    /// Number of output sequences that are generated from the prompt.
    /// From these `best_of` sequences, the top `n` sequences are returned.
    /// `best_of` must be greater than or equal to `n`. This is treated as
    /// the beam width when `use_beam_search` is true. By default, `best_of`
    /// is set to `n`.
    pub best_of: Option<u8>,

    /// Float that penalizes new tokens based on whether they
    /// appear in the generated text so far. Values > 0 encourage the model
    /// to use new tokens, while values < 0 encourage the model to repeat
    /// tokens.
    pub presence_penalty: Option<f32>,

    /// Float that penalizes new tokens based on their
    /// frequency in the generated text so far. Values > 0 encourage the
    /// model to use new tokens, while values < 0 encourage the model to
    /// repeat tokens.
    pub frequency_penalty: Option<f32>,

    /// Float that penalizes new tokens based on whether
    /// they appear in the prompt and the generated text so far. Values > 1
    /// encourage the model to use new tokens, while values < 1 encourage
    /// the model to repeat tokens.
    pub repetition_penalty: Option<f32>,

    /// Float that controls the randomness of the sampling. Lower
    /// values make the model more deterministic, while higher values make
    /// the model more random. Zero means greedy sampling.
    pub temperature: Option<f32>,

    /// Float that controls the cumulative probability of the top tokens
    /// to consider. Must be in (0, 1]. Set to 1 to consider all tokens.
    pub top_p: Option<f32>,

    /// Integer that controls the number of top tokens to consider. Set
    /// to -1 to consider all tokens.
    pub top_k: Option<i32>,

    /// Float that represents the minimum probability for a token to be
    /// considered, relative to the probability of the most likely token.
    /// Must be in [0, 1]. Set to 0 to disable this.
    pub min_p: Option<f32>,

    /// Whether to use beam search instead of sampling.
    pub use_beam_search: Option<bool>,

    /// Float that penalizes sequences based on their length.
    /// Used in beam search.
    pub length_penalty: Option<f32>,

    /// The seed to use when sampling
    pub seed: Option<i64>,

    /// Whether to include the stop string in the output.
    pub include_stop_str_in_output: Option<bool>,

    /// Guided Decoding Options
    pub guided_decoding: Option<GuidedDecodingOptions>,
}

/// Guided Decoding Options
///
/// Only one of `json`, `regex`, `choice`, or `grammar` should be set.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct GuidedDecodingOptions {
    /// If specified, the output will follow the JSON schema. Can be a string, an object, or null.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub json: Option<serde_json::Value>,

    /// If specified, the output will follow the regex pattern. Can be a string or null.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub regex: Option<String>,

    /// If specified, the output will be exactly one of the choices.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub choice: Option<Vec<String>>,

    /// If specified, the output will follow the context-free grammar. Can be a string or null.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grammar: Option<String>,

    /// If specified, the backend to use for guided decoding. This can be a backend such as
    /// xgrammar, or a custom guided decoding backend.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub backend: Option<String>,
}

impl GuidedDecodingOptions {
    /// Construct without validation
    pub fn new(
        json: Option<serde_json::Value>,
        regex: Option<String>,
        choice: Option<Vec<String>>,
        grammar: Option<String>,
        backend: Option<String>,
    ) -> Self {
        Self {
            json,
            regex,
            choice,
            grammar,
            backend,
        }
    }

    /// Construct and validate (fallible)
    pub fn validated(
        json: Option<serde_json::Value>,
        regex: Option<String>,
        choice: Option<Vec<String>>,
        grammar: Option<String>,
        backend: Option<String>,
    ) -> Result<Self> {
        let instance = Self::new(json, regex, choice, grammar, backend);
        instance.validate()?;
        Ok(instance)
    }

    /// Construct only if at least one field is Some (fallible)
    pub fn from_optional(
        json: Option<serde_json::Value>,
        regex: Option<String>,
        choice: Option<Vec<String>>,
        grammar: Option<String>,
        backend: Option<String>,
    ) -> Result<Option<Self>> {
        let is_empty_choice = choice.as_ref().is_none_or(|v| v.is_empty());
        if json.is_none() && regex.is_none() && is_empty_choice && grammar.is_none() {
            return Ok(None);
        }
        let instance = Self::validated(json, regex, choice, grammar, backend)?;
        Ok(Some(instance))
    }

    /// Validate that at most one guided decoding option is set
    pub fn validate(&self) -> Result<()> {
        let count = [
            self.json.is_some(),
            self.regex.is_some(),
            self.choice.as_ref().is_some_and(|v| !v.is_empty()),
            self.grammar.is_some(),
        ]
        .iter()
        .filter(|&&v| v)
        .count();

        if count > 1 {
            Err(anyhow::anyhow!(
                "Only one of json, regex, choice, or grammar can be set, but multiple are specified: {:?}",
                self
            ))
        } else {
            Ok(())
        }
    }
}

impl SamplingOptions {
    pub fn force_greedy(&mut self) {
        self.presence_penalty = None;
        self.frequency_penalty = None;
        self.repetition_penalty = None;
        self.temperature = None;
        self.top_p = None;
        self.top_k = None;
        self.min_p = None;
    }
}
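
// Hedged example: a short test sketch (added for illustration) of `force_greedy`, which clears
// every sampling-related field so a backend falls through to greedy decoding.
#[cfg(test)]
mod sampling_options_examples {
    use super::*;

    #[test]
    fn force_greedy_clears_sampling_fields() {
        let mut options = SamplingOptions {
            temperature: Some(0.7),
            top_p: Some(0.9),
            top_k: Some(40),
            seed: Some(1234),
            ..Default::default()
        };

        options.force_greedy();

        assert!(options.temperature.is_none());
        assert!(options.top_p.is_none());
        assert!(options.top_k.is_none());
        // Fields unrelated to sampling randomness, such as the seed, are left untouched.
        assert_eq!(options.seed, Some(1234));
    }
}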

/// Collection of options that control what information the inference engine returns in the response.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct OutputOptions {
    /// Number of log probabilities to return per output token.
    /// Note that the implementation follows the OpenAI API: the returned
    /// result includes the log probabilities of the `logprobs` most likely
    /// tokens, as well as the chosen tokens. The API will always return the
    /// log probability of the sampled token, so there may be up to
    /// `logprobs + 1` elements in the response.
    pub logprobs: Option<u32>,

    /// Number of log probabilities to return per prompt token.
    pub prompt_logprobs: Option<u32>,

    /// Whether to skip special tokens in the output.
    pub skip_special_tokens: Option<bool>,

    /// If true, the Context object will contain the prompt that was passed to
    /// the tokenizer. This is useful for inspecting the behavior of prompt
    /// templates that are applied during the backend preprocessing.
    pub formatted_prompt: Option<bool>,
}

// Struct for log probability information
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChatCompletionLogprobs {
    /// A list of message content tokens with log probability information.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<Vec<ChatCompletionTokenLogprob>>,

    /// A list of message refusal tokens with log probability information.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub refusal: Option<Vec<ChatCompletionTokenLogprob>>,
}

#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ChatCompletionTokenLogprob {
    /// The token.
    pub token: String,

    /// The log probability of this token, if it is within the top 20 most likely tokens.
    /// Otherwise, the value `-9999.0` signifies that the token is very unlikely.
    pub logprob: f64,

    /// A list of integers representing the UTF-8 bytes representation of the token.
    /// Useful in instances where characters are represented by multiple tokens and their
    /// byte representations must be combined to generate the correct text representation.
    /// Can be `None` if there is no bytes representation for the token.
    pub bytes: Option<Vec<u8>>,

    /// List of the most likely tokens and their log probability, at this token position.
    /// In rare cases, there may be fewer than the requested number of `top_logprobs` returned.
    pub top_logprobs: Vec<TopLogprob>,
}

#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TopLogprob {
    /// The token.
    pub token: String,

    /// The log probability of this token.
    pub logprob: f64,

    /// A list of integers representing the UTF-8 bytes representation of the token.
    /// Can be `None` if there is no bytes representation for the token.
    pub bytes: Option<Vec<u8>>,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum StreamState {
    Active,
    Finished(FinishReason),
}

#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "snake_case")]
pub enum Logits {
    All(Vec<f32>),
    Sparse(Vec<(u32, f32)>),
}

#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "snake_case")]
pub enum LogProbs {
    Normalized(Logits),
    Raw(Logits),
}
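
// Hedged example: an illustrative test (added here) of the JSON produced for the
// `snake_case`-renamed `Logits` and `LogProbs` enums above; the values are chosen to be exactly
// representable floats so the comparison is stable.
#[cfg(test)]
mod logprobs_examples {
    use super::*;

    #[test]
    fn logits_and_logprobs_serde_shape() {
        let sparse = Logits::Sparse(vec![(42, -0.5)]);
        assert_eq!(
            serde_json::to_value(&sparse).unwrap(),
            serde_json::json!({ "sparse": [[42, -0.5]] })
        );

        let raw = LogProbs::Raw(Logits::All(vec![-0.25, -2.0]));
        assert_eq!(
            serde_json::to_value(&raw).unwrap(),
            serde_json::json!({ "raw": { "all": [-0.25, -2.0] } })
        );
    }
}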

/// At each SequencePosition we hold position-specific data
pub struct SequencePositionData {
    pub token_id: TokenIdType,

    /// The log probability of the token
    pub logprobs: Option<LogProbs>,
}

#[derive(Debug)]
pub struct StreamingCompletionResponse {
    pub delta: Delta,
    pub logprobs: Option<ChatCompletionLogprobs>,
}

// todo(ryan) - we need to create a DeltaBuilder, a mutable object that can be passed
// around from the low-level compute engine to the high-level API. The DeltaBuilder will allow
// us to construct the Delta object at multiple layers in the streaming response path.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Delta {
    pub is_complete: bool,

    pub finish_reason: Option<FinishReason>,

    // new token_ids
    pub token_ids: Option<Vec<u32>>,

    // tokens
    pub tokens: Option<Vec<String>>,

    // decoded text
    pub text: Option<String>,

    // current sequence length
    // when streaming, we expect this to increase by 1 on each response
    pub sequence_length: Option<usize>,

    // if the number of slots for a given request is greater than 1,
    // this indicates the index of the slot for the response
    pub index: Option<usize>,

    /// cumulative log probabilities
    pub cum_log_probs: Option<f64>,

    /// error message from the engine;
    /// if this is set, is_complete should also be true
    pub err_msg: Option<String>,

    /// usage info
    pub usage: Option<Usage>,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Usage {
    pub input_tokens_count: usize,
    pub output_tokens_count: usize,
}
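
// Hedged example: an illustrative test (not in the original file) constructing the final
// `Delta` of a stream: `is_complete` is set together with a finish reason, and the optional
// `Usage` carries the token counts for the request.
#[cfg(test)]
mod delta_examples {
    use super::*;

    #[test]
    fn final_delta_carries_finish_reason_and_usage() {
        let last = Delta {
            is_complete: true,
            finish_reason: Some(FinishReason::EoS),
            token_ids: Some(vec![42]),
            tokens: Some(vec!["!".to_string()]),
            text: Some("!".to_string()),
            sequence_length: Some(12),
            index: Some(0),
            cum_log_probs: None,
            err_msg: None,
            usage: Some(Usage {
                input_tokens_count: 8,
                output_tokens_count: 12,
            }),
        };

        assert!(last.is_complete);
        assert_eq!(last.finish_reason, Some(FinishReason::EoS));
    }
}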

impl CompletionContext {
    /// Create a new CompletionContext
    pub fn new(prompt: String, system_prompt: Option<String>) -> Self {
        Self {
            prompt,
            system_prompt,
        }
    }

    /// Create a new CompletionContext with only a prompt
    pub fn from_prompt(prompt: String) -> Self {
        Self {
            prompt,
            system_prompt: None,
        }
    }

    /// Create a new CompletionContext with a prompt and system prompt
    pub fn with_system_prompt(prompt: String, system_prompt: String) -> Self {
        Self {
            prompt,
            system_prompt: Some(system_prompt),
        }
    }
}

// todo(ryan) - create a builder for chat context
impl From<CompletionContext> for PromptType {
    fn from(context: CompletionContext) -> Self {
        PromptType::Completion(context)
    }
}

#[cfg(test)]
mod tests {

    use super::*;

    #[test]
    fn test_completion_context_new() {
        let prompt = "Hello, world!".to_string();
        let system_prompt = Some("This is a system prompt.".to_string());
        let context = CompletionContext::new(prompt.clone(), system_prompt.clone());

        assert_eq!(context.prompt, prompt);
        assert_eq!(context.system_prompt, system_prompt);
    }

    #[test]
    fn test_completion_context_from_prompt() {
        let prompt = "Hello, world!".to_string();
        let context = CompletionContext::from_prompt(prompt.clone());

        assert_eq!(context.prompt, prompt);
        assert_eq!(context.system_prompt, None);
    }

    #[test]
    fn test_completion_context_with_system_prompt() {
        let prompt = "Hello, world!".to_string();
        let system_prompt = "This is a system prompt.".to_string();
        let context = CompletionContext::with_system_prompt(prompt.clone(), system_prompt.clone());

        assert_eq!(context.prompt, prompt);
        assert_eq!(context.system_prompt, Some(system_prompt));
    }

    #[test]
    fn test_completion_context_into_prompt_type() {
        let prompt = "Hello, world!".to_string();
        let system_prompt = "This is a system prompt.".to_string();
        let context = CompletionContext::with_system_prompt(prompt.clone(), system_prompt.clone());
        let prompt_type: PromptType = context.into();

        if let PromptType::Completion(completion_context) = prompt_type {
            assert_eq!(completion_context.prompt, prompt);
            assert_eq!(completion_context.system_prompt, Some(system_prompt));
        } else {
            panic!("Expected a Completion variant");
        }
    }

    #[test]
    fn test_guided_decoding_options_new_and_exclusive() {
        // Only JSON set
        let json_val = serde_json::json!({"type": "object"});
        let backend = Some("xgrammar".to_string());
        let opts = GuidedDecodingOptions::validated(
            Some(json_val.clone()),
            None,
            None,
            None,
            backend.clone(),
        );
        assert!(opts.is_ok());
        let opts = opts.unwrap();
        assert_eq!(opts.json, Some(json_val));
        assert!(opts.regex.is_none());
        assert!(opts.choice.is_none());
        assert!(opts.grammar.is_none());
        assert_eq!(opts.backend, backend);

        // Only regex set
        let regex = Some(r"\d+".to_string());
        let opts = GuidedDecodingOptions::validated(None, regex.clone(), None, None, None);
        assert!(opts.is_ok());
        let opts = opts.unwrap();
        assert_eq!(opts.regex, regex);
        assert!(opts.json.is_none());
        assert!(opts.choice.is_none());
        assert!(opts.grammar.is_none());

        // Only choice set
        let choice = Some(vec!["A".to_string(), "B".to_string()]);
        let opts = GuidedDecodingOptions::validated(None, None, choice.clone(), None, None);
        assert!(opts.is_ok());
        let opts = opts.unwrap();
        assert_eq!(opts.choice, choice);
        assert!(opts.json.is_none());
        assert!(opts.regex.is_none());
        assert!(opts.grammar.is_none());

        // Only grammar set
        let grammar = Some("root ::= 'yes' | 'no'".to_string());
        let opts = GuidedDecodingOptions::validated(None, None, None, grammar.clone(), None);
        assert!(opts.is_ok());
        let opts = opts.unwrap();
        assert_eq!(opts.grammar, grammar);
        assert!(opts.json.is_none());
        assert!(opts.regex.is_none());
        assert!(opts.choice.is_none());

        // Multiple fields set (should error)
        let opts = GuidedDecodingOptions::validated(
            Some(serde_json::json!({})),
            Some(r"\d+".to_string()),
            None,
            None,
            None,
        );
        assert!(opts.is_err());

        let opts = GuidedDecodingOptions::validated(
            None,
            Some(r"\d+".to_string()),
            Some(vec!["A".to_string()]),
            None,
            None,
        );
        assert!(opts.is_err());

        let opts = GuidedDecodingOptions::validated(
            Some(serde_json::json!({})),
            None,
            Some(vec!["A".to_string()]),
            Some("root ::= 'yes'".to_string()),
            None,
        );
        assert!(opts.is_err());

        // All fields None (should be ok, but not useful)
        let opts = GuidedDecodingOptions::validated(None, None, None, None, None);
        assert!(opts.is_ok());
    }

    #[test]
    fn test_guided_decoding_options_from_optional() {
        // All None returns Ok(None)
        let opts = GuidedDecodingOptions::from_optional(None, None, None, None, None);
        assert!(opts.is_ok());
        assert!(opts.unwrap().is_none());

        // Only one set returns Ok(Some)
        let regex = Some(r"\w+".to_string());
        let opts = GuidedDecodingOptions::from_optional(None, regex.clone(), None, None, None);
        assert!(opts.is_ok());
        let val = opts.unwrap();
        assert!(val.is_some());
        let val = val.unwrap();
        assert_eq!(val.regex, regex);

        // Multiple set returns Err
        let opts = GuidedDecodingOptions::from_optional(
            Some(serde_json::json!({})),
            Some(r"\d+".to_string()),
            None,
            None,
            None,
        );
        assert!(opts.is_err());

        // Choice set but empty vector should not count as set
        let opts = GuidedDecodingOptions::from_optional(None, None, Some(vec![]), None, None);
        assert!(opts.is_ok());
        let val = opts.unwrap();
        assert!(val.is_none());

        // Choice set with non-empty vector
        let opts = GuidedDecodingOptions::from_optional(
            None,
            None,
            Some(vec!["A".to_string()]),
            None,
            None,
        );
        assert!(opts.is_ok());
        let val = opts.unwrap();
        assert!(val.is_some());
        let val = val.unwrap();
        assert_eq!(val.choice, Some(vec!["A".to_string()]));
    }
}
806}