// openai_protocol/completion.rs

use std::collections::HashMap;

use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};

use super::common::*;

// ============================================================================
// Completions API (v1/completions) - DEPRECATED but still supported
// ============================================================================

12#[derive(Debug, Clone, Deserialize, Serialize)]
13pub struct CompletionRequest {
14    /// ID of the model to use (required for OpenAI, optional for some implementations, such as SGLang)
15    pub model: String,
16
17    /// The prompt(s) to generate completions for
18    pub prompt: StringOrArray,
19
20    /// The suffix that comes after a completion of inserted text
21    #[serde(skip_serializing_if = "Option::is_none")]
22    pub suffix: Option<String>,
23
24    /// The maximum number of tokens to generate
25    #[serde(skip_serializing_if = "Option::is_none")]
26    pub max_tokens: Option<u32>,
27
28    /// What sampling temperature to use, between 0 and 2
29    #[serde(skip_serializing_if = "Option::is_none")]
30    pub temperature: Option<f32>,
31
32    /// An alternative to sampling with temperature (nucleus sampling)
33    #[serde(skip_serializing_if = "Option::is_none")]
34    pub top_p: Option<f32>,
35
36    /// How many completions to generate for each prompt
37    #[serde(skip_serializing_if = "Option::is_none")]
38    pub n: Option<u32>,
39
40    /// Whether to stream back partial progress
41    #[serde(default)]
42    pub stream: bool,
43
44    /// Options for streaming response
45    #[serde(skip_serializing_if = "Option::is_none")]
46    pub stream_options: Option<StreamOptions>,
47
48    /// Include the log probabilities on the logprobs most likely tokens
49    #[serde(skip_serializing_if = "Option::is_none")]
50    pub logprobs: Option<u32>,
51
52    /// Echo back the prompt in addition to the completion
53    #[serde(default)]
54    pub echo: bool,
55
56    /// Up to 4 sequences where the API will stop generating further tokens
57    #[serde(skip_serializing_if = "Option::is_none")]
58    pub stop: Option<StringOrArray>,
59
60    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far
61    #[serde(skip_serializing_if = "Option::is_none")]
62    pub presence_penalty: Option<f32>,
63
64    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far
65    #[serde(skip_serializing_if = "Option::is_none")]
66    pub frequency_penalty: Option<f32>,
67
68    /// Generates best_of completions server-side and returns the "best"
69    #[serde(skip_serializing_if = "Option::is_none")]
70    pub best_of: Option<u32>,
71
72    /// Modify the likelihood of specified tokens appearing in the completion
73    #[serde(skip_serializing_if = "Option::is_none")]
74    pub logit_bias: Option<HashMap<String, f32>>,
75
76    /// A unique identifier representing your end-user
77    #[serde(skip_serializing_if = "Option::is_none")]
78    pub user: Option<String>,
79
80    /// If specified, our system will make a best effort to sample deterministically
81    #[serde(skip_serializing_if = "Option::is_none")]
82    pub seed: Option<i64>,
83
84    // -------- Engine Specific Sampling Parameters --------
85    /// Top-k sampling parameter (-1 to disable)
86    #[serde(skip_serializing_if = "Option::is_none")]
87    pub top_k: Option<i32>,
88
89    /// Min-p nucleus sampling parameter
90    #[serde(skip_serializing_if = "Option::is_none")]
91    pub min_p: Option<f32>,
92
93    /// Minimum number of tokens to generate
94    #[serde(skip_serializing_if = "Option::is_none")]
95    pub min_tokens: Option<u32>,
96
97    /// Repetition penalty for reducing repetitive text
98    #[serde(skip_serializing_if = "Option::is_none")]
99    pub repetition_penalty: Option<f32>,
100
101    /// Regex constraint for output generation
102    #[serde(skip_serializing_if = "Option::is_none")]
103    pub regex: Option<String>,
104
105    /// EBNF grammar constraint for structured output
106    #[serde(skip_serializing_if = "Option::is_none")]
107    pub ebnf: Option<String>,
108
109    /// JSON schema constraint for structured output
110    #[serde(skip_serializing_if = "Option::is_none")]
111    pub json_schema: Option<String>,
112
113    /// Specific token IDs to use as stop conditions
114    #[serde(skip_serializing_if = "Option::is_none")]
115    pub stop_token_ids: Option<Vec<u32>>,
116
117    /// Skip trimming stop tokens from output
118    #[serde(default)]
119    pub no_stop_trim: bool,
120
121    /// Ignore end-of-sequence tokens during generation
122    #[serde(default)]
123    pub ignore_eos: bool,
124
125    /// Skip special tokens during detokenization
126    #[serde(default = "default_true")]
127    pub skip_special_tokens: bool,
128
129    /// Path to LoRA adapter(s) for model customization
130    #[serde(skip_serializing_if = "Option::is_none")]
131    pub lora_path: Option<String>,
132
133    /// Session parameters for continual prompting
134    #[serde(skip_serializing_if = "Option::is_none")]
135    pub session_params: Option<HashMap<String, Value>>,
136
137    /// Return model hidden states
138    #[serde(default)]
139    pub return_hidden_states: bool,
140
141    /// Sampling seed for deterministic outputs
142    #[serde(skip_serializing_if = "Option::is_none")]
143    pub sampling_seed: Option<u64>,
144
145    /// Additional fields including bootstrap info for PD routing
146    #[serde(flatten)]
147    pub other: Map<String, Value>,
148}
150impl GenerationRequest for CompletionRequest {
151    fn is_stream(&self) -> bool {
152        self.stream
153    }
154
155    fn get_model(&self) -> Option<&str> {
156        Some(&self.model)
157    }
158
159    fn extract_text_for_routing(&self) -> String {
160        match &self.prompt {
161            StringOrArray::String(s) => s.clone(),
162            StringOrArray::Array(v) => v.join(" "),
163        }
164    }
165}

// ============================================================================
// Response Types
// ============================================================================

171#[derive(Debug, Clone, Deserialize, Serialize)]
172pub struct CompletionResponse {
173    pub id: String,
174    pub object: String, // "text_completion"
175    pub created: u64,
176    pub model: String,
177    pub choices: Vec<CompletionChoice>,
178    #[serde(skip_serializing_if = "Option::is_none")]
179    pub usage: Option<Usage>,
180    #[serde(skip_serializing_if = "Option::is_none")]
181    pub system_fingerprint: Option<String>,
182}
184#[derive(Debug, Clone, Deserialize, Serialize)]
185pub struct CompletionChoice {
186    pub text: String,
187    pub index: u32,
188    #[serde(skip_serializing_if = "Option::is_none")]
189    pub logprobs: Option<LogProbs>,
190    pub finish_reason: Option<String>, // "stop", "length", "content_filter", etc.
191    /// Information about which stop condition was matched
192    #[serde(skip_serializing_if = "Option::is_none")]
193    pub matched_stop: Option<Value>, // Can be string or integer
194}
196#[derive(Debug, Clone, Deserialize, Serialize)]
197pub struct CompletionStreamResponse {
198    pub id: String,
199    pub object: String, // "text_completion"
200    pub created: u64,
201    pub choices: Vec<CompletionStreamChoice>,
202    pub model: String,
203    #[serde(skip_serializing_if = "Option::is_none")]
204    pub system_fingerprint: Option<String>,
205}
207#[derive(Debug, Clone, Deserialize, Serialize)]
208pub struct CompletionStreamChoice {
209    pub text: String,
210    pub index: u32,
211    #[serde(skip_serializing_if = "Option::is_none")]
212    pub logprobs: Option<LogProbs>,
213    pub finish_reason: Option<String>,
214}