pub struct ChatCompletionRequest {
pub messages: Vec<ChatMessage>,
pub model: String,
pub frequency_penalty: Option<f32>,
pub function_call: Option<FunctionCall>,
pub functions: Option<Vec<Function>>,
pub logit_bias: Option<HashMap<String, f32>>,
pub logprobs: bool,
pub max_tokens: Option<u32>,
pub max_completion_tokens: Option<u32>,
pub metadata: Option<HashMap<String, String>>,
pub modalities: Option<Vec<String>>,
pub n: Option<u32>,
pub parallel_tool_calls: Option<bool>,
pub presence_penalty: Option<f32>,
pub prompt_cache_key: Option<String>,
pub reasoning_effort: Option<String>,
pub response_format: Option<ResponseFormat>,
pub safety_identifier: Option<String>,
pub seed: Option<i64>,
pub service_tier: Option<String>,
pub stop: Option<StringOrArray>,
pub stream: bool,
pub stream_options: Option<StreamOptions>,
pub temperature: Option<f32>,
pub tool_choice: Option<ToolChoice>,
pub tools: Option<Vec<Tool>>,
pub top_logprobs: Option<u32>,
pub top_p: Option<f32>,
pub verbosity: Option<i32>,
pub top_k: Option<i32>,
pub min_p: Option<f32>,
pub min_tokens: Option<u32>,
pub repetition_penalty: Option<f32>,
pub regex: Option<String>,
pub ebnf: Option<String>,
pub stop_token_ids: Option<Vec<u32>>,
pub no_stop_trim: bool,
pub ignore_eos: bool,
pub continue_final_message: bool,
pub skip_special_tokens: bool,
pub lora_path: Option<String>,
pub session_params: Option<HashMap<String, Value>>,
pub separate_reasoning: bool,
pub stream_reasoning: bool,
pub chat_template_kwargs: Option<HashMap<String, Value>>,
pub return_hidden_states: bool,
pub sampling_seed: Option<u64>,
}
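
As a quick usage sketch, a request can be built field-by-field with the remaining options left at their defaults. Only Clone is documented under Trait Implementations below, so the Default impl used here is an assumption, and the model ID and parameter values are illustrative only.

fn build_request(messages: Vec<ChatMessage>) -> ChatCompletionRequest {
    ChatCompletionRequest {
        messages,
        model: "my-model".to_string(),    // illustrative model ID
        temperature: Some(0.7),
        top_p: Some(0.95),
        max_completion_tokens: Some(256),
        stream: false,
        // Leave every other option unset / at its default.
        ..Default::default()              // assumes a Default impl (not shown in the trait list)
    }
}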
Fields

messages: Vec<ChatMessage>
    A list of messages comprising the conversation so far.
model: String
    ID of the model to use.
frequency_penalty: Option<f32>
    Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far.
function_call: Option<FunctionCall>
    Deprecated: replaced by tool_choice.
functions: Option<Vec<Function>>
    Deprecated: replaced by tools.
logit_bias: Option<HashMap<String, f32>>
    Modify the likelihood of specified tokens appearing in the completion.
logprobs: bool
    Whether to return log probabilities of the output tokens.
max_tokens: Option<u32>
    Deprecated: replaced by max_completion_tokens.
max_completion_tokens: Option<u32>
    An upper bound on the number of tokens that can be generated for a completion.
metadata: Option<HashMap<String, String>>
    Developer-defined tags and values used for filtering completions in the dashboard.
modalities: Option<Vec<String>>
    Output types that the model should generate for this request.
n: Option<u32>
    How many chat completion choices to generate for each input message.
parallel_tool_calls: Option<bool>
    Whether to enable parallel function calling during tool use.
presence_penalty: Option<f32>
    Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far.
prompt_cache_key: Option<String>
    Cache key for prompts (beta feature).
reasoning_effort: Option<String>
    Effort level for reasoning models (low, medium, or high).
response_format: Option<ResponseFormat>
    An object specifying the format that the model must output.
safety_identifier: Option<String>
    Safety identifier for content moderation.
seed: Option<i64>
    Deprecated: this feature is in legacy mode.
service_tier: Option<String>
    The service tier to use for this request.
stop: Option<StringOrArray>
    Up to 4 sequences where the API will stop generating further tokens.
stream: bool
    If set, partial message deltas will be sent.
stream_options: Option<StreamOptions>
    Options for the streaming response.
temperature: Option<f32>
    What sampling temperature to use, between 0 and 2.
tool_choice: Option<ToolChoice>
    Controls which (if any) tool is called by the model.
tools: Option<Vec<Tool>>
    A list of tools the model may call.
top_logprobs: Option<u32>
    An integer between 0 and 20 specifying the number of most likely tokens to return.
top_p: Option<f32>
    An alternative to sampling with temperature (nucleus sampling).
verbosity: Option<i32>
    Verbosity level for debugging.
top_k: Option<i32>
    Top-k sampling parameter (-1 to disable).
min_p: Option<f32>
    Min-p sampling parameter.
min_tokens: Option<u32>
    Minimum number of tokens to generate.
repetition_penalty: Option<f32>
    Repetition penalty for reducing repetitive text.
regex: Option<String>
    Regex constraint for output generation.
ebnf: Option<String>
    EBNF grammar constraint for structured output.
stop_token_ids: Option<Vec<u32>>
    Specific token IDs to use as stop conditions.
no_stop_trim: bool
    Skip trimming stop tokens from the output.
ignore_eos: bool
    Ignore end-of-sequence tokens during generation.
continue_final_message: bool
    Continue generating from the final assistant message.
skip_special_tokens: bool
    Skip special tokens during detokenization.
lora_path: Option<String>
    Path to LoRA adapter(s) for model customization.
session_params: Option<HashMap<String, Value>>
    Session parameters for continual prompting.
separate_reasoning: bool
    Separate reasoning content from the final answer (O1-style models).
stream_reasoning: bool
    Stream reasoning tokens during generation.
chat_template_kwargs: Option<HashMap<String, Value>>
    Additional keyword arguments passed to the chat template.
return_hidden_states: bool
    Return model hidden states.
sampling_seed: Option<u64>
    Random seed for sampling, for deterministic outputs.
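
The fields from top_k downward appear to be engine-side extensions beyond the standard chat-completions parameters. A minimal sketch of constrained decoding with them, reusing build_request from above; the pattern and token ID are illustrative, and presumably at most one of regex or ebnf is set on a given request:

let mut constrained = build_request(conversation);        // `conversation`: Vec<ChatMessage> from the caller
constrained.regex = Some(r"[A-Z]{3}-\d{4}".to_string());  // force an ID-shaped completion (illustrative pattern)
constrained.stop_token_ids = Some(vec![2]);               // illustrative token ID, not a real vocabulary value
constrained.separate_reasoning = true;                    // split reasoning content from the final answer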
Trait Implementations

impl Clone for ChatCompletionRequest
    fn clone(&self) -> ChatCompletionRequest
    fn clone_from(&mut self, source: &Self)
        Performs copy-assignment from source.
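
Because the struct owns several heap-allocated collections, clone_from can be preferable to clone when overwriting an existing request value: per the standard-library contract, it may reuse the destination's existing allocations instead of allocating fresh ones. A small sketch, again using build_request from above:

let a = build_request(conversation_a);
let mut b = a.clone();                   // deep copy via Clone
b.model = "another-model".to_string();   // illustrative mutation
b.clone_from(&a);                        // copy-assign from `a`; may reuse `b`'s buffers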