pub struct CompletionRequest {
pub model: String,
pub prompt: StringOrArray,
pub suffix: Option<String>,
pub max_tokens: Option<u32>,
pub temperature: Option<f32>,
pub top_p: Option<f32>,
pub n: Option<u32>,
pub stream: bool,
pub stream_options: Option<StreamOptions>,
pub logprobs: Option<u32>,
pub echo: bool,
pub stop: Option<StringOrArray>,
pub presence_penalty: Option<f32>,
pub frequency_penalty: Option<f32>,
pub best_of: Option<u32>,
pub logit_bias: Option<HashMap<String, f32>>,
pub user: Option<String>,
pub seed: Option<i64>,
pub top_k: Option<i32>,
pub min_p: Option<f32>,
pub min_tokens: Option<u32>,
pub repetition_penalty: Option<f32>,
pub regex: Option<String>,
pub ebnf: Option<String>,
pub json_schema: Option<String>,
pub stop_token_ids: Option<Vec<u32>>,
pub no_stop_trim: bool,
pub ignore_eos: bool,
pub skip_special_tokens: bool,
pub lora_path: Option<String>,
pub session_params: Option<HashMap<String, Value>>,
pub return_hidden_states: bool,
pub sampling_seed: Option<u64>,
pub other: Map<String, Value>,
}
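As a quick orientation, here is a minimal sketch of constructing a request in Rust, filling only the fields you need. It assumes CompletionRequest derives Default and that StringOrArray has a String variant; neither is shown on this page, so treat both as assumptions.

// Import paths are hypothetical; adjust to wherever these types live in your crate.
use crate::protocols::{CompletionRequest, StringOrArray};

fn build_request() -> CompletionRequest {
    CompletionRequest {
        model: "meta-llama/Llama-3.1-8B-Instruct".to_string(),
        // Assumes a `String` variant on StringOrArray (not shown on this page).
        prompt: StringOrArray::String("Once upon a time".to_string()),
        max_tokens: Some(128),
        temperature: Some(0.7),
        stream: false,
        // Assumes `#[derive(Default)]`; otherwise every remaining field must be set explicitly.
        ..Default::default()
    }
}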
Fields

model: String
ID of the model to use (required for OpenAI, optional for some implementations, such as SGLang)

prompt: StringOrArray
The prompt(s) to generate completions for

suffix: Option<String>
The suffix that comes after a completion of inserted text

max_tokens: Option<u32>
The maximum number of tokens to generate

temperature: Option<f32>
What sampling temperature to use, between 0 and 2

top_p: Option<f32>
An alternative to sampling with temperature (nucleus sampling)

n: Option<u32>
How many completions to generate for each prompt

stream: bool
Whether to stream back partial progress

stream_options: Option<StreamOptions>
Options for the streaming response

logprobs: Option<u32>
Include the log probabilities on the logprobs most likely tokens

echo: bool
Echo back the prompt in addition to the completion

stop: Option<StringOrArray>
Up to 4 sequences where the API will stop generating further tokens

presence_penalty: Option<f32>
Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far

frequency_penalty: Option<f32>
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far

best_of: Option<u32>
Generates best_of completions server-side and returns the “best”

logit_bias: Option<HashMap<String, f32>>
Modify the likelihood of specified tokens appearing in the completion

user: Option<String>
A unique identifier representing your end-user

seed: Option<i64>
If specified, our system will make a best effort to sample deterministically

top_k: Option<i32>
Top-k sampling parameter (-1 to disable)

min_p: Option<f32>
Min-p nucleus sampling parameter

min_tokens: Option<u32>
Minimum number of tokens to generate

repetition_penalty: Option<f32>
Repetition penalty for reducing repetitive text

regex: Option<String>
Regex constraint for output generation

ebnf: Option<String>
EBNF grammar constraint for structured output

json_schema: Option<String>
JSON schema constraint for structured output (see the sketch after this field list)

stop_token_ids: Option<Vec<u32>>
Specific token IDs to use as stop conditions

no_stop_trim: bool
Skip trimming stop tokens from the output

ignore_eos: bool
Ignore end-of-sequence tokens during generation

skip_special_tokens: bool
Skip special tokens during detokenization

lora_path: Option<String>
Path to LoRA adapter(s) for model customization

session_params: Option<HashMap<String, Value>>
Session parameters for continual prompting
return_hidden_states: bool
Return model hidden states
sampling_seed: Option<u64>
Sampling seed for deterministic outputs

other: Map<String, Value>
Additional fields, including bootstrap info for PD routing
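For the structured-output fields, here is a sketch of the JSON wire form of a request using the json_schema constraint. It assumes the usual serde derives (Serialize/Deserialize, with None-valued options omitted from the payload), which this page does not show.

use serde_json::json;

fn main() {
    // Sketch of a constrained request body; key names mirror the struct fields,
    // and options that would be None are simply left out.
    let body = json!({
        "model": "meta-llama/Llama-3.1-8B-Instruct",
        "prompt": "List three primary colors as JSON.",
        "max_tokens": 64,
        // `json_schema` carries the schema as a string, per its Option<String> type.
        "json_schema": r#"{"type": "object",
                           "properties": {"colors": {"type": "array", "items": {"type": "string"}}},
                           "required": ["colors"]}"#
    });
    println!("{}", serde_json::to_string_pretty(&body).unwrap());
}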
Trait Implementations

impl Clone for CompletionRequest

fn clone(&self) -> CompletionRequest
Returns a copy of the value.

fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.
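Because Clone is implemented, a shared base request can be duplicated and adjusted per call. A small sketch:

fn retry_with_higher_temperature(base: &CompletionRequest) -> CompletionRequest {
    // Clone the base request and change only the sampling settings for the retry.
    let mut retry = base.clone();
    retry.temperature = Some(1.0);
    retry.seed = None; // drop the fixed seed so the retry can sample differently
    retry
}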