Struct GenerateRequest

Source

pub struct GenerateRequest {
    pub text: Option<String>,
    pub model: Option<String>,
    pub input_ids: Option<InputIds>,
    pub input_embeds: Option<Value>,
    pub image_data: Option<Value>,
    pub video_data: Option<Value>,
    pub audio_data: Option<Value>,
    pub sampling_params: Option<SamplingParams>,
    pub return_logprob: Option<bool>,
    pub logprob_start_len: Option<i32>,
    pub top_logprobs_num: Option<i32>,
    pub token_ids_logprob: Option<Vec<u32>>,
    pub return_text_in_logprobs: bool,
    pub stream: bool,
    pub log_metrics: bool,
    pub return_hidden_states: bool,
    pub modalities: Option<Vec<String>>,
    pub session_params: Option<HashMap<String, Value>>,
    pub lora_path: Option<String>,
    pub lora_id: Option<String>,
    pub custom_logit_processor: Option<String>,
    pub bootstrap_host: Option<String>,
    pub bootstrap_port: Option<i32>,
    pub bootstrap_room: Option<i32>,
    pub bootstrap_pair_key: Option<String>,
    pub data_parallel_rank: Option<i32>,
    pub background: bool,
    pub conversation_id: Option<String>,
    pub priority: Option<i32>,
    pub extra_key: Option<String>,
    pub no_logs: bool,
    pub custom_labels: Option<HashMap<String, String>>,
    pub return_bytes: bool,
    pub return_entropy: bool,
    pub rid: Option<String>,
}

Fields§

§text: Option<String>

Text input - SGLang native format

§model: Option<String>

§input_ids: Option<InputIds>

Input IDs for tokenized input

§input_embeds: Option<Value>

Input embeddings for direct embedding input. Can be a 2D array (single request) or a 3D array (batch of requests). Placeholder for future use.

§image_data: Option<Value>

Image input data. Can be an image instance, file name, URL, or base64-encoded string. Supports single images, lists of images, or nested lists for batch processing. Placeholder for future use.

§video_data: Option<Value>

Video input data. Can be a file name, URL, or base64-encoded string. Supports single videos, lists of videos, or nested lists for batch processing. Placeholder for future use.

§audio_data: Option<Value>

Audio input data. Can be a file name, URL, or base64-encoded string. Supports single audio files, lists of audio files, or nested lists for batch processing. Placeholder for future use.

§sampling_params: Option<SamplingParams>

Sampling parameters (sglang style)

§return_logprob: Option<bool>

Whether to return logprobs

§logprob_start_len: Option<i32>

If return logprobs, the start location in the prompt for returning logprobs.

§top_logprobs_num: Option<i32>

If return logprobs, the number of top logprobs to return at each position.

§token_ids_logprob: Option<Vec<u32>>

If return logprobs, the token ids to return logprob for.

§return_text_in_logprobs: bool

Whether to detokenize tokens into text in the returned logprobs.

§stream: bool

Whether to stream the response

§log_metrics: bool

Whether to log metrics for this request (e.g. health_generate calls do not log metrics)

§return_hidden_states: bool

Return model hidden states

§modalities: Option<Vec<String>>

The modalities of the image data [image, multi-images, video]

§session_params: Option<HashMap<String, Value>>

Session parameters for continual prompting

§lora_path: Option<String>

Path to LoRA adapter(s) for model customization

§lora_id: Option<String>

LoRA adapter ID (if pre-loaded)

§custom_logit_processor: Option<String>

Custom logit processor for advanced sampling control. Must be a serialized instance of CustomLogitProcessor in python/sglang/srt/sampling/custom_logit_processor.py. Use the processor’s to_str() method to generate the serialized string.

§bootstrap_host: Option<String>

For disaggregated inference

§bootstrap_port: Option<i32>

For disaggregated inference

§bootstrap_room: Option<i32>

For disaggregated inference

§bootstrap_pair_key: Option<String>

For disaggregated inference

§data_parallel_rank: Option<i32>

Data parallel rank routing

§background: bool

Background response

§conversation_id: Option<String>

Conversation ID for tracking

§priority: Option<i32>

Priority for the request

§extra_key: Option<String>

Extra key for classifying the request (e.g. cache_salt)

§no_logs: bool

Whether to disallow logging for this request (e.g. due to ZDR)

§custom_labels: Option<HashMap<String, String>>

Custom metric labels

§return_bytes: bool

Whether to return bytes for image generation

§return_entropy: bool

Whether to return entropy

§rid: Option<String>

Request ID for tracking (inherited from BaseReq in Python)

GenerateRequest

Struct GenerateRequest Copy item path

Fields§

Trait Implementations§

impl Clone for GenerateRequest

fn clone(&self) -> GenerateRequest

fn clone_from(&mut self, source: &Self)

impl Debug for GenerateRequest

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl<'de> Deserialize<'de> for GenerateRequest

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

impl GenerationRequest for GenerateRequest

fn is_stream(&self) -> bool

fn get_model(&self) -> Option<&str>

fn extract_text_for_routing(&self) -> String

impl Normalizable for GenerateRequest

fn normalize(&mut self)

impl Serialize for GenerateRequest

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where __S: Serializer,

impl Validate for GenerateRequest

fn validate(&self) -> Result<(), ValidationErrors>

impl<'v_a> ValidateArgs<'v_a> for GenerateRequest

type Args = ()

fn validate_with_args(&self, args: Self::Args) -> Result<(), ValidationErrors>

Auto Trait Implementations§

impl Freeze for GenerateRequest

impl RefUnwindSafe for GenerateRequest

impl Send for GenerateRequest

impl Sync for GenerateRequest

impl Unpin for GenerateRequest

impl UnwindSafe for GenerateRequest

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

Struct GenerateRequest

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,