pub struct Request {
pub request_id: String,
pub priority: i32,
pub arrival_time: f64,
pub status: RequestStatus,
pub num_prompt_tokens: u32,
pub max_output_tokens: u32,
pub num_computed_tokens: u32,
pub num_output_tokens: u32,
pub num_tokens: u32,
pub num_cached_tokens: u32,
pub kv_blocks: Vec<BlockId>,
pub num_preemptions: u32,
pub first_token_time: Option<f64>,
pub completion_time: Option<f64>,
pub token_generation_times: Vec<f64>,
pub preempted_time: f64,
pub last_preempted_at: Option<f64>,
}
Request represents a single inference request in the simulation.
Fields
request_id: String — Unique request ID
priority: i32 — Client priority (a lower value means higher priority)
arrival_time: f64 — Arrival time (simulated time)
status: RequestStatus — Request status
num_prompt_tokens: u32 — Number of input tokens
max_output_tokens: u32 — Maximum number of output tokens to generate
num_computed_tokens: u32 — Number of tokens computed so far
num_output_tokens: u32 — Number of output tokens generated so far
num_tokens: u32 — Total tokens (prompt + output)
num_cached_tokens: u32 — Number of prefix-cached tokens
kv_blocks: Vec<BlockId> — KV cache blocks allocated to this request
num_preemptions: u32 — Number of times this request has been preempted
first_token_time: Option<f64> — Time when the first token was generated (TTFT tracking)
completion_time: Option<f64> — Time when the request completed
token_generation_times: Vec<f64> — Per-token generation times
preempted_time: f64 — Time spent preempted (not running)
last_preempted_at: Option<f64> — Last preemption start time
Implementations
impl Request
pub fn new(
request_id: String,
priority: i32,
arrival_time: f64,
num_prompt_tokens: u32,
max_output_tokens: u32,
) -> Self
Create a new request.
pub fn is_prefill(&self) -> bool
Check whether this request is in the prefill phase.
pub fn tokens_to_process(&self) -> u32
Get the number of tokens that need to be processed.
pub fn is_finished(&self) -> bool
Check whether the request is done.
pub fn kv_cache_size(&self, model: &ModelConfig) -> u64
Calculate the KV cache requirement for this request.
pub fn record_generated_tokens(
&mut self,
num_new_tokens: u32,
current_time: f64,
)
Record that tokens were generated (updates the output token count and the total).
pub fn mark_preempted(&mut self, current_time: f64)
Mark the request as preempted.