pub struct AllocationRequest {
pub request_id: RequestId,
pub initial_tokens: usize,
pub max_sequence_length: usize,
pub num_layers: usize,
pub num_heads: usize,
pub head_dim: usize,
pub device: Device,
pub dtype: DataType,
pub priority: Priority,
}
Expand description
KV cache allocation request
Fields§
request_id: RequestId — Request ID this allocation is for
initial_tokens: usize — Initial number of tokens
max_sequence_length: usize — Maximum expected sequence length
num_layers: usize — Number of layers to cache
num_heads: usize — Number of attention heads
head_dim: usize — Head dimension
device: Device — Target device
dtype: DataType — Data type for cache
priority: Priority — Priority level for allocation
Implementations§
Source§impl AllocationRequest
impl AllocationRequest
Sourcepub fn estimated_memory_bytes(&self) -> usize
pub fn estimated_memory_bytes(&self) -> usize
Calculate estimated memory requirement
Trait Implementations§
Source§impl Clone for AllocationRequest
impl Clone for AllocationRequest
Source§fn clone(&self) -> AllocationRequest
fn clone(&self) -> AllocationRequest
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Auto Trait Implementations§
impl Freeze for AllocationRequest
impl RefUnwindSafe for AllocationRequest
impl Send for AllocationRequest
impl Sync for AllocationRequest
impl Unpin for AllocationRequest
impl UnsafeUnpin for AllocationRequest
impl UnwindSafe for AllocationRequest
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more