pub struct WgpuBlockManager {
pub device: Device,
pub queue: Queue,
pub blocks: Vec<WgpuBlock>,
pub hidden_buf: Buffer,
pub hidden_buf2: Buffer,
pub attn_out_buf: Buffer,
pub ffn_gate_buf: Buffer,
pub ffn_up_buf: Buffer,
pub ffn_silu_buf: Buffer,
pub norm_buf: Buffer,
pub q_buf: Buffer,
pub k_buf: Buffer,
pub v_buf: Buffer,
pub embed_weight: Buffer,
pub lm_head_weight: Buffer,
pub logits_buf: Buffer,
pub grad_hidden_buf: Buffer,
pub grad_logits_buf: Buffer,
pub hidden_size: u32,
pub intermediate_size: u32,
pub num_heads: u32,
pub num_kv_heads: u32,
pub head_dim: u32,
pub max_seq_len: u32,
pub vocab_size: u32,
pub num_layers: u32,
}
Manages all transformer blocks on the GPU, plus the shared buffers.
Fields§
§device: Device
§queue: Queue
§blocks: Vec<WgpuBlock>
§hidden_buf: Buffer
§hidden_buf2: Buffer
§attn_out_buf: Buffer
§ffn_gate_buf: Buffer
§ffn_up_buf: Buffer
§ffn_silu_buf: Buffer
§norm_buf: Buffer
§q_buf: Buffer
§k_buf: Buffer
§v_buf: Buffer
§embed_weight: Buffer
§lm_head_weight: Buffer
§logits_buf: Buffer
§grad_hidden_buf: Buffer
§grad_logits_buf: Buffer
§hidden_size: u32
§intermediate_size: u32
§num_heads: u32
§num_kv_heads: u32
§head_dim: u32
§max_seq_len: u32
§vocab_size: u32
§num_layers: u32
Implementations§
Source§impl WgpuBlockManager
impl WgpuBlockManager
Sourcepub fn new(
device: Device,
queue: Queue,
hidden_size: u32,
intermediate_size: u32,
num_heads: u32,
num_kv_heads: u32,
head_dim: u32,
num_layers: u32,
vocab_size: u32,
max_seq_len: u32,
_lora_rank: Option<u32>,
_lora_alpha: Option<f32>,
) -> Self
pub fn new( device: Device, queue: Queue, hidden_size: u32, intermediate_size: u32, num_heads: u32, num_kv_heads: u32, head_dim: u32, num_layers: u32, vocab_size: u32, max_seq_len: u32, _lora_rank: Option<u32>, _lora_alpha: Option<f32>, ) -> Self
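Create a new block manager and upload all transformer weights to GPU.
`weights_per_layer` is a closure that returns the F32 weights for each layer.
This avoids holding all 28 layers in CPU memory simultaneously.
NOTE(review): the signature above has no `weights_per_layer` parameter, so this description looks stale. Per-layer weights are presumably supplied one layer at a time through `upload_layer`, and the embedding and LM-head weights through `upload_embeddings` — confirm against the implementation. The underscore-prefixed `_lora_rank` and `_lora_alpha` parameters are presumably unused by `new`; verify.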
Sourcepub fn upload_layer(
&mut self,
layer_idx: usize,
input_norm: &[f32],
post_attn_norm: &[f32],
w_q: &[f32],
w_k: &[f32],
w_v: &[f32],
w_o: &[f32],
w_gate: &[f32],
w_up: &[f32],
w_down: &[f32],
lora_rank: Option<u32>,
lora_scale: Option<f32>,
)
pub fn upload_layer( &mut self, layer_idx: usize, input_norm: &[f32], post_attn_norm: &[f32], w_q: &[f32], w_k: &[f32], w_v: &[f32], w_o: &[f32], w_gate: &[f32], w_up: &[f32], w_down: &[f32], lora_rank: Option<u32>, lora_scale: Option<f32>, )
Upload a single transformer layer’s weights to GPU.
Sourcepub fn upload_embeddings(&mut self, embed: &[f32], lm_head: &[f32])
pub fn upload_embeddings(&mut self, embed: &[f32], lm_head: &[f32])
Upload embedding + lm_head weights.
Sourcepub fn gpu_memory_bytes(&self) -> u64
pub fn gpu_memory_bytes(&self) -> u64
Total GPU memory used (approximate, in bytes).
Sourcepub fn layer_count(&self) -> usize
pub fn layer_count(&self) -> usize
Number of uploaded layers.
Auto Trait Implementations§
impl Freeze for WgpuBlockManager
impl !RefUnwindSafe for WgpuBlockManager
impl Send for WgpuBlockManager
impl Sync for WgpuBlockManager
impl Unpin for WgpuBlockManager
impl UnsafeUnpin for WgpuBlockManager
impl !UnwindSafe for WgpuBlockManager
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> FmtForward for T
impl<T> FmtForward for T
Source§fn fmt_binary(self) -> FmtBinary<Self>where
Self: Binary,
fn fmt_binary(self) -> FmtBinary<Self>where
Self: Binary,
Causes `self` to use its `Binary` implementation when `Debug`-formatted.
Source§fn fmt_display(self) -> FmtDisplay<Self> where
Self: Display,
fn fmt_display(self) -> FmtDisplay<Self>where
Self: Display,
Causes `self` to use its `Display` implementation when `Debug`-formatted.
Source§fn fmt_lower_exp(self) -> FmtLowerExp<Self> where
Self: LowerExp,
fn fmt_lower_exp(self) -> FmtLowerExp<Self>where
Self: LowerExp,
Causes `self` to use its `LowerExp` implementation when `Debug`-formatted.
Source§fn fmt_lower_hex(self) -> FmtLowerHex<Self> where
Self: LowerHex,
fn fmt_lower_hex(self) -> FmtLowerHex<Self>where
Self: LowerHex,
Causes `self` to use its `LowerHex` implementation when `Debug`-formatted.
Source§fn fmt_octal(self) -> FmtOctal<Self> where
Self: Octal,
fn fmt_octal(self) -> FmtOctal<Self>where
Self: Octal,
Causes `self` to use its `Octal` implementation when `Debug`-formatted.
Source§fn fmt_pointer(self) -> FmtPointer<Self> where
Self: Pointer,
fn fmt_pointer(self) -> FmtPointer<Self>where
Self: Pointer,
Causes `self` to use its `Pointer` implementation when `Debug`-formatted.
Source§fn fmt_upper_exp(self) -> FmtUpperExp<Self> where
Self: UpperExp,
fn fmt_upper_exp(self) -> FmtUpperExp<Self>where
Self: UpperExp,
Causes `self` to use its `UpperExp` implementation when `Debug`-formatted.
Source§fn fmt_upper_hex(self) -> FmtUpperHex<Self> where
Self: UpperHex,
fn fmt_upper_hex(self) -> FmtUpperHex<Self>where
Self: UpperHex,
Causes `self` to use its `UpperHex` implementation when `Debug`-formatted.
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
Source§impl<T> Pipe for T where
T: ?Sized,
impl<T> Pipe for Twhere
T: ?Sized,
Source§fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> Rwhere
Self: Sized,
fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> Rwhere
Self: Sized,
Pipes by value. This is generally the method you want to use. Read more
Source§fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> Rwhere
R: 'a,
fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> Rwhere
R: 'a,
Borrows `self` and passes that borrow into the pipe function. Read more
Source§fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> R where
R: 'a,
fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> Rwhere
R: 'a,
Mutably borrows `self` and passes that borrow into the pipe function. Read more
Source§fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
Source§fn pipe_borrow_mut<'a, B, R>(
&'a mut self,
func: impl FnOnce(&'a mut B) -> R,
) -> R
fn pipe_borrow_mut<'a, B, R>( &'a mut self, func: impl FnOnce(&'a mut B) -> R, ) -> R
Source§fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
Borrows `self`, then passes `self.as_ref()` into the pipe function.
Source§fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
Mutably borrows `self`, then passes `self.as_mut()` into the pipe function.
Source§fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
Borrows `self`, then passes `self.deref()` into the pipe function.
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<T> PolicyExt for Twhere
T: ?Sized,
impl<T> PolicyExt for Twhere
T: ?Sized,
Source§impl<T> Tap for T
impl<T> Tap for T
Source§fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
Immutable access to the `Borrow<B>` of a value. Read more
Source§fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
Mutable access to the `BorrowMut<B>` of a value. Read more
Source§fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
Immutable access to the `AsRef<R>` view of a value. Read more
Source§fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
Mutable access to the `AsMut<R>` view of a value. Read more
Source§fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
Immutable access to the `Deref::Target` of a value. Read more
Source§fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
Mutable access to the `Deref::Target` of a value. Read more
Source§fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self
fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self
Calls `.tap()` only in debug builds, and is erased in release builds.
Source§fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self
fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self
Calls `.tap_mut()` only in debug builds, and is erased in release builds.
Source§fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
Calls `.tap_borrow()` only in debug builds, and is erased in release builds.
Source§fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
Calls `.tap_borrow_mut()` only in debug builds, and is erased in release builds.
Source§fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
Calls `.tap_ref()` only in debug builds, and is erased in release builds.
Source§fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
Calls `.tap_ref_mut()` only in debug builds, and is erased in release builds.
Source§fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
Calls `.tap_deref()` only in debug builds, and is erased in release builds.