pub struct InferenceConfig {
pub compute_backend: ComputeBackend,
pub quantization: Quantization,
pub batch_size: usize,
pub thread_count: usize,
pub block_size: usize,
pub use_flash_attention: bool,
pub device_type: DeviceType,
pub dtype: DType,
}
Configuration generated by auto-detection.
Fields§
compute_backend: ComputeBackend — Selected compute backend
quantization: Quantization — Recommended quantization
batch_size: usize — Recommended batch size
thread_count: usize — Recommended thread count for CPU inference
block_size: usize — Optimal block size for attention
use_flash_attention: bool — Whether to use flash attention
device_type: DeviceType — Device type for the backend crate
dtype: DType — Data type for tensors
Implementations§
Source§impl InferenceConfig
impl InferenceConfig
Sourcepub fn low_memory() -> Self
pub fn low_memory() -> Self
Create a config optimized for low memory usage
Sourcepub fn high_throughput() -> Self
pub fn high_throughput() -> Self
Create a config optimized for high throughput
Sourcepub fn low_latency() -> Self
pub fn low_latency() -> Self
Create a config optimized for low latency
Sourcepub fn estimated_tokens_per_second(&self) -> f32
pub fn estimated_tokens_per_second(&self) -> f32
Get estimated tokens per second for this configuration
Sourcepub fn power_efficient() -> Self
pub fn power_efficient() -> Self
Create a config optimized for power efficiency (uses ANE when available)
Trait Implementations§
Source§impl Clone for InferenceConfig
impl Clone for InferenceConfig
Source§fn clone(&self) -> InferenceConfig
fn clone(&self) -> InferenceConfig
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Source§impl Debug for InferenceConfig
impl Debug for InferenceConfig
Source§impl Default for InferenceConfig
impl Default for InferenceConfig
Source§impl<'de> Deserialize<'de> for InferenceConfig
impl<'de> Deserialize<'de> for InferenceConfig
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Auto Trait Implementations§
impl Freeze for InferenceConfig
impl RefUnwindSafe for InferenceConfig
impl Send for InferenceConfig
impl Sync for InferenceConfig
impl Unpin for InferenceConfig
impl UnwindSafe for InferenceConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> CloneToUninit for T
where
    T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more