Expand description
Re-exports§
- pub use activation::SwiGlu;
- pub use activation::SwiGluConfig;
- pub use dynamic_batch::BatchConfig;
- pub use dynamic_batch::BatchDecision;
- pub use dynamic_batch::BatchMetrics;
- pub use dynamic_batch::BatchSlot;
- pub use dynamic_batch::ContinuousBatcher;
- pub use dynamic_batch::DraftedToken;
- pub use dynamic_batch::InferenceRequest;
- pub use dynamic_batch::LcgRng;
- pub use dynamic_batch::PagedKvManager;
- pub use dynamic_batch::PreemptionPolicy;
- pub use dynamic_batch::Priority;
- pub use dynamic_batch::RequestId;
- pub use dynamic_batch::SchedulingPolicy;
- pub use dynamic_batch::SpeculativeDecoder;
- pub use dynamic_batch::SpeculativeResult;
- pub use dynamic_batch::TokenBudgetAllocator;
- pub use error::DnnError;
- pub use error::DnnResult;
- pub use handle::DnnHandle;
- pub use position::AlibiBias;
- pub use position::DnnRng;
- pub use position::Rope;
- pub use position::RopeConfig;
- pub use position::alibi_slope;
- pub use types::Activation;
- pub use types::ConvAlgorithm;
- pub use types::ConvolutionDescriptor;
- pub use types::TensorDesc;
- pub use types::TensorDescMut;
- pub use types::TensorLayout;
- pub use types::pool_output_size;
Modules§
- activation
- CPU-reference gated-activation primitives.
- attn
- Attention mechanisms for transformer models.
- conv
- Convolution operations for deep learning.
- dynamic_batch
- Dynamic batching and continuous batching for inference serving.
- error
- Error types for the DNN crate.
- handle
- DNN handle management.
- layers
- CPU-side layer implementations.
- linear
- Fused linear (fully-connected) layer operations.
- moe
- Mixture of Experts (MoE) module.
- norm
- Normalization operations for DNN.
- pool
- Pooling operations for DNN.
- position
- CPU-reference positional-encoding primitives.
- prelude
- Prelude module for convenient glob imports.
- quantize
- Quantization and dequantization operations for DNN.
- resize
- Resize (interpolation) operations for DNN.
- rnn
- Recurrent Neural Network cells for DNN.
- types
- Core DNN type definitions.