§qlora-rs
4-bit quantized LoRA (QLoRA) implementation for Rust.
This crate provides:
- NF4 (4-bit NormalFloat) quantization
- Double quantization for memory efficiency
- QLoRA training with frozen quantized base weights
- GGUF model export for inference deployment
§Quick Start
use qlora_rs::{QLoraConfig, QuantizedLinear, quantize_nf4};
use candle_core::{Device, Tensor};

// Create an example weight tensor (a 768×768 projection matrix)
let weights = Tensor::randn(0f32, 1f32, (768, 768), &Device::Cpu)?;

// Quantize the weight tensor to 4-bit NF4 with a block size of 64
let quantized = quantize_nf4(&weights, 64)?;

// Create a QLoRA layer (768 inputs, 768 outputs) with the default config
let config = QLoraConfig::default();
let layer = QuantizedLinear::new(768, 768, config, &Device::Cpu)?;

§Architecture
QLoRA keeps the base model weights frozen in 4-bit precision while training
small LoRA adapters in full precision. Because gradients and optimizer state
are only needed for the adapters, this enables fine-tuning large models on
consumer hardware.
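A typical QLoRA linear layer dequantizes its 4-bit weight for the matmul and adds a scaled low-rank update from the adapters. Below is a minimal sketch of that forward pass written against plain candle_core tensors rather than this crate's types; the qlora_forward helper, the pre-dequantized w_dequant argument, and the alpha / r scaling are illustrative assumptions, not this crate's internal API.

use candle_core::{Result, Tensor};

/// Sketch of a QLoRA forward pass: frozen base weight plus trainable low-rank update.
/// (Illustrative only — not taken from qlora-rs internals.)
fn qlora_forward(
    x: &Tensor,         // (batch, in) activations
    w_dequant: &Tensor, // (out, in) base weight, dequantized from NF4 for the matmul
    lora_a: &Tensor,    // (r, in) trainable adapter factor
    lora_b: &Tensor,    // (out, r) trainable adapter factor
    scale: f64,         // typically lora_alpha / r
) -> Result<Tensor> {
    // Frozen path: x · Wᵀ (the base weight receives no updates)
    let base = x.matmul(&w_dequant.t()?)?;
    // Adapter path: scale · (x · Aᵀ) · Bᵀ — only A and B are trained
    let delta = x
        .matmul(&lora_a.t()?)?
        .matmul(&lora_b.t()?)?
        .affine(scale, 0.0)?;
    base + delta
}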
Re-exports§
pub use error::QLoraError;
pub use error::Result;
pub use formats::export_model;
pub use formats::export_native_format;
pub use formats::ExportConfig;
pub use formats::ExportFormat;
pub use qlora::QLoraConfig;
pub use qlora::QLoraLayer;
pub use qlora::QuantizedLinear;
pub use quantization::dequantize_nf4;
pub use quantization::dequantize_nf4_with_dtype;
pub use quantization::pad_for_quantization;
pub use quantization::pad_for_quantization_with_info;
pub use quantization::quantize_nf4;
pub use quantization::unpad_tensor;
pub use quantization::ComputeDType;
pub use quantization::PaddingInfo;
pub use quantization::QuantizationConfig;
pub use quantization::QuantizationStrategy;
pub use quantization::QuantizedTensor;
pub use training::cross_entropy_loss;
pub use training::PagedAdamW;
pub use training::PagedAdamWState;
pub use training::QLoraTrainer;
pub use training::QLoraTrainingConfig;
pub use training::TrainingMetrics;