qlora_rs/lib.rs

//! # qlora-rs
//!
//! 4-bit quantized `LoRA` (`QLoRA`) implementation for Rust.
//!
//! This crate provides:
//! - NF4 (4-bit `NormalFloat`) quantization
//! - Double quantization for memory efficiency
//! - `QLoRA` training with frozen quantized base weights
//! - GGUF model export for inference deployment
//!
//! ## Quick Start
//!
//! ```rust,ignore
//! use qlora_rs::{QLoraConfig, QuantizedLinear, quantize_nf4};
//! use candle_core::{Device, Tensor};
//!
//! // Quantize a weight tensor to 4-bit (the 64 is the quantization block size)
//! let weights = Tensor::randn(0f32, 1.0, (768, 768), &Device::Cpu)?;
//! let quantized = quantize_nf4(&weights, 64)?;
//!
//! // Create a QLoRA layer (768 inputs, 768 outputs)
//! let config = QLoraConfig::default();
//! let layer = QuantizedLinear::new(768, 768, config, &Device::Cpu)?;
//! ```
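//!
//! Since NF4 is lossy, a quick round-trip through [`dequantize_nf4`] shows the
//! reconstruction error (a sketch; this assumes `dequantize_nf4` takes the
//! quantized tensor and returns a full-precision `candle` tensor):
//!
//! ```rust,ignore
//! use qlora_rs::dequantize_nf4;
//!
//! let restored = dequantize_nf4(&quantized)?;
//! // Mean absolute reconstruction error: small but nonzero for 4-bit NF4.
//! let err = (weights - restored)?.abs()?.mean_all()?;
//! println!("mean abs error: {err}");
//! ```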
//!
//! ## Architecture
//!
//! `QLoRA` keeps base model weights frozen in 4-bit precision while training
//! `LoRA` adapters in full precision. This enables fine-tuning large models
//! on consumer hardware.
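//!
//! A conceptual sketch of the forward pass this yields (the function and
//! parameter names below are illustrative, not this crate's API; the real
//! layer is [`qlora::QuantizedLinear`]):
//!
//! ```rust,ignore
//! use candle_core::Tensor;
//! use qlora_rs::{dequantize_nf4, QuantizedTensor, Result};
//!
//! // `base_q`: frozen NF4 weight; `lora_a`/`lora_b`: trainable adapters.
//! fn qlora_forward(
//!     x: &Tensor,
//!     base_q: &QuantizedTensor,
//!     lora_a: &Tensor,
//!     lora_b: &Tensor,
//!     scale: f64, // alpha / rank
//! ) -> Result<Tensor> {
//!     // Dequantize the frozen base weight on the fly; no gradient flows here.
//!     let w = dequantize_nf4(base_q)?;
//!     let base_out = x.matmul(&w.t()?)?;
//!     // Low-rank update: x -> A -> B, scaled by alpha / rank.
//!     let lora_out = x.matmul(&lora_a.t()?)?.matmul(&lora_b.t()?)?;
//!     Ok(base_out.add(&(lora_out * scale)?)?)
//! }
//! ```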
//!
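//! ## Training
//!
//! Fine-tuning updates only the adapter weights, with the paged `AdamW`
//! optimizer re-exported from [`training`]. A loose sketch of a loop
//! (`QLoraTrainer::new`, `step`, and the `loss` field are assumed shapes,
//! not the verified API):
//!
//! ```rust,ignore
//! use qlora_rs::{QLoraTrainer, QLoraTrainingConfig};
//!
//! let config = QLoraTrainingConfig::default();
//! // Hypothetical constructor; see the `training` module for the real one.
//! let mut trainer = QLoraTrainer::new(model, config)?;
//! for batch in batches {
//!     let metrics = trainer.step(&batch)?; // forward, loss, optimizer update
//!     println!("loss: {}", metrics.loss);  // `TrainingMetrics` field assumed
//! }
//! ```
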
#![warn(missing_docs)]
#![warn(clippy::pedantic)]

pub mod error;
pub mod export;
pub mod formats;
#[cfg(feature = "cuda")]
pub mod kernels;
pub mod native;
pub mod qlora;
pub mod quantization;
pub mod training;

pub use error::{QLoraError, Result};
pub use formats::{export_model, export_native_format, ExportConfig, ExportFormat};
pub use qlora::{QLoraConfig, QLoraLayer, QuantizedLinear};
pub use quantization::{
    dequantize_nf4, dequantize_nf4_with_dtype, pad_for_quantization,
    pad_for_quantization_with_info, quantize_nf4, unpad_tensor, ComputeDType, PaddingInfo,
    QuantizationConfig, QuantizationStrategy, QuantizedTensor,
};
pub use training::{
    cross_entropy_loss, PagedAdamW, PagedAdamWState, QLoraTrainer, QLoraTrainingConfig,
    TrainingMetrics,
};