use super::Recipe;
/// Registers the model-training recipes with `cookbook`.
///
/// Three recipes are added, in this order: `training-lora`,
/// `training-qlora` and `training-autograd`. Each recipe carries a problem
/// statement, component and tag lists, an illustrative code snippet, the ids
/// of related recipes, and a small test module stored as text.
pub fn register_training_recipes(cookbook: &mut super::Cookbook) {
    cookbook.add(
        Recipe::new("training-lora", "LoRA Fine-tuning")
            .with_problem("Fine-tune large models efficiently with Low-Rank Adaptation")
            .with_components(vec!["entrenar", "aprender", "alimentar"])
            .with_tags(vec!["training", "lora", "fine-tuning", "efficient", "llm"])
            .with_code(
                r#"use entrenar::prelude::*;
// Load base model
let model = Model::load("llama-7b.apr")?;
// Configure LoRA
let lora_config = LoraConfig {
r: 16, // Rank
alpha: 32, // Scaling factor
dropout: 0.1,
target_modules: vec!["q_proj", "v_proj"],
};
// Apply LoRA adapters
let model = model.with_lora(lora_config)?;
// Only ~0.1% of parameters are trainable now
println!("Trainable params: {}", model.trainable_params());
// Training loop
let optimizer = AdamW::new(model.trainable_params(), 1e-4);
for batch in dataloader {
let loss = model.forward(&batch)?;
loss.backward()?;
optimizer.step()?;
}
// Save LoRA weights only (small file)
model.save_lora("adapter.lora")?;
// Later: merge for inference
// let merged = Model::load("llama-7b.apr")?.merge_lora("adapter.lora")?;
"#,
            )
            .with_related(vec!["training-qlora", "training-autograd"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
#[test]
fn test_lora_config_rank_and_alpha() {
let rank = 16;
let alpha = 32;
assert!(rank > 0 && alpha >= rank);
}
#[test]
fn test_trainable_params_fraction() {
let total = 1_000_000;
let lora = 8192;
let fraction = lora as f64 / total as f64;
assert!(fraction < 0.1);
}
#[test]
fn test_dropout_in_valid_range() {
let dropout = 0.1_f64;
assert!(dropout >= 0.0 && dropout <= 1.0);
}
}",
            ),
    );
    cookbook.add(
        Recipe::new("training-qlora", "QLoRA Quantized Fine-tuning")
            .with_problem("Fine-tune 4-bit quantized models on consumer hardware")
            .with_components(vec!["entrenar", "aprender"])
            .with_tags(vec![
                "training",
                "qlora",
                "quantization",
                "4bit",
                "memory-efficient",
            ])
            // The nested `LoraConfig` literal fills its remaining fields with
            // `..Default::default()`: a bare `..` has no base expression and is
            // not a valid struct expression on stable Rust.
            // NOTE(review): assumes `LoraConfig` implements `Default` — confirm.
            .with_code(
                r#"use entrenar::prelude::*;
// Load 4-bit quantized model
let model = Model::load_quantized("llama-7b.q4_k.gguf")?;
// QLoRA config (LoRA on quantized base)
let qlora_config = QLoraConfig {
lora: LoraConfig { r: 64, alpha: 16, dropout: 0.1, ..Default::default() },
nf4: true, // NormalFloat4 quantization
double_quant: true, // Double quantization for memory
compute_dtype: F16, // Compute in fp16
};
let model = model.with_qlora(qlora_config)?;
// Train on 24GB GPU (fits 7B model!)
let trainer = Trainer::new(model)
.gradient_checkpointing(true)
.batch_size(4)
.gradient_accumulation(4);
trainer.train(&dataset, 3)?; // 3 epochs
"#,
            )
            .with_related(vec!["training-lora"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
#[test]
fn test_quantization_bits_valid() {
let bits = 4;
assert!(bits == 4 || bits == 8);
}
#[test]
fn test_effective_batch_size() {
let batch_size = 4;
let grad_accum = 4;
let effective = batch_size * grad_accum;
assert_eq!(effective, 16);
}
#[test]
fn test_nf4_requires_4bit() {
let nf4 = true;
let bits = 4;
assert!(nf4 && bits == 4);
}
}",
            ),
    );
    cookbook.add(
        Recipe::new("training-autograd", "Custom Training with Autograd")
            .with_problem("Build custom neural networks with automatic differentiation")
            .with_components(vec!["entrenar", "trueno"])
            .with_tags(vec!["training", "autograd", "neural-network", "custom"])
            .with_code(
                r#"use entrenar::autograd::*;
// Define model with autograd tensors
let w1 = Tensor::randn(&[784, 256]).requires_grad();
let w2 = Tensor::randn(&[256, 10]).requires_grad();
// Forward pass (computation graph built automatically)
fn forward(x: &Tensor, w1: &Tensor, w2: &Tensor) -> Tensor {
let h = x.matmul(w1).relu();
h.matmul(w2).softmax(-1)
}
// Training loop
let optimizer = SGD::new(vec![&w1, &w2], 0.01);
for (x, y) in dataloader {
let pred = forward(&x, &w1, &w2);
let loss = cross_entropy(&pred, &y);
// Backward pass (gradients computed automatically)
loss.backward();
optimizer.step();
optimizer.zero_grad();
}
// Gradients accessible
println!("w1 grad: {:?}", w1.grad());
"#,
            )
            .with_related(vec!["training-lora", "ml-random-forest"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
#[test]
fn test_weight_matrix_dimensions() {
let input_dim = 784;
let hidden_dim = 256;
let weights = vec![vec![0.0_f64; hidden_dim]; input_dim];
assert_eq!(weights.len(), input_dim);
}
#[test]
fn test_softmax_sums_to_one() {
let logits = vec![1.0_f64, 2.0, 3.0];
let max = logits.iter().copied().fold(f64::NEG_INFINITY, f64::max);
let exp_sum: f64 = logits.iter().map(|x| (x - max).exp()).sum();
let sum: f64 = logits.iter().map(|x| (x - max).exp() / exp_sum).sum();
assert!((sum - 1.0).abs() < 1e-6);
}
#[test]
fn test_learning_rate_positive() {
let lr = 0.01_f64;
assert!(lr > 0.0);
}
}",
            ),
    );
}
/// Registers the data-handling recipes with `cookbook`.
///
/// Adds `data-alimentar` first and `data-preprocessing` second. The snippet
/// and test texts are kept in local constants so each builder chain stays
/// short enough to read at a glance.
pub fn register_data_recipes(cookbook: &mut super::Cookbook) {
    // Text shown for the `data-alimentar` recipe.
    const LOADING_SNIPPET: &str = r#"use alimentar::prelude::*;
// Load Parquet with zero-copy (memory-mapped)
let dataset = ParquetDataset::open("data.parquet")?
.select(&["features", "label"])?
.filter(|row| row["label"].as_i64() > 0)?;
// Iterate with batching
let dataloader = DataLoader::new(dataset)
.batch_size(32)
.shuffle(true)
.num_workers(4);
for batch in dataloader {
// batch.features is Arrow array (zero-copy)
let features = batch["features"].as_tensor()?;
let labels = batch["label"].as_tensor()?;
model.train_step(&features, &labels)?;
}
// Streaming from remote (S3, HuggingFace)
let dataset = Dataset::from_hub("username/dataset")?
.streaming(true); // Don't download entire dataset
"#;
    const LOADING_TESTS: &str = r#"#[cfg(test)]
mod tests {
#[test]
fn test_batch_size_config() {
let batch_size = 32_u32;
assert!(batch_size > 0);
}
#[test]
fn test_column_selection() {
let columns = vec!["features", "label"];
assert_eq!(columns.len(), 2);
}
#[test]
fn test_worker_count() {
let workers = 4;
assert!(workers > 0 && workers <= 16);
}
}"#;

    // Text shown for the `data-preprocessing` recipe.
    const PREPROCESSING_SNIPPET: &str = r#"use alimentar::prelude::*;
use aprender::preprocessing::*;
// Build preprocessing pipeline
let pipeline = Pipeline::new()
.add(StandardScaler::fit(&train_data)?)
.add(OneHotEncoder::fit(&["category"])?)
.add(Imputer::median());
// Apply to train/test
let X_train = pipeline.transform(&train_data)?;
let X_test = pipeline.transform(&test_data)?;
// Save pipeline for inference
pipeline.save("preprocess.pipeline")?;
// Later: load and apply
let pipeline = Pipeline::load("preprocess.pipeline")?;
let X_new = pipeline.transform(&new_data)?;
"#;
    const PREPROCESSING_TESTS: &str = r#"#[cfg(test)]
mod tests {
#[test]
fn test_pipeline_step_count() {
let steps = vec!["scale", "encode", "impute"];
assert_eq!(steps.len(), 3);
}
#[test]
fn test_transform_preserves_row_count() {
let input_rows = 1000;
let output_rows = 1000;
assert_eq!(input_rows, output_rows);
}
#[test]
fn test_scaler_std_positive() {
let std_dev = 1.0_f64;
assert!(std_dev > 0.0);
}
}"#;

    let loading_recipe = Recipe::new("data-alimentar", "Zero-Copy Data Loading")
        .with_problem("Load large datasets efficiently with memory mapping")
        .with_components(vec!["alimentar", "trueno"])
        .with_tags(vec!["data", "loading", "parquet", "arrow", "zero-copy"])
        .with_code(LOADING_SNIPPET)
        .with_related(vec!["data-preprocessing", "ml-random-forest"])
        .with_test_code(LOADING_TESTS);
    cookbook.add(loading_recipe);

    let preprocessing_recipe = Recipe::new("data-preprocessing", "Data Preprocessing Pipeline")
        .with_problem("Build reproducible preprocessing pipelines")
        .with_components(vec!["alimentar", "aprender"])
        .with_tags(vec!["data", "preprocessing", "pipeline", "transforms"])
        .with_code(PREPROCESSING_SNIPPET)
        .with_related(vec!["data-alimentar"])
        .with_test_code(PREPROCESSING_TESTS);
    cookbook.add(preprocessing_recipe);
}
/// Registers the model-registry recipes with `cookbook`.
///
/// Adds `registry-pacha` first and `registry-hf` second. Snippet and test
/// texts live in local constants; each recipe is assembled into a named
/// local before being handed to the cookbook.
pub fn register_registry_recipes(cookbook: &mut super::Cookbook) {
    // Text shown for the `registry-pacha` recipe.
    const PACHA_SNIPPET: &str = r#"use pacha::prelude::*;
// Initialize registry
let registry = Registry::new("./models")?;
// Register model with metadata
let model_card = ModelCard {
name: "sentiment-classifier",
version: "1.0.0",
description: "BERT-based sentiment analysis",
metrics: hashmap!{
"accuracy" => 0.94,
"f1" => 0.92,
},
license: "MIT",
authors: vec!["team@example.com"],
};
// Push with Ed25519 signature
let artifact = registry.push(
"model.apr",
model_card,
SigningKey::from_env()?, // PACHA_SIGNING_KEY
)?;
println!("Registered: {}@{}", artifact.name, artifact.version);
println!("Hash: {}", artifact.blake3_hash);
// Pull model (verifies signature)
let model_path = registry.pull("sentiment-classifier", "1.0.0")?;
// List versions
for version in registry.versions("sentiment-classifier")? {
println!("{} - {}", version.version, version.created_at);
}
"#;
    const PACHA_TESTS: &str = r#"#[cfg(test)]
mod tests {
#[test]
fn test_model_card_metadata() {
let name = "sentiment-classifier";
let version = "1.0.0";
assert!(!name.is_empty());
assert!(version.chars().filter(|c| *c == '.').count() == 2);
}
#[test]
fn test_version_string_format() {
let version = "1.0.0";
let parts: Vec<_> = version.split('.').collect();
assert_eq!(parts.len(), 3);
}
#[test]
fn test_hash_length() {
let blake3_hash = "a".repeat(64);
assert_eq!(blake3_hash.len(), 64);
}
}"#;

    // Text shown for the `registry-hf` recipe.
    const HF_HUB_SNIPPET: &str = r#"use hf_hub::api::sync::Api;
// Initialize API (uses HF_TOKEN env var if set)
let api = Api::new()?;
// Download model files
let repo = api.model("meta-llama/Llama-2-7b");
let model_path = repo.get("model.safetensors")?;
let config_path = repo.get("config.json")?;
// Files cached in ~/.cache/huggingface/hub/
println!("Model: {}", model_path.display());
// Download specific revision
let repo = api.model("meta-llama/Llama-2-7b").revision("main");
let path = repo.get("tokenizer.json")?;
// Progress callback
let repo = api.model("big-model").progress(|p| {
println!("Downloading: {:.1}%", p.percent * 100.0);
});
"#;
    const HF_HUB_TESTS: &str = r#"#[cfg(test)]
mod tests {
#[test]
fn test_api_url_valid() {
let url = "https://huggingface.co";
assert!(url.starts_with("https://"));
}
#[test]
fn test_model_path_structure() {
let org = "meta-llama";
let model = "Llama-2-7b";
let path = format!("{}/{}", org, model);
assert_eq!(path.split('/').count(), 2);
}
#[test]
fn test_revision_default() {
let revision = "main";
assert_eq!(revision, "main");
}
}"#;

    let pacha_recipe = Recipe::new("registry-pacha", "Model Registry with Pacha")
        .with_problem("Version, sign, and distribute ML models securely")
        .with_components(vec!["pacha", "aprender"])
        .with_tags(vec![
            "registry",
            "versioning",
            "signing",
            "distribution",
            "mlops",
        ])
        .with_code(PACHA_SNIPPET)
        .with_related(vec!["registry-hf", "ml-serving"])
        .with_test_code(PACHA_TESTS);
    cookbook.add(pacha_recipe);

    let hf_hub_recipe = Recipe::new("registry-hf", "HuggingFace Hub Integration")
        .with_problem("Download and cache models from HuggingFace Hub")
        .with_components(vec!["hf-hub", "aprender", "realizar"])
        .with_tags(vec!["registry", "huggingface", "download", "cache"])
        .with_code(HF_HUB_SNIPPET)
        .with_related(vec!["registry-pacha", "speech-whisper"])
        .with_test_code(HF_HUB_TESTS);
    cookbook.add(hf_hub_recipe);
}
/// Registers the retrieval-augmented-generation recipes with `cookbook`.
///
/// Adds `rag-pipeline` first and `rag-semantic-search` second. Snippet and
/// test texts live in local constants; each recipe is assembled into a named
/// local before being handed to the cookbook.
pub fn register_rag_recipes(cookbook: &mut super::Cookbook) {
    // Text shown for the `rag-pipeline` recipe.
    const PIPELINE_SNIPPET: &str = r#"use trueno_rag::prelude::*;
// Initialize RAG pipeline
let rag = RagPipeline::builder()
.chunker(SemanticChunker::new(512)) // Semantic chunking
.embedder(Embedder::load("bge-small-en")?)
.retriever(HybridRetriever::new()
.bm25_weight(0.3)
.dense_weight(0.7))
.reranker(CrossEncoder::load("ms-marco-MiniLM")?)
.build()?;
// Index documents
for doc in documents {
rag.add_document(&doc)?;
}
rag.build_index()?;
// Query with retrieval
let query = "What is the capital of France?";
let results = rag.retrieve(query, 5)?; // Top 5 chunks
for (i, chunk) in results.iter().enumerate() {
println!("{}. [score: {:.3}] {}", i+1, chunk.score, chunk.text);
}
// Full RAG with generation
let context = rag.retrieve_context(query, 3)?;
let prompt = format!("Context:\n{}\n\nQuestion: {}\nAnswer:", context, query);
let answer = llm.generate(&prompt)?;
"#;
    const PIPELINE_TESTS: &str = r"#[cfg(test)]
mod tests {
#[test]
fn test_top_k_parameter() {
let top_k = 5;
assert!(top_k > 0 && top_k <= 100);
}
#[test]
fn test_chunk_size_exceeds_overlap() {
let chunk_size = 512;
let overlap = 50;
assert!(chunk_size > overlap);
}
#[test]
fn test_retriever_weights_sum_to_one() {
let bm25_weight = 0.3_f64;
let vector_weight = 0.7_f64;
assert!((bm25_weight + vector_weight - 1.0).abs() < 1e-6);
}
}";

    // Text shown for the `rag-semantic-search` recipe.
    const SEARCH_SNIPPET: &str = r#"use trueno_db::prelude::*;
use trueno_rag::embeddings::*;
// Initialize vector store with HNSW index
let db = VectorDb::open("vectors.db")?
.with_index(HnswConfig {
m: 16,
ef_construction: 200,
ef_search: 50,
});
// Embed and store documents
let embedder = Embedder::load("bge-small-en")?;
for doc in documents {
let embedding = embedder.embed(&doc.text)?;
db.insert(&doc.id, &embedding, &doc.metadata)?;
}
// Search
let query_embedding = embedder.embed("machine learning")?;
let results = db.search(&query_embedding, 10)?;
for result in results {
println!("{}: {:.3}", result.id, result.score);
}
// Filtered search
let results = db.search_filtered(
&query_embedding,
10,
|meta| meta["category"] == "science",
)?;
"#;
    const SEARCH_TESTS: &str = r"#[cfg(test)]
mod tests {
#[test]
fn test_hnsw_config_params() {
let m = 16;
let ef_construction = 200;
assert!(m >= 4 && m <= 64);
assert!(ef_construction >= m);
}
#[test]
fn test_search_result_ordering() {
let scores = vec![0.95, 0.85, 0.75];
let is_sorted = scores.windows(2).all(|w| w[0] >= w[1]);
assert!(is_sorted);
}
#[test]
fn test_filter_predicate() {
let min_score = 0.5_f64;
let result_score = 0.75_f64;
assert!(result_score >= min_score);
}
}";

    let pipeline_recipe = Recipe::new("rag-pipeline", "RAG Pipeline with Trueno-RAG")
        .with_problem("Build retrieval-augmented generation pipelines")
        .with_components(vec!["trueno-rag", "trueno-db", "aprender"])
        .with_tags(vec![
            "rag",
            "retrieval",
            "generation",
            "embeddings",
            "search",
        ])
        .with_code(PIPELINE_SNIPPET)
        .with_related(vec!["rag-semantic-search", "ml-serving"])
        .with_test_code(PIPELINE_TESTS);
    cookbook.add(pipeline_recipe);

    let search_recipe = Recipe::new("rag-semantic-search", "Semantic Search Engine")
        .with_problem("Build fast semantic search over documents")
        .with_components(vec!["trueno-db", "trueno-rag"])
        .with_tags(vec![
            "search",
            "semantic",
            "embeddings",
            "hnsw",
            "vector-db",
        ])
        .with_code(SEARCH_SNIPPET)
        .with_related(vec!["rag-pipeline"])
        .with_test_code(SEARCH_TESTS);
    cookbook.add(search_recipe);
}
/// Registers the visualization recipes with `cookbook`.
///
/// Adds `viz-terminal` first and `viz-png` second. Snippet and test texts
/// live in local constants; each recipe is assembled into a named local
/// before being handed to the cookbook.
pub fn register_viz_recipes(cookbook: &mut super::Cookbook) {
    // Text shown for the `viz-terminal` recipe.
    const TERMINAL_SNIPPET: &str = r#"use trueno_viz::prelude::*;
// Line chart in terminal
let chart = LineChart::new()
.title("Training Loss")
.x_label("Epoch")
.y_label("Loss")
.series("train", &train_losses)
.series("val", &val_losses);
chart.render_terminal(80, 24)?; // 80x24 chars
// Histogram
let hist = Histogram::new(&data)
.bins(20)
.title("Distribution");
hist.render_terminal(60, 15)?;
// Scatter plot
let scatter = ScatterPlot::new()
.points(&x_vals, &y_vals)
.title("Correlation");
scatter.render_terminal(40, 20)?;
// Progress bars (integrated with training)
let pb = ProgressBar::new(total_epochs);
for epoch in 0..total_epochs {
// ... training ...
pb.set(epoch, format!("loss: {:.4}", loss));
}
"#;
    const TERMINAL_TESTS: &str = r"#[cfg(test)]
mod tests {
#[test]
fn test_chart_dimensions() {
let width = 80;
let height = 24;
assert!(width > 0 && height > 0);
}
#[test]
fn test_bin_count() {
let bins = 20;
assert!(bins > 0 && bins <= 100);
}
#[test]
fn test_series_data_finite() {
let data = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0];
assert!(data.iter().all(|x| x.is_finite()));
}
}";

    // Text shown for the `viz-png` recipe.
    const PNG_SNIPPET: &str = r#"use trueno_viz::prelude::*;
// Create chart
let chart = LineChart::new()
.title("Model Performance")
.x_label("Epoch")
.y_label("Accuracy")
.series("ResNet", &resnet_acc)
.series("VGG", &vgg_acc)
.legend(Position::TopRight);
// Export as PNG
chart.save_png("performance.png", 800, 600)?;
// With custom styling
let styled = chart
.background(Color::WHITE)
.grid(true)
.font_size(14);
styled.save_png("styled.png", 1200, 800)?;
// Batch export multiple charts
let charts = vec![
("loss", loss_chart),
("accuracy", acc_chart),
("confusion", confusion_matrix),
];
for (name, chart) in charts {
chart.save_png(&format!("{}.png", name), 800, 600)?;
}
"#;
    const PNG_TESTS: &str = r#"#[cfg(test)]
mod tests {
#[test]
fn test_image_dimensions() {
let width = 800;
let height = 600;
assert!(width > 0 && height > 0);
}
#[test]
fn test_chart_title_non_empty() {
let title = "Model Performance";
assert!(!title.is_empty());
}
#[test]
fn test_batch_export_count() {
let charts = vec!["loss", "accuracy", "confusion"];
assert_eq!(charts.len(), 3);
}
}"#;

    let terminal_recipe = Recipe::new("viz-terminal", "Terminal Visualization")
        .with_problem("Create charts and plots in the terminal")
        .with_components(vec!["trueno-viz"])
        .with_tags(vec!["visualization", "terminal", "charts", "ascii"])
        .with_code(TERMINAL_SNIPPET)
        .with_related(vec!["viz-png", "training-autograd"])
        .with_test_code(TERMINAL_TESTS);
    cookbook.add(terminal_recipe);

    let png_recipe = Recipe::new("viz-png", "PNG Chart Export")
        .with_problem("Export publication-quality charts as PNG images")
        .with_components(vec!["trueno-viz"])
        .with_tags(vec!["visualization", "png", "export", "charts"])
        .with_code(PNG_SNIPPET)
        .with_related(vec!["viz-terminal"])
        .with_test_code(PNG_TESTS);
    cookbook.add(png_recipe);
}
/// Registers the RLHF recipes with `cookbook`.
///
/// This function adds nothing itself: it forwards `cookbook` to the
/// registration functions of three sibling modules, called in the order
/// alignment, training, efficiency.
pub fn register_rlhf_recipes(cookbook: &mut super::Cookbook) {
super::recipes_rlhf_alignment::register_rlhf_alignment_recipes(cookbook);
super::recipes_rlhf_training::register_rlhf_training_recipes(cookbook);
super::recipes_rlhf_efficiency::register_rlhf_efficiency_recipes(cookbook);
}