1use super::Recipe;
6
/// Registers the training-focused recipes (LoRA, QLoRA, and custom
/// autograd) with the cookbook.
///
/// Each recipe bundles: a problem statement, the component crates it
/// uses, searchable tags, example code (as a raw-string snippet shown
/// to the user, not compiled here), related recipe IDs, and a
/// self-contained test-code snippet.
pub fn register_training_recipes(cookbook: &mut super::Cookbook) {
    // Parameter-efficient fine-tuning with Low-Rank Adaptation adapters.
    cookbook.add(
        Recipe::new("training-lora", "LoRA Fine-tuning")
            .with_problem("Fine-tune large models efficiently with Low-Rank Adaptation")
            .with_components(vec!["entrenar", "aprender", "alimentar"])
            .with_tags(vec!["training", "lora", "fine-tuning", "efficient", "llm"])
            .with_code(
                r#"use entrenar::prelude::*;

// Load base model
let model = Model::load("llama-7b.apr")?;

// Configure LoRA
let lora_config = LoraConfig {
    r: 16, // Rank
    alpha: 32, // Scaling factor
    dropout: 0.1,
    target_modules: vec!["q_proj", "v_proj"],
};

// Apply LoRA adapters
let model = model.with_lora(lora_config)?;

// Only ~0.1% of parameters are trainable now
println!("Trainable params: {}", model.trainable_params());

// Training loop
let optimizer = AdamW::new(model.trainable_params(), 1e-4);
for batch in dataloader {
    let loss = model.forward(&batch)?;
    loss.backward()?;
    optimizer.step()?;
}

// Save LoRA weights only (small file)
model.save_lora("adapter.lora")?;

// Later: merge for inference
// let merged = Model::load("llama-7b.apr")?.merge_lora("adapter.lora")?;
"#,
            )
            .with_related(vec!["training-qlora", "training-autograd"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_lora_config_rank_and_alpha() {
        let rank = 16;
        let alpha = 32;
        assert!(rank > 0 && alpha >= rank);
}

    #[test]
    fn test_trainable_params_fraction() {
        let total = 1_000_000;
        let lora = 8192;
        let fraction = lora as f64 / total as f64;
        assert!(fraction < 0.1);
}

    #[test]
    fn test_dropout_in_valid_range() {
        let dropout = 0.1_f64;
        assert!(dropout >= 0.0 && dropout <= 1.0);
}
}",
            ),
    );

    // QLoRA: LoRA adapters applied on top of a 4-bit quantized base model.
    cookbook.add(
        Recipe::new("training-qlora", "QLoRA Quantized Fine-tuning")
            .with_problem("Fine-tune 4-bit quantized models on consumer hardware")
            .with_components(vec!["entrenar", "aprender"])
            .with_tags(vec!["training", "qlora", "quantization", "4bit", "memory-efficient"])
            .with_code(
                r#"use entrenar::prelude::*;

// Load 4-bit quantized model
let model = Model::load_quantized("llama-7b.q4_k.gguf")?;

// QLoRA config (LoRA on quantized base)
let qlora_config = QLoraConfig {
    lora: LoraConfig { r: 64, alpha: 16, dropout: 0.1, .. },
    nf4: true, // NormalFloat4 quantization
    double_quant: true, // Double quantization for memory
    compute_dtype: F16, // Compute in fp16
};

let model = model.with_qlora(qlora_config)?;

// Train on 24GB GPU (fits 7B model!)
let trainer = Trainer::new(model)
    .gradient_checkpointing(true)
    .batch_size(4)
    .gradient_accumulation(4);

trainer.train(&dataset, 3)?; // 3 epochs
"#,
            )
            .with_related(vec!["training-lora"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_quantization_bits_valid() {
        let bits = 4;
        assert!(bits == 4 || bits == 8);
}

    #[test]
    fn test_effective_batch_size() {
        let batch_size = 4;
        let grad_accum = 4;
        let effective = batch_size * grad_accum;
        assert_eq!(effective, 16);
}

    #[test]
    fn test_nf4_requires_4bit() {
        let nf4 = true;
        let bits = 4;
        assert!(nf4 && bits == 4);
}
}",
            ),
    );

    // Custom networks built on reverse-mode automatic differentiation.
    cookbook.add(
        Recipe::new("training-autograd", "Custom Training with Autograd")
            .with_problem("Build custom neural networks with automatic differentiation")
            .with_components(vec!["entrenar", "trueno"])
            .with_tags(vec!["training", "autograd", "neural-network", "custom"])
            .with_code(
                r#"use entrenar::autograd::*;

// Define model with autograd tensors
let w1 = Tensor::randn(&[784, 256]).requires_grad();
let w2 = Tensor::randn(&[256, 10]).requires_grad();

// Forward pass (computation graph built automatically)
fn forward(x: &Tensor, w1: &Tensor, w2: &Tensor) -> Tensor {
    let h = x.matmul(w1).relu();
    h.matmul(w2).softmax(-1)
}

// Training loop
let optimizer = SGD::new(vec![&w1, &w2], 0.01);
for (x, y) in dataloader {
    let pred = forward(&x, &w1, &w2);
    let loss = cross_entropy(&pred, &y);

    // Backward pass (gradients computed automatically)
    loss.backward();

    optimizer.step();
    optimizer.zero_grad();
}

// Gradients accessible
println!("w1 grad: {:?}", w1.grad());
"#,
            )
            .with_related(vec!["training-lora", "ml-random-forest"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_weight_matrix_dimensions() {
        let input_dim = 784;
        let hidden_dim = 256;
        let weights = vec![vec![0.0_f64; hidden_dim]; input_dim];
        assert_eq!(weights.len(), input_dim);
}

    #[test]
    fn test_softmax_sums_to_one() {
        let logits = vec![1.0_f64, 2.0, 3.0];
        let max = logits.iter().copied().fold(f64::NEG_INFINITY, f64::max);
        let exp_sum: f64 = logits.iter().map(|x| (x - max).exp()).sum();
        let sum: f64 = logits.iter().map(|x| (x - max).exp() / exp_sum).sum();
        assert!((sum - 1.0).abs() < 1e-6);
}

    #[test]
    fn test_learning_rate_positive() {
        let lr = 0.01_f64;
        assert!(lr > 0.0);
}
}",
            ),
    );
}
202
/// Registers the data-handling recipes (zero-copy dataset loading and
/// preprocessing pipelines) with the cookbook.
///
/// The `.with_code` / `.with_test_code` payloads are raw-string
/// snippets presented to the user; they are not compiled as part of
/// this crate.
pub fn register_data_recipes(cookbook: &mut super::Cookbook) {
    // Memory-mapped Parquet/Arrow loading with batching and remote streaming.
    cookbook.add(
        Recipe::new("data-alimentar", "Zero-Copy Data Loading")
            .with_problem("Load large datasets efficiently with memory mapping")
            .with_components(vec!["alimentar", "trueno"])
            .with_tags(vec!["data", "loading", "parquet", "arrow", "zero-copy"])
            .with_code(
                r#"use alimentar::prelude::*;

// Load Parquet with zero-copy (memory-mapped)
let dataset = ParquetDataset::open("data.parquet")?
    .select(&["features", "label"])?
    .filter(|row| row["label"].as_i64() > 0)?;

// Iterate with batching
let dataloader = DataLoader::new(dataset)
    .batch_size(32)
    .shuffle(true)
    .num_workers(4);

for batch in dataloader {
    // batch.features is Arrow array (zero-copy)
    let features = batch["features"].as_tensor()?;
    let labels = batch["label"].as_tensor()?;

    model.train_step(&features, &labels)?;
}

// Streaming from remote (S3, HuggingFace)
let dataset = Dataset::from_hub("username/dataset")?
    .streaming(true); // Don't download entire dataset
"#,
            )
            .with_related(vec!["data-preprocessing", "ml-random-forest"])
            .with_test_code(
                r#"#[cfg(test)]
mod tests {
    #[test]
    fn test_batch_size_config() {
        let batch_size = 32_u32;
        assert!(batch_size > 0);
}

    #[test]
    fn test_column_selection() {
        let columns = vec!["features", "label"];
        assert_eq!(columns.len(), 2);
}

    #[test]
    fn test_worker_count() {
        let workers = 4;
        assert!(workers > 0 && workers <= 16);
}
}"#,
            ),
    );

    // Reproducible fit/transform preprocessing with save/load round-trip.
    cookbook.add(
        Recipe::new("data-preprocessing", "Data Preprocessing Pipeline")
            .with_problem("Build reproducible preprocessing pipelines")
            .with_components(vec!["alimentar", "aprender"])
            .with_tags(vec!["data", "preprocessing", "pipeline", "transforms"])
            .with_code(
                r#"use alimentar::prelude::*;
use aprender::preprocessing::*;

// Build preprocessing pipeline
let pipeline = Pipeline::new()
    .add(StandardScaler::fit(&train_data)?)
    .add(OneHotEncoder::fit(&["category"])?)
    .add(Imputer::median());

// Apply to train/test
let X_train = pipeline.transform(&train_data)?;
let X_test = pipeline.transform(&test_data)?;

// Save pipeline for inference
pipeline.save("preprocess.pipeline")?;

// Later: load and apply
let pipeline = Pipeline::load("preprocess.pipeline")?;
let X_new = pipeline.transform(&new_data)?;
"#,
            )
            .with_related(vec!["data-alimentar"])
            .with_test_code(
                r#"#[cfg(test)]
mod tests {
    #[test]
    fn test_pipeline_step_count() {
        let steps = vec!["scale", "encode", "impute"];
        assert_eq!(steps.len(), 3);
}

    #[test]
    fn test_transform_preserves_row_count() {
        let input_rows = 1000;
        let output_rows = 1000;
        assert_eq!(input_rows, output_rows);
}

    #[test]
    fn test_scaler_std_positive() {
        let std_dev = 1.0_f64;
        assert!(std_dev > 0.0);
}
}"#,
            ),
    );
}
320
/// Registers the model-registry recipes (Pacha signing/versioning and
/// HuggingFace Hub downloads) with the cookbook.
///
/// The `.with_code` / `.with_test_code` payloads are raw-string
/// snippets presented to the user; they are not compiled as part of
/// this crate.
pub fn register_registry_recipes(cookbook: &mut super::Cookbook) {
    // Signed, versioned model distribution through a local Pacha registry.
    cookbook.add(
        Recipe::new("registry-pacha", "Model Registry with Pacha")
            .with_problem("Version, sign, and distribute ML models securely")
            .with_components(vec!["pacha", "aprender"])
            .with_tags(vec!["registry", "versioning", "signing", "distribution", "mlops"])
            .with_code(
                r#"use pacha::prelude::*;

// Initialize registry
let registry = Registry::new("./models")?;

// Register model with metadata
let model_card = ModelCard {
    name: "sentiment-classifier",
    version: "1.0.0",
    description: "BERT-based sentiment analysis",
    metrics: hashmap!{
        "accuracy" => 0.94,
        "f1" => 0.92,
    },
    license: "MIT",
    authors: vec!["team@example.com"],
};

// Push with Ed25519 signature
let artifact = registry.push(
    "model.apr",
    model_card,
    SigningKey::from_env()?, // PACHA_SIGNING_KEY
)?;

println!("Registered: {}@{}", artifact.name, artifact.version);
println!("Hash: {}", artifact.blake3_hash);

// Pull model (verifies signature)
let model_path = registry.pull("sentiment-classifier", "1.0.0")?;

// List versions
for version in registry.versions("sentiment-classifier")? {
    println!("{} - {}", version.version, version.created_at);
}
"#,
            )
            .with_related(vec!["registry-hf", "ml-serving"])
            .with_test_code(
                r#"#[cfg(test)]
mod tests {
    #[test]
    fn test_model_card_metadata() {
        let name = "sentiment-classifier";
        let version = "1.0.0";
        assert!(!name.is_empty());
        assert!(version.chars().filter(|c| *c == '.').count() == 2);
}

    #[test]
    fn test_version_string_format() {
        let version = "1.0.0";
        let parts: Vec<_> = version.split('.').collect();
        assert_eq!(parts.len(), 3);
}

    #[test]
    fn test_hash_length() {
        let blake3_hash = "a".repeat(64);
        assert_eq!(blake3_hash.len(), 64);
}
}"#,
            ),
    );

    // Downloading and caching model files from the HuggingFace Hub.
    cookbook.add(
        Recipe::new("registry-hf", "HuggingFace Hub Integration")
            .with_problem("Download and cache models from HuggingFace Hub")
            .with_components(vec!["hf-hub", "aprender", "realizar"])
            .with_tags(vec!["registry", "huggingface", "download", "cache"])
            .with_code(
                r#"use hf_hub::api::sync::Api;

// Initialize API (uses HF_TOKEN env var if set)
let api = Api::new()?;

// Download model files
let repo = api.model("meta-llama/Llama-2-7b");
let model_path = repo.get("model.safetensors")?;
let config_path = repo.get("config.json")?;

// Files cached in ~/.cache/huggingface/hub/
println!("Model: {}", model_path.display());

// Download specific revision
let repo = api.model("meta-llama/Llama-2-7b").revision("main");
let path = repo.get("tokenizer.json")?;

// Progress callback
let repo = api.model("big-model").progress(|p| {
    println!("Downloading: {:.1}%", p.percent * 100.0);
});
"#,
            )
            .with_related(vec!["registry-pacha", "speech-whisper"])
            .with_test_code(
                r#"#[cfg(test)]
mod tests {
    #[test]
    fn test_api_url_valid() {
        let url = "https://huggingface.co";
        assert!(url.starts_with("https://"));
}

    #[test]
    fn test_model_path_structure() {
        let org = "meta-llama";
        let model = "Llama-2-7b";
        let path = format!("{}/{}", org, model);
        assert_eq!(path.split('/').count(), 2);
}

    #[test]
    fn test_revision_default() {
        let revision = "main";
        assert_eq!(revision, "main");
}
}"#,
            ),
    );
}
455
/// Registers the retrieval-augmented-generation recipes (full RAG
/// pipeline and semantic search) with the cookbook.
///
/// The `.with_code` / `.with_test_code` payloads are raw-string
/// snippets presented to the user; they are not compiled as part of
/// this crate.
pub fn register_rag_recipes(cookbook: &mut super::Cookbook) {
    // End-to-end RAG: chunking, hybrid retrieval, reranking, generation.
    cookbook.add(
        Recipe::new("rag-pipeline", "RAG Pipeline with Trueno-RAG")
            .with_problem("Build retrieval-augmented generation pipelines")
            .with_components(vec!["trueno-rag", "trueno-db", "aprender"])
            .with_tags(vec!["rag", "retrieval", "generation", "embeddings", "search"])
            .with_code(
                r#"use trueno_rag::prelude::*;

// Initialize RAG pipeline
let rag = RagPipeline::builder()
    .chunker(SemanticChunker::new(512)) // Semantic chunking
    .embedder(Embedder::load("bge-small-en")?)
    .retriever(HybridRetriever::new()
        .bm25_weight(0.3)
        .dense_weight(0.7))
    .reranker(CrossEncoder::load("ms-marco-MiniLM")?)
    .build()?;

// Index documents
for doc in documents {
    rag.add_document(&doc)?;
}
rag.build_index()?;

// Query with retrieval
let query = "What is the capital of France?";
let results = rag.retrieve(query, 5)?; // Top 5 chunks

for (i, chunk) in results.iter().enumerate() {
    println!("{}. [score: {:.3}] {}", i+1, chunk.score, chunk.text);
}

// Full RAG with generation
let context = rag.retrieve_context(query, 3)?;
let prompt = format!("Context:\n{}\n\nQuestion: {}\nAnswer:", context, query);
let answer = llm.generate(&prompt)?;
"#,
            )
            .with_related(vec!["rag-semantic-search", "ml-serving"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_top_k_parameter() {
        let top_k = 5;
        assert!(top_k > 0 && top_k <= 100);
}

    #[test]
    fn test_chunk_size_exceeds_overlap() {
        let chunk_size = 512;
        let overlap = 50;
        assert!(chunk_size > overlap);
}

    #[test]
    fn test_retriever_weights_sum_to_one() {
        let bm25_weight = 0.3_f64;
        let vector_weight = 0.7_f64;
        assert!((bm25_weight + vector_weight - 1.0).abs() < 1e-6);
}
}",
            ),
    );

    // HNSW-indexed vector store for fast semantic search with filtering.
    cookbook.add(
        Recipe::new("rag-semantic-search", "Semantic Search Engine")
            .with_problem("Build fast semantic search over documents")
            .with_components(vec!["trueno-db", "trueno-rag"])
            .with_tags(vec!["search", "semantic", "embeddings", "hnsw", "vector-db"])
            .with_code(
                r#"use trueno_db::prelude::*;
use trueno_rag::embeddings::*;

// Initialize vector store with HNSW index
let db = VectorDb::open("vectors.db")?
    .with_index(HnswConfig {
        m: 16,
        ef_construction: 200,
        ef_search: 50,
    });

// Embed and store documents
let embedder = Embedder::load("bge-small-en")?;
for doc in documents {
    let embedding = embedder.embed(&doc.text)?;
    db.insert(&doc.id, &embedding, &doc.metadata)?;
}

// Search
let query_embedding = embedder.embed("machine learning")?;
let results = db.search(&query_embedding, 10)?;

for result in results {
    println!("{}: {:.3}", result.id, result.score);
}

// Filtered search
let results = db.search_filtered(
    &query_embedding,
    10,
    |meta| meta["category"] == "science",
)?;
"#,
            )
            .with_related(vec!["rag-pipeline"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_hnsw_config_params() {
        let m = 16;
        let ef_construction = 200;
        assert!(m >= 4 && m <= 64);
        assert!(ef_construction >= m);
}

    #[test]
    fn test_search_result_ordering() {
        let scores = vec![0.95, 0.85, 0.75];
        let is_sorted = scores.windows(2).all(|w| w[0] >= w[1]);
        assert!(is_sorted);
}

    #[test]
    fn test_filter_predicate() {
        let min_score = 0.5_f64;
        let result_score = 0.75_f64;
        assert!(result_score >= min_score);
}
}",
            ),
    );
}
597
/// Registers the visualization recipes (terminal charts and PNG export)
/// with the cookbook.
///
/// The `.with_code` / `.with_test_code` payloads are raw-string
/// snippets presented to the user; they are not compiled as part of
/// this crate.
pub fn register_viz_recipes(cookbook: &mut super::Cookbook) {
    // ASCII charts, histograms, scatter plots, and progress bars in the terminal.
    cookbook.add(
        Recipe::new("viz-terminal", "Terminal Visualization")
            .with_problem("Create charts and plots in the terminal")
            .with_components(vec!["trueno-viz"])
            .with_tags(vec!["visualization", "terminal", "charts", "ascii"])
            .with_code(
                r#"use trueno_viz::prelude::*;

// Line chart in terminal
let chart = LineChart::new()
    .title("Training Loss")
    .x_label("Epoch")
    .y_label("Loss")
    .series("train", &train_losses)
    .series("val", &val_losses);

chart.render_terminal(80, 24)?; // 80x24 chars

// Histogram
let hist = Histogram::new(&data)
    .bins(20)
    .title("Distribution");
hist.render_terminal(60, 15)?;

// Scatter plot
let scatter = ScatterPlot::new()
    .points(&x_vals, &y_vals)
    .title("Correlation");
scatter.render_terminal(40, 20)?;

// Progress bars (integrated with training)
let pb = ProgressBar::new(total_epochs);
for epoch in 0..total_epochs {
    // ... training ...
    pb.set(epoch, format!("loss: {:.4}", loss));
}
"#,
            )
            .with_related(vec!["viz-png", "training-autograd"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_chart_dimensions() {
        let width = 80;
        let height = 24;
        assert!(width > 0 && height > 0);
}

    #[test]
    fn test_bin_count() {
        let bins = 20;
        assert!(bins > 0 && bins <= 100);
}

    #[test]
    fn test_series_data_finite() {
        let data = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0];
        assert!(data.iter().all(|x| x.is_finite()));
}
}",
            ),
    );

    // Publication-quality PNG export with styling and batch output.
    cookbook.add(
        Recipe::new("viz-png", "PNG Chart Export")
            .with_problem("Export publication-quality charts as PNG images")
            .with_components(vec!["trueno-viz"])
            .with_tags(vec!["visualization", "png", "export", "charts"])
            .with_code(
                r#"use trueno_viz::prelude::*;

// Create chart
let chart = LineChart::new()
    .title("Model Performance")
    .x_label("Epoch")
    .y_label("Accuracy")
    .series("ResNet", &resnet_acc)
    .series("VGG", &vgg_acc)
    .legend(Position::TopRight);

// Export as PNG
chart.save_png("performance.png", 800, 600)?;

// With custom styling
let styled = chart
    .background(Color::WHITE)
    .grid(true)
    .font_size(14);
styled.save_png("styled.png", 1200, 800)?;

// Batch export multiple charts
let charts = vec![
    ("loss", loss_chart),
    ("accuracy", acc_chart),
    ("confusion", confusion_matrix),
];
for (name, chart) in charts {
    chart.save_png(&format!("{}.png", name), 800, 600)?;
}
"#,
            )
            .with_related(vec!["viz-terminal"])
            .with_test_code(
                r#"#[cfg(test)]
mod tests {
    #[test]
    fn test_image_dimensions() {
        let width = 800;
        let height = 600;
        assert!(width > 0 && height > 0);
}

    #[test]
    fn test_chart_title_non_empty() {
        let title = "Model Performance";
        assert!(!title.is_empty());
}

    #[test]
    fn test_batch_export_count() {
        let charts = vec!["loss", "accuracy", "confusion"];
        assert_eq!(charts.len(), 3);
}
}"#,
            ),
    );
}
733
/// Registers all RLHF recipes by delegating to the three specialized
/// sibling modules (alignment, training, efficiency).
///
/// Delegation order is preserved as-is in case recipe insertion order
/// is observable to cookbook consumers.
pub fn register_rlhf_recipes(cookbook: &mut super::Cookbook) {
    super::recipes_rlhf_alignment::register_rlhf_alignment_recipes(cookbook);
    super::recipes_rlhf_training::register_rlhf_training_recipes(cookbook);
    super::recipes_rlhf_efficiency::register_rlhf_efficiency_recipes(cookbook);
}