// batuta/oracle/cookbook/recipes_more.rs

1//! Additional recipe registrations
2//!
3//! Continuation of recipes module, split for file size compliance.
4
5use super::Recipe;
6
/// Registers the training-focused recipes (LoRA fine-tuning, QLoRA
/// quantized fine-tuning, custom autograd training) into `cookbook`.
///
/// Each recipe's `with_code` / `with_test_code` payloads are raw string
/// literals: illustrative example snippets stored as data, not code that
/// is compiled by this crate. Recipe ids ("training-lora", etc.) are
/// cross-referenced by `with_related` here and in sibling modules.
pub fn register_training_recipes(cookbook: &mut super::Cookbook) {
    // LoRA Fine-tuning
    cookbook.add(
        Recipe::new("training-lora", "LoRA Fine-tuning")
            .with_problem("Fine-tune large models efficiently with Low-Rank Adaptation")
            .with_components(vec!["entrenar", "aprender", "alimentar"])
            .with_tags(vec!["training", "lora", "fine-tuning", "efficient", "llm"])
            .with_code(
                r#"use entrenar::prelude::*;

// Load base model
let model = Model::load("llama-7b.apr")?;

// Configure LoRA
let lora_config = LoraConfig {
    r: 16,                    // Rank
    alpha: 32,                // Scaling factor
    dropout: 0.1,
    target_modules: vec!["q_proj", "v_proj"],
};

// Apply LoRA adapters
let model = model.with_lora(lora_config)?;

// Only ~0.1% of parameters are trainable now
println!("Trainable params: {}", model.trainable_params());

// Training loop
let optimizer = AdamW::new(model.trainable_params(), 1e-4);
for batch in dataloader {
    let loss = model.forward(&batch)?;
    loss.backward()?;
    optimizer.step()?;
}

// Save LoRA weights only (small file)
model.save_lora("adapter.lora")?;

// Later: merge for inference
// let merged = Model::load("llama-7b.apr")?.merge_lora("adapter.lora")?;
"#,
            )
            .with_related(vec!["training-qlora", "training-autograd"])
            // NOTE(review): test snippets use `r"…"` (hash-less raw strings),
            // so their content must never contain a double quote.
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_lora_config_rank_and_alpha() {
    let rank = 16;
    let alpha = 32;
    assert!(rank > 0 && alpha >= rank);
}

    #[test]
    fn test_trainable_params_fraction() {
    let total = 1_000_000;
    let lora = 8192;
    let fraction = lora as f64 / total as f64;
    assert!(fraction < 0.1);
}

    #[test]
    fn test_dropout_in_valid_range() {
    let dropout = 0.1_f64;
    assert!(dropout >= 0.0 && dropout <= 1.0);
}
}",
            ),
    );

    // QLoRA
    cookbook.add(
        Recipe::new("training-qlora", "QLoRA Quantized Fine-tuning")
            .with_problem("Fine-tune 4-bit quantized models on consumer hardware")
            .with_components(vec!["entrenar", "aprender"])
            .with_tags(vec!["training", "qlora", "quantization", "4bit", "memory-efficient"])
            .with_code(
                r#"use entrenar::prelude::*;

// Load 4-bit quantized model
let model = Model::load_quantized("llama-7b.q4_k.gguf")?;

// QLoRA config (LoRA on quantized base)
let qlora_config = QLoraConfig {
    lora: LoraConfig { r: 64, alpha: 16, dropout: 0.1, .. },
    nf4: true,              // NormalFloat4 quantization
    double_quant: true,     // Double quantization for memory
    compute_dtype: F16,     // Compute in fp16
};

let model = model.with_qlora(qlora_config)?;

// Train on 24GB GPU (fits 7B model!)
let trainer = Trainer::new(model)
    .gradient_checkpointing(true)
    .batch_size(4)
    .gradient_accumulation(4);

trainer.train(&dataset, 3)?;  // 3 epochs
"#,
            )
            .with_related(vec!["training-lora"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_quantization_bits_valid() {
    let bits = 4;
    assert!(bits == 4 || bits == 8);
}

    #[test]
    fn test_effective_batch_size() {
    let batch_size = 4;
    let grad_accum = 4;
    let effective = batch_size * grad_accum;
    assert_eq!(effective, 16);
}

    #[test]
    fn test_nf4_requires_4bit() {
    let nf4 = true;
    let bits = 4;
    assert!(nf4 && bits == 4);
}
}",
            ),
    );

    // Autograd
    cookbook.add(
        Recipe::new("training-autograd", "Custom Training with Autograd")
            .with_problem("Build custom neural networks with automatic differentiation")
            .with_components(vec!["entrenar", "trueno"])
            .with_tags(vec!["training", "autograd", "neural-network", "custom"])
            .with_code(
                r#"use entrenar::autograd::*;

// Define model with autograd tensors
let w1 = Tensor::randn(&[784, 256]).requires_grad();
let w2 = Tensor::randn(&[256, 10]).requires_grad();

// Forward pass (computation graph built automatically)
fn forward(x: &Tensor, w1: &Tensor, w2: &Tensor) -> Tensor {
    let h = x.matmul(w1).relu();
    h.matmul(w2).softmax(-1)
}

// Training loop
let optimizer = SGD::new(vec![&w1, &w2], 0.01);
for (x, y) in dataloader {
    let pred = forward(&x, &w1, &w2);
    let loss = cross_entropy(&pred, &y);

    // Backward pass (gradients computed automatically)
    loss.backward();

    optimizer.step();
    optimizer.zero_grad();
}

// Gradients accessible
println!("w1 grad: {:?}", w1.grad());
"#,
            )
            .with_related(vec!["training-lora", "ml-random-forest"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_weight_matrix_dimensions() {
    let input_dim = 784;
    let hidden_dim = 256;
    let weights = vec![vec![0.0_f64; hidden_dim]; input_dim];
    assert_eq!(weights.len(), input_dim);
}

    #[test]
    fn test_softmax_sums_to_one() {
    let logits = vec![1.0_f64, 2.0, 3.0];
    let max = logits.iter().copied().fold(f64::NEG_INFINITY, f64::max);
    let exp_sum: f64 = logits.iter().map(|x| (x - max).exp()).sum();
    let sum: f64 = logits.iter().map(|x| (x - max).exp() / exp_sum).sum();
    assert!((sum - 1.0).abs() < 1e-6);
}

    #[test]
    fn test_learning_rate_positive() {
    let lr = 0.01_f64;
    assert!(lr > 0.0);
}
}",
            ),
    );
}
202
203// =========================================================================
204// Data Loading Recipes
205// =========================================================================
206
/// Registers the data-handling recipes (zero-copy loading with
/// `alimentar`, reproducible preprocessing pipelines) into `cookbook`.
///
/// Recipe code/test payloads are raw string literals — documentation
/// data, not code compiled by this crate.
pub fn register_data_recipes(cookbook: &mut super::Cookbook) {
    // Alimentar Data Loading
    cookbook.add(
        Recipe::new("data-alimentar", "Zero-Copy Data Loading")
            .with_problem("Load large datasets efficiently with memory mapping")
            .with_components(vec!["alimentar", "trueno"])
            .with_tags(vec!["data", "loading", "parquet", "arrow", "zero-copy"])
            .with_code(
                r#"use alimentar::prelude::*;

// Load Parquet with zero-copy (memory-mapped)
let dataset = ParquetDataset::open("data.parquet")?
    .select(&["features", "label"])?
    .filter(|row| row["label"].as_i64() > 0)?;

// Iterate with batching
let dataloader = DataLoader::new(dataset)
    .batch_size(32)
    .shuffle(true)
    .num_workers(4);

for batch in dataloader {
    // batch.features is Arrow array (zero-copy)
    let features = batch["features"].as_tensor()?;
    let labels = batch["label"].as_tensor()?;

    model.train_step(&features, &labels)?;
}

// Streaming from remote (S3, HuggingFace)
let dataset = Dataset::from_hub("username/dataset")?
    .streaming(true);  // Don't download entire dataset
"#,
            )
            // "ml-random-forest" is defined outside this file — presumably in
            // the main recipes module; verify the id exists there.
            .with_related(vec!["data-preprocessing", "ml-random-forest"])
            .with_test_code(
                r#"#[cfg(test)]
mod tests {
    #[test]
    fn test_batch_size_config() {
    let batch_size = 32_u32;
    assert!(batch_size > 0);
}

    #[test]
    fn test_column_selection() {
    let columns = vec!["features", "label"];
    assert_eq!(columns.len(), 2);
}

    #[test]
    fn test_worker_count() {
    let workers = 4;
    assert!(workers > 0 && workers <= 16);
}
}"#,
            ),
    );

    // Data Preprocessing
    cookbook.add(
        Recipe::new("data-preprocessing", "Data Preprocessing Pipeline")
            .with_problem("Build reproducible preprocessing pipelines")
            .with_components(vec!["alimentar", "aprender"])
            .with_tags(vec!["data", "preprocessing", "pipeline", "transforms"])
            .with_code(
                r#"use alimentar::prelude::*;
use aprender::preprocessing::*;

// Build preprocessing pipeline
let pipeline = Pipeline::new()
    .add(StandardScaler::fit(&train_data)?)
    .add(OneHotEncoder::fit(&["category"])?)
    .add(Imputer::median());

// Apply to train/test
let X_train = pipeline.transform(&train_data)?;
let X_test = pipeline.transform(&test_data)?;

// Save pipeline for inference
pipeline.save("preprocess.pipeline")?;

// Later: load and apply
let pipeline = Pipeline::load("preprocess.pipeline")?;
let X_new = pipeline.transform(&new_data)?;
"#,
            )
            .with_related(vec!["data-alimentar"])
            .with_test_code(
                r#"#[cfg(test)]
mod tests {
    #[test]
    fn test_pipeline_step_count() {
    let steps = vec!["scale", "encode", "impute"];
    assert_eq!(steps.len(), 3);
}

    #[test]
    fn test_transform_preserves_row_count() {
    let input_rows = 1000;
    let output_rows = 1000;
    assert_eq!(input_rows, output_rows);
}

    #[test]
    fn test_scaler_std_positive() {
    let std_dev = 1.0_f64;
    assert!(std_dev > 0.0);
}
}"#,
            ),
    );
}
320
321// =========================================================================
322// Model Registry Recipes
323// =========================================================================
324
/// Registers the model-registry recipes (Pacha signed registry,
/// HuggingFace Hub integration) into `cookbook`.
///
/// The embedded code samples are raw string data shown to users; they
/// are not compiled or executed by this crate.
pub fn register_registry_recipes(cookbook: &mut super::Cookbook) {
    // Pacha Model Registry
    cookbook.add(
        Recipe::new("registry-pacha", "Model Registry with Pacha")
            .with_problem("Version, sign, and distribute ML models securely")
            .with_components(vec!["pacha", "aprender"])
            .with_tags(vec!["registry", "versioning", "signing", "distribution", "mlops"])
            .with_code(
                r#"use pacha::prelude::*;

// Initialize registry
let registry = Registry::new("./models")?;

// Register model with metadata
let model_card = ModelCard {
    name: "sentiment-classifier",
    version: "1.0.0",
    description: "BERT-based sentiment analysis",
    metrics: hashmap!{
        "accuracy" => 0.94,
        "f1" => 0.92,
    },
    license: "MIT",
    authors: vec!["team@example.com"],
};

// Push with Ed25519 signature
let artifact = registry.push(
    "model.apr",
    model_card,
    SigningKey::from_env()?,  // PACHA_SIGNING_KEY
)?;

println!("Registered: {}@{}", artifact.name, artifact.version);
println!("Hash: {}", artifact.blake3_hash);

// Pull model (verifies signature)
let model_path = registry.pull("sentiment-classifier", "1.0.0")?;

// List versions
for version in registry.versions("sentiment-classifier")? {
    println!("{} - {}", version.version, version.created_at);
}
"#,
            )
            // "ml-serving" and "speech-whisper" ids live outside this file;
            // verify against the sibling recipe modules.
            .with_related(vec!["registry-hf", "ml-serving"])
            .with_test_code(
                r#"#[cfg(test)]
mod tests {
    #[test]
    fn test_model_card_metadata() {
    let name = "sentiment-classifier";
    let version = "1.0.0";
    assert!(!name.is_empty());
    assert!(version.chars().filter(|c| *c == '.').count() == 2);
}

    #[test]
    fn test_version_string_format() {
    let version = "1.0.0";
    let parts: Vec<_> = version.split('.').collect();
    assert_eq!(parts.len(), 3);
}

    #[test]
    fn test_hash_length() {
    let blake3_hash = "a".repeat(64);
    assert_eq!(blake3_hash.len(), 64);
}
}"#,
            ),
    );

    // HuggingFace Integration
    cookbook.add(
        Recipe::new("registry-hf", "HuggingFace Hub Integration")
            .with_problem("Download and cache models from HuggingFace Hub")
            .with_components(vec!["hf-hub", "aprender", "realizar"])
            .with_tags(vec!["registry", "huggingface", "download", "cache"])
            .with_code(
                r#"use hf_hub::api::sync::Api;

// Initialize API (uses HF_TOKEN env var if set)
let api = Api::new()?;

// Download model files
let repo = api.model("meta-llama/Llama-2-7b");
let model_path = repo.get("model.safetensors")?;
let config_path = repo.get("config.json")?;

// Files cached in ~/.cache/huggingface/hub/
println!("Model: {}", model_path.display());

// Download specific revision
let repo = api.model("meta-llama/Llama-2-7b").revision("main");
let path = repo.get("tokenizer.json")?;

// Progress callback
let repo = api.model("big-model").progress(|p| {
    println!("Downloading: {:.1}%", p.percent * 100.0);
});
"#,
            )
            .with_related(vec!["registry-pacha", "speech-whisper"])
            .with_test_code(
                r#"#[cfg(test)]
mod tests {
    #[test]
    fn test_api_url_valid() {
    let url = "https://huggingface.co";
    assert!(url.starts_with("https://"));
}

    #[test]
    fn test_model_path_structure() {
    let org = "meta-llama";
    let model = "Llama-2-7b";
    let path = format!("{}/{}", org, model);
    assert_eq!(path.split('/').count(), 2);
}

    #[test]
    fn test_revision_default() {
    let revision = "main";
    assert_eq!(revision, "main");
}
}"#,
            ),
    );
}
455
456// =========================================================================
457// RAG Pipeline Recipes
458// =========================================================================
459
/// Registers the retrieval-augmented-generation recipes (full RAG
/// pipeline, HNSW-backed semantic search) into `cookbook`.
///
/// Code/test payloads are raw string literals stored as recipe data;
/// nothing here is compiled or run by this crate.
pub fn register_rag_recipes(cookbook: &mut super::Cookbook) {
    // RAG Pipeline
    cookbook.add(
        Recipe::new("rag-pipeline", "RAG Pipeline with Trueno-RAG")
            .with_problem("Build retrieval-augmented generation pipelines")
            .with_components(vec!["trueno-rag", "trueno-db", "aprender"])
            .with_tags(vec!["rag", "retrieval", "generation", "embeddings", "search"])
            .with_code(
                r#"use trueno_rag::prelude::*;

// Initialize RAG pipeline
let rag = RagPipeline::builder()
    .chunker(SemanticChunker::new(512))  // Semantic chunking
    .embedder(Embedder::load("bge-small-en")?)
    .retriever(HybridRetriever::new()
        .bm25_weight(0.3)
        .dense_weight(0.7))
    .reranker(CrossEncoder::load("ms-marco-MiniLM")?)
    .build()?;

// Index documents
for doc in documents {
    rag.add_document(&doc)?;
}
rag.build_index()?;

// Query with retrieval
let query = "What is the capital of France?";
let results = rag.retrieve(query, 5)?;  // Top 5 chunks

for (i, chunk) in results.iter().enumerate() {
    println!("{}. [score: {:.3}] {}", i+1, chunk.score, chunk.text);
}

// Full RAG with generation
let context = rag.retrieve_context(query, 3)?;
let prompt = format!("Context:\n{}\n\nQuestion: {}\nAnswer:", context, query);
let answer = llm.generate(&prompt)?;
"#,
            )
            .with_related(vec!["rag-semantic-search", "ml-serving"])
            // Hash-less raw string: content must not contain a double quote.
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_top_k_parameter() {
    let top_k = 5;
    assert!(top_k > 0 && top_k <= 100);
}

    #[test]
    fn test_chunk_size_exceeds_overlap() {
    let chunk_size = 512;
    let overlap = 50;
    assert!(chunk_size > overlap);
}

    #[test]
    fn test_retriever_weights_sum_to_one() {
    let bm25_weight = 0.3_f64;
    let vector_weight = 0.7_f64;
    assert!((bm25_weight + vector_weight - 1.0).abs() < 1e-6);
}
}",
            ),
    );

    // Semantic Search
    cookbook.add(
        Recipe::new("rag-semantic-search", "Semantic Search Engine")
            .with_problem("Build fast semantic search over documents")
            .with_components(vec!["trueno-db", "trueno-rag"])
            .with_tags(vec!["search", "semantic", "embeddings", "hnsw", "vector-db"])
            .with_code(
                r#"use trueno_db::prelude::*;
use trueno_rag::embeddings::*;

// Initialize vector store with HNSW index
let db = VectorDb::open("vectors.db")?
    .with_index(HnswConfig {
        m: 16,
        ef_construction: 200,
        ef_search: 50,
    });

// Embed and store documents
let embedder = Embedder::load("bge-small-en")?;
for doc in documents {
    let embedding = embedder.embed(&doc.text)?;
    db.insert(&doc.id, &embedding, &doc.metadata)?;
}

// Search
let query_embedding = embedder.embed("machine learning")?;
let results = db.search(&query_embedding, 10)?;

for result in results {
    println!("{}: {:.3}", result.id, result.score);
}

// Filtered search
let results = db.search_filtered(
    &query_embedding,
    10,
    |meta| meta["category"] == "science",
)?;
"#,
            )
            .with_related(vec!["rag-pipeline"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_hnsw_config_params() {
    let m = 16;
    let ef_construction = 200;
    assert!(m >= 4 && m <= 64);
    assert!(ef_construction >= m);
}

    #[test]
    fn test_search_result_ordering() {
    let scores = vec![0.95, 0.85, 0.75];
    let is_sorted = scores.windows(2).all(|w| w[0] >= w[1]);
    assert!(is_sorted);
}

    #[test]
    fn test_filter_predicate() {
    let min_score = 0.5_f64;
    let result_score = 0.75_f64;
    assert!(result_score >= min_score);
}
}",
            ),
    );
}
597
598// =========================================================================
599// Visualization Recipes
600// =========================================================================
601
/// Registers the visualization recipes (terminal charts, PNG export)
/// into `cookbook`.
///
/// The `with_code` / `with_test_code` payloads are raw string literals
/// carrying example snippets as data.
pub fn register_viz_recipes(cookbook: &mut super::Cookbook) {
    // Terminal Visualization
    cookbook.add(
        Recipe::new("viz-terminal", "Terminal Visualization")
            .with_problem("Create charts and plots in the terminal")
            .with_components(vec!["trueno-viz"])
            .with_tags(vec!["visualization", "terminal", "charts", "ascii"])
            .with_code(
                r#"use trueno_viz::prelude::*;

// Line chart in terminal
let chart = LineChart::new()
    .title("Training Loss")
    .x_label("Epoch")
    .y_label("Loss")
    .series("train", &train_losses)
    .series("val", &val_losses);

chart.render_terminal(80, 24)?;  // 80x24 chars

// Histogram
let hist = Histogram::new(&data)
    .bins(20)
    .title("Distribution");
hist.render_terminal(60, 15)?;

// Scatter plot
let scatter = ScatterPlot::new()
    .points(&x_vals, &y_vals)
    .title("Correlation");
scatter.render_terminal(40, 20)?;

// Progress bars (integrated with training)
let pb = ProgressBar::new(total_epochs);
for epoch in 0..total_epochs {
    // ... training ...
    pb.set(epoch, format!("loss: {:.4}", loss));
}
"#,
            )
            .with_related(vec!["viz-png", "training-autograd"])
            .with_test_code(
                r"#[cfg(test)]
mod tests {
    #[test]
    fn test_chart_dimensions() {
    let width = 80;
    let height = 24;
    assert!(width > 0 && height > 0);
}

    #[test]
    fn test_bin_count() {
    let bins = 20;
    assert!(bins > 0 && bins <= 100);
}

    #[test]
    fn test_series_data_finite() {
    let data = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0];
    assert!(data.iter().all(|x| x.is_finite()));
}
}",
            ),
    );

    // PNG Export
    cookbook.add(
        Recipe::new("viz-png", "PNG Chart Export")
            .with_problem("Export publication-quality charts as PNG images")
            .with_components(vec!["trueno-viz"])
            .with_tags(vec!["visualization", "png", "export", "charts"])
            .with_code(
                r#"use trueno_viz::prelude::*;

// Create chart
let chart = LineChart::new()
    .title("Model Performance")
    .x_label("Epoch")
    .y_label("Accuracy")
    .series("ResNet", &resnet_acc)
    .series("VGG", &vgg_acc)
    .legend(Position::TopRight);

// Export as PNG
chart.save_png("performance.png", 800, 600)?;

// With custom styling
let styled = chart
    .background(Color::WHITE)
    .grid(true)
    .font_size(14);
styled.save_png("styled.png", 1200, 800)?;

// Batch export multiple charts
let charts = vec![
    ("loss", loss_chart),
    ("accuracy", acc_chart),
    ("confusion", confusion_matrix),
];
for (name, chart) in charts {
    chart.save_png(&format!("{}.png", name), 800, 600)?;
}
"#,
            )
            .with_related(vec!["viz-terminal"])
            .with_test_code(
                r#"#[cfg(test)]
mod tests {
    #[test]
    fn test_image_dimensions() {
    let width = 800;
    let height = 600;
    assert!(width > 0 && height > 0);
}

    #[test]
    fn test_chart_title_non_empty() {
    let title = "Model Performance";
    assert!(!title.is_empty());
}

    #[test]
    fn test_batch_export_count() {
    let charts = vec!["loss", "accuracy", "confusion"];
    assert_eq!(charts.len(), 3);
}
}"#,
            ),
    );
}
733
734// =========================================================================
735// RLHF & Alignment Recipes
736// =========================================================================
737
738pub fn register_rlhf_recipes(cookbook: &mut super::Cookbook) {
739    super::recipes_rlhf_alignment::register_rlhf_alignment_recipes(cookbook);
740    super::recipes_rlhf_training::register_rlhf_training_recipes(cookbook);
741    super::recipes_rlhf_efficiency::register_rlhf_efficiency_recipes(cookbook);
742}