//! Recommendation Engine for Oracle Mode
//!
//! Generates component recommendations based on parsed queries
//! and the knowledge graph.

use super::knowledge_graph::KnowledgeGraph;
use super::query_engine::{ParsedQuery, PerformanceHint, QueryEngine};
use super::types::*;

// =============================================================================
// Backend Selector (from spec section 4.2)
// =============================================================================

14/// Select optimal backend based on workload characteristics
15/// Based on PCIe transfer overhead analysis (Gregg & Hazelwood, 2011)
16pub fn select_backend(
17    op_complexity: OpComplexity,
18    data_size: Option<DataSize>,
19    hardware: &HardwareSpec,
20) -> Backend {
21    let size = data_size.and_then(|d| d.as_samples()).unwrap_or(0);
22
23    // Thresholds scale with complexity: higher complexity → lower threshold for GPU/SIMD
24    let (gpu_threshold, simd_threshold) = match op_complexity {
25        OpComplexity::Low => (1_000_000, 1_000),
26        OpComplexity::Medium => (100_000, 100),
27        OpComplexity::High => (10_000, 10),
28    };
29
30    if size > gpu_threshold && hardware.has_gpu() {
31        Backend::GPU
32    } else if size > simd_threshold {
33        Backend::SIMD
34    } else {
35        Backend::Scalar
36    }
37}
38
39/// Determine if distributed execution is beneficial
40/// Based on Amdahl's Law and communication overhead
41pub fn should_distribute(
42    data_size: Option<DataSize>,
43    hardware: &HardwareSpec,
44    parallel_fraction: f64,
45) -> DistributionRecommendation {
46    let size = data_size.and_then(|d| d.as_samples()).unwrap_or(0);
47
48    // Only consider distribution for large data or explicit multi-node
49    if !hardware.is_distributed && size < 10_000_000 {
50        return DistributionRecommendation::not_needed(
51            "Single-node sufficient for this workload size",
52        );
53    }
54
55    let node_count = hardware.node_count.unwrap_or(1);
56    if node_count <= 1 {
57        return DistributionRecommendation::not_needed(
58            "No additional nodes available for distribution",
59        );
60    }
61
62    // Amdahl's Law: speedup = 1 / ((1-p) + p/n)
63    let speedup = 1.0 / ((1.0 - parallel_fraction) + parallel_fraction / node_count as f64);
64
65    // Communication overhead estimate (simplified)
66    let comm_overhead = 0.1 * node_count as f64; // 10% per node
67
68    if speedup > 1.5 && comm_overhead < 0.5 {
69        DistributionRecommendation {
70            tool: Some("repartir".into()),
71            needed: true,
72            rationale: format!(
73                "Distribution beneficial with {:.1}x speedup across {} nodes",
74                speedup, node_count
75            ),
76            node_count: Some(node_count),
77        }
78    } else {
79        DistributionRecommendation::not_needed(format!(
80            "Distribution overhead ({:.0}%) outweighs benefits",
81            comm_overhead * 100.0
82        ))
83    }
84}

// =============================================================================
// Recommender
// =============================================================================

/// Oracle recommendation engine
pub struct Recommender {
    // Component/capability graph that recommendations are drawn from.
    graph: KnowledgeGraph,
    // Natural-language parser and complexity/algorithm estimator.
    engine: QueryEngine,
}

impl Default for Recommender {
    // Delegates to `new()`: the default recommender uses the full
    // Sovereign AI Stack knowledge graph.
    fn default() -> Self {
        Self::new()
    }
}

impl Recommender {
    /// Create a new recommender with the Sovereign AI Stack
    /// knowledge graph and a default query engine.
    pub fn new() -> Self {
        Self { graph: KnowledgeGraph::sovereign_stack(), engine: QueryEngine::new() }
    }

    /// Create a recommender with a custom knowledge graph
    /// (useful for tests or non-default component stacks).
    pub fn with_graph(graph: KnowledgeGraph) -> Self {
        Self { graph, engine: QueryEngine::new() }
    }

113    /// Process a natural language query and return recommendations
114    pub fn query(&self, query: &str) -> OracleResponse {
115        let parsed = self.engine.parse(query);
116
117        // Transfer extracted information from parsed query into constraints
118        // so the backend selector sees data size and hardware hints
119        let mut constraints = QueryConstraints::default();
120        if let Some(size) = parsed.data_size {
121            constraints.data_size = Some(size);
122        }
123        if parsed.performance_hints.contains(&PerformanceHint::GPURequired) {
124            constraints.hardware = HardwareSpec::with_gpu(16.0);
125        }
126
127        self.recommend(&parsed, &constraints)
128    }
129
130    /// Process a structured OracleQuery
131    pub fn query_structured(&self, query: &OracleQuery) -> OracleResponse {
132        let parsed = self.engine.parse(&query.description);
133
134        // Merge NL-extracted hints into explicit constraints (explicit wins)
135        let mut constraints = query.constraints.clone();
136        if constraints.data_size.is_none() {
137            if let Some(size) = parsed.data_size {
138                constraints.data_size = Some(size);
139            }
140        }
141        if !constraints.hardware.has_gpu()
142            && parsed.performance_hints.contains(&PerformanceHint::GPURequired)
143        {
144            constraints.hardware = HardwareSpec::with_gpu(16.0);
145        }
146
147        self.recommend(&parsed, &constraints)
148    }
149
150    /// Generate recommendations from parsed query
151    pub fn recommend(
152        &self,
153        parsed: &ParsedQuery,
154        constraints: &QueryConstraints,
155    ) -> OracleResponse {
156        // Determine primary problem class
157        let problem_class = self.classify_problem(parsed);
158
159        // Find primary component recommendation
160        let primary = self.recommend_primary(parsed, constraints);
161
162        // Find supporting components
163        let supporting = self.recommend_supporting(&primary, parsed, constraints);
164
165        // Determine compute backend
166        let complexity = self.engine.estimate_complexity(parsed);
167        let backend = select_backend(complexity, constraints.data_size, &constraints.hardware);
168        let compute = ComputeRecommendation {
169            backend,
170            rationale: self.compute_rationale(backend, complexity, constraints),
171        };
172
173        // Determine distribution needs
174        let parallel_fraction = self.estimate_parallel_fraction(parsed);
175        let distribution =
176            should_distribute(constraints.data_size, &constraints.hardware, parallel_fraction);
177
178        // Generate code example
179        let code_example = self.generate_code_example(&primary, &supporting, parsed);
180
181        // Generate related queries
182        let related_queries = self.generate_related_queries(parsed);
183
184        OracleResponse {
185            problem_class,
186            algorithm: self.engine.primary_algorithm(parsed).map(String::from),
187            primary,
188            supporting,
189            compute,
190            distribution,
191            code_example,
192            related_queries,
193        }
194    }
195
196    fn classify_problem(&self, parsed: &ParsedQuery) -> String {
197        if let Some(domain) = self.engine.primary_domain(parsed) {
198            domain.to_string()
199        } else if !parsed.algorithms.is_empty() {
200            "Algorithm-specific".into()
201        } else {
202            "General".into()
203        }
204    }
205
206    fn recommend_primary(
207        &self,
208        parsed: &ParsedQuery,
209        _constraints: &QueryConstraints,
210    ) -> ComponentRecommendation {
211        // Check if specific components were mentioned
212        if let Some(component) = parsed.mentioned_components.first() {
213            if let Some(comp) = self.graph.get_component(component) {
214                return ComponentRecommendation::new(
215                    comp.name.clone(),
216                    0.95,
217                    format!("Explicitly mentioned {} - {}", comp.name, comp.description),
218                );
219            }
220        }
221
222        // Find by algorithm
223        if let Some(algo) = parsed.algorithms.first() {
224            let components = self.graph.find_by_capability(algo);
225            if let Some(comp) = components.first() {
226                let path = self.get_algorithm_path(comp, algo);
227                return match path {
228                    Some(p) => ComponentRecommendation::with_path(
229                        comp.name.clone(),
230                        0.9,
231                        format!("{} provides {} implementation", comp.name, algo),
232                        p,
233                    ),
234                    None => ComponentRecommendation::new(
235                        comp.name.clone(),
236                        0.9,
237                        format!("{} provides {} implementation", comp.name, algo),
238                    ),
239                };
240            }
241        }
242
243        // Find by problem domain
244        if let Some(domain) = self.engine.primary_domain(parsed) {
245            let components = self.graph.find_by_domain(domain);
246            if let Some(comp) = components.first() {
247                return ComponentRecommendation::new(
248                    comp.name.clone(),
249                    0.85,
250                    format!("{} is recommended for {} tasks", comp.name, domain),
251                );
252            }
253        }
254
255        // Default recommendation based on performance hints
256        if parsed.performance_hints.contains(&PerformanceHint::GPURequired) {
257            return ComponentRecommendation::new(
258                "trueno",
259                0.7,
260                "GPU acceleration available via trueno",
261            );
262        }
263
264        if parsed.performance_hints.contains(&PerformanceHint::Distributed) {
265            return ComponentRecommendation::new(
266                "repartir",
267                0.7,
268                "Distributed computing via repartir",
269            );
270        }
271
272        // Fallback to batuta for general orchestration
273        ComponentRecommendation::new(
274            "batuta",
275            0.5,
276            "General orchestration framework for the Sovereign AI Stack",
277        )
278    }
279
280    fn recommend_supporting(
281        &self,
282        primary: &ComponentRecommendation,
283        parsed: &ParsedQuery,
284        constraints: &QueryConstraints,
285    ) -> Vec<ComponentRecommendation> {
286        let mut supporting = Vec::new();
287
288        // Get integration patterns from primary
289        let integrations = self.graph.integrations_from(&primary.component);
290        for pattern in integrations.iter().take(2) {
291            if let Some(comp) = self.graph.get_component(&pattern.to) {
292                supporting.push(ComponentRecommendation::new(
293                    comp.name.clone(),
294                    0.7,
295                    format!("Integrates via {} pattern", pattern.pattern_name),
296                ));
297            }
298        }
299
300        // Data-driven conditional recommendations
301        let is_ml = parsed.domains.iter().any(|d| {
302            matches!(
303                d,
304                ProblemDomain::SupervisedLearning
305                    | ProblemDomain::UnsupervisedLearning
306                    | ProblemDomain::DeepLearning
307                    | ProblemDomain::SpeechRecognition
308            )
309        });
310        let is_large = constraints.data_size.map(|d| d.is_large()).unwrap_or(false);
311        let is_pipeline = parsed.domains.contains(&ProblemDomain::DataPipeline);
312        let is_inference = parsed.domains.contains(&ProblemDomain::Inference);
313
314        let candidates: &[(bool, &str, f32, &str)] = &[
315            (is_ml, "trueno", 0.8, "SIMD/GPU backend for compute acceleration"),
316            (is_large, "repartir", 0.6, "Distribution recommended for large dataset"),
317            (is_pipeline, "alimentar", 0.7, "Data loading and preprocessing"),
318            (is_inference, "realizar", 0.85, "Model serving and inference"),
319        ];
320        for &(condition, component, confidence, rationale) in candidates {
321            if condition && primary.component != component {
322                supporting.push(ComponentRecommendation::new(component, confidence, rationale));
323            }
324        }
325
326        supporting
327    }
328
329    /// Algorithm-to-module-path lookup table: (component, algo_patterns, path).
330    /// Each entry matches when the component name matches AND any algo pattern is found.
331    const ALGORITHM_PATHS: &[(&str, &[&str], &str)] = &[
332        ("aprender", &["random_forest"], "aprender::tree::RandomForestClassifier"),
333        ("aprender", &["decision_tree"], "aprender::tree::DecisionTreeClassifier"),
334        ("aprender", &["linear_regression"], "aprender::linear::LinearRegression"),
335        ("aprender", &["logistic_regression"], "aprender::linear::LogisticRegression"),
336        (
337            "aprender",
338            &["gbm", "gradient_boosting"],
339            "aprender::ensemble::GradientBoostingClassifier",
340        ),
341        ("aprender", &["kmeans", "k_means"], "aprender::cluster::KMeans"),
342        ("aprender", &["pca"], "aprender::decomposition::PCA"),
343        ("aprender", &["svm"], "aprender::svm::SVC"),
344        ("aprender", &["knn"], "aprender::neighbors::KNeighborsClassifier"),
345        ("entrenar", &["lora"], "entrenar::lora::LoRA"),
346        ("entrenar", &["qlora"], "entrenar::lora::QLoRA"),
347    ];
348
349    fn get_algorithm_path(&self, component: &StackComponent, algorithm: &str) -> Option<String> {
350        Self::ALGORITHM_PATHS
351            .iter()
352            .find(|(comp, pats, _)| {
353                *comp == component.name && pats.iter().any(|p| algorithm.contains(p))
354            })
355            .map(|(_, _, path)| (*path).to_string())
356    }
357
358    fn compute_rationale(
359        &self,
360        backend: Backend,
361        complexity: OpComplexity,
362        constraints: &QueryConstraints,
363    ) -> String {
364        let size_str = constraints
365            .data_size
366            .map(|d| match d {
367                DataSize::Samples(n) => format!("{} samples", format_number(n)),
368                DataSize::Bytes(n) => format!("{} bytes", format_number(n)),
369                DataSize::Unknown => "unknown size".into(),
370            })
371            .unwrap_or_else(|| "unspecified size".into());
372
373        match backend {
374            Backend::Scalar => {
375                format!(
376                    "Scalar operations sufficient for small {} with {:?} complexity",
377                    size_str, complexity
378                )
379            }
380            Backend::SIMD => {
381                format!(
382                    "SIMD vectorization optimal for {} with {:?} complexity",
383                    size_str, complexity
384                )
385            }
386            Backend::GPU => {
387                format!("GPU acceleration recommended for {} with {:?} complexity - PCIe overhead amortized", size_str, complexity)
388            }
389            Backend::Distributed => {
390                format!("Distributed execution for {} exceeds single-node capacity", size_str)
391            }
392        }
393    }
394
    /// Algorithm parallelizability estimates: (algo_patterns, fraction).
    /// The fraction feeds Amdahl's Law in `estimate_parallel_fraction`.
    const ALGO_PARALLEL: &[(&[&str], f64)] =
        &[(&["random_forest", "gbm"], 0.95), (&["kmeans"], 0.85), (&["linear"], 0.7)];

    /// Domain parallelizability estimates: (domain, fraction).
    /// Used only when no algorithm-specific estimate matches.
    const DOMAIN_PARALLEL: &[(ProblemDomain, f64)] =
        &[(ProblemDomain::DeepLearning, 0.8), (ProblemDomain::SupervisedLearning, 0.75)];

403    fn estimate_parallel_fraction(&self, parsed: &ParsedQuery) -> f64 {
404        if let Some(algo) = parsed.algorithms.first() {
405            if let Some(&(_, frac)) =
406                Self::ALGO_PARALLEL.iter().find(|(pats, _)| pats.iter().any(|p| algo.contains(p)))
407            {
408                return frac;
409            }
410        }
411
412        Self::DOMAIN_PARALLEL
413            .iter()
414            .find(|(domain, _)| parsed.domains.contains(domain))
415            .map_or(0.6, |&(_, frac)| frac)
416    }
417
    // Produce a copy-pasteable example for the primary component; returns
    // None for components without a template below. The examples are emitted
    // verbatim (raw strings), so edits here change user-visible output.
    fn generate_code_example(
        &self,
        primary: &ComponentRecommendation,
        _supporting: &[ComponentRecommendation],
        parsed: &ParsedQuery,
    ) -> Option<String> {
        // Generate contextual code example based on primary component
        match primary.component.as_str() {
            "aprender" => {
                // Use the concrete module path when recommend_primary found
                // one; otherwise default to the RandomForest classifier.
                let path =
                    primary.path.as_deref().unwrap_or("aprender::tree::RandomForestClassifier");
                let _algo = parsed.algorithms.first().map(|s| s.as_str()).unwrap_or("RandomForest");

                // Template is a format string: doubled braces {{ }} are
                // literal braces in the rendered example.
                Some(format!(
                    r#"use {};

// Load data
let (X_train, X_test, y_train, y_test) = train_test_split(&X, &y, 0.2)?;

// Train model
let model = {}::new()
    .n_estimators(100)
    .fit(&X_train, &y_train)?;

// Predict
let predictions = model.predict(&X_test)?;
let accuracy = accuracy_score(&y_test, &predictions);
println!("Accuracy: {{:.2}}%", accuracy * 100.0);

#[cfg(test)]
mod tests {{
    #[test]
    fn test_model_builder_params() {{
        let n_estimators = 100;
        let test_size = 0.2_f64;
        assert!(n_estimators > 0);
        assert!(test_size > 0.0 && test_size < 1.0);
    }}

    #[test]
    fn test_predictions_non_empty() {{
        let predictions = vec![0, 1, 1, 0, 1];
        assert!(!predictions.is_empty());
    }}

    #[test]
    fn test_accuracy_in_range() {{
        let accuracy = 0.85_f64;
        assert!(accuracy >= 0.0 && accuracy <= 1.0);
    }}
}}"#,
                    path,
                    // Last path segment is the type name, e.g. "KMeans".
                    path.split("::").last().unwrap_or("Model")
                ))
            }
            "trueno" => Some(
                r#"use trueno::prelude::*;

// Create tensors with SIMD acceleration
let a = Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0]);
let b = Tensor::from_vec(vec![5.0, 6.0, 7.0, 8.0]);

// SIMD-accelerated operations
let result = a.dot(&b);
println!("Dot product: {}", result);

#[cfg(test)]
mod tests {
    #[test]
    fn test_tensor_creation() {
        let data = vec![1.0, 2.0, 3.0, 4.0];
        assert_eq!(data.len(), 4);
    }

    #[test]
    fn test_dot_product_result() {
        let a = vec![1.0, 2.0, 3.0, 4.0];
        let b = vec![5.0, 6.0, 7.0, 8.0];
        let dot: f64 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        assert_eq!(dot, 70.0);
    }

    #[test]
    fn test_simd_elements_finite() {
        let data = vec![1.0_f64, 2.0, 3.0, 4.0];
        assert!(data.iter().all(|x| x.is_finite()));
    }
}"#
                .into(),
            ),
            "depyler" => Some(
                r"# Run depyler to convert Python to Rust
batuta transpile --source my_project.py --output rust-output/

# The sklearn code:
#   from sklearn.ensemble import RandomForestClassifier
#   model = RandomForestClassifier(n_estimators=100)
#
# Becomes:
#   use aprender::tree::RandomForestClassifier;
#   let model = RandomForestClassifier::new().n_estimators(100);"
                    .into(),
            ),
            "realizar" => Some(
                r#"use realizar::ModelRegistry;

// Load trained model
let registry = ModelRegistry::new();
registry.load_apr("classifier", "model.apr")?;

// Serve predictions
let input = vec![1.0, 2.0, 3.0, 4.0];
let prediction = registry.predict("classifier", &input)?;
println!("Prediction: {:?}", prediction);

#[cfg(test)]
mod tests {
    #[test]
    fn test_registry_construction() {
        let model_name = "classifier";
        assert!(!model_name.is_empty());
    }

    #[test]
    fn test_input_feature_count() {
        let input = vec![1.0, 2.0, 3.0, 4.0];
        assert_eq!(input.len(), 4);
    }

    #[test]
    fn test_model_path_valid() {
        let path = "model.apr";
        assert!(path.ends_with(".apr"));
    }
}"#
                .into(),
            ),
            "whisper-apr" => Some(
                r#"use whisper_apr::WhisperModel;

// Load quantized Whisper model
let model = WhisperModel::from_apr("whisper-base.apr")?;

// Transcribe audio file
let audio = std::fs::read("recording.wav")?;
let result = model.transcribe(&audio)?;
println!("Text: {}", result.text);

// Streaming transcription
// let stream = model.stream_transcribe(audio_stream)?;
// while let Some(segment) = stream.next().await {
//     println!("[{:.1}s] {}", segment.timestamp, segment.text);
// }

#[cfg(test)]
mod tests {
    #[test]
    fn test_model_path_valid() {
        let path = "whisper-base.apr";
        assert!(path.ends_with(".apr"));
    }

    #[test]
    fn test_transcription_produces_text() {
        let text = "Hello world";
        assert!(!text.is_empty());
    }

    #[test]
    fn test_audio_bytes_valid_utf8() {
        let text = "transcribed text";
        assert!(std::str::from_utf8(text.as_bytes()).is_ok());
    }
}"#
                .into(),
            ),
            "provable-contracts" => Some(
                r"# Define YAML contract for a SIMD kernel
# contracts/softmax_contract.yaml
contract:
  name: fused_softmax
  module: trueno::kernels::softmax
  preconditions:
    - input.len() > 0
    - input.len() % 8 == 0  # AVX2 alignment
  postconditions:
    - result.is_ok()
    - output.iter().all(|x| (0.0..=1.0).contains(x))
    - (output.iter().sum::<f32>() - 1.0).abs() < 1e-5

# Generate Kani verification harness
provable-contracts scaffold contracts/softmax_contract.yaml \
    --output harnesses/softmax_harness.rs

# Run bounded model checking
provable-contracts verify harnesses/softmax_harness.rs \
    --unwind 16 --solver cadical

# Generate probar property tests from the same contract
provable-contracts probar contracts/softmax_contract.yaml \
    --output tests/softmax_props.rs"
                    .into(),
            ),
            "tiny-model-ground-truth" => Some(
                r#"# Generate oracle outputs from HuggingFace reference
python -m tiny_model_ground_truth generate \
    --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
    --prompts "Hello" "The capital of France" \
    --output oracle/

# Validate realizar inference against oracle
python -m tiny_model_ground_truth validate \
    --oracle oracle/ \
    --engine realizar \
    --model model.apr \
    --tolerance 1e-4

# Check quantization drift (GGUF → APR → inference)
python -m tiny_model_ground_truth drift \
    --oracle oracle/ \
    --gguf model.gguf \
    --apr model.apr \
    --report drift_report.html"#
                    .into(),
            ),
            "rmedia" => Some(
                r#"use rmedia::prelude::*;

// Load MLT XML timeline
let timeline = Timeline::from_mlt("course.mlt")?;

// Render video (headless, 1.73x faster than melt)
let job = RenderJob::new(&timeline)
    .output("output.mp4")
    .codec(Codec::H264 { crf: 23 })
    .resolution(1920, 1080);
job.render()?;

// Course production pipeline
// rmedia also supports:
// - Subtitle burn-in: subtitle::burn_in(video, srt, output)
// - Transitions: timeline.add_transition(Dissolve::new(1.0))
// - Audio ducking: audio::duck(narration, background, ratio)

#[cfg(test)]
mod tests {
    #[test]
    fn test_render_job_output_path() {
        let output = "output.mp4";
        assert!(output.ends_with(".mp4"));
    }

    #[test]
    fn test_crf_in_valid_range() {
        let crf = 23;
        assert!((0..=51).contains(&crf));
    }

    #[test]
    fn test_resolution_1080p() {
        let (w, h) = (1920, 1080);
        assert_eq!(w * h, 2_073_600);
    }
}"#
                .into(),
            ),
            "repartir" => Some(
                r#"use repartir::{Pool, task::{Task, Backend}};

// Create pool with CPU workers
let pool = Pool::builder()
    .cpu_workers(8)
    .build()?;

// Submit task for execution
let task = Task::builder()
    .binary("./worker")
    .arg("--input").arg("data.csv")
    .backend(Backend::Cpu)
    .build()?;

let result = pool.submit(task).await?;
println!("Output: {}", result.stdout_str()?);

// For multi-machine distribution:
// use repartir::executor::remote::RemoteExecutor;
// let remote = RemoteExecutor::builder()
//     .add_worker("node1:9000")
//     .add_worker("node2:9000")
//     .build().await?;

#[cfg(test)]
mod tests {
    #[test]
    fn test_pool_builder_workers() {
        let cpu_workers = 8;
        assert!(cpu_workers > 0);
    }

    #[test]
    fn test_task_binary_set() {
        let binary = "./worker";
        assert!(!binary.is_empty());
    }

    #[test]
    fn test_backend_selection() {
        let backend = "Cpu";
        let valid = vec!["Cpu", "Gpu", "Remote"];
        assert!(valid.contains(&backend));
    }
}"#
                .into(),
            ),
            // No template for other components.
            _ => None,
        }
    }

736    fn generate_related_queries(&self, parsed: &ParsedQuery) -> Vec<String> {
737        let mut related = Vec::new();
738
739        // Domain-based related queries
740        let domain_queries: &[(ProblemDomain, &[&str])] = &[
741            (
742                ProblemDomain::SupervisedLearning,
743                &[
744                    "How do I tune hyperparameters for this model?",
745                    "What's the best way to handle imbalanced data?",
746                ],
747            ),
748            (
749                ProblemDomain::PythonMigration,
750                &[
751                    "How do I convert numpy arrays to trueno tensors?",
752                    "What sklearn features are supported in aprender?",
753                ],
754            ),
755            (
756                ProblemDomain::Inference,
757                &[
758                    "How do I optimize for low latency?",
759                    "What model formats does realizar support?",
760                ],
761            ),
762            (
763                ProblemDomain::SpeechRecognition,
764                &[
765                    "How do I stream transcription in real-time?",
766                    "What quantization levels does whisper-apr support?",
767                ],
768            ),
769            (
770                ProblemDomain::MediaProduction,
771                &[
772                    "How do I render a course video from MLT XML?",
773                    "How do I integrate whisper-apr transcription with rmedia?",
774                ],
775            ),
776        ];
777        for (domain, queries) in domain_queries {
778            if parsed.domains.contains(domain) {
779                related.extend(queries.iter().map(|q| (*q).into()));
780            }
781        }
782
783        // Performance-hint-based related queries
784        if parsed.performance_hints.contains(&PerformanceHint::Distributed) {
785            related.push("How do I scale to multiple nodes?".into());
786            related.push("What's the communication overhead for distributed training?".into());
787        }
788
789        related.truncate(3);
790        related
791    }
792
793    /// Get capabilities of a component
794    pub fn get_capabilities(&self, component: &str) -> Vec<String> {
795        self.graph
796            .get_component(component)
797            .map(|c| c.capabilities.iter().map(|cap| cap.name.clone()).collect())
798            .unwrap_or_default()
799    }
800
    /// Get integration pattern between two components
    ///
    /// Returns a cloned pattern, or `None` when no edge from `from` to `to`
    /// exists in the knowledge graph.
    pub fn get_integration(&self, from: &str, to: &str) -> Option<IntegrationPattern> {
        self.graph.get_integration(from, to).cloned()
    }

    /// List all available components
    ///
    /// Names are returned in the knowledge graph's iteration order.
    pub fn list_components(&self) -> Vec<String> {
        self.graph.component_names().cloned().collect()
    }

    /// Get component details
    ///
    /// Returns `None` for names not present in the knowledge graph.
    pub fn get_component(&self, name: &str) -> Option<&StackComponent> {
        self.graph.get_component(name)
    }
}

/// Format large numbers for display, e.g. `1_500_000` → `"1M"`.
/// Division truncates toward zero (no rounding, no decimals).
fn format_number(n: u64) -> String {
    // Scales checked largest-first; first threshold reached wins.
    const SCALES: [(u64, char); 3] = [(1_000_000_000, 'B'), (1_000_000, 'M'), (1_000, 'K')];
    for (scale, suffix) in SCALES {
        if n >= scale {
            return format!("{}{}", n / scale, suffix);
        }
    }
    n.to_string()
}

// =============================================================================
// Tests
// =============================================================================

#[cfg(test)]
#[path = "recommender_tests.rs"]
mod tests;