1#![warn(missing_docs)]
42#![warn(clippy::all)]
43
44pub mod adapter_manager;
45pub mod autodetect;
46pub mod backends;
47pub mod bitnet;
48pub mod capabilities;
49pub mod claude_flow;
50pub mod context;
51pub mod error;
52pub mod evaluation;
53pub mod gguf;
54pub mod hub;
55pub mod intelligence;
56pub mod kernels;
57pub mod kv_cache;
58pub mod lora;
59pub mod memory_pool;
60#[cfg(all(target_os = "macos", feature = "metal-compute"))]
61pub mod metal;
62pub mod models;
63pub mod optimization;
64pub mod paged_attention;
65pub mod policy_store;
66pub mod quality;
67pub mod quantize;
68pub mod reasoning_bank;
69pub mod reflection;
70pub mod ruvector_integration;
71pub mod serving;
72pub mod session;
73pub mod session_index;
74pub mod sona;
75pub mod speculative;
76pub mod tokenizer;
77pub mod training;
78pub mod types;
79pub mod witness_log;
80
81#[cfg(test)]
83mod tests;
84
85pub use adapter_manager::{AdapterConfig, AdapterManager, LoraAdapter};
87pub use autodetect::{
88 Architecture, ComputeBackend, CoreInfo, CpuFeatures, GpuBackend, GpuCapabilities,
89 InferenceConfig, Platform, SystemCapabilities,
90};
91#[cfg(feature = "candle")]
92pub use backends::CandleBackend;
93pub use backends::{
94 create_backend, DType, DeviceType, GenerateParams, GeneratedToken, LlmBackend,
95 ModelArchitecture, ModelConfig, ModelInfo, Quantization, SharedBackend, SpecialTokens,
96 StreamEvent, TokenStream, Tokenizer,
97};
98#[cfg(feature = "async-runtime")]
99pub use backends::{AsyncTokenStream, LlmBackendAsync};
100pub use claude_flow::{
101 AgentContext,
102 AgentCoordinator,
103 AgentRouter,
104 AgentState,
105 AgentType,
106 AnalyzerStats as ModelAnalyzerStats,
107 ClassificationResult,
108 ClaudeFlowAgent,
109 ClaudeFlowTask,
110 ClaudeModel,
112 ClaudeRequest,
113 ClaudeResponse,
114 ComplexityFactors,
116 ComplexityScore,
117 ComplexityWeights,
118 ContentBlock,
119 ContextManager,
120 ContextWindow,
121 CoordinatorStats,
122 CostEstimator,
123 FlowOptimizer,
124 HnswDistanceMetric,
125 HnswRouter,
127 HnswRouterConfig,
128 HnswRouterStats,
129 HnswRoutingResult,
130 HooksConfig,
131 HooksIntegration,
133 HybridRouter,
134 LatencySample,
135 LatencyStats as ClaudeLatencyStats,
136 LatencyTracker,
137 LearningMetrics,
138 Message,
139 MessageRole,
140 ModelRouter,
141 ModelRoutingDecision,
142 ModelSelector,
143 OptimizationConfig,
144 OptimizationResult,
145 PatternMatch,
146 PostEditInput,
147 PostEditResult,
148 PostTaskInput,
149 PostTaskResult,
150 PreEditInput,
151 PreEditResult,
152 PreTaskInput,
153 PreTaskResult,
154 QualityAssessment,
155 QualityMonitor,
156 ResponseStreamer,
157 RoutingDecision as AgentRoutingDecision,
158 SelectionCriteria,
159 SelectorStats,
160 SessionEndResult,
161 SessionMetrics,
162 SessionState as HooksSessionState,
163 StepResult,
164 StreamEvent as ClaudeStreamEvent,
165 StreamStats,
166 StreamToken,
167 TaskClassifier,
168 TaskComplexityAnalyzer,
169 TaskPattern,
170 TaskType,
171 UsageStats,
172 WorkflowResult,
173 WorkflowStep,
174};
175pub use error::{Result, RuvLLMError};
176pub use gguf::{
177 GgufFile,
178 GgufHeader,
179 GgufLoader,
181 GgufModelLoader,
182 GgufQuantType,
183 GgufValue,
184 LayerWeights,
185 LoadConfig,
186 LoadProgress,
187 LoadedTensor,
188 LoadedWeights,
189 ModelConfig as GgufModelConfig,
190 ModelInitializer,
191 ModelWeights,
192 ProgressModelBuilder,
193 QuantizedTensor,
194 QuantizedWeight,
195 StreamingLoader,
196 TensorCategory,
197 TensorInfo,
198 TensorNameMapper,
199 WeightTensor,
200};
201pub use hub::{
202 default_cache_dir,
203 get_hf_token,
204 get_model_info,
205 ChecksumVerifier,
206 DatasetInfo,
207 DownloadConfig,
208 DownloadError,
209 DownloadProgress,
210 Framework,
211 HardwareRequirements,
212 HubError,
214 License,
215 MetricResult,
216 ModelCard,
218 ModelCardBuilder,
219 ModelDownloader,
221 ModelInfo as HubModelInfo,
222 ModelMetadata,
223 ModelSize,
224 ModelUploader,
226 MultiProgress,
227 ProgressBar,
229 ProgressCallback,
230 ProgressIndicator,
231 ProgressStyle,
232 QuantizationLevel,
233 RuvLtraRegistry,
235 TaskType as HubTaskType,
236 UploadConfig,
237 UploadError,
238 UploadProgress,
239};
240pub use kv_cache::{
241 CacheQuantization, CacheTier, KvCacheConfig, KvCacheStats, PooledKvBlock, PooledKvCache,
242 PooledKvCacheStats, TwoTierKvCache,
243};
244pub use lora::{
245 AdaptFeedback, AdapterComposer, AdapterPool, AdapterRegistry, CompositionStrategy,
246 EwcRegularizer, LearningRateSchedule, MicroLoRA, MicroLoraConfig, TargetModule, TrainingConfig,
247 TrainingPipeline,
248};
249pub use memory_pool::{
250 ArenaStats, BufferPool, BufferPoolStats, BufferSize, InferenceArena, MemoryManager,
251 MemoryManagerConfig, MemoryManagerStats, PooledBuffer, ScratchSpace, ScratchSpaceManager,
252 ScratchStats, CACHE_LINE_SIZE, DEFAULT_ALIGNMENT,
253};
254pub use optimization::{
255 AdaptationResult, BatchSizeStrategy, ConsolidationStrategy, InferenceMetrics,
256 KvCachePressurePolicy, LatencyHistogram, LearningLoopStats, MetricsCollector, MetricsSnapshot,
257 MovingAverage, OptimizationDecision, OptimizationTrigger, RealtimeConfig, RealtimeOptimizer,
258 SonaLlm, SonaLlmConfig, SpeculativeConfig, TokenBudgetAllocation, TrainingSample,
259};
260pub use paged_attention::{PageBlock, PageTable, PagedAttention, PagedAttentionConfig};
261pub use policy_store::{PolicyEntry, PolicyStore, PolicyType, QuantizationPolicy, RouterPolicy};
262pub use quantize::{
263 dequantize_for_ane,
264 estimate_memory_q4,
266 estimate_memory_q5,
267 estimate_memory_q8,
268 quantize_ruvltra_q4,
270 quantize_ruvltra_q5,
271 quantize_ruvltra_q8,
272 MemoryEstimate,
273 Q4KMBlock,
275 Q5KMBlock,
276 Q8Block,
277 QuantConfig,
278 QuantProgress,
280 QuantStats,
281 RuvltraQuantizer,
283 TargetFormat,
284};
285pub use serving::{
286 BatchStats,
287 BatchedRequest,
289 CompletedRequest,
290 ContinuousBatchScheduler,
292 DecodeTask,
293 FinishReason,
294 GenerationResult,
295 InferenceRequest,
297 IterationPlan,
298 IterationScheduler,
299 KvCacheAllocation,
300 KvCacheManager,
302 KvCacheManagerStats,
303 KvCachePoolConfig,
304 PreemptionMode,
305 PrefillTask,
306 Priority,
307 PriorityPolicy,
308 RequestId,
309 RequestQueue,
310 RequestState,
311 RunningRequest,
312 ScheduledBatch,
313 SchedulerConfig,
314 SchedulerStats,
315 ServingEngine,
317 ServingEngineConfig,
318 ServingMetrics,
319 TokenBudget,
320 TokenOutput,
321};
322pub use session::{Session, SessionConfig, SessionManager};
323pub use session_index::{KvCacheReference, SessionIndex, SessionState};
324pub use sona::{LearningLoop, SonaConfig, SonaIntegration};
325pub use speculative::{
326 log_softmax, sample_from_probs, softmax, top_k_filter, top_p_filter, AtomicSpeculativeStats,
327 SpeculationTree, SpeculativeConfig as SpeculativeDecodingConfig, SpeculativeDecoder,
328 SpeculativeStats, TreeNode, VerificationResult,
329};
330pub use tokenizer::{
331 ChatMessage, ChatTemplate, Role, RuvTokenizer, StreamingDecodeBuffer, TokenizerSpecialTokens,
332};
333pub use training::{
334 AugmentationConfig,
335 ClaudeTaskDataset,
337 ClaudeTaskExample,
338 ComplexityLevel,
339 DatasetConfig,
340 DatasetGenerator,
341 DatasetStats,
342 DifficultyLevel,
343 DifficultyWeights,
344 DomainType,
345 EvaluationMetrics,
346 GrpoBatch,
347 GrpoConfig,
349 GrpoOptimizer,
350 GrpoSample,
351 GrpoStats,
352 GrpoUpdateResult,
353 McpToolCategory,
354 McpToolDef,
355 McpToolTrainer,
357 McpTrainingConfig,
358 ParamType,
359 SampleGroup,
360 StepBuilder,
361 TaskCategory,
362 TaskMetadata,
363 ToolCallDataset,
365 ToolCallExample,
366 ToolDatasetConfig,
367 ToolDatasetStats,
368 ToolParam,
369 ToolTrajectory,
370 TrainingCheckpoint,
371 TrainingResult,
372 TrainingStats,
373 TrajectoryBuilder,
374 TrajectoryMetadata,
375 TrajectoryStep,
376};
377pub use types::*;
378pub use witness_log::{
379 AsyncWriteConfig, LatencyBreakdown, RoutingDecision, WitnessEntry, WitnessLog, WitnessLogStats,
380};
381
382pub use models::{
384 AneDispatcher,
385 AneOptimization,
386 MemoryLayout,
387 QuantizationType,
388 RuvLtraAttention,
389 RuvLtraConfig,
391 RuvLtraDecoderLayer,
392 RuvLtraMLP,
393 RuvLtraModel,
395 RuvLtraModelInfo,
397};
398
399pub use capabilities::{
401 gate_feature, gate_feature_or, RuvectorCapabilities, ATTENTION_AVAILABLE, GNN_AVAILABLE,
402 GRAPH_AVAILABLE, HNSW_AVAILABLE, PARALLEL_AVAILABLE, SIMD_AVAILABLE, SONA_AVAILABLE,
403};
404pub use ruvector_integration::{
405 IndexStats,
406 IntegrationConfig,
407 IntegrationStats,
408 IntelligenceLayer,
410 IntelligenceLayerStats,
411 IntelligentRoutingDecision,
412 RuvectorIntegration,
414 SearchResultWithMetadata,
415 UnifiedIndex,
417 VectorMetadata,
418};
419
420pub use intelligence::{
422 FileSignalProvider, HumanVerdict, IntelligenceLoader, IntelligenceProvider, Outcome,
423 ProviderError, ProviderQualityWeights, ProviderResult, QualityFactors, QualitySignal,
424};
425
426pub use quality::{
428 CoherenceConfig,
429 CoherenceValidator,
431 CoherenceViolation,
432 CombinedValidator,
433 ComparisonResult,
434 ContradictionResult,
435 DiversificationSuggestion,
436 DiversityAnalyzer,
438 DiversityConfig,
439 DiversityResult,
440 FormatValidator,
441 ImprovementRecommendation,
442 JsonSchemaValidator,
443 LogicalFlowResult,
444 ModeCollapseResult,
445 QualityDimension,
446 QualityHistory,
447 QualityMetrics,
449 QualityScoringEngine,
451 QualitySummary,
452 QualityWeights,
453 RangeValidator,
454 SchemaValidator,
456 ScoringConfig,
457 ScoringContext,
458 SemanticConsistencyResult,
459 TrendAnalysis,
460 TrendDirection,
461 TypeValidator,
462 ValidationCombinator,
463 ValidationError,
464 ValidationResult,
465};
466
467pub use context::{
469 AgenticMemory,
471 AgenticMemoryConfig,
472 AttentionWeights,
473 CacheStats,
474 CachedToolResult,
475 ClaudeFlowBridgeConfig,
476 ClaudeFlowMemoryBridge,
478 CompressedEpisode,
479 ContextElement,
480 ContextManagerConfig,
481 ElementPriority,
482 Episode,
483 EpisodeMetadata,
484 EpisodeTrajectory,
485 EpisodicMemory,
487 EpisodicMemoryConfig,
488 IntelligentContextManager,
490 MemoryType,
491 PreparedContext,
492 PriorityScorer,
493 ScratchpadEntry,
494 SemanticCacheConfig,
495 SemanticToolCache,
497 SyncResult,
498 TaskContext,
499 WorkingMemory,
501 WorkingMemoryConfig,
502};
503
504pub use reflection::{
506 BaseAgent,
507 CompletenessChecker,
508 ConfidenceCheckRecord,
509 ConfidenceChecker,
511 ConfidenceConfig,
512 ConfidenceFactorWeights,
513 ConfidenceLevel,
514 ConsistencyChecker,
515 CorrectnessChecker,
516 CritiqueIssue,
517 CritiqueResult,
518 ErrorCategory,
519 ErrorCluster,
520 ErrorLearnerStats,
521 ErrorPattern,
522 ErrorPatternLearner,
524 ErrorPatternLearnerConfig,
525 ExecutionContext,
526 ExecutionResult,
527 IssueCategory,
528 Perspective,
530 PerspectiveConfig,
531 PreviousAttempt,
532 RecoveryOutcome,
533 RecoveryStrategy,
534 RecoverySuggestion,
535 Reflection,
536 ReflectionConfig,
537 ReflectionStrategy,
538 ReflectiveAgent,
540 ReflectiveAgentStats,
541 RetryConfig,
542 RevisionResult,
543 SimilarError,
544 UnifiedCritique,
545 WeakPoint,
546 WeaknessType,
547};
548
549pub use reasoning_bank::{
551 CompressedTrajectory,
552 ConsolidationConfig,
553 DistillationConfig,
554 FailurePattern as VerdictFailurePattern,
555 FisherInformation,
556 ImportanceScore,
557 KeyLesson,
558 MemoryDistiller,
560 Pattern,
561 PatternCategory,
562 PatternConsolidator,
564 PatternSearchResult,
565 PatternStats,
566 PatternStore,
568 PatternStoreConfig,
569 ReasoningBank,
571 ReasoningBankConfig,
572 ReasoningBankStats,
573 RecoveryStrategy as VerdictRecoveryStrategy,
574 RootCause,
575 StepOutcome,
576 Trajectory as ReasoningTrajectory,
578 TrajectoryId,
579 TrajectoryRecorder,
580 TrajectoryStep as ReasoningTrajectoryStep,
581 Verdict as ReasoningVerdict,
583 VerdictAnalyzer,
584};
585
586#[cfg(all(target_os = "macos", feature = "metal-compute"))]
588pub use metal::{
589 get_device_info, is_metal_available, shader_source, tile_sizes, AttentionParams, GemmParams,
590 MetalBuffer, MetalBufferPool, MetalConfig, MetalContext, MetalDeviceInfo, MetalPipelines,
591 NormParams, RopeParams,
592};
593
594#[derive(Debug, Clone)]
620pub struct RuvLLMConfig {
621 pub storage_path: String,
623 pub paged_attention: PagedAttentionConfig,
625 pub kv_cache: KvCacheConfig,
627 pub session: SessionConfig,
629 pub sona: SonaConfig,
631 pub max_sessions: usize,
633 pub embedding_dim: usize,
635}
636
637impl Default for RuvLLMConfig {
638 fn default() -> Self {
639 Self {
640 storage_path: ".ruvllm".to_string(),
641 paged_attention: PagedAttentionConfig::default(),
642 kv_cache: KvCacheConfig::default(),
643 session: SessionConfig::default(),
644 sona: SonaConfig::default(),
645 max_sessions: 1000,
646 embedding_dim: 768,
647 }
648 }
649}
650
651pub struct RuvLLMEngine {
699 config: RuvLLMConfig,
701 policy_store: PolicyStore,
703 session_manager: SessionManager,
705 session_index: SessionIndex,
707 adapter_manager: AdapterManager,
709 witness_log: WitnessLog,
711 sona: SonaIntegration,
713}
714
715impl RuvLLMEngine {
716 pub fn new(config: RuvLLMConfig) -> Result<Self> {
740 let storage_path = &config.storage_path;
741
742 let policy_store =
743 PolicyStore::new(&format!("{}/policies", storage_path), config.embedding_dim)?;
744
745 let session_index =
746 SessionIndex::new(&format!("{}/sessions", storage_path), config.embedding_dim)?;
747
748 let witness_log =
749 WitnessLog::new(&format!("{}/witness", storage_path), config.embedding_dim)?;
750
751 let session_manager = SessionManager::new(config.session.clone());
752 let adapter_manager = AdapterManager::new();
753 let sona = SonaIntegration::new(config.sona.clone());
754
755 Ok(Self {
756 config,
757 policy_store,
758 session_manager,
759 session_index,
760 adapter_manager,
761 witness_log,
762 sona,
763 })
764 }
765
766 pub fn create_session(&self, user_id: Option<&str>) -> Result<Session> {
791 let session = self.session_manager.create_session(user_id)?;
792
793 let state = SessionState::from_session(&session);
795 self.session_index.store(&state)?;
796
797 Ok(session)
798 }
799
800 pub fn get_session(&self, session_id: &str) -> Result<Option<Session>> {
802 self.session_manager.get_session(session_id)
803 }
804
805 pub fn search_policies(
831 &self,
832 context_embedding: &[f32],
833 limit: usize,
834 ) -> Result<Vec<PolicyEntry>> {
835 self.policy_store.search(context_embedding, limit)
836 }
837
838 pub fn record_witness(&self, entry: WitnessEntry) -> Result<()> {
868 self.witness_log.record(entry)
869 }
870
871 pub fn search_witness(
873 &self,
874 query_embedding: &[f32],
875 limit: usize,
876 ) -> Result<Vec<WitnessEntry>> {
877 self.witness_log.search(query_embedding, limit)
878 }
879
880 pub fn sona(&self) -> &SonaIntegration {
882 &self.sona
883 }
884
885 pub fn adapters(&self) -> &AdapterManager {
887 &self.adapter_manager
888 }
889
890 pub fn policies(&self) -> &PolicyStore {
892 &self.policy_store
893 }
894}