#![allow(missing_docs)]
#![warn(clippy::all)]
#![allow(clippy::incompatible_msrv)]
#![allow(clippy::too_many_arguments)]
#![allow(clippy::type_complexity)]
#![allow(clippy::manual_div_ceil)]
#![allow(clippy::derivable_impls)]
#![allow(clippy::excessive_precision)]
#![allow(clippy::vec_init_then_push)]
#![allow(clippy::needless_borrows_for_generic_args)]
#![allow(clippy::unnecessary_map_or)]
#![allow(clippy::needless_range_loop)]
#![allow(clippy::field_reassign_with_default)]
#![allow(clippy::manual_range_contains)]
#![allow(clippy::approx_constant)]
#![allow(clippy::useless_vec)]
#![allow(clippy::redundant_closure)]
#![allow(clippy::len_zero)]
#![allow(clippy::single_char_add_str)]
#![allow(clippy::collapsible_if)]
#![allow(clippy::double_ended_iterator_last)]
#![allow(clippy::manual_clamp)]
#![allow(clippy::len_without_is_empty)]
#![allow(clippy::clone_on_copy)]
#![allow(clippy::map_flatten)]
#![allow(clippy::manual_inspect)]
#![allow(clippy::useless_format)]
#![allow(clippy::needless_borrow)]
#![allow(clippy::return_self_not_must_use)]
#![allow(clippy::manual_strip)]
#![allow(clippy::identity_op)]
#![allow(clippy::should_implement_trait)]
#![allow(clippy::missing_const_for_thread_local)]
#![allow(clippy::manual_range_patterns)]
#![allow(clippy::question_mark)]
#![allow(clippy::let_and_return)]
#![allow(clippy::cast_lossless)]
#![allow(clippy::manual_map)]
#![allow(clippy::map_entry)]
#![allow(clippy::same_item_push)]
#![allow(clippy::or_fun_call)]
#![allow(clippy::unnecessary_cast)]
#![allow(clippy::implicit_saturating_sub)]
#![allow(clippy::ref_as_ptr)]
#![allow(clippy::multiple_bound_locations)]
#![allow(non_camel_case_types)]
#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(dead_code)]
#![allow(unused_mut)]
#![allow(mismatched_lifetime_syntaxes)]
#![allow(unreachable_code)]
#![allow(unused_assignments)]
#![allow(unused_must_use)]
#![allow(clippy::module_inception)]
#![allow(clippy::items_after_test_module)]
#![allow(clippy::new_without_default)]
#![allow(clippy::inherent_to_string)]
#![allow(clippy::manual_is_ascii_check)]
#![allow(private_interfaces)]
#![allow(unexpected_cfgs)]
#![allow(unused_doc_comments)]
#![allow(clippy::assign_op_pattern)]
#![allow(clippy::cast_slice_from_raw_parts)]
#![allow(clippy::cloned_ref_to_slice_refs)]
#![allow(clippy::double_comparisons)]
#![allow(clippy::for_kv_map)]
#![allow(clippy::manual_pattern_char_comparison)]
#![allow(clippy::mut_from_ref)]
#![allow(clippy::needless_question_mark)]
#![allow(clippy::unnecessary_unwrap)]
#![allow(clippy::needless_return)]
#![allow(clippy::missing_safety_doc)]
#![allow(clippy::unwrap_or_default)]
#![allow(clippy::assertions_on_constants)]
#![allow(clippy::if_same_then_else)]
pub mod adapter_manager;
pub mod autodetect;
pub mod backends;
pub mod bitnet;
pub mod capabilities;
pub mod claude_flow;
pub mod context;
pub mod error;
pub mod evaluation;
pub mod gguf;
pub mod hub;
pub mod intelligence;
pub mod kernels;
pub mod kv_cache;
pub mod lora;
pub mod memory_pool;
#[cfg(all(target_os = "macos", feature = "metal-compute"))]
pub mod metal;
pub mod models;
pub mod moe;
pub mod optimization;
pub mod paged_attention;
pub mod policy_store;
pub mod qat;
pub mod quality;
#[cfg(feature = "quantize")]
pub mod quantize;
pub mod reasoning_bank;
pub mod reflection;
pub mod ruvector_integration;
pub mod serving;
pub mod session;
pub mod session_index;
pub mod sona;
pub mod speculative;
pub mod tokenizer;
pub mod training;
pub mod types;
pub mod witness_log;
#[cfg(test)]
mod tests;
pub use adapter_manager::{AdapterConfig, AdapterManager, LoraAdapter};
pub use autodetect::{
Architecture, ComputeBackend, CoreInfo, CpuFeatures, GpuBackend, GpuCapabilities,
InferenceConfig, Platform, SystemCapabilities,
};
#[cfg(feature = "candle")]
pub use backends::CandleBackend;
pub use backends::{
create_backend, DType, DeviceType, GenerateParams, GeneratedToken, LlmBackend,
ModelArchitecture, ModelConfig, ModelInfo, Quantization, SharedBackend, SpecialTokens,
StreamEvent, TokenStream, Tokenizer,
};
#[cfg(feature = "async-runtime")]
pub use backends::{AsyncTokenStream, LlmBackendAsync};
pub use claude_flow::{
AgentContext,
AgentCoordinator,
AgentRouter,
AgentState,
AgentType,
AnalyzerStats as ModelAnalyzerStats,
ClassificationResult,
ClaudeFlowAgent,
ClaudeFlowTask,
ClaudeModel,
ClaudeRequest,
ClaudeResponse,
ComplexityFactors,
ComplexityScore,
ComplexityWeights,
ContentBlock,
ContextManager,
ContextWindow,
CoordinatorStats,
CostEstimator,
FlowOptimizer,
HnswDistanceMetric,
HnswRouter,
HnswRouterConfig,
HnswRouterStats,
HnswRoutingResult,
HooksConfig,
HooksIntegration,
HybridRouter,
LatencySample,
LatencyStats as ClaudeLatencyStats,
LatencyTracker,
LearningMetrics,
Message,
MessageRole,
ModelRouter,
ModelRoutingDecision,
ModelSelector,
OptimizationConfig,
OptimizationResult,
PatternMatch,
PostEditInput,
PostEditResult,
PostTaskInput,
PostTaskResult,
PreEditInput,
PreEditResult,
PreTaskInput,
PreTaskResult,
QualityAssessment,
QualityMonitor,
ResponseStreamer,
RoutingDecision as AgentRoutingDecision,
SelectionCriteria,
SelectorStats,
SessionEndResult,
SessionMetrics,
SessionState as HooksSessionState,
StepResult,
StreamEvent as ClaudeStreamEvent,
StreamStats,
StreamToken,
TaskClassifier,
TaskComplexityAnalyzer,
TaskPattern,
TaskType,
UsageStats,
WorkflowResult,
WorkflowStep,
};
pub use error::{Result, RuvLLMError};
pub use gguf::{
GgufFile,
GgufHeader,
GgufLoader,
GgufModelLoader,
GgufQuantType,
GgufValue,
LayerWeights,
LoadConfig,
LoadProgress,
LoadedTensor,
LoadedWeights,
ModelConfig as GgufModelConfig,
ModelInitializer,
ModelWeights,
ProgressModelBuilder,
QuantizedTensor,
QuantizedWeight,
StreamingLoader,
TensorCategory,
TensorInfo,
TensorNameMapper,
WeightTensor,
};
pub use hub::{
default_cache_dir,
get_hf_token,
get_model_info,
ChecksumVerifier,
DatasetInfo,
DownloadConfig,
DownloadError,
DownloadProgress,
Framework,
HardwareRequirements,
HubError,
License,
MetricResult,
ModelCard,
ModelCardBuilder,
ModelDownloader,
ModelInfo as HubModelInfo,
ModelMetadata,
ModelSize,
ModelUploader,
MultiProgress,
ProgressBar,
ProgressCallback,
ProgressIndicator,
ProgressStyle,
QuantizationLevel,
RuvLtraRegistry,
TaskType as HubTaskType,
UploadConfig,
UploadError,
UploadProgress,
};
pub use kv_cache::{
CacheQuantization, CacheTier, KvCacheConfig, KvCacheStats, PooledKvBlock, PooledKvCache,
PooledKvCacheStats, TwoTierKvCache,
};
pub use lora::{
AdaptFeedback, AdapterComposer, AdapterPool, AdapterRegistry, CompositionStrategy,
EwcRegularizer, LearningRateSchedule, MicroLoRA, MicroLoraConfig, TargetModule, TrainingConfig,
TrainingPipeline,
};
pub use memory_pool::{
ArenaStats, BufferPool, BufferPoolStats, BufferSize, InferenceArena, MemoryManager,
MemoryManagerConfig, MemoryManagerStats, PooledBuffer, ScratchSpace, ScratchSpaceManager,
ScratchStats, CACHE_LINE_SIZE, DEFAULT_ALIGNMENT,
};
pub use moe::{
AffinityConfig, ExpertAffinity, ExpertId, ExpertPrecision, MoeMetrics, MoeMetricsSummary,
PrecisionAllocator, PrecisionConfig,
};
pub use optimization::{
AdaptationResult, BatchSizeStrategy, ConsolidationStrategy, InferenceMetrics,
KvCachePressurePolicy, LatencyHistogram, LearningLoopStats, MetricsCollector, MetricsSnapshot,
MovingAverage, OptimizationDecision, OptimizationTrigger, RealtimeConfig, RealtimeOptimizer,
SonaLlm, SonaLlmConfig, SpeculativeConfig, TokenBudgetAllocation, TrainingSample,
};
pub use paged_attention::{PageBlock, PageTable, PagedAttention, PagedAttentionConfig};
pub use policy_store::{PolicyEntry, PolicyStore, PolicyType, QuantizationPolicy, RouterPolicy};
pub use qat::{
create_quantizer, piq2_quantizer, piq3_quantizer, uniform_quantizer, DifferentiableQuantizer,
PiQuantDifferentiable, QatConfig, QatLossWeights, QuantGranularity, SteVariant,
UniformQuantizer, DEFAULT_BITS, DEFAULT_QAT_LR, MAX_BITS, MIN_BITS,
};
#[cfg(feature = "quantize")]
pub use quantize::{
apply_incoherence,
dequantize_for_ane,
estimate_memory_q4,
estimate_memory_q5,
estimate_memory_q8,
hadamard_batch_inverse,
hadamard_batch_transform,
log2_exact,
next_power_of_2,
pad_to_power_of_2,
quantize_ruvltra_q4,
quantize_ruvltra_q5,
quantize_ruvltra_q8,
restore_incoherence,
HadamardTransform,
IncoherenceConfig,
IncoherenceEvent,
IncoherencePhase,
IncoherenceStats,
IncoherenceTransform,
MemoryEstimate,
Q4KMBlock,
Q5KMBlock,
Q8Block,
QuantConfig,
QuantProgress,
QuantStats,
RuvltraQuantizer,
TargetFormat,
MAX_LOG_DIM,
SIMD_LANES,
};
pub use serving::{
BatchStats,
BatchedRequest,
CompletedRequest,
ContinuousBatchScheduler,
DecodeTask,
FinishReason,
GenerationResult,
InferenceRequest,
IterationPlan,
IterationScheduler,
KvCacheAllocation,
KvCacheManager,
KvCacheManagerStats,
KvCachePoolConfig,
PreemptionMode,
PrefillTask,
Priority,
PriorityPolicy,
RequestId,
RequestQueue,
RequestState,
RunningRequest,
ScheduledBatch,
SchedulerConfig,
SchedulerStats,
ServingEngine,
ServingEngineConfig,
ServingMetrics,
TokenBudget,
TokenOutput,
};
pub use session::{Session, SessionConfig, SessionManager};
pub use session_index::{KvCacheReference, SessionIndex, SessionState};
pub use sona::{LearningLoop, SonaConfig, SonaIntegration};
pub use speculative::{
log_softmax, sample_from_probs, softmax, top_k_filter, top_p_filter, AtomicSpeculativeStats,
SpeculationTree, SpeculativeConfig as SpeculativeDecodingConfig, SpeculativeDecoder,
SpeculativeStats, TreeNode, VerificationResult,
};
pub use tokenizer::{
ChatMessage, ChatTemplate, Role, RuvTokenizer, StreamingDecodeBuffer, TokenizerSpecialTokens,
};
pub use training::{
AugmentationConfig,
ClaudeTaskDataset,
ClaudeTaskExample,
ComplexityLevel,
DatasetConfig,
DatasetGenerator,
DatasetStats,
DifficultyLevel,
DifficultyWeights,
DomainType,
EvaluationMetrics,
GrpoBatch,
GrpoConfig,
GrpoOptimizer,
GrpoSample,
GrpoStats,
GrpoUpdateResult,
McpToolCategory,
McpToolDef,
McpToolTrainer,
McpTrainingConfig,
ParamType,
SampleGroup,
StepBuilder,
TaskCategory,
TaskMetadata,
ToolCallDataset,
ToolCallExample,
ToolDatasetConfig,
ToolDatasetStats,
ToolParam,
ToolTrajectory,
TrainingCheckpoint,
TrainingResult,
TrainingStats,
TrajectoryBuilder,
TrajectoryMetadata,
TrajectoryStep,
};
pub use types::*;
pub use witness_log::{
AsyncWriteConfig, LatencyBreakdown, RoutingDecision, WitnessEntry, WitnessLog, WitnessLogStats,
};
pub use models::{
AneDispatcher,
AneOptimization,
MemoryLayout,
QuantizationType,
RuvLtraAttention,
RuvLtraConfig,
RuvLtraDecoderLayer,
RuvLtraMLP,
RuvLtraModel,
RuvLtraModelInfo,
};
pub use capabilities::{
gate_feature, gate_feature_or, RuvectorCapabilities, ATTENTION_AVAILABLE, GNN_AVAILABLE,
GRAPH_AVAILABLE, HNSW_AVAILABLE, PARALLEL_AVAILABLE, SIMD_AVAILABLE, SONA_AVAILABLE,
};
pub use ruvector_integration::{
IndexStats,
IntegrationConfig,
IntegrationStats,
IntelligenceLayer,
IntelligenceLayerStats,
IntelligentRoutingDecision,
RuvectorIntegration,
SearchResultWithMetadata,
UnifiedIndex,
VectorMetadata,
};
pub use intelligence::{
FileSignalProvider, HumanVerdict, IntelligenceLoader, IntelligenceProvider, Outcome,
ProviderError, ProviderQualityWeights, ProviderResult, QualityFactors, QualitySignal,
};
pub use quality::{
CoherenceConfig,
CoherenceValidator,
CoherenceViolation,
CombinedValidator,
ComparisonResult,
ContradictionResult,
DiversificationSuggestion,
DiversityAnalyzer,
DiversityConfig,
DiversityResult,
FormatValidator,
ImprovementRecommendation,
JsonSchemaValidator,
LogicalFlowResult,
ModeCollapseResult,
QualityDimension,
QualityHistory,
QualityMetrics,
QualityScoringEngine,
QualitySummary,
QualityWeights,
RangeValidator,
SchemaValidator,
ScoringConfig,
ScoringContext,
SemanticConsistencyResult,
TrendAnalysis,
TrendDirection,
TypeValidator,
ValidationCombinator,
ValidationError,
ValidationResult,
};
pub use context::{
AgenticMemory,
AgenticMemoryConfig,
AttentionWeights,
CacheStats,
CachedToolResult,
ClaudeFlowBridgeConfig,
ClaudeFlowMemoryBridge,
CompressedEpisode,
ContextElement,
ContextManagerConfig,
ElementPriority,
Episode,
EpisodeMetadata,
EpisodeTrajectory,
EpisodicMemory,
EpisodicMemoryConfig,
IntelligentContextManager,
MemoryType,
PreparedContext,
PriorityScorer,
ScratchpadEntry,
SemanticCacheConfig,
SemanticToolCache,
SyncResult,
TaskContext,
WorkingMemory,
WorkingMemoryConfig,
};
pub use reflection::{
BaseAgent,
CompletenessChecker,
ConfidenceCheckRecord,
ConfidenceChecker,
ConfidenceConfig,
ConfidenceFactorWeights,
ConfidenceLevel,
ConsistencyChecker,
CorrectnessChecker,
CritiqueIssue,
CritiqueResult,
ErrorCategory,
ErrorCluster,
ErrorLearnerStats,
ErrorPattern,
ErrorPatternLearner,
ErrorPatternLearnerConfig,
ExecutionContext,
ExecutionResult,
IssueCategory,
Perspective,
PerspectiveConfig,
PreviousAttempt,
RecoveryOutcome,
RecoveryStrategy,
RecoverySuggestion,
Reflection,
ReflectionConfig,
ReflectionStrategy,
ReflectiveAgent,
ReflectiveAgentStats,
RetryConfig,
RevisionResult,
SimilarError,
UnifiedCritique,
WeakPoint,
WeaknessType,
};
pub use reasoning_bank::{
CompressedTrajectory,
ConsolidationConfig,
DistillationConfig,
FailurePattern as VerdictFailurePattern,
FisherInformation,
ImportanceScore,
KeyLesson,
MemoryDistiller,
Pattern,
PatternCategory,
PatternConsolidator,
PatternSearchResult,
PatternStats,
PatternStore,
PatternStoreConfig,
ReasoningBank,
ReasoningBankConfig,
ReasoningBankStats,
RecoveryStrategy as VerdictRecoveryStrategy,
RootCause,
StepOutcome,
Trajectory as ReasoningTrajectory,
TrajectoryId,
TrajectoryRecorder,
TrajectoryStep as ReasoningTrajectoryStep,
Verdict as ReasoningVerdict,
VerdictAnalyzer,
};
#[cfg(all(target_os = "macos", feature = "metal-compute"))]
pub use metal::{
get_device_info, is_metal_available, shader_source, tile_sizes, AttentionParams, GemmParams,
MetalBuffer, MetalBufferPool, MetalConfig, MetalContext, MetalDeviceInfo, MetalPipelines,
NormParams, RopeParams,
};
/// Top-level configuration consumed by `RuvLLMEngine::new`.
///
/// The `Default` implementation in this file uses `".ruvllm"` for `storage_path`,
/// `1000` for `max_sessions`, `768` for `embedding_dim`, and each nested
/// configuration type's own `default()`.
#[derive(Debug, Clone)]
pub struct RuvLLMConfig {
/// Base path; `RuvLLMEngine::new` appends `/policies`, `/sessions` and `/witness`
/// to it to derive the paths handed to the policy store, session index and witness log.
pub storage_path: String,
/// Paged-attention settings.
/// NOTE(review): not read by `RuvLLMEngine::new` in this file — confirm where it is consumed.
pub paged_attention: PagedAttentionConfig,
/// KV-cache settings.
/// NOTE(review): not read by `RuvLLMEngine::new` in this file — confirm where it is consumed.
pub kv_cache: KvCacheConfig,
/// Session settings; cloned into `SessionManager::new` by `RuvLLMEngine::new`.
pub session: SessionConfig,
/// SONA settings; cloned into `SonaIntegration::new` by `RuvLLMEngine::new`.
pub sona: SonaConfig,
/// Presumably an upper bound on the number of sessions.
/// NOTE(review): no code visible in this file reads this field — confirm how it is enforced.
pub max_sessions: usize,
/// Dimension value passed as the second argument to `PolicyStore::new`,
/// `SessionIndex::new` and `WitnessLog::new`.
pub embedding_dim: usize,
}
impl Default for RuvLLMConfig {
    /// Builds the stock configuration: storage under `.ruvllm`, 1000 sessions,
    /// 768-wide embeddings, and every nested config at its own default.
    fn default() -> Self {
        // Scalar defaults are named here so they are easy to spot and adjust.
        const STORAGE_PATH: &str = ".ruvllm";
        const MAX_SESSIONS: usize = 1000;
        const EMBEDDING_DIM: usize = 768;

        Self {
            storage_path: String::from(STORAGE_PATH),
            paged_attention: PagedAttentionConfig::default(),
            kv_cache: KvCacheConfig::default(),
            session: SessionConfig::default(),
            sona: SonaConfig::default(),
            max_sessions: MAX_SESSIONS,
            embedding_dim: EMBEDDING_DIM,
        }
    }
}
/// Engine that owns the policy store, session manager, session index, adapter
/// manager, witness log and SONA integration, all built from one `RuvLLMConfig`.
pub struct RuvLLMEngine {
/// Configuration the engine was constructed with.
config: RuvLLMConfig,
/// Policy store; queried by `search_policies` and exposed through `policies()`.
policy_store: PolicyStore,
/// Session manager; used by `create_session` and `get_session`.
session_manager: SessionManager,
/// Session index; `create_session` stores a `SessionState` for each new session here.
session_index: SessionIndex,
/// Adapter manager; exposed through `adapters()`.
adapter_manager: AdapterManager,
/// Witness log; written by `record_witness` and queried by `search_witness`.
witness_log: WitnessLog,
/// SONA integration; exposed through `sona()`.
sona: SonaIntegration,
}
impl RuvLLMEngine {
    /// Assembles an engine from `config`.
    ///
    /// The policy store, session index and witness log are constructed, in that
    /// order, from `<storage_path>/policies`, `<storage_path>/sessions` and
    /// `<storage_path>/witness`, each together with `config.embedding_dim`.
    /// The session manager and SONA integration receive clones of their
    /// respective sub-configurations.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by `PolicyStore::new`,
    /// `SessionIndex::new` or `WitnessLog::new`.
    pub fn new(config: RuvLLMConfig) -> Result<Self> {
        let dim = config.embedding_dim;
        // Joins the configured base path and a component name with a single '/'.
        let path_for = |component: &str| format!("{}/{}", config.storage_path, component);

        // Fallible stores first, in a fixed order, so the first failure wins.
        let policy_store = PolicyStore::new(&path_for("policies"), dim)?;
        let session_index = SessionIndex::new(&path_for("sessions"), dim)?;
        let witness_log = WitnessLog::new(&path_for("witness"), dim)?;

        let session_manager = SessionManager::new(config.session.clone());
        let adapter_manager = AdapterManager::new();
        let sona = SonaIntegration::new(config.sona.clone());

        Ok(Self {
            config,
            policy_store,
            session_manager,
            session_index,
            adapter_manager,
            witness_log,
            sona,
        })
    }

    /// Creates a session through the session manager and stores a
    /// `SessionState` derived from it in the session index.
    ///
    /// # Errors
    ///
    /// Propagates errors from `SessionManager::create_session` and from
    /// `SessionIndex::store`. If `store` fails, the session has already been
    /// created by the session manager.
    pub fn create_session(&self, user_id: Option<&str>) -> Result<Session> {
        let created = self.session_manager.create_session(user_id)?;
        let indexed_state = SessionState::from_session(&created);
        self.session_index.store(&indexed_state)?;
        Ok(created)
    }

    /// Forwards `session_id` to `SessionManager::get_session` and returns its
    /// result unchanged.
    pub fn get_session(&self, session_id: &str) -> Result<Option<Session>> {
        let manager = &self.session_manager;
        manager.get_session(session_id)
    }

    /// Forwards `context_embedding` and `limit` to `PolicyStore::search` and
    /// returns its result unchanged.
    pub fn search_policies(
        &self,
        context_embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<PolicyEntry>> {
        let store = &self.policy_store;
        store.search(context_embedding, limit)
    }

    /// Hands `entry` to `WitnessLog::record` and returns its result unchanged.
    pub fn record_witness(&self, entry: WitnessEntry) -> Result<()> {
        let log = &self.witness_log;
        log.record(entry)
    }

    /// Forwards `query_embedding` and `limit` to `WitnessLog::search` and
    /// returns its result unchanged.
    pub fn search_witness(
        &self,
        query_embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<WitnessEntry>> {
        let log = &self.witness_log;
        log.search(query_embedding, limit)
    }

    /// Borrows the engine's SONA integration.
    pub fn sona(&self) -> &SonaIntegration {
        let Self { sona, .. } = self;
        sona
    }

    /// Borrows the engine's adapter manager.
    pub fn adapters(&self) -> &AdapterManager {
        let Self {
            adapter_manager, ..
        } = self;
        adapter_manager
    }

    /// Borrows the engine's policy store.
    pub fn policies(&self) -> &PolicyStore {
        let Self { policy_store, .. } = self;
        policy_store
    }
}