use std::collections::HashMap;
use crate::composite::{BackendRole, CompositeConfig, QueryFeature};
use crate::core::{BackendCapability, BackendKind};
/// Rule-based advisor that inspects a composite backend configuration together
/// with a described workload and returns optimization suggestions.
pub struct SuggestionEngine {
// Cost table keyed by backend kind, populated once by `default_cost_profiles()`.
// NOTE(review): none of the suggestion rules in this file read it (only the
// unit test does), which is why the `dead_code` lint is silenced here.
#[allow(dead_code)]
backend_costs: HashMap<BackendKind, BackendCostProfile>,
}
impl SuggestionEngine {
/// Creates an engine pre-loaded with the built-in backend cost profiles.
pub fn new() -> Self {
    let backend_costs = Self::default_cost_profiles();
    Self { backend_costs }
}
pub fn suggest(
&self,
config: &CompositeConfig,
workload: &WorkloadPattern,
) -> Vec<OptimizationSuggestion> {
let mut suggestions = Vec::new();
let current_capabilities = self.analyze_capabilities(config);
suggestions.extend(self.suggest_for_workload(config, workload, ¤t_capabilities));
suggestions.extend(self.suggest_performance_optimizations(config, workload));
suggestions.extend(self.suggest_cost_optimizations(config, workload));
suggestions.sort_by_key(|s| std::cmp::Reverse(s.priority));
suggestions
}
fn suggest_for_workload(
&self,
config: &CompositeConfig,
workload: &WorkloadPattern,
_current_capabilities: &[BackendCapability],
) -> Vec<OptimizationSuggestion> {
let mut suggestions = Vec::new();
if workload.fulltext_search_ratio > 0.3 {
if !config
.backends
.iter()
.any(|b| b.kind == BackendKind::Elasticsearch && b.role == BackendRole::Search)
{
suggestions.push(OptimizationSuggestion {
priority: SuggestionPriority::High,
category: SuggestionCategory::Performance,
title: "Add Elasticsearch for full-text search".to_string(),
description: format!(
"Your workload has {:.0}% full-text search queries. \
Elasticsearch is optimized for this use case.",
workload.fulltext_search_ratio * 100.0
),
estimated_improvement: Some("3-10x faster full-text queries".to_string()),
implementation: Some(
"Add a secondary backend with role=Search, kind=Elasticsearch".to_string(),
),
});
}
}
if workload.chained_search_ratio > 0.2 {
if !config
.backends
.iter()
.any(|b| b.kind == BackendKind::Neo4j && b.role == BackendRole::Graph)
{
suggestions.push(OptimizationSuggestion {
priority: SuggestionPriority::Medium,
category: SuggestionCategory::Performance,
title: "Consider Neo4j for relationship-heavy queries".to_string(),
description: format!(
"Your workload has {:.0}% chained/relationship queries. \
Neo4j excels at graph traversals.",
workload.chained_search_ratio * 100.0
),
estimated_improvement: Some("2-5x faster chained queries".to_string()),
implementation: Some(
"Add a secondary backend with role=Graph, kind=Neo4j".to_string(),
),
});
}
}
if workload.write_ratio > 0.5 {
let primary = config
.backends
.iter()
.find(|b| b.role == BackendRole::Primary);
if let Some(p) = primary {
if p.kind == BackendKind::Sqlite {
suggestions.push(OptimizationSuggestion {
priority: SuggestionPriority::High,
category: SuggestionCategory::Scalability,
title: "Consider PostgreSQL for write-heavy workloads".to_string(),
description: format!(
"Your workload has {:.0}% write operations. \
PostgreSQL handles concurrent writes better than SQLite.",
workload.write_ratio * 100.0
),
estimated_improvement: Some(
"Better concurrent write performance".to_string(),
),
implementation: Some("Replace SQLite primary with PostgreSQL".to_string()),
});
}
}
}
if workload.estimated_data_size_gb > 100.0 {
if !config
.backends
.iter()
.any(|b| b.kind == BackendKind::S3 && b.role == BackendRole::Archive)
{
suggestions.push(OptimizationSuggestion {
priority: SuggestionPriority::Medium,
category: SuggestionCategory::Cost,
title: "Add S3 for archival storage".to_string(),
description: format!(
"With {:.0}GB of data, S3 can significantly reduce storage costs \
for historical/archived data.",
workload.estimated_data_size_gb
),
estimated_improvement: Some(
"70-90% storage cost reduction for archives".to_string(),
),
implementation: Some(
"Add a secondary backend with role=Archive, kind=S3".to_string(),
),
});
}
}
if workload.terminology_search_ratio > 0.1 {
suggestions.push(OptimizationSuggestion {
priority: SuggestionPriority::Low,
category: SuggestionCategory::Feature,
title: "Consider dedicated terminology service".to_string(),
description: format!(
"Your workload has {:.0}% terminology operations. \
A dedicated terminology service can improve expansion performance.",
workload.terminology_search_ratio * 100.0
),
estimated_improvement: Some("Faster code expansion and validation".to_string()),
implementation: Some("Add a secondary backend with role=Terminology".to_string()),
});
}
suggestions
}
/// Suggests tuning changes that depend on how the existing setup is operated.
///
/// Two independent rules:
///
/// * read ratio above 0.8 while `config.sync_config.mode` is `Synchronous`
///   yields a `Medium` suggestion to switch to asynchronous sync;
/// * exactly one enabled backend together with more than 50 concurrent users
///   yields a `High` suggestion to add read replicas.
fn suggest_performance_optimizations(
    &self,
    config: &CompositeConfig,
    workload: &WorkloadPattern,
) -> Vec<OptimizationSuggestion> {
    let mut out = Vec::new();

    let read_heavy = workload.read_ratio > 0.8;
    if read_heavy && config.sync_config.mode == crate::composite::SyncMode::Synchronous {
        out.push(OptimizationSuggestion {
            priority: SuggestionPriority::Medium,
            category: SuggestionCategory::Performance,
            title: String::from("Consider asynchronous sync for read-heavy workloads"),
            description: String::from(
                "With mostly read operations, asynchronous sync can reduce write latency \
                 without impacting read consistency.",
            ),
            estimated_improvement: Some(String::from("Lower write latency")),
            implementation: Some(String::from("Set sync_config.mode to Asynchronous")),
        });
    }

    // Only enabled backends count towards the "single backend" check.
    let single_enabled_backend = config.backends.iter().filter(|b| b.enabled).count() == 1;
    if single_enabled_backend && workload.concurrent_users > 50 {
        out.push(OptimizationSuggestion {
            priority: SuggestionPriority::High,
            category: SuggestionCategory::Scalability,
            title: String::from("Add read replicas for high concurrency"),
            description: format!(
                "With {} concurrent users and a single backend, \
                 consider adding read replicas.",
                workload.concurrent_users
            ),
            estimated_improvement: Some(String::from("Better concurrent query performance")),
            implementation: Some(String::from("Add secondary backends for read distribution")),
        });
    }

    out
}
/// Flags configurations that look over-provisioned for a low query volume.
///
/// Fires when `workload.queries_per_day` is below 100 and `config` lists at
/// least one Elasticsearch, Neo4j, or PostgreSQL backend. The backend's
/// `enabled` flag is not checked. Produces at most one `Low`-priority
/// suggestion in the `Cost` category.
fn suggest_cost_optimizations(
    &self,
    config: &CompositeConfig,
    workload: &WorkloadPattern,
) -> Vec<OptimizationSuggestion> {
    let mut suggestions = Vec::new();

    // Only existence matters, so `any` is enough: it stops at the first match
    // and avoids building a throwaway collection.
    let has_expensive_backend = || {
        config.backends.iter().any(|b| {
            matches!(
                b.kind,
                BackendKind::Elasticsearch | BackendKind::Neo4j | BackendKind::Postgres
            )
        })
    };

    if workload.queries_per_day < 100 && has_expensive_backend() {
        suggestions.push(OptimizationSuggestion {
            priority: SuggestionPriority::Low,
            category: SuggestionCategory::Cost,
            title: "Consider simpler setup for low volume".to_string(),
            description: format!(
                "With only {} queries/day, a SQLite-only setup may be sufficient \
                 and reduce operational costs.",
                workload.queries_per_day
            ),
            estimated_improvement: Some("Reduced infrastructure costs".to_string()),
            implementation: Some("Use SQLite as primary without secondaries".to_string()),
        });
    }

    suggestions
}
/// Gathers the effective capabilities of every enabled backend in `config`.
///
/// Backends are visited in configuration order and disabled ones are skipped.
/// No de-duplication is performed, so a capability offered by several
/// backends appears once per backend.
fn analyze_capabilities(&self, config: &CompositeConfig) -> Vec<BackendCapability> {
    let mut capabilities = Vec::new();
    for backend in config.backends.iter() {
        if backend.enabled {
            capabilities.extend(backend.effective_capabilities());
        }
    }
    capabilities
}
/// Builds the built-in cost table with one profile each for SQLite,
/// PostgreSQL, Elasticsearch, Neo4j, and S3.
///
/// NOTE(review): the currency and billing units behind these numbers are not
/// stated anywhere in this file — confirm before using them in calculations.
fn default_cost_profiles() -> HashMap<BackendKind, BackendCostProfile> {
    // Local constructor so the table below reads as one row per backend.
    fn profile(
        setup_cost: f64,
        monthly_cost: f64,
        cost_per_query: f64,
        best_for: [&str; 3],
    ) -> BackendCostProfile {
        BackendCostProfile {
            setup_cost,
            monthly_cost,
            cost_per_query,
            best_for: best_for.iter().map(|label| String::from(*label)).collect(),
        }
    }

    HashMap::from([
        (
            BackendKind::Sqlite,
            profile(
                0.0,
                0.0,
                0.0001,
                ["Development", "Low volume", "Single node"],
            ),
        ),
        (
            BackendKind::Postgres,
            profile(
                50.0,
                50.0,
                0.00005,
                ["Production CRUD", "Concurrent writes", "ACID transactions"],
            ),
        ),
        (
            BackendKind::Elasticsearch,
            profile(
                100.0,
                200.0,
                0.00001,
                ["Full-text search", "Analytics", "Log aggregation"],
            ),
        ),
        (
            BackendKind::Neo4j,
            profile(
                150.0,
                300.0,
                0.00002,
                ["Graph queries", "Relationship traversal", "Chained search"],
            ),
        ),
        (
            BackendKind::S3,
            profile(
                10.0,
                0.023,
                0.0004,
                ["Archival", "Large data", "Cost efficiency"],
            ),
        ),
    ])
}
}
impl Default for SuggestionEngine {
/// Equivalent to [`SuggestionEngine::new`].
fn default() -> Self {
Self::new()
}
}
/// Description of how a deployment is used; the input the suggestion rules
/// compare against their thresholds.
///
/// The `*_ratio` fields are multiplied by 100 when rendered as percentages in
/// suggestion text, so they are presumably fractions in `0.0..=1.0`
/// (TODO confirm; nothing in this file validates the range).
#[derive(Debug, Clone, Default)]
pub struct WorkloadPattern {
/// Share of operations that are reads; the performance rules test it against 0.8.
pub read_ratio: f64,
/// Share of operations that are writes; the workload rules test it against 0.5.
pub write_ratio: f64,
/// Share of queries that are full-text searches; tested against 0.3.
pub fulltext_search_ratio: f64,
/// Share of queries that are chained/relationship searches; tested against 0.2.
pub chained_search_ratio: f64,
/// Share of operations that are terminology operations; tested against 0.1.
pub terminology_search_ratio: f64,
/// Estimated data volume in gigabytes; tested against 100.0.
pub estimated_data_size_gb: f64,
/// Query volume per day; the cost rules test it against 100.
pub queries_per_day: u64,
/// Number of concurrent users; the performance rules test it against 50.
pub concurrent_users: u64,
/// Query features the workload needs. Not read by any rule visible in this file.
pub required_features: Vec<QueryFeature>,
/// Latency ceiling in milliseconds, if any. Not read by any rule visible in this file.
pub max_latency_ms: Option<u64>,
/// Monthly budget, if any (currency not stated). Not read by any rule visible in this file.
pub budget_monthly: Option<f64>,
}
impl WorkloadPattern {
    /// Preset for a small development setup: 70/30 read/write split, 1 GB of
    /// data, 100 queries per day, 5 concurrent users, no required features,
    /// a 1000 ms latency ceiling and a zero monthly budget.
    pub fn development() -> Self {
        Self {
            read_ratio: 0.7,
            write_ratio: 0.3,
            fulltext_search_ratio: 0.1,
            chained_search_ratio: 0.05,
            terminology_search_ratio: 0.02,
            estimated_data_size_gb: 1.0,
            queries_per_day: 100,
            concurrent_users: 5,
            required_features: Vec::new(),
            max_latency_ms: Some(1_000),
            budget_monthly: Some(0.0),
        }
    }

    /// Preset for a typical production setup: 80/20 read/write split, 100 GB
    /// of data, 10,000 queries per day, 100 concurrent users, basic and
    /// full-text search required, a 200 ms latency ceiling and a monthly
    /// budget of 500.
    pub fn production() -> Self {
        let required_features = vec![QueryFeature::BasicSearch, QueryFeature::FullTextSearch];
        Self {
            read_ratio: 0.8,
            write_ratio: 0.2,
            fulltext_search_ratio: 0.2,
            chained_search_ratio: 0.1,
            terminology_search_ratio: 0.05,
            estimated_data_size_gb: 100.0,
            queries_per_day: 10_000,
            concurrent_users: 100,
            required_features,
            max_latency_ms: Some(200),
            budget_monthly: Some(500.0),
        }
    }

    /// Preset for a high-volume setup: 90/10 read/write split, 1000 GB of
    /// data, 1,000,000 queries per day, 1000 concurrent users, basic,
    /// full-text and chained search required, a 100 ms latency ceiling and a
    /// monthly budget of 5000.
    pub fn high_volume() -> Self {
        let required_features = vec![
            QueryFeature::BasicSearch,
            QueryFeature::FullTextSearch,
            QueryFeature::ChainedSearch,
        ];
        Self {
            read_ratio: 0.9,
            write_ratio: 0.1,
            fulltext_search_ratio: 0.3,
            chained_search_ratio: 0.15,
            terminology_search_ratio: 0.1,
            estimated_data_size_gb: 1000.0,
            queries_per_day: 1_000_000,
            concurrent_users: 1_000,
            required_features,
            max_latency_ms: Some(100),
            budget_monthly: Some(5000.0),
        }
    }
}
/// One piece of advice produced by [`SuggestionEngine::suggest`].
#[derive(Debug, Clone)]
pub struct OptimizationSuggestion {
/// Urgency of the suggestion; `SuggestionEngine::suggest` orders its output by this, highest first.
pub priority: SuggestionPriority,
/// Area the suggestion addresses.
pub category: SuggestionCategory,
/// Short headline for the suggestion.
pub title: String,
/// Longer explanation; the rules in this file usually embed the triggering workload figure.
pub description: String,
/// Free-text estimate of the expected benefit, when one is given.
pub estimated_improvement: Option<String>,
/// Free-text hint on how to apply the suggestion, when one is given.
pub implementation: Option<String>,
}
/// Urgency level attached to an [`OptimizationSuggestion`].
///
/// Variant order is significant: the derived `Ord` ranks later variants
/// higher (`Low < Medium < High < Critical`), and `SuggestionEngine::suggest`
/// relies on that ordering when sorting. Reordering the variants changes the
/// sort result.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SuggestionPriority {
Low,
Medium,
High,
// Not assigned by any rule visible in this file.
Critical,
}
/// Area of concern an [`OptimizationSuggestion`] addresses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SuggestionCategory {
Performance,
Scalability,
Cost,
Feature,
// Not assigned by any rule visible in this file.
Reliability,
}
// Cost figures and typical use cases for one backend kind.
// The `dead_code` allowance is needed because no code in this file reads the fields.
// NOTE(review): the currency and billing units are not stated anywhere in this
// file — confirm before relying on these numbers.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct BackendCostProfile {
// One-off cost of setting the backend up.
setup_cost: f64,
// Recurring cost per month.
monthly_cost: f64,
// Cost attributed to a single query.
cost_per_query: f64,
// Short labels naming the scenarios the backend suits.
best_for: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::composite::CompositeConfigBuilder;

    /// A freshly built engine carries a non-empty cost table.
    #[test]
    fn test_suggestion_engine_creation() {
        let advisor = SuggestionEngine::new();
        let has_profiles = !advisor.backend_costs.is_empty();
        assert!(has_profiles);
    }

    /// A SQLite-only setup under the development preset stays below five suggestions.
    #[test]
    fn test_suggest_for_development() {
        let sqlite_only = CompositeConfigBuilder::new()
            .primary("sqlite", BackendKind::Sqlite)
            .build()
            .unwrap();
        let advisor = SuggestionEngine::new();

        let produced = advisor.suggest(&sqlite_only, &WorkloadPattern::development());

        assert!(produced.len() < 5);
    }

    /// Raising the full-text share to 50% on a SQLite-only setup recommends Elasticsearch.
    #[test]
    fn test_suggest_elasticsearch_for_fulltext() {
        let sqlite_only = CompositeConfigBuilder::new()
            .primary("sqlite", BackendKind::Sqlite)
            .build()
            .unwrap();
        let search_heavy = WorkloadPattern {
            fulltext_search_ratio: 0.5,
            ..WorkloadPattern::production()
        };
        let advisor = SuggestionEngine::new();

        let produced = advisor.suggest(&sqlite_only, &search_heavy);

        let mentions_elasticsearch = produced
            .iter()
            .any(|suggestion| suggestion.title.contains("Elasticsearch"));
        assert!(mentions_elasticsearch);
    }

    /// The presets are ordered by query volume.
    #[test]
    fn test_workload_patterns() {
        assert!(WorkloadPattern::development().queries_per_day < 1000);
        assert!(WorkloadPattern::production().queries_per_day >= 1000);
        assert!(WorkloadPattern::high_volume().queries_per_day >= 100000);
    }
}