Skip to main content

helios_persistence/advisor/
suggestions.rs

1//! Optimization suggestions for composite storage configurations.
2//!
3//! This module provides intelligent suggestions for improving composite
4//! storage configurations based on workload patterns and requirements.
5
6use std::collections::HashMap;
7
8use crate::composite::{BackendRole, CompositeConfig, QueryFeature};
9use crate::core::{BackendCapability, BackendKind};
10
/// Engine for generating optimization suggestions.
///
/// Build one with [`SuggestionEngine::new`] (or `Default::default`) and call
/// [`SuggestionEngine::suggest`] with a configuration and a workload pattern.
pub struct SuggestionEngine {
    /// Backend cost profiles.
    // Filled in by `default_cost_profiles`, but none of the suggestion rules
    // in this file consult it yet (only the unit tests read it), which is why
    // the `dead_code` lint is silenced here.
    #[allow(dead_code)]
    backend_costs: HashMap<BackendKind, BackendCostProfile>,
}
17
18impl SuggestionEngine {
19    /// Creates a new suggestion engine with default profiles.
20    pub fn new() -> Self {
21        Self {
22            backend_costs: Self::default_cost_profiles(),
23        }
24    }
25
26    /// Generates suggestions based on workload pattern.
27    pub fn suggest(
28        &self,
29        config: &CompositeConfig,
30        workload: &WorkloadPattern,
31    ) -> Vec<OptimizationSuggestion> {
32        let mut suggestions = Vec::new();
33
34        // Analyze current config
35        let current_capabilities = self.analyze_capabilities(config);
36
37        // Suggest backends based on workload
38        suggestions.extend(self.suggest_for_workload(config, workload, &current_capabilities));
39
40        // Suggest performance optimizations
41        suggestions.extend(self.suggest_performance_optimizations(config, workload));
42
43        // Suggest cost optimizations
44        suggestions.extend(self.suggest_cost_optimizations(config, workload));
45
46        // Sort by priority
47        suggestions.sort_by_key(|s| std::cmp::Reverse(s.priority));
48
49        suggestions
50    }
51
52    /// Generates suggestions for a specific workload pattern.
53    fn suggest_for_workload(
54        &self,
55        config: &CompositeConfig,
56        workload: &WorkloadPattern,
57        _current_capabilities: &[BackendCapability],
58    ) -> Vec<OptimizationSuggestion> {
59        let mut suggestions = Vec::new();
60
61        // Full-text search heavy workload
62        if workload.fulltext_search_ratio > 0.3 {
63            if !config
64                .backends
65                .iter()
66                .any(|b| b.kind == BackendKind::Elasticsearch && b.role == BackendRole::Search)
67            {
68                suggestions.push(OptimizationSuggestion {
69                    priority: SuggestionPriority::High,
70                    category: SuggestionCategory::Performance,
71                    title: "Add Elasticsearch for full-text search".to_string(),
72                    description: format!(
73                        "Your workload has {:.0}% full-text search queries. \
74                         Elasticsearch is optimized for this use case.",
75                        workload.fulltext_search_ratio * 100.0
76                    ),
77                    estimated_improvement: Some("3-10x faster full-text queries".to_string()),
78                    implementation: Some(
79                        "Add a secondary backend with role=Search, kind=Elasticsearch".to_string(),
80                    ),
81                });
82            }
83        }
84
85        // Chained search heavy workload
86        if workload.chained_search_ratio > 0.2 {
87            if !config
88                .backends
89                .iter()
90                .any(|b| b.kind == BackendKind::Neo4j && b.role == BackendRole::Graph)
91            {
92                suggestions.push(OptimizationSuggestion {
93                    priority: SuggestionPriority::Medium,
94                    category: SuggestionCategory::Performance,
95                    title: "Consider Neo4j for relationship-heavy queries".to_string(),
96                    description: format!(
97                        "Your workload has {:.0}% chained/relationship queries. \
98                         Neo4j excels at graph traversals.",
99                        workload.chained_search_ratio * 100.0
100                    ),
101                    estimated_improvement: Some("2-5x faster chained queries".to_string()),
102                    implementation: Some(
103                        "Add a secondary backend with role=Graph, kind=Neo4j".to_string(),
104                    ),
105                });
106            }
107        }
108
109        // High write workload
110        if workload.write_ratio > 0.5 {
111            let primary = config
112                .backends
113                .iter()
114                .find(|b| b.role == BackendRole::Primary);
115            if let Some(p) = primary {
116                if p.kind == BackendKind::Sqlite {
117                    suggestions.push(OptimizationSuggestion {
118                        priority: SuggestionPriority::High,
119                        category: SuggestionCategory::Scalability,
120                        title: "Consider PostgreSQL for write-heavy workloads".to_string(),
121                        description: format!(
122                            "Your workload has {:.0}% write operations. \
123                             PostgreSQL handles concurrent writes better than SQLite.",
124                            workload.write_ratio * 100.0
125                        ),
126                        estimated_improvement: Some(
127                            "Better concurrent write performance".to_string(),
128                        ),
129                        implementation: Some("Replace SQLite primary with PostgreSQL".to_string()),
130                    });
131                }
132            }
133        }
134
135        // Large data volume
136        if workload.estimated_data_size_gb > 100.0 {
137            if !config
138                .backends
139                .iter()
140                .any(|b| b.kind == BackendKind::S3 && b.role == BackendRole::Archive)
141            {
142                suggestions.push(OptimizationSuggestion {
143                    priority: SuggestionPriority::Medium,
144                    category: SuggestionCategory::Cost,
145                    title: "Add S3 for archival storage".to_string(),
146                    description: format!(
147                        "With {:.0}GB of data, S3 can significantly reduce storage costs \
148                         for historical/archived data.",
149                        workload.estimated_data_size_gb
150                    ),
151                    estimated_improvement: Some(
152                        "70-90% storage cost reduction for archives".to_string(),
153                    ),
154                    implementation: Some(
155                        "Add a secondary backend with role=Archive, kind=S3".to_string(),
156                    ),
157                });
158            }
159        }
160
161        // Terminology operations
162        if workload.terminology_search_ratio > 0.1 {
163            suggestions.push(OptimizationSuggestion {
164                priority: SuggestionPriority::Low,
165                category: SuggestionCategory::Feature,
166                title: "Consider dedicated terminology service".to_string(),
167                description: format!(
168                    "Your workload has {:.0}% terminology operations. \
169                     A dedicated terminology service can improve expansion performance.",
170                    workload.terminology_search_ratio * 100.0
171                ),
172                estimated_improvement: Some("Faster code expansion and validation".to_string()),
173                implementation: Some("Add a secondary backend with role=Terminology".to_string()),
174            });
175        }
176
177        suggestions
178    }
179
180    /// Suggests performance optimizations.
181    fn suggest_performance_optimizations(
182        &self,
183        config: &CompositeConfig,
184        workload: &WorkloadPattern,
185    ) -> Vec<OptimizationSuggestion> {
186        let mut suggestions = Vec::new();
187
188        // Check sync mode
189        if workload.read_ratio > 0.8
190            && config.sync_config.mode == crate::composite::SyncMode::Synchronous
191        {
192            suggestions.push(OptimizationSuggestion {
193                priority: SuggestionPriority::Medium,
194                category: SuggestionCategory::Performance,
195                title: "Consider asynchronous sync for read-heavy workloads".to_string(),
196                description:
197                    "With mostly read operations, asynchronous sync can reduce write latency \
198                             without impacting read consistency."
199                        .to_string(),
200                estimated_improvement: Some("Lower write latency".to_string()),
201                implementation: Some("Set sync_config.mode to Asynchronous".to_string()),
202            });
203        }
204
205        // Single backend bottleneck
206        let enabled_count = config.backends.iter().filter(|b| b.enabled).count();
207        if enabled_count == 1 && workload.concurrent_users > 50 {
208            suggestions.push(OptimizationSuggestion {
209                priority: SuggestionPriority::High,
210                category: SuggestionCategory::Scalability,
211                title: "Add read replicas for high concurrency".to_string(),
212                description: format!(
213                    "With {} concurrent users and a single backend, \
214                     consider adding read replicas.",
215                    workload.concurrent_users
216                ),
217                estimated_improvement: Some("Better concurrent query performance".to_string()),
218                implementation: Some("Add secondary backends for read distribution".to_string()),
219            });
220        }
221
222        suggestions
223    }
224
225    /// Suggests cost optimizations.
226    fn suggest_cost_optimizations(
227        &self,
228        config: &CompositeConfig,
229        workload: &WorkloadPattern,
230    ) -> Vec<OptimizationSuggestion> {
231        let mut suggestions = Vec::new();
232
233        // Check for over-provisioned backends
234        if workload.queries_per_day < 100 {
235            let expensive_backends: Vec<_> = config
236                .backends
237                .iter()
238                .filter(|b| {
239                    matches!(
240                        b.kind,
241                        BackendKind::Elasticsearch | BackendKind::Neo4j | BackendKind::Postgres
242                    )
243                })
244                .collect();
245
246            if !expensive_backends.is_empty() {
247                suggestions.push(OptimizationSuggestion {
248                    priority: SuggestionPriority::Low,
249                    category: SuggestionCategory::Cost,
250                    title: "Consider simpler setup for low volume".to_string(),
251                    description: format!(
252                        "With only {} queries/day, a SQLite-only setup may be sufficient \
253                         and reduce operational costs.",
254                        workload.queries_per_day
255                    ),
256                    estimated_improvement: Some("Reduced infrastructure costs".to_string()),
257                    implementation: Some("Use SQLite as primary without secondaries".to_string()),
258                });
259            }
260        }
261
262        suggestions
263    }
264
265    /// Analyzes capabilities of current configuration.
266    fn analyze_capabilities(&self, config: &CompositeConfig) -> Vec<BackendCapability> {
267        config
268            .backends
269            .iter()
270            .filter(|b| b.enabled)
271            .flat_map(|b| b.effective_capabilities())
272            .collect()
273    }
274
275    /// Creates default backend cost profiles.
276    fn default_cost_profiles() -> HashMap<BackendKind, BackendCostProfile> {
277        let mut profiles = HashMap::new();
278
279        profiles.insert(
280            BackendKind::Sqlite,
281            BackendCostProfile {
282                setup_cost: 0.0,
283                monthly_cost: 0.0,
284                cost_per_query: 0.0001,
285                best_for: vec![
286                    "Development".to_string(),
287                    "Low volume".to_string(),
288                    "Single node".to_string(),
289                ],
290            },
291        );
292
293        profiles.insert(
294            BackendKind::Postgres,
295            BackendCostProfile {
296                setup_cost: 50.0,
297                monthly_cost: 50.0,
298                cost_per_query: 0.00005,
299                best_for: vec![
300                    "Production CRUD".to_string(),
301                    "Concurrent writes".to_string(),
302                    "ACID transactions".to_string(),
303                ],
304            },
305        );
306
307        profiles.insert(
308            BackendKind::Elasticsearch,
309            BackendCostProfile {
310                setup_cost: 100.0,
311                monthly_cost: 200.0,
312                cost_per_query: 0.00001,
313                best_for: vec![
314                    "Full-text search".to_string(),
315                    "Analytics".to_string(),
316                    "Log aggregation".to_string(),
317                ],
318            },
319        );
320
321        profiles.insert(
322            BackendKind::Neo4j,
323            BackendCostProfile {
324                setup_cost: 150.0,
325                monthly_cost: 300.0,
326                cost_per_query: 0.00002,
327                best_for: vec![
328                    "Graph queries".to_string(),
329                    "Relationship traversal".to_string(),
330                    "Chained search".to_string(),
331                ],
332            },
333        );
334
335        profiles.insert(
336            BackendKind::S3,
337            BackendCostProfile {
338                setup_cost: 10.0,
339                monthly_cost: 0.023, // per GB
340                cost_per_query: 0.0004,
341                best_for: vec![
342                    "Archival".to_string(),
343                    "Large data".to_string(),
344                    "Cost efficiency".to_string(),
345                ],
346            },
347        );
348
349        profiles
350    }
351}
352
// `Default` simply delegates to `SuggestionEngine::new`, so both construction
// paths yield an engine loaded with the built-in cost profiles.
impl Default for SuggestionEngine {
    /// Equivalent to [`SuggestionEngine::new`].
    fn default() -> Self {
        Self::new()
    }
}
358
/// Workload pattern describing usage characteristics.
///
/// The derived `Default` sets every numeric field to zero, leaves
/// `required_features` empty, and leaves both optional limits as `None`.
/// Nothing in this module validates the ratios or requires them to sum to 1.
#[derive(Debug, Clone, Default)]
pub struct WorkloadPattern {
    /// Ratio of read operations (0.0 to 1.0).
    ///
    /// Above 0.8 the sync-mode rule treats the workload as read-heavy.
    pub read_ratio: f64,

    /// Ratio of write operations (0.0 to 1.0).
    ///
    /// Above 0.5 a SQLite primary triggers the PostgreSQL suggestion.
    pub write_ratio: f64,

    /// Ratio of full-text search queries.
    ///
    /// Above 0.3 the engine may suggest adding Elasticsearch.
    pub fulltext_search_ratio: f64,

    /// Ratio of chained/relationship search queries.
    ///
    /// Above 0.2 the engine may suggest adding Neo4j.
    pub chained_search_ratio: f64,

    /// Ratio of terminology-based searches.
    ///
    /// Above 0.1 the engine suggests a dedicated terminology service.
    pub terminology_search_ratio: f64,

    /// Estimated data size in GB.
    ///
    /// Above 100.0 the engine may suggest S3 archival storage.
    pub estimated_data_size_gb: f64,

    /// Number of queries per day.
    ///
    /// Below 100 the cost rule may suggest a simpler, SQLite-only setup.
    pub queries_per_day: u64,

    /// Peak concurrent users.
    ///
    /// Above 50 with a single enabled backend, read replicas are suggested.
    pub concurrent_users: u64,

    /// Required features.
    ///
    /// Not read by any suggestion rule visible in this module.
    pub required_features: Vec<QueryFeature>,

    /// Latency requirements in ms.
    ///
    /// Not read by any suggestion rule visible in this module.
    pub max_latency_ms: Option<u64>,

    /// Budget constraints (monthly).
    ///
    /// Not read by any suggestion rule visible in this module.
    pub budget_monthly: Option<f64>,
}
395
396impl WorkloadPattern {
397    /// Creates a development workload pattern.
398    pub fn development() -> Self {
399        Self {
400            read_ratio: 0.7,
401            write_ratio: 0.3,
402            fulltext_search_ratio: 0.1,
403            chained_search_ratio: 0.05,
404            terminology_search_ratio: 0.02,
405            estimated_data_size_gb: 1.0,
406            queries_per_day: 100,
407            concurrent_users: 5,
408            required_features: vec![],
409            max_latency_ms: Some(1000),
410            budget_monthly: Some(0.0),
411        }
412    }
413
414    /// Creates a production workload pattern.
415    pub fn production() -> Self {
416        Self {
417            read_ratio: 0.8,
418            write_ratio: 0.2,
419            fulltext_search_ratio: 0.2,
420            chained_search_ratio: 0.1,
421            terminology_search_ratio: 0.05,
422            estimated_data_size_gb: 100.0,
423            queries_per_day: 10000,
424            concurrent_users: 100,
425            required_features: vec![QueryFeature::BasicSearch, QueryFeature::FullTextSearch],
426            max_latency_ms: Some(200),
427            budget_monthly: Some(500.0),
428        }
429    }
430
431    /// Creates a high-volume workload pattern.
432    pub fn high_volume() -> Self {
433        Self {
434            read_ratio: 0.9,
435            write_ratio: 0.1,
436            fulltext_search_ratio: 0.3,
437            chained_search_ratio: 0.15,
438            terminology_search_ratio: 0.1,
439            estimated_data_size_gb: 1000.0,
440            queries_per_day: 1000000,
441            concurrent_users: 1000,
442            required_features: vec![
443                QueryFeature::BasicSearch,
444                QueryFeature::FullTextSearch,
445                QueryFeature::ChainedSearch,
446            ],
447            max_latency_ms: Some(100),
448            budget_monthly: Some(5000.0),
449        }
450    }
451}
452
453/// An optimization suggestion.
454#[derive(Debug, Clone)]
455pub struct OptimizationSuggestion {
456    /// Priority of the suggestion.
457    pub priority: SuggestionPriority,
458
459    /// Category of optimization.
460    pub category: SuggestionCategory,
461
462    /// Suggestion title.
463    pub title: String,
464
465    /// Detailed description.
466    pub description: String,
467
468    /// Estimated improvement.
469    pub estimated_improvement: Option<String>,
470
471    /// Implementation guidance.
472    pub implementation: Option<String>,
473}
474
/// Priority level for suggestions.
///
/// Variants are declared from least to most urgent, so the derived `Ord`
/// gives `Low < Medium < High < Critical`. `SuggestionEngine::suggest` sorts
/// its output by this ordering, so the declaration order must not change.
/// `Hash` is derived so the priority can key a `HashMap` or `HashSet`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SuggestionPriority {
    /// Low priority (nice to have).
    Low,
    /// Medium priority.
    Medium,
    /// High priority.
    High,
    /// Critical (should address).
    Critical,
}
487
/// Category of optimization suggestion.
///
/// `Hash` is derived alongside `Eq` so suggestions can be grouped or counted
/// per category in a `HashMap`/`HashSet`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SuggestionCategory {
    /// Performance improvement.
    Performance,
    /// Scalability improvement.
    Scalability,
    /// Cost optimization.
    Cost,
    /// Feature addition.
    Feature,
    /// Reliability improvement.
    Reliability,
}
502
/// Cost profile for a backend type.
///
/// Amounts are plain `f64` values; this module does not state a currency.
// No suggestion rule reads these fields yet, so without the allowance below
// the compiler would report them as unused.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct BackendCostProfile {
    /// Initial setup cost.
    setup_cost: f64,

    /// Monthly operational cost.
    ///
    /// NOTE(review): the S3 entry in `default_cost_profiles` is annotated
    /// "per GB", whereas the other entries carry no such note — confirm the
    /// intended unit before comparing profiles against each other.
    monthly_cost: f64,

    /// Cost per query (approximate).
    cost_per_query: f64,

    /// Best use cases.
    best_for: Vec<String>,
}
519
#[cfg(test)]
mod tests {
    use super::*;
    use crate::composite::CompositeConfigBuilder;

    /// Runs a fresh engine against a configuration that contains nothing but
    /// a SQLite primary backend.
    fn suggest_with_sqlite_primary(workload: &WorkloadPattern) -> Vec<OptimizationSuggestion> {
        let config = CompositeConfigBuilder::new()
            .primary("sqlite", BackendKind::Sqlite)
            .build()
            .unwrap();

        SuggestionEngine::new().suggest(&config, workload)
    }

    #[test]
    fn test_suggestion_engine_creation() {
        // A new engine must come with the built-in cost profiles loaded.
        let cost_table = SuggestionEngine::new().backend_costs;
        assert!(!cost_table.is_empty());
    }

    #[test]
    fn test_suggest_for_development() {
        let produced = suggest_with_sqlite_primary(&WorkloadPattern::development());

        // Should have minimal suggestions for dev workload with SQLite
        assert!(produced.len() < 5);
    }

    #[test]
    fn test_suggest_elasticsearch_for_fulltext() {
        // Production preset, but with a high full-text search ratio.
        let search_heavy = WorkloadPattern {
            fulltext_search_ratio: 0.5,
            ..WorkloadPattern::production()
        };

        let produced = suggest_with_sqlite_primary(&search_heavy);

        // Should suggest Elasticsearch
        let mentions_elasticsearch = produced
            .iter()
            .any(|suggestion| suggestion.title.contains("Elasticsearch"));
        assert!(mentions_elasticsearch);
    }

    #[test]
    fn test_workload_patterns() {
        // Each preset must land in its expected daily query volume band.
        assert!(WorkloadPattern::development().queries_per_day < 1000);
        assert!(WorkloadPattern::production().queries_per_day >= 1000);
        assert!(WorkloadPattern::high_volume().queries_per_day >= 100000);
    }
}