use anyhow::Result;
use scirs2_core::ndarray_ext::Array1;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
/// Category of change carried by a [`SchemaSuggestion`].
///
/// Within this file, `PatternAnalyzer::analyze` emits `AddIndex` and
/// `Optimization`, and `SuggestionModel::predict` emits `NewField` and
/// `NewType`. No producer for `DeprecateField` or `AddRelation` is visible here.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum SuggestionType {
/// Emitted by `SuggestionModel::predict` for object types seen more than 50 times.
NewType,
/// Emitted by `SuggestionModel::predict` for predicates seen more than 100 times.
NewField,
/// Emitted by `PatternAnalyzer::analyze` for frequently co-queried fields.
AddIndex,
/// Not produced by any code visible in this file.
DeprecateField,
/// Not produced by any code visible in this file.
AddRelation,
/// Emitted by `PatternAnalyzer::analyze` for patterns averaging over 1000 ms.
Optimization,
}
/// A single proposed schema change plus the metadata used to rank it.
///
/// `SchemaSuggestionEngine::generate_suggestions` orders suggestions by
/// `impact_score * confidence`, highest first.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaSuggestion {
/// Kind of change being proposed.
pub suggestion_type: SuggestionType,
/// Name of the type the suggestion is attached to (e.g. `"Query"` or
/// `"RdfResource"` for the suggestions generated in this file).
pub target_type: String,
/// Field the suggestion concerns, if any; `None` unless set via `with_field`.
pub field_name: Option<String>,
/// Human-readable description of the proposed change.
pub suggestion: String,
/// Confidence weight; `new` initializes it to 0.8.
/// NOTE(review): presumably expected to lie in `[0, 1]`, but nothing enforces it.
pub confidence: f32,
/// Free-text justification; empty unless set via `with_rationale`.
pub rationale: String,
/// Impact weight; `new` initializes it to 0.5.
/// NOTE(review): presumably expected to lie in `[0, 1]`, but nothing enforces it.
pub impact_score: f32,
}
impl SchemaSuggestion {
    /// Builds a suggestion with default metadata: no field name, an empty
    /// rationale, a confidence of 0.8 and an impact score of 0.5.
    ///
    /// Use the `with_*` methods to override any of those defaults.
    pub fn new(suggestion_type: SuggestionType, target_type: String, suggestion: String) -> Self {
        let field_name = None;
        let rationale = String::new();
        Self {
            suggestion_type,
            target_type,
            field_name,
            suggestion,
            confidence: 0.8,
            rationale,
            impact_score: 0.5,
        }
    }

    /// Returns the suggestion with `field_name` set to `Some(field_name)`.
    pub fn with_field(self, field_name: String) -> Self {
        Self {
            field_name: Some(field_name),
            ..self
        }
    }

    /// Returns the suggestion with its confidence replaced (stored as given,
    /// without clamping or validation).
    pub fn with_confidence(self, confidence: f32) -> Self {
        Self { confidence, ..self }
    }

    /// Returns the suggestion with its rationale text replaced.
    pub fn with_rationale(self, rationale: String) -> Self {
        Self { rationale, ..self }
    }

    /// Returns the suggestion with its impact score replaced (stored as given,
    /// without clamping or validation).
    pub fn with_impact_score(self, score: f32) -> Self {
        Self {
            impact_score: score,
            ..self
        }
    }
}
/// An observed query shape together with how often and how slowly it runs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryPattern {
/// Groups of field names requested together. `PatternAnalyzer::analyze`
/// only inspects the first group.
pub field_combinations: Vec<Vec<String>>,
/// Observation count; compared against the analyzer's minimum frequency.
pub frequency: usize,
/// Average execution time in milliseconds; values above 1000.0 trigger an
/// `Optimization` suggestion in `PatternAnalyzer::analyze`.
pub avg_execution_time_ms: f64,
}
/// Aggregate counts describing an RDF data set, consumed by
/// `SuggestionModel::predict`. The derived `Default` is all-zero/empty.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RdfDataStats {
/// Total triple count; used as the denominator of the per-predicate
/// confidence ratio in `SuggestionModel::predict`.
pub triple_count: usize,
/// Occurrence count per predicate; entries above 100 yield `NewField` suggestions.
/// NOTE(review): keys are presumably predicate names or IRIs — confirm with the producer.
pub predicate_frequency: HashMap<String, usize>,
/// Instance count per object type; entries above 50 yield `NewType` suggestions.
pub object_types: HashMap<String, usize>,
}
/// Combines query-pattern analysis and RDF-statistics-based prediction into a
/// single ranked list of schema suggestions.
///
/// Each piece of state sits behind its own `Arc<RwLock<_>>` (tokio's async lock),
/// so every method takes `&self`.
pub struct SchemaSuggestionEngine {
/// Recorded query patterns and the frequency threshold applied to them.
pattern_analyzer: Arc<RwLock<PatternAnalyzer>>,
/// Model that turns `RdfDataStats` into suggestions.
ml_model: Arc<RwLock<SuggestionModel>>,
/// Every suggestion returned by `generate_suggestions` so far, in call order.
suggestion_history: Arc<RwLock<Vec<SchemaSuggestion>>>,
/// Most recent statistics supplied through `update_rdf_stats`.
rdf_stats: Arc<RwLock<RdfDataStats>>,
}
/// Collects [`QueryPattern`]s and derives index/optimization suggestions from them.
#[derive(Debug, Clone)]
pub struct PatternAnalyzer {
/// Patterns recorded so far, in insertion order.
patterns: Vec<QueryPattern>,
/// Minimum `QueryPattern::frequency` (inclusive) for an index suggestion;
/// `new` sets it to 5.
min_frequency: usize,
}
impl PatternAnalyzer {
    /// Creates an analyzer with no recorded patterns and a minimum frequency of 5.
    pub fn new() -> Self {
        Self {
            min_frequency: 5,
            patterns: Vec::new(),
        }
    }

    /// Records `pattern` for consideration by later [`analyze`](Self::analyze) calls.
    pub fn add_pattern(&mut self, pattern: QueryPattern) {
        self.patterns.push(pattern);
    }

    /// Produces suggestions for every recorded pattern, in insertion order.
    ///
    /// For each pattern, an `AddIndex` suggestion (if any) precedes an
    /// `Optimization` suggestion (if any). The two checks are independent:
    /// a slow pattern is reported regardless of its frequency.
    pub fn analyze(&self) -> Vec<SchemaSuggestion> {
        self.patterns
            .iter()
            .flat_map(|pattern| {
                self.index_suggestion(pattern)
                    .into_iter()
                    .chain(Self::slow_query_suggestion(pattern))
            })
            .collect()
    }

    /// Overrides the inclusive frequency threshold used for index suggestions.
    pub fn set_min_frequency(&mut self, frequency: usize) {
        self.min_frequency = frequency;
    }

    /// Returns an `AddIndex` suggestion when `pattern` is frequent enough and
    /// its first field combination names more than one field.
    fn index_suggestion(&self, pattern: &QueryPattern) -> Option<SchemaSuggestion> {
        if pattern.frequency < self.min_frequency {
            return None;
        }
        // Only the first combination is considered; an empty list yields nothing.
        let fields = pattern.field_combinations.first()?;
        if fields.len() <= 1 {
            return None;
        }
        let description = format!("Add index on fields: {}", fields.join(", "));
        let rationale = format!(
            "These fields are frequently queried together ({} times)",
            pattern.frequency
        );
        Some(
            SchemaSuggestion::new(SuggestionType::AddIndex, "Query".to_string(), description)
                .with_confidence(0.85)
                .with_rationale(rationale)
                .with_impact_score(0.7),
        )
    }

    /// Returns an `Optimization` suggestion when the pattern's average
    /// execution time exceeds 1000 ms.
    fn slow_query_suggestion(pattern: &QueryPattern) -> Option<SchemaSuggestion> {
        let average_ms = pattern.avg_execution_time_ms;
        if average_ms > 1000.0 {
            let rationale = format!("Average execution time is {:.2}ms", average_ms);
            Some(
                SchemaSuggestion::new(
                    SuggestionType::Optimization,
                    "Query".to_string(),
                    "Consider adding materialized view or cache".to_string(),
                )
                .with_confidence(0.75)
                .with_rationale(rationale)
                .with_impact_score(0.8),
            )
        } else {
            None
        }
    }
}
impl Default for PatternAnalyzer {
fn default() -> Self {
Self::new()
}
}
/// Model that derives schema suggestions from [`RdfDataStats`].
#[derive(Debug, Clone)]
pub struct SuggestionModel {
// Initialized empty by `new` and never read by any code visible in this file,
// hence the lint suppression.
// NOTE(review): presumably reserved for learned embeddings — confirm intent.
#[allow(dead_code)]
embeddings: HashMap<String, Array1<f32>>,
}
impl SuggestionModel {
    /// Creates a model with an empty embedding table.
    pub fn new() -> Self {
        Self {
            embeddings: HashMap::new(),
        }
    }

    /// Derives suggestions from `rdf_stats`.
    ///
    /// The result lists `NewField` suggestions for every predicate seen more
    /// than 100 times, followed by `NewType` suggestions for every object type
    /// seen more than 50 times. Within each group, entries are ordered by
    /// descending count and then by name, so the output does not depend on
    /// `HashMap` iteration order.
    ///
    /// A predicate's confidence is its share of all triples, capped at 1.0.
    pub fn predict(&mut self, rdf_stats: &RdfDataStats) -> Vec<SchemaSuggestion> {
        let mut suggestions = Vec::new();

        for (predicate, frequency) in Self::ranked_above(&rdf_stats.predicate_frequency, 100) {
            // Stale or partial stats can report fewer triples than predicate
            // occurrences (even zero). Using the larger of the two as the
            // denominator caps the ratio at 1.0 and avoids dividing by zero.
            let total = rdf_stats.triple_count.max(frequency);
            let confidence = frequency as f32 / total as f32;
            suggestions.push(
                SchemaSuggestion::new(
                    SuggestionType::NewField,
                    "RdfResource".to_string(),
                    format!("Add field '{}' to schema", predicate),
                )
                .with_field(predicate.clone())
                .with_confidence(confidence)
                .with_rationale(format!("Predicate appears {} times in RDF data", frequency))
                .with_impact_score(0.6),
            );
        }

        for (object_type, count) in Self::ranked_above(&rdf_stats.object_types, 50) {
            suggestions.push(
                SchemaSuggestion::new(
                    SuggestionType::NewType,
                    object_type.clone(),
                    format!("Create GraphQL type for '{}'", object_type),
                )
                .with_confidence(0.8)
                .with_rationale(format!("{} instances found in RDF data", count))
                .with_impact_score(0.75),
            );
        }

        suggestions
    }

    /// Placeholder training hook: ignores `_examples` and always returns `Ok(())`.
    pub fn train(&mut self, _examples: Vec<(RdfDataStats, Vec<SchemaSuggestion>)>) -> Result<()> {
        Ok(())
    }

    /// Returns the entries of `counts` strictly above `threshold`, ordered by
    /// descending count with ties broken by ascending name.
    fn ranked_above(counts: &HashMap<String, usize>, threshold: usize) -> Vec<(&String, usize)> {
        let mut entries: Vec<(&String, usize)> = counts
            .iter()
            .map(|(name, &count)| (name, count))
            .filter(|&(_, count)| count > threshold)
            .collect();
        entries.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
        entries
    }
}
impl Default for SuggestionModel {
fn default() -> Self {
Self::new()
}
}
impl SchemaSuggestionEngine {
    /// Creates an engine with a default analyzer, a fresh model, empty
    /// history and default (all-zero) RDF statistics.
    pub fn new() -> Self {
        Self {
            pattern_analyzer: Arc::new(RwLock::new(PatternAnalyzer::new())),
            ml_model: Arc::new(RwLock::new(SuggestionModel::new())),
            suggestion_history: Arc::new(RwLock::new(Vec::new())),
            rdf_stats: Arc::new(RwLock::new(RdfDataStats::default())),
        }
    }

    /// Replaces the stored RDF statistics with `stats`. Always returns `Ok(())`.
    pub async fn update_rdf_stats(&self, stats: RdfDataStats) -> Result<()> {
        *self.rdf_stats.write().await = stats;
        Ok(())
    }

    /// Records `pattern` in the pattern analyzer. Always returns `Ok(())`.
    pub async fn add_query_pattern(&self, pattern: QueryPattern) -> Result<()> {
        self.pattern_analyzer.write().await.add_pattern(pattern);
        Ok(())
    }

    /// Generates suggestions from the recorded query patterns and the current
    /// RDF statistics, ranked by `impact_score * confidence` (highest first).
    ///
    /// The returned suggestions are also appended to the history, which grows
    /// with every call. Each lock is released as soon as the data it protects
    /// has been read, so no guard is held while sorting or while waiting for
    /// the history lock.
    pub async fn generate_suggestions(&self) -> Result<Vec<SchemaSuggestion>> {
        let mut all_suggestions = {
            let analyzer = self.pattern_analyzer.read().await;
            analyzer.analyze()
        };

        let ml_suggestions = {
            // `predict` takes `&mut self`, hence the write lock on the model.
            let mut ml_model = self.ml_model.write().await;
            let rdf_stats = self.rdf_stats.read().await;
            ml_model.predict(&rdf_stats)
        };
        all_suggestions.extend(ml_suggestions);

        // `total_cmp` is a total order even for NaN scores, which `sort_by`
        // requires; the sort is stable, so equal scores keep generation order.
        all_suggestions.sort_by(|a, b| {
            let score_a = a.impact_score * a.confidence;
            let score_b = b.impact_score * b.confidence;
            score_b.total_cmp(&score_a)
        });

        self.suggestion_history
            .write()
            .await
            .extend(all_suggestions.iter().cloned());
        Ok(all_suggestions)
    }

    /// Returns a copy of every suggestion generated so far, in generation order.
    pub async fn get_history(&self) -> Vec<SchemaSuggestion> {
        self.suggestion_history.read().await.clone()
    }

    /// Sets the analyzer's minimum pattern frequency. Always returns `Ok(())`.
    pub async fn set_min_frequency(&self, frequency: usize) -> Result<()> {
        self.pattern_analyzer
            .write()
            .await
            .set_min_frequency(frequency);
        Ok(())
    }

    /// Forwards `examples` to [`SuggestionModel::train`] under the model's
    /// write lock and returns its result.
    pub async fn train_model(
        &self,
        examples: Vec<(RdfDataStats, Vec<SchemaSuggestion>)>,
    ) -> Result<()> {
        let mut model = self.ml_model.write().await;
        model.train(examples)
    }
}
impl Default for SchemaSuggestionEngine {
fn default() -> Self {
Self::new()
}
}
// Unit tests for the suggestion types, the pattern analyzer, the suggestion
// model, and the async engine (the latter run under `#[tokio::test]`).
#[cfg(test)]
mod tests {
use super::*;
// `new` stores the given type and target verbatim.
#[test]
fn test_schema_suggestion_creation() {
let suggestion = SchemaSuggestion::new(
SuggestionType::NewField,
"User".to_string(),
"Add email field".to_string(),
);
assert_eq!(suggestion.target_type, "User");
assert_eq!(suggestion.suggestion_type, SuggestionType::NewField);
}
// Each `with_*` builder overrides exactly the field it names.
#[test]
fn test_schema_suggestion_builder() {
let suggestion = SchemaSuggestion::new(
SuggestionType::NewField,
"User".to_string(),
"Add field".to_string(),
)
.with_field("email".to_string())
.with_confidence(0.9)
.with_rationale("Common field".to_string())
.with_impact_score(0.8);
assert_eq!(suggestion.field_name, Some("email".to_string()));
assert_eq!(suggestion.confidence, 0.9);
assert_eq!(suggestion.impact_score, 0.8);
}
// A frequent two-field pattern (10 >= default threshold 5) yields a suggestion.
#[test]
fn test_pattern_analyzer() {
let mut analyzer = PatternAnalyzer::new();
let pattern = QueryPattern {
field_combinations: vec![vec!["name".to_string(), "email".to_string()]],
frequency: 10,
avg_execution_time_ms: 50.0,
};
analyzer.add_pattern(pattern);
let suggestions = analyzer.analyze();
assert!(!suggestions.is_empty());
}
// A pattern averaging 2000 ms (> 1000 ms) yields an `Optimization` suggestion.
#[test]
fn test_pattern_analyzer_slow_query() {
let mut analyzer = PatternAnalyzer::new();
let pattern = QueryPattern {
field_combinations: vec![vec!["data".to_string()]],
frequency: 5,
avg_execution_time_ms: 2000.0,
};
analyzer.add_pattern(pattern);
let suggestions = analyzer.analyze();
assert!(suggestions
.iter()
.any(|s| s.suggestion_type == SuggestionType::Optimization));
}
// A predicate seen 150 times (> 100) yields a suggestion.
#[test]
fn test_suggestion_model() {
let mut model = SuggestionModel::new();
let mut stats = RdfDataStats::default();
stats.predicate_frequency.insert("name".to_string(), 150);
stats.triple_count = 1000;
let suggestions = model.predict(&stats);
assert!(!suggestions.is_empty());
}
// An object type with 100 instances (> 50) yields a `NewType` suggestion.
#[test]
fn test_suggestion_model_new_type() {
let mut model = SuggestionModel::new();
let mut stats = RdfDataStats::default();
stats.object_types.insert("Person".to_string(), 100);
let suggestions = model.predict(&stats);
assert!(suggestions
.iter()
.any(|s| s.suggestion_type == SuggestionType::NewType));
}
// A fresh engine starts with an empty history.
#[tokio::test]
async fn test_engine_creation() {
let engine = SchemaSuggestionEngine::new();
let history = engine.get_history().await;
assert!(history.is_empty());
}
// Smoke test: replacing the statistics succeeds.
#[tokio::test]
async fn test_update_rdf_stats() {
let engine = SchemaSuggestionEngine::new();
let stats = RdfDataStats {
triple_count: 1000,
predicate_frequency: HashMap::new(),
object_types: HashMap::new(),
};
engine
.update_rdf_stats(stats)
.await
.expect("should succeed");
}
// Smoke test: recording a pattern succeeds.
#[tokio::test]
async fn test_add_query_pattern() {
let engine = SchemaSuggestionEngine::new();
let pattern = QueryPattern {
field_combinations: vec![vec!["id".to_string()]],
frequency: 5,
avg_execution_time_ms: 100.0,
};
engine
.add_query_pattern(pattern)
.await
.expect("should succeed");
}
// End to end: statistics plus a frequent pattern produce at least one suggestion.
#[tokio::test]
async fn test_generate_suggestions() {
let engine = SchemaSuggestionEngine::new();
let mut stats = RdfDataStats::default();
stats.predicate_frequency.insert("email".to_string(), 200);
stats.triple_count = 1000;
engine
.update_rdf_stats(stats)
.await
.expect("should succeed");
let pattern = QueryPattern {
field_combinations: vec![vec!["name".to_string(), "email".to_string()]],
frequency: 10,
avg_execution_time_ms: 100.0,
};
engine
.add_query_pattern(pattern)
.await
.expect("should succeed");
let suggestions = engine.generate_suggestions().await.expect("should succeed");
assert!(!suggestions.is_empty());
}
// Smoke test: changing the frequency threshold succeeds.
#[tokio::test]
async fn test_set_min_frequency() {
let engine = SchemaSuggestionEngine::new();
engine.set_min_frequency(10).await.expect("should succeed");
}
// Smoke test: training with no examples succeeds.
#[tokio::test]
async fn test_train_model() {
let engine = SchemaSuggestionEngine::new();
let examples = vec![];
engine.train_model(examples).await.expect("should succeed");
}
// The derived `Default` is zero triples and no predicates.
#[test]
fn test_rdf_stats_default() {
let stats = RdfDataStats::default();
assert_eq!(stats.triple_count, 0);
assert!(stats.predicate_frequency.is_empty());
}
// Re-implements the engine's ranking rule (impact * confidence, descending)
// locally and checks that the higher-scoring suggestion sorts first.
#[test]
fn test_suggestion_sorting() {
let mut suggestions = [
SchemaSuggestion::new(
SuggestionType::NewField,
"A".to_string(),
"test".to_string(),
)
.with_impact_score(0.5)
.with_confidence(0.5),
SchemaSuggestion::new(
SuggestionType::NewField,
"B".to_string(),
"test".to_string(),
)
.with_impact_score(0.9)
.with_confidence(0.9),
];
suggestions.sort_by(|a, b| {
let score_a = a.impact_score * a.confidence;
let score_b = b.impact_score * b.confidence;
score_b.partial_cmp(&score_a).expect("should succeed")
});
assert_eq!(suggestions[0].target_type, "B");
}
}