use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashSet};
/// A finding produced by the miner, together with the changes it suggests.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Pattern {
    /// Identifier for the finding (the detectors build it from a kind prefix
    /// plus the subject, e.g. `SchemaMismatch_<property>`).
    pub name: String,
    /// Category of the finding.
    pub pattern_type: PatternType,
    /// Human-readable summary of what was detected.
    pub description: String,
    /// Score compared against `MinerConfig::min_confidence` when filtering.
    pub confidence: f64,
    /// Count compared against `MinerConfig::min_occurrences` when filtering.
    pub occurrences: usize,
    /// Suggested follow-up actions for this finding.
    pub proposed_changes: Vec<ProposedChange>,
    /// Names of the entities or properties the finding refers to.
    pub affected_entities: Vec<String>,
}
/// Category tag attached to every [`Pattern`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum PatternType {
    /// Several observations share the same set of property names.
    RepeatedStructure,
    /// NOTE(review): not produced by any detector visible in this file.
    RepeatedProperty,
    /// One property carries values of more than one inferred type.
    SchemaMismatch,
    /// An entity's latency grew between its earliest and latest sample.
    PerformanceDegradation,
    /// A property that appears in only a small share of observations.
    OrphanedElement,
    /// NOTE(review): not produced by any detector visible in this file.
    HierarchyInconsistency,
    /// NOTE(review): not produced by any detector visible in this file.
    MissingProjection,
    /// NOTE(review): not produced by any detector visible in this file.
    GuardNearMiss,
}
/// A single suggested action attached to a [`Pattern`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ProposedChange {
    /// Kind of action as free-form text (the detectors in this file emit
    /// values such as `"AddClass"`, `"TightenConstraint"` and `"Review"`).
    pub change_type: String,
    /// Name of the thing the action applies to.
    pub target: String,
    /// Short explanation of why the action is suggested.
    pub rationale: String,
}
/// Summary numbers reported by `PatternMiner::stats`.
///
/// NOTE(review): the `stats` implementation in this file only fills
/// `property_count`, `top_properties` and `utilization_ratio`; every other
/// field keeps its `Default` value there.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct OntologyStats {
    /// Number of classes (left at the default by the visible `stats`).
    pub class_count: usize,
    /// Number of distinct property names seen across all observations.
    pub property_count: usize,
    /// Number of constraints (left at the default by the visible `stats`).
    pub constraint_count: usize,
    /// Per-class counts (left empty by the visible `stats`).
    pub top_classes: BTreeMap<String, usize>,
    /// Usage counts for the most frequently used property names.
    pub top_properties: BTreeMap<String, usize>,
    /// Mined pattern count divided by observation count (0.0 when there are
    /// no observations).
    pub utilization_ratio: f64,
    /// Average properties per class (left at the default by the visible `stats`).
    pub avg_properties_per_class: f64,
}
/// Collects observations and derives [`Pattern`]s from them.
pub struct PatternMiner {
    /// Observations gathered so far, in insertion order.
    pub(crate) observations: Vec<Observation>,
    /// Result of the most recent mining run.
    patterns: Vec<Pattern>,
    /// Thresholds and detector switches applied while mining.
    config: MinerConfig,
}
impl PatternMiner {
    /// Returns how many observations are currently held by the miner.
    pub fn observation_count(&self) -> usize {
        let held = &self.observations;
        held.len()
    }
}
/// One recorded fact about an entity, fed into the miner.
#[derive(Debug, Clone)]
pub struct Observation {
    /// Name of the entity the observation is about.
    pub entity: String,
    /// Property name/value pairs, kept as raw strings. The miner reads the
    /// `"latency_us"` key as a number when it is present.
    pub properties: BTreeMap<String, String>,
    /// Ordering key for samples of the same entity (unit not specified here).
    pub timestamp: u64,
    /// Where the observation came from.
    pub source: ObservationSource,
}
/// Origin tag carried by every [`Observation`].
///
/// NOTE(review): the miner code in this file never branches on this value;
/// the meaning of each variant is inferred from its name only.
#[derive(Debug, Clone, Copy)]
pub enum ObservationSource {
    /// Observation taken from data.
    Data,
    /// Observation taken from an artifact.
    Artifact,
    /// Observation taken from a receipt.
    Receipt,
}
/// Thresholds and feature switches that control a [`PatternMiner`] run.
#[derive(Debug, Clone)]
pub struct MinerConfig {
    /// Patterns whose confidence is below this value are discarded.
    pub min_confidence: f64,
    /// Patterns with fewer occurrences than this are discarded.
    pub min_occurrences: usize,
    /// Upper bound on the number of patterns returned by one run.
    pub max_patterns: usize,
    /// Enables the schema-mismatch detector.
    pub detect_schema_mismatches: bool,
    /// Enables the latency-degradation detector.
    pub detect_performance_anomalies: bool,
    /// Enables the orphaned-element detector.
    pub detect_orphaned_elements: bool,
    /// Latency growth, in percent, that must be exceeded before a
    /// degradation is reported.
    pub perf_anomaly_threshold_pct: f64,
}
impl Default for MinerConfig {
fn default() -> Self {
Self {
min_confidence: 0.75,
min_occurrences: 3,
max_patterns: 50,
detect_schema_mismatches: true,
detect_performance_anomalies: true,
detect_orphaned_elements: true,
perf_anomaly_threshold_pct: 10.0,
}
}
}
impl PatternMiner {
pub fn new(config: MinerConfig) -> Self {
Self {
observations: Vec::new(),
patterns: Vec::new(),
config,
}
}
pub fn add_observation(&mut self, obs: Observation) {
self.observations.push(obs);
}
pub fn add_observations(&mut self, obs: Vec<Observation>) {
self.observations.extend(obs);
}
pub fn mine(&mut self) -> Result<Vec<Pattern>, String> {
self.patterns.clear();
if self.observations.is_empty() {
return Ok(vec![]);
}
self.mine_repeated_structures()?;
self.mine_schema_mismatches()?;
self.mine_performance_anomalies()?;
self.mine_orphaned_elements()?;
let mut patterns = self.patterns.clone();
patterns.retain(|p| {
p.occurrences >= self.config.min_occurrences
&& p.confidence >= self.config.min_confidence
});
patterns.sort_by(|a, b| {
b.confidence
.partial_cmp(&a.confidence)
.unwrap_or(std::cmp::Ordering::Equal)
});
patterns.truncate(self.config.max_patterns);
self.patterns = patterns.clone();
Ok(patterns)
}
fn mine_repeated_structures(&mut self) -> Result<(), String> {
let mut structure_counts: BTreeMap<String, Vec<String>> = BTreeMap::new();
for obs in &self.observations {
let mut props: Vec<_> = obs.properties.keys().cloned().collect();
props.sort();
let signature = props.join("|");
structure_counts
.entry(signature)
.or_default()
.push(obs.entity.clone());
}
for (signature, entities) in structure_counts {
if entities.len() >= self.config.min_occurrences {
let props: Vec<&str> = signature.split('|').collect();
let confidence = (entities.len() as f64) / (self.observations.len() as f64);
let pattern = Pattern {
name: format!("Repeated_{}", signature.replace('|', "_")),
pattern_type: PatternType::RepeatedStructure,
description: format!(
"Found {} instances with properties: {}",
entities.len(),
props.join(", ")
),
confidence,
occurrences: entities.len(),
proposed_changes: vec![ProposedChange {
change_type: "AddClass".to_string(),
target: format!("Class_{}", signature.replace('|', "_")),
rationale: "Consolidate repeated structure into a new class".to_string(),
}],
affected_entities: entities,
};
self.patterns.push(pattern);
}
}
Ok(())
}
fn mine_schema_mismatches(&mut self) -> Result<(), String> {
if !self.config.detect_schema_mismatches {
return Ok(());
}
let mut type_violations: BTreeMap<String, Vec<String>> = BTreeMap::new();
for (key, values) in self.group_by_property() {
let mut value_types = HashSet::new();
for val in &values {
let inferred_type = self.infer_type(val);
value_types.insert(inferred_type);
}
if value_types.len() > 1 {
let types: Vec<_> = value_types.iter().cloned().collect();
type_violations
.entry(key.clone())
.or_default()
.extend(types);
}
}
for (property, types) in type_violations {
if types.len() > 1 {
let pattern = Pattern {
name: format!("SchemaMismatch_{}", property),
pattern_type: PatternType::SchemaMismatch,
description: format!(
"Property '{}' has inconsistent types: {:?}",
property, types
),
confidence: 0.85,
occurrences: types.len(),
proposed_changes: vec![ProposedChange {
change_type: "TightenConstraint".to_string(),
target: property.clone(),
rationale: "Add type constraint to prevent mixed types".to_string(),
}],
affected_entities: vec![property],
};
self.patterns.push(pattern);
}
}
Ok(())
}
fn mine_performance_anomalies(&mut self) -> Result<(), String> {
if !self.config.detect_performance_anomalies {
return Ok(());
}
let mut latencies: BTreeMap<String, Vec<(u64, f64)>> = BTreeMap::new();
for obs in &self.observations {
if let Some(latency_str) = obs.properties.get("latency_us") {
if let Ok(latency) = latency_str.parse::<f64>() {
latencies
.entry(obs.entity.clone())
.or_default()
.push((obs.timestamp, latency));
}
}
}
for (entity, mut latency_samples) in latencies {
latency_samples.sort_by_key(|x| x.0);
if latency_samples.len() >= 2 {
let first_latency = latency_samples[0].1;
let last_latency = latency_samples[latency_samples.len() - 1].1;
let degradation_pct = ((last_latency - first_latency) / first_latency) * 100.0;
if degradation_pct > self.config.perf_anomaly_threshold_pct {
let pattern = Pattern {
name: format!("PerfDegradation_{}", entity),
pattern_type: PatternType::PerformanceDegradation,
description: format!(
"Entity '{}' latency degraded {:.1}% (from {:.0}μs to {:.0}μs)",
entity, degradation_pct, first_latency, last_latency
),
confidence: 0.90,
occurrences: latency_samples.len(),
proposed_changes: vec![ProposedChange {
change_type: "OptimizeOperator".to_string(),
target: entity.clone(),
rationale:
"Operator latency degraded; recommend profiling and optimization"
.to_string(),
}],
affected_entities: vec![entity],
};
self.patterns.push(pattern);
}
}
}
Ok(())
}
fn mine_orphaned_elements(&mut self) -> Result<(), String> {
if !self.config.detect_orphaned_elements {
return Ok(());
}
let mut usage_counts: BTreeMap<String, usize> = BTreeMap::new();
for obs in &self.observations {
for key in obs.properties.keys() {
*usage_counts.entry(key.clone()).or_insert(0) += 1;
}
}
for (element, count) in usage_counts {
let usage_ratio = count as f64 / self.observations.len() as f64;
if usage_ratio < 0.1 {
let pattern = Pattern {
name: format!("Orphaned_{}", element),
pattern_type: PatternType::OrphanedElement,
description: format!(
"Element '{}' used in only {:.1}% of observations",
element,
usage_ratio * 100.0
),
confidence: 0.7,
occurrences: count,
proposed_changes: vec![ProposedChange {
change_type: "Review".to_string(),
target: element.clone(),
rationale: "Low usage; consider removal or deprecation".to_string(),
}],
affected_entities: vec![element],
};
self.patterns.push(pattern);
}
}
Ok(())
}
fn group_by_property(&self) -> BTreeMap<String, Vec<String>> {
let mut grouped = BTreeMap::new();
for obs in &self.observations {
for (key, val) in &obs.properties {
grouped
.entry(key.clone())
.or_insert_with(Vec::new)
.push(val.clone());
}
}
grouped
}
fn infer_type(&self, value: &str) -> String {
if value.parse::<i64>().is_ok() {
"integer".to_string()
} else if value.parse::<f64>().is_ok() {
"float".to_string()
} else if value.to_lowercase() == "true" || value.to_lowercase() == "false" {
"boolean".to_string()
} else {
"string".to_string()
}
}
pub fn stats(&self) -> OntologyStats {
let mut stats = OntologyStats::default();
let mut property_freq: BTreeMap<String, usize> = BTreeMap::new();
for obs in &self.observations {
for key in obs.properties.keys() {
*property_freq.entry(key.clone()).or_insert(0) += 1;
}
}
let mut sorted_props: Vec<_> = property_freq.into_iter().collect();
sorted_props.sort_by(|a, b| b.1.cmp(&a.1));
stats.property_count = sorted_props.len();
stats.top_properties = sorted_props.into_iter().take(10).collect();
stats.utilization_ratio = if self.observations.is_empty() {
0.0
} else {
(self.patterns.len() as f64) / (self.observations.len() as f64)
};
stats
}
pub fn patterns(&self) -> &[Pattern] {
&self.patterns
}
pub fn clear(&mut self) {
self.observations.clear();
self.patterns.clear();
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds one `Data`-sourced observation with a `type` of `"user"` and
    /// the given `name`.
    fn user_observation(entity: &str, name: &str, timestamp: u64) -> Observation {
        Observation {
            entity: entity.to_string(),
            properties: vec![("type", "user"), ("name", name)]
                .into_iter()
                .map(|(key, value)| (key.to_string(), value.to_string()))
                .collect(),
            timestamp,
            source: ObservationSource::Data,
        }
    }

    /// Three user observations that all share the same property set.
    fn create_test_observations() -> Vec<Observation> {
        vec![
            user_observation("entity_1", "Alice", 1000),
            user_observation("entity_2", "Bob", 2000),
            user_observation("entity_3", "Charlie", 3000),
        ]
    }

    /// A fresh miner starts with no patterns and no observations.
    #[test]
    fn test_pattern_miner_creation() {
        let miner = PatternMiner::new(MinerConfig::default());
        assert!(miner.patterns.is_empty());
        assert!(miner.observations.is_empty());
    }

    /// Adding a batch stores every observation in it.
    #[test]
    fn test_add_observations() {
        let mut miner = PatternMiner::new(MinerConfig::default());
        miner.add_observations(create_test_observations());
        assert_eq!(miner.observations.len(), 3);
    }

    /// Three identically shaped observations yield a repeated-structure pattern.
    #[test]
    fn test_mine_repeated_structures() {
        let mut miner = PatternMiner::new(MinerConfig::default());
        miner.add_observations(create_test_observations());

        let patterns = miner.mine().unwrap();

        assert!(!patterns.is_empty());
        let has_repeated = patterns
            .iter()
            .any(|p| p.pattern_type == PatternType::RepeatedStructure);
        assert!(has_repeated);
    }

    /// The fixture uses exactly two distinct property names.
    #[test]
    fn test_stats() {
        let mut miner = PatternMiner::new(MinerConfig::default());
        miner.add_observations(create_test_observations());
        miner.mine().unwrap();

        let stats = miner.stats();

        assert_eq!(stats.property_count, 2);
    }
}