use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, BufWriter, Read, Write};
use std::path::Path;
use chrono::{DateTime, Utc};
use flate2::Compression;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use serde::{Deserialize, Serialize};
use crate::optimized::{OptimizedConfig, OptimizedDiscoveryEngine, SignificantPattern};
use crate::ruvector_native::{
CoherenceSnapshot, Domain, GraphEdge, GraphNode, SemanticVector,
};
use crate::{FrameworkError, Result};
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Serializable snapshot of an engine's full state, written to disk by
/// `save_state` and read back by `load_state`.
pub struct EngineState {
    /// Engine configuration the snapshot was taken under.
    pub config: OptimizedConfig,
    /// All semantic vectors known to the engine.
    pub vectors: Vec<SemanticVector>,
    /// Graph nodes keyed by node id.
    pub nodes: HashMap<u32, GraphNode>,
    /// Graph edges (endpoints reference `nodes` ids).
    pub edges: Vec<GraphEdge>,
    /// Time-ordered coherence measurements: (timestamp, score, snapshot).
    pub coherence_history: Vec<(DateTime<Utc>, f64, CoherenceSnapshot)>,
    /// Next node id to assign; kept so reloads don't reuse ids.
    pub next_node_id: u32,
    /// Node ids grouped by domain.
    pub domain_nodes: HashMap<Domain, Vec<u32>>,
    /// Per-domain time series of (timestamp, value) samples.
    pub domain_timeseries: HashMap<Domain, Vec<(DateTime<Utc>, f64)>>,
    /// UTC time the snapshot was created.
    pub saved_at: DateTime<Utc>,
    /// Crate version that produced the snapshot (from CARGO_PKG_VERSION).
    pub version: String,
}
impl EngineState {
pub fn new(config: OptimizedConfig) -> Self {
Self {
config,
vectors: Vec::new(),
nodes: HashMap::new(),
edges: Vec::new(),
coherence_history: Vec::new(),
next_node_id: 0,
domain_nodes: HashMap::new(),
domain_timeseries: HashMap::new(),
saved_at: Utc::now(),
version: env!("CARGO_PKG_VERSION").to_string(),
}
}
}
#[derive(Debug, Clone, Copy)]
/// Knobs controlling how state/pattern files are written to disk.
pub struct PersistenceOptions {
    /// Gzip-compress the JSON output when true.
    pub compress: bool,
    /// Gzip level passed to `flate2::Compression::new` (only used when
    /// `compress` is true).
    pub compression_level: u32,
    /// Pretty-print the JSON instead of compact output.
    pub pretty: bool,
}
impl Default for PersistenceOptions {
fn default() -> Self {
Self {
compress: false,
compression_level: 6,
pretty: false,
}
}
}
impl PersistenceOptions {
    /// Default options with gzip compression switched on.
    pub fn compressed() -> Self {
        let mut opts = Self::default();
        opts.compress = true;
        opts
    }

    /// Default options producing human-readable (pretty-printed) JSON.
    pub fn pretty() -> Self {
        let mut opts = Self::default();
        opts.pretty = true;
        opts
    }
}
/// Extract the engine's state and persist it to `path` per `options`.
///
/// Logs the node/edge counts of the saved snapshot on success.
pub fn save_engine(
    engine: &OptimizedDiscoveryEngine,
    path: &Path,
    options: &PersistenceOptions,
) -> Result<()> {
    let snapshot = extract_state(engine);
    save_state(&snapshot, path, options)?;
    tracing::info!(
        "Saved engine state to {} ({} nodes, {} edges)",
        path.display(),
        snapshot.nodes.len(),
        snapshot.edges.len()
    );
    Ok(())
}
/// Load a previously saved state from `path` and rebuild an engine from it.
///
/// Logs the node/edge counts of the loaded snapshot.
pub fn load_engine(path: &Path) -> Result<OptimizedDiscoveryEngine> {
    let snapshot = load_state(path)?;
    tracing::info!(
        "Loaded engine state from {} ({} nodes, {} edges)",
        path.display(),
        snapshot.nodes.len(),
        snapshot.edges.len()
    );
    Ok(reconstruct_engine(snapshot))
}
/// Serialize a pattern list to `path` as JSON, optionally gzip-compressed
/// and/or pretty-printed per `options`.
///
/// # Errors
/// Returns `FrameworkError::Discovery` if the file cannot be created or
/// written, or a serialization error via `?`.
pub fn save_patterns(
    patterns: &[SignificantPattern],
    path: &Path,
    options: &PersistenceOptions,
) -> Result<()> {
    let file = File::create(path).map_err(|e| {
        FrameworkError::Discovery(format!("Failed to create file {}: {}", path.display(), e))
    })?;
    let mut writer = BufWriter::new(file);
    if options.compress {
        // Stream JSON straight into the gzip encoder instead of building an
        // intermediate String; the output bytes are identical and the peak
        // memory footprint no longer scales with the serialized size.
        let mut encoder = GzEncoder::new(writer, Compression::new(options.compression_level));
        if options.pretty {
            serde_json::to_writer_pretty(&mut encoder, patterns)?;
        } else {
            serde_json::to_writer(&mut encoder, patterns)?;
        }
        encoder.finish().map_err(|e| {
            FrameworkError::Discovery(format!("Failed to finish compression: {}", e))
        })?;
    } else {
        if options.pretty {
            serde_json::to_writer_pretty(&mut writer, patterns)?;
        } else {
            serde_json::to_writer(&mut writer, patterns)?;
        }
        // BufWriter's Drop silently swallows flush errors; flush explicitly so
        // a short write surfaces as an error instead of a truncated file.
        writer.flush().map_err(|e| {
            FrameworkError::Discovery(format!("Failed to write compressed patterns: {}", e))
        })?;
    }
    tracing::info!("Saved {} patterns to {}", patterns.len(), path.display());
    Ok(())
}
pub fn load_patterns(path: &Path) -> Result<Vec<SignificantPattern>> {
let file = File::open(path).map_err(|e| {
FrameworkError::Discovery(format!("Failed to open file {}: {}", path.display(), e))
})?;
let reader = BufReader::new(file);
let mut peeker = BufReader::new(File::open(path).unwrap());
let mut magic = [0u8; 2];
let is_gzip = peeker.read_exact(&mut magic).is_ok() && magic == [0x1f, 0x8b];
let patterns: Vec<SignificantPattern> = if is_gzip {
let file = File::open(path).unwrap();
let decoder = GzDecoder::new(BufReader::new(file));
serde_json::from_reader(decoder)?
} else {
serde_json::from_reader(reader)?
};
tracing::info!("Loaded {} patterns from {}", patterns.len(), path.display());
Ok(patterns)
}
/// Append `patterns` to the file at `path`, preserving its existing
/// compression format. Creates the file with default options if absent.
///
/// Implemented as read-merge-rewrite: the whole file is loaded, extended,
/// and written back.
pub fn append_patterns(patterns: &[SignificantPattern], path: &Path) -> Result<()> {
    // Nothing to do for an empty batch.
    if patterns.is_empty() {
        return Ok(());
    }
    // First write for this path: plain save with defaults.
    if !path.exists() {
        return save_patterns(patterns, path, &PersistenceOptions::default());
    }
    let mut combined = load_patterns(path)?;
    combined.extend_from_slice(patterns);
    // Keep whatever compression choice the existing file was written with.
    let options = if is_compressed(path)? {
        PersistenceOptions::compressed()
    } else {
        PersistenceOptions::default()
    };
    save_patterns(&combined, path, &options)?;
    tracing::info!(
        "Appended {} patterns to {} (total: {})",
        patterns.len(),
        path.display(),
        combined.len()
    );
    Ok(())
}
/// Produce an `EngineState` snapshot for persistence.
///
/// NOTE(review): this is a stub — the `_engine` argument is ignored and a
/// default/empty state is returned, so `save_engine` currently persists no
/// actual engine data (only a default config and the crate version).
/// TODO: populate vectors/nodes/edges/history from the engine once
/// `OptimizedDiscoveryEngine` exposes the needed accessors.
fn extract_state(_engine: &OptimizedDiscoveryEngine) -> EngineState {
    EngineState {
        config: OptimizedConfig::default(),
        vectors: Vec::new(),
        nodes: HashMap::new(),
        edges: Vec::new(),
        coherence_history: Vec::new(),
        next_node_id: 0,
        domain_nodes: HashMap::new(),
        domain_timeseries: HashMap::new(),
        saved_at: Utc::now(),
        version: env!("CARGO_PKG_VERSION").to_string(),
    }
}
/// Rebuild an engine from a saved state.
///
/// NOTE(review): lossy counterpart to `extract_state` — only `state.config`
/// is used; vectors, nodes, edges, and history in the snapshot are dropped.
/// TODO: restore the full graph once the engine supports it.
fn reconstruct_engine(state: EngineState) -> OptimizedDiscoveryEngine {
    OptimizedDiscoveryEngine::new(state.config)
}
/// Write an `EngineState` to `path` as JSON, optionally gzip-compressed
/// and/or pretty-printed per `options`.
///
/// # Errors
/// Returns `FrameworkError::Discovery` on create/write failure, or a
/// serialization error via `?`.
fn save_state(state: &EngineState, path: &Path, options: &PersistenceOptions) -> Result<()> {
    let file = File::create(path).map_err(|e| {
        FrameworkError::Discovery(format!("Failed to create file {}: {}", path.display(), e))
    })?;
    let mut writer = BufWriter::new(file);
    if options.compress {
        // Stream JSON directly into the encoder rather than serializing to an
        // intermediate String first; identical output, lower peak memory.
        let mut encoder = GzEncoder::new(writer, Compression::new(options.compression_level));
        if options.pretty {
            serde_json::to_writer_pretty(&mut encoder, state)?;
        } else {
            serde_json::to_writer(&mut encoder, state)?;
        }
        encoder.finish().map_err(|e| {
            FrameworkError::Discovery(format!("Failed to finish compression: {}", e))
        })?;
    } else {
        if options.pretty {
            serde_json::to_writer_pretty(&mut writer, state)?;
        } else {
            serde_json::to_writer(&mut writer, state)?;
        }
        // BufWriter's Drop swallows flush errors; flush explicitly so a short
        // write is reported instead of silently truncating the file.
        writer.flush().map_err(|e| {
            FrameworkError::Discovery(format!("Failed to write compressed state: {}", e))
        })?;
    }
    Ok(())
}
/// Read an `EngineState` from `path`, auto-detecting gzip via magic bytes.
///
/// # Errors
/// Returns `FrameworkError::Discovery` if the file cannot be opened, or a
/// deserialization error via `?`.
fn load_state(path: &Path) -> Result<EngineState> {
    // Detect the format first, then open once; the original re-opened the
    // file with `.unwrap()` in the gzip branch, which could panic if the
    // file disappeared between opens.
    let gzipped = is_compressed(path)?;
    let file = File::open(path).map_err(|e| {
        FrameworkError::Discovery(format!("Failed to open file {}: {}", path.display(), e))
    })?;
    let reader = BufReader::new(file);
    let state = if gzipped {
        serde_json::from_reader(GzDecoder::new(reader))?
    } else {
        serde_json::from_reader(reader)?
    };
    Ok(state)
}
/// Report whether the file at `path` starts with the gzip magic bytes
/// (0x1f 0x8b). Files shorter than two bytes count as not compressed.
fn is_compressed(path: &Path) -> Result<bool> {
    let mut file = File::open(path).map_err(|e| {
        FrameworkError::Discovery(format!("Failed to open file {}: {}", path.display(), e))
    })?;
    let mut header = [0u8; 2];
    // A read failure (e.g. file too short) simply means "not gzip".
    if file.read_exact(&mut header).is_err() {
        return Ok(false);
    }
    Ok(header[0] == 0x1f && header[1] == 0x8b)
}
/// Return the on-disk size of `path` in bytes.
pub fn get_file_size(path: &Path) -> Result<u64> {
    match std::fs::metadata(path) {
        Ok(meta) => Ok(meta.len()),
        Err(e) => Err(FrameworkError::Discovery(format!(
            "Failed to get file metadata: {}",
            e
        ))),
    }
}
/// Report `(compressed_size, uncompressed_size, ratio)` for the file at
/// `path`. For non-gzip files both sizes are equal and the ratio is 1.0.
///
/// # Errors
/// Returns `FrameworkError::Discovery` on open/decompress failure.
pub fn compression_info(path: &Path) -> Result<(u64, u64, f64)> {
    let compressed_size = get_file_size(path)?;
    if is_compressed(path)? {
        // Original used `.unwrap()` on File::open (panic on a race with
        // deletion) — propagate the error instead.
        let file = File::open(path).map_err(|e| {
            FrameworkError::Discovery(format!("Failed to open file {}: {}", path.display(), e))
        })?;
        let mut decoder = GzDecoder::new(BufReader::new(file));
        // Count decompressed bytes by streaming into a sink rather than
        // buffering the entire payload in memory just to take its length.
        let uncompressed_size = std::io::copy(&mut decoder, &mut std::io::sink())
            .map_err(|e| FrameworkError::Discovery(format!("Failed to decompress: {}", e)))?;
        let ratio = compressed_size as f64 / uncompressed_size as f64;
        Ok((compressed_size, uncompressed_size, ratio))
    } else {
        Ok((compressed_size, compressed_size, 1.0))
    }
}
#[cfg(test)]
mod tests {
    //! Round-trip tests for the persistence layer, using `tempfile` for
    //! throwaway on-disk files.
    use super::*;
    use crate::optimized::OptimizedConfig;
    use crate::ruvector_native::{DiscoveredPattern, PatternType, Evidence};
    use tempfile::NamedTempFile;

    /// A fresh `EngineState` starts empty and carries the given config.
    #[test]
    fn test_engine_state_creation() {
        let config = OptimizedConfig::default();
        let state = EngineState::new(config.clone());
        assert_eq!(state.next_node_id, 0);
        assert_eq!(state.nodes.len(), 0);
        assert_eq!(state.config.similarity_threshold, config.similarity_threshold);
    }

    /// The three option constructors set exactly the flags they advertise.
    #[test]
    fn test_persistence_options() {
        let default = PersistenceOptions::default();
        assert!(!default.compress);
        assert!(!default.pretty);
        let compressed = PersistenceOptions::compressed();
        assert!(compressed.compress);
        let pretty = PersistenceOptions::pretty();
        assert!(pretty.pretty);
    }

    /// Uncompressed save followed by load preserves pattern contents.
    #[test]
    fn test_save_load_patterns() {
        let temp_file = NamedTempFile::new().unwrap();
        let path = temp_file.path();
        let patterns = vec![
            SignificantPattern {
                pattern: DiscoveredPattern {
                    id: "test-1".to_string(),
                    pattern_type: PatternType::CoherenceBreak,
                    confidence: 0.85,
                    affected_nodes: vec![1, 2, 3],
                    detected_at: Utc::now(),
                    description: "Test pattern".to_string(),
                    evidence: vec![
                        Evidence {
                            evidence_type: "test".to_string(),
                            value: 1.0,
                            description: "Test evidence".to_string(),
                        }
                    ],
                    cross_domain_links: vec![],
                },
                p_value: 0.03,
                effect_size: 1.2,
                confidence_interval: (0.5, 1.5),
                is_significant: true,
            }
        ];
        save_patterns(&patterns, path, &PersistenceOptions::default()).unwrap();
        let loaded = load_patterns(path).unwrap();
        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].pattern.id, "test-1");
        assert_eq!(loaded[0].p_value, 0.03);
    }

    /// Compressed save writes a gzip file that load transparently decodes.
    #[test]
    fn test_save_load_patterns_compressed() {
        let temp_file = NamedTempFile::new().unwrap();
        let path = temp_file.path();
        let patterns = vec![
            SignificantPattern {
                pattern: DiscoveredPattern {
                    id: "test-compressed".to_string(),
                    pattern_type: PatternType::Consolidation,
                    confidence: 0.90,
                    affected_nodes: vec![4, 5, 6],
                    detected_at: Utc::now(),
                    description: "Compressed test pattern".to_string(),
                    evidence: vec![],
                    cross_domain_links: vec![],
                },
                p_value: 0.01,
                effect_size: 2.0,
                confidence_interval: (1.0, 3.0),
                is_significant: true,
            }
        ];
        save_patterns(&patterns, path, &PersistenceOptions::compressed()).unwrap();
        // Gzip magic bytes must be detectable on the written file.
        assert!(is_compressed(path).unwrap());
        let loaded = load_patterns(path).unwrap();
        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].pattern.id, "test-compressed");
    }

    /// Appending to an existing file merges, preserving insertion order.
    #[test]
    fn test_append_patterns() {
        let temp_file = NamedTempFile::new().unwrap();
        let path = temp_file.path();
        let pattern1 = vec![
            SignificantPattern {
                pattern: DiscoveredPattern {
                    id: "pattern-1".to_string(),
                    pattern_type: PatternType::EmergingCluster,
                    confidence: 0.75,
                    affected_nodes: vec![1],
                    detected_at: Utc::now(),
                    description: "First pattern".to_string(),
                    evidence: vec![],
                    cross_domain_links: vec![],
                },
                p_value: 0.05,
                effect_size: 1.0,
                confidence_interval: (0.0, 2.0),
                is_significant: false,
            }
        ];
        let pattern2 = vec![
            SignificantPattern {
                pattern: DiscoveredPattern {
                    id: "pattern-2".to_string(),
                    pattern_type: PatternType::Cascade,
                    confidence: 0.95,
                    affected_nodes: vec![2],
                    detected_at: Utc::now(),
                    description: "Second pattern".to_string(),
                    evidence: vec![],
                    cross_domain_links: vec![],
                },
                p_value: 0.001,
                effect_size: 3.0,
                confidence_interval: (2.0, 4.0),
                is_significant: true,
            }
        ];
        save_patterns(&pattern1, path, &PersistenceOptions::default()).unwrap();
        append_patterns(&pattern2, path).unwrap();
        let loaded = load_patterns(path).unwrap();
        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded[0].pattern.id, "pattern-1");
        assert_eq!(loaded[1].pattern.id, "pattern-2");
    }
}