use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
use chrono::{DateTime, Utc};
use crate::optimized::{OptimizedDiscoveryEngine, SignificantPattern};
use crate::ruvector_native::{CoherenceSnapshot, Domain, EdgeType};
use crate::{FrameworkError, Result};
#[derive(Debug, Clone)]
pub struct ExportFilter {
pub domains: Option<Vec<Domain>>,
pub min_edge_weight: Option<f64>,
pub time_range: Option<(DateTime<Utc>, DateTime<Utc>)>,
pub edge_types: Option<Vec<EdgeType>>,
pub max_nodes: Option<usize>,
}
impl ExportFilter {
pub fn domain(domain: Domain) -> Self {
Self {
domains: Some(vec![domain]),
min_edge_weight: None,
time_range: None,
edge_types: None,
max_nodes: None,
}
}
pub fn time_range(start: DateTime<Utc>, end: DateTime<Utc>) -> Self {
Self {
domains: None,
min_edge_weight: None,
time_range: Some((start, end)),
edge_types: None,
max_nodes: None,
}
}
pub fn min_weight(weight: f64) -> Self {
Self {
domains: None,
min_edge_weight: Some(weight),
time_range: None,
edge_types: None,
max_nodes: None,
}
}
pub fn and(mut self, other: ExportFilter) -> Self {
if let Some(d) = other.domains {
self.domains = Some(d);
}
if let Some(w) = other.min_edge_weight {
self.min_edge_weight = Some(w);
}
if let Some(t) = other.time_range {
self.time_range = Some(t);
}
if let Some(e) = other.edge_types {
self.edge_types = Some(e);
}
if let Some(n) = other.max_nodes {
self.max_nodes = Some(n);
}
self
}
}
pub fn export_graphml(
engine: &OptimizedDiscoveryEngine,
path: impl AsRef<Path>,
_filter: Option<ExportFilter>,
) -> Result<()> {
let file = File::create(path.as_ref())
.map_err(|e| FrameworkError::Config(format!("Failed to create file: {}", e)))?;
let mut writer = BufWriter::new(file);
writeln!(writer, r#"<?xml version="1.0" encoding="UTF-8"?>"#)?;
writeln!(
writer,
r#"<graphml xmlns="http://graphml.graphdrawing.org/xmlns""#
)?;
writeln!(
writer,
r#" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance""#
)?;
writeln!(
writer,
r#" xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns"#
)?;
writeln!(
writer,
r#" http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">"#
)?;
writeln!(
writer,
r#" <key id="domain" for="node" attr.name="domain" attr.type="string"/>"#
)?;
writeln!(
writer,
r#" <key id="external_id" for="node" attr.name="external_id" attr.type="string"/>"#
)?;
writeln!(
writer,
r#" <key id="weight" for="node" attr.name="weight" attr.type="double"/>"#
)?;
writeln!(
writer,
r#" <key id="timestamp" for="node" attr.name="timestamp" attr.type="string"/>"#
)?;
writeln!(
writer,
r#" <key id="edge_weight" for="edge" attr.name="weight" attr.type="double"/>"#
)?;
writeln!(
writer,
r#" <key id="edge_type" for="edge" attr.name="type" attr.type="string"/>"#
)?;
writeln!(
writer,
r#" <key id="edge_timestamp" for="edge" attr.name="timestamp" attr.type="string"/>"#
)?;
writeln!(
writer,
r#" <key id="cross_domain" for="edge" attr.name="cross_domain" attr.type="boolean"/>"#
)?;
writeln!(
writer,
r#" <graph id="discovery" edgedefault="undirected">"#
)?;
let stats = engine.stats();
writeln!(writer, r#" <!-- {} nodes in graph -->"#, stats.total_nodes)?;
writeln!(writer, r#" <!-- {} edges in graph -->"#, stats.total_edges)?;
writeln!(
writer,
r#" <!-- Cross-domain edges: {} -->"#,
stats.cross_domain_edges
)?;
writeln!(writer, " </graph>")?;
writeln!(writer, "</graphml>")?;
writer.flush()?;
Ok(())
}
pub fn export_dot(
engine: &OptimizedDiscoveryEngine,
path: impl AsRef<Path>,
_filter: Option<ExportFilter>,
) -> Result<()> {
let file = File::create(path.as_ref())
.map_err(|e| FrameworkError::Config(format!("Failed to create file: {}", e)))?;
let mut writer = BufWriter::new(file);
let stats = engine.stats();
writeln!(writer, "graph discovery {{")?;
writeln!(writer, " layout=neato;")?;
writeln!(writer, " overlap=false;")?;
writeln!(writer, " splines=true;")?;
writeln!(writer, "")?;
writeln!(
writer,
" // Graph statistics: {} nodes, {} edges",
stats.total_nodes, stats.total_edges
)?;
writeln!(
writer,
" // Cross-domain edges: {}",
stats.cross_domain_edges
)?;
writeln!(writer, "")?;
writeln!(writer, " // Domain colors")?;
writeln!(
writer,
r#" node [style=filled, fontname="Arial", fontsize=10];"#
)?;
writeln!(writer, "")?;
for (domain, count) in &stats.domain_counts {
let color = domain_color(*domain);
writeln!(
writer,
" // {:?} domain: {} nodes [color={}]",
domain, count, color
)?;
}
writeln!(writer, "")?;
writeln!(writer, "}}")?;
writer.flush()?;
Ok(())
}
pub fn export_patterns_csv(
patterns: &[SignificantPattern],
path: impl AsRef<Path>,
) -> Result<()> {
let file = File::create(path.as_ref())
.map_err(|e| FrameworkError::Config(format!("Failed to create file: {}", e)))?;
let mut writer = BufWriter::new(file);
writeln!(
writer,
"id,pattern_type,confidence,p_value,effect_size,ci_lower,ci_upper,is_significant,detected_at,description,affected_nodes_count,evidence_count"
)?;
for pattern in patterns {
let p = &pattern.pattern;
writeln!(
writer,
"{},{:?},{},{},{},{},{},{},{},\"{}\",{},{}",
csv_escape(&p.id),
p.pattern_type,
p.confidence,
pattern.p_value,
pattern.effect_size,
pattern.confidence_interval.0,
pattern.confidence_interval.1,
pattern.is_significant,
p.detected_at.to_rfc3339(),
csv_escape(&p.description),
p.affected_nodes.len(),
p.evidence.len()
)?;
}
writer.flush()?;
Ok(())
}
pub fn export_coherence_csv(
history: &[(DateTime<Utc>, f64, CoherenceSnapshot)],
path: impl AsRef<Path>,
) -> Result<()> {
let file = File::create(path.as_ref())
.map_err(|e| FrameworkError::Config(format!("Failed to create file: {}", e)))?;
let mut writer = BufWriter::new(file);
writeln!(
writer,
"timestamp,mincut_value,node_count,edge_count,avg_edge_weight,partition_size_a,partition_size_b,boundary_nodes_count"
)?;
for (timestamp, mincut_value, snapshot) in history {
writeln!(
writer,
"{},{},{},{},{},{},{},{}",
timestamp.to_rfc3339(),
mincut_value,
snapshot.node_count,
snapshot.edge_count,
snapshot.avg_edge_weight,
snapshot.partition_sizes.0,
snapshot.partition_sizes.1,
snapshot.boundary_nodes.len()
)?;
}
writer.flush()?;
Ok(())
}
pub fn export_patterns_with_evidence_csv(
patterns: &[SignificantPattern],
path: impl AsRef<Path>,
) -> Result<()> {
let file = File::create(path.as_ref())
.map_err(|e| FrameworkError::Config(format!("Failed to create file: {}", e)))?;
let mut writer = BufWriter::new(file);
writeln!(
writer,
"pattern_id,pattern_type,evidence_type,evidence_value,evidence_description,detected_at"
)?;
for pattern in patterns {
let p = &pattern.pattern;
for evidence in &p.evidence {
writeln!(
writer,
"{},{:?},{},{},\"{}\",{}",
csv_escape(&p.id),
p.pattern_type,
csv_escape(&evidence.evidence_type),
evidence.value,
csv_escape(&evidence.description),
p.detected_at.to_rfc3339()
)?;
}
}
writer.flush()?;
Ok(())
}
pub fn export_all(
engine: &OptimizedDiscoveryEngine,
patterns: &[SignificantPattern],
history: &[(DateTime<Utc>, f64, CoherenceSnapshot)],
output_dir: impl AsRef<Path>,
) -> Result<()> {
let dir = output_dir.as_ref();
std::fs::create_dir_all(dir)
.map_err(|e| FrameworkError::Config(format!("Failed to create directory: {}", e)))?;
export_graphml(engine, dir.join("graph.graphml"), None)?;
export_dot(engine, dir.join("graph.dot"), None)?;
export_patterns_csv(patterns, dir.join("patterns.csv"))?;
export_patterns_with_evidence_csv(patterns, dir.join("patterns_evidence.csv"))?;
export_coherence_csv(history, dir.join("coherence.csv"))?;
let readme = dir.join("README.md");
let readme_file = File::create(readme)
.map_err(|e| FrameworkError::Config(format!("Failed to create README: {}", e)))?;
let mut readme_writer = BufWriter::new(readme_file);
writeln!(readme_writer, "# RuVector Discovery Export")?;
writeln!(readme_writer, "")?;
writeln!(
readme_writer,
"Exported: {}",
Utc::now().to_rfc3339()
)?;
writeln!(readme_writer, "")?;
writeln!(readme_writer, "## Files")?;
writeln!(readme_writer, "")?;
writeln!(
readme_writer,
"- `graph.graphml` - Full graph in GraphML format (import into Gephi)"
)?;
writeln!(
readme_writer,
"- `graph.dot` - Full graph in DOT format (render with Graphviz)"
)?;
writeln!(readme_writer, "- `patterns.csv` - Discovered patterns")?;
writeln!(
readme_writer,
"- `patterns_evidence.csv` - Patterns with detailed evidence"
)?;
writeln!(
readme_writer,
"- `coherence.csv` - Coherence history over time"
)?;
writeln!(readme_writer, "")?;
writeln!(readme_writer, "## Visualization")?;
writeln!(readme_writer, "")?;
writeln!(readme_writer, "### Gephi (GraphML)")?;
writeln!(readme_writer, "1. Open Gephi")?;
writeln!(readme_writer, "2. File → Open → graph.graphml")?;
writeln!(
readme_writer,
"3. Layout → Force Atlas 2 or Fruchterman Reingold"
)?;
writeln!(
readme_writer,
"4. Color nodes by 'domain' attribute"
)?;
writeln!(readme_writer, "")?;
writeln!(readme_writer, "### Graphviz (DOT)")?;
writeln!(readme_writer, "```bash")?;
writeln!(readme_writer, "# PNG output")?;
writeln!(
readme_writer,
"dot -Tpng graph.dot -o graph.png"
)?;
writeln!(readme_writer, "")?;
writeln!(readme_writer, "# SVG output (vector, scalable)")?;
writeln!(
readme_writer,
"neato -Tsvg graph.dot -o graph.svg"
)?;
writeln!(readme_writer, "")?;
writeln!(readme_writer, "# Interactive SVG")?;
writeln!(
readme_writer,
"fdp -Tsvg graph.dot -o graph_interactive.svg"
)?;
writeln!(readme_writer, "```")?;
writeln!(readme_writer, "")?;
writeln!(readme_writer, "## Statistics")?;
writeln!(readme_writer, "")?;
let stats = engine.stats();
writeln!(readme_writer, "- Nodes: {}", stats.total_nodes)?;
writeln!(readme_writer, "- Edges: {}", stats.total_edges)?;
writeln!(
readme_writer,
"- Cross-domain edges: {}",
stats.cross_domain_edges
)?;
writeln!(readme_writer, "- Patterns detected: {}", patterns.len())?;
writeln!(
readme_writer,
"- Coherence snapshots: {}",
history.len()
)?;
readme_writer.flush()?;
Ok(())
}
fn csv_escape(s: &str) -> String {
if s.contains('"') || s.contains(',') || s.contains('\n') {
format!("\"{}\"", s.replace('"', "\"\""))
} else {
s.to_string()
}
}
fn domain_color(domain: Domain) -> &'static str {
match domain {
Domain::Climate => "lightblue",
Domain::Finance => "lightgreen",
Domain::Research => "lightyellow",
Domain::Medical => "lightpink",
Domain::Economic => "lavender",
Domain::Genomics => "palegreen",
Domain::Physics => "lightsteelblue",
Domain::Seismic => "sandybrown",
Domain::Ocean => "aquamarine",
Domain::Space => "plum",
Domain::Transportation => "peachpuff",
Domain::Geospatial => "lightgoldenrodyellow",
Domain::Government => "lightgray",
Domain::CrossDomain => "lightcoral",
}
}
fn domain_shape(domain: Domain) -> &'static str {
match domain {
Domain::Climate => "circle",
Domain::Finance => "box",
Domain::Research => "diamond",
Domain::Medical => "ellipse",
Domain::Economic => "octagon",
Domain::Genomics => "pentagon",
Domain::Physics => "triangle",
Domain::Seismic => "invtriangle",
Domain::Ocean => "trapezium",
Domain::Space => "star",
Domain::Transportation => "house",
Domain::Geospatial => "invhouse",
Domain::Government => "folder",
Domain::CrossDomain => "hexagon",
}
}
fn edge_type_label(edge_type: EdgeType) -> &'static str {
match edge_type {
EdgeType::Correlation => "correlation",
EdgeType::Similarity => "similarity",
EdgeType::Citation => "citation",
EdgeType::Causal => "causal",
EdgeType::CrossDomain => "cross_domain",
}
}
impl From<std::io::Error> for FrameworkError {
fn from(err: std::io::Error) -> Self {
FrameworkError::Config(format!("I/O error: {}", err))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_csv_escape() {
assert_eq!(csv_escape("simple"), "simple");
assert_eq!(csv_escape("with,comma"), "\"with,comma\"");
assert_eq!(csv_escape("with\"quote"), "\"with\"\"quote\"");
}
#[test]
fn test_domain_color() {
assert_eq!(domain_color(Domain::Climate), "lightblue");
assert_eq!(domain_color(Domain::Finance), "lightgreen");
}
#[test]
fn test_export_filter() {
let filter = ExportFilter::domain(Domain::Climate);
assert!(filter.domains.is_some());
let combined = filter.and(ExportFilter::min_weight(0.5));
assert_eq!(combined.min_edge_weight, Some(0.5));
}
}