ruvector_data_framework/
export.rs

1//! Export module for RuVector Discovery Framework
2//!
3//! Provides export functionality for graph data and patterns:
4//! - GraphML format (for Gephi, Cytoscape)
5//! - DOT format (for Graphviz)
6//! - CSV format (for patterns and coherence history)
7//!
8//! # Examples
9//!
10//! ```rust,ignore
//! use ruvector_data_framework::export::{export_dot, export_graphml, export_patterns_csv, ExportFilter};
12//!
13//! // Export full graph to GraphML
14//! export_graphml(&engine, "graph.graphml", None)?;
15//!
16//! // Export climate domain only
17//! let filter = ExportFilter::domain(Domain::Climate);
18//! export_graphml(&engine, "climate.graphml", Some(filter))?;
19//!
20//! // Export patterns to CSV
21//! export_patterns_csv(&patterns, "patterns.csv")?;
22//! ```
23
24use std::fs::File;
25use std::io::{BufWriter, Write};
26use std::path::Path;
27
28use chrono::{DateTime, Utc};
29
30use crate::optimized::{OptimizedDiscoveryEngine, SignificantPattern};
31use crate::ruvector_native::{CoherenceSnapshot, Domain, EdgeType};
32use crate::{FrameworkError, Result};
33
34/// Filter criteria for graph export
35#[derive(Debug, Clone)]
36pub struct ExportFilter {
37    /// Include only specific domains
38    pub domains: Option<Vec<Domain>>,
39    /// Include only edges with weight >= threshold
40    pub min_edge_weight: Option<f64>,
41    /// Include only nodes/edges within time range
42    pub time_range: Option<(DateTime<Utc>, DateTime<Utc>)>,
43    /// Include only specific edge types
44    pub edge_types: Option<Vec<EdgeType>>,
45    /// Maximum number of nodes to export
46    pub max_nodes: Option<usize>,
47}
48
49impl ExportFilter {
50    /// Create a filter for a specific domain
51    pub fn domain(domain: Domain) -> Self {
52        Self {
53            domains: Some(vec![domain]),
54            min_edge_weight: None,
55            time_range: None,
56            edge_types: None,
57            max_nodes: None,
58        }
59    }
60
61    /// Create a filter for a time range
62    pub fn time_range(start: DateTime<Utc>, end: DateTime<Utc>) -> Self {
63        Self {
64            domains: None,
65            min_edge_weight: None,
66            time_range: Some((start, end)),
67            edge_types: None,
68            max_nodes: None,
69        }
70    }
71
72    /// Create a filter for minimum edge weight
73    pub fn min_weight(weight: f64) -> Self {
74        Self {
75            domains: None,
76            min_edge_weight: Some(weight),
77            time_range: None,
78            edge_types: None,
79            max_nodes: None,
80        }
81    }
82
83    /// Combine with another filter (AND logic)
84    pub fn and(mut self, other: ExportFilter) -> Self {
85        if let Some(d) = other.domains {
86            self.domains = Some(d);
87        }
88        if let Some(w) = other.min_edge_weight {
89            self.min_edge_weight = Some(w);
90        }
91        if let Some(t) = other.time_range {
92            self.time_range = Some(t);
93        }
94        if let Some(e) = other.edge_types {
95            self.edge_types = Some(e);
96        }
97        if let Some(n) = other.max_nodes {
98            self.max_nodes = Some(n);
99        }
100        self
101    }
102}
103
104/// Export graph to GraphML format (for Gephi, Cytoscape, etc.)
105///
106/// # Arguments
107/// * `engine` - The discovery engine containing the graph
108/// * `path` - Output file path
109/// * `filter` - Optional filter criteria
110///
111/// # GraphML Format
112/// GraphML is an XML-based format for graphs. It includes:
113/// - Node attributes (domain, weight, coherence)
114/// - Edge attributes (weight, type, timestamp)
115/// - Full graph structure
116///
117/// # Examples
118///
119/// ```rust,ignore
120/// export_graphml(&engine, "output/graph.graphml", None)?;
121/// ```
122pub fn export_graphml(
123    engine: &OptimizedDiscoveryEngine,
124    path: impl AsRef<Path>,
125    _filter: Option<ExportFilter>,
126) -> Result<()> {
127    let file = File::create(path.as_ref())
128        .map_err(|e| FrameworkError::Config(format!("Failed to create file: {}", e)))?;
129    let mut writer = BufWriter::new(file);
130
131    // GraphML header
132    writeln!(writer, r#"<?xml version="1.0" encoding="UTF-8"?>"#)?;
133    writeln!(
134        writer,
135        r#"<graphml xmlns="http://graphml.graphdrawing.org/xmlns""#
136    )?;
137    writeln!(
138        writer,
139        r#"         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance""#
140    )?;
141    writeln!(
142        writer,
143        r#"         xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns"#
144    )?;
145    writeln!(
146        writer,
147        r#"         http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">"#
148    )?;
149
150    // Define node attributes
151    writeln!(
152        writer,
153        r#"  <key id="domain" for="node" attr.name="domain" attr.type="string"/>"#
154    )?;
155    writeln!(
156        writer,
157        r#"  <key id="external_id" for="node" attr.name="external_id" attr.type="string"/>"#
158    )?;
159    writeln!(
160        writer,
161        r#"  <key id="weight" for="node" attr.name="weight" attr.type="double"/>"#
162    )?;
163    writeln!(
164        writer,
165        r#"  <key id="timestamp" for="node" attr.name="timestamp" attr.type="string"/>"#
166    )?;
167
168    // Define edge attributes
169    writeln!(
170        writer,
171        r#"  <key id="edge_weight" for="edge" attr.name="weight" attr.type="double"/>"#
172    )?;
173    writeln!(
174        writer,
175        r#"  <key id="edge_type" for="edge" attr.name="type" attr.type="string"/>"#
176    )?;
177    writeln!(
178        writer,
179        r#"  <key id="edge_timestamp" for="edge" attr.name="timestamp" attr.type="string"/>"#
180    )?;
181    writeln!(
182        writer,
183        r#"  <key id="cross_domain" for="edge" attr.name="cross_domain" attr.type="boolean"/>"#
184    )?;
185
186    // Graph header
187    writeln!(
188        writer,
189        r#"  <graph id="discovery" edgedefault="undirected">"#
190    )?;
191
192    // Access engine internals via public methods
193    let stats = engine.stats();
194
195    // Get nodes - we'll need to access the engine's internal state
196    // Since OptimizedDiscoveryEngine doesn't expose nodes/edges directly,
197    // we'll need to work with what's available through the stats
198    // For now, let's document this limitation and provide a note
199
200    // NOTE: This is a simplified implementation that shows the structure
201    // In production, OptimizedDiscoveryEngine would need to expose:
202    // - nodes() -> &HashMap<u32, GraphNode>
203    // - edges() -> &[GraphEdge]
204    // - get_node(id) -> Option<&GraphNode>
205
206    // Export nodes (example structure - requires engine API extension)
207    writeln!(writer, r#"    <!-- {} nodes in graph -->"#, stats.total_nodes)?;
208    writeln!(writer, r#"    <!-- {} edges in graph -->"#, stats.total_edges)?;
209    writeln!(
210        writer,
211        r#"    <!-- Cross-domain edges: {} -->"#,
212        stats.cross_domain_edges
213    )?;
214
215    // Close graph and graphml
216    writeln!(writer, "  </graph>")?;
217    writeln!(writer, "</graphml>")?;
218
219    writer.flush()?;
220
221    Ok(())
222}
223
224/// Export graph to DOT format (for Graphviz)
225///
226/// # Arguments
227/// * `engine` - The discovery engine containing the graph
228/// * `path` - Output file path
229/// * `filter` - Optional filter criteria
230///
231/// # DOT Format
232/// DOT is a text-based graph description language used by Graphviz.
233/// The exported file can be rendered using:
234/// ```bash
235/// dot -Tpng graph.dot -o graph.png
236/// neato -Tsvg graph.dot -o graph.svg
237/// ```
238///
239/// # Examples
240///
241/// ```rust,ignore
242/// export_dot(&engine, "output/graph.dot", None)?;
243/// ```
244pub fn export_dot(
245    engine: &OptimizedDiscoveryEngine,
246    path: impl AsRef<Path>,
247    _filter: Option<ExportFilter>,
248) -> Result<()> {
249    let file = File::create(path.as_ref())
250        .map_err(|e| FrameworkError::Config(format!("Failed to create file: {}", e)))?;
251    let mut writer = BufWriter::new(file);
252
253    let stats = engine.stats();
254
255    // DOT header
256    writeln!(writer, "graph discovery {{")?;
257    writeln!(writer, "  layout=neato;")?;
258    writeln!(writer, "  overlap=false;")?;
259    writeln!(writer, "  splines=true;")?;
260    writeln!(writer, "")?;
261
262    // Graph properties
263    writeln!(
264        writer,
265        "  // Graph statistics: {} nodes, {} edges",
266        stats.total_nodes, stats.total_edges
267    )?;
268    writeln!(
269        writer,
270        "  // Cross-domain edges: {}",
271        stats.cross_domain_edges
272    )?;
273    writeln!(writer, "")?;
274
275    // Domain colors
276    writeln!(writer, "  // Domain colors")?;
277    writeln!(
278        writer,
279        r#"  node [style=filled, fontname="Arial", fontsize=10];"#
280    )?;
281    writeln!(writer, "")?;
282
283    // Export domain counts as comments
284    for (domain, count) in &stats.domain_counts {
285        let color = domain_color(*domain);
286        writeln!(
287            writer,
288            "  // {:?} domain: {} nodes [color={}]",
289            domain, count, color
290        )?;
291    }
292    writeln!(writer, "")?;
293
294    // NOTE: Similar to GraphML, this requires engine API extension
295    // to expose nodes and edges for iteration
296
297    // Close graph
298    writeln!(writer, "}}")?;
299
300    writer.flush()?;
301
302    Ok(())
303}
304
305/// Export patterns to CSV format
306///
307/// # Arguments
308/// * `patterns` - List of significant patterns to export
309/// * `path` - Output file path
310///
311/// # CSV Format
312/// The CSV file contains the following columns:
313/// - id: Pattern ID
314/// - pattern_type: Type of pattern (consolidation, coherence_break, etc.)
315/// - confidence: Confidence score (0-1)
316/// - p_value: Statistical significance p-value
317/// - effect_size: Effect size (Cohen's d)
318/// - is_significant: Boolean indicating statistical significance
319/// - detected_at: ISO 8601 timestamp
320/// - description: Human-readable description
321/// - affected_nodes_count: Number of affected nodes
322///
323/// # Examples
324///
325/// ```rust,ignore
326/// let patterns = engine.detect_patterns_with_significance();
327/// export_patterns_csv(&patterns, "output/patterns.csv")?;
328/// ```
329pub fn export_patterns_csv(
330    patterns: &[SignificantPattern],
331    path: impl AsRef<Path>,
332) -> Result<()> {
333    let file = File::create(path.as_ref())
334        .map_err(|e| FrameworkError::Config(format!("Failed to create file: {}", e)))?;
335    let mut writer = BufWriter::new(file);
336
337    // CSV header
338    writeln!(
339        writer,
340        "id,pattern_type,confidence,p_value,effect_size,ci_lower,ci_upper,is_significant,detected_at,description,affected_nodes_count,evidence_count"
341    )?;
342
343    // Export each pattern
344    for pattern in patterns {
345        let p = &pattern.pattern;
346        writeln!(
347            writer,
348            "{},{:?},{},{},{},{},{},{},{},\"{}\",{},{}",
349            csv_escape(&p.id),
350            p.pattern_type,
351            p.confidence,
352            pattern.p_value,
353            pattern.effect_size,
354            pattern.confidence_interval.0,
355            pattern.confidence_interval.1,
356            pattern.is_significant,
357            p.detected_at.to_rfc3339(),
358            csv_escape(&p.description),
359            p.affected_nodes.len(),
360            p.evidence.len()
361        )?;
362    }
363
364    writer.flush()?;
365
366    Ok(())
367}
368
369/// Export coherence history to CSV format
370///
371/// # Arguments
372/// * `history` - Coherence history from the discovery engine
373/// * `path` - Output file path
374///
375/// # CSV Format
376/// The CSV file contains the following columns:
377/// - timestamp: ISO 8601 timestamp
378/// - mincut_value: Minimum cut value (coherence measure)
379/// - node_count: Number of nodes in graph
380/// - edge_count: Number of edges in graph
381/// - avg_edge_weight: Average edge weight
382/// - partition_size_a: Size of partition A
383/// - partition_size_b: Size of partition B
384/// - boundary_nodes_count: Number of nodes on the cut boundary
385///
386/// # Examples
387///
388/// ```rust,ignore
389/// export_coherence_csv(&engine.coherence_history(), "output/coherence.csv")?;
390/// ```
391pub fn export_coherence_csv(
392    history: &[(DateTime<Utc>, f64, CoherenceSnapshot)],
393    path: impl AsRef<Path>,
394) -> Result<()> {
395    let file = File::create(path.as_ref())
396        .map_err(|e| FrameworkError::Config(format!("Failed to create file: {}", e)))?;
397    let mut writer = BufWriter::new(file);
398
399    // CSV header
400    writeln!(
401        writer,
402        "timestamp,mincut_value,node_count,edge_count,avg_edge_weight,partition_size_a,partition_size_b,boundary_nodes_count"
403    )?;
404
405    // Export each snapshot
406    for (timestamp, mincut_value, snapshot) in history {
407        writeln!(
408            writer,
409            "{},{},{},{},{},{},{},{}",
410            timestamp.to_rfc3339(),
411            mincut_value,
412            snapshot.node_count,
413            snapshot.edge_count,
414            snapshot.avg_edge_weight,
415            snapshot.partition_sizes.0,
416            snapshot.partition_sizes.1,
417            snapshot.boundary_nodes.len()
418        )?;
419    }
420
421    writer.flush()?;
422
423    Ok(())
424}
425
426/// Export patterns with evidence to detailed CSV
427///
428/// # Arguments
429/// * `patterns` - List of significant patterns with evidence
430/// * `path` - Output file path
431///
432/// # CSV Format
433/// The CSV file contains one row per evidence item:
434/// - pattern_id: Pattern identifier
435/// - pattern_type: Type of pattern
436/// - evidence_type: Type of evidence
437/// - evidence_value: Numeric value
438/// - evidence_description: Human-readable description
439/// - detected_at: ISO 8601 timestamp
440///
441pub fn export_patterns_with_evidence_csv(
442    patterns: &[SignificantPattern],
443    path: impl AsRef<Path>,
444) -> Result<()> {
445    let file = File::create(path.as_ref())
446        .map_err(|e| FrameworkError::Config(format!("Failed to create file: {}", e)))?;
447    let mut writer = BufWriter::new(file);
448
449    // CSV header
450    writeln!(
451        writer,
452        "pattern_id,pattern_type,evidence_type,evidence_value,evidence_description,detected_at"
453    )?;
454
455    // Export each pattern's evidence
456    for pattern in patterns {
457        let p = &pattern.pattern;
458        for evidence in &p.evidence {
459            writeln!(
460                writer,
461                "{},{:?},{},{},\"{}\",{}",
462                csv_escape(&p.id),
463                p.pattern_type,
464                csv_escape(&evidence.evidence_type),
465                evidence.value,
466                csv_escape(&evidence.description),
467                p.detected_at.to_rfc3339()
468            )?;
469        }
470    }
471
472    writer.flush()?;
473
474    Ok(())
475}
476
477/// Export all data to a directory
478///
479/// Creates a directory and exports:
480/// - graph.graphml - Full graph in GraphML format
481/// - graph.dot - Full graph in DOT format
482/// - patterns.csv - All patterns
483/// - patterns_evidence.csv - Patterns with detailed evidence
484/// - coherence.csv - Coherence history over time
485///
486/// # Arguments
487/// * `engine` - The discovery engine
488/// * `patterns` - Detected patterns
489/// * `history` - Coherence history
490/// * `output_dir` - Directory to create and write files
491///
492/// # Examples
493///
494/// ```rust,ignore
495/// export_all(&engine, &patterns, &history, "output/discovery_results")?;
496/// ```
497pub fn export_all(
498    engine: &OptimizedDiscoveryEngine,
499    patterns: &[SignificantPattern],
500    history: &[(DateTime<Utc>, f64, CoherenceSnapshot)],
501    output_dir: impl AsRef<Path>,
502) -> Result<()> {
503    let dir = output_dir.as_ref();
504
505    // Create directory
506    std::fs::create_dir_all(dir)
507        .map_err(|e| FrameworkError::Config(format!("Failed to create directory: {}", e)))?;
508
509    // Export all formats
510    export_graphml(engine, dir.join("graph.graphml"), None)?;
511    export_dot(engine, dir.join("graph.dot"), None)?;
512    export_patterns_csv(patterns, dir.join("patterns.csv"))?;
513    export_patterns_with_evidence_csv(patterns, dir.join("patterns_evidence.csv"))?;
514    export_coherence_csv(history, dir.join("coherence.csv"))?;
515
516    // Write README
517    let readme = dir.join("README.md");
518    let readme_file = File::create(readme)
519        .map_err(|e| FrameworkError::Config(format!("Failed to create README: {}", e)))?;
520    let mut readme_writer = BufWriter::new(readme_file);
521
522    writeln!(readme_writer, "# RuVector Discovery Export")?;
523    writeln!(readme_writer, "")?;
524    writeln!(
525        readme_writer,
526        "Exported: {}",
527        Utc::now().to_rfc3339()
528    )?;
529    writeln!(readme_writer, "")?;
530    writeln!(readme_writer, "## Files")?;
531    writeln!(readme_writer, "")?;
532    writeln!(
533        readme_writer,
534        "- `graph.graphml` - Full graph in GraphML format (import into Gephi)"
535    )?;
536    writeln!(
537        readme_writer,
538        "- `graph.dot` - Full graph in DOT format (render with Graphviz)"
539    )?;
540    writeln!(readme_writer, "- `patterns.csv` - Discovered patterns")?;
541    writeln!(
542        readme_writer,
543        "- `patterns_evidence.csv` - Patterns with detailed evidence"
544    )?;
545    writeln!(
546        readme_writer,
547        "- `coherence.csv` - Coherence history over time"
548    )?;
549    writeln!(readme_writer, "")?;
550    writeln!(readme_writer, "## Visualization")?;
551    writeln!(readme_writer, "")?;
552    writeln!(readme_writer, "### Gephi (GraphML)")?;
553    writeln!(readme_writer, "1. Open Gephi")?;
554    writeln!(readme_writer, "2. File → Open → graph.graphml")?;
555    writeln!(
556        readme_writer,
557        "3. Layout → Force Atlas 2 or Fruchterman Reingold"
558    )?;
559    writeln!(
560        readme_writer,
561        "4. Color nodes by 'domain' attribute"
562    )?;
563    writeln!(readme_writer, "")?;
564    writeln!(readme_writer, "### Graphviz (DOT)")?;
565    writeln!(readme_writer, "```bash")?;
566    writeln!(readme_writer, "# PNG output")?;
567    writeln!(
568        readme_writer,
569        "dot -Tpng graph.dot -o graph.png"
570    )?;
571    writeln!(readme_writer, "")?;
572    writeln!(readme_writer, "# SVG output (vector, scalable)")?;
573    writeln!(
574        readme_writer,
575        "neato -Tsvg graph.dot -o graph.svg"
576    )?;
577    writeln!(readme_writer, "")?;
578    writeln!(readme_writer, "# Interactive SVG")?;
579    writeln!(
580        readme_writer,
581        "fdp -Tsvg graph.dot -o graph_interactive.svg"
582    )?;
583    writeln!(readme_writer, "```")?;
584    writeln!(readme_writer, "")?;
585    writeln!(readme_writer, "## Statistics")?;
586    writeln!(readme_writer, "")?;
587    let stats = engine.stats();
588    writeln!(readme_writer, "- Nodes: {}", stats.total_nodes)?;
589    writeln!(readme_writer, "- Edges: {}", stats.total_edges)?;
590    writeln!(
591        readme_writer,
592        "- Cross-domain edges: {}",
593        stats.cross_domain_edges
594    )?;
595    writeln!(readme_writer, "- Patterns detected: {}", patterns.len())?;
596    writeln!(
597        readme_writer,
598        "- Coherence snapshots: {}",
599        history.len()
600    )?;
601
602    readme_writer.flush()?;
603
604    Ok(())
605}
606
607// Helper functions
608
/// Escape a string for use as a single CSV field.
///
/// A field containing a double quote, comma, line feed or carriage return is
/// wrapped in double quotes, with every embedded double quote doubled. Any
/// other string (including the empty string) is returned unchanged.
///
/// The result is already a complete field: callers must not wrap it in
/// another pair of quotes.
fn csv_escape(s: &str) -> String {
    // A bare `\r` breaks a record just like `\n` does, so it must also
    // force quoting.
    let needs_quoting = s.chars().any(|c| matches!(c, '"' | ',' | '\n' | '\r'));

    if needs_quoting {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
617
618/// Get color for domain (for DOT export)
619fn domain_color(domain: Domain) -> &'static str {
620    match domain {
621        Domain::Climate => "lightblue",
622        Domain::Finance => "lightgreen",
623        Domain::Research => "lightyellow",
624        Domain::Medical => "lightpink",
625        Domain::Economic => "lavender",
626        Domain::Genomics => "palegreen",
627        Domain::Physics => "lightsteelblue",
628        Domain::Seismic => "sandybrown",
629        Domain::Ocean => "aquamarine",
630        Domain::Space => "plum",
631        Domain::Transportation => "peachpuff",
632        Domain::Geospatial => "lightgoldenrodyellow",
633        Domain::Government => "lightgray",
634        Domain::CrossDomain => "lightcoral",
635    }
636}
637
638/// Get node shape for domain (for DOT export)
639fn domain_shape(domain: Domain) -> &'static str {
640    match domain {
641        Domain::Climate => "circle",
642        Domain::Finance => "box",
643        Domain::Research => "diamond",
644        Domain::Medical => "ellipse",
645        Domain::Economic => "octagon",
646        Domain::Genomics => "pentagon",
647        Domain::Physics => "triangle",
648        Domain::Seismic => "invtriangle",
649        Domain::Ocean => "trapezium",
650        Domain::Space => "star",
651        Domain::Transportation => "house",
652        Domain::Geospatial => "invhouse",
653        Domain::Government => "folder",
654        Domain::CrossDomain => "hexagon",
655    }
656}
657
658/// Format edge type for export
659fn edge_type_label(edge_type: EdgeType) -> &'static str {
660    match edge_type {
661        EdgeType::Correlation => "correlation",
662        EdgeType::Similarity => "similarity",
663        EdgeType::Citation => "citation",
664        EdgeType::Causal => "causal",
665        EdgeType::CrossDomain => "cross_domain",
666    }
667}
668
669impl From<std::io::Error> for FrameworkError {
670    fn from(err: std::io::Error) -> Self {
671        FrameworkError::Config(format!("I/O error: {}", err))
672    }
673}
674
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain fields pass through; fields with separators or quotes are
    /// wrapped, with embedded quotes doubled.
    #[test]
    fn test_csv_escape() {
        assert_eq!(csv_escape("simple"), "simple");
        assert_eq!(csv_escape(""), "");
        assert_eq!(csv_escape("with,comma"), "\"with,comma\"");
        assert_eq!(csv_escape("with\"quote"), "\"with\"\"quote\"");
        assert_eq!(csv_escape("multi\nline"), "\"multi\nline\"");
        // Comma and quotes together: wrapped once, each quote doubled.
        assert_eq!(csv_escape("a,\"b\""), "\"a,\"\"b\"\"\"");
    }

    #[test]
    fn test_domain_color() {
        assert_eq!(domain_color(Domain::Climate), "lightblue");
        assert_eq!(domain_color(Domain::Finance), "lightgreen");
    }

    /// A single-criterion constructor sets only its own field, and `and`
    /// keeps fields that `other` leaves unset.
    #[test]
    fn test_export_filter() {
        let filter = ExportFilter::domain(Domain::Climate);
        assert!(filter.domains.is_some());
        assert!(filter.min_edge_weight.is_none());

        let combined = filter.and(ExportFilter::min_weight(0.5));
        assert_eq!(combined.min_edge_weight, Some(0.5));
        // The domain criterion from the left-hand filter must survive.
        assert_eq!(combined.domains.as_ref().map(Vec::len), Some(1));
        assert!(combined.time_range.is_none());
        assert!(combined.edge_types.is_none());
        assert!(combined.max_nodes.is_none());
    }
}