Skip to main content

magellan/graph/
export.rs

1//! Export functionality for CodeGraph
2//!
3//! Exports graph data to JSON/JSONL/CSV/SCIP format for LLM and pipeline consumption.
4//!
5//! # Export Schema Versioning
6//!
7//! All export formats include a version field for parsing stability:
8//!
9//! | Version | Changes |
10//! |---------|---------|
11//! | 2.0.0 | Added `symbol_id`, `canonical_fqn`, `display_fqn` fields |
12//!
13//! - **JSON**: Top-level `version` field
14//! - **JSONL**: First line is `{"type":"Version","version":"2.0.0"}`
15//! - **CSV**: Header comment `# Magellan Export Version: 2.0.0`
16//!
17//! See MANUAL.md section 3.8 for detailed export documentation.
18
19pub mod scip;
20
21use anyhow::Result;
22use base64::Engine;
23use serde::{Deserialize, Serialize};
24use sqlitegraph::{BackendDirection, GraphBackend, NeighborQuery, SnapshotId};
25
26
27
28use super::{CallNode, CodeGraph, FileNode, ReferenceNode, SymbolNode};
29use crate::graph::query::{collision_groups, CollisionField};
30
/// Output formats a graph export can be rendered in.
///
/// Use [`ExportFormat::from_str`] to turn a user-supplied name into a variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExportFormat {
    /// A single JSON document
    Json,
    /// JSON Lines (one JSON record per line)
    JsonL,
    /// Graphviz DOT
    Dot,
    /// Comma-separated values
    Csv,
    /// SCIP (Source Code Intelligence Protocol)
    Scip,
    /// LSIF (Language Server Index Format)
    Lsif,
}

impl ExportFormat {
    /// Look up a format by name, ignoring letter case.
    ///
    /// # Arguments
    /// * `s` - Format name such as `"json"` or `"DOT"`; it is not trimmed
    ///
    /// # Returns
    /// The matching variant, or `None` when `s` names no known format
    pub fn from_str(s: &str) -> Option<Self> {
        // Lower-case spelling of every variant, checked in declaration order.
        const NAMES: [(&str, ExportFormat); 6] = [
            ("json", ExportFormat::Json),
            ("jsonl", ExportFormat::JsonL),
            ("dot", ExportFormat::Dot),
            ("csv", ExportFormat::Csv),
            ("scip", ExportFormat::Scip),
            ("lsif", ExportFormat::Lsif),
        ];

        let wanted = s.to_lowercase();
        NAMES
            .iter()
            .find(|(name, _)| *name == wanted.as_str())
            .map(|&(_, format)| format)
    }
}
64
/// Configuration for graph export
///
/// Selects the output format and which record kinds are emitted.
/// `ExportConfig::default()` picks JSON, turns symbols, references and calls
/// on, pretty-prints, applies no filters and leaves collisions out.
#[derive(Debug, Clone)]
pub struct ExportConfig {
    /// Output format
    pub format: ExportFormat,
    /// Include symbols in export
    pub include_symbols: bool,
    /// Include references in export
    pub include_references: bool,
    /// Include calls in export
    pub include_calls: bool,
    /// Use minified JSON (no pretty-printing)
    pub minify: bool,
    /// Filters for export (file, symbol, kind, max_depth, cluster)
    ///
    /// NOTE(review): `stream_json` does not read this field; confirm which
    /// exporters actually honor the filters.
    pub filters: ExportFilters,
    /// Include collision groups in JSON export
    pub include_collisions: bool,
    /// Field used to group collisions
    pub collisions_field: CollisionField,
}
85
/// Export filters for DOT export
///
/// Filters allow restricting the exported graph to specific files,
/// symbols, or limiting traversal depth.
///
/// The derived `Default` applies no restriction: every `Option` is `None`
/// and `cluster` is `false`.
#[derive(Debug, Clone, Default)]
pub struct ExportFilters {
    /// Only include calls from/to symbols in this file path
    pub file: Option<String>,
    /// Only include calls from/to this specific symbol name
    pub symbol: Option<String>,
    /// Only include symbols of this kind (e.g., "Function", "Method")
    pub kind: Option<String>,
    /// Maximum depth for call graph traversal (None = unlimited)
    pub max_depth: Option<usize>,
    /// Group nodes by file in subgraphs (DOT cluster feature)
    pub cluster: bool,
}
103
/// Quote and escape a string so it can be used as a DOT label
///
/// The result is always wrapped in double quotes. Inside the quotes:
/// - a backslash becomes `\\`
/// - a double quote becomes `\"`
/// - a newline becomes the two characters `\n`
///
/// Every other character (tabs, non-ASCII text, ...) is copied unchanged.
///
/// # Arguments
/// * `s` - The raw label text
///
/// # Returns
/// The quoted, escaped label
fn escape_dot_label(s: &str) -> String {
    // Room for the text plus the two surrounding quotes; escapes may grow it.
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');

    for ch in s.chars() {
        match ch {
            '\\' => quoted.push_str("\\\\"),
            '"' => quoted.push_str("\\\""),
            '\n' => quoted.push_str("\\n"),
            other => quoted.push(other),
        }
    }

    quoted.push('"');
    quoted
}
125
/// Pick the DOT node identifier for a symbol
///
/// When a stable `symbol_id` is present it is returned verbatim. Otherwise
/// the identifier is derived from `name` by replacing every character that
/// is not alphanumeric with an underscore.
///
/// NOTE(review): the `symbol_id` branch performs no sanitizing; it assumes
/// the ID is already acceptable as a DOT identifier - confirm against the
/// ID generator and the call sites.
///
/// # Arguments
/// * `symbol_id` - Optional stable symbol ID
/// * `name` - Symbol name used when no ID is available
///
/// # Returns
/// The identifier string (empty when there is no ID and `name` is empty)
fn escape_dot_id(symbol_id: &Option<String>, name: &str) -> String {
    match symbol_id {
        Some(id) => id.clone(),
        None => {
            let mut sanitized = String::with_capacity(name.len());
            for ch in name.chars() {
                sanitized.push(if ch.is_alphanumeric() { ch } else { '_' });
            }
            sanitized
        }
    }
}
149
// Unit tests for the DOT escaping helpers, plus two end-to-end export checks
// that index real files into a temporary database.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::query::CollisionField;

    // Plain text is only wrapped in double quotes.
    #[test]
    fn test_escape_dot_label_basic() {
        assert_eq!(escape_dot_label("simple"), "\"simple\"");
        assert_eq!(escape_dot_label("with spaces"), "\"with spaces\"");
    }

    // Embedded double quotes gain a leading backslash.
    #[test]
    fn test_escape_dot_label_quotes() {
        assert_eq!(escape_dot_label("say \"hello\""), r#""say \"hello\"""#);
    }

    // Each backslash is doubled.
    #[test]
    fn test_escape_dot_label_backslash() {
        assert_eq!(escape_dot_label(r"C:\path"), r#""C:\\path""#);
        assert_eq!(escape_dot_label("a\\b"), r#""a\\b""#);
    }

    // A real newline becomes the two-character sequence backslash + 'n'.
    #[test]
    fn test_escape_dot_label_newlines() {
        assert_eq!(escape_dot_label("line1\nline2"), r#""line1\nline2""#);
    }

    // The empty string still yields a pair of quotes.
    #[test]
    fn test_escape_dot_label_empty() {
        assert_eq!(escape_dot_label(""), "\"\"");
    }

    // Characters without an escape rule are copied through untouched.
    #[test]
    fn test_escape_dot_label_special_chars() {
        // Tabs and other special characters
        assert_eq!(escape_dot_label("a\tb"), "\"a\tb\"");
        // Unicode characters should pass through
        assert_eq!(escape_dot_label("hello世界"), "\"hello世界\"");
    }

    // A present symbol ID wins over the name.
    #[test]
    fn test_escape_dot_id_with_symbol_id() {
        // Symbol ID (hex) is used directly
        let symbol_id = Some("a1b2c3d4e5f6".to_string());
        assert_eq!(escape_dot_id(&symbol_id, "fallback"), "a1b2c3d4e5f6");
    }

    // Without an ID, non-alphanumeric characters in the name become '_'.
    #[test]
    fn test_escape_dot_id_without_symbol_id() {
        // Falls back to sanitized name
        assert_eq!(escape_dot_id(&None, "simple_name"), "simple_name");
        assert_eq!(escape_dot_id(&None, "name-with-dashes"), "name_with_dashes");
        assert_eq!(escape_dot_id(&None, "name.with.dots"), "name_with_dots");
        assert_eq!(escape_dot_id(&None, "name with spaces"), "name_with_spaces");
    }

    // No ID and an empty name produce an empty identifier.
    #[test]
    fn test_escape_dot_id_empty_name() {
        assert_eq!(escape_dot_id(&None, ""), "");
    }

    // Two files defining the same function name must surface as a collision
    // group when `include_collisions` is switched on.
    #[test]
    fn test_export_collisions_included_when_enabled() {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let db_path = temp_dir.path().join("test.db");
        let mut graph = CodeGraph::open(&db_path).unwrap();

        // Same function name in two different files.
        let file1 = temp_dir.path().join("file1.rs");
        std::fs::write(&file1, "fn collide() {}\n").unwrap();
        let file2 = temp_dir.path().join("file2.rs");
        std::fs::write(&file2, "fn collide() {}\n").unwrap();

        let path1 = file1.to_string_lossy().to_string();
        let path2 = file2.to_string_lossy().to_string();
        let source1 = std::fs::read(&file1).unwrap();
        let source2 = std::fs::read(&file2).unwrap();

        graph.index_file(&path1, &source1).unwrap();
        graph.index_file(&path2, &source2).unwrap();

        let config = ExportConfig {
            format: ExportFormat::Json,
            include_symbols: true,
            include_references: false,
            include_calls: false,
            minify: false,
            filters: ExportFilters::default(),
            include_collisions: true,
            collisions_field: CollisionField::Fqn,
        };

        // Round-trip through the export struct to inspect the collisions list.
        let json = export_graph(&mut graph, &config).unwrap();
        let export: GraphExport = serde_json::from_str(&json).unwrap();
        assert!(!export.collisions.is_empty());
    }

    // CSV output mixing symbols, references and calls must share one header
    // and every data row must have the same number of columns as that header.
    #[test]
    fn test_csv_export_mixed_record_types() {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let db_path = temp_dir.path().join("test.db");
        let mut graph = CodeGraph::open(&db_path).unwrap();

        // Create a file with symbols, references, and calls
        let file1 = temp_dir.path().join("test.rs");
        std::fs::write(
            &file1,
            r#"
fn main() {
    println!("hello");
    helper();
}

fn helper() {}
"#,
        )
        .unwrap();

        let path1 = file1.to_string_lossy().to_string();
        let source1 = std::fs::read(&file1).unwrap();
        graph.index_file(&path1, &source1).unwrap();

        // Export to CSV with all record types
        let config = ExportConfig {
            format: ExportFormat::Csv,
            include_symbols: true,
            include_references: true,
            include_calls: true,
            minify: false,
            filters: ExportFilters::default(),
            include_collisions: false,
            collisions_field: CollisionField::Fqn,
        };

        let csv = export_graph(&mut graph, &config).unwrap();

        // Verify CSV output
        let lines: Vec<&str> = csv.lines().collect();
        assert!(lines.len() > 1, "CSV should have header + data rows");

        // Check header contains all expected columns
        // The first line is a comment, so find the actual CSV header
        let header = lines.iter().find(|line| !line.starts_with('#') && !line.is_empty())
            .expect("Should have a CSV header row");
        assert!(header.contains("record_type"));
        assert!(header.contains("file"));
        assert!(header.contains("symbol_id"));
        assert!(header.contains("name"));
        assert!(header.contains("kind"));
        assert!(header.contains("referenced_symbol"));
        assert!(header.contains("target_symbol_id"));
        assert!(header.contains("caller"));
        assert!(header.contains("callee"));
        assert!(header.contains("caller_symbol_id"));
        assert!(header.contains("callee_symbol_id"));

        // Verify all data rows have the same number of columns
        // NOTE(review): splitting on ',' ignores CSV quoting, so a quoted
        // field that contains a comma would be miscounted here.
        let header_cols: Vec<&str> = header.split(',').collect();
        let expected_col_count = header_cols.len();

        // `skip(1)` drops the first line; `i + 2` is the 1-based line number
        // of the row being checked.
        for (i, line) in lines.iter().skip(1).enumerate() {
            if line.is_empty() || line.starts_with('#') {
                continue;
            }
            let data_cols: Vec<&str> = line.split(',').collect();
            assert_eq!(
                data_cols.len(),
                expected_col_count,
                "Row {} has {} columns, expected {}",
                i + 2,
                data_cols.len(),
                expected_col_count
            );
        }

        // Verify version header is present
        assert!(csv.starts_with("# Magellan Export Version: 2.0.0"));
    }
}
328
329impl Default for ExportConfig {
330    fn default() -> Self {
331        ExportConfig {
332            format: ExportFormat::Json,
333            include_symbols: true,
334            include_references: true,
335            include_calls: true,
336            minify: false,
337            filters: ExportFilters::default(),
338            include_collisions: false,
339            collisions_field: CollisionField::Fqn,
340        }
341    }
342}
343
344impl ExportConfig {
345    /// Create a new export config with the specified format
346    pub fn new(format: ExportFormat) -> Self {
347        ExportConfig {
348            format,
349            ..Default::default()
350        }
351    }
352
353    /// Set whether to include symbols
354    pub fn with_symbols(mut self, include: bool) -> Self {
355        self.include_symbols = include;
356        self
357    }
358
359    /// Set whether to include references
360    pub fn with_references(mut self, include: bool) -> Self {
361        self.include_references = include;
362        self
363    }
364
365    /// Set whether to include calls
366    pub fn with_calls(mut self, include: bool) -> Self {
367        self.include_calls = include;
368        self
369    }
370
371    /// Set whether to minify JSON output
372    pub fn with_minify(mut self, minify: bool) -> Self {
373        self.minify = minify;
374        self
375    }
376}
377
/// JSON export structure containing all graph data
///
/// This is the top-level document built by the JSON exporters in this file;
/// each vector holds one kind of record.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphExport {
    /// Export schema version for parsing stability
    pub version: String,
    /// Entries built from `File` nodes
    pub files: Vec<FileExport>,
    /// Entries built from `Symbol` nodes
    pub symbols: Vec<SymbolExport>,
    /// Entries built from `Reference` nodes
    pub references: Vec<ReferenceExport>,
    /// Entries built from `Call` nodes
    pub calls: Vec<CallExport>,
    /// Collision groups; the key is omitted from the serialized output when
    /// the list is empty and read back as an empty list when it is absent
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub collisions: Vec<CollisionExport>,
}
390
/// File entry for JSON export
///
/// Both fields are copied unchanged from the graph's `File` node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileExport {
    /// File path as stored on the node
    pub path: String,
    /// Hash string as stored on the node (presumably a content hash - confirm
    /// against the indexer)
    pub hash: String,
}
397
/// Symbol entry for JSON export
///
/// Apart from `file`, every field is copied from the graph's `Symbol` node.
/// The three identity fields fall back to `None` when missing from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolExport {
    /// Stable symbol ID for cross-run correlation
    #[serde(default)]
    pub symbol_id: Option<String>,

    /// Canonical fully-qualified name for unambiguous identity
    #[serde(default)]
    pub canonical_fqn: Option<String>,

    /// Display fully-qualified name for human-readable output
    #[serde(default)]
    pub display_fqn: Option<String>,

    /// Symbol name, if the node has one
    pub name: Option<String>,
    /// Symbol kind string as stored on the node
    pub kind: String,
    /// Normalized form of `kind`, if the node carries one
    pub kind_normalized: Option<String>,
    /// Path of the defining file, resolved through `get_file_path_from_symbol`
    pub file: String,
    /// Start of the symbol's span (byte offset, per the field name)
    pub byte_start: usize,
    /// End of the symbol's span (byte offset, per the field name)
    pub byte_end: usize,
    /// Line of the span start (indexing base not visible here - confirm)
    pub start_line: usize,
    /// Column of the span start (indexing base not visible here - confirm)
    pub start_col: usize,
    /// Line of the span end
    pub end_line: usize,
    /// Column of the span end
    pub end_col: usize,
}
424
/// Reference entry for JSON export
///
/// Position fields are copied from the graph's `Reference` node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceExport {
    /// File recorded on the reference node
    pub file: String,
    /// Name of the referenced symbol, taken from the entity name by dropping
    /// its `"ref to "` prefix; empty when the entity name lacks that prefix
    pub referenced_symbol: String,
    /// Stable ID of referenced symbol
    ///
    /// `stream_json` currently always leaves this `None`.
    #[serde(default)]
    pub target_symbol_id: Option<String>,
    /// Start of the reference span (byte offset, per the field name)
    pub byte_start: usize,
    /// End of the reference span (byte offset, per the field name)
    pub byte_end: usize,
    /// Line of the span start (indexing base not visible here - confirm)
    pub start_line: usize,
    /// Column of the span start (indexing base not visible here - confirm)
    pub start_col: usize,
    /// Line of the span end
    pub end_line: usize,
    /// Column of the span end
    pub end_col: usize,
}
440
/// Call entry for JSON export
///
/// Every field is copied from the graph's `Call` node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CallExport {
    /// File recorded on the call node
    pub file: String,
    /// Caller name as stored on the node
    pub caller: String,
    /// Callee name as stored on the node
    pub callee: String,
    /// Stable ID of caller symbol
    #[serde(default)]
    pub caller_symbol_id: Option<String>,
    /// Stable ID of callee symbol
    #[serde(default)]
    pub callee_symbol_id: Option<String>,
    /// Start of the call span (byte offset, per the field name)
    pub byte_start: usize,
    /// End of the call span (byte offset, per the field name)
    pub byte_end: usize,
    /// Line of the span start (indexing base not visible here - confirm)
    pub start_line: usize,
    /// Column of the span start (indexing base not visible here - confirm)
    pub start_col: usize,
    /// Line of the span end
    pub end_line: usize,
    /// Column of the span end
    pub end_col: usize,
}
460
/// Collision candidate entry for JSON export
///
/// One member of a collision group; every field is copied from the
/// candidate returned by `collision_groups`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollisionCandidateExport {
    /// Entity ID of the candidate
    pub entity_id: i64,
    /// Stable symbol ID, when the candidate has one
    pub symbol_id: Option<String>,
    /// Canonical fully-qualified name, when known
    pub canonical_fqn: Option<String>,
    /// Display fully-qualified name, when known
    pub display_fqn: Option<String>,
    /// Symbol name, when known
    pub name: Option<String>,
    /// File path of the candidate, when known
    pub file_path: Option<String>,
}
471
/// Collision group entry for JSON export
///
/// Every field is copied from a group returned by `collision_groups`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollisionExport {
    /// Name of the field the group was formed on, as reported by the query
    pub field: String,
    /// The value the group's candidates have in common
    pub value: String,
    /// Count reported by the query for this group
    pub count: usize,
    /// Members of the group
    pub candidates: Vec<CollisionCandidateExport>,
}
480
481fn build_collision_exports(
482    graph: &mut CodeGraph,
483    field: CollisionField,
484    limit: usize,
485) -> Result<Vec<CollisionExport>> {
486    let groups = collision_groups(graph, field, limit)?;
487    let mut exports = Vec::new();
488
489    for group in groups {
490        let candidates = group
491            .candidates
492            .into_iter()
493            .map(|candidate| CollisionCandidateExport {
494                entity_id: candidate.entity_id,
495                symbol_id: candidate.symbol_id,
496                canonical_fqn: candidate.canonical_fqn,
497                display_fqn: candidate.display_fqn,
498                name: candidate.name,
499                file_path: candidate.file_path,
500            })
501            .collect();
502
503        exports.push(CollisionExport {
504            field: group.field,
505            value: group.value,
506            count: group.count,
507            candidates,
508        });
509    }
510
511    Ok(exports)
512}
513
514/// Export all graph data to JSON format
515///
516/// Note: This function loads all data into memory before serialization.
517/// For large graphs, use stream_json() instead to reduce peak memory.
518///
519/// # Returns
520/// JSON string containing all files, symbols, references, and calls
521pub fn export_json(graph: &mut CodeGraph) -> Result<String> {
522    let mut files = Vec::new();
523    let mut symbols = Vec::new();
524    let mut references = Vec::new();
525    let mut calls = Vec::new();
526    let collisions: Vec<CollisionExport> = Vec::new();
527
528    // Get all entity IDs from the graph
529    let entity_ids = graph.files.backend.entity_ids()?;
530    let snapshot = SnapshotId::current();
531
532    // Process each entity
533    for entity_id in entity_ids {
534        let entity = graph.files.backend.get_node(snapshot, entity_id)?;
535
536        match entity.kind.as_str() {
537            "File" => {
538                if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data.clone()) {
539                    files.push(FileExport {
540                        path: file_node.path,
541                        hash: file_node.hash,
542                    });
543                }
544            }
545            "Symbol" => {
546                if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(entity.data.clone()) {
547                    // Get file path from DEFINES edge (incoming from File)
548                    let file = get_file_path_from_symbol(graph, entity_id)?;
549
550                    symbols.push(SymbolExport {
551                        symbol_id: symbol_node.symbol_id,
552                        canonical_fqn: symbol_node.canonical_fqn,
553                        display_fqn: symbol_node.display_fqn,
554                        name: symbol_node.name,
555                        kind: symbol_node.kind,
556                        kind_normalized: symbol_node.kind_normalized,
557                        file,
558                        byte_start: symbol_node.byte_start,
559                        byte_end: symbol_node.byte_end,
560                        start_line: symbol_node.start_line,
561                        start_col: symbol_node.start_col,
562                        end_line: symbol_node.end_line,
563                        end_col: symbol_node.end_col,
564                    });
565                }
566            }
567            "Reference" => {
568                if let Ok(ref_node) = serde_json::from_value::<ReferenceNode>(entity.data.clone()) {
569                    // Extract symbol name from entity name (format: "ref to {symbol_name}")
570                    let referenced_symbol = entity
571                        .name
572                        .strip_prefix("ref to ")
573                        .unwrap_or("")
574                        .to_string();
575
576                    references.push(ReferenceExport {
577                        file: ref_node.file,
578                        referenced_symbol,
579                        target_symbol_id: None, // Would need symbol lookup; defer to Task 3
580                        byte_start: ref_node.byte_start as usize,
581                        byte_end: ref_node.byte_end as usize,
582                        start_line: ref_node.start_line as usize,
583                        start_col: ref_node.start_col as usize,
584                        end_line: ref_node.end_line as usize,
585                        end_col: ref_node.end_col as usize,
586                    });
587                }
588            }
589            "Call" => {
590                if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
591                    calls.push(CallExport {
592                        file: call_node.file,
593                        caller: call_node.caller,
594                        callee: call_node.callee,
595                        caller_symbol_id: call_node.caller_symbol_id,
596                        callee_symbol_id: call_node.callee_symbol_id,
597                        byte_start: call_node.byte_start as usize,
598                        byte_end: call_node.byte_end as usize,
599                        start_line: call_node.start_line as usize,
600                        start_col: call_node.start_col as usize,
601                        end_line: call_node.end_line as usize,
602                        end_col: call_node.end_col as usize,
603                    });
604                }
605            }
606            _ => {
607                // Ignore unknown node types
608            }
609        }
610    }
611
612    // Sort for deterministic output
613    files.sort_by(|a, b| a.path.cmp(&b.path));
614    symbols.sort_by(|a, b| (&a.file, &a.name).cmp(&(&b.file, &b.name)));
615    references
616        .sort_by(|a, b| (&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol)));
617    calls.sort_by(|a, b| (&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee)));
618
619    let export = GraphExport {
620        version: "2.0.0".to_string(),
621        files,
622        symbols,
623        references,
624        calls,
625        collisions,
626    };
627
628    Ok(serde_json::to_string_pretty(&export)?)
629}
630
631/// Stream all graph data to JSON format with reduced memory footprint
632///
633/// This function writes JSON incrementally to avoid loading all data into memory.
634/// It collects entities into vectors for sorting (deterministic output), but uses
635/// serde_json::to_writer for streaming serialization instead of to_string.
636///
637/// # Arguments
638/// * `graph` - The code graph to export
639/// * `config` - Export configuration (include_symbols, include_references, include_calls)
640/// * `writer` - Writer to receive JSON output
641///
642/// # Returns
643/// Result indicating success or failure
644pub fn stream_json<W: std::io::Write>(
645    graph: &mut CodeGraph,
646    config: &ExportConfig,
647    writer: &mut W,
648) -> Result<()> {
649    let mut files = Vec::new();
650    let mut symbols = Vec::new();
651    let mut references = Vec::new();
652    let mut calls = Vec::new();
653    let mut collisions = Vec::new();
654
655    // Get all entity IDs from the graph
656    let entity_ids = graph.files.backend.entity_ids()?;
657    let snapshot = SnapshotId::current();
658
659    // Process each entity
660    for entity_id in entity_ids {
661        let entity = graph.files.backend.get_node(snapshot, entity_id)?;
662
663        match entity.kind.as_str() {
664            "File" => {
665                if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data.clone()) {
666                    files.push(FileExport {
667                        path: file_node.path,
668                        hash: file_node.hash,
669                    });
670                }
671            }
672            "Symbol" => {
673                if config.include_symbols {
674                    if let Ok(symbol_node) =
675                        serde_json::from_value::<SymbolNode>(entity.data.clone())
676                    {
677                        let file = get_file_path_from_symbol(graph, entity_id)?;
678                        symbols.push(SymbolExport {
679                            symbol_id: symbol_node.symbol_id,
680                            canonical_fqn: symbol_node.canonical_fqn,
681                            display_fqn: symbol_node.display_fqn,
682                            name: symbol_node.name,
683                            kind: symbol_node.kind,
684                            kind_normalized: symbol_node.kind_normalized,
685                            file,
686                            byte_start: symbol_node.byte_start,
687                            byte_end: symbol_node.byte_end,
688                            start_line: symbol_node.start_line,
689                            start_col: symbol_node.start_col,
690                            end_line: symbol_node.end_line,
691                            end_col: symbol_node.end_col,
692                        });
693                    }
694                }
695            }
696            "Reference" => {
697                if config.include_references {
698                    if let Ok(ref_node) =
699                        serde_json::from_value::<ReferenceNode>(entity.data.clone())
700                    {
701                        let referenced_symbol = entity
702                            .name
703                            .strip_prefix("ref to ")
704                            .unwrap_or("")
705                            .to_string();
706
707                        references.push(ReferenceExport {
708                            file: ref_node.file,
709                            referenced_symbol,
710                            target_symbol_id: None,
711                            byte_start: ref_node.byte_start as usize,
712                            byte_end: ref_node.byte_end as usize,
713                            start_line: ref_node.start_line as usize,
714                            start_col: ref_node.start_col as usize,
715                            end_line: ref_node.end_line as usize,
716                            end_col: ref_node.end_col as usize,
717                        });
718                    }
719                }
720            }
721            "Call" => {
722                if config.include_calls {
723                    if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
724                        calls.push(CallExport {
725                            file: call_node.file,
726                            caller: call_node.caller,
727                            callee: call_node.callee,
728                            caller_symbol_id: call_node.caller_symbol_id,
729                            callee_symbol_id: call_node.callee_symbol_id,
730                            byte_start: call_node.byte_start as usize,
731                            byte_end: call_node.byte_end as usize,
732                            start_line: call_node.start_line as usize,
733                            start_col: call_node.start_col as usize,
734                            end_line: call_node.end_line as usize,
735                            end_col: call_node.end_col as usize,
736                        });
737                    }
738                }
739            }
740            _ => {
741                // Ignore unknown node types
742            }
743        }
744    }
745
746    if config.include_collisions {
747        collisions = build_collision_exports(graph, config.collisions_field, usize::MAX)?;
748    }
749
750    // Sort for deterministic output
751    files.sort_by(|a, b| a.path.cmp(&b.path));
752    symbols.sort_by(|a, b| (&a.file, &a.name).cmp(&(&b.file, &b.name)));
753    references
754        .sort_by(|a, b| (&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol)));
755    calls.sort_by(|a, b| (&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee)));
756
757    let export = GraphExport {
758        version: "2.0.0".to_string(), // v1.5 adds symbol_id, canonical_fqn, display_fqn
759        files,
760        symbols,
761        references,
762        calls,
763        collisions,
764    };
765
766    // Stream to writer instead of returning String
767    serde_json::to_writer_pretty(writer, &export).map_err(Into::into)
768}
769
770/// Stream all graph data to JSON format with minified output
771///
772/// This function writes JSON incrementally to avoid loading all data into memory.
773/// Uses compact serialization (no pretty-printing) for smaller output size.
774///
775/// # Arguments
776/// * `graph` - The code graph to export
777/// * `config` - Export configuration (include_symbols, include_references, include_calls)
778/// * `writer` - Writer to receive JSON output
779///
780/// # Returns
781/// Result indicating success or failure
782pub fn stream_json_minified<W: std::io::Write>(
783    graph: &mut CodeGraph,
784    config: &ExportConfig,
785    writer: &mut W,
786) -> Result<()> {
787    let mut files = Vec::new();
788    let mut symbols = Vec::new();
789    let mut references = Vec::new();
790    let mut calls = Vec::new();
791    let mut collisions = Vec::new();
792
793    // Get all entity IDs from the graph
794    let entity_ids = graph.files.backend.entity_ids()?;
795    let snapshot = SnapshotId::current();
796
797    // Process each entity
798    for entity_id in entity_ids {
799        let entity = graph.files.backend.get_node(snapshot, entity_id)?;
800
801        match entity.kind.as_str() {
802            "File" => {
803                if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data.clone()) {
804                    files.push(FileExport {
805                        path: file_node.path,
806                        hash: file_node.hash,
807                    });
808                }
809            }
810            "Symbol" => {
811                if config.include_symbols {
812                    if let Ok(symbol_node) =
813                        serde_json::from_value::<SymbolNode>(entity.data.clone())
814                    {
815                        let file = get_file_path_from_symbol(graph, entity_id)?;
816                        symbols.push(SymbolExport {
817                            symbol_id: symbol_node.symbol_id,
818                            canonical_fqn: symbol_node.canonical_fqn,
819                            display_fqn: symbol_node.display_fqn,
820                            name: symbol_node.name,
821                            kind: symbol_node.kind,
822                            kind_normalized: symbol_node.kind_normalized,
823                            file,
824                            byte_start: symbol_node.byte_start,
825                            byte_end: symbol_node.byte_end,
826                            start_line: symbol_node.start_line,
827                            start_col: symbol_node.start_col,
828                            end_line: symbol_node.end_line,
829                            end_col: symbol_node.end_col,
830                        });
831                    }
832                }
833            }
834            "Reference" => {
835                if config.include_references {
836                    if let Ok(ref_node) =
837                        serde_json::from_value::<ReferenceNode>(entity.data.clone())
838                    {
839                        let referenced_symbol = entity
840                            .name
841                            .strip_prefix("ref to ")
842                            .unwrap_or("")
843                            .to_string();
844
845                        references.push(ReferenceExport {
846                            file: ref_node.file,
847                            referenced_symbol,
848                            target_symbol_id: None,
849                            byte_start: ref_node.byte_start as usize,
850                            byte_end: ref_node.byte_end as usize,
851                            start_line: ref_node.start_line as usize,
852                            start_col: ref_node.start_col as usize,
853                            end_line: ref_node.end_line as usize,
854                            end_col: ref_node.end_col as usize,
855                        });
856                    }
857                }
858            }
859            "Call" => {
860                if config.include_calls {
861                    if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
862                        calls.push(CallExport {
863                            file: call_node.file,
864                            caller: call_node.caller,
865                            callee: call_node.callee,
866                            caller_symbol_id: call_node.caller_symbol_id,
867                            callee_symbol_id: call_node.callee_symbol_id,
868                            byte_start: call_node.byte_start as usize,
869                            byte_end: call_node.byte_end as usize,
870                            start_line: call_node.start_line as usize,
871                            start_col: call_node.start_col as usize,
872                            end_line: call_node.end_line as usize,
873                            end_col: call_node.end_col as usize,
874                        });
875                    }
876                }
877            }
878            _ => {
879                // Ignore unknown node types
880            }
881        }
882    }
883
884    if config.include_collisions {
885        collisions = build_collision_exports(graph, config.collisions_field, usize::MAX)?;
886    }
887
888    // Sort for deterministic output
889    files.sort_by(|a, b| a.path.cmp(&b.path));
890    symbols.sort_by(|a, b| (&a.file, &a.name).cmp(&(&b.file, &b.name)));
891    references
892        .sort_by(|a, b| (&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol)));
893    calls.sort_by(|a, b| (&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee)));
894
895    let export = GraphExport {
896        version: "2.0.0".to_string(), // v1.5 adds symbol_id, canonical_fqn, display_fqn
897        files,
898        symbols,
899        references,
900        calls,
901        collisions,
902    };
903
904    // Stream to writer using compact serialization (minified)
905    serde_json::to_writer(writer, &export).map_err(Into::into)
906}
907
908/// Get the file path for a symbol by following DEFINES edge
909fn get_file_path_from_symbol(graph: &mut CodeGraph, symbol_id: i64) -> Result<String> {
910    // Query incoming DEFINES edges to find the File node
911    let snapshot = SnapshotId::current();
912    let file_ids = graph.files.backend.neighbors(
913        snapshot,
914        symbol_id,
915        NeighborQuery {
916            direction: BackendDirection::Incoming,
917            edge_type: Some("DEFINES".to_string()),
918        },
919    )?;
920
921    if let Some(file_id) = file_ids.first() {
922        let entity = graph.files.backend.get_node(snapshot, *file_id)?;
923        if entity.kind == "File" {
924            if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data) {
925                return Ok(file_node.path);
926            }
927        }
928    }
929
930    // Fallback: return empty string if no file found
931    Ok(String::new())
932}
933
/// JSONL record type discriminator
///
/// Each JSONL line includes a "type" field to identify the record type.
/// The field comes from serde's internally tagged representation
/// (`#[serde(tag = "type")]`): the variant name is written as the value of
/// `"type"` next to the fields of the variant's payload.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
enum JsonlRecord {
    /// Export schema version marker, e.g. `{"type":"Version","version":"2.0.0"}`.
    /// The JSONL exporters in this file push it first and sort it ahead of
    /// every other record.
    Version { version: String },
    /// One `File` entity from the graph.
    File(FileExport),
    /// One `Symbol` entity from the graph.
    Symbol(SymbolExport),
    /// One `Reference` entity from the graph.
    Reference(ReferenceExport),
    /// One `Call` entity from the graph.
    Call(CallExport),
}
946
947/// Export all graph data to JSONL format
948///
949/// JSONL (JSON Lines) format: one compact JSON object per line.
950/// Each line includes a "type" field for record identification.
951///
952/// # Returns
953/// JSONL string with one record per line, deterministically sorted
954pub fn export_jsonl(graph: &mut CodeGraph) -> Result<String> {
955    let mut records = Vec::new();
956
957    // Add version record first
958    records.push(JsonlRecord::Version {
959        version: "2.0.0".to_string(),
960    });
961
962    // Get all entity IDs from the graph
963    let entity_ids = graph.files.backend.entity_ids()?;
964    let snapshot = SnapshotId::current();
965
966    // Process each entity and create typed records
967    for entity_id in entity_ids {
968        let entity = graph.files.backend.get_node(snapshot, entity_id)?;
969
970        match entity.kind.as_str() {
971            "File" => {
972                if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data.clone()) {
973                    records.push(JsonlRecord::File(FileExport {
974                        path: file_node.path,
975                        hash: file_node.hash,
976                    }));
977                }
978            }
979            "Symbol" => {
980                if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(entity.data.clone()) {
981                    let file = get_file_path_from_symbol(graph, entity_id)?;
982                    records.push(JsonlRecord::Symbol(SymbolExport {
983                        symbol_id: symbol_node.symbol_id,
984                        canonical_fqn: symbol_node.canonical_fqn,
985                        display_fqn: symbol_node.display_fqn,
986                        name: symbol_node.name,
987                        kind: symbol_node.kind,
988                        kind_normalized: symbol_node.kind_normalized,
989                        file,
990                        byte_start: symbol_node.byte_start,
991                        byte_end: symbol_node.byte_end,
992                        start_line: symbol_node.start_line,
993                        start_col: symbol_node.start_col,
994                        end_line: symbol_node.end_line,
995                        end_col: symbol_node.end_col,
996                    }));
997                }
998            }
999            "Reference" => {
1000                if let Ok(ref_node) = serde_json::from_value::<ReferenceNode>(entity.data.clone()) {
1001                    let referenced_symbol = entity
1002                        .name
1003                        .strip_prefix("ref to ")
1004                        .unwrap_or("")
1005                        .to_string();
1006
1007                    records.push(JsonlRecord::Reference(ReferenceExport {
1008                        file: ref_node.file,
1009                        referenced_symbol,
1010                        target_symbol_id: None,
1011                        byte_start: ref_node.byte_start as usize,
1012                        byte_end: ref_node.byte_end as usize,
1013                        start_line: ref_node.start_line as usize,
1014                        start_col: ref_node.start_col as usize,
1015                        end_line: ref_node.end_line as usize,
1016                        end_col: ref_node.end_col as usize,
1017                    }));
1018                }
1019            }
1020            "Call" => {
1021                if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
1022                    records.push(JsonlRecord::Call(CallExport {
1023                        file: call_node.file,
1024                        caller: call_node.caller,
1025                        callee: call_node.callee,
1026                        caller_symbol_id: call_node.caller_symbol_id,
1027                        callee_symbol_id: call_node.callee_symbol_id,
1028                        byte_start: call_node.byte_start as usize,
1029                        byte_end: call_node.byte_end as usize,
1030                        start_line: call_node.start_line as usize,
1031                        start_col: call_node.start_col as usize,
1032                        end_line: call_node.end_line as usize,
1033                        end_col: call_node.end_col as usize,
1034                    }));
1035                }
1036            }
1037            _ => {
1038                // Ignore unknown node types
1039            }
1040        }
1041    }
1042
1043    // Sort deterministically before output
1044    records.sort_by(|a, b| match (a, b) {
1045        (JsonlRecord::Version { .. }, _) => std::cmp::Ordering::Less,
1046        (_, JsonlRecord::Version { .. }) => std::cmp::Ordering::Greater,
1047        (JsonlRecord::File(a), JsonlRecord::File(b)) => a.path.cmp(&b.path),
1048        (JsonlRecord::Symbol(a), JsonlRecord::Symbol(b)) => {
1049            (&a.file, &a.name).cmp(&(&b.file, &b.name))
1050        }
1051        (JsonlRecord::Reference(a), JsonlRecord::Reference(b)) => {
1052            (&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol))
1053        }
1054        (JsonlRecord::Call(a), JsonlRecord::Call(b)) => {
1055            (&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee))
1056        }
1057        // Type ordering: File < Symbol < Reference < Call
1058        (JsonlRecord::File(_), _) => std::cmp::Ordering::Less,
1059        (_, JsonlRecord::File(_)) => std::cmp::Ordering::Greater,
1060        (JsonlRecord::Symbol(_), _) => std::cmp::Ordering::Less,
1061        (_, JsonlRecord::Symbol(_)) => std::cmp::Ordering::Greater,
1062        (JsonlRecord::Reference(_), _) => std::cmp::Ordering::Less,
1063        (_, JsonlRecord::Reference(_)) => std::cmp::Ordering::Greater,
1064    });
1065
1066    // Serialize each record to compact JSON and join with newlines
1067    let lines: Result<Vec<String>, _> = records.iter().map(serde_json::to_string).collect();
1068    let lines = lines?;
1069
1070    Ok(lines.join("\n"))
1071}
1072
1073/// Stream all graph data to JSONL format with reduced memory footprint
1074///
1075/// This function writes JSONL incrementally to avoid loading all data into memory.
1076/// Each line is written as it's serialized, reducing peak memory for large graphs.
1077///
1078/// # Arguments
1079/// * `graph` - The code graph to export
1080/// * `config` - Export configuration (include_symbols, include_references, include_calls)
1081/// * `writer` - Writer to receive JSONL output
1082///
1083/// # Returns
1084/// Result indicating success or failure
1085pub fn stream_ndjson<W: std::io::Write>(
1086    graph: &mut CodeGraph,
1087    config: &ExportConfig,
1088    writer: &mut W,
1089) -> Result<()> {
1090    let mut records = Vec::new();
1091
1092    // Add version record first
1093    records.push(JsonlRecord::Version {
1094        version: "2.0.0".to_string(),
1095    });
1096
1097    // Get all entity IDs from the graph
1098    let entity_ids = graph.files.backend.entity_ids()?;
1099    let snapshot = SnapshotId::current();
1100
1101    // Process each entity and create typed records
1102    for entity_id in entity_ids {
1103        let entity = graph.files.backend.get_node(snapshot, entity_id)?;
1104
1105        match entity.kind.as_str() {
1106            "File" => {
1107                if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data.clone()) {
1108                    records.push(JsonlRecord::File(FileExport {
1109                        path: file_node.path,
1110                        hash: file_node.hash,
1111                    }));
1112                }
1113            }
1114            "Symbol" => {
1115                if config.include_symbols {
1116                    if let Ok(symbol_node) =
1117                        serde_json::from_value::<SymbolNode>(entity.data.clone())
1118                    {
1119                        let file = get_file_path_from_symbol(graph, entity_id)?;
1120                        records.push(JsonlRecord::Symbol(SymbolExport {
1121                            symbol_id: symbol_node.symbol_id,
1122                            canonical_fqn: symbol_node.canonical_fqn,
1123                            display_fqn: symbol_node.display_fqn,
1124                            name: symbol_node.name,
1125                            kind: symbol_node.kind,
1126                            kind_normalized: symbol_node.kind_normalized,
1127                            file,
1128                            byte_start: symbol_node.byte_start,
1129                            byte_end: symbol_node.byte_end,
1130                            start_line: symbol_node.start_line,
1131                            start_col: symbol_node.start_col,
1132                            end_line: symbol_node.end_line,
1133                            end_col: symbol_node.end_col,
1134                        }));
1135                    }
1136                }
1137            }
1138            "Reference" => {
1139                if config.include_references {
1140                    if let Ok(ref_node) =
1141                        serde_json::from_value::<ReferenceNode>(entity.data.clone())
1142                    {
1143                        let referenced_symbol = entity
1144                            .name
1145                            .strip_prefix("ref to ")
1146                            .unwrap_or("")
1147                            .to_string();
1148
1149                        records.push(JsonlRecord::Reference(ReferenceExport {
1150                            file: ref_node.file,
1151                            referenced_symbol,
1152                            target_symbol_id: None,
1153                            byte_start: ref_node.byte_start as usize,
1154                            byte_end: ref_node.byte_end as usize,
1155                            start_line: ref_node.start_line as usize,
1156                            start_col: ref_node.start_col as usize,
1157                            end_line: ref_node.end_line as usize,
1158                            end_col: ref_node.end_col as usize,
1159                        }));
1160                    }
1161                }
1162            }
1163            "Call" => {
1164                if config.include_calls {
1165                    if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
1166                        records.push(JsonlRecord::Call(CallExport {
1167                            file: call_node.file,
1168                            caller: call_node.caller,
1169                            callee: call_node.callee,
1170                            caller_symbol_id: call_node.caller_symbol_id,
1171                            callee_symbol_id: call_node.callee_symbol_id,
1172                            byte_start: call_node.byte_start as usize,
1173                            byte_end: call_node.byte_end as usize,
1174                            start_line: call_node.start_line as usize,
1175                            start_col: call_node.start_col as usize,
1176                            end_line: call_node.end_line as usize,
1177                            end_col: call_node.end_col as usize,
1178                        }));
1179                    }
1180                }
1181            }
1182            _ => {
1183                // Ignore unknown node types
1184            }
1185        }
1186    }
1187
1188    // Sort deterministically before output
1189    records.sort_by(|a, b| match (a, b) {
1190        (JsonlRecord::Version { .. }, _) => std::cmp::Ordering::Less,
1191        (_, JsonlRecord::Version { .. }) => std::cmp::Ordering::Greater,
1192        (JsonlRecord::File(a), JsonlRecord::File(b)) => a.path.cmp(&b.path),
1193        (JsonlRecord::Symbol(a), JsonlRecord::Symbol(b)) => {
1194            (&a.file, &a.name).cmp(&(&b.file, &b.name))
1195        }
1196        (JsonlRecord::Reference(a), JsonlRecord::Reference(b)) => {
1197            (&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol))
1198        }
1199        (JsonlRecord::Call(a), JsonlRecord::Call(b)) => {
1200            (&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee))
1201        }
1202        // Type ordering: File < Symbol < Reference < Call
1203        (JsonlRecord::File(_), _) => std::cmp::Ordering::Less,
1204        (_, JsonlRecord::File(_)) => std::cmp::Ordering::Greater,
1205        (JsonlRecord::Symbol(_), _) => std::cmp::Ordering::Less,
1206        (_, JsonlRecord::Symbol(_)) => std::cmp::Ordering::Greater,
1207        (JsonlRecord::Reference(_), _) => std::cmp::Ordering::Less,
1208        (_, JsonlRecord::Reference(_)) => std::cmp::Ordering::Greater,
1209    });
1210
1211    // Write each record line by line (streaming)
1212    let mut first = true;
1213    for record in records {
1214        if !first {
1215            writeln!(&mut *writer)?;
1216        }
1217        serde_json::to_writer(&mut *writer, &record)
1218            .map_err(|e| anyhow::anyhow!("JSON serialization error: {}", e))?;
1219        first = false;
1220    }
1221
1222    Ok(())
1223}
1224
1225/// Export call graph to DOT (Graphviz) format
1226///
1227/// Generates a DOT digraph representing the call graph with nodes as symbols
1228/// and edges as call relationships. Output is deterministic for reproducibility.
1229///
1230/// # Arguments
1231/// * `graph` - The code graph to export
1232/// * `config` - Export configuration with filters
1233///
1234/// # Returns
1235/// DOT format string suitable for Graphviz tools
1236///
1237/// # DOT Format Details
1238/// - Uses "strict digraph" for deterministic output
1239/// - Node labels: "{symbol_name}\n{file_path}" (newline for readability)
1240/// - Uses symbol_id as internal identifier if available, fallback to sanitized name
1241/// - Clusters nodes by file if config.filters.cluster is true
1242pub fn export_dot(graph: &mut CodeGraph, config: &ExportConfig) -> Result<String> {
1243    use std::collections::{BTreeMap, BTreeSet};
1244
1245    let mut dot_output = String::from("strict digraph call_graph {\n");
1246    dot_output.push_str("  node [shape=box, style=rounded];\n");
1247
1248    // Collect all Call nodes from the graph
1249    let entity_ids = graph.files.backend.entity_ids()?;
1250    let snapshot = SnapshotId::current();
1251    let mut calls = Vec::new();
1252
1253    for entity_id in entity_ids {
1254        let entity = graph.files.backend.get_node(snapshot, entity_id)?;
1255        if entity.kind == "Call" {
1256            if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data) {
1257                calls.push(call_node);
1258            }
1259        }
1260    }
1261
1262    // Apply filters
1263    if let Some(ref file_filter) = config.filters.file {
1264        calls.retain(|c| c.file.contains(file_filter));
1265    }
1266    if let Some(ref symbol_filter) = config.filters.symbol {
1267        calls.retain(|c| c.caller.contains(symbol_filter) || c.callee.contains(symbol_filter));
1268    }
1269
1270    // Sort deterministically: file, then caller, then callee
1271    calls.sort_by(|a, b| {
1272        a.file
1273            .cmp(&b.file)
1274            .then_with(|| a.caller.cmp(&b.caller))
1275            .then_with(|| a.callee.cmp(&b.callee))
1276    });
1277
1278    // Collect unique nodes and organize by file if clustering
1279    let mut nodes: BTreeSet<(String, String)> = BTreeSet::new(); // (symbol_id_or_name, label)
1280    let mut file_to_nodes: BTreeMap<String, Vec<(String, String)>> = BTreeMap::new();
1281
1282    for call in &calls {
1283        for (name, symbol_id) in [
1284            (call.caller.as_str(), call.caller_symbol_id.as_ref()),
1285            (call.callee.as_str(), call.callee_symbol_id.as_ref()),
1286        ] {
1287            let node_id = escape_dot_id(&symbol_id.cloned(), name);
1288            let label = format!(
1289                "{}\\n{}",
1290                escape_dot_label(name),
1291                escape_dot_label(&call.file)
1292            );
1293            nodes.insert((node_id.clone(), label.clone()));
1294
1295            if config.filters.cluster {
1296                file_to_nodes
1297                    .entry(call.file.clone())
1298                    .or_default()
1299                    .push((node_id, label));
1300            }
1301        }
1302    }
1303
1304    // Emit edges
1305    if config.filters.cluster {
1306        // Group nodes by file into subgraphs
1307        for (file, file_nodes) in &file_to_nodes {
1308            // Create a sanitized cluster ID from file path
1309            let cluster_id = file
1310                .chars()
1311                .map(|c| if c.is_alphanumeric() { c } else { '_' })
1312                .collect::<String>();
1313
1314            dot_output.push_str(&format!("  subgraph cluster_{} {{\n", cluster_id));
1315            dot_output.push_str(&format!("    label = {};\n", escape_dot_label(file)));
1316            dot_output.push_str("    style = dashed;\n");
1317
1318            // Deduplicate nodes within this file
1319            let mut seen = BTreeSet::new();
1320            for (node_id, label) in file_nodes {
1321                if seen.insert(node_id.clone()) {
1322                    dot_output.push_str(&format!("    {} [label={}];\n", node_id, label));
1323                }
1324            }
1325
1326            dot_output.push_str("  }\n");
1327        }
1328    } else {
1329        // Emit all nodes at top level
1330        for (node_id, label) in &nodes {
1331            dot_output.push_str(&format!("  {} [label={}];\n", node_id, label));
1332        }
1333    }
1334
1335    // Emit edges
1336    for call in &calls {
1337        let caller_id = escape_dot_id(&call.caller_symbol_id, &call.caller);
1338        let callee_id = escape_dot_id(&call.callee_symbol_id, &call.callee);
1339        dot_output.push_str(&format!("  {} -> {};\n", caller_id, callee_id));
1340    }
1341
1342    dot_output.push_str("}\n");
1343
1344    Ok(dot_output)
1345}
1346
1347/// Export graph data with configurable format and options
1348///
1349/// Dispatches to export_json(), export_jsonl(), or export_dot() based on config.format.
1350/// Respects minify flag for JSON output.
1351///
1352/// # Arguments
1353/// * `graph` - The code graph to export
1354/// * `config` - Export configuration (format, minify, filters)
1355///
1356/// # Returns
1357/// JSON, JSONL, or DOT string based on config.format
1358pub fn export_graph(graph: &mut CodeGraph, config: &ExportConfig) -> Result<String> {
1359    // Check if export should be empty based on filters
1360    let has_content = config.include_symbols || config.include_references || config.include_calls;
1361
1362    if !has_content {
1363        // Return empty result of appropriate format
1364        return match config.format {
1365            ExportFormat::Json => {
1366                let empty = GraphExport {
1367                    version: "2.0.0".to_string(),
1368                    files: Vec::new(),
1369                    symbols: Vec::new(),
1370                    references: Vec::new(),
1371                    calls: Vec::new(),
1372                    collisions: Vec::new(),
1373                };
1374                if config.minify {
1375                    serde_json::to_string(&empty).map_err(Into::into)
1376                } else {
1377                    serde_json::to_string_pretty(&empty).map_err(Into::into)
1378                }
1379            }
1380            ExportFormat::JsonL => Ok(String::new()),
1381            ExportFormat::Dot => {
1382                // Empty DOT graph
1383                Ok("strict digraph call_graph {\n}\n".to_string())
1384            }
1385            _ => Err(anyhow::anyhow!(
1386                "Export format {:?} not yet implemented",
1387                config.format
1388            )),
1389        };
1390    }
1391
1392    match config.format {
1393        ExportFormat::Json => {
1394            let mut files = Vec::new();
1395            let mut symbols = Vec::new();
1396            let mut references = Vec::new();
1397            let mut calls = Vec::new();
1398            let mut collisions = Vec::new();
1399
1400            // Get all entity IDs from the graph
1401            let entity_ids = graph.files.backend.entity_ids()?;
1402            let snapshot = SnapshotId::current();
1403
1404            // Process each entity
1405            for entity_id in entity_ids {
1406                let entity = graph.files.backend.get_node(snapshot, entity_id)?;
1407
1408                match entity.kind.as_str() {
1409                    "File" => {
1410                        if let Ok(file_node) =
1411                            serde_json::from_value::<FileNode>(entity.data.clone())
1412                        {
1413                            files.push(FileExport {
1414                                path: file_node.path,
1415                                hash: file_node.hash,
1416                            });
1417                        }
1418                    }
1419                    "Symbol" => {
1420                        if config.include_symbols {
1421                            if let Ok(symbol_node) =
1422                                serde_json::from_value::<SymbolNode>(entity.data.clone())
1423                            {
1424                                let file = get_file_path_from_symbol(graph, entity_id)?;
1425                                symbols.push(SymbolExport {
1426                                    symbol_id: symbol_node.symbol_id,
1427                                    canonical_fqn: symbol_node.canonical_fqn,
1428                                    display_fqn: symbol_node.display_fqn,
1429                                    name: symbol_node.name,
1430                                    kind: symbol_node.kind,
1431                                    kind_normalized: symbol_node.kind_normalized,
1432                                    file,
1433                                    byte_start: symbol_node.byte_start,
1434                                    byte_end: symbol_node.byte_end,
1435                                    start_line: symbol_node.start_line,
1436                                    start_col: symbol_node.start_col,
1437                                    end_line: symbol_node.end_line,
1438                                    end_col: symbol_node.end_col,
1439                                });
1440                            }
1441                        }
1442                    }
1443                    "Reference" => {
1444                        if config.include_references {
1445                            if let Ok(ref_node) =
1446                                serde_json::from_value::<ReferenceNode>(entity.data.clone())
1447                            {
1448                                let referenced_symbol = entity
1449                                    .name
1450                                    .strip_prefix("ref to ")
1451                                    .unwrap_or("")
1452                                    .to_string();
1453
1454                                references.push(ReferenceExport {
1455                                    file: ref_node.file,
1456                                    referenced_symbol,
1457                                    target_symbol_id: None,
1458                                    byte_start: ref_node.byte_start as usize,
1459                                    byte_end: ref_node.byte_end as usize,
1460                                    start_line: ref_node.start_line as usize,
1461                                    start_col: ref_node.start_col as usize,
1462                                    end_line: ref_node.end_line as usize,
1463                                    end_col: ref_node.end_col as usize,
1464                                });
1465                            }
1466                        }
1467                    }
1468                    "Call" => {
1469                        if config.include_calls {
1470                            if let Ok(call_node) =
1471                                serde_json::from_value::<CallNode>(entity.data.clone())
1472                            {
1473                                calls.push(CallExport {
1474                                    file: call_node.file,
1475                                    caller: call_node.caller,
1476                                    callee: call_node.callee,
1477                                    caller_symbol_id: call_node.caller_symbol_id,
1478                                    callee_symbol_id: call_node.callee_symbol_id,
1479                                    byte_start: call_node.byte_start as usize,
1480                                    byte_end: call_node.byte_end as usize,
1481                                    start_line: call_node.start_line as usize,
1482                                    start_col: call_node.start_col as usize,
1483                                    end_line: call_node.end_line as usize,
1484                                    end_col: call_node.end_col as usize,
1485                                });
1486                            }
1487                        }
1488                    }
1489                    _ => {
1490                        // Ignore unknown node types
1491                    }
1492                }
1493            }
1494
1495            if config.include_collisions {
1496                collisions = build_collision_exports(graph, config.collisions_field, usize::MAX)?;
1497            }
1498
1499            // Sort for deterministic output
1500            files.sort_by(|a, b| a.path.cmp(&b.path));
1501            symbols.sort_by(|a, b| (&a.file, &a.name).cmp(&(&b.file, &b.name)));
1502            references.sort_by(|a, b| {
1503                (&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol))
1504            });
1505            calls.sort_by(|a, b| {
1506                (&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee))
1507            });
1508
1509            let export = GraphExport {
1510                version: "2.0.0".to_string(), // v1.5 adds symbol_id, canonical_fqn, display_fqn
1511                files,
1512                symbols,
1513                references,
1514                calls,
1515                collisions,
1516            };
1517
1518            if config.minify {
1519                serde_json::to_string(&export).map_err(Into::into)
1520            } else {
1521                serde_json::to_string_pretty(&export).map_err(Into::into)
1522            }
1523        }
1524        ExportFormat::JsonL => export_jsonl(graph),
1525        ExportFormat::Dot => export_dot(graph, config),
1526        ExportFormat::Csv => export_csv(graph, config),
1527        ExportFormat::Scip => {
1528            // SCIP export is binary, not text - use separate function
1529            let scip_config = self::scip::ScipExportConfig {
1530                project_root: ".".to_string(),
1531                project_name: None,
1532                version: None,
1533            };
1534            let scip_bytes = self::scip::export_scip(graph, &scip_config)?;
1535
1536            // Return base64-encoded SCIP data as a workaround for text-based export_graph
1537            // For direct binary output, use export_cmd.rs which handles SCIP specially
1538            Ok(base64::engine::general_purpose::STANDARD.encode(&scip_bytes))
1539        }
1540        ExportFormat::Lsif => {
1541            // LSIF export returns JSONL format
1542            // For file output, use export_cmd.rs which handles LSIF specially
1543            Ok(String::new())
1544        }
1545    }
1546}
1547
1548// ============================================================================
1549// CSV Export
1550// ============================================================================
1551
/// Unified CSV row for all record types
///
/// Single struct with optional fields for different record types ensures
/// consistent CSV headers across Symbol, Reference, and Call records: every
/// row carries every column, and columns that do not apply to a record type
/// are left as `None`.
///
/// NOTE: We do NOT use `skip_serializing_if` on optional fields because
/// the CSV crate writes headers based on the first record. If we skip fields,
/// subsequent records with different field sets will fail with "found record
/// with X fields, but the previous record has Y fields". Instead, we always
/// write all fields (empty strings for None values) to ensure consistent headers.
///
/// NOTE(review): column order presumably follows field declaration order via
/// the derived `Serialize` impl — confirm before reordering fields.
#[derive(Debug, Clone, Serialize)]
struct UnifiedCsvRow {
    // Universal fields (always present)
    // Row discriminator; `export_csv` writes "Symbol", "Reference", or "Call".
    record_type: String,
    // Path of the file the record belongs to.
    file: String,
    // Byte span of the record, copied from the source node.
    byte_start: usize,
    byte_end: usize,
    // Line/column span of the record, copied from the source node.
    start_line: usize,
    start_col: usize,
    end_line: usize,
    end_col: usize,

    // Symbol-specific (optional, but always serialized as empty string if None)
    symbol_id: Option<String>,
    name: Option<String>,
    kind: Option<String>,
    kind_normalized: Option<String>,

    // Reference-specific (optional, but always serialized as empty string if None)
    // Name of the referenced symbol, taken from the entity name with its
    // "ref to " prefix removed.
    referenced_symbol: Option<String>,
    // Not populated by `export_csv` for any record type at present.
    target_symbol_id: Option<String>,

    // Call-specific (optional, but always serialized as empty string if None)
    caller: Option<String>,
    callee: Option<String>,
    caller_symbol_id: Option<String>,
    callee_symbol_id: Option<String>,
}
1590
1591/// Export graph data to CSV format
1592///
1593/// Produces a combined CSV with a record_type column for discrimination.
1594/// Uses the csv crate for proper RFC 4180 compliance (quoting, escaping).
1595///
1596/// # Returns
1597/// CSV string with all requested entities, deterministically sorted
1598pub fn export_csv(graph: &mut CodeGraph, config: &ExportConfig) -> Result<String> {
1599    let mut records: Vec<UnifiedCsvRow> = Vec::new();
1600
1601    let entity_ids = graph.files.backend.entity_ids()?;
1602    let snapshot = SnapshotId::current();
1603
1604    for entity_id in entity_ids {
1605        let entity = graph.files.backend.get_node(snapshot, entity_id)?;
1606
1607        match entity.kind.as_str() {
1608            "Symbol" => {
1609                if config.include_symbols {
1610                    if let Ok(symbol_node) =
1611                        serde_json::from_value::<SymbolNode>(entity.data.clone())
1612                    {
1613                        let file = get_file_path_from_symbol(graph, entity_id)?;
1614                        records.push(UnifiedCsvRow {
1615                            record_type: "Symbol".to_string(),
1616                            file,
1617                            byte_start: symbol_node.byte_start,
1618                            byte_end: symbol_node.byte_end,
1619                            start_line: symbol_node.start_line,
1620                            start_col: symbol_node.start_col,
1621                            end_line: symbol_node.end_line,
1622                            end_col: symbol_node.end_col,
1623                            symbol_id: symbol_node.symbol_id,
1624                            name: symbol_node.name,
1625                            kind: Some(symbol_node.kind),
1626                            kind_normalized: symbol_node.kind_normalized,
1627                            referenced_symbol: None,
1628                            target_symbol_id: None,
1629                            caller: None,
1630                            callee: None,
1631                            caller_symbol_id: None,
1632                            callee_symbol_id: None,
1633                        });
1634                    }
1635                }
1636            }
1637            "Reference" => {
1638                if config.include_references {
1639                    if let Ok(ref_node) =
1640                        serde_json::from_value::<ReferenceNode>(entity.data.clone())
1641                    {
1642                        let referenced_symbol = entity
1643                            .name
1644                            .strip_prefix("ref to ")
1645                            .unwrap_or("")
1646                            .to_string();
1647
1648                        records.push(UnifiedCsvRow {
1649                            record_type: "Reference".to_string(),
1650                            file: ref_node.file,
1651                            byte_start: ref_node.byte_start as usize,
1652                            byte_end: ref_node.byte_end as usize,
1653                            start_line: ref_node.start_line as usize,
1654                            start_col: ref_node.start_col as usize,
1655                            end_line: ref_node.end_line as usize,
1656                            end_col: ref_node.end_col as usize,
1657                            symbol_id: None,
1658                            name: None,
1659                            kind: None,
1660                            kind_normalized: None,
1661                            referenced_symbol: Some(referenced_symbol),
1662                            target_symbol_id: None,
1663                            caller: None,
1664                            callee: None,
1665                            caller_symbol_id: None,
1666                            callee_symbol_id: None,
1667                        });
1668                    }
1669                }
1670            }
1671            "Call" => {
1672                if config.include_calls {
1673                    if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
1674                        records.push(UnifiedCsvRow {
1675                            record_type: "Call".to_string(),
1676                            file: call_node.file,
1677                            byte_start: call_node.byte_start as usize,
1678                            byte_end: call_node.byte_end as usize,
1679                            start_line: call_node.start_line as usize,
1680                            start_col: call_node.start_col as usize,
1681                            end_line: call_node.end_line as usize,
1682                            end_col: call_node.end_col as usize,
1683                            symbol_id: None,
1684                            name: None,
1685                            kind: None,
1686                            kind_normalized: None,
1687                            referenced_symbol: None,
1688                            target_symbol_id: None,
1689                            caller: Some(call_node.caller),
1690                            callee: Some(call_node.callee),
1691                            caller_symbol_id: call_node.caller_symbol_id,
1692                            callee_symbol_id: call_node.callee_symbol_id,
1693                        });
1694                    }
1695                }
1696            }
1697            _ => {
1698                // Ignore File and unknown node types for CSV export
1699            }
1700        }
1701    }
1702
1703    // Sort deterministically by record_type, then by type-specific fields
1704    records.sort_by(|a, b| {
1705        // First by record type
1706        let type_order = match (a.record_type.as_str(), b.record_type.as_str()) {
1707            ("Call", "Call") => std::cmp::Ordering::Equal,
1708            ("Call", "Reference") => std::cmp::Ordering::Greater,
1709            ("Call", "Symbol") => std::cmp::Ordering::Greater,
1710            ("Reference", "Call") => std::cmp::Ordering::Less,
1711            ("Reference", "Reference") => std::cmp::Ordering::Equal,
1712            ("Reference", "Symbol") => std::cmp::Ordering::Greater,
1713            ("Symbol", "Call") => std::cmp::Ordering::Less,
1714            ("Symbol", "Reference") => std::cmp::Ordering::Less,
1715            ("Symbol", "Symbol") => std::cmp::Ordering::Equal,
1716            _ => std::cmp::Ordering::Equal,
1717        };
1718
1719        if type_order != std::cmp::Ordering::Equal {
1720            return type_order;
1721        }
1722
1723        // Within same type, sort by applicable fields
1724        match a.record_type.as_str() {
1725            "Symbol" => (&a.file, a.name.as_ref().unwrap_or(&String::new()))
1726                .cmp(&(&b.file, b.name.as_ref().unwrap_or(&String::new()))),
1727            "Reference" => (&a.record_type, &a.file, a.referenced_symbol.as_ref().unwrap_or(&String::new()))
1728                .cmp(&(&b.record_type, &b.file, b.referenced_symbol.as_ref().unwrap_or(&String::new()))),
1729            "Call" => (&a.record_type, &a.file, a.caller.as_ref().unwrap_or(&String::new()), a.callee.as_ref().unwrap_or(&String::new()))
1730                .cmp(&(&b.record_type, &b.file, b.caller.as_ref().unwrap_or(&String::new()), b.callee.as_ref().unwrap_or(&String::new()))),
1731            _ => std::cmp::Ordering::Equal,
1732        }
1733    });
1734
1735    // Write to buffer using csv::Writer
1736    let mut buffer = Vec::new();
1737
1738    // Add version header comment
1739    use std::io::Write;
1740    writeln!(buffer, "# Magellan Export Version: 2.0.0")?;
1741
1742    {
1743        let mut writer = csv::Writer::from_writer(&mut buffer);
1744        for record in records {
1745            writer.serialize(record)?;
1746        }
1747        writer.flush()?;
1748    }
1749
1750    String::from_utf8(buffer).map_err(|e| anyhow::anyhow!("CSV output is not valid UTF-8: {}", e))
1751}