pub mod scip;
use anyhow::Result;
use base64::Engine;
use serde::{Deserialize, Serialize};
use sqlitegraph::{BackendDirection, GraphBackend, NeighborQuery, SnapshotId};
use super::{CallNode, CodeGraph, FileNode, ReferenceNode, SymbolNode};
use crate::graph::query::{collision_groups, CollisionField};
/// Output formats the graph exporter can be asked to produce.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExportFormat {
    /// Selected by the name `json`.
    Json,
    /// Selected by the name `jsonl`.
    JsonL,
    /// Selected by the name `dot`.
    Dot,
    /// Selected by the name `csv`.
    Csv,
    /// Selected by the name `scip`.
    Scip,
    /// Selected by the name `lsif`.
    Lsif,
}

impl ExportFormat {
    /// Parses a format name, ignoring letter case.
    ///
    /// Returns `None` when `s` does not name a known format.
    pub fn from_str(s: &str) -> Option<Self> {
        // Lookup table: lowercase name -> variant.
        const KNOWN: [(&str, ExportFormat); 6] = [
            ("json", ExportFormat::Json),
            ("jsonl", ExportFormat::JsonL),
            ("dot", ExportFormat::Dot),
            ("csv", ExportFormat::Csv),
            ("scip", ExportFormat::Scip),
            ("lsif", ExportFormat::Lsif),
        ];

        let wanted = s.to_lowercase();
        KNOWN
            .iter()
            .find(|entry| entry.0 == wanted.as_str())
            .map(|entry| entry.1)
    }
}
/// Options that control what `export_graph` and the streaming exporters emit.
#[derive(Debug, Clone)]
pub struct ExportConfig {
// Output format; `export_graph` dispatches on this value.
pub format: ExportFormat,
// When false, "Symbol" entities are skipped by the config-aware exporters.
pub include_symbols: bool,
// When false, "Reference" entities are skipped by the config-aware exporters.
pub include_references: bool,
// When false, "Call" entities are skipped by the config-aware exporters.
pub include_calls: bool,
// JSON only: compact output when true, pretty-printed otherwise.
pub minify: bool,
// Additional filters; `export_dot` reads `file`, `symbol` and `cluster`.
pub filters: ExportFilters,
// When true, JSON exports also carry collision groups.
pub include_collisions: bool,
// Field handed to `collision_groups` when collisions are included.
pub collisions_field: CollisionField,
}
/// Optional narrowing of an export; every field defaults to "no filter".
#[derive(Debug, Clone, Default)]
pub struct ExportFilters {
// `export_dot` keeps only calls whose file path contains this substring.
pub file: Option<String>,
// `export_dot` keeps only calls whose caller or callee contains this substring.
pub symbol: Option<String>,
// NOTE(review): not read by any exporter visible in this part of the file;
// presumably a symbol-kind filter - confirm against the remaining code.
pub kind: Option<String>,
// NOTE(review): not read by any exporter visible in this part of the file;
// presumably a traversal depth limit - confirm against the remaining code.
pub max_depth: Option<usize>,
// `export_dot` groups nodes into one `subgraph cluster_*` per file when true.
pub cluster: bool,
}
/// Wraps `s` in double quotes for use as a DOT label.
///
/// Backslashes are doubled, double quotes become `\"`, and line feeds become
/// the two-character sequence `\n`. All other characters pass through as-is.
fn escape_dot_label(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for ch in s.chars() {
        match ch {
            '\\' => quoted.push_str("\\\\"),
            '"' => quoted.push_str("\\\""),
            '\n' => quoted.push_str("\\n"),
            other => quoted.push(other),
        }
    }
    quoted.push('"');
    quoted
}
/// Produces the DOT node identifier for a symbol.
///
/// * With a `symbol_id`: the id is returned unchanged when it is already a
///   valid bare DOT identifier (letters, digits and `_`, not starting with an
///   ASCII digit). Any other id (for example one that starts with a digit or
///   contains `:`), is emitted as a double-quoted DOT string so the generated
///   graph stays parseable.
/// * Without a `symbol_id`: `name` is used with every non-alphanumeric
///   character replaced by `_`.
///
/// NOTE(review): the name fallback is intentionally left as it was (an empty
/// name still yields an empty id, and a name starting with a digit is not
/// quoted) because existing unit tests pin that output.
fn escape_dot_id(symbol_id: &Option<String>, name: &str) -> String {
    /// True when `candidate` can be written in DOT without quoting.
    fn is_bare_dot_id(candidate: &str) -> bool {
        let mut chars = candidate.chars();
        match chars.next() {
            Some(first) if !first.is_ascii_digit() && (first.is_alphanumeric() || first == '_') => {
                chars.all(|c| c.is_alphanumeric() || c == '_')
            }
            _ => false,
        }
    }

    match symbol_id {
        Some(id) if is_bare_dot_id(id) => id.clone(),
        // Quote anything else; escape the characters that would terminate or
        // corrupt the quoted string.
        Some(id) => format!(
            "\"{}\"",
            id.replace('\\', "\\\\").replace('"', "\\\"")
        ),
        None => name
            .chars()
            .map(|c| if c.is_alphanumeric() { c } else { '_' })
            .collect(),
    }
}
// Unit tests for the DOT escaping helpers plus two end-to-end exports
// (JSON with collisions, unified CSV) against a temporary on-disk graph.
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::query::CollisionField;
// Plain text is only wrapped in double quotes.
#[test]
fn test_escape_dot_label_basic() {
assert_eq!(escape_dot_label("simple"), "\"simple\"");
assert_eq!(escape_dot_label("with spaces"), "\"with spaces\"");
}
// Embedded double quotes are backslash-escaped.
#[test]
fn test_escape_dot_label_quotes() {
assert_eq!(escape_dot_label("say \"hello\""), r#""say \"hello\"""#);
}
// Backslashes are doubled.
#[test]
fn test_escape_dot_label_backslash() {
assert_eq!(escape_dot_label(r"C:\path"), r#""C:\\path""#);
assert_eq!(escape_dot_label("a\\b"), r#""a\\b""#);
}
// A line feed becomes the two-character escape `\n`.
#[test]
fn test_escape_dot_label_newlines() {
assert_eq!(escape_dot_label("line1\nline2"), r#""line1\nline2""#);
}
// Empty input yields an empty quoted string.
#[test]
fn test_escape_dot_label_empty() {
assert_eq!(escape_dot_label(""), "\"\"");
}
// Tabs and non-ASCII text pass through unchanged.
#[test]
fn test_escape_dot_label_special_chars() {
assert_eq!(escape_dot_label("a\tb"), "\"a\tb\"");
assert_eq!(escape_dot_label("hello世界"), "\"hello世界\"");
}
// A present symbol id wins over the fallback name.
#[test]
fn test_escape_dot_id_with_symbol_id() {
let symbol_id = Some("a1b2c3d4e5f6".to_string());
assert_eq!(escape_dot_id(&symbol_id, "fallback"), "a1b2c3d4e5f6");
}
// Without a symbol id, non-alphanumeric characters in the name become `_`.
#[test]
fn test_escape_dot_id_without_symbol_id() {
assert_eq!(escape_dot_id(&None, "simple_name"), "simple_name");
assert_eq!(escape_dot_id(&None, "name-with-dashes"), "name_with_dashes");
assert_eq!(escape_dot_id(&None, "name.with.dots"), "name_with_dots");
assert_eq!(escape_dot_id(&None, "name with spaces"), "name_with_spaces");
}
// An empty fallback name yields an empty id.
#[test]
fn test_escape_dot_id_empty_name() {
assert_eq!(escape_dot_id(&None, ""), "");
}
// Indexes two files that each define `collide` and checks that a JSON export
// with `include_collisions` enabled reports at least one collision group.
#[test]
fn test_export_collisions_included_when_enabled() {
let temp_dir = tempfile::TempDir::new().unwrap();
let db_path = temp_dir.path().join("test.db");
let mut graph = CodeGraph::open(&db_path).unwrap();
let file1 = temp_dir.path().join("file1.rs");
std::fs::write(&file1, "fn collide() {}\n").unwrap();
let file2 = temp_dir.path().join("file2.rs");
std::fs::write(&file2, "fn collide() {}\n").unwrap();
let path1 = file1.to_string_lossy().to_string();
let path2 = file2.to_string_lossy().to_string();
let source1 = std::fs::read(&file1).unwrap();
let source2 = std::fs::read(&file2).unwrap();
graph.index_file(&path1, &source1).unwrap();
graph.index_file(&path2, &source2).unwrap();
let config = ExportConfig {
format: ExportFormat::Json,
include_symbols: true,
include_references: false,
include_calls: false,
minify: false,
filters: ExportFilters::default(),
include_collisions: true,
collisions_field: CollisionField::Fqn,
};
let json = export_graph(&mut graph, &config).unwrap();
let export: GraphExport = serde_json::from_str(&json).unwrap();
assert!(!export.collisions.is_empty());
}
// Exports symbols, references and calls as CSV and checks that the header
// names every unified column and that each data row has the same number of
// comma-separated fields as the header.
#[test]
fn test_csv_export_mixed_record_types() {
let temp_dir = tempfile::TempDir::new().unwrap();
let db_path = temp_dir.path().join("test.db");
let mut graph = CodeGraph::open(&db_path).unwrap();
let file1 = temp_dir.path().join("test.rs");
std::fs::write(
&file1,
r#"
fn main() {
println!("hello");
helper();
}
fn helper() {}
"#,
)
.unwrap();
let path1 = file1.to_string_lossy().to_string();
let source1 = std::fs::read(&file1).unwrap();
graph.index_file(&path1, &source1).unwrap();
let config = ExportConfig {
format: ExportFormat::Csv,
include_symbols: true,
include_references: true,
include_calls: true,
minify: false,
filters: ExportFilters::default(),
include_collisions: false,
collisions_field: CollisionField::Fqn,
};
let csv = export_graph(&mut graph, &config).unwrap();
let lines: Vec<&str> = csv.lines().collect();
assert!(lines.len() > 1, "CSV should have header + data rows");
// The header is the first line that is neither a `#` comment nor empty.
let header = lines
.iter()
.find(|line| !line.starts_with('#') && !line.is_empty())
.expect("Should have a CSV header row");
assert!(header.contains("record_type"));
assert!(header.contains("file"));
assert!(header.contains("symbol_id"));
assert!(header.contains("name"));
assert!(header.contains("kind"));
assert!(header.contains("referenced_symbol"));
assert!(header.contains("target_symbol_id"));
assert!(header.contains("caller"));
assert!(header.contains("callee"));
assert!(header.contains("caller_symbol_id"));
assert!(header.contains("callee_symbol_id"));
let header_cols: Vec<&str> = header.split(',').collect();
let expected_col_count = header_cols.len();
// `skip(1)` drops the first output line; the header row itself is still
// visited and trivially matches its own column count.
for (i, line) in lines.iter().skip(1).enumerate() {
if line.is_empty() || line.starts_with('#') {
continue;
}
// Naive split: assumes no field in this fixture contains a comma.
let data_cols: Vec<&str> = line.split(',').collect();
assert_eq!(
data_cols.len(),
expected_col_count,
"Row {} has {} columns, expected {}",
i + 2,
data_cols.len(),
expected_col_count
);
}
// The export starts with a version comment line.
assert!(csv.starts_with("# Magellan Export Version: 2.0.0"));
}
}
impl Default for ExportConfig {
    /// Pretty-printed JSON containing symbols, references and calls, with no
    /// filters and no collision report.
    fn default() -> Self {
        let filters = ExportFilters::default();
        Self {
            // What to write and how.
            format: ExportFormat::Json,
            minify: false,
            // Which entity kinds to include.
            include_symbols: true,
            include_references: true,
            include_calls: true,
            // Narrowing and collision reporting.
            filters,
            include_collisions: false,
            collisions_field: CollisionField::Fqn,
        }
    }
}
impl ExportConfig {
pub fn new(format: ExportFormat) -> Self {
ExportConfig {
format,
..Default::default()
}
}
pub fn with_symbols(mut self, include: bool) -> Self {
self.include_symbols = include;
self
}
pub fn with_references(mut self, include: bool) -> Self {
self.include_references = include;
self
}
pub fn with_calls(mut self, include: bool) -> Self {
self.include_calls = include;
self
}
pub fn with_minify(mut self, minify: bool) -> Self {
self.minify = minify;
self
}
}
/// Top-level document produced by the JSON exporters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphExport {
// Export schema version; the exporters in this file write "2.0.0".
pub version: String,
// One entry per "File" entity.
pub files: Vec<FileExport>,
// One entry per exported "Symbol" entity.
pub symbols: Vec<SymbolExport>,
// One entry per exported "Reference" entity.
pub references: Vec<ReferenceExport>,
// One entry per exported "Call" entity.
pub calls: Vec<CallExport>,
// Collision groups; omitted from the output when empty and defaulted to
// empty when absent on input.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub collisions: Vec<CollisionExport>,
}
/// Exported form of a "File" entity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileExport {
// Path copied from the stored `FileNode`.
pub path: String,
// Hash copied from the stored `FileNode`.
pub hash: String,
}
/// Exported form of a "Symbol" entity; fields are copied from `SymbolNode`
/// except `file`, which is resolved through the graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolExport {
// The three identifier fields below default to `None` when missing on input.
#[serde(default)]
pub symbol_id: Option<String>,
#[serde(default)]
pub canonical_fqn: Option<String>,
#[serde(default)]
pub display_fqn: Option<String>,
pub name: Option<String>,
pub kind: String,
pub kind_normalized: Option<String>,
// Path of the file reached via the incoming "DEFINES" edge; empty when it
// could not be resolved.
pub file: String,
// Source span of the symbol as stored on the node.
pub byte_start: usize,
pub byte_end: usize,
pub start_line: usize,
pub start_col: usize,
pub end_line: usize,
pub end_col: usize,
}
/// Exported form of a "Reference" entity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceExport {
// File recorded on the stored `ReferenceNode`.
pub file: String,
// Entity name with its "ref to " prefix removed; empty when the prefix is
// absent.
pub referenced_symbol: String,
// Defaults to `None` when missing on input; the exporters visible in this
// file always write `None`.
#[serde(default)]
pub target_symbol_id: Option<String>,
// Source span of the reference as stored on the node.
pub byte_start: usize,
pub byte_end: usize,
pub start_line: usize,
pub start_col: usize,
pub end_line: usize,
pub end_col: usize,
}
/// Exported form of a "Call" entity; fields are copied from `CallNode`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CallExport {
// File recorded on the stored `CallNode`.
pub file: String,
// Caller and callee names as stored on the node.
pub caller: String,
pub callee: String,
// Symbol ids of both ends; default to `None` when missing on input.
#[serde(default)]
pub caller_symbol_id: Option<String>,
#[serde(default)]
pub callee_symbol_id: Option<String>,
// Source span of the call as stored on the node.
pub byte_start: usize,
pub byte_end: usize,
pub start_line: usize,
pub start_col: usize,
pub end_line: usize,
pub end_col: usize,
}
/// One member of a collision group; every field is copied from the candidate
/// returned by `collision_groups`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollisionCandidateExport {
// Graph entity id of the candidate.
pub entity_id: i64,
pub symbol_id: Option<String>,
pub canonical_fqn: Option<String>,
pub display_fqn: Option<String>,
pub name: Option<String>,
pub file_path: Option<String>,
}
/// One collision group as reported by `collision_groups`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollisionExport {
// Field and value copied from the group.
// NOTE(review): presumably the name of the colliding field and the value the
// candidates share - confirm against `collision_groups`.
pub field: String,
pub value: String,
// Count copied from the group.
pub count: usize,
// The group's candidates.
pub candidates: Vec<CollisionCandidateExport>,
}
fn build_collision_exports(
graph: &mut CodeGraph,
field: CollisionField,
limit: usize,
) -> Result<Vec<CollisionExport>> {
let groups = collision_groups(graph, field, limit)?;
let mut exports = Vec::new();
for group in groups {
let candidates = group
.candidates
.into_iter()
.map(|candidate| CollisionCandidateExport {
entity_id: candidate.entity_id,
symbol_id: candidate.symbol_id,
canonical_fqn: candidate.canonical_fqn,
display_fqn: candidate.display_fqn,
name: candidate.name,
file_path: candidate.file_path,
})
.collect();
exports.push(CollisionExport {
field: group.field,
value: group.value,
count: group.count,
candidates,
});
}
Ok(exports)
}
pub fn export_json(graph: &mut CodeGraph) -> Result<String> {
let mut files = Vec::new();
let mut symbols = Vec::new();
let mut references = Vec::new();
let mut calls = Vec::new();
let collisions: Vec<CollisionExport> = Vec::new();
let entity_ids = graph.files.backend.entity_ids()?;
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
let entity = graph.files.backend.get_node(snapshot, entity_id)?;
match entity.kind.as_str() {
"File" => {
if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data.clone()) {
files.push(FileExport {
path: file_node.path,
hash: file_node.hash,
});
}
}
"Symbol" => {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(entity.data.clone()) {
let file = get_file_path_from_symbol(graph, entity_id)?;
symbols.push(SymbolExport {
symbol_id: symbol_node.symbol_id,
canonical_fqn: symbol_node.canonical_fqn,
display_fqn: symbol_node.display_fqn,
name: symbol_node.name,
kind: symbol_node.kind,
kind_normalized: symbol_node.kind_normalized,
file,
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
start_line: symbol_node.start_line,
start_col: symbol_node.start_col,
end_line: symbol_node.end_line,
end_col: symbol_node.end_col,
});
}
}
"Reference" => {
if let Ok(ref_node) = serde_json::from_value::<ReferenceNode>(entity.data.clone()) {
let referenced_symbol = entity
.name
.strip_prefix("ref to ")
.unwrap_or("")
.to_string();
references.push(ReferenceExport {
file: ref_node.file,
referenced_symbol,
target_symbol_id: None, byte_start: ref_node.byte_start as usize,
byte_end: ref_node.byte_end as usize,
start_line: ref_node.start_line as usize,
start_col: ref_node.start_col as usize,
end_line: ref_node.end_line as usize,
end_col: ref_node.end_col as usize,
});
}
}
"Call" => {
if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
calls.push(CallExport {
file: call_node.file,
caller: call_node.caller,
callee: call_node.callee,
caller_symbol_id: call_node.caller_symbol_id,
callee_symbol_id: call_node.callee_symbol_id,
byte_start: call_node.byte_start as usize,
byte_end: call_node.byte_end as usize,
start_line: call_node.start_line as usize,
start_col: call_node.start_col as usize,
end_line: call_node.end_line as usize,
end_col: call_node.end_col as usize,
});
}
}
_ => {
}
}
}
files.sort_by(|a, b| a.path.cmp(&b.path));
symbols.sort_by(|a, b| (&a.file, &a.name).cmp(&(&b.file, &b.name)));
references
.sort_by(|a, b| (&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol)));
calls.sort_by(|a, b| (&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee)));
let export = GraphExport {
version: "2.0.0".to_string(),
files,
symbols,
references,
calls,
collisions,
};
Ok(serde_json::to_string_pretty(&export)?)
}
pub fn stream_json<W: std::io::Write>(
graph: &mut CodeGraph,
config: &ExportConfig,
writer: &mut W,
) -> Result<()> {
let mut files = Vec::new();
let mut symbols = Vec::new();
let mut references = Vec::new();
let mut calls = Vec::new();
let mut collisions = Vec::new();
let entity_ids = graph.files.backend.entity_ids()?;
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
let entity = graph.files.backend.get_node(snapshot, entity_id)?;
match entity.kind.as_str() {
"File" => {
if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data.clone()) {
files.push(FileExport {
path: file_node.path,
hash: file_node.hash,
});
}
}
"Symbol" if config.include_symbols => {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(entity.data.clone()) {
let file = get_file_path_from_symbol(graph, entity_id)?;
symbols.push(SymbolExport {
symbol_id: symbol_node.symbol_id,
canonical_fqn: symbol_node.canonical_fqn,
display_fqn: symbol_node.display_fqn,
name: symbol_node.name,
kind: symbol_node.kind,
kind_normalized: symbol_node.kind_normalized,
file,
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
start_line: symbol_node.start_line,
start_col: symbol_node.start_col,
end_line: symbol_node.end_line,
end_col: symbol_node.end_col,
});
}
}
"Reference" if config.include_references => {
if let Ok(ref_node) = serde_json::from_value::<ReferenceNode>(entity.data.clone()) {
let referenced_symbol = entity
.name
.strip_prefix("ref to ")
.unwrap_or("")
.to_string();
references.push(ReferenceExport {
file: ref_node.file,
referenced_symbol,
target_symbol_id: None,
byte_start: ref_node.byte_start as usize,
byte_end: ref_node.byte_end as usize,
start_line: ref_node.start_line as usize,
start_col: ref_node.start_col as usize,
end_line: ref_node.end_line as usize,
end_col: ref_node.end_col as usize,
});
}
}
"Call" if config.include_calls => {
if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
calls.push(CallExport {
file: call_node.file,
caller: call_node.caller,
callee: call_node.callee,
caller_symbol_id: call_node.caller_symbol_id,
callee_symbol_id: call_node.callee_symbol_id,
byte_start: call_node.byte_start as usize,
byte_end: call_node.byte_end as usize,
start_line: call_node.start_line as usize,
start_col: call_node.start_col as usize,
end_line: call_node.end_line as usize,
end_col: call_node.end_col as usize,
});
}
}
_ => {
}
}
}
if config.include_collisions {
collisions = build_collision_exports(graph, config.collisions_field, usize::MAX)?;
}
files.sort_by(|a, b| a.path.cmp(&b.path));
symbols.sort_by(|a, b| (&a.file, &a.name).cmp(&(&b.file, &b.name)));
references
.sort_by(|a, b| (&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol)));
calls.sort_by(|a, b| (&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee)));
let export = GraphExport {
version: "2.0.0".to_string(), files,
symbols,
references,
calls,
collisions,
};
serde_json::to_writer_pretty(writer, &export).map_err(Into::into)
}
pub fn stream_json_minified<W: std::io::Write>(
graph: &mut CodeGraph,
config: &ExportConfig,
writer: &mut W,
) -> Result<()> {
let mut files = Vec::new();
let mut symbols = Vec::new();
let mut references = Vec::new();
let mut calls = Vec::new();
let mut collisions = Vec::new();
let entity_ids = graph.files.backend.entity_ids()?;
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
let entity = graph.files.backend.get_node(snapshot, entity_id)?;
match entity.kind.as_str() {
"File" => {
if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data.clone()) {
files.push(FileExport {
path: file_node.path,
hash: file_node.hash,
});
}
}
"Symbol" if config.include_symbols => {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(entity.data.clone()) {
let file = get_file_path_from_symbol(graph, entity_id)?;
symbols.push(SymbolExport {
symbol_id: symbol_node.symbol_id,
canonical_fqn: symbol_node.canonical_fqn,
display_fqn: symbol_node.display_fqn,
name: symbol_node.name,
kind: symbol_node.kind,
kind_normalized: symbol_node.kind_normalized,
file,
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
start_line: symbol_node.start_line,
start_col: symbol_node.start_col,
end_line: symbol_node.end_line,
end_col: symbol_node.end_col,
});
}
}
"Reference" if config.include_references => {
if let Ok(ref_node) = serde_json::from_value::<ReferenceNode>(entity.data.clone()) {
let referenced_symbol = entity
.name
.strip_prefix("ref to ")
.unwrap_or("")
.to_string();
references.push(ReferenceExport {
file: ref_node.file,
referenced_symbol,
target_symbol_id: None,
byte_start: ref_node.byte_start as usize,
byte_end: ref_node.byte_end as usize,
start_line: ref_node.start_line as usize,
start_col: ref_node.start_col as usize,
end_line: ref_node.end_line as usize,
end_col: ref_node.end_col as usize,
});
}
}
"Call" if config.include_calls => {
if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
calls.push(CallExport {
file: call_node.file,
caller: call_node.caller,
callee: call_node.callee,
caller_symbol_id: call_node.caller_symbol_id,
callee_symbol_id: call_node.callee_symbol_id,
byte_start: call_node.byte_start as usize,
byte_end: call_node.byte_end as usize,
start_line: call_node.start_line as usize,
start_col: call_node.start_col as usize,
end_line: call_node.end_line as usize,
end_col: call_node.end_col as usize,
});
}
}
_ => {
}
}
}
if config.include_collisions {
collisions = build_collision_exports(graph, config.collisions_field, usize::MAX)?;
}
files.sort_by(|a, b| a.path.cmp(&b.path));
symbols.sort_by(|a, b| (&a.file, &a.name).cmp(&(&b.file, &b.name)));
references
.sort_by(|a, b| (&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol)));
calls.sort_by(|a, b| (&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee)));
let export = GraphExport {
version: "2.0.0".to_string(), files,
symbols,
references,
calls,
collisions,
};
serde_json::to_writer(writer, &export).map_err(Into::into)
}
/// Resolves the path of the file that defines the symbol entity `symbol_id`.
///
/// Follows the first incoming "DEFINES" edge. Returns an empty string when
/// there is no such edge, when the neighbor is not a "File" entity, or when
/// its payload does not deserialize as a `FileNode`.
///
/// # Errors
/// Propagates backend errors from the neighbor query and node lookup.
fn get_file_path_from_symbol(graph: &mut CodeGraph, symbol_id: i64) -> Result<String> {
    let snapshot = SnapshotId::current();
    let defines_query = NeighborQuery {
        direction: BackendDirection::Incoming,
        edge_type: Some("DEFINES".to_string()),
    };
    let definers = graph
        .files
        .backend
        .neighbors(snapshot, symbol_id, defines_query)?;

    // Only the first defining neighbor is considered.
    let definer_id = match definers.first() {
        Some(id) => *id,
        None => return Ok(String::new()),
    };

    let entity = graph.files.backend.get_node(snapshot, definer_id)?;
    if entity.kind != "File" {
        return Ok(String::new());
    }

    let path = serde_json::from_value::<FileNode>(entity.data)
        .map(|file_node| file_node.path)
        .unwrap_or_default();
    Ok(path)
}
/// One line of the JSONL / NDJSON export. Serialized with an internal
/// `"type"` tag naming the variant.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
enum JsonlRecord {
// Export schema version; the exporters push this record first.
Version { version: String },
// One record per exported entity of the corresponding kind.
File(FileExport),
Symbol(SymbolExport),
Reference(ReferenceExport),
Call(CallExport),
}
pub fn export_jsonl(graph: &mut CodeGraph) -> Result<String> {
let mut records = Vec::new();
records.push(JsonlRecord::Version {
version: "2.0.0".to_string(),
});
let entity_ids = graph.files.backend.entity_ids()?;
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
let entity = graph.files.backend.get_node(snapshot, entity_id)?;
match entity.kind.as_str() {
"File" => {
if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data.clone()) {
records.push(JsonlRecord::File(FileExport {
path: file_node.path,
hash: file_node.hash,
}));
}
}
"Symbol" => {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(entity.data.clone()) {
let file = get_file_path_from_symbol(graph, entity_id)?;
records.push(JsonlRecord::Symbol(SymbolExport {
symbol_id: symbol_node.symbol_id,
canonical_fqn: symbol_node.canonical_fqn,
display_fqn: symbol_node.display_fqn,
name: symbol_node.name,
kind: symbol_node.kind,
kind_normalized: symbol_node.kind_normalized,
file,
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
start_line: symbol_node.start_line,
start_col: symbol_node.start_col,
end_line: symbol_node.end_line,
end_col: symbol_node.end_col,
}));
}
}
"Reference" => {
if let Ok(ref_node) = serde_json::from_value::<ReferenceNode>(entity.data.clone()) {
let referenced_symbol = entity
.name
.strip_prefix("ref to ")
.unwrap_or("")
.to_string();
records.push(JsonlRecord::Reference(ReferenceExport {
file: ref_node.file,
referenced_symbol,
target_symbol_id: None,
byte_start: ref_node.byte_start as usize,
byte_end: ref_node.byte_end as usize,
start_line: ref_node.start_line as usize,
start_col: ref_node.start_col as usize,
end_line: ref_node.end_line as usize,
end_col: ref_node.end_col as usize,
}));
}
}
"Call" => {
if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
records.push(JsonlRecord::Call(CallExport {
file: call_node.file,
caller: call_node.caller,
callee: call_node.callee,
caller_symbol_id: call_node.caller_symbol_id,
callee_symbol_id: call_node.callee_symbol_id,
byte_start: call_node.byte_start as usize,
byte_end: call_node.byte_end as usize,
start_line: call_node.start_line as usize,
start_col: call_node.start_col as usize,
end_line: call_node.end_line as usize,
end_col: call_node.end_col as usize,
}));
}
}
_ => {
}
}
}
records.sort_by(|a, b| match (a, b) {
(JsonlRecord::Version { .. }, _) => std::cmp::Ordering::Less,
(_, JsonlRecord::Version { .. }) => std::cmp::Ordering::Greater,
(JsonlRecord::File(a), JsonlRecord::File(b)) => a.path.cmp(&b.path),
(JsonlRecord::Symbol(a), JsonlRecord::Symbol(b)) => {
(&a.file, &a.name).cmp(&(&b.file, &b.name))
}
(JsonlRecord::Reference(a), JsonlRecord::Reference(b)) => {
(&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol))
}
(JsonlRecord::Call(a), JsonlRecord::Call(b)) => {
(&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee))
}
(JsonlRecord::File(_), _) => std::cmp::Ordering::Less,
(_, JsonlRecord::File(_)) => std::cmp::Ordering::Greater,
(JsonlRecord::Symbol(_), _) => std::cmp::Ordering::Less,
(_, JsonlRecord::Symbol(_)) => std::cmp::Ordering::Greater,
(JsonlRecord::Reference(_), _) => std::cmp::Ordering::Less,
(_, JsonlRecord::Reference(_)) => std::cmp::Ordering::Greater,
});
let lines: Result<Vec<String>, _> = records.iter().map(serde_json::to_string).collect();
let lines = lines?;
Ok(lines.join("\n"))
}
pub fn stream_ndjson<W: std::io::Write>(
graph: &mut CodeGraph,
config: &ExportConfig,
writer: &mut W,
) -> Result<()> {
let mut records = Vec::new();
records.push(JsonlRecord::Version {
version: "2.0.0".to_string(),
});
let entity_ids = graph.files.backend.entity_ids()?;
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
let entity = graph.files.backend.get_node(snapshot, entity_id)?;
match entity.kind.as_str() {
"File" => {
if let Ok(file_node) = serde_json::from_value::<FileNode>(entity.data.clone()) {
records.push(JsonlRecord::File(FileExport {
path: file_node.path,
hash: file_node.hash,
}));
}
}
"Symbol" if config.include_symbols => {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(entity.data.clone()) {
let file = get_file_path_from_symbol(graph, entity_id)?;
records.push(JsonlRecord::Symbol(SymbolExport {
symbol_id: symbol_node.symbol_id,
canonical_fqn: symbol_node.canonical_fqn,
display_fqn: symbol_node.display_fqn,
name: symbol_node.name,
kind: symbol_node.kind,
kind_normalized: symbol_node.kind_normalized,
file,
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
start_line: symbol_node.start_line,
start_col: symbol_node.start_col,
end_line: symbol_node.end_line,
end_col: symbol_node.end_col,
}));
}
}
"Reference" if config.include_references => {
if let Ok(ref_node) = serde_json::from_value::<ReferenceNode>(entity.data.clone()) {
let referenced_symbol = entity
.name
.strip_prefix("ref to ")
.unwrap_or("")
.to_string();
records.push(JsonlRecord::Reference(ReferenceExport {
file: ref_node.file,
referenced_symbol,
target_symbol_id: None,
byte_start: ref_node.byte_start as usize,
byte_end: ref_node.byte_end as usize,
start_line: ref_node.start_line as usize,
start_col: ref_node.start_col as usize,
end_line: ref_node.end_line as usize,
end_col: ref_node.end_col as usize,
}));
}
}
"Call" if config.include_calls => {
if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data.clone()) {
records.push(JsonlRecord::Call(CallExport {
file: call_node.file,
caller: call_node.caller,
callee: call_node.callee,
caller_symbol_id: call_node.caller_symbol_id,
callee_symbol_id: call_node.callee_symbol_id,
byte_start: call_node.byte_start as usize,
byte_end: call_node.byte_end as usize,
start_line: call_node.start_line as usize,
start_col: call_node.start_col as usize,
end_line: call_node.end_line as usize,
end_col: call_node.end_col as usize,
}));
}
}
_ => {
}
}
}
records.sort_by(|a, b| match (a, b) {
(JsonlRecord::Version { .. }, _) => std::cmp::Ordering::Less,
(_, JsonlRecord::Version { .. }) => std::cmp::Ordering::Greater,
(JsonlRecord::File(a), JsonlRecord::File(b)) => a.path.cmp(&b.path),
(JsonlRecord::Symbol(a), JsonlRecord::Symbol(b)) => {
(&a.file, &a.name).cmp(&(&b.file, &b.name))
}
(JsonlRecord::Reference(a), JsonlRecord::Reference(b)) => {
(&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol))
}
(JsonlRecord::Call(a), JsonlRecord::Call(b)) => {
(&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee))
}
(JsonlRecord::File(_), _) => std::cmp::Ordering::Less,
(_, JsonlRecord::File(_)) => std::cmp::Ordering::Greater,
(JsonlRecord::Symbol(_), _) => std::cmp::Ordering::Less,
(_, JsonlRecord::Symbol(_)) => std::cmp::Ordering::Greater,
(JsonlRecord::Reference(_), _) => std::cmp::Ordering::Less,
(_, JsonlRecord::Reference(_)) => std::cmp::Ordering::Greater,
});
let mut first = true;
for record in records {
if !first {
writeln!(&mut *writer)?;
}
serde_json::to_writer(&mut *writer, &record)
.map_err(|e| anyhow::anyhow!("JSON serialization error: {}", e))?;
first = false;
}
Ok(())
}
/// Renders the call graph as a Graphviz `strict digraph`.
///
/// Every "Call" entity contributes a caller node, a callee node and an edge
/// between them. Calls can be narrowed with `config.filters.file` (substring
/// of the call's file) and `config.filters.symbol` (substring of caller or
/// callee). With `config.filters.cluster` set, nodes are grouped into one
/// `subgraph cluster_*` per calling file.
///
/// Node labels are a single quoted DOT string of the form `"name\nfile"`,
/// where `file` is the file that contains the call.
///
/// # Errors
/// Propagates backend errors from reading the graph.
pub fn export_dot(graph: &mut CodeGraph, config: &ExportConfig) -> Result<String> {
    use std::collections::{BTreeMap, BTreeSet};

    let mut dot_output = String::from("strict digraph call_graph {\n");
    dot_output.push_str(" node [shape=box, style=rounded];\n");

    // Collect every call node; payloads that do not deserialize are skipped.
    let entity_ids = graph.files.backend.entity_ids()?;
    let snapshot = SnapshotId::current();
    let mut calls = Vec::new();
    for entity_id in entity_ids {
        let entity = graph.files.backend.get_node(snapshot, entity_id)?;
        if entity.kind == "Call" {
            if let Ok(call_node) = serde_json::from_value::<CallNode>(entity.data) {
                calls.push(call_node);
            }
        }
    }

    if let Some(ref file_filter) = config.filters.file {
        calls.retain(|c| c.file.contains(file_filter));
    }
    if let Some(ref symbol_filter) = config.filters.symbol {
        calls.retain(|c| c.caller.contains(symbol_filter) || c.callee.contains(symbol_filter));
    }

    // Stable ordering so the output is deterministic.
    calls.sort_by(|a, b| {
        a.file
            .cmp(&b.file)
            .then_with(|| a.caller.cmp(&b.caller))
            .then_with(|| a.callee.cmp(&b.callee))
    });

    let mut nodes: BTreeSet<(String, String)> = BTreeSet::new();
    let mut file_to_nodes: BTreeMap<String, Vec<(String, String)>> = BTreeMap::new();
    for call in &calls {
        for (name, symbol_id) in [
            (&call.caller, &call.caller_symbol_id),
            (&call.callee, &call.callee_symbol_id),
        ] {
            let node_id = escape_dot_id(symbol_id, name);
            // Build the label text first and quote it once. Quoting the two
            // parts separately produced `"name"\n"file"`, which is not valid
            // DOT. The real newline is turned into the `\n` escape by
            // `escape_dot_label`.
            let label = escape_dot_label(&format!("{}\n{}", name, call.file));
            nodes.insert((node_id.clone(), label.clone()));
            if config.filters.cluster {
                file_to_nodes
                    .entry(call.file.clone())
                    .or_default()
                    .push((node_id, label));
            }
        }
    }

    if config.filters.cluster {
        for (file, file_nodes) in &file_to_nodes {
            // Cluster names must be plain identifiers, so sanitize the path.
            let cluster_id = file
                .chars()
                .map(|c| if c.is_alphanumeric() { c } else { '_' })
                .collect::<String>();
            dot_output.push_str(&format!(" subgraph cluster_{} {{\n", cluster_id));
            dot_output.push_str(&format!(" label = {};\n", escape_dot_label(file)));
            dot_output.push_str(" style = dashed;\n");
            // Declare each node at most once per cluster.
            let mut seen = BTreeSet::new();
            for (node_id, label) in file_nodes {
                if seen.insert(node_id.clone()) {
                    dot_output.push_str(&format!(" {} [label={}];\n", node_id, label));
                }
            }
            dot_output.push_str(" }\n");
        }
    } else {
        for (node_id, label) in &nodes {
            dot_output.push_str(&format!(" {} [label={}];\n", node_id, label));
        }
    }

    for call in &calls {
        let caller_id = escape_dot_id(&call.caller_symbol_id, &call.caller);
        let callee_id = escape_dot_id(&call.callee_symbol_id, &call.callee);
        dot_output.push_str(&format!(" {} -> {};\n", caller_id, callee_id));
    }

    dot_output.push_str("}\n");
    Ok(dot_output)
}
pub fn export_graph(graph: &mut CodeGraph, config: &ExportConfig) -> Result<String> {
let has_content = config.include_symbols || config.include_references || config.include_calls;
if !has_content {
return match config.format {
ExportFormat::Json => {
let empty = GraphExport {
version: "2.0.0".to_string(),
files: Vec::new(),
symbols: Vec::new(),
references: Vec::new(),
calls: Vec::new(),
collisions: Vec::new(),
};
if config.minify {
serde_json::to_string(&empty).map_err(Into::into)
} else {
serde_json::to_string_pretty(&empty).map_err(Into::into)
}
}
ExportFormat::JsonL => Ok(String::new()),
ExportFormat::Dot => {
Ok("strict digraph call_graph {\n}\n".to_string())
}
_ => Err(anyhow::anyhow!(
"Export format {:?} not yet implemented",
config.format
)),
};
}
match config.format {
ExportFormat::Json => {
let mut files = Vec::new();
let mut symbols = Vec::new();
let mut references = Vec::new();
let mut calls = Vec::new();
let mut collisions = Vec::new();
let entity_ids = graph.files.backend.entity_ids()?;
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
let entity = graph.files.backend.get_node(snapshot, entity_id)?;
match entity.kind.as_str() {
"File" => {
if let Ok(file_node) =
serde_json::from_value::<FileNode>(entity.data.clone())
{
files.push(FileExport {
path: file_node.path,
hash: file_node.hash,
});
}
}
"Symbol" if config.include_symbols => {
if let Ok(symbol_node) =
serde_json::from_value::<SymbolNode>(entity.data.clone())
{
let file = get_file_path_from_symbol(graph, entity_id)?;
symbols.push(SymbolExport {
symbol_id: symbol_node.symbol_id,
canonical_fqn: symbol_node.canonical_fqn,
display_fqn: symbol_node.display_fqn,
name: symbol_node.name,
kind: symbol_node.kind,
kind_normalized: symbol_node.kind_normalized,
file,
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
start_line: symbol_node.start_line,
start_col: symbol_node.start_col,
end_line: symbol_node.end_line,
end_col: symbol_node.end_col,
});
}
}
"Reference" if config.include_references => {
if let Ok(ref_node) =
serde_json::from_value::<ReferenceNode>(entity.data.clone())
{
let referenced_symbol = entity
.name
.strip_prefix("ref to ")
.unwrap_or("")
.to_string();
references.push(ReferenceExport {
file: ref_node.file,
referenced_symbol,
target_symbol_id: None,
byte_start: ref_node.byte_start as usize,
byte_end: ref_node.byte_end as usize,
start_line: ref_node.start_line as usize,
start_col: ref_node.start_col as usize,
end_line: ref_node.end_line as usize,
end_col: ref_node.end_col as usize,
});
}
}
"Call" if config.include_calls => {
if let Ok(call_node) =
serde_json::from_value::<CallNode>(entity.data.clone())
{
calls.push(CallExport {
file: call_node.file,
caller: call_node.caller,
callee: call_node.callee,
caller_symbol_id: call_node.caller_symbol_id,
callee_symbol_id: call_node.callee_symbol_id,
byte_start: call_node.byte_start as usize,
byte_end: call_node.byte_end as usize,
start_line: call_node.start_line as usize,
start_col: call_node.start_col as usize,
end_line: call_node.end_line as usize,
end_col: call_node.end_col as usize,
});
}
}
_ => {
}
}
}
if config.include_collisions {
collisions = build_collision_exports(graph, config.collisions_field, usize::MAX)?;
}
files.sort_by(|a, b| a.path.cmp(&b.path));
symbols.sort_by(|a, b| (&a.file, &a.name).cmp(&(&b.file, &b.name)));
references.sort_by(|a, b| {
(&a.file, &a.referenced_symbol).cmp(&(&b.file, &b.referenced_symbol))
});
calls.sort_by(|a, b| {
(&a.file, &a.caller, &a.callee).cmp(&(&b.file, &b.caller, &b.callee))
});
let export = GraphExport {
version: "2.0.0".to_string(), files,
symbols,
references,
calls,
collisions,
};
if config.minify {
serde_json::to_string(&export).map_err(Into::into)
} else {
serde_json::to_string_pretty(&export).map_err(Into::into)
}
}
ExportFormat::JsonL => export_jsonl(graph),
ExportFormat::Dot => export_dot(graph, config),
ExportFormat::Csv => export_csv(graph, config),
ExportFormat::Scip => {
let scip_config = self::scip::ScipExportConfig {
project_root: ".".to_string(),
project_name: None,
version: None,
};
let scip_bytes = self::scip::export_scip(graph, &scip_config)?;
Ok(base64::engine::general_purpose::STANDARD.encode(&scip_bytes))
}
ExportFormat::Lsif => {
Ok(String::new())
}
}
}
/// One flattened row of the unified CSV export produced by `export_csv`.
///
/// A single row shape is shared by all three record types; columns that do not
/// apply to a given record type are left as `None`.
///
/// NOTE(review): rows are written through serde, so the field names and their
/// declaration order presumably define the CSV columns — confirm against the
/// `csv` crate before renaming or reordering anything here.
#[derive(Debug, Clone, Serialize)]
struct UnifiedCsvRow {
/// Discriminator set by `export_csv`: `"Symbol"`, `"Reference"`, or `"Call"`.
record_type: String,
/// File the record belongs to.
file: String,
// Location columns, filled for every record type from the underlying node
// (reference and call values are converted with `as usize`).
byte_start: usize,
byte_end: usize,
start_line: usize,
start_col: usize,
end_line: usize,
end_col: usize,
// Symbol-only columns; `None` on reference and call rows.
symbol_id: Option<String>,
name: Option<String>,
kind: Option<String>,
kind_normalized: Option<String>,
/// Reference rows only: the entity name with its `"ref to "` prefix removed
/// (empty when the prefix is absent).
referenced_symbol: Option<String>,
/// Always written as `None` by `export_csv` in its current form.
target_symbol_id: Option<String>,
// Call-only columns; `None` on symbol and reference rows.
caller: Option<String>,
callee: Option<String>,
caller_symbol_id: Option<String>,
callee_symbol_id: Option<String>,
}
/// Exports the graph's symbols, references, and calls as one unified CSV document.
///
/// Every entity id reported by the backend is loaded and dispatched on its `kind`:
/// `"Symbol"`, `"Reference"`, and `"Call"` entities each become one `UnifiedCsvRow`,
/// provided the matching `include_*` flag in `config` is set. Entities of any other
/// kind, and entities whose JSON payload does not deserialize into the expected node
/// type, are skipped without raising an error.
///
/// Rows are ordered by record type (symbols, then references, then calls) and, within
/// a type, by file followed by that type's name columns. The sort is stable, so rows
/// with equal keys keep the order in which the backend listed them.
///
/// The output starts with the line `# Magellan Export Version: 2.0.0`, followed by
/// whatever the CSV writer emits for the collected rows.
///
/// # Errors
///
/// Returns an error if the backend cannot list or load entities, if a symbol's file
/// path cannot be resolved, if CSV serialization or flushing fails, or if the
/// produced bytes are not valid UTF-8.
pub fn export_csv(graph: &mut CodeGraph, config: &ExportConfig) -> Result<String> {
    use std::io::Write;

    /// Builds a row that carries only the shared location columns; every
    /// type-specific column starts out as `None`.
    ///
    /// `span` is `[byte_start, byte_end, start_line, start_col, end_line, end_col]`.
    fn location_row(record_type: &str, file: String, span: [usize; 6]) -> UnifiedCsvRow {
        let [byte_start, byte_end, start_line, start_col, end_line, end_col] = span;
        UnifiedCsvRow {
            record_type: record_type.to_string(),
            file,
            byte_start,
            byte_end,
            start_line,
            start_col,
            end_line,
            end_col,
            symbol_id: None,
            name: None,
            kind: None,
            kind_normalized: None,
            referenced_symbol: None,
            target_symbol_id: None,
            caller: None,
            callee: None,
            caller_symbol_id: None,
            callee_symbol_id: None,
        }
    }

    /// Position of a record type in the output; `None` for an unrecognised type.
    fn type_rank(record_type: &str) -> Option<u8> {
        match record_type {
            "Symbol" => Some(0),
            "Reference" => Some(1),
            "Call" => Some(2),
            _ => None,
        }
    }

    /// Treats a missing optional column as the empty string for ordering purposes.
    fn text(field: &Option<String>) -> &str {
        field.as_deref().unwrap_or("")
    }

    /// Orders rows by record type first, then by the per-type sort key.
    fn compare_rows(lhs: &UnifiedCsvRow, rhs: &UnifiedCsvRow) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        // Rows are only ranked against each other when both types are recognised;
        // anything else is considered a tie at this level.
        let by_type = match (type_rank(&lhs.record_type), type_rank(&rhs.record_type)) {
            (Some(left), Some(right)) => left.cmp(&right),
            _ => Ordering::Equal,
        };
        if by_type != Ordering::Equal {
            return by_type;
        }

        match lhs.record_type.as_str() {
            "Symbol" => lhs
                .file
                .cmp(&rhs.file)
                .then_with(|| text(&lhs.name).cmp(text(&rhs.name))),
            "Reference" => lhs
                .record_type
                .cmp(&rhs.record_type)
                .then_with(|| lhs.file.cmp(&rhs.file))
                .then_with(|| text(&lhs.referenced_symbol).cmp(text(&rhs.referenced_symbol))),
            "Call" => lhs
                .record_type
                .cmp(&rhs.record_type)
                .then_with(|| lhs.file.cmp(&rhs.file))
                .then_with(|| text(&lhs.caller).cmp(text(&rhs.caller)))
                .then_with(|| text(&lhs.callee).cmp(text(&rhs.callee))),
            _ => Ordering::Equal,
        }
    }

    let entity_ids = graph.files.backend.entity_ids()?;
    let snapshot = SnapshotId::current();
    let mut rows: Vec<UnifiedCsvRow> = Vec::new();

    for entity_id in entity_ids {
        let entity = graph.files.backend.get_node(snapshot, entity_id)?;
        match entity.kind.as_str() {
            "Symbol" if config.include_symbols => {
                if let Ok(sym) = serde_json::from_value::<SymbolNode>(entity.data.clone()) {
                    // Symbols do not carry their own path; it is resolved via the graph.
                    let file = get_file_path_from_symbol(graph, entity_id)?;
                    rows.push(UnifiedCsvRow {
                        symbol_id: sym.symbol_id,
                        name: sym.name,
                        kind: Some(sym.kind),
                        kind_normalized: sym.kind_normalized,
                        ..location_row(
                            "Symbol",
                            file,
                            [
                                sym.byte_start,
                                sym.byte_end,
                                sym.start_line,
                                sym.start_col,
                                sym.end_line,
                                sym.end_col,
                            ],
                        )
                    });
                }
            }
            "Reference" if config.include_references => {
                if let Ok(reference) =
                    serde_json::from_value::<ReferenceNode>(entity.data.clone())
                {
                    // The referenced name is recovered from the entity name; an
                    // entity without the expected prefix yields an empty string.
                    let target_name = entity
                        .name
                        .strip_prefix("ref to ")
                        .unwrap_or("")
                        .to_string();
                    rows.push(UnifiedCsvRow {
                        referenced_symbol: Some(target_name),
                        ..location_row(
                            "Reference",
                            reference.file,
                            [
                                reference.byte_start as usize,
                                reference.byte_end as usize,
                                reference.start_line as usize,
                                reference.start_col as usize,
                                reference.end_line as usize,
                                reference.end_col as usize,
                            ],
                        )
                    });
                }
            }
            "Call" if config.include_calls => {
                if let Ok(call) = serde_json::from_value::<CallNode>(entity.data.clone()) {
                    rows.push(UnifiedCsvRow {
                        caller: Some(call.caller),
                        callee: Some(call.callee),
                        caller_symbol_id: call.caller_symbol_id,
                        callee_symbol_id: call.callee_symbol_id,
                        ..location_row(
                            "Call",
                            call.file,
                            [
                                call.byte_start as usize,
                                call.byte_end as usize,
                                call.start_line as usize,
                                call.start_col as usize,
                                call.end_line as usize,
                                call.end_col as usize,
                            ],
                        )
                    });
                }
            }
            _ => {}
        }
    }

    rows.sort_by(compare_rows);

    let mut output: Vec<u8> = Vec::new();
    writeln!(output, "# Magellan Export Version: 2.0.0")?;
    {
        // Scoped so the writer's mutable borrow of `output` ends before conversion.
        let mut csv_writer = csv::Writer::from_writer(&mut output);
        rows.into_iter()
            .try_for_each(|row| csv_writer.serialize(row))?;
        csv_writer.flush()?;
    }

    String::from_utf8(output).map_err(|e| anyhow::anyhow!("CSV output is not valid UTF-8: {}", e))
}