Skip to main content

sqlite_graphrag/commands/
graph_export.rs

1//! Handler for the `graph-export` CLI subcommand.
2
3use crate::cli::GraphExportFormat;
4use crate::entity_type::EntityType;
5use crate::errors::AppError;
6use crate::output;
7use crate::paths::AppPaths;
8use crate::storage::connection::open_ro;
9use crate::storage::entities;
10use serde::Serialize;
11use std::collections::HashMap;
12use std::fs;
13use std::path::PathBuf;
14use std::time::Instant;
15
/// Optional nested subcommands. When absent, the default behavior exports
/// the full entity snapshot for backward compatibility.
// NOTE: the `///` lines on the variants below are consumed by clap's derive
// as subcommand help text; keep maintainer-only remarks in `//` comments.
// Each variant is dispatched to its handler by `run`.
#[derive(clap::Subcommand)]
pub enum GraphSubcommand {
    /// Traverse relationships from a starting entity using BFS
    Traverse(GraphTraverseArgs),
    /// Show graph statistics (node/edge counts, degree distribution)
    Stats(GraphStatsArgs),
    /// List entities stored in the graph with optional filters
    Entities(GraphEntitiesArgs),
}
27
// Output formats accepted by `graph traverse --format`.
// JSON is the only variant; `run_traverse` reads the value and discards it.
#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
pub enum GraphTraverseFormat {
    Json,
}
32
// Output formats accepted by `graph stats --format`.
#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
pub enum GraphStatsFormat {
    // Structured response serialized from `GraphStatsResponse`.
    Json,
    // Single-line `key=value` summary built in `run_stats`.
    Text,
}
38
// Arguments of the top-level `graph` command. Without a subcommand, `run`
// forwards these fields to the full snapshot export.
// NOTE: `///` comments on the fields are used by clap's derive as help text,
// so maintainer notes in this struct are written as plain `//` comments.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Export full entity snapshot as JSON (default)\n  \
    sqlite-graphrag graph\n\n  \
    # Traverse relationships from a starting entity\n  \
    sqlite-graphrag graph traverse --from acme-corp --depth 2\n\n  \
    # Show graph statistics as structured JSON\n  \
    sqlite-graphrag graph stats --format json\n\n  \
    # List entities filtered by type\n  \
    sqlite-graphrag graph entities --entity-type person\n\n  \
    # Export full snapshot in DOT format for Graphviz\n  \
    sqlite-graphrag graph --format dot --output graph.dot\n\n  \
NOTES:\n  \
    Without a subcommand, exports the full entity+edge snapshot.\n  \
    Use `traverse`, `stats`, or `entities` for targeted queries.")]
pub struct GraphArgs {
    /// Optional subcommand; without one, export the full entity snapshot.
    #[command(subcommand)]
    pub subcommand: Option<GraphSubcommand>,
    /// Filter by namespace. Defaults to all namespaces.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Snapshot output format.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphExportFormat,
    /// File path to write output instead of stdout.
    #[arg(long)]
    pub output: Option<PathBuf>,
    // Hidden flag. Despite the "No-op" help text, the snapshot export treats it
    // as an override: it forces the JSON format and prints to stdout even when
    // `--output` is given.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path; clap also reads it from SQLITE_GRAPHRAG_DB_PATH.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
72
// Arguments of `graph traverse`, consumed by `run_traverse`.
// NOTE: `///` comments on the fields are used by clap's derive as help text,
// so maintainer notes in this struct are written as plain `//` comments.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Traverse relationships from an entity with default depth (2)\n  \
    sqlite-graphrag graph traverse --from acme-corp\n\n  \
    # Increase traversal depth to 3 hops\n  \
    sqlite-graphrag graph traverse --from acme-corp --depth 3\n\n  \
    # Traverse within a specific namespace\n  \
    sqlite-graphrag graph traverse --from acme-corp --namespace project-x\n\n  \
NOTES:\n  \
    Output is always JSON. The `hops` array contains each reachable entity\n  \
    with its relation, direction (inbound/outbound), weight, and depth level.")]
pub struct GraphTraverseArgs {
    /// Root entity name for the traversal.
    #[arg(long)]
    pub from: String,
    /// Maximum traversal depth.
    #[arg(long, default_value_t = 2u32)]
    pub depth: u32,
    // Namespace to search; passed through `crate::namespace::resolve_namespace`,
    // which decides the value used when the flag is omitted.
    #[arg(long)]
    pub namespace: Option<String>,
    // Accepted for CLI symmetry; JSON is the only variant and the handler
    // ignores the value.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphTraverseFormat,
    // Hidden flag; `run_traverse` never reads it.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path; clap also reads it from SQLITE_GRAPHRAG_DB_PATH.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
100
// Arguments of `graph stats`, consumed by `run_stats`.
// NOTE: `///` comments on the fields are used by clap's derive as help text,
// so maintainer notes in this struct are written as plain `//` comments.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Show stats for all namespaces (human-readable text)\n  \
    sqlite-graphrag graph stats --format text\n\n  \
    # Show stats as structured JSON\n  \
    sqlite-graphrag graph stats --format json\n\n  \
    # Show stats for a specific namespace\n  \
    sqlite-graphrag graph stats --namespace project-x --format text\n\n  \
NOTES:\n  \
    Reports node_count, edge_count, avg_degree, and max_degree.\n  \
    Default format is JSON. Use `--format text` for a compact single-line summary.")]
pub struct GraphStatsArgs {
    // Restricts the counts to one namespace; every namespace is counted when omitted.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Output format for the stats response.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphStatsFormat,
    // Hidden flag; when set, `run_stats` emits JSON even if `--format text` was given.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path; clap also reads it from SQLITE_GRAPHRAG_DB_PATH.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
123
124/// Field to sort entities by in `graph entities`.
125#[derive(Debug, Clone, Copy, clap::ValueEnum)]
126pub enum EntitySortField {
127    /// Sort alphabetically by entity name.
128    Name,
129    /// Sort by degree (total number of relationships, descending by default).
130    Degree,
131    /// Sort by entity creation timestamp.
132    CreatedAt,
133}
134
/// Sort direction for `graph entities`.
// `Asc` is both the `Default` impl (via `#[default]`) and the CLI default
// declared on `GraphEntitiesArgs::order`.
#[derive(Debug, Clone, Copy, Default, clap::ValueEnum)]
pub enum SortOrder {
    // Ascending; maps to SQL `ASC` in `build_order_by`.
    #[default]
    Asc,
    // Descending; maps to SQL `DESC` in `build_order_by`.
    Desc,
}
142
// Arguments of `graph entities`, consumed by `run_entities`.
// NOTE: `///` comments on the fields are used by clap's derive as help text,
// so maintainer notes in this struct are written as plain `//` comments.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # List all entities (default limit applies)\n  \
    sqlite-graphrag graph entities\n\n  \
    # Filter by entity type\n  \
    sqlite-graphrag graph entities --entity-type person\n\n  \
    # Filter by namespace and type\n  \
    sqlite-graphrag graph entities --namespace project-x --entity-type concept\n\n  \
    # Paginate results (skip first 20, return next 10)\n  \
    sqlite-graphrag graph entities --offset 20 --limit 10\n\n  \
    # Sort by degree descending (most connected first)\n  \
    sqlite-graphrag graph entities --sort-by degree --order desc\n\n  \
    # Sort by creation date ascending\n  \
    sqlite-graphrag graph entities --sort-by created-at --order asc\n\n  \
NOTES:\n  \
    Output is always JSON with `entities`, `total_count`, `limit`, and `offset` fields.\n  \
    Entity types are strings extracted by GLiNER NER (e.g. `person`, `organization`, `location`).")]
pub struct GraphEntitiesArgs {
    // Restricts the listing to one namespace; all namespaces when omitted.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Filter by entity type (one of the 13 canonical types).
    #[arg(long, value_enum)]
    pub entity_type: Option<EntityType>,
    /// Maximum number of results to return.
    #[arg(long, default_value_t = crate::constants::K_GRAPH_ENTITIES_DEFAULT_LIMIT)]
    pub limit: usize,
    /// Number of results to skip for pagination.
    #[arg(long, default_value_t = 0usize)]
    pub offset: usize,
    /// Sort entities by this field. When omitted, the default order is by name ascending.
    // The explicit `help = ...` below is what clap shows as the short help.
    #[arg(long, value_enum, help = "Sort entities by field")]
    pub sort_by: Option<EntitySortField>,
    /// Sort direction: `asc` (default) or `desc`.
    #[arg(long, value_enum, default_value_t = SortOrder::Asc, help = "Sort order")]
    pub order: SortOrder,
    // Hidden flag; `run_entities` never reads it.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path; clap also reads it from SQLITE_GRAPHRAG_DB_PATH.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
183
/// One entity as it appears in the snapshot export (JSON, NDJSON, DOT, Mermaid).
///
/// Field order is the JSON key order, so it must not be rearranged casually.
#[derive(Serialize, Clone)]
struct NodeOut {
    /// Database id of the entity.
    id: i64,
    /// Entity name; also the source of DOT/Mermaid node ids and labels.
    name: String,
    /// Namespace the entity belongs to.
    namespace: String,
    /// Deprecated alias of `type` kept for backward-compat with pre-v1.0.35 clients.
    /// New consumers MUST read `type` instead. Will be removed in a future major release.
    kind: String,
    /// Canonical entity classification (organization, concept, person, etc.).
    /// Mirrors `kind` while the deprecation window is active.
    #[serde(rename = "type")]
    r#type: String,
}
197
/// One relationship in the snapshot export, with endpoints given by entity name.
#[derive(Serialize)]
struct EdgeOut {
    /// Name of the source entity (resolved from the relationship's `source_id`).
    from: String,
    /// Name of the target entity (resolved from the relationship's `target_id`).
    to: String,
    /// Relation label stored on the relationship.
    relation: String,
    /// Weight stored on the relationship.
    weight: f64,
}
205
/// Payload of the JSON snapshot export.
#[derive(Serialize)]
struct GraphSnapshot {
    /// All exported entities.
    nodes: Vec<NodeOut>,
    /// Same content as `nodes`; the export fills it with a clone of that list.
    entities: Vec<NodeOut>,
    /// Relationships whose two endpoints are both present in `nodes`.
    edges: Vec<EdgeOut>,
    /// Wall-clock time spent building the snapshot, in milliseconds.
    elapsed_ms: u64,
}
213
/// One relationship encountered while traversing from the root entity.
#[derive(Serialize)]
struct TraverseHop {
    /// Name of the entity on the far side of the relationship.
    entity: String,
    /// Relation label stored on the relationship.
    relation: String,
    /// `"outbound"` when the expanded entity is the relationship's source,
    /// `"inbound"` when it is the target.
    direction: String,
    /// Weight stored on the relationship.
    weight: f64,
    /// Depth of `entity` relative to the root (the expanded entity's depth plus one).
    depth: u32,
}
222
/// JSON response of `graph traverse`.
#[derive(Serialize)]
struct GraphTraverseResponse {
    /// Root entity name as passed via `--from`.
    from: String,
    /// Namespace the traversal ran in, after resolution.
    namespace: String,
    /// Maximum depth requested via `--depth`.
    depth: u32,
    /// Relationships encountered, in traversal order.
    hops: Vec<TraverseHop>,
    /// Wall-clock time of the command, in milliseconds.
    elapsed_ms: u64,
}
231
/// Response of `graph stats` (serialized as JSON, or summarized as text).
#[derive(Serialize)]
struct GraphStatsResponse {
    /// Namespace filter that was applied; `None` means all namespaces.
    namespace: Option<String>,
    /// Number of rows in `entities` (within the namespace, if one was given).
    node_count: i64,
    /// Number of rows in `relationships`; with a namespace filter, only those
    /// whose source entity lies in that namespace.
    edge_count: i64,
    /// `2 * edge_count / node_count`, or `0.0` when there are no nodes.
    avg_degree: f64,
    /// Largest value of the `degree` column among the counted entities (0 if none).
    max_degree: i64,
    /// Wall-clock time of the command, in milliseconds.
    elapsed_ms: u64,
}
241
/// One row of the `graph entities` listing.
#[derive(Serialize)]
struct EntityItem {
    /// Database id of the entity.
    id: i64,
    /// Entity name.
    name: String,
    /// Value of the `type` column; an empty string when the column is NULL.
    entity_type: String,
    /// Namespace the entity belongs to.
    namespace: String,
    /// Creation time rendered as `%Y-%m-%dT%H:%M:%SZ` from the stored integer
    /// timestamp (interpreted as seconds since the Unix epoch).
    created_at: String,
    /// Total number of relationships (inbound + outbound) for this entity.
    degree: u32,
    /// Optional description; the key is omitted from the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    description: Option<String>,
}
254
/// JSON response of `graph entities`.
#[derive(Serialize)]
struct GraphEntitiesResponse {
    /// The requested page of entities.
    entities: Vec<EntityItem>,
    /// Number of entities matching the filters, ignoring `limit`/`offset`.
    total_count: i64,
    /// Page size that was requested.
    limit: usize,
    /// Number of rows that were skipped.
    offset: usize,
    /// Namespace filter that was applied; `None` means all namespaces.
    namespace: Option<String>,
    /// Wall-clock time of the command, in milliseconds.
    elapsed_ms: u64,
}
264
265pub fn run(args: GraphArgs) -> Result<(), AppError> {
266    match args.subcommand {
267        None => run_entities_snapshot(
268            args.db.as_deref(),
269            args.namespace.as_deref(),
270            args.format,
271            args.json,
272            args.output.as_deref(),
273        ),
274        Some(GraphSubcommand::Traverse(a)) => run_traverse(a),
275        Some(GraphSubcommand::Stats(a)) => run_stats(a),
276        Some(GraphSubcommand::Entities(a)) => run_entities(a),
277    }
278}
279
280fn run_entities_snapshot(
281    db: Option<&str>,
282    namespace: Option<&str>,
283    format: GraphExportFormat,
284    json: bool,
285    output_path: Option<&std::path::Path>,
286) -> Result<(), AppError> {
287    let inicio = Instant::now();
288    let paths = AppPaths::resolve(db)?;
289
290    crate::storage::connection::ensure_db_ready(&paths)?;
291
292    let conn = open_ro(&paths.db)?;
293
294    let nodes_raw = entities::list_entities(&conn, namespace)?;
295    let edges_raw = entities::list_relationships_by_namespace(&conn, namespace)?;
296
297    let id_to_name: HashMap<i64, String> =
298        nodes_raw.iter().map(|n| (n.id, n.name.clone())).collect();
299
300    let nodes: Vec<NodeOut> = nodes_raw
301        .into_iter()
302        .map(|n| NodeOut {
303            id: n.id,
304            name: n.name,
305            namespace: n.namespace,
306            r#type: n.kind.clone(),
307            kind: n.kind,
308        })
309        .collect();
310
311    let mut edges: Vec<EdgeOut> = Vec::with_capacity(edges_raw.len());
312    let mut orphan_edges: usize = 0;
313    for r in edges_raw {
314        let from = match id_to_name.get(&r.source_id) {
315            Some(n) => n.clone(),
316            None => {
317                orphan_edges += 1;
318                tracing::warn!(target: "graph_export", source_id = r.source_id, relation = %r.relation, "edge skipped: source entity not found in id_to_name map");
319                continue;
320            }
321        };
322        let to = match id_to_name.get(&r.target_id) {
323            Some(n) => n.clone(),
324            None => {
325                orphan_edges += 1;
326                tracing::warn!(target: "graph_export", target_id = r.target_id, relation = %r.relation, "edge skipped: target entity not found in id_to_name map");
327                continue;
328            }
329        };
330        edges.push(EdgeOut {
331            from,
332            to,
333            relation: r.relation,
334            weight: r.weight,
335        });
336    }
337    if orphan_edges > 0 {
338        tracing::warn!(target: "graph_export",
339            count = orphan_edges,
340            "edges skipped due to orphaned entity references"
341        );
342    }
343
344    let effective_format = if json {
345        GraphExportFormat::Json
346    } else {
347        format
348    };
349
350    if effective_format == GraphExportFormat::Ndjson {
351        let elapsed_ms = inicio.elapsed().as_millis() as u64;
352        render_ndjson_streaming(&nodes, &edges, elapsed_ms, output_path)?;
353        return Ok(());
354    }
355
356    let rendered = match effective_format {
357        GraphExportFormat::Json => {
358            let entities = nodes.clone();
359            render_json(&GraphSnapshot {
360                nodes,
361                entities,
362                edges,
363                elapsed_ms: inicio.elapsed().as_millis() as u64,
364            })?
365        }
366        GraphExportFormat::Dot => render_dot(&nodes, &edges),
367        GraphExportFormat::Mermaid => render_mermaid(&nodes, &edges),
368        GraphExportFormat::Ndjson => unreachable!("ndjson handled above"),
369    };
370
371    if let Some(path) = output_path.filter(|_| !json) {
372        fs::write(path, &rendered)?;
373        output::emit_progress(&format!("wrote {}", path.display()));
374    } else {
375        output::emit_text(&rendered);
376    }
377
378    Ok(())
379}
380
381fn run_traverse(args: GraphTraverseArgs) -> Result<(), AppError> {
382    let inicio = Instant::now();
383    let _ = args.format;
384    let paths = AppPaths::resolve(args.db.as_deref())?;
385
386    crate::storage::connection::ensure_db_ready(&paths)?;
387
388    let conn = open_ro(&paths.db)?;
389    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
390
391    let from_id = entities::find_entity_id(&conn, &namespace, &args.from)?
392        .ok_or_else(|| AppError::NotFound(format!("entity '{}' not found", args.from)))?;
393
394    let all_rels = entities::list_relationships_by_namespace(&conn, Some(&namespace))?;
395    let all_entities = entities::list_entities(&conn, Some(&namespace))?;
396    let id_to_name: HashMap<i64, String> = all_entities
397        .iter()
398        .map(|e| (e.id, e.name.clone()))
399        .collect();
400
401    let mut hops: Vec<TraverseHop> = Vec::with_capacity(16);
402    let mut visited: std::collections::HashSet<i64> =
403        std::collections::HashSet::with_capacity(args.depth as usize * 10);
404    let mut frontier: Vec<(i64, u32)> = vec![(from_id, 0)];
405
406    while let Some((current_id, current_depth)) = frontier.pop() {
407        if current_depth >= args.depth || visited.contains(&current_id) {
408            continue;
409        }
410        visited.insert(current_id);
411
412        for rel in &all_rels {
413            if rel.source_id == current_id {
414                if let Some(target_name) = id_to_name.get(&rel.target_id) {
415                    hops.push(TraverseHop {
416                        entity: target_name.clone(),
417                        relation: rel.relation.clone(),
418                        direction: "outbound".to_string(),
419                        weight: rel.weight,
420                        depth: current_depth + 1,
421                    });
422                    frontier.push((rel.target_id, current_depth + 1));
423                }
424            } else if rel.target_id == current_id {
425                if let Some(source_name) = id_to_name.get(&rel.source_id) {
426                    hops.push(TraverseHop {
427                        entity: source_name.clone(),
428                        relation: rel.relation.clone(),
429                        direction: "inbound".to_string(),
430                        weight: rel.weight,
431                        depth: current_depth + 1,
432                    });
433                    frontier.push((rel.source_id, current_depth + 1));
434                }
435            }
436        }
437    }
438
439    output::emit_json(&GraphTraverseResponse {
440        from: args.from,
441        namespace,
442        depth: args.depth,
443        hops,
444        elapsed_ms: inicio.elapsed().as_millis() as u64,
445    })?;
446
447    Ok(())
448}
449
450fn run_stats(args: GraphStatsArgs) -> Result<(), AppError> {
451    let inicio = Instant::now();
452    let paths = AppPaths::resolve(args.db.as_deref())?;
453
454    crate::storage::connection::ensure_db_ready(&paths)?;
455
456    let conn = open_ro(&paths.db)?;
457    let ns = args.namespace.as_deref();
458
459    let node_count: i64 = if let Some(n) = ns {
460        conn.query_row(
461            "SELECT COUNT(*) FROM entities WHERE namespace = ?1",
462            rusqlite::params![n],
463            |r| r.get(0),
464        )?
465    } else {
466        conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?
467    };
468
469    let edge_count: i64 = if let Some(n) = ns {
470        conn.query_row(
471            "SELECT COUNT(*) FROM relationships r
472             JOIN entities s ON s.id = r.source_id
473             WHERE s.namespace = ?1",
474            rusqlite::params![n],
475            |r| r.get(0),
476        )?
477    } else {
478        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?
479    };
480
481    let max_degree: i64 = if let Some(n) = ns {
482        conn.query_row(
483            "SELECT COALESCE(MAX(degree), 0) FROM entities WHERE namespace = ?1",
484            rusqlite::params![n],
485            |r| r.get(0),
486        )?
487    } else {
488        conn.query_row("SELECT COALESCE(MAX(degree), 0) FROM entities", [], |r| {
489            r.get(0)
490        })?
491    };
492
493    // avg_degree = 2 * edge_count / node_count (each edge contributes 2 to total degree sum).
494    let avg_degree = if node_count > 0 {
495        2.0 * (edge_count as f64) / (node_count as f64)
496    } else {
497        0.0
498    };
499
500    let resp = GraphStatsResponse {
501        namespace: args.namespace,
502        node_count,
503        edge_count,
504        avg_degree,
505        max_degree,
506        elapsed_ms: inicio.elapsed().as_millis() as u64,
507    };
508
509    let effective_format = if args.json {
510        GraphStatsFormat::Json
511    } else {
512        args.format
513    };
514
515    match effective_format {
516        GraphStatsFormat::Json => output::emit_json(&resp)?,
517        GraphStatsFormat::Text => {
518            output::emit_text(&format!(
519                "nodes={} edges={} avg_degree={:.2} max_degree={} namespace={}",
520                resp.node_count,
521                resp.edge_count,
522                resp.avg_degree,
523                resp.max_degree,
524                resp.namespace.as_deref().unwrap_or("all"),
525            ));
526        }
527    }
528
529    Ok(())
530}
531
532/// Builds the `ORDER BY` clause fragment from sort options.
533///
534/// Returns a static SQL fragment such as `ORDER BY e.name ASC`.
535fn build_order_by(sort_by: Option<EntitySortField>, order: SortOrder) -> &'static str {
536    // The combinations are enumerated as static strings to avoid
537    // format!() allocations in the hot path and satisfy the borrow checker
538    // when the string is used inside conn.prepare().
539    match (sort_by, order) {
540        (None, SortOrder::Asc) | (Some(EntitySortField::Name), SortOrder::Asc) => {
541            "ORDER BY e.name ASC"
542        }
543        (Some(EntitySortField::Name), SortOrder::Desc) => "ORDER BY e.name DESC",
544        (Some(EntitySortField::Degree), SortOrder::Asc) => "ORDER BY degree ASC",
545        (Some(EntitySortField::Degree), SortOrder::Desc) => "ORDER BY degree DESC",
546        (Some(EntitySortField::CreatedAt), SortOrder::Asc) => "ORDER BY e.created_at ASC",
547        (Some(EntitySortField::CreatedAt), SortOrder::Desc) => "ORDER BY e.created_at DESC",
548        // Fallback: None/Desc → sort by name desc (consistent with dir variable).
549        (None, SortOrder::Desc) => "ORDER BY e.name DESC",
550    }
551}
552
553fn run_entities(args: GraphEntitiesArgs) -> Result<(), AppError> {
554    let inicio = Instant::now();
555    let paths = AppPaths::resolve(args.db.as_deref())?;
556
557    crate::storage::connection::ensure_db_ready(&paths)?;
558
559    let conn = open_ro(&paths.db)?;
560
561    let row_to_item = |r: &rusqlite::Row<'_>| -> rusqlite::Result<EntityItem> {
562        let ts: i64 = r.get(4)?;
563        let created_at = chrono::DateTime::from_timestamp(ts, 0)
564            .unwrap_or_default()
565            .format("%Y-%m-%dT%H:%M:%SZ")
566            .to_string();
567        Ok(EntityItem {
568            id: r.get(0)?,
569            name: r.get(1)?,
570            entity_type: r.get(2)?,
571            namespace: r.get(3)?,
572            created_at,
573            degree: r.get(5)?,
574            description: r.get(6)?,
575        })
576    };
577
578    let limit_i = args.limit as i64;
579    let offset_i = args.offset as i64;
580    let order_clause = build_order_by(args.sort_by, args.order);
581
582    let base_select = "SELECT e.id, e.name, COALESCE(e.type, ''), e.namespace, e.created_at,
583                        (SELECT COUNT(*) FROM relationships r
584                         WHERE r.source_id = e.id OR r.target_id = e.id) AS degree,
585                        e.description
586                 FROM entities e";
587
588    let (total_count, items) = match (
589        args.namespace.as_deref(),
590        args.entity_type.map(|et| et.as_str()),
591    ) {
592        (Some(ns), Some(et)) => {
593            let count: i64 = conn.query_row(
594                "SELECT COUNT(*) FROM entities WHERE namespace = ?1 AND type = ?2",
595                rusqlite::params![ns, et],
596                |r| r.get(0),
597            )?;
598            let sql = format!(
599                "{base_select} WHERE e.namespace = ?1 AND e.type = ?2 {order_clause} LIMIT ?3 OFFSET ?4"
600            );
601            let mut stmt = conn.prepare(&sql)?;
602            let rows = stmt
603                .query_map(rusqlite::params![ns, et, limit_i, offset_i], row_to_item)?
604                .collect::<rusqlite::Result<Vec<_>>>()?;
605            (count, rows)
606        }
607        (Some(ns), None) => {
608            let count: i64 = conn.query_row(
609                "SELECT COUNT(*) FROM entities WHERE namespace = ?1",
610                rusqlite::params![ns],
611                |r| r.get(0),
612            )?;
613            let sql =
614                format!("{base_select} WHERE e.namespace = ?1 {order_clause} LIMIT ?2 OFFSET ?3");
615            let mut stmt = conn.prepare(&sql)?;
616            let rows = stmt
617                .query_map(rusqlite::params![ns, limit_i, offset_i], row_to_item)?
618                .collect::<rusqlite::Result<Vec<_>>>()?;
619            (count, rows)
620        }
621        (None, Some(et)) => {
622            let count: i64 = conn.query_row(
623                "SELECT COUNT(*) FROM entities WHERE type = ?1",
624                rusqlite::params![et],
625                |r| r.get(0),
626            )?;
627            let sql = format!("{base_select} WHERE e.type = ?1 {order_clause} LIMIT ?2 OFFSET ?3");
628            let mut stmt = conn.prepare(&sql)?;
629            let rows = stmt
630                .query_map(rusqlite::params![et, limit_i, offset_i], row_to_item)?
631                .collect::<rusqlite::Result<Vec<_>>>()?;
632            (count, rows)
633        }
634        (None, None) => {
635            let count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
636            let sql = format!("{base_select} {order_clause} LIMIT ?1 OFFSET ?2");
637            let mut stmt = conn.prepare(&sql)?;
638            let rows = stmt
639                .query_map(rusqlite::params![limit_i, offset_i], row_to_item)?
640                .collect::<rusqlite::Result<Vec<_>>>()?;
641            (count, rows)
642        }
643    };
644
645    output::emit_json(&GraphEntitiesResponse {
646        entities: items,
647        total_count,
648        limit: args.limit,
649        offset: args.offset,
650        namespace: args.namespace,
651        elapsed_ms: inicio.elapsed().as_millis() as u64,
652    })
653}
654
655fn render_json(snapshot: &GraphSnapshot) -> Result<String, AppError> {
656    Ok(serde_json::to_string_pretty(snapshot)?)
657}
658
659/// Streams the graph as NDJSON: one object per node, one per edge, then a summary.
660///
661/// Each line is flushed immediately so consumers can process incrementally.
662/// When `output_path` is `Some`, lines are written to the file; otherwise to stdout.
663fn render_ndjson_streaming(
664    nodes: &[NodeOut],
665    edges: &[EdgeOut],
666    elapsed_ms: u64,
667    output_path: Option<&std::path::Path>,
668) -> Result<(), AppError> {
669    #[derive(serde::Serialize)]
670    struct NdjsonNode<'a> {
671        kind: &'static str,
672        id: i64,
673        name: &'a str,
674        namespace: &'a str,
675        #[serde(rename = "type")]
676        r#type: &'a str,
677    }
678    #[derive(serde::Serialize)]
679    struct NdjsonEdge<'a> {
680        kind: &'static str,
681        from: &'a str,
682        to: &'a str,
683        relation: &'a str,
684        weight: f64,
685    }
686    #[derive(serde::Serialize)]
687    struct NdjsonSummary {
688        kind: &'static str,
689        nodes: usize,
690        edges: usize,
691        elapsed_ms: u64,
692    }
693
694    use std::io::Write as IoWrite;
695
696    let mut buf: Vec<u8> = Vec::with_capacity(4096);
697
698    let emit_line =
699        |buf: &mut Vec<u8>, line: &str, path: Option<&std::path::Path>| -> Result<(), AppError> {
700            buf.clear();
701            buf.extend_from_slice(line.as_bytes());
702            buf.push(b'\n');
703            if let Some(p) = path {
704                let mut f = std::fs::OpenOptions::new()
705                    .create(true)
706                    .append(true)
707                    .open(p)
708                    .map_err(AppError::Io)?;
709                f.write_all(buf).map_err(AppError::Io)?;
710            } else {
711                output::emit_text(line);
712            }
713            Ok(())
714        };
715
716    // Truncate the output file once before starting (avoids re-opening with append for every line).
717    if let Some(p) = output_path {
718        fs::write(p, b"")?;
719    }
720
721    for node in nodes {
722        let obj = NdjsonNode {
723            kind: "node",
724            id: node.id,
725            name: &node.name,
726            namespace: &node.namespace,
727            r#type: &node.r#type,
728        };
729        let line = serde_json::to_string(&obj)?;
730        emit_line(&mut buf, &line, output_path)?;
731    }
732
733    for edge in edges {
734        let obj = NdjsonEdge {
735            kind: "edge",
736            from: &edge.from,
737            to: &edge.to,
738            relation: &edge.relation,
739            weight: edge.weight,
740        };
741        let line = serde_json::to_string(&obj)?;
742        emit_line(&mut buf, &line, output_path)?;
743    }
744
745    let summary = NdjsonSummary {
746        kind: "summary",
747        nodes: nodes.len(),
748        edges: edges.len(),
749        elapsed_ms,
750    };
751    let line = serde_json::to_string(&summary)?;
752    emit_line(&mut buf, &line, output_path)?;
753
754    Ok(())
755}
756
/// Converts an entity name into an identifier that is valid unquoted in DOT.
///
/// Every character other than an ASCII letter, digit, or `_` becomes `_`.
/// A leading `_` is then added when the result would not be a legal DOT ID:
/// * it is empty;
/// * it starts with a digit but is not purely numeric (`3m` would be read as
///   the numeral `3` followed by a second token);
/// * it equals a DOT keyword (`node`, `edge`, `graph`, `digraph`, `subgraph`,
///   `strict`), which Graphviz matches case-insensitively.
///
/// Distinct names can still map to the same identifier (`a-b` and `a_b`).
fn sanitize_dot_id(raw: &str) -> String {
    const DOT_KEYWORDS: [&str; 6] = ["node", "edge", "graph", "digraph", "subgraph", "strict"];

    let mut id = String::with_capacity(raw.len() + 1);
    id.extend(raw.chars().map(|c| {
        if c.is_ascii_alphanumeric() || c == '_' {
            c
        } else {
            '_'
        }
    }));

    let digit_led_word = id.starts_with(|c: char| c.is_ascii_digit())
        && !id.bytes().all(|b| b.is_ascii_digit());
    let is_keyword = DOT_KEYWORDS.iter().any(|kw| id.eq_ignore_ascii_case(kw));

    if id.is_empty() || digit_led_word || is_keyword {
        id.insert(0, '_');
    }
    id
}
768
769fn render_dot(nodes: &[NodeOut], edges: &[EdgeOut]) -> String {
770    use std::fmt::Write;
771    let mut out = String::with_capacity(nodes.len() * 80 + edges.len() * 60 + 300);
772    out.push_str("digraph sqlite_graphrag {\n");
773    out.push_str("  graph [bgcolor=\"white\", fontname=\"Helvetica Neue\", fontsize=12, rankdir=LR, nodesep=0.8, ranksep=1.2];\n");
774    out.push_str("  node [shape=box, style=\"filled,rounded\", fillcolor=\"#F2F2F7\", fontname=\"Helvetica Neue\", fontsize=11, color=\"#C7C7CC\"];\n");
775    out.push_str("  edge [fontname=\"Helvetica Neue\", fontsize=9, color=\"#8E8E93\"];\n");
776    for node in nodes {
777        let node_id = sanitize_dot_id(&node.name);
778        let escaped = node.name.replace('"', "\\\"");
779        let _ = writeln!(out, "  {node_id} [label=\"{escaped}\"];");
780    }
781    for edge in edges {
782        let from = sanitize_dot_id(&edge.from);
783        let to = sanitize_dot_id(&edge.to);
784        let label = edge.relation.replace('"', "\\\"");
785        let _ = writeln!(out, "  {from} -> {to} [label=\"{label}\"];");
786    }
787    out.push_str("}\n");
788    out
789}
790
/// Converts an entity name into an identifier usable as a Mermaid flowchart node id.
///
/// Every character other than an ASCII letter, digit, or `_` becomes `_`.
/// A leading `_` is then added when the result is empty or collides with a
/// word the flowchart grammar treats specially. Mermaid documents that a
/// lowercase `end` node breaks the chart; the remaining entries are a
/// conservative guard (prefixing is harmless because the label still shows
/// the original name). The comparison ignores ASCII case for the same reason.
///
/// Distinct names can still map to the same identifier (`a-b` and `a_b`).
fn sanitize_mermaid_id(raw: &str) -> String {
    const RESERVED: [&str; 9] = [
        "end", "graph", "subgraph", "style", "class", "classDef", "click", "default", "linkStyle",
    ];

    let mut id = String::with_capacity(raw.len() + 1);
    id.extend(raw.chars().map(|c| {
        if c.is_ascii_alphanumeric() || c == '_' {
            c
        } else {
            '_'
        }
    }));

    if id.is_empty() || RESERVED.iter().any(|word| id.eq_ignore_ascii_case(word)) {
        id.insert(0, '_');
    }
    id
}
802
803fn render_mermaid(nodes: &[NodeOut], edges: &[EdgeOut]) -> String {
804    use std::fmt::Write;
805    let mut out = String::with_capacity(nodes.len() * 50 + edges.len() * 40 + 200);
806    out.push_str("%%{init: {'theme': 'neutral', 'themeVariables': {'primaryColor': '#F2F2F7', 'primaryTextColor': '#1C1C1E', 'primaryBorderColor': '#C7C7CC', 'lineColor': '#8E8E93'}}}%%\n");
807    out.push_str("graph LR\n");
808    for node in nodes {
809        let id = sanitize_mermaid_id(&node.name);
810        let escaped = node.name.replace('"', "\\\"");
811        let _ = writeln!(out, "  {id}[\"{escaped}\"]");
812    }
813    for edge in edges {
814        let from = sanitize_mermaid_id(&edge.from);
815        let to = sanitize_mermaid_id(&edge.to);
816        let label = edge.relation.replace('|', "\\|");
817        let _ = writeln!(out, "  {from} -->|{label}| {to}");
818    }
819    out
820}
821
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::{Cli, Commands};
    use clap::Parser;

    /// Builds a `NodeOut` fixture whose `kind` and `type` fields both carry `kind`.
    fn make_node(kind: &str) -> NodeOut {
        let label = kind.to_string();
        NodeOut {
            id: 1,
            name: String::from("test-entity"),
            namespace: String::from("default"),
            kind: label.clone(),
            r#type: label,
        }
    }

    /// Extracts the optional nested subcommand from a parsed `graph` invocation.
    ///
    /// Panics with `unexpected command` when the parsed CLI is not `graph`.
    fn graph_subcommand(cli: Cli) -> Option<GraphSubcommand> {
        match cli.command {
            Commands::Graph(graph) => graph.subcommand,
            _ => unreachable!("unexpected command"),
        }
    }

    /// The serialized node exposes the same value under `kind` and `type`.
    #[test]
    fn node_out_type_duplicates_kind() {
        let serialized =
            serde_json::to_value(make_node("agent")).expect("serialization must work");
        let (kind, ty) = (&serialized["kind"], &serialized["type"]);
        assert_eq!(kind, ty);
        assert_eq!(*kind, "agent");
        assert_eq!(*ty, "agent");
    }

    /// Every `NodeOut` field shows up as a key in the serialized object.
    #[test]
    fn node_out_serializes_all_fields() {
        let serialized =
            serde_json::to_value(make_node("document")).expect("serialization must work");
        let has = |key: &str| serialized.get(key).is_some();
        assert!(has("id"));
        assert!(has("name"));
        assert!(has("namespace"));
        assert!(has("kind"));
        assert!(has("type"));
    }

    /// Nodes rendered through `render_json` keep `kind` and `type` in sync.
    #[test]
    fn graph_snapshot_serializes_nodes_with_type() {
        let snapshot = GraphSnapshot {
            nodes: vec![make_node("concept")],
            entities: vec![make_node("concept")],
            edges: Vec::new(),
            elapsed_ms: 0,
        };
        let rendered = render_json(&snapshot).expect("rendering must work");
        let parsed: serde_json::Value = serde_json::from_str(&rendered).expect("valid json");
        let first = &parsed["nodes"][0];
        assert_eq!(first["kind"], first["type"]);
        assert_eq!(first["type"], "concept");
    }

    /// A traverse response serializes its scalar fields and its hop list.
    #[test]
    fn graph_traverse_response_serializes_correctly() {
        let hop = TraverseHop {
            entity: String::from("entity-b"),
            relation: String::from("uses"),
            direction: String::from("outbound"),
            weight: 1.0,
            depth: 1,
        };
        let response = GraphTraverseResponse {
            from: String::from("entity-a"),
            namespace: String::from("global"),
            depth: 2,
            hops: vec![hop],
            elapsed_ms: 5,
        };
        let serialized = serde_json::to_value(&response).unwrap();
        assert_eq!(serialized["from"], "entity-a");
        assert_eq!(serialized["depth"], 2);
        let hops = &serialized["hops"];
        assert!(hops.is_array());
        assert_eq!(hops[0]["direction"], "outbound");
    }

    /// A stats response serializes its counters and degree figures unchanged.
    #[test]
    fn graph_stats_response_serializes_correctly() {
        let response = GraphStatsResponse {
            namespace: Some(String::from("global")),
            node_count: 10,
            edge_count: 15,
            avg_degree: 3.0,
            max_degree: 7,
            elapsed_ms: 2,
        };
        let serialized = serde_json::to_value(&response).unwrap();
        assert_eq!(serialized["node_count"], 10);
        assert_eq!(serialized["edge_count"], 15);
        assert_eq!(serialized["avg_degree"], 3.0);
        assert_eq!(serialized["max_degree"], 7);
    }

    /// Average degree as `2 * edges / nodes`, or `0.0` when there are no nodes.
    ///
    /// NOTE(review): this is a test-local copy of the formula; presumably it
    /// mirrors the stats handler — confirm the two stay in sync.
    fn compute_avg_degree(node_count: i64, edge_count: i64) -> f64 {
        if node_count <= 0 {
            return 0.0;
        }
        2.0 * (edge_count as f64) / (node_count as f64)
    }

    #[test]
    fn avg_degree_is_zero_when_no_nodes() {
        let avg = compute_avg_degree(0, 0);
        assert_eq!(avg, 0.0);
    }

    #[test]
    fn avg_degree_is_zero_when_nodes_but_no_edges() {
        // Reproduces L1 bug: previously returned 1.0 instead of 0.0.
        let avg = compute_avg_degree(2, 0);
        assert_eq!(avg, 0.0);
    }

    #[test]
    fn avg_degree_is_two_when_triangle() {
        // 3 nodes, 3 edges: 2 * 3 / 3 = 2.0
        let avg = compute_avg_degree(3, 3);
        assert_eq!(avg, 2.0);
    }

    /// The entities response carries the entity list plus its paging fields.
    #[test]
    fn graph_entities_response_serializes_required_fields() {
        let item = EntityItem {
            id: 1,
            name: String::from("claude-code"),
            entity_type: String::from("agent"),
            namespace: String::from("global"),
            created_at: String::from("2026-01-01T00:00:00Z"),
            degree: 0,
            description: None,
        };
        let response = GraphEntitiesResponse {
            entities: vec![item],
            total_count: 1,
            limit: 50,
            offset: 0,
            namespace: Some(String::from("global")),
            elapsed_ms: 3,
        };
        let serialized = serde_json::to_value(&response).unwrap();
        let listed = &serialized["entities"];
        assert!(listed.is_array());
        assert_eq!(listed[0]["name"], "claude-code");
        assert_eq!(listed[0]["entity_type"], "agent");
        assert_eq!(serialized["total_count"], 1);
        assert_eq!(serialized["limit"], 50);
        assert_eq!(serialized["offset"], 0);
        assert_eq!(serialized["namespace"], "global");
    }

    /// Identity, type, namespace and timestamp of an entity item round-trip to JSON.
    #[test]
    fn entity_item_serializes_all_fields() {
        let item = EntityItem {
            id: 42,
            name: String::from("test-entity"),
            entity_type: String::from("concept"),
            namespace: String::from("project-a"),
            created_at: String::from("2026-04-19T12:00:00Z"),
            degree: 3,
            description: Some(String::from("test description")),
        };
        let serialized = serde_json::to_value(&item).unwrap();
        assert_eq!(serialized["id"], 42);
        assert_eq!(serialized["name"], "test-entity");
        assert_eq!(serialized["entity_type"], "concept");
        assert_eq!(serialized["namespace"], "project-a");
        assert_eq!(serialized["created_at"], "2026-04-19T12:00:00Z");
    }

    #[test]
    fn entity_item_entity_type_is_never_null() {
        // P2-C: entity_type must never be null, even when DB column is empty.
        let untyped = EntityItem {
            id: 1,
            name: String::from("sem-tipo"),
            entity_type: String::new(),
            namespace: String::from("ns"),
            created_at: String::from("2026-01-01T00:00:00Z"),
            degree: 0,
            description: None,
        };
        let serialized = serde_json::to_value(&untyped).unwrap();
        let entity_type = &serialized["entity_type"];
        assert!(!entity_type.is_null(), "entity_type must not be null");
        assert!(entity_type.is_string());
    }

    /// `graph traverse` does not accept the `dot` output format.
    #[test]
    fn graph_traverse_cli_rejects_format_dot() {
        let argv = [
            "sqlite-graphrag",
            "graph",
            "traverse",
            "--from",
            "AuthDecision",
            "--format",
            "dot",
        ];
        let outcome = Cli::try_parse_from(argv);
        assert!(outcome.is_err(), "graph traverse must reject format=dot");
    }

    /// `graph stats --format text` parses into `GraphStatsFormat::Text`.
    #[test]
    fn graph_stats_cli_accepts_format_text() {
        let cli = Cli::try_parse_from(["sqlite-graphrag", "graph", "stats", "--format", "text"])
            .expect("graph stats --format text must be accepted");

        match graph_subcommand(cli) {
            Some(GraphSubcommand::Stats(stats)) => {
                assert_eq!(stats.format, GraphStatsFormat::Text);
            }
            _ => unreachable!("unexpected subcommand"),
        }
    }

    /// `graph stats` does not accept the `mermaid` output format.
    #[test]
    fn graph_stats_cli_rejects_format_mermaid() {
        let argv = ["sqlite-graphrag", "graph", "stats", "--format", "mermaid"];
        let outcome = Cli::try_parse_from(argv);
        assert!(outcome.is_err(), "graph stats must reject format=mermaid");
    }

    /// The response exposes `entities` and never the legacy `items` key.
    #[test]
    fn graph_entities_response_has_no_items_key() {
        let empty = GraphEntitiesResponse {
            entities: Vec::new(),
            total_count: 0,
            limit: 50,
            offset: 0,
            namespace: None,
            elapsed_ms: 0,
        };
        let serialized = serde_json::to_value(&empty).unwrap();
        let has = |key: &str| serialized.get(key).is_some();
        assert!(!has("items"), "legacy 'items' key must not appear");
        assert!(has("entities"), "'entities' key must be present");
    }

    #[test]
    fn build_order_by_defaults_to_name_asc() {
        assert_eq!(build_order_by(None, SortOrder::Asc), "ORDER BY e.name ASC");
    }

    #[test]
    fn build_order_by_name_desc() {
        let field = Some(EntitySortField::Name);
        assert_eq!(
            build_order_by(field, SortOrder::Desc),
            "ORDER BY e.name DESC"
        );
    }

    #[test]
    fn build_order_by_degree_desc() {
        let field = Some(EntitySortField::Degree);
        assert_eq!(
            build_order_by(field, SortOrder::Desc),
            "ORDER BY degree DESC"
        );
    }

    #[test]
    fn build_order_by_degree_asc() {
        let field = Some(EntitySortField::Degree);
        assert_eq!(build_order_by(field, SortOrder::Asc), "ORDER BY degree ASC");
    }

    #[test]
    fn build_order_by_created_at_asc() {
        let field = Some(EntitySortField::CreatedAt);
        assert_eq!(
            build_order_by(field, SortOrder::Asc),
            "ORDER BY e.created_at ASC"
        );
    }

    #[test]
    fn build_order_by_created_at_desc() {
        let field = Some(EntitySortField::CreatedAt);
        assert_eq!(
            build_order_by(field, SortOrder::Desc),
            "ORDER BY e.created_at DESC"
        );
    }

    /// `--sort-by degree --order desc` is parsed into the matching enum values.
    #[test]
    fn graph_entities_cli_accepts_sort_by_degree_desc() {
        let argv = [
            "sqlite-graphrag",
            "graph",
            "entities",
            "--sort-by",
            "degree",
            "--order",
            "desc",
        ];
        let cli = Cli::try_parse_from(argv)
            .expect("graph entities --sort-by degree --order desc must parse");
        match graph_subcommand(cli) {
            Some(GraphSubcommand::Entities(entities)) => {
                assert!(matches!(entities.sort_by, Some(EntitySortField::Degree)));
                assert!(matches!(entities.order, SortOrder::Desc));
            }
            _ => unreachable!("unexpected subcommand"),
        }
    }

    /// `--sort-by created-at` alone parses, with the order left at ascending.
    #[test]
    fn graph_entities_cli_accepts_sort_by_created_at_asc() {
        let argv = [
            "sqlite-graphrag",
            "graph",
            "entities",
            "--sort-by",
            "created-at",
        ];
        let cli =
            Cli::try_parse_from(argv).expect("graph entities --sort-by created-at must parse");
        match graph_subcommand(cli) {
            Some(GraphSubcommand::Entities(entities)) => {
                assert!(matches!(entities.sort_by, Some(EntitySortField::CreatedAt)));
                assert!(matches!(entities.order, SortOrder::Asc));
            }
            _ => unreachable!("unexpected subcommand"),
        }
    }

    /// Without sort flags, `sort_by` is `None` and `order` is ascending.
    #[test]
    fn graph_entities_cli_defaults_to_no_sort_by() {
        let cli = Cli::try_parse_from(["sqlite-graphrag", "graph", "entities"])
            .expect("graph entities must parse without sort flags");
        match graph_subcommand(cli) {
            Some(GraphSubcommand::Entities(entities)) => {
                assert!(entities.sort_by.is_none(), "sort_by must default to None");
                assert!(
                    matches!(entities.order, SortOrder::Asc),
                    "order must default to Asc"
                );
            }
            _ => unreachable!("unexpected subcommand"),
        }
    }
}