Skip to main content

sqlite_graphrag/commands/
graph_export.rs

1//! Handler for the `graph-export` CLI subcommand.
2
3use crate::cli::GraphExportFormat;
4use crate::entity_type::EntityType;
5use crate::errors::AppError;
6use crate::output;
7use crate::paths::AppPaths;
8use crate::storage::connection::open_ro;
9use crate::storage::entities;
10use serde::Serialize;
11use std::collections::HashMap;
12use std::fs;
13use std::path::PathBuf;
14use std::time::Instant;
15
/// Optional nested subcommands. When absent, the default behavior exports
/// the full entity snapshot for backward compatibility.
// Each variant wraps the argument struct of one nested subcommand; `run`
// matches on the variant and forwards the wrapped arguments to the
// corresponding handler (`run_traverse`, `run_stats`, `run_entities`).
// Plain `//` comments are used here on purpose: clap turns `///` comments
// into user-visible help text.
#[derive(clap::Subcommand)]
pub enum GraphSubcommand {
    /// Traverse relationships from a starting entity using BFS
    Traverse(GraphTraverseArgs),
    /// Show graph statistics (node/edge counts, degree distribution)
    Stats(GraphStatsArgs),
    /// List entities stored in the graph with optional filters
    Entities(GraphEntitiesArgs),
}
27
// Values accepted by `graph traverse --format`. JSON is the only variant;
// `run_traverse` reads the field but always emits JSON.
#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
pub enum GraphTraverseFormat {
    Json,
}
32
// Values accepted by `graph stats --format`.
#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
pub enum GraphStatsFormat {
    // Structured response serialized from `GraphStatsResponse`.
    Json,
    // Single `key=value` summary line built in `run_stats`.
    Text,
}
38
// Top-level arguments of the `graph` command. When `subcommand` is `None`,
// `run` uses the remaining fields to export the full snapshot; when it is
// `Some`, the nested argument struct is used instead and the fields below are
// not consulted by `run`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Export full entity snapshot as JSON (default)\n  \
    sqlite-graphrag graph\n\n  \
    # Traverse relationships from a starting entity\n  \
    sqlite-graphrag graph traverse --from acme-corp --depth 2\n\n  \
    # Show graph statistics as structured JSON\n  \
    sqlite-graphrag graph stats --format json\n\n  \
    # List entities filtered by type\n  \
    sqlite-graphrag graph entities --entity-type person\n\n  \
    # Export full snapshot in DOT format for Graphviz\n  \
    sqlite-graphrag graph --format dot --output graph.dot\n\n  \
NOTES:\n  \
    Without a subcommand, exports the full entity+edge snapshot.\n  \
    Use `traverse`, `stats`, or `entities` for targeted queries.")]
pub struct GraphArgs {
    /// Optional subcommand; without one, export the full entity snapshot.
    #[command(subcommand)]
    pub subcommand: Option<GraphSubcommand>,
    /// Filter by namespace. Defaults to all namespaces.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Snapshot output format.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphExportFormat,
    /// File path to write output instead of stdout.
    #[arg(long)]
    pub output: Option<PathBuf>,
    // Hidden flag. In the snapshot path it is not entirely inert: when set,
    // `run_entities_snapshot` forces the JSON format and ignores `output`.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location override; clap falls back to the
    // `SQLITE_GRAPHRAG_DB_PATH` environment variable when the flag is absent.
    // The value is handed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
72
// Arguments of `graph traverse`, consumed by `run_traverse`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Traverse relationships from an entity with default depth (2)\n  \
    sqlite-graphrag graph traverse --from acme-corp\n\n  \
    # Increase traversal depth to 3 hops\n  \
    sqlite-graphrag graph traverse --from acme-corp --depth 3\n\n  \
    # Traverse within a specific namespace\n  \
    sqlite-graphrag graph traverse --from acme-corp --namespace project-x\n\n  \
NOTES:\n  \
    Output is always JSON. The `hops` array contains each reachable entity\n  \
    with its relation, direction (inbound/outbound), weight, and depth level.")]
pub struct GraphTraverseArgs {
    /// Root entity name for the traversal.
    #[arg(long)]
    pub from: String,
    /// Maximum traversal depth.
    #[arg(long, default_value_t = 2u32)]
    pub depth: u32,
    // Namespace to traverse in; passed through
    // `crate::namespace::resolve_namespace`, which decides what an omitted
    // value means (NOTE(review): default namespace not visible here).
    #[arg(long)]
    pub namespace: Option<String>,
    // Accepted for CLI symmetry; JSON is the only possible value.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphTraverseFormat,
    // Hidden flag; `run_traverse` never reads it.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location override; clap falls back to the
    // `SQLITE_GRAPHRAG_DB_PATH` environment variable when the flag is absent.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
100
// Arguments of `graph stats`, consumed by `run_stats`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Show stats for all namespaces (human-readable text)\n  \
    sqlite-graphrag graph stats --format text\n\n  \
    # Show stats as structured JSON\n  \
    sqlite-graphrag graph stats --format json\n\n  \
    # Show stats for a specific namespace\n  \
    sqlite-graphrag graph stats --namespace project-x --format text\n\n  \
NOTES:\n  \
    Reports node_count, edge_count, avg_degree, and max_degree.\n  \
    Default format is JSON. Use `--format text` for a compact single-line summary.")]
pub struct GraphStatsArgs {
    // Restricts the counts to one namespace; when omitted, `run_stats` runs
    // the unfiltered queries and the text output labels the scope as "all".
    #[arg(long)]
    pub namespace: Option<String>,
    /// Output format for the stats response.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphStatsFormat,
    // Hidden flag; when set, `run_stats` emits JSON regardless of `format`.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location override; clap falls back to the
    // `SQLITE_GRAPHRAG_DB_PATH` environment variable when the flag is absent.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
123
124/// Field to sort entities by in `graph entities`.
125#[derive(Debug, Clone, Copy, clap::ValueEnum)]
126pub enum EntitySortField {
127    /// Sort alphabetically by entity name.
128    Name,
129    /// Sort by degree (total number of relationships, descending by default).
130    Degree,
131    /// Sort by entity creation timestamp.
132    CreatedAt,
133}
134
/// Sort direction for `graph entities`.
#[derive(Debug, Clone, Copy, Default, clap::ValueEnum)]
pub enum SortOrder {
    // Ascending; this is both the `Default` impl and the CLI default of `--order`.
    #[default]
    Asc,
    // Descending.
    Desc,
}
142
// Arguments of `graph entities`, consumed by `run_entities`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # List all entities (default limit applies)\n  \
    sqlite-graphrag graph entities\n\n  \
    # Filter by entity type\n  \
    sqlite-graphrag graph entities --entity-type person\n\n  \
    # Filter by namespace and type\n  \
    sqlite-graphrag graph entities --namespace project-x --entity-type concept\n\n  \
    # Paginate results (skip first 20, return next 10)\n  \
    sqlite-graphrag graph entities --offset 20 --limit 10\n\n  \
    # Sort by degree descending (most connected first)\n  \
    sqlite-graphrag graph entities --sort-by degree --order desc\n\n  \
    # Sort by creation date ascending\n  \
    sqlite-graphrag graph entities --sort-by created-at --order asc\n\n  \
NOTES:\n  \
    Output is always JSON with `entities`, `total_count`, `limit`, and `offset` fields.\n  \
    Entity types are strings extracted by GLiNER NER (e.g. `person`, `organization`, `location`).")]
pub struct GraphEntitiesArgs {
    // Restricts the listing to one namespace; when omitted no namespace
    // filter is added to the query.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Filter by entity type (one of the 13 canonical types).
    #[arg(long, value_enum)]
    pub entity_type: Option<EntityType>,
    /// Maximum number of results to return.
    #[arg(long, default_value_t = crate::constants::K_GRAPH_ENTITIES_DEFAULT_LIMIT)]
    pub limit: usize,
    /// Number of results to skip for pagination.
    #[arg(long, default_value_t = 0usize)]
    pub offset: usize,
    /// Sort entities by this field. When omitted, the default order is by name ascending.
    #[arg(long, value_enum, help = "Sort entities by field")]
    pub sort_by: Option<EntitySortField>,
    /// Sort direction: `asc` (default) or `desc`.
    #[arg(long, value_enum, default_value_t = SortOrder::Asc, help = "Sort order")]
    pub order: SortOrder,
    // Hidden flag; `run_entities` never reads it.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location override; clap falls back to the
    // `SQLITE_GRAPHRAG_DB_PATH` environment variable when the flag is absent.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
183
/// One entity as it appears in the exported graph snapshot.
#[derive(Serialize)]
struct NodeOut {
    /// Entity row id; also the key used to resolve edge endpoints to names.
    id: i64,
    /// Entity name; edges refer to their endpoints by this name.
    name: String,
    /// Namespace the entity belongs to.
    namespace: String,
    /// Deprecated alias of `type` kept for backward-compat with pre-v1.0.35 clients.
    /// New consumers MUST read `type` instead. Will be removed in a future major release.
    kind: String,
    /// Canonical entity classification (organization, concept, person, etc.).
    /// Mirrors `kind` while the deprecation window is active.
    #[serde(rename = "type")]
    r#type: String,
}
197
/// One relationship as it appears in the exported graph snapshot.
///
/// Endpoints are entity names, not ids: the snapshot exporter resolves the
/// stored source/target ids through its id-to-name map before building this.
#[derive(Serialize)]
struct EdgeOut {
    /// Name of the source entity.
    from: String,
    /// Name of the target entity.
    to: String,
    /// Relationship label.
    relation: String,
    /// Relationship weight, copied from the stored relationship.
    weight: f64,
}
205
/// Payload of the JSON snapshot export (`graph` without a subcommand).
#[derive(Serialize)]
struct GraphSnapshot {
    /// All exported entities.
    nodes: Vec<NodeOut>,
    /// All exported relationships whose endpoints were both resolved.
    edges: Vec<EdgeOut>,
    /// Wall-clock time spent building the snapshot, in milliseconds.
    elapsed_ms: u64,
}
212
/// One relationship encountered while traversing from the root entity.
#[derive(Serialize)]
struct TraverseHop {
    /// Name of the entity at the far end of the relationship.
    entity: String,
    /// Relationship label.
    relation: String,
    /// `"outbound"` when the expanded entity is the relationship source,
    /// `"inbound"` when it is the target.
    direction: String,
    /// Relationship weight, copied from the stored relationship.
    weight: f64,
    /// Depth assigned to `entity`: the expanded entity's depth plus one.
    depth: u32,
}
221
/// JSON response of `graph traverse`.
#[derive(Serialize)]
struct GraphTraverseResponse {
    /// Root entity name as given on the command line.
    from: String,
    /// Resolved namespace the traversal ran in.
    namespace: String,
    /// Maximum depth requested.
    depth: u32,
    /// Relationships encountered, in discovery order.
    hops: Vec<TraverseHop>,
    /// Wall-clock time spent, in milliseconds.
    elapsed_ms: u64,
}
230
/// JSON response of `graph stats`.
#[derive(Serialize)]
struct GraphStatsResponse {
    /// Namespace filter that was applied, or `None` (serialized as `null`)
    /// when the statistics cover every namespace.
    namespace: Option<String>,
    /// Number of entities in scope.
    node_count: i64,
    /// Number of relationships in scope.
    edge_count: i64,
    /// `2 * edge_count / node_count`, or `0.0` when there are no nodes.
    avg_degree: f64,
    /// Largest `degree` value among the entities in scope (0 when none).
    max_degree: i64,
    /// Wall-clock time spent, in milliseconds.
    elapsed_ms: u64,
}
240
/// One entity row in the `graph entities` listing.
#[derive(Serialize)]
struct EntityItem {
    /// Entity row id.
    id: i64,
    /// Entity name.
    name: String,
    /// Entity type; an empty string when the stored type is NULL.
    entity_type: String,
    /// Namespace the entity belongs to.
    namespace: String,
    /// Creation time rendered as `YYYY-MM-DDTHH:MM:SSZ` from the stored
    /// integer timestamp (interpreted as seconds since the Unix epoch).
    created_at: String,
    /// Total number of relationships (inbound + outbound) for this entity.
    degree: u32,
}
251
/// JSON response of `graph entities`.
#[derive(Serialize)]
struct GraphEntitiesResponse {
    /// The requested page of entities.
    entities: Vec<EntityItem>,
    /// Number of entities matching the filters, ignoring `limit`/`offset`.
    total_count: i64,
    /// Page size that was requested.
    limit: usize,
    /// Number of rows that were skipped.
    offset: usize,
    /// Namespace filter that was applied, if any.
    namespace: Option<String>,
    /// Wall-clock time spent, in milliseconds.
    elapsed_ms: u64,
}
261
262pub fn run(args: GraphArgs) -> Result<(), AppError> {
263    match args.subcommand {
264        None => run_entities_snapshot(
265            args.db.as_deref(),
266            args.namespace.as_deref(),
267            args.format,
268            args.json,
269            args.output.as_deref(),
270        ),
271        Some(GraphSubcommand::Traverse(a)) => run_traverse(a),
272        Some(GraphSubcommand::Stats(a)) => run_stats(a),
273        Some(GraphSubcommand::Entities(a)) => run_entities(a),
274    }
275}
276
277fn run_entities_snapshot(
278    db: Option<&str>,
279    namespace: Option<&str>,
280    format: GraphExportFormat,
281    json: bool,
282    output_path: Option<&std::path::Path>,
283) -> Result<(), AppError> {
284    let inicio = Instant::now();
285    let paths = AppPaths::resolve(db)?;
286
287    crate::storage::connection::ensure_db_ready(&paths)?;
288
289    let conn = open_ro(&paths.db)?;
290
291    let nodes_raw = entities::list_entities(&conn, namespace)?;
292    let edges_raw = entities::list_relationships_by_namespace(&conn, namespace)?;
293
294    let id_to_name: HashMap<i64, String> =
295        nodes_raw.iter().map(|n| (n.id, n.name.clone())).collect();
296
297    let nodes: Vec<NodeOut> = nodes_raw
298        .into_iter()
299        .map(|n| NodeOut {
300            id: n.id,
301            name: n.name,
302            namespace: n.namespace,
303            r#type: n.kind.clone(),
304            kind: n.kind,
305        })
306        .collect();
307
308    let mut edges: Vec<EdgeOut> = Vec::with_capacity(edges_raw.len());
309    let mut orphan_edges: usize = 0;
310    for r in edges_raw {
311        let from = match id_to_name.get(&r.source_id) {
312            Some(n) => n.clone(),
313            None => {
314                orphan_edges += 1;
315                tracing::warn!(source_id = r.source_id, relation = %r.relation, "edge skipped: source entity not found in id_to_name map");
316                continue;
317            }
318        };
319        let to = match id_to_name.get(&r.target_id) {
320            Some(n) => n.clone(),
321            None => {
322                orphan_edges += 1;
323                tracing::warn!(target_id = r.target_id, relation = %r.relation, "edge skipped: target entity not found in id_to_name map");
324                continue;
325            }
326        };
327        edges.push(EdgeOut {
328            from,
329            to,
330            relation: r.relation,
331            weight: r.weight,
332        });
333    }
334    if orphan_edges > 0 {
335        tracing::warn!(
336            count = orphan_edges,
337            "edges skipped due to orphaned entity references"
338        );
339    }
340
341    let effective_format = if json {
342        GraphExportFormat::Json
343    } else {
344        format
345    };
346
347    if effective_format == GraphExportFormat::Ndjson {
348        let elapsed_ms = inicio.elapsed().as_millis() as u64;
349        render_ndjson_streaming(&nodes, &edges, elapsed_ms, output_path)?;
350        return Ok(());
351    }
352
353    let rendered = match effective_format {
354        GraphExportFormat::Json => render_json(&GraphSnapshot {
355            nodes,
356            edges,
357            elapsed_ms: inicio.elapsed().as_millis() as u64,
358        })?,
359        GraphExportFormat::Dot => render_dot(&nodes, &edges),
360        GraphExportFormat::Mermaid => render_mermaid(&nodes, &edges),
361        GraphExportFormat::Ndjson => unreachable!("ndjson handled above"),
362    };
363
364    if let Some(path) = output_path.filter(|_| !json) {
365        fs::write(path, &rendered)?;
366        output::emit_progress(&format!("wrote {}", path.display()));
367    } else {
368        output::emit_text(&rendered);
369    }
370
371    Ok(())
372}
373
374fn run_traverse(args: GraphTraverseArgs) -> Result<(), AppError> {
375    let inicio = Instant::now();
376    let _ = args.format;
377    let paths = AppPaths::resolve(args.db.as_deref())?;
378
379    crate::storage::connection::ensure_db_ready(&paths)?;
380
381    let conn = open_ro(&paths.db)?;
382    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
383
384    let from_id = entities::find_entity_id(&conn, &namespace, &args.from)?
385        .ok_or_else(|| AppError::NotFound(format!("entity '{}' not found", args.from)))?;
386
387    let all_rels = entities::list_relationships_by_namespace(&conn, Some(&namespace))?;
388    let all_entities = entities::list_entities(&conn, Some(&namespace))?;
389    let id_to_name: HashMap<i64, String> = all_entities
390        .iter()
391        .map(|e| (e.id, e.name.clone()))
392        .collect();
393
394    let mut hops: Vec<TraverseHop> = Vec::with_capacity(16);
395    let mut visited: std::collections::HashSet<i64> = std::collections::HashSet::new();
396    let mut frontier: Vec<(i64, u32)> = vec![(from_id, 0)];
397
398    while let Some((current_id, current_depth)) = frontier.pop() {
399        if current_depth >= args.depth || visited.contains(&current_id) {
400            continue;
401        }
402        visited.insert(current_id);
403
404        for rel in &all_rels {
405            if rel.source_id == current_id {
406                if let Some(target_name) = id_to_name.get(&rel.target_id) {
407                    hops.push(TraverseHop {
408                        entity: target_name.clone(),
409                        relation: rel.relation.clone(),
410                        direction: "outbound".to_string(),
411                        weight: rel.weight,
412                        depth: current_depth + 1,
413                    });
414                    frontier.push((rel.target_id, current_depth + 1));
415                }
416            } else if rel.target_id == current_id {
417                if let Some(source_name) = id_to_name.get(&rel.source_id) {
418                    hops.push(TraverseHop {
419                        entity: source_name.clone(),
420                        relation: rel.relation.clone(),
421                        direction: "inbound".to_string(),
422                        weight: rel.weight,
423                        depth: current_depth + 1,
424                    });
425                    frontier.push((rel.source_id, current_depth + 1));
426                }
427            }
428        }
429    }
430
431    output::emit_json(&GraphTraverseResponse {
432        from: args.from,
433        namespace,
434        depth: args.depth,
435        hops,
436        elapsed_ms: inicio.elapsed().as_millis() as u64,
437    })?;
438
439    Ok(())
440}
441
442fn run_stats(args: GraphStatsArgs) -> Result<(), AppError> {
443    let inicio = Instant::now();
444    let paths = AppPaths::resolve(args.db.as_deref())?;
445
446    crate::storage::connection::ensure_db_ready(&paths)?;
447
448    let conn = open_ro(&paths.db)?;
449    let ns = args.namespace.as_deref();
450
451    let node_count: i64 = if let Some(n) = ns {
452        conn.query_row(
453            "SELECT COUNT(*) FROM entities WHERE namespace = ?1",
454            rusqlite::params![n],
455            |r| r.get(0),
456        )?
457    } else {
458        conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?
459    };
460
461    let edge_count: i64 = if let Some(n) = ns {
462        conn.query_row(
463            "SELECT COUNT(*) FROM relationships r
464             JOIN entities s ON s.id = r.source_id
465             WHERE s.namespace = ?1",
466            rusqlite::params![n],
467            |r| r.get(0),
468        )?
469    } else {
470        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?
471    };
472
473    let max_degree: i64 = if let Some(n) = ns {
474        conn.query_row(
475            "SELECT COALESCE(MAX(degree), 0) FROM entities WHERE namespace = ?1",
476            rusqlite::params![n],
477            |r| r.get(0),
478        )?
479    } else {
480        conn.query_row("SELECT COALESCE(MAX(degree), 0) FROM entities", [], |r| {
481            r.get(0)
482        })?
483    };
484
485    // avg_degree = 2 * edge_count / node_count (each edge contributes 2 to total degree sum).
486    let avg_degree = if node_count > 0 {
487        2.0 * (edge_count as f64) / (node_count as f64)
488    } else {
489        0.0
490    };
491
492    let resp = GraphStatsResponse {
493        namespace: args.namespace,
494        node_count,
495        edge_count,
496        avg_degree,
497        max_degree,
498        elapsed_ms: inicio.elapsed().as_millis() as u64,
499    };
500
501    let effective_format = if args.json {
502        GraphStatsFormat::Json
503    } else {
504        args.format
505    };
506
507    match effective_format {
508        GraphStatsFormat::Json => output::emit_json(&resp)?,
509        GraphStatsFormat::Text => {
510            output::emit_text(&format!(
511                "nodes={} edges={} avg_degree={:.2} max_degree={} namespace={}",
512                resp.node_count,
513                resp.edge_count,
514                resp.avg_degree,
515                resp.max_degree,
516                resp.namespace.as_deref().unwrap_or("all"),
517            ));
518        }
519    }
520
521    Ok(())
522}
523
524/// Builds the `ORDER BY` clause fragment from sort options.
525///
526/// Returns a static SQL fragment such as `ORDER BY e.name ASC`.
527fn build_order_by(sort_by: Option<EntitySortField>, order: SortOrder) -> &'static str {
528    // The combinations are enumerated as static strings to avoid
529    // format!() allocations in the hot path and satisfy the borrow checker
530    // when the string is used inside conn.prepare().
531    match (sort_by, order) {
532        (None, SortOrder::Asc) | (Some(EntitySortField::Name), SortOrder::Asc) => {
533            "ORDER BY e.name ASC"
534        }
535        (Some(EntitySortField::Name), SortOrder::Desc) => "ORDER BY e.name DESC",
536        (Some(EntitySortField::Degree), SortOrder::Asc) => "ORDER BY degree ASC",
537        (Some(EntitySortField::Degree), SortOrder::Desc) => "ORDER BY degree DESC",
538        (Some(EntitySortField::CreatedAt), SortOrder::Asc) => "ORDER BY e.created_at ASC",
539        (Some(EntitySortField::CreatedAt), SortOrder::Desc) => "ORDER BY e.created_at DESC",
540        // Fallback: None/Desc → sort by name desc (consistent with dir variable).
541        (None, SortOrder::Desc) => "ORDER BY e.name DESC",
542    }
543}
544
545fn run_entities(args: GraphEntitiesArgs) -> Result<(), AppError> {
546    let inicio = Instant::now();
547    let paths = AppPaths::resolve(args.db.as_deref())?;
548
549    crate::storage::connection::ensure_db_ready(&paths)?;
550
551    let conn = open_ro(&paths.db)?;
552
553    let row_to_item = |r: &rusqlite::Row<'_>| -> rusqlite::Result<EntityItem> {
554        let ts: i64 = r.get(4)?;
555        let created_at = chrono::DateTime::from_timestamp(ts, 0)
556            .unwrap_or_default()
557            .format("%Y-%m-%dT%H:%M:%SZ")
558            .to_string();
559        Ok(EntityItem {
560            id: r.get(0)?,
561            name: r.get(1)?,
562            entity_type: r.get(2)?,
563            namespace: r.get(3)?,
564            created_at,
565            degree: r.get(5)?,
566        })
567    };
568
569    let limit_i = args.limit as i64;
570    let offset_i = args.offset as i64;
571    let order_clause = build_order_by(args.sort_by, args.order);
572
573    let base_select = "SELECT e.id, e.name, COALESCE(e.type, ''), e.namespace, e.created_at,
574                        (SELECT COUNT(*) FROM relationships r
575                         WHERE r.source_id = e.id OR r.target_id = e.id) AS degree
576                 FROM entities e";
577
578    let (total_count, items) = match (
579        args.namespace.as_deref(),
580        args.entity_type.map(|et| et.as_str()),
581    ) {
582        (Some(ns), Some(et)) => {
583            let count: i64 = conn.query_row(
584                "SELECT COUNT(*) FROM entities WHERE namespace = ?1 AND type = ?2",
585                rusqlite::params![ns, et],
586                |r| r.get(0),
587            )?;
588            let sql = format!(
589                "{base_select} WHERE e.namespace = ?1 AND e.type = ?2 {order_clause} LIMIT ?3 OFFSET ?4"
590            );
591            let mut stmt = conn.prepare(&sql)?;
592            let rows = stmt
593                .query_map(rusqlite::params![ns, et, limit_i, offset_i], row_to_item)?
594                .collect::<rusqlite::Result<Vec<_>>>()?;
595            (count, rows)
596        }
597        (Some(ns), None) => {
598            let count: i64 = conn.query_row(
599                "SELECT COUNT(*) FROM entities WHERE namespace = ?1",
600                rusqlite::params![ns],
601                |r| r.get(0),
602            )?;
603            let sql =
604                format!("{base_select} WHERE e.namespace = ?1 {order_clause} LIMIT ?2 OFFSET ?3");
605            let mut stmt = conn.prepare(&sql)?;
606            let rows = stmt
607                .query_map(rusqlite::params![ns, limit_i, offset_i], row_to_item)?
608                .collect::<rusqlite::Result<Vec<_>>>()?;
609            (count, rows)
610        }
611        (None, Some(et)) => {
612            let count: i64 = conn.query_row(
613                "SELECT COUNT(*) FROM entities WHERE type = ?1",
614                rusqlite::params![et],
615                |r| r.get(0),
616            )?;
617            let sql = format!("{base_select} WHERE e.type = ?1 {order_clause} LIMIT ?2 OFFSET ?3");
618            let mut stmt = conn.prepare(&sql)?;
619            let rows = stmt
620                .query_map(rusqlite::params![et, limit_i, offset_i], row_to_item)?
621                .collect::<rusqlite::Result<Vec<_>>>()?;
622            (count, rows)
623        }
624        (None, None) => {
625            let count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
626            let sql = format!("{base_select} {order_clause} LIMIT ?1 OFFSET ?2");
627            let mut stmt = conn.prepare(&sql)?;
628            let rows = stmt
629                .query_map(rusqlite::params![limit_i, offset_i], row_to_item)?
630                .collect::<rusqlite::Result<Vec<_>>>()?;
631            (count, rows)
632        }
633    };
634
635    output::emit_json(&GraphEntitiesResponse {
636        entities: items,
637        total_count,
638        limit: args.limit,
639        offset: args.offset,
640        namespace: args.namespace,
641        elapsed_ms: inicio.elapsed().as_millis() as u64,
642    })
643}
644
645fn render_json(snapshot: &GraphSnapshot) -> Result<String, AppError> {
646    Ok(serde_json::to_string_pretty(snapshot)?)
647}
648
649/// Streams the graph as NDJSON: one object per node, one per edge, then a summary.
650///
651/// Each line is flushed immediately so consumers can process incrementally.
652/// When `output_path` is `Some`, lines are written to the file; otherwise to stdout.
653fn render_ndjson_streaming(
654    nodes: &[NodeOut],
655    edges: &[EdgeOut],
656    elapsed_ms: u64,
657    output_path: Option<&std::path::Path>,
658) -> Result<(), AppError> {
659    #[derive(serde::Serialize)]
660    struct NdjsonNode<'a> {
661        kind: &'static str,
662        id: i64,
663        name: &'a str,
664        namespace: &'a str,
665        #[serde(rename = "type")]
666        r#type: &'a str,
667    }
668    #[derive(serde::Serialize)]
669    struct NdjsonEdge<'a> {
670        kind: &'static str,
671        from: &'a str,
672        to: &'a str,
673        relation: &'a str,
674        weight: f64,
675    }
676    #[derive(serde::Serialize)]
677    struct NdjsonSummary {
678        kind: &'static str,
679        nodes: usize,
680        edges: usize,
681        elapsed_ms: u64,
682    }
683
684    use std::io::Write as IoWrite;
685
686    let mut buf: Vec<u8> = Vec::with_capacity(4096);
687
688    let emit_line =
689        |buf: &mut Vec<u8>, line: &str, path: Option<&std::path::Path>| -> Result<(), AppError> {
690            buf.clear();
691            buf.extend_from_slice(line.as_bytes());
692            buf.push(b'\n');
693            if let Some(p) = path {
694                let mut f = std::fs::OpenOptions::new()
695                    .create(true)
696                    .append(true)
697                    .open(p)
698                    .map_err(AppError::Io)?;
699                f.write_all(buf).map_err(AppError::Io)?;
700            } else {
701                output::emit_text(line);
702            }
703            Ok(())
704        };
705
706    // Truncate the output file once before starting (avoids re-opening with append for every line).
707    if let Some(p) = output_path {
708        fs::write(p, b"")?;
709    }
710
711    for node in nodes {
712        let obj = NdjsonNode {
713            kind: "node",
714            id: node.id,
715            name: &node.name,
716            namespace: &node.namespace,
717            r#type: &node.r#type,
718        };
719        let line = serde_json::to_string(&obj)?;
720        emit_line(&mut buf, &line, output_path)?;
721    }
722
723    for edge in edges {
724        let obj = NdjsonEdge {
725            kind: "edge",
726            from: &edge.from,
727            to: &edge.to,
728            relation: &edge.relation,
729            weight: edge.weight,
730        };
731        let line = serde_json::to_string(&obj)?;
732        emit_line(&mut buf, &line, output_path)?;
733    }
734
735    let summary = NdjsonSummary {
736        kind: "summary",
737        nodes: nodes.len(),
738        edges: edges.len(),
739        elapsed_ms,
740    };
741    let line = serde_json::to_string(&summary)?;
742    emit_line(&mut buf, &line, output_path)?;
743
744    Ok(())
745}
746
/// Converts an arbitrary entity name into a string usable as an unquoted DOT
/// node identifier.
///
/// Every character other than an ASCII letter, an ASCII digit or `_` is
/// replaced with `_`. An unquoted DOT identifier must be non-empty and must
/// not start with a digit, so a `_` is prepended when `raw` is empty or its
/// first character is an ASCII digit. Names that were already valid
/// identifiers are returned unchanged.
///
/// Distinct names can map to the same identifier (for example `a-b` and
/// `a_b`); this function does not disambiguate them.
fn sanitize_dot_id(raw: &str) -> String {
    let needs_prefix = raw.chars().next().map_or(true, |c| c.is_ascii_digit());

    let mut id = String::with_capacity(raw.len() + usize::from(needs_prefix));
    if needs_prefix {
        id.push('_');
    }
    id.extend(raw.chars().map(|c| {
        if c.is_ascii_alphanumeric() || c == '_' {
            c
        } else {
            '_'
        }
    }));
    id
}
758
759fn render_dot(nodes: &[NodeOut], edges: &[EdgeOut]) -> String {
760    let mut out = String::new();
761    out.push_str("digraph sqlite-graphrag {\n");
762    for node in nodes {
763        let node_id = sanitize_dot_id(&node.name);
764        let escaped = node.name.replace('"', "\\\"");
765        out.push_str(&format!("  {node_id} [label=\"{escaped}\"];\n"));
766    }
767    for edge in edges {
768        let from = sanitize_dot_id(&edge.from);
769        let to = sanitize_dot_id(&edge.to);
770        let label = edge.relation.replace('"', "\\\"");
771        out.push_str(&format!("  {from} -> {to} [label=\"{label}\"];\n"));
772    }
773    out.push_str("}\n");
774    out
775}
776
/// Converts an entity name into the identifier used for its Mermaid node.
///
/// ASCII letters, ASCII digits and `_` are kept; every other character is
/// replaced with `_`. The result has exactly as many characters as `raw`.
fn sanitize_mermaid_id(raw: &str) -> String {
    let mut id = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let keep = ch == '_' || ch.is_ascii_alphanumeric();
        id.push(if keep { ch } else { '_' });
    }
    id
}
788
789fn render_mermaid(nodes: &[NodeOut], edges: &[EdgeOut]) -> String {
790    let mut out = String::new();
791    out.push_str("graph LR\n");
792    for node in nodes {
793        let id = sanitize_mermaid_id(&node.name);
794        let escaped = node.name.replace('"', "\\\"");
795        out.push_str(&format!("  {id}[\"{escaped}\"]\n"));
796    }
797    for edge in edges {
798        let from = sanitize_mermaid_id(&edge.from);
799        let to = sanitize_mermaid_id(&edge.to);
800        let label = edge.relation.replace('|', "\\|");
801        out.push_str(&format!("  {from} -->|{label}| {to}\n"));
802    }
803    out
804}
805
806#[cfg(test)]
807mod tests {
808    use super::*;
809    use crate::cli::{Cli, Commands};
810    use clap::Parser;
811
812    fn make_node(kind: &str) -> NodeOut {
813        NodeOut {
814            id: 1,
815            name: "test-entity".to_string(),
816            namespace: "default".to_string(),
817            kind: kind.to_string(),
818            r#type: kind.to_string(),
819        }
820    }
821
822    #[test]
823    fn node_out_type_duplicates_kind() {
824        let node = make_node("agent");
825        let json = serde_json::to_value(&node).expect("serialization must work");
826        assert_eq!(json["kind"], json["type"]);
827        assert_eq!(json["kind"], "agent");
828        assert_eq!(json["type"], "agent");
829    }
830
831    #[test]
832    fn node_out_serializes_all_fields() {
833        let node = make_node("document");
834        let json = serde_json::to_value(&node).expect("serialization must work");
835        assert!(json.get("id").is_some());
836        assert!(json.get("name").is_some());
837        assert!(json.get("namespace").is_some());
838        assert!(json.get("kind").is_some());
839        assert!(json.get("type").is_some());
840    }
841
842    #[test]
843    fn graph_snapshot_serializes_nodes_with_type() {
844        let node = make_node("concept");
845        let snapshot = GraphSnapshot {
846            nodes: vec![node],
847            edges: vec![],
848            elapsed_ms: 0,
849        };
850        let json_str = render_json(&snapshot).expect("rendering must work");
851        let json: serde_json::Value = serde_json::from_str(&json_str).expect("valid json");
852        let first_node = &json["nodes"][0];
853        assert_eq!(first_node["kind"], first_node["type"]);
854        assert_eq!(first_node["type"], "concept");
855    }
856
857    #[test]
858    fn graph_traverse_response_serializes_correctly() {
859        let resp = GraphTraverseResponse {
860            from: "entity-a".to_string(),
861            namespace: "global".to_string(),
862            depth: 2,
863            hops: vec![TraverseHop {
864                entity: "entity-b".to_string(),
865                relation: "uses".to_string(),
866                direction: "outbound".to_string(),
867                weight: 1.0,
868                depth: 1,
869            }],
870            elapsed_ms: 5,
871        };
872        let json = serde_json::to_value(&resp).unwrap();
873        assert_eq!(json["from"], "entity-a");
874        assert_eq!(json["depth"], 2);
875        assert!(json["hops"].is_array());
876        assert_eq!(json["hops"][0]["direction"], "outbound");
877    }
878
879    #[test]
880    fn graph_stats_response_serializes_correctly() {
881        let resp = GraphStatsResponse {
882            namespace: Some("global".to_string()),
883            node_count: 10,
884            edge_count: 15,
885            avg_degree: 3.0,
886            max_degree: 7,
887            elapsed_ms: 2,
888        };
889        let json = serde_json::to_value(&resp).unwrap();
890        assert_eq!(json["node_count"], 10);
891        assert_eq!(json["edge_count"], 15);
892        assert_eq!(json["avg_degree"], 3.0);
893        assert_eq!(json["max_degree"], 7);
894    }
895
896    fn compute_avg_degree(node_count: i64, edge_count: i64) -> f64 {
897        if node_count > 0 {
898            2.0 * (edge_count as f64) / (node_count as f64)
899        } else {
900            0.0
901        }
902    }
903
904    #[test]
905    fn avg_degree_is_zero_when_no_nodes() {
906        assert_eq!(compute_avg_degree(0, 0), 0.0);
907    }
908
909    #[test]
910    fn avg_degree_is_zero_when_nodes_but_no_edges() {
911        // Reproduces L1 bug: previously returned 1.0 instead of 0.0.
912        assert_eq!(compute_avg_degree(2, 0), 0.0);
913    }
914
915    #[test]
916    fn avg_degree_is_two_when_triangle() {
917        // 3 nodes, 3 edges: 2 * 3 / 3 = 2.0
918        assert_eq!(compute_avg_degree(3, 3), 2.0);
919    }
920
921    #[test]
922    fn graph_entities_response_serializes_required_fields() {
923        let resp = GraphEntitiesResponse {
924            entities: vec![EntityItem {
925                id: 1,
926                name: "claude-code".to_string(),
927                entity_type: "agent".to_string(),
928                namespace: "global".to_string(),
929                created_at: "2026-01-01T00:00:00Z".to_string(),
930                degree: 0,
931            }],
932            total_count: 1,
933            limit: 50,
934            offset: 0,
935            namespace: Some("global".to_string()),
936            elapsed_ms: 3,
937        };
938        let json = serde_json::to_value(&resp).unwrap();
939        assert!(json["entities"].is_array());
940        assert_eq!(json["entities"][0]["name"], "claude-code");
941        assert_eq!(json["entities"][0]["entity_type"], "agent");
942        assert_eq!(json["total_count"], 1);
943        assert_eq!(json["limit"], 50);
944        assert_eq!(json["offset"], 0);
945        assert_eq!(json["namespace"], "global");
946    }
947
948    #[test]
949    fn entity_item_serializes_all_fields() {
950        let item = EntityItem {
951            id: 42,
952            name: "test-entity".to_string(),
953            entity_type: "concept".to_string(),
954            namespace: "project-a".to_string(),
955            created_at: "2026-04-19T12:00:00Z".to_string(),
956            degree: 3,
957        };
958        let json = serde_json::to_value(&item).unwrap();
959        assert_eq!(json["id"], 42);
960        assert_eq!(json["name"], "test-entity");
961        assert_eq!(json["entity_type"], "concept");
962        assert_eq!(json["namespace"], "project-a");
963        assert_eq!(json["created_at"], "2026-04-19T12:00:00Z");
964    }
965
966    #[test]
967    fn entity_item_entity_type_is_never_null() {
968        // P2-C: entity_type must never be null, even when DB column is empty.
969        let item = EntityItem {
970            id: 1,
971            name: "sem-tipo".to_string(),
972            entity_type: String::new(),
973            namespace: "ns".to_string(),
974            created_at: "2026-01-01T00:00:00Z".to_string(),
975            degree: 0,
976        };
977        let json = serde_json::to_value(&item).unwrap();
978        assert!(
979            !json["entity_type"].is_null(),
980            "entity_type must not be null"
981        );
982        assert!(json["entity_type"].is_string());
983    }
984
985    #[test]
986    fn graph_traverse_cli_rejects_format_dot() {
987        let parsed = Cli::try_parse_from([
988            "sqlite-graphrag",
989            "graph",
990            "traverse",
991            "--from",
992            "AuthDecision",
993            "--format",
994            "dot",
995        ]);
996        assert!(parsed.is_err(), "graph traverse must reject format=dot");
997    }
998
999    #[test]
1000    fn graph_stats_cli_accepts_format_text() {
1001        let parsed = Cli::try_parse_from(["sqlite-graphrag", "graph", "stats", "--format", "text"])
1002            .expect("graph stats --format text must be accepted");
1003
1004        match parsed.command {
1005            Commands::Graph(args) => match args.subcommand {
1006                Some(GraphSubcommand::Stats(stats)) => {
1007                    assert_eq!(stats.format, GraphStatsFormat::Text);
1008                }
1009                _ => unreachable!("unexpected subcommand"),
1010            },
1011            _ => unreachable!("unexpected command"),
1012        }
1013    }
1014
1015    #[test]
1016    fn graph_stats_cli_rejects_format_mermaid() {
1017        let parsed =
1018            Cli::try_parse_from(["sqlite-graphrag", "graph", "stats", "--format", "mermaid"]);
1019        assert!(parsed.is_err(), "graph stats must reject format=mermaid");
1020    }
1021
1022    #[test]
1023    fn graph_entities_response_has_no_items_key() {
1024        let resp = GraphEntitiesResponse {
1025            entities: vec![],
1026            total_count: 0,
1027            limit: 50,
1028            offset: 0,
1029            namespace: None,
1030            elapsed_ms: 0,
1031        };
1032        let json = serde_json::to_value(&resp).unwrap();
1033        assert!(
1034            json.get("items").is_none(),
1035            "legacy 'items' key must not appear"
1036        );
1037        assert!(
1038            json.get("entities").is_some(),
1039            "'entities' key must be present"
1040        );
1041    }
1042
1043    #[test]
1044    fn build_order_by_defaults_to_name_asc() {
1045        let clause = build_order_by(None, SortOrder::Asc);
1046        assert_eq!(clause, "ORDER BY e.name ASC");
1047    }
1048
1049    #[test]
1050    fn build_order_by_name_desc() {
1051        let clause = build_order_by(Some(EntitySortField::Name), SortOrder::Desc);
1052        assert_eq!(clause, "ORDER BY e.name DESC");
1053    }
1054
1055    #[test]
1056    fn build_order_by_degree_desc() {
1057        let clause = build_order_by(Some(EntitySortField::Degree), SortOrder::Desc);
1058        assert_eq!(clause, "ORDER BY degree DESC");
1059    }
1060
1061    #[test]
1062    fn build_order_by_degree_asc() {
1063        let clause = build_order_by(Some(EntitySortField::Degree), SortOrder::Asc);
1064        assert_eq!(clause, "ORDER BY degree ASC");
1065    }
1066
1067    #[test]
1068    fn build_order_by_created_at_asc() {
1069        let clause = build_order_by(Some(EntitySortField::CreatedAt), SortOrder::Asc);
1070        assert_eq!(clause, "ORDER BY e.created_at ASC");
1071    }
1072
1073    #[test]
1074    fn build_order_by_created_at_desc() {
1075        let clause = build_order_by(Some(EntitySortField::CreatedAt), SortOrder::Desc);
1076        assert_eq!(clause, "ORDER BY e.created_at DESC");
1077    }
1078
1079    #[test]
1080    fn graph_entities_cli_accepts_sort_by_degree_desc() {
1081        let parsed = Cli::try_parse_from([
1082            "sqlite-graphrag",
1083            "graph",
1084            "entities",
1085            "--sort-by",
1086            "degree",
1087            "--order",
1088            "desc",
1089        ])
1090        .expect("graph entities --sort-by degree --order desc must parse");
1091        match parsed.command {
1092            Commands::Graph(args) => match args.subcommand {
1093                Some(GraphSubcommand::Entities(e)) => {
1094                    assert!(matches!(e.sort_by, Some(EntitySortField::Degree)));
1095                    assert!(matches!(e.order, SortOrder::Desc));
1096                }
1097                _ => unreachable!("unexpected subcommand"),
1098            },
1099            _ => unreachable!("unexpected command"),
1100        }
1101    }
1102
1103    #[test]
1104    fn graph_entities_cli_accepts_sort_by_created_at_asc() {
1105        let parsed = Cli::try_parse_from([
1106            "sqlite-graphrag",
1107            "graph",
1108            "entities",
1109            "--sort-by",
1110            "created-at",
1111        ])
1112        .expect("graph entities --sort-by created-at must parse");
1113        match parsed.command {
1114            Commands::Graph(args) => match args.subcommand {
1115                Some(GraphSubcommand::Entities(e)) => {
1116                    assert!(matches!(e.sort_by, Some(EntitySortField::CreatedAt)));
1117                    assert!(matches!(e.order, SortOrder::Asc));
1118                }
1119                _ => unreachable!("unexpected subcommand"),
1120            },
1121            _ => unreachable!("unexpected command"),
1122        }
1123    }
1124
1125    #[test]
1126    fn graph_entities_cli_defaults_to_no_sort_by() {
1127        let parsed = Cli::try_parse_from(["sqlite-graphrag", "graph", "entities"])
1128            .expect("graph entities must parse without sort flags");
1129        match parsed.command {
1130            Commands::Graph(args) => match args.subcommand {
1131                Some(GraphSubcommand::Entities(e)) => {
1132                    assert!(e.sort_by.is_none(), "sort_by must default to None");
1133                    assert!(
1134                        matches!(e.order, SortOrder::Asc),
1135                        "order must default to Asc"
1136                    );
1137                }
1138                _ => unreachable!("unexpected subcommand"),
1139            },
1140            _ => unreachable!("unexpected command"),
1141        }
1142    }
1143}