Skip to main content

sqlite_graphrag/commands/
graph_export.rs

1//! Handler for the `graph-export` CLI subcommand.
2
3use crate::cli::GraphExportFormat;
4use crate::entity_type::EntityType;
5use crate::errors::AppError;
6use crate::output;
7use crate::paths::AppPaths;
8use crate::storage::connection::open_ro;
9use crate::storage::entities;
10use serde::Serialize;
11use std::collections::HashMap;
12use std::fs;
13use std::path::PathBuf;
14use std::time::Instant;
15
/// Optional nested subcommands. When absent, the default behavior exports
/// the full entity snapshot for backward compatibility.
// Dispatch happens in `run`: each variant carries its own argument struct and
// is forwarded to `run_traverse`, `run_stats`, or `run_entities` respectively.
// NOTE(review): with clap's derive, the `///` lines on the variants below are
// used as CLI help text, so rewording them changes `--help` output.
#[derive(clap::Subcommand)]
pub enum GraphSubcommand {
    /// Traverse relationships from a starting entity using BFS
    Traverse(GraphTraverseArgs),
    /// Show graph statistics (node/edge counts, degree distribution)
    Stats(GraphStatsArgs),
    /// List entities stored in the graph with optional filters
    Entities(GraphEntitiesArgs),
}
27
// Values accepted by `graph traverse --format`. JSON is the only variant;
// `run_traverse` reads the parsed value and discards it, always emitting JSON.
#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
pub enum GraphTraverseFormat {
    Json,
}
32
// Values accepted by `graph stats --format`.
#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
pub enum GraphStatsFormat {
    // Serializes the whole `GraphStatsResponse` through `output::emit_json`.
    Json,
    // Single `key=value` line built in `run_stats`; it does not include `elapsed_ms`.
    Text,
}
38
// Arguments of the top-level `graph` command. The fields other than
// `subcommand` are only consumed when no subcommand is given (see `run`);
// each subcommand carries its own `namespace`/`format`/`json`/`db` options.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Export full entity snapshot as JSON (default)\n  \
    sqlite-graphrag graph\n\n  \
    # Traverse relationships from a starting entity\n  \
    sqlite-graphrag graph traverse --from acme-corp --depth 2\n\n  \
    # Show graph statistics as structured JSON\n  \
    sqlite-graphrag graph stats --format json\n\n  \
    # List entities filtered by type\n  \
    sqlite-graphrag graph entities --entity-type person\n\n  \
    # Export full snapshot in DOT format for Graphviz\n  \
    sqlite-graphrag graph --format dot --output graph.dot\n\n  \
NOTES:\n  \
    Without a subcommand, exports the full entity+edge snapshot.\n  \
    Use `traverse`, `stats`, or `entities` for targeted queries.")]
pub struct GraphArgs {
    /// Optional subcommand; without one, export the full entity snapshot.
    #[command(subcommand)]
    pub subcommand: Option<GraphSubcommand>,
    /// Filter by namespace. Defaults to all namespaces.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Snapshot output format.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphExportFormat,
    /// File path to write output instead of stdout.
    #[arg(long)]
    pub output: Option<PathBuf>,
    // Hidden flag. In `run_entities_snapshot` it is not a pure no-op: when set,
    // the format is forced to JSON and `--output` is ignored (stdout is used).
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location; clap falls back to the `SQLITE_GRAPHRAG_DB_PATH`
    // environment variable. The value is handed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
72
// Arguments of `graph traverse`, consumed by `run_traverse`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Traverse relationships from an entity with default depth (2)\n  \
    sqlite-graphrag graph traverse --from acme-corp\n\n  \
    # Increase traversal depth to 3 hops\n  \
    sqlite-graphrag graph traverse --from acme-corp --depth 3\n\n  \
    # Traverse within a specific namespace\n  \
    sqlite-graphrag graph traverse --from acme-corp --namespace project-x\n\n  \
NOTES:\n  \
    Output is always JSON. The `hops` array contains each reachable entity\n  \
    with its relation, direction (inbound/outbound), weight, and depth level.")]
pub struct GraphTraverseArgs {
    /// Root entity name for the traversal.
    #[arg(long)]
    pub from: String,
    /// Maximum traversal depth.
    #[arg(long, default_value_t = 2u32)]
    pub depth: u32,
    // Namespace to search in; `run_traverse` passes it through
    // `crate::namespace::resolve_namespace`, which supplies the value used
    // when the flag is absent.
    #[arg(long)]
    pub namespace: Option<String>,
    // Accepted for CLI symmetry only: `run_traverse` discards this value.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphTraverseFormat,
    // Hidden flag; `run_traverse` never reads it.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location; clap falls back to the `SQLITE_GRAPHRAG_DB_PATH`
    // environment variable. The value is handed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
100
// Arguments of `graph stats`, consumed by `run_stats`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Show stats for all namespaces (human-readable text)\n  \
    sqlite-graphrag graph stats --format text\n\n  \
    # Show stats as structured JSON\n  \
    sqlite-graphrag graph stats --format json\n\n  \
    # Show stats for a specific namespace\n  \
    sqlite-graphrag graph stats --namespace project-x --format text\n\n  \
NOTES:\n  \
    Reports node_count, edge_count, avg_degree, and max_degree.\n  \
    Default format is JSON. Use `--format text` for a compact single-line summary.")]
pub struct GraphStatsArgs {
    // Restricts every count to one namespace. When absent, `run_stats` queries
    // all rows and the text output prints `namespace=all`.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Output format for the stats response.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphStatsFormat,
    // Hidden flag. When set, `run_stats` forces JSON regardless of `--format`.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location; clap falls back to the `SQLITE_GRAPHRAG_DB_PATH`
    // environment variable. The value is handed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
123
124/// Field to sort entities by in `graph entities`.
125#[derive(Debug, Clone, Copy, clap::ValueEnum)]
126pub enum EntitySortField {
127    /// Sort alphabetically by entity name.
128    Name,
129    /// Sort by degree (total number of relationships, descending by default).
130    Degree,
131    /// Sort by entity creation timestamp.
132    CreatedAt,
133}
134
/// Sort direction for `graph entities`.
// `Asc` is the default twice over: via `#[default]` for `Default::default()`
// and via `default_value_t = SortOrder::Asc` on `GraphEntitiesArgs::order`.
#[derive(Debug, Clone, Copy, Default, clap::ValueEnum)]
pub enum SortOrder {
    #[default]
    Asc,
    Desc,
}
142
// Arguments of `graph entities`, consumed by `run_entities`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # List all entities (default limit applies)\n  \
    sqlite-graphrag graph entities\n\n  \
    # Filter by entity type\n  \
    sqlite-graphrag graph entities --entity-type person\n\n  \
    # Filter by namespace and type\n  \
    sqlite-graphrag graph entities --namespace project-x --entity-type concept\n\n  \
    # Paginate results (skip first 20, return next 10)\n  \
    sqlite-graphrag graph entities --offset 20 --limit 10\n\n  \
    # Sort by degree descending (most connected first)\n  \
    sqlite-graphrag graph entities --sort-by degree --order desc\n\n  \
    # Sort by creation date ascending\n  \
    sqlite-graphrag graph entities --sort-by created-at --order asc\n\n  \
NOTES:\n  \
    Output is always JSON with `entities`, `total_count`, `limit`, and `offset` fields.\n  \
    Entity types are strings extracted by GLiNER NER (e.g. `person`, `organization`, `location`).")]
pub struct GraphEntitiesArgs {
    // Restricts the listing to one namespace; when absent, all namespaces are
    // listed (no namespace predicate is added to the SQL).
    #[arg(long)]
    pub namespace: Option<String>,
    /// Filter by entity type (one of the 13 canonical types).
    #[arg(long, value_enum)]
    pub entity_type: Option<EntityType>,
    /// Maximum number of results to return.
    #[arg(long, default_value_t = crate::constants::K_GRAPH_ENTITIES_DEFAULT_LIMIT)]
    pub limit: usize,
    /// Number of results to skip for pagination.
    #[arg(long, default_value_t = 0usize)]
    pub offset: usize,
    /// Sort entities by this field. When omitted, the default order is by name ascending.
    // More precisely (see `build_order_by`): an omitted field sorts by name in
    // whatever direction `--order` selects, which is ascending unless overridden.
    #[arg(long, value_enum, help = "Sort entities by field")]
    pub sort_by: Option<EntitySortField>,
    /// Sort direction: `asc` (default) or `desc`.
    #[arg(long, value_enum, default_value_t = SortOrder::Asc, help = "Sort order")]
    pub order: SortOrder,
    // Hidden flag; `run_entities` never reads it (output is always JSON).
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location; clap falls back to the `SQLITE_GRAPHRAG_DB_PATH`
    // environment variable. The value is handed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
183
/// One entity as emitted by the snapshot export (JSON, NDJSON, DOT, Mermaid).
///
/// `Clone` is required because the JSON snapshot publishes the same list
/// twice (`nodes` and `entities` in `GraphSnapshot`).
#[derive(Serialize, Clone)]
struct NodeOut {
    /// Entity row id as returned by the storage layer.
    id: i64,
    /// Entity name; also used to derive DOT/Mermaid node ids and labels.
    name: String,
    /// Namespace the entity belongs to.
    namespace: String,
    /// Deprecated alias of `type` kept for backward-compat with pre-v1.0.35 clients.
    /// New consumers MUST read `type` instead. Will be removed in a future major release.
    kind: String,
    /// Canonical entity classification (organization, concept, person, etc.).
    /// Mirrors `kind` while the deprecation window is active.
    #[serde(rename = "type")]
    r#type: String,
}
197
/// One relationship as emitted by the snapshot export.
///
/// Endpoints are entity *names*, not ids: `run_entities_snapshot` resolves the
/// stored `source_id`/`target_id` through its id-to-name map and drops edges
/// whose endpoints cannot be resolved.
#[derive(Serialize)]
struct EdgeOut {
    /// Name of the source entity.
    from: String,
    /// Name of the target entity.
    to: String,
    /// Relation label copied from the relationship row.
    relation: String,
    /// Relationship weight copied from the relationship row.
    weight: f64,
}
205
/// Top-level document of the JSON snapshot export.
#[derive(Serialize)]
struct GraphSnapshot {
    /// All exported entities.
    nodes: Vec<NodeOut>,
    /// Same content as `nodes`; `run_entities_snapshot` fills it with a clone
    /// of the node list, so both keys are present in the output.
    entities: Vec<NodeOut>,
    /// All relationships whose endpoints were resolved to entity names.
    edges: Vec<EdgeOut>,
    /// Wall-clock time spent building the snapshot, in milliseconds.
    elapsed_ms: u64,
}
213
/// One edge crossed during `graph traverse`, reported from the point of view
/// of the entity being expanded.
#[derive(Serialize)]
struct TraverseHop {
    /// Name of the entity on the far side of the edge.
    entity: String,
    /// Relation label of the crossed relationship.
    relation: String,
    /// `"outbound"` when the expanded entity is the relationship's source,
    /// `"inbound"` when it is the target.
    direction: String,
    /// Relationship weight copied from the relationship row.
    weight: f64,
    /// Depth of the expanded entity plus one (the root is depth 0).
    depth: u32,
}
222
/// JSON document emitted by `graph traverse`.
#[derive(Serialize)]
struct GraphTraverseResponse {
    /// Root entity name exactly as given on the command line.
    from: String,
    /// Namespace after resolution by `crate::namespace::resolve_namespace`.
    namespace: String,
    /// Requested maximum depth (`--depth`).
    depth: u32,
    /// Every edge crossed during the traversal, in discovery order.
    hops: Vec<TraverseHop>,
    /// Wall-clock time spent handling the command, in milliseconds.
    elapsed_ms: u64,
}
231
/// Result of `graph stats`; serialized as-is for `--format json`.
#[derive(Serialize)]
struct GraphStatsResponse {
    /// Namespace filter as given on the command line; `None` means all namespaces.
    namespace: Option<String>,
    /// Number of rows in `entities` (within the namespace, if one was given).
    node_count: i64,
    /// Number of rows in `relationships`; with a namespace filter, only those
    /// whose source entity is in that namespace.
    edge_count: i64,
    /// `2 * edge_count / node_count`, or `0.0` when there are no nodes.
    avg_degree: f64,
    /// Largest value of the `degree` column in `entities`, `0` when empty.
    max_degree: i64,
    /// Wall-clock time spent handling the command, in milliseconds.
    elapsed_ms: u64,
}
241
/// One row of the `graph entities` listing, built by `run_entities`.
#[derive(Serialize)]
struct EntityItem {
    /// Entity row id.
    id: i64,
    /// Entity name.
    name: String,
    /// Value of the `type` column, or an empty string when it is NULL.
    entity_type: String,
    /// Namespace the entity belongs to.
    namespace: String,
    /// Creation time rendered as `%Y-%m-%dT%H:%M:%SZ`; the stored integer is
    /// interpreted as whole seconds by `chrono::DateTime::from_timestamp`.
    created_at: String,
    /// Total number of relationships (inbound + outbound) for this entity.
    degree: u32,
    /// Optional description; the key is omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    description: Option<String>,
}
254
/// JSON document emitted by `graph entities`.
#[derive(Serialize)]
struct GraphEntitiesResponse {
    /// The requested page of entities.
    entities: Vec<EntityItem>,
    /// Number of entities matching the filters, ignoring `limit`/`offset`.
    total_count: i64,
    /// Page size echoed back from `--limit`.
    limit: usize,
    /// Page start echoed back from `--offset`.
    offset: usize,
    /// Namespace filter echoed back; `None` when all namespaces were listed.
    namespace: Option<String>,
    /// Wall-clock time spent handling the command, in milliseconds.
    elapsed_ms: u64,
}
264
265pub fn run(args: GraphArgs) -> Result<(), AppError> {
266    match args.subcommand {
267        None => run_entities_snapshot(
268            args.db.as_deref(),
269            args.namespace.as_deref(),
270            args.format,
271            args.json,
272            args.output.as_deref(),
273        ),
274        Some(GraphSubcommand::Traverse(a)) => run_traverse(a),
275        Some(GraphSubcommand::Stats(a)) => run_stats(a),
276        Some(GraphSubcommand::Entities(a)) => run_entities(a),
277    }
278}
279
280fn run_entities_snapshot(
281    db: Option<&str>,
282    namespace: Option<&str>,
283    format: GraphExportFormat,
284    json: bool,
285    output_path: Option<&std::path::Path>,
286) -> Result<(), AppError> {
287    let inicio = Instant::now();
288    let paths = AppPaths::resolve(db)?;
289
290    crate::storage::connection::ensure_db_ready(&paths)?;
291
292    let conn = open_ro(&paths.db)?;
293
294    let nodes_raw = entities::list_entities(&conn, namespace)?;
295    let edges_raw = entities::list_relationships_by_namespace(&conn, namespace)?;
296
297    let id_to_name: HashMap<i64, String> =
298        nodes_raw.iter().map(|n| (n.id, n.name.clone())).collect();
299
300    let nodes: Vec<NodeOut> = nodes_raw
301        .into_iter()
302        .map(|n| NodeOut {
303            id: n.id,
304            name: n.name,
305            namespace: n.namespace,
306            r#type: n.kind.clone(),
307            kind: n.kind,
308        })
309        .collect();
310
311    let mut edges: Vec<EdgeOut> = Vec::with_capacity(edges_raw.len());
312    let mut orphan_edges: usize = 0;
313    for r in edges_raw {
314        let from = match id_to_name.get(&r.source_id) {
315            Some(n) => n.clone(),
316            None => {
317                orphan_edges += 1;
318                tracing::warn!(source_id = r.source_id, relation = %r.relation, "edge skipped: source entity not found in id_to_name map");
319                continue;
320            }
321        };
322        let to = match id_to_name.get(&r.target_id) {
323            Some(n) => n.clone(),
324            None => {
325                orphan_edges += 1;
326                tracing::warn!(target_id = r.target_id, relation = %r.relation, "edge skipped: target entity not found in id_to_name map");
327                continue;
328            }
329        };
330        edges.push(EdgeOut {
331            from,
332            to,
333            relation: r.relation,
334            weight: r.weight,
335        });
336    }
337    if orphan_edges > 0 {
338        tracing::warn!(
339            count = orphan_edges,
340            "edges skipped due to orphaned entity references"
341        );
342    }
343
344    let effective_format = if json {
345        GraphExportFormat::Json
346    } else {
347        format
348    };
349
350    if effective_format == GraphExportFormat::Ndjson {
351        let elapsed_ms = inicio.elapsed().as_millis() as u64;
352        render_ndjson_streaming(&nodes, &edges, elapsed_ms, output_path)?;
353        return Ok(());
354    }
355
356    let rendered = match effective_format {
357        GraphExportFormat::Json => {
358            let entities = nodes.clone();
359            render_json(&GraphSnapshot {
360                nodes,
361                entities,
362                edges,
363                elapsed_ms: inicio.elapsed().as_millis() as u64,
364            })?
365        }
366        GraphExportFormat::Dot => render_dot(&nodes, &edges),
367        GraphExportFormat::Mermaid => render_mermaid(&nodes, &edges),
368        GraphExportFormat::Ndjson => unreachable!("ndjson handled above"),
369    };
370
371    if let Some(path) = output_path.filter(|_| !json) {
372        fs::write(path, &rendered)?;
373        output::emit_progress(&format!("wrote {}", path.display()));
374    } else {
375        output::emit_text(&rendered);
376    }
377
378    Ok(())
379}
380
381fn run_traverse(args: GraphTraverseArgs) -> Result<(), AppError> {
382    let inicio = Instant::now();
383    let _ = args.format;
384    let paths = AppPaths::resolve(args.db.as_deref())?;
385
386    crate::storage::connection::ensure_db_ready(&paths)?;
387
388    let conn = open_ro(&paths.db)?;
389    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
390
391    let from_id = entities::find_entity_id(&conn, &namespace, &args.from)?
392        .ok_or_else(|| AppError::NotFound(format!("entity '{}' not found", args.from)))?;
393
394    let all_rels = entities::list_relationships_by_namespace(&conn, Some(&namespace))?;
395    let all_entities = entities::list_entities(&conn, Some(&namespace))?;
396    let id_to_name: HashMap<i64, String> = all_entities
397        .iter()
398        .map(|e| (e.id, e.name.clone()))
399        .collect();
400
401    let mut hops: Vec<TraverseHop> = Vec::with_capacity(16);
402    let mut visited: std::collections::HashSet<i64> = std::collections::HashSet::new();
403    let mut frontier: Vec<(i64, u32)> = vec![(from_id, 0)];
404
405    while let Some((current_id, current_depth)) = frontier.pop() {
406        if current_depth >= args.depth || visited.contains(&current_id) {
407            continue;
408        }
409        visited.insert(current_id);
410
411        for rel in &all_rels {
412            if rel.source_id == current_id {
413                if let Some(target_name) = id_to_name.get(&rel.target_id) {
414                    hops.push(TraverseHop {
415                        entity: target_name.clone(),
416                        relation: rel.relation.clone(),
417                        direction: "outbound".to_string(),
418                        weight: rel.weight,
419                        depth: current_depth + 1,
420                    });
421                    frontier.push((rel.target_id, current_depth + 1));
422                }
423            } else if rel.target_id == current_id {
424                if let Some(source_name) = id_to_name.get(&rel.source_id) {
425                    hops.push(TraverseHop {
426                        entity: source_name.clone(),
427                        relation: rel.relation.clone(),
428                        direction: "inbound".to_string(),
429                        weight: rel.weight,
430                        depth: current_depth + 1,
431                    });
432                    frontier.push((rel.source_id, current_depth + 1));
433                }
434            }
435        }
436    }
437
438    output::emit_json(&GraphTraverseResponse {
439        from: args.from,
440        namespace,
441        depth: args.depth,
442        hops,
443        elapsed_ms: inicio.elapsed().as_millis() as u64,
444    })?;
445
446    Ok(())
447}
448
449fn run_stats(args: GraphStatsArgs) -> Result<(), AppError> {
450    let inicio = Instant::now();
451    let paths = AppPaths::resolve(args.db.as_deref())?;
452
453    crate::storage::connection::ensure_db_ready(&paths)?;
454
455    let conn = open_ro(&paths.db)?;
456    let ns = args.namespace.as_deref();
457
458    let node_count: i64 = if let Some(n) = ns {
459        conn.query_row(
460            "SELECT COUNT(*) FROM entities WHERE namespace = ?1",
461            rusqlite::params![n],
462            |r| r.get(0),
463        )?
464    } else {
465        conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?
466    };
467
468    let edge_count: i64 = if let Some(n) = ns {
469        conn.query_row(
470            "SELECT COUNT(*) FROM relationships r
471             JOIN entities s ON s.id = r.source_id
472             WHERE s.namespace = ?1",
473            rusqlite::params![n],
474            |r| r.get(0),
475        )?
476    } else {
477        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?
478    };
479
480    let max_degree: i64 = if let Some(n) = ns {
481        conn.query_row(
482            "SELECT COALESCE(MAX(degree), 0) FROM entities WHERE namespace = ?1",
483            rusqlite::params![n],
484            |r| r.get(0),
485        )?
486    } else {
487        conn.query_row("SELECT COALESCE(MAX(degree), 0) FROM entities", [], |r| {
488            r.get(0)
489        })?
490    };
491
492    // avg_degree = 2 * edge_count / node_count (each edge contributes 2 to total degree sum).
493    let avg_degree = if node_count > 0 {
494        2.0 * (edge_count as f64) / (node_count as f64)
495    } else {
496        0.0
497    };
498
499    let resp = GraphStatsResponse {
500        namespace: args.namespace,
501        node_count,
502        edge_count,
503        avg_degree,
504        max_degree,
505        elapsed_ms: inicio.elapsed().as_millis() as u64,
506    };
507
508    let effective_format = if args.json {
509        GraphStatsFormat::Json
510    } else {
511        args.format
512    };
513
514    match effective_format {
515        GraphStatsFormat::Json => output::emit_json(&resp)?,
516        GraphStatsFormat::Text => {
517            output::emit_text(&format!(
518                "nodes={} edges={} avg_degree={:.2} max_degree={} namespace={}",
519                resp.node_count,
520                resp.edge_count,
521                resp.avg_degree,
522                resp.max_degree,
523                resp.namespace.as_deref().unwrap_or("all"),
524            ));
525        }
526    }
527
528    Ok(())
529}
530
531/// Builds the `ORDER BY` clause fragment from sort options.
532///
533/// Returns a static SQL fragment such as `ORDER BY e.name ASC`.
534fn build_order_by(sort_by: Option<EntitySortField>, order: SortOrder) -> &'static str {
535    // The combinations are enumerated as static strings to avoid
536    // format!() allocations in the hot path and satisfy the borrow checker
537    // when the string is used inside conn.prepare().
538    match (sort_by, order) {
539        (None, SortOrder::Asc) | (Some(EntitySortField::Name), SortOrder::Asc) => {
540            "ORDER BY e.name ASC"
541        }
542        (Some(EntitySortField::Name), SortOrder::Desc) => "ORDER BY e.name DESC",
543        (Some(EntitySortField::Degree), SortOrder::Asc) => "ORDER BY degree ASC",
544        (Some(EntitySortField::Degree), SortOrder::Desc) => "ORDER BY degree DESC",
545        (Some(EntitySortField::CreatedAt), SortOrder::Asc) => "ORDER BY e.created_at ASC",
546        (Some(EntitySortField::CreatedAt), SortOrder::Desc) => "ORDER BY e.created_at DESC",
547        // Fallback: None/Desc → sort by name desc (consistent with dir variable).
548        (None, SortOrder::Desc) => "ORDER BY e.name DESC",
549    }
550}
551
552fn run_entities(args: GraphEntitiesArgs) -> Result<(), AppError> {
553    let inicio = Instant::now();
554    let paths = AppPaths::resolve(args.db.as_deref())?;
555
556    crate::storage::connection::ensure_db_ready(&paths)?;
557
558    let conn = open_ro(&paths.db)?;
559
560    let row_to_item = |r: &rusqlite::Row<'_>| -> rusqlite::Result<EntityItem> {
561        let ts: i64 = r.get(4)?;
562        let created_at = chrono::DateTime::from_timestamp(ts, 0)
563            .unwrap_or_default()
564            .format("%Y-%m-%dT%H:%M:%SZ")
565            .to_string();
566        Ok(EntityItem {
567            id: r.get(0)?,
568            name: r.get(1)?,
569            entity_type: r.get(2)?,
570            namespace: r.get(3)?,
571            created_at,
572            degree: r.get(5)?,
573            description: r.get(6)?,
574        })
575    };
576
577    let limit_i = args.limit as i64;
578    let offset_i = args.offset as i64;
579    let order_clause = build_order_by(args.sort_by, args.order);
580
581    let base_select = "SELECT e.id, e.name, COALESCE(e.type, ''), e.namespace, e.created_at,
582                        (SELECT COUNT(*) FROM relationships r
583                         WHERE r.source_id = e.id OR r.target_id = e.id) AS degree,
584                        e.description
585                 FROM entities e";
586
587    let (total_count, items) = match (
588        args.namespace.as_deref(),
589        args.entity_type.map(|et| et.as_str()),
590    ) {
591        (Some(ns), Some(et)) => {
592            let count: i64 = conn.query_row(
593                "SELECT COUNT(*) FROM entities WHERE namespace = ?1 AND type = ?2",
594                rusqlite::params![ns, et],
595                |r| r.get(0),
596            )?;
597            let sql = format!(
598                "{base_select} WHERE e.namespace = ?1 AND e.type = ?2 {order_clause} LIMIT ?3 OFFSET ?4"
599            );
600            let mut stmt = conn.prepare(&sql)?;
601            let rows = stmt
602                .query_map(rusqlite::params![ns, et, limit_i, offset_i], row_to_item)?
603                .collect::<rusqlite::Result<Vec<_>>>()?;
604            (count, rows)
605        }
606        (Some(ns), None) => {
607            let count: i64 = conn.query_row(
608                "SELECT COUNT(*) FROM entities WHERE namespace = ?1",
609                rusqlite::params![ns],
610                |r| r.get(0),
611            )?;
612            let sql =
613                format!("{base_select} WHERE e.namespace = ?1 {order_clause} LIMIT ?2 OFFSET ?3");
614            let mut stmt = conn.prepare(&sql)?;
615            let rows = stmt
616                .query_map(rusqlite::params![ns, limit_i, offset_i], row_to_item)?
617                .collect::<rusqlite::Result<Vec<_>>>()?;
618            (count, rows)
619        }
620        (None, Some(et)) => {
621            let count: i64 = conn.query_row(
622                "SELECT COUNT(*) FROM entities WHERE type = ?1",
623                rusqlite::params![et],
624                |r| r.get(0),
625            )?;
626            let sql = format!("{base_select} WHERE e.type = ?1 {order_clause} LIMIT ?2 OFFSET ?3");
627            let mut stmt = conn.prepare(&sql)?;
628            let rows = stmt
629                .query_map(rusqlite::params![et, limit_i, offset_i], row_to_item)?
630                .collect::<rusqlite::Result<Vec<_>>>()?;
631            (count, rows)
632        }
633        (None, None) => {
634            let count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
635            let sql = format!("{base_select} {order_clause} LIMIT ?1 OFFSET ?2");
636            let mut stmt = conn.prepare(&sql)?;
637            let rows = stmt
638                .query_map(rusqlite::params![limit_i, offset_i], row_to_item)?
639                .collect::<rusqlite::Result<Vec<_>>>()?;
640            (count, rows)
641        }
642    };
643
644    output::emit_json(&GraphEntitiesResponse {
645        entities: items,
646        total_count,
647        limit: args.limit,
648        offset: args.offset,
649        namespace: args.namespace,
650        elapsed_ms: inicio.elapsed().as_millis() as u64,
651    })
652}
653
654fn render_json(snapshot: &GraphSnapshot) -> Result<String, AppError> {
655    Ok(serde_json::to_string_pretty(snapshot)?)
656}
657
658/// Streams the graph as NDJSON: one object per node, one per edge, then a summary.
659///
660/// Each line is flushed immediately so consumers can process incrementally.
661/// When `output_path` is `Some`, lines are written to the file; otherwise to stdout.
662fn render_ndjson_streaming(
663    nodes: &[NodeOut],
664    edges: &[EdgeOut],
665    elapsed_ms: u64,
666    output_path: Option<&std::path::Path>,
667) -> Result<(), AppError> {
668    #[derive(serde::Serialize)]
669    struct NdjsonNode<'a> {
670        kind: &'static str,
671        id: i64,
672        name: &'a str,
673        namespace: &'a str,
674        #[serde(rename = "type")]
675        r#type: &'a str,
676    }
677    #[derive(serde::Serialize)]
678    struct NdjsonEdge<'a> {
679        kind: &'static str,
680        from: &'a str,
681        to: &'a str,
682        relation: &'a str,
683        weight: f64,
684    }
685    #[derive(serde::Serialize)]
686    struct NdjsonSummary {
687        kind: &'static str,
688        nodes: usize,
689        edges: usize,
690        elapsed_ms: u64,
691    }
692
693    use std::io::Write as IoWrite;
694
695    let mut buf: Vec<u8> = Vec::with_capacity(4096);
696
697    let emit_line =
698        |buf: &mut Vec<u8>, line: &str, path: Option<&std::path::Path>| -> Result<(), AppError> {
699            buf.clear();
700            buf.extend_from_slice(line.as_bytes());
701            buf.push(b'\n');
702            if let Some(p) = path {
703                let mut f = std::fs::OpenOptions::new()
704                    .create(true)
705                    .append(true)
706                    .open(p)
707                    .map_err(AppError::Io)?;
708                f.write_all(buf).map_err(AppError::Io)?;
709            } else {
710                output::emit_text(line);
711            }
712            Ok(())
713        };
714
715    // Truncate the output file once before starting (avoids re-opening with append for every line).
716    if let Some(p) = output_path {
717        fs::write(p, b"")?;
718    }
719
720    for node in nodes {
721        let obj = NdjsonNode {
722            kind: "node",
723            id: node.id,
724            name: &node.name,
725            namespace: &node.namespace,
726            r#type: &node.r#type,
727        };
728        let line = serde_json::to_string(&obj)?;
729        emit_line(&mut buf, &line, output_path)?;
730    }
731
732    for edge in edges {
733        let obj = NdjsonEdge {
734            kind: "edge",
735            from: &edge.from,
736            to: &edge.to,
737            relation: &edge.relation,
738            weight: edge.weight,
739        };
740        let line = serde_json::to_string(&obj)?;
741        emit_line(&mut buf, &line, output_path)?;
742    }
743
744    let summary = NdjsonSummary {
745        kind: "summary",
746        nodes: nodes.len(),
747        edges: edges.len(),
748        elapsed_ms,
749    };
750    let line = serde_json::to_string(&summary)?;
751    emit_line(&mut buf, &line, output_path)?;
752
753    Ok(())
754}
755
/// Turns an arbitrary entity name into an identifier that is valid as an
/// unquoted DOT id.
///
/// Every character other than an ASCII letter, ASCII digit, or `_` is replaced
/// by `_`. Because an unquoted DOT id may not be empty and may not start with
/// a digit unless it is a pure numeral, a leading `_` is added when `raw` is
/// empty or begins with a digit.
///
/// The mapping is lossy: distinct names such as `a-b` and `a_b` yield the same
/// id, and non-ASCII characters all collapse to `_`.
fn sanitize_dot_id(raw: &str) -> String {
    let mut id = String::with_capacity(raw.len() + 1);

    let needs_prefix = raw.chars().next().map_or(true, |c| c.is_ascii_digit());
    if needs_prefix {
        id.push('_');
    }

    id.extend(raw.chars().map(|c| {
        if c.is_ascii_alphanumeric() || c == '_' {
            c
        } else {
            '_'
        }
    }));
    id
}
767
768fn render_dot(nodes: &[NodeOut], edges: &[EdgeOut]) -> String {
769    let mut out = String::new();
770    out.push_str("digraph sqlite-graphrag {\n");
771    for node in nodes {
772        let node_id = sanitize_dot_id(&node.name);
773        let escaped = node.name.replace('"', "\\\"");
774        out.push_str(&format!("  {node_id} [label=\"{escaped}\"];\n"));
775    }
776    for edge in edges {
777        let from = sanitize_dot_id(&edge.from);
778        let to = sanitize_dot_id(&edge.to);
779        let label = edge.relation.replace('"', "\\\"");
780        out.push_str(&format!("  {from} -> {to} [label=\"{label}\"];\n"));
781    }
782    out.push_str("}\n");
783    out
784}
785
/// Derives a Mermaid node id from an entity name by replacing every character
/// that is not an ASCII letter, ASCII digit, or `_` with `_`.
///
/// The mapping is lossy: distinct names can produce the same id, and an empty
/// name produces an empty id.
fn sanitize_mermaid_id(raw: &str) -> String {
    let mut id = String::new();
    for ch in raw.chars() {
        let keep = ch == '_' || ch.is_ascii_alphanumeric();
        id.push(if keep { ch } else { '_' });
    }
    id
}
797
798fn render_mermaid(nodes: &[NodeOut], edges: &[EdgeOut]) -> String {
799    let mut out = String::new();
800    out.push_str("graph LR\n");
801    for node in nodes {
802        let id = sanitize_mermaid_id(&node.name);
803        let escaped = node.name.replace('"', "\\\"");
804        out.push_str(&format!("  {id}[\"{escaped}\"]\n"));
805    }
806    for edge in edges {
807        let from = sanitize_mermaid_id(&edge.from);
808        let to = sanitize_mermaid_id(&edge.to);
809        let label = edge.relation.replace('|', "\\|");
810        out.push_str(&format!("  {from} -->|{label}| {to}\n"));
811    }
812    out
813}
814
815#[cfg(test)]
816mod tests {
817    use super::*;
818    use crate::cli::{Cli, Commands};
819    use clap::Parser;
820
821    fn make_node(kind: &str) -> NodeOut {
822        NodeOut {
823            id: 1,
824            name: "test-entity".to_string(),
825            namespace: "default".to_string(),
826            kind: kind.to_string(),
827            r#type: kind.to_string(),
828        }
829    }
830
831    #[test]
832    fn node_out_type_duplicates_kind() {
833        let node = make_node("agent");
834        let json = serde_json::to_value(&node).expect("serialization must work");
835        assert_eq!(json["kind"], json["type"]);
836        assert_eq!(json["kind"], "agent");
837        assert_eq!(json["type"], "agent");
838    }
839
840    #[test]
841    fn node_out_serializes_all_fields() {
842        let node = make_node("document");
843        let json = serde_json::to_value(&node).expect("serialization must work");
844        assert!(json.get("id").is_some());
845        assert!(json.get("name").is_some());
846        assert!(json.get("namespace").is_some());
847        assert!(json.get("kind").is_some());
848        assert!(json.get("type").is_some());
849    }
850
851    #[test]
852    fn graph_snapshot_serializes_nodes_with_type() {
853        let node = make_node("concept");
854        let entities = vec![make_node("concept")];
855        let snapshot = GraphSnapshot {
856            nodes: vec![node],
857            entities,
858            edges: vec![],
859            elapsed_ms: 0,
860        };
861        let json_str = render_json(&snapshot).expect("rendering must work");
862        let json: serde_json::Value = serde_json::from_str(&json_str).expect("valid json");
863        let first_node = &json["nodes"][0];
864        assert_eq!(first_node["kind"], first_node["type"]);
865        assert_eq!(first_node["type"], "concept");
866    }
867
868    #[test]
869    fn graph_traverse_response_serializes_correctly() {
870        let resp = GraphTraverseResponse {
871            from: "entity-a".to_string(),
872            namespace: "global".to_string(),
873            depth: 2,
874            hops: vec![TraverseHop {
875                entity: "entity-b".to_string(),
876                relation: "uses".to_string(),
877                direction: "outbound".to_string(),
878                weight: 1.0,
879                depth: 1,
880            }],
881            elapsed_ms: 5,
882        };
883        let json = serde_json::to_value(&resp).unwrap();
884        assert_eq!(json["from"], "entity-a");
885        assert_eq!(json["depth"], 2);
886        assert!(json["hops"].is_array());
887        assert_eq!(json["hops"][0]["direction"], "outbound");
888    }
889
890    #[test]
891    fn graph_stats_response_serializes_correctly() {
892        let resp = GraphStatsResponse {
893            namespace: Some("global".to_string()),
894            node_count: 10,
895            edge_count: 15,
896            avg_degree: 3.0,
897            max_degree: 7,
898            elapsed_ms: 2,
899        };
900        let json = serde_json::to_value(&resp).unwrap();
901        assert_eq!(json["node_count"], 10);
902        assert_eq!(json["edge_count"], 15);
903        assert_eq!(json["avg_degree"], 3.0);
904        assert_eq!(json["max_degree"], 7);
905    }
906
907    fn compute_avg_degree(node_count: i64, edge_count: i64) -> f64 {
908        if node_count > 0 {
909            2.0 * (edge_count as f64) / (node_count as f64)
910        } else {
911            0.0
912        }
913    }
914
915    #[test]
916    fn avg_degree_is_zero_when_no_nodes() {
917        assert_eq!(compute_avg_degree(0, 0), 0.0);
918    }
919
920    #[test]
921    fn avg_degree_is_zero_when_nodes_but_no_edges() {
922        // Reproduces L1 bug: previously returned 1.0 instead of 0.0.
923        assert_eq!(compute_avg_degree(2, 0), 0.0);
924    }
925
926    #[test]
927    fn avg_degree_is_two_when_triangle() {
928        // 3 nodes, 3 edges: 2 * 3 / 3 = 2.0
929        assert_eq!(compute_avg_degree(3, 3), 2.0);
930    }
931
932    #[test]
933    fn graph_entities_response_serializes_required_fields() {
934        let resp = GraphEntitiesResponse {
935            entities: vec![EntityItem {
936                id: 1,
937                name: "claude-code".to_string(),
938                entity_type: "agent".to_string(),
939                namespace: "global".to_string(),
940                created_at: "2026-01-01T00:00:00Z".to_string(),
941                degree: 0,
942                description: None,
943            }],
944            total_count: 1,
945            limit: 50,
946            offset: 0,
947            namespace: Some("global".to_string()),
948            elapsed_ms: 3,
949        };
950        let json = serde_json::to_value(&resp).unwrap();
951        assert!(json["entities"].is_array());
952        assert_eq!(json["entities"][0]["name"], "claude-code");
953        assert_eq!(json["entities"][0]["entity_type"], "agent");
954        assert_eq!(json["total_count"], 1);
955        assert_eq!(json["limit"], 50);
956        assert_eq!(json["offset"], 0);
957        assert_eq!(json["namespace"], "global");
958    }
959
960    #[test]
961    fn entity_item_serializes_all_fields() {
962        let item = EntityItem {
963            id: 42,
964            name: "test-entity".to_string(),
965            entity_type: "concept".to_string(),
966            namespace: "project-a".to_string(),
967            created_at: "2026-04-19T12:00:00Z".to_string(),
968            degree: 3,
969            description: Some("test description".to_string()),
970        };
971        let json = serde_json::to_value(&item).unwrap();
972        assert_eq!(json["id"], 42);
973        assert_eq!(json["name"], "test-entity");
974        assert_eq!(json["entity_type"], "concept");
975        assert_eq!(json["namespace"], "project-a");
976        assert_eq!(json["created_at"], "2026-04-19T12:00:00Z");
977    }
978
979    #[test]
980    fn entity_item_entity_type_is_never_null() {
981        // P2-C: entity_type must never be null, even when DB column is empty.
982        let item = EntityItem {
983            id: 1,
984            name: "sem-tipo".to_string(),
985            entity_type: String::new(),
986            namespace: "ns".to_string(),
987            created_at: "2026-01-01T00:00:00Z".to_string(),
988            degree: 0,
989            description: None,
990        };
991        let json = serde_json::to_value(&item).unwrap();
992        assert!(
993            !json["entity_type"].is_null(),
994            "entity_type must not be null"
995        );
996        assert!(json["entity_type"].is_string());
997    }
998
999    #[test]
1000    fn graph_traverse_cli_rejects_format_dot() {
1001        let parsed = Cli::try_parse_from([
1002            "sqlite-graphrag",
1003            "graph",
1004            "traverse",
1005            "--from",
1006            "AuthDecision",
1007            "--format",
1008            "dot",
1009        ]);
1010        assert!(parsed.is_err(), "graph traverse must reject format=dot");
1011    }
1012
1013    #[test]
1014    fn graph_stats_cli_accepts_format_text() {
1015        let parsed = Cli::try_parse_from(["sqlite-graphrag", "graph", "stats", "--format", "text"])
1016            .expect("graph stats --format text must be accepted");
1017
1018        match parsed.command {
1019            Commands::Graph(args) => match args.subcommand {
1020                Some(GraphSubcommand::Stats(stats)) => {
1021                    assert_eq!(stats.format, GraphStatsFormat::Text);
1022                }
1023                _ => unreachable!("unexpected subcommand"),
1024            },
1025            _ => unreachable!("unexpected command"),
1026        }
1027    }
1028
1029    #[test]
1030    fn graph_stats_cli_rejects_format_mermaid() {
1031        let parsed =
1032            Cli::try_parse_from(["sqlite-graphrag", "graph", "stats", "--format", "mermaid"]);
1033        assert!(parsed.is_err(), "graph stats must reject format=mermaid");
1034    }
1035
1036    #[test]
1037    fn graph_entities_response_has_no_items_key() {
1038        let resp = GraphEntitiesResponse {
1039            entities: vec![],
1040            total_count: 0,
1041            limit: 50,
1042            offset: 0,
1043            namespace: None,
1044            elapsed_ms: 0,
1045        };
1046        let json = serde_json::to_value(&resp).unwrap();
1047        assert!(
1048            json.get("items").is_none(),
1049            "legacy 'items' key must not appear"
1050        );
1051        assert!(
1052            json.get("entities").is_some(),
1053            "'entities' key must be present"
1054        );
1055    }
1056
1057    #[test]
1058    fn build_order_by_defaults_to_name_asc() {
1059        let clause = build_order_by(None, SortOrder::Asc);
1060        assert_eq!(clause, "ORDER BY e.name ASC");
1061    }
1062
1063    #[test]
1064    fn build_order_by_name_desc() {
1065        let clause = build_order_by(Some(EntitySortField::Name), SortOrder::Desc);
1066        assert_eq!(clause, "ORDER BY e.name DESC");
1067    }
1068
1069    #[test]
1070    fn build_order_by_degree_desc() {
1071        let clause = build_order_by(Some(EntitySortField::Degree), SortOrder::Desc);
1072        assert_eq!(clause, "ORDER BY degree DESC");
1073    }
1074
1075    #[test]
1076    fn build_order_by_degree_asc() {
1077        let clause = build_order_by(Some(EntitySortField::Degree), SortOrder::Asc);
1078        assert_eq!(clause, "ORDER BY degree ASC");
1079    }
1080
1081    #[test]
1082    fn build_order_by_created_at_asc() {
1083        let clause = build_order_by(Some(EntitySortField::CreatedAt), SortOrder::Asc);
1084        assert_eq!(clause, "ORDER BY e.created_at ASC");
1085    }
1086
1087    #[test]
1088    fn build_order_by_created_at_desc() {
1089        let clause = build_order_by(Some(EntitySortField::CreatedAt), SortOrder::Desc);
1090        assert_eq!(clause, "ORDER BY e.created_at DESC");
1091    }
1092
1093    #[test]
1094    fn graph_entities_cli_accepts_sort_by_degree_desc() {
1095        let parsed = Cli::try_parse_from([
1096            "sqlite-graphrag",
1097            "graph",
1098            "entities",
1099            "--sort-by",
1100            "degree",
1101            "--order",
1102            "desc",
1103        ])
1104        .expect("graph entities --sort-by degree --order desc must parse");
1105        match parsed.command {
1106            Commands::Graph(args) => match args.subcommand {
1107                Some(GraphSubcommand::Entities(e)) => {
1108                    assert!(matches!(e.sort_by, Some(EntitySortField::Degree)));
1109                    assert!(matches!(e.order, SortOrder::Desc));
1110                }
1111                _ => unreachable!("unexpected subcommand"),
1112            },
1113            _ => unreachable!("unexpected command"),
1114        }
1115    }
1116
1117    #[test]
1118    fn graph_entities_cli_accepts_sort_by_created_at_asc() {
1119        let parsed = Cli::try_parse_from([
1120            "sqlite-graphrag",
1121            "graph",
1122            "entities",
1123            "--sort-by",
1124            "created-at",
1125        ])
1126        .expect("graph entities --sort-by created-at must parse");
1127        match parsed.command {
1128            Commands::Graph(args) => match args.subcommand {
1129                Some(GraphSubcommand::Entities(e)) => {
1130                    assert!(matches!(e.sort_by, Some(EntitySortField::CreatedAt)));
1131                    assert!(matches!(e.order, SortOrder::Asc));
1132                }
1133                _ => unreachable!("unexpected subcommand"),
1134            },
1135            _ => unreachable!("unexpected command"),
1136        }
1137    }
1138
1139    #[test]
1140    fn graph_entities_cli_defaults_to_no_sort_by() {
1141        let parsed = Cli::try_parse_from(["sqlite-graphrag", "graph", "entities"])
1142            .expect("graph entities must parse without sort flags");
1143        match parsed.command {
1144            Commands::Graph(args) => match args.subcommand {
1145                Some(GraphSubcommand::Entities(e)) => {
1146                    assert!(e.sort_by.is_none(), "sort_by must default to None");
1147                    assert!(
1148                        matches!(e.order, SortOrder::Asc),
1149                        "order must default to Asc"
1150                    );
1151                }
1152                _ => unreachable!("unexpected subcommand"),
1153            },
1154            _ => unreachable!("unexpected command"),
1155        }
1156    }
1157}