Skip to main content

sqlite_graphrag/commands/
graph_export.rs

1//! Handler for the `graph-export` CLI subcommand.
2
3use crate::cli::GraphExportFormat;
4use crate::entity_type::EntityType;
5use crate::errors::AppError;
6use crate::output;
7use crate::paths::AppPaths;
8use crate::storage::connection::open_ro;
9use crate::storage::entities;
10use serde::Serialize;
11use std::collections::HashMap;
12use std::fs;
13use std::path::PathBuf;
14use std::time::Instant;
15
/// Optional nested subcommands. When absent, the default behavior exports
/// the full entity snapshot for backward compatibility.
// Dispatch happens in `run`, which also forwards the parent-level `--db` and
// `--namespace` values to a subcommand that did not set its own.
#[derive(clap::Subcommand)]
pub enum GraphSubcommand {
    /// Traverse relationships from a starting entity using BFS
    Traverse(GraphTraverseArgs),
    /// Show graph statistics (node/edge counts, degree distribution)
    Stats(GraphStatsArgs),
    /// List entities stored in the graph with optional filters
    Entities(GraphEntitiesArgs),
}
27
// Output formats accepted by `graph traverse`. JSON is the only variant;
// `run_traverse` discards the parsed value and always emits JSON.
#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
pub enum GraphTraverseFormat {
    Json,
}
32
// Output formats accepted by `graph stats` (consumed by `run_stats`).
#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq, Eq)]
pub enum GraphStatsFormat {
    // Emits the serialized `GraphStatsResponse`.
    Json,
    // Emits a single `key=value` summary line.
    Text,
}
38
// Top-level arguments of the `graph` command. Without a subcommand, `run`
// feeds `db`, `namespace`, `format`, `json` and `output` into the snapshot
// export; with a subcommand only `db` and `namespace` are forwarded, and only
// as fallbacks for values the subcommand left unset.
// Plain `//` comments are used for these notes because `///` comments on clap
// items become part of the generated help text.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Export full entity snapshot as JSON (default)\n  \
    sqlite-graphrag graph\n\n  \
    # Traverse relationships from a starting entity\n  \
    sqlite-graphrag graph traverse --from acme-corp --depth 2\n\n  \
    # Show graph statistics as structured JSON\n  \
    sqlite-graphrag graph stats --format json\n\n  \
    # List entities filtered by type\n  \
    sqlite-graphrag graph entities --entity-type person\n\n  \
    # Export full snapshot in DOT format for Graphviz\n  \
    sqlite-graphrag graph --format dot --output graph.dot\n\n  \
NOTES:\n  \
    Without a subcommand, exports the full entity+edge snapshot.\n  \
    Use `traverse`, `stats`, or `entities` for targeted queries.")]
pub struct GraphArgs {
    /// Optional subcommand; without one, export the full entity snapshot.
    #[command(subcommand)]
    pub subcommand: Option<GraphSubcommand>,
    /// Filter by namespace. Defaults to all namespaces.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Snapshot output format.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphExportFormat,
    /// File path to write output instead of stdout.
    #[arg(long)]
    pub output: Option<PathBuf>,
    // Hidden flag. In the snapshot path it forces the JSON format and stdout
    // output, overriding both `--format` and `--output`.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; clap also reads it from the
    // SQLITE_GRAPHRAG_DB_PATH environment variable. Passed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
72
// Arguments of `graph traverse`, consumed by `run_traverse`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Traverse relationships from an entity with default depth (2)\n  \
    sqlite-graphrag graph traverse --from acme-corp\n\n  \
    # Increase traversal depth to 3 hops\n  \
    sqlite-graphrag graph traverse --from acme-corp --depth 3\n\n  \
    # Traverse within a specific namespace\n  \
    sqlite-graphrag graph traverse --from acme-corp --namespace project-x\n\n  \
NOTES:\n  \
    Output is always JSON. The `hops` array contains each reachable entity\n  \
    with its relation, direction (inbound/outbound), weight, and depth level.")]
pub struct GraphTraverseArgs {
    /// Root entity name for the traversal.
    #[arg(long)]
    pub from: String,
    /// Maximum traversal depth.
    #[arg(long, default_value_t = 2u32)]
    pub depth: u32,
    // Namespace to search. When unset, `run` copies the parent `--namespace`;
    // the final value is resolved by `crate::namespace::resolve_namespace`.
    #[arg(long)]
    pub namespace: Option<String>,
    // Only `json` is accepted; `run_traverse` ignores the parsed value.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphTraverseFormat,
    // Hidden flag; `run_traverse` never reads it.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override (also read from SQLITE_GRAPHRAG_DB_PATH). When
    // unset, `run` copies the parent `--db`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
100
// Arguments of `graph stats`, consumed by `run_stats`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Show stats for all namespaces (human-readable text)\n  \
    sqlite-graphrag graph stats --format text\n\n  \
    # Show stats as structured JSON\n  \
    sqlite-graphrag graph stats --format json\n\n  \
    # Show stats for a specific namespace\n  \
    sqlite-graphrag graph stats --namespace project-x --format text\n\n  \
NOTES:\n  \
    Reports node_count, edge_count, avg_degree, and max_degree.\n  \
    Default format is JSON. Use `--format text` for a compact single-line summary.")]
pub struct GraphStatsArgs {
    // Restricts the counts to one namespace; `None` aggregates over all rows.
    // When unset, `run` copies the parent `--namespace`.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Output format for the stats response.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphStatsFormat,
    // Hidden flag; when set, `run_stats` emits JSON regardless of `--format`.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override (also read from SQLITE_GRAPHRAG_DB_PATH). When
    // unset, `run` copies the parent `--db`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
123
124/// Field to sort entities by in `graph entities`.
125#[derive(Debug, Clone, Copy, clap::ValueEnum)]
126pub enum EntitySortField {
127    /// Sort alphabetically by entity name.
128    Name,
129    /// Sort by degree (total number of relationships, descending by default).
130    Degree,
131    /// Sort by entity creation timestamp.
132    CreatedAt,
133}
134
/// Sort direction for `graph entities`.
#[derive(Debug, Clone, Copy, Default, clap::ValueEnum)]
pub enum SortOrder {
    // Ascending; this is both the `Default` value and the `--order` CLI default.
    #[default]
    Asc,
    // Descending.
    Desc,
}
142
// Arguments of `graph entities`, consumed by `run_entities`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # List all entities (default limit applies)\n  \
    sqlite-graphrag graph entities\n\n  \
    # Filter by entity type\n  \
    sqlite-graphrag graph entities --entity-type person\n\n  \
    # Filter by namespace and type\n  \
    sqlite-graphrag graph entities --namespace project-x --entity-type concept\n\n  \
    # Paginate results (skip first 20, return next 10)\n  \
    sqlite-graphrag graph entities --offset 20 --limit 10\n\n  \
    # Sort by degree descending (most connected first)\n  \
    sqlite-graphrag graph entities --sort-by degree --order desc\n\n  \
    # Sort by creation date ascending\n  \
    sqlite-graphrag graph entities --sort-by created-at --order asc\n\n  \
NOTES:\n  \
    Output is always JSON with `entities`, `total_count`, `limit`, and `offset` fields.\n  \
    Entity types are strings extracted by GLiNER NER (e.g. `person`, `organization`, `location`).")]
pub struct GraphEntitiesArgs {
    // Restricts the listing to one namespace; `None` lists every namespace.
    // When unset, `run` copies the parent `--namespace`.
    #[arg(long)]
    pub namespace: Option<String>,
    /// Filter by entity type (one of the 13 canonical types).
    #[arg(long, value_enum)]
    pub entity_type: Option<EntityType>,
    /// Maximum number of results to return.
    #[arg(long, default_value_t = crate::constants::K_GRAPH_ENTITIES_DEFAULT_LIMIT)]
    pub limit: usize,
    /// Number of results to skip for pagination.
    #[arg(long, default_value_t = 0usize)]
    pub offset: usize,
    /// Sort entities by this field. When omitted, the default order is by name ascending.
    // The explicit `help` below is the later attribute and is what clap shows.
    #[arg(long, value_enum, help = "Sort entities by field")]
    pub sort_by: Option<EntitySortField>,
    /// Sort direction: `asc` (default) or `desc`.
    // Applies to the name ordering as well when `--sort-by` is omitted
    // (see `build_order_by`).
    #[arg(long, value_enum, default_value_t = SortOrder::Asc, help = "Sort order")]
    pub order: SortOrder,
    // Hidden flag; `run_entities` never reads it.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override (also read from SQLITE_GRAPHRAG_DB_PATH). When
    // unset, `run` copies the parent `--db`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
183
/// One entity as serialized in the snapshot export.
///
/// Built in `run_entities_snapshot` from the rows returned by
/// `entities::list_entities`; `kind` and `type` always carry the same value.
#[derive(Serialize, Clone)]
struct NodeOut {
    /// Entity id as returned by the storage layer.
    id: i64,
    /// Entity name; also used as the endpoint label of exported edges.
    name: String,
    /// Namespace the entity belongs to.
    namespace: String,
    /// Deprecated alias of `type` kept for backward-compat with pre-v1.0.35 clients.
    /// New consumers MUST read `type` instead. Will be removed in a future major release.
    kind: String,
    /// Canonical entity classification (organization, concept, person, etc.).
    /// Mirrors `kind` while the deprecation window is active.
    #[serde(rename = "type")]
    r#type: String,
}
197
/// One relationship as serialized in the snapshot export.
///
/// Endpoints are entity *names*: `run_entities_snapshot` resolves the stored
/// source/target ids through its id-to-name map and skips edges it cannot resolve.
#[derive(Serialize)]
struct EdgeOut {
    /// Name of the source entity.
    from: String,
    /// Name of the target entity.
    to: String,
    /// Relation label copied from the stored relationship.
    relation: String,
    /// Relationship weight copied from the stored relationship.
    weight: f64,
}
205
/// JSON payload of the full snapshot export.
#[derive(Serialize)]
struct GraphSnapshot {
    /// All exported entities.
    nodes: Vec<NodeOut>,
    /// Clone of `nodes`; `run_entities_snapshot` fills both from the same list.
    entities: Vec<NodeOut>,
    /// All relationships whose two endpoints were resolved to entity names.
    edges: Vec<EdgeOut>,
    /// Wall-clock time spent building the snapshot, in milliseconds.
    elapsed_ms: u64,
}
213
/// One relationship discovered by `run_traverse`.
#[derive(Serialize)]
struct TraverseHop {
    /// Name of the entity at the far end of the relationship.
    entity: String,
    /// Relation label of the traversed relationship.
    relation: String,
    /// `"outbound"` when the expanded entity is the source of the
    /// relationship, `"inbound"` when it is the target.
    direction: String,
    /// Weight of the traversed relationship.
    weight: f64,
    /// Depth of the expanded entity plus one (the root has depth 0).
    depth: u32,
}
222
/// JSON payload emitted by `graph traverse`.
#[derive(Serialize)]
struct GraphTraverseResponse {
    /// Root entity name as given on the command line.
    from: String,
    /// Namespace the traversal ran in, after resolution.
    namespace: String,
    /// Maximum depth requested by the caller.
    depth: u32,
    /// Relationships discovered during the traversal.
    hops: Vec<TraverseHop>,
    /// Wall-clock time spent on the command, in milliseconds.
    elapsed_ms: u64,
}
231
/// JSON payload emitted by `graph stats` (also the source of the text summary).
#[derive(Serialize)]
struct GraphStatsResponse {
    /// Namespace filter that was applied, or `None` for all namespaces.
    namespace: Option<String>,
    /// Number of rows in `entities` matching the filter.
    node_count: i64,
    /// Number of relationships; with a namespace filter, those whose source
    /// entity is in that namespace.
    edge_count: i64,
    /// `2 * edge_count / node_count`, or `0.0` when there are no nodes.
    avg_degree: f64,
    /// Largest value of the `degree` column among the matching entities (0 if none).
    max_degree: i64,
    /// Wall-clock time spent on the command, in milliseconds.
    elapsed_ms: u64,
}
241
/// One row of the `graph entities` listing.
#[derive(Serialize)]
struct EntityItem {
    /// Entity id.
    id: i64,
    /// Entity name.
    name: String,
    /// Entity type; an empty string when the stored type is NULL.
    entity_type: String,
    /// Namespace the entity belongs to.
    namespace: String,
    /// Creation time formatted as `%Y-%m-%dT%H:%M:%SZ` from the stored Unix timestamp.
    created_at: String,
    /// Total number of relationships (inbound + outbound) for this entity.
    degree: u32,
    /// Optional description; omitted from the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    description: Option<String>,
}
254
/// JSON payload emitted by `graph entities`.
#[derive(Serialize)]
struct GraphEntitiesResponse {
    /// The requested page of entities.
    entities: Vec<EntityItem>,
    /// Number of entities matching the filters, ignoring `limit` and `offset`.
    total_count: i64,
    /// Page size that was requested.
    limit: usize,
    /// Number of rows that were skipped.
    offset: usize,
    /// Namespace filter that was applied, or `None` for all namespaces.
    namespace: Option<String>,
    /// Wall-clock time spent on the command, in milliseconds.
    elapsed_ms: u64,
}
264
265pub fn run(args: GraphArgs) -> Result<(), AppError> {
266    match args.subcommand {
267        None => run_entities_snapshot(
268            args.db.as_deref(),
269            args.namespace.as_deref(),
270            args.format,
271            args.json,
272            args.output.as_deref(),
273        ),
274        Some(GraphSubcommand::Traverse(mut a)) => {
275            if a.db.is_none() {
276                a.db = args.db;
277            }
278            if a.namespace.is_none() {
279                a.namespace = args.namespace;
280            }
281            run_traverse(a)
282        }
283        Some(GraphSubcommand::Stats(mut a)) => {
284            if a.db.is_none() {
285                a.db = args.db;
286            }
287            if a.namespace.is_none() {
288                a.namespace = args.namespace;
289            }
290            run_stats(a)
291        }
292        Some(GraphSubcommand::Entities(mut a)) => {
293            if a.db.is_none() {
294                a.db = args.db;
295            }
296            if a.namespace.is_none() {
297                a.namespace = args.namespace;
298            }
299            run_entities(a)
300        }
301    }
302}
303
304fn run_entities_snapshot(
305    db: Option<&str>,
306    namespace: Option<&str>,
307    format: GraphExportFormat,
308    json: bool,
309    output_path: Option<&std::path::Path>,
310) -> Result<(), AppError> {
311    let inicio = Instant::now();
312    let paths = AppPaths::resolve(db)?;
313
314    crate::storage::connection::ensure_db_ready(&paths)?;
315
316    let conn = open_ro(&paths.db)?;
317
318    let nodes_raw = entities::list_entities(&conn, namespace)?;
319    let edges_raw = entities::list_relationships_by_namespace(&conn, namespace)?;
320
321    let id_to_name: HashMap<i64, String> =
322        nodes_raw.iter().map(|n| (n.id, n.name.clone())).collect();
323
324    let nodes: Vec<NodeOut> = nodes_raw
325        .into_iter()
326        .map(|n| NodeOut {
327            id: n.id,
328            name: n.name,
329            namespace: n.namespace,
330            r#type: n.kind.clone(),
331            kind: n.kind,
332        })
333        .collect();
334
335    let mut edges: Vec<EdgeOut> = Vec::with_capacity(edges_raw.len());
336    let mut orphan_edges: usize = 0;
337    for r in edges_raw {
338        let from = match id_to_name.get(&r.source_id) {
339            Some(n) => n.clone(),
340            None => {
341                orphan_edges += 1;
342                tracing::warn!(target: "graph_export", source_id = r.source_id, relation = %r.relation, "edge skipped: source entity not found in id_to_name map");
343                continue;
344            }
345        };
346        let to = match id_to_name.get(&r.target_id) {
347            Some(n) => n.clone(),
348            None => {
349                orphan_edges += 1;
350                tracing::warn!(target: "graph_export", target_id = r.target_id, relation = %r.relation, "edge skipped: target entity not found in id_to_name map");
351                continue;
352            }
353        };
354        edges.push(EdgeOut {
355            from,
356            to,
357            relation: r.relation,
358            weight: r.weight,
359        });
360    }
361    if orphan_edges > 0 {
362        tracing::warn!(target: "graph_export",
363            count = orphan_edges,
364            "edges skipped due to orphaned entity references"
365        );
366    }
367
368    let effective_format = if json {
369        GraphExportFormat::Json
370    } else {
371        format
372    };
373
374    if effective_format == GraphExportFormat::Ndjson {
375        let elapsed_ms = inicio.elapsed().as_millis() as u64;
376        render_ndjson_streaming(&nodes, &edges, elapsed_ms, output_path)?;
377        return Ok(());
378    }
379
380    let rendered = match effective_format {
381        GraphExportFormat::Json => {
382            let entities = nodes.clone();
383            render_json(&GraphSnapshot {
384                nodes,
385                entities,
386                edges,
387                elapsed_ms: inicio.elapsed().as_millis() as u64,
388            })?
389        }
390        GraphExportFormat::Dot => render_dot(&nodes, &edges),
391        GraphExportFormat::Mermaid => render_mermaid(&nodes, &edges),
392        GraphExportFormat::Ndjson => unreachable!("ndjson handled above"),
393    };
394
395    if let Some(path) = output_path.filter(|_| !json) {
396        fs::write(path, &rendered)?;
397        output::emit_progress(&format!("wrote {}", path.display()));
398    } else {
399        output::emit_text(&rendered);
400    }
401
402    Ok(())
403}
404
405fn run_traverse(args: GraphTraverseArgs) -> Result<(), AppError> {
406    let inicio = Instant::now();
407    let _ = args.format;
408    let paths = AppPaths::resolve(args.db.as_deref())?;
409
410    crate::storage::connection::ensure_db_ready(&paths)?;
411
412    let conn = open_ro(&paths.db)?;
413    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
414
415    let from_id = entities::find_entity_id(&conn, &namespace, &args.from)?
416        .ok_or_else(|| AppError::NotFound(format!("entity '{}' not found", args.from)))?;
417
418    let all_rels = entities::list_relationships_by_namespace(&conn, Some(&namespace))?;
419    let all_entities = entities::list_entities(&conn, Some(&namespace))?;
420    let id_to_name: HashMap<i64, String> = all_entities
421        .iter()
422        .map(|e| (e.id, e.name.clone()))
423        .collect();
424
425    let mut hops: Vec<TraverseHop> = Vec::with_capacity(16);
426    let mut visited: std::collections::HashSet<i64> =
427        std::collections::HashSet::with_capacity(args.depth as usize * 10);
428    let mut frontier: Vec<(i64, u32)> = vec![(from_id, 0)];
429
430    while let Some((current_id, current_depth)) = frontier.pop() {
431        if current_depth >= args.depth || visited.contains(&current_id) {
432            continue;
433        }
434        visited.insert(current_id);
435
436        for rel in &all_rels {
437            if rel.source_id == current_id {
438                if let Some(target_name) = id_to_name.get(&rel.target_id) {
439                    hops.push(TraverseHop {
440                        entity: target_name.clone(),
441                        relation: rel.relation.clone(),
442                        direction: "outbound".to_string(),
443                        weight: rel.weight,
444                        depth: current_depth + 1,
445                    });
446                    frontier.push((rel.target_id, current_depth + 1));
447                }
448            } else if rel.target_id == current_id {
449                if let Some(source_name) = id_to_name.get(&rel.source_id) {
450                    hops.push(TraverseHop {
451                        entity: source_name.clone(),
452                        relation: rel.relation.clone(),
453                        direction: "inbound".to_string(),
454                        weight: rel.weight,
455                        depth: current_depth + 1,
456                    });
457                    frontier.push((rel.source_id, current_depth + 1));
458                }
459            }
460        }
461    }
462
463    output::emit_json(&GraphTraverseResponse {
464        from: args.from,
465        namespace,
466        depth: args.depth,
467        hops,
468        elapsed_ms: inicio.elapsed().as_millis() as u64,
469    })?;
470
471    Ok(())
472}
473
474fn run_stats(args: GraphStatsArgs) -> Result<(), AppError> {
475    let inicio = Instant::now();
476    let paths = AppPaths::resolve(args.db.as_deref())?;
477
478    crate::storage::connection::ensure_db_ready(&paths)?;
479
480    let conn = open_ro(&paths.db)?;
481    let ns = args.namespace.as_deref();
482
483    let node_count: i64 = if let Some(n) = ns {
484        conn.query_row(
485            "SELECT COUNT(*) FROM entities WHERE namespace = ?1",
486            rusqlite::params![n],
487            |r| r.get(0),
488        )?
489    } else {
490        conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?
491    };
492
493    let edge_count: i64 = if let Some(n) = ns {
494        conn.query_row(
495            "SELECT COUNT(*) FROM relationships r
496             JOIN entities s ON s.id = r.source_id
497             WHERE s.namespace = ?1",
498            rusqlite::params![n],
499            |r| r.get(0),
500        )?
501    } else {
502        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?
503    };
504
505    let max_degree: i64 = if let Some(n) = ns {
506        conn.query_row(
507            "SELECT COALESCE(MAX(degree), 0) FROM entities WHERE namespace = ?1",
508            rusqlite::params![n],
509            |r| r.get(0),
510        )?
511    } else {
512        conn.query_row("SELECT COALESCE(MAX(degree), 0) FROM entities", [], |r| {
513            r.get(0)
514        })?
515    };
516
517    // avg_degree = 2 * edge_count / node_count (each edge contributes 2 to total degree sum).
518    let avg_degree = if node_count > 0 {
519        2.0 * (edge_count as f64) / (node_count as f64)
520    } else {
521        0.0
522    };
523
524    let resp = GraphStatsResponse {
525        namespace: args.namespace,
526        node_count,
527        edge_count,
528        avg_degree,
529        max_degree,
530        elapsed_ms: inicio.elapsed().as_millis() as u64,
531    };
532
533    let effective_format = if args.json {
534        GraphStatsFormat::Json
535    } else {
536        args.format
537    };
538
539    match effective_format {
540        GraphStatsFormat::Json => output::emit_json(&resp)?,
541        GraphStatsFormat::Text => {
542            output::emit_text(&format!(
543                "nodes={} edges={} avg_degree={:.2} max_degree={} namespace={}",
544                resp.node_count,
545                resp.edge_count,
546                resp.avg_degree,
547                resp.max_degree,
548                resp.namespace.as_deref().unwrap_or("all"),
549            ));
550        }
551    }
552
553    Ok(())
554}
555
556/// Builds the `ORDER BY` clause fragment from sort options.
557///
558/// Returns a static SQL fragment such as `ORDER BY e.name ASC`.
559fn build_order_by(sort_by: Option<EntitySortField>, order: SortOrder) -> &'static str {
560    // The combinations are enumerated as static strings to avoid
561    // format!() allocations in the hot path and satisfy the borrow checker
562    // when the string is used inside conn.prepare().
563    match (sort_by, order) {
564        (None, SortOrder::Asc) | (Some(EntitySortField::Name), SortOrder::Asc) => {
565            "ORDER BY e.name ASC"
566        }
567        (Some(EntitySortField::Name), SortOrder::Desc) => "ORDER BY e.name DESC",
568        (Some(EntitySortField::Degree), SortOrder::Asc) => "ORDER BY degree ASC",
569        (Some(EntitySortField::Degree), SortOrder::Desc) => "ORDER BY degree DESC",
570        (Some(EntitySortField::CreatedAt), SortOrder::Asc) => "ORDER BY e.created_at ASC",
571        (Some(EntitySortField::CreatedAt), SortOrder::Desc) => "ORDER BY e.created_at DESC",
572        // Fallback: None/Desc → sort by name desc (consistent with dir variable).
573        (None, SortOrder::Desc) => "ORDER BY e.name DESC",
574    }
575}
576
577fn run_entities(args: GraphEntitiesArgs) -> Result<(), AppError> {
578    let inicio = Instant::now();
579    let paths = AppPaths::resolve(args.db.as_deref())?;
580
581    crate::storage::connection::ensure_db_ready(&paths)?;
582
583    let conn = open_ro(&paths.db)?;
584
585    let row_to_item = |r: &rusqlite::Row<'_>| -> rusqlite::Result<EntityItem> {
586        let ts: i64 = r.get(4)?;
587        let created_at = chrono::DateTime::from_timestamp(ts, 0)
588            .unwrap_or_default()
589            .format("%Y-%m-%dT%H:%M:%SZ")
590            .to_string();
591        Ok(EntityItem {
592            id: r.get(0)?,
593            name: r.get(1)?,
594            entity_type: r.get(2)?,
595            namespace: r.get(3)?,
596            created_at,
597            degree: r.get(5)?,
598            description: r.get(6)?,
599        })
600    };
601
602    let limit_i = args.limit as i64;
603    let offset_i = args.offset as i64;
604    let order_clause = build_order_by(args.sort_by, args.order);
605
606    let base_select = "SELECT e.id, e.name, COALESCE(e.type, ''), e.namespace, e.created_at,
607                        (SELECT COUNT(*) FROM relationships r
608                         WHERE r.source_id = e.id OR r.target_id = e.id) AS degree,
609                        e.description
610                 FROM entities e";
611
612    let (total_count, items) = match (
613        args.namespace.as_deref(),
614        args.entity_type.map(|et| et.as_str()),
615    ) {
616        (Some(ns), Some(et)) => {
617            let count: i64 = conn.query_row(
618                "SELECT COUNT(*) FROM entities WHERE namespace = ?1 AND type = ?2",
619                rusqlite::params![ns, et],
620                |r| r.get(0),
621            )?;
622            let sql = format!(
623                "{base_select} WHERE e.namespace = ?1 AND e.type = ?2 {order_clause} LIMIT ?3 OFFSET ?4"
624            );
625            let mut stmt = conn.prepare(&sql)?;
626            let rows = stmt
627                .query_map(rusqlite::params![ns, et, limit_i, offset_i], row_to_item)?
628                .collect::<rusqlite::Result<Vec<_>>>()?;
629            (count, rows)
630        }
631        (Some(ns), None) => {
632            let count: i64 = conn.query_row(
633                "SELECT COUNT(*) FROM entities WHERE namespace = ?1",
634                rusqlite::params![ns],
635                |r| r.get(0),
636            )?;
637            let sql =
638                format!("{base_select} WHERE e.namespace = ?1 {order_clause} LIMIT ?2 OFFSET ?3");
639            let mut stmt = conn.prepare(&sql)?;
640            let rows = stmt
641                .query_map(rusqlite::params![ns, limit_i, offset_i], row_to_item)?
642                .collect::<rusqlite::Result<Vec<_>>>()?;
643            (count, rows)
644        }
645        (None, Some(et)) => {
646            let count: i64 = conn.query_row(
647                "SELECT COUNT(*) FROM entities WHERE type = ?1",
648                rusqlite::params![et],
649                |r| r.get(0),
650            )?;
651            let sql = format!("{base_select} WHERE e.type = ?1 {order_clause} LIMIT ?2 OFFSET ?3");
652            let mut stmt = conn.prepare(&sql)?;
653            let rows = stmt
654                .query_map(rusqlite::params![et, limit_i, offset_i], row_to_item)?
655                .collect::<rusqlite::Result<Vec<_>>>()?;
656            (count, rows)
657        }
658        (None, None) => {
659            let count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
660            let sql = format!("{base_select} {order_clause} LIMIT ?1 OFFSET ?2");
661            let mut stmt = conn.prepare(&sql)?;
662            let rows = stmt
663                .query_map(rusqlite::params![limit_i, offset_i], row_to_item)?
664                .collect::<rusqlite::Result<Vec<_>>>()?;
665            (count, rows)
666        }
667    };
668
669    output::emit_json(&GraphEntitiesResponse {
670        entities: items,
671        total_count,
672        limit: args.limit,
673        offset: args.offset,
674        namespace: args.namespace,
675        elapsed_ms: inicio.elapsed().as_millis() as u64,
676    })
677}
678
679fn render_json(snapshot: &GraphSnapshot) -> Result<String, AppError> {
680    Ok(serde_json::to_string_pretty(snapshot)?)
681}
682
683/// Streams the graph as NDJSON: one object per node, one per edge, then a summary.
684///
685/// Each line is flushed immediately so consumers can process incrementally.
686/// When `output_path` is `Some`, lines are written to the file; otherwise to stdout.
687fn render_ndjson_streaming(
688    nodes: &[NodeOut],
689    edges: &[EdgeOut],
690    elapsed_ms: u64,
691    output_path: Option<&std::path::Path>,
692) -> Result<(), AppError> {
693    #[derive(serde::Serialize)]
694    struct NdjsonNode<'a> {
695        kind: &'static str,
696        id: i64,
697        name: &'a str,
698        namespace: &'a str,
699        #[serde(rename = "type")]
700        r#type: &'a str,
701    }
702    #[derive(serde::Serialize)]
703    struct NdjsonEdge<'a> {
704        kind: &'static str,
705        from: &'a str,
706        to: &'a str,
707        relation: &'a str,
708        weight: f64,
709    }
710    #[derive(serde::Serialize)]
711    struct NdjsonSummary {
712        kind: &'static str,
713        nodes: usize,
714        edges: usize,
715        elapsed_ms: u64,
716    }
717
718    use std::io::Write as IoWrite;
719
720    let mut buf: Vec<u8> = Vec::with_capacity(4096);
721
722    let emit_line =
723        |buf: &mut Vec<u8>, line: &str, path: Option<&std::path::Path>| -> Result<(), AppError> {
724            buf.clear();
725            buf.extend_from_slice(line.as_bytes());
726            buf.push(b'\n');
727            if let Some(p) = path {
728                let mut f = std::fs::OpenOptions::new()
729                    .create(true)
730                    .append(true)
731                    .open(p)
732                    .map_err(AppError::Io)?;
733                f.write_all(buf).map_err(AppError::Io)?;
734            } else {
735                output::emit_text(line);
736            }
737            Ok(())
738        };
739
740    // Truncate the output file once before starting (avoids re-opening with append for every line).
741    if let Some(p) = output_path {
742        fs::write(p, b"")?;
743    }
744
745    for node in nodes {
746        let obj = NdjsonNode {
747            kind: "node",
748            id: node.id,
749            name: &node.name,
750            namespace: &node.namespace,
751            r#type: &node.r#type,
752        };
753        let line = serde_json::to_string(&obj)?;
754        emit_line(&mut buf, &line, output_path)?;
755    }
756
757    for edge in edges {
758        let obj = NdjsonEdge {
759            kind: "edge",
760            from: &edge.from,
761            to: &edge.to,
762            relation: &edge.relation,
763            weight: edge.weight,
764        };
765        let line = serde_json::to_string(&obj)?;
766        emit_line(&mut buf, &line, output_path)?;
767    }
768
769    let summary = NdjsonSummary {
770        kind: "summary",
771        nodes: nodes.len(),
772        edges: edges.len(),
773        elapsed_ms,
774    };
775    let line = serde_json::to_string(&summary)?;
776    emit_line(&mut buf, &line, output_path)?;
777
778    Ok(())
779}
780
/// Turns an entity name into a bare DOT identifier.
///
/// Every character other than an ASCII letter, ASCII digit or `_` is replaced
/// by `_`. Because an unquoted DOT identifier must be non-empty and must not
/// start with a digit, a leading `_` is added when `raw` is empty or begins
/// with an ASCII digit. Distinct names can still map to the same identifier
/// (for example `a-b` and `a_b`).
fn sanitize_dot_id(raw: &str) -> String {
    let mut id = String::with_capacity(raw.len() + 1);
    // `map_or(true, ..)` covers the empty string as well as a leading digit.
    if raw.chars().next().map_or(true, |c| c.is_ascii_digit()) {
        id.push('_');
    }
    id.extend(raw.chars().map(|c| {
        if c.is_ascii_alphanumeric() || c == '_' {
            c
        } else {
            '_'
        }
    }));
    id
}
792
793fn render_dot(nodes: &[NodeOut], edges: &[EdgeOut]) -> String {
794    use std::fmt::Write;
795    let mut out = String::with_capacity(nodes.len() * 80 + edges.len() * 60 + 300);
796    out.push_str("digraph sqlite_graphrag {\n");
797    out.push_str("  graph [bgcolor=\"white\", fontname=\"Helvetica Neue\", fontsize=12, rankdir=LR, nodesep=0.8, ranksep=1.2];\n");
798    out.push_str("  node [shape=box, style=\"filled,rounded\", fillcolor=\"#F2F2F7\", fontname=\"Helvetica Neue\", fontsize=11, color=\"#C7C7CC\"];\n");
799    out.push_str("  edge [fontname=\"Helvetica Neue\", fontsize=9, color=\"#8E8E93\"];\n");
800    for node in nodes {
801        let node_id = sanitize_dot_id(&node.name);
802        let escaped = node.name.replace('"', "\\\"");
803        let _ = writeln!(out, "  {node_id} [label=\"{escaped}\"];");
804    }
805    for edge in edges {
806        let from = sanitize_dot_id(&edge.from);
807        let to = sanitize_dot_id(&edge.to);
808        let label = edge.relation.replace('"', "\\\"");
809        let _ = writeln!(out, "  {from} -> {to} [label=\"{label}\"];");
810    }
811    out.push_str("}\n");
812    out
813}
814
/// Turns an entity name into a Mermaid node identifier by replacing every
/// character that is not an ASCII letter, ASCII digit or `_` with `_`.
///
/// The result has exactly as many characters as `raw`; distinct names can map
/// to the same identifier.
fn sanitize_mermaid_id(raw: &str) -> String {
    let mut id = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let keep = ch == '_' || ch.is_ascii_alphanumeric();
        id.push(if keep { ch } else { '_' });
    }
    id
}
826
827fn render_mermaid(nodes: &[NodeOut], edges: &[EdgeOut]) -> String {
828    use std::fmt::Write;
829    let mut out = String::with_capacity(nodes.len() * 50 + edges.len() * 40 + 200);
830    out.push_str("%%{init: {'theme': 'neutral', 'themeVariables': {'primaryColor': '#F2F2F7', 'primaryTextColor': '#1C1C1E', 'primaryBorderColor': '#C7C7CC', 'lineColor': '#8E8E93'}}}%%\n");
831    out.push_str("graph LR\n");
832    for node in nodes {
833        let id = sanitize_mermaid_id(&node.name);
834        let escaped = node.name.replace('"', "\\\"");
835        let _ = writeln!(out, "  {id}[\"{escaped}\"]");
836    }
837    for edge in edges {
838        let from = sanitize_mermaid_id(&edge.from);
839        let to = sanitize_mermaid_id(&edge.to);
840        let label = edge.relation.replace('|', "\\|");
841        let _ = writeln!(out, "  {from} -->|{label}| {to}");
842    }
843    out
844}
845
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::{Cli, Commands};
    use clap::Parser;

    /// Fixture: a node named `test-entity` in namespace `default` whose `kind`
    /// and `type` fields both hold the given `kind`.
    fn make_node(kind: &str) -> NodeOut {
        let label = kind.to_string();
        NodeOut {
            id: 1,
            name: String::from("test-entity"),
            namespace: String::from("default"),
            kind: label.clone(),
            r#type: label,
        }
    }

    /// Unwraps the `graph` command from a parsed CLI and yields its optional
    /// nested subcommand; any other top-level command is a test bug.
    fn graph_subcommand(cli: Cli) -> Option<GraphSubcommand> {
        match cli.command {
            Some(Commands::Graph(graph_args)) => graph_args.subcommand,
            _ => unreachable!("unexpected command"),
        }
    }

    /// Test-local copy of the average-degree formula: `2 * edges / nodes`,
    /// or `0.0` when there are no nodes.
    fn compute_avg_degree(node_count: i64, edge_count: i64) -> f64 {
        if node_count <= 0 {
            return 0.0;
        }
        2.0 * (edge_count as f64) / (node_count as f64)
    }

    // --- Serialization of snapshot nodes -------------------------------------

    #[test]
    fn node_out_type_duplicates_kind() {
        let value = serde_json::to_value(&make_node("agent")).expect("serialization must work");
        let (kind, ty) = (&value["kind"], &value["type"]);
        assert_eq!(kind, ty);
        assert_eq!(*kind, "agent");
        assert_eq!(*ty, "agent");
    }

    #[test]
    fn node_out_serializes_all_fields() {
        let value = serde_json::to_value(&make_node("document")).expect("serialization must work");
        for key in ["id", "name", "namespace", "kind", "type"] {
            assert!(value.get(key).is_some());
        }
    }

    #[test]
    fn graph_snapshot_serializes_nodes_with_type() {
        let snapshot = GraphSnapshot {
            nodes: vec![make_node("concept")],
            entities: vec![make_node("concept")],
            edges: Vec::new(),
            elapsed_ms: 0,
        };
        let rendered = render_json(&snapshot).expect("rendering must work");
        let parsed: serde_json::Value = serde_json::from_str(&rendered).expect("valid json");
        let head = &parsed["nodes"][0];
        assert_eq!(head["kind"], head["type"]);
        assert_eq!(head["type"], "concept");
    }

    // --- Serialization of subcommand responses -------------------------------

    #[test]
    fn graph_traverse_response_serializes_correctly() {
        let hop = TraverseHop {
            entity: "entity-b".to_string(),
            relation: "uses".to_string(),
            direction: "outbound".to_string(),
            weight: 1.0,
            depth: 1,
        };
        let response = GraphTraverseResponse {
            from: "entity-a".to_string(),
            namespace: "global".to_string(),
            depth: 2,
            hops: vec![hop],
            elapsed_ms: 5,
        };
        let value = serde_json::to_value(&response).unwrap();
        assert_eq!(value["from"], "entity-a");
        assert_eq!(value["depth"], 2);
        assert!(value["hops"].is_array());
        assert_eq!(value["hops"][0]["direction"], "outbound");
    }

    #[test]
    fn graph_stats_response_serializes_correctly() {
        let response = GraphStatsResponse {
            namespace: Some("global".to_string()),
            node_count: 10,
            edge_count: 15,
            avg_degree: 3.0,
            max_degree: 7,
            elapsed_ms: 2,
        };
        let value = serde_json::to_value(&response).unwrap();
        assert_eq!(value["node_count"], 10);
        assert_eq!(value["edge_count"], 15);
        assert_eq!(value["avg_degree"], 3.0);
        assert_eq!(value["max_degree"], 7);
    }

    // --- Average degree formula ----------------------------------------------

    #[test]
    fn avg_degree_is_zero_when_no_nodes() {
        assert_eq!(compute_avg_degree(0, 0), 0.0);
    }

    #[test]
    fn avg_degree_is_zero_when_nodes_but_no_edges() {
        // Regression guard for the "L1" bug, where this case yielded 1.0 rather than 0.0.
        assert_eq!(compute_avg_degree(2, 0), 0.0);
    }

    #[test]
    fn avg_degree_is_two_when_triangle() {
        // A triangle has 3 nodes and 3 edges, so 2 * 3 / 3 = 2.0.
        assert_eq!(compute_avg_degree(3, 3), 2.0);
    }

    // --- Entity listing payloads ---------------------------------------------

    #[test]
    fn graph_entities_response_serializes_required_fields() {
        let only_entity = EntityItem {
            id: 1,
            name: "claude-code".to_string(),
            entity_type: "agent".to_string(),
            namespace: "global".to_string(),
            created_at: "2026-01-01T00:00:00Z".to_string(),
            degree: 0,
            description: None,
        };
        let response = GraphEntitiesResponse {
            entities: vec![only_entity],
            total_count: 1,
            limit: 50,
            offset: 0,
            namespace: Some("global".to_string()),
            elapsed_ms: 3,
        };
        let value = serde_json::to_value(&response).unwrap();
        assert!(value["entities"].is_array());
        let first = &value["entities"][0];
        assert_eq!(first["name"], "claude-code");
        assert_eq!(first["entity_type"], "agent");
        assert_eq!(value["total_count"], 1);
        assert_eq!(value["limit"], 50);
        assert_eq!(value["offset"], 0);
        assert_eq!(value["namespace"], "global");
    }

    #[test]
    fn entity_item_serializes_all_fields() {
        let entity = EntityItem {
            id: 42,
            name: "test-entity".to_string(),
            entity_type: "concept".to_string(),
            namespace: "project-a".to_string(),
            created_at: "2026-04-19T12:00:00Z".to_string(),
            degree: 3,
            description: Some("test description".to_string()),
        };
        let value = serde_json::to_value(&entity).unwrap();
        assert_eq!(value["id"], 42);
        let expected_strings = [
            ("name", "test-entity"),
            ("entity_type", "concept"),
            ("namespace", "project-a"),
            ("created_at", "2026-04-19T12:00:00Z"),
        ];
        for (key, expected) in expected_strings {
            assert_eq!(value[key], expected);
        }
    }

    #[test]
    fn entity_item_entity_type_is_never_null() {
        // P2-C: an empty DB column must still serialize as a string, never as null.
        let entity = EntityItem {
            id: 1,
            name: "sem-tipo".to_string(),
            entity_type: String::new(),
            namespace: "ns".to_string(),
            created_at: "2026-01-01T00:00:00Z".to_string(),
            degree: 0,
            description: None,
        };
        let value = serde_json::to_value(&entity).unwrap();
        let entity_type = &value["entity_type"];
        assert!(!entity_type.is_null(), "entity_type must not be null");
        assert!(entity_type.is_string());
    }

    #[test]
    fn graph_entities_response_has_no_items_key() {
        let response = GraphEntitiesResponse {
            entities: Vec::new(),
            total_count: 0,
            limit: 50,
            offset: 0,
            namespace: None,
            elapsed_ms: 0,
        };
        let value = serde_json::to_value(&response).unwrap();
        assert!(
            value.get("items").is_none(),
            "legacy 'items' key must not appear"
        );
        assert!(
            value.get("entities").is_some(),
            "'entities' key must be present"
        );
    }

    // --- CLI parsing: --format ------------------------------------------------

    #[test]
    fn graph_traverse_cli_rejects_format_dot() {
        let argv = [
            "sqlite-graphrag",
            "graph",
            "traverse",
            "--from",
            "AuthDecision",
            "--format",
            "dot",
        ];
        let outcome = Cli::try_parse_from(argv);
        assert!(outcome.is_err(), "graph traverse must reject format=dot");
    }

    #[test]
    fn graph_stats_cli_accepts_format_text() {
        let argv = ["sqlite-graphrag", "graph", "stats", "--format", "text"];
        let cli = Cli::try_parse_from(argv).expect("graph stats --format text must be accepted");

        match graph_subcommand(cli) {
            Some(GraphSubcommand::Stats(stats)) => {
                assert_eq!(stats.format, GraphStatsFormat::Text);
            }
            _ => unreachable!("unexpected subcommand"),
        }
    }

    #[test]
    fn graph_stats_cli_rejects_format_mermaid() {
        let argv = ["sqlite-graphrag", "graph", "stats", "--format", "mermaid"];
        let outcome = Cli::try_parse_from(argv);
        assert!(outcome.is_err(), "graph stats must reject format=mermaid");
    }

    // --- ORDER BY clause construction ------------------------------------------

    #[test]
    fn build_order_by_defaults_to_name_asc() {
        assert_eq!(build_order_by(None, SortOrder::Asc), "ORDER BY e.name ASC");
    }

    #[test]
    fn build_order_by_name_desc() {
        let sort_field = Some(EntitySortField::Name);
        assert_eq!(
            build_order_by(sort_field, SortOrder::Desc),
            "ORDER BY e.name DESC"
        );
    }

    #[test]
    fn build_order_by_degree_desc() {
        let sort_field = Some(EntitySortField::Degree);
        assert_eq!(
            build_order_by(sort_field, SortOrder::Desc),
            "ORDER BY degree DESC"
        );
    }

    #[test]
    fn build_order_by_degree_asc() {
        let sort_field = Some(EntitySortField::Degree);
        assert_eq!(
            build_order_by(sort_field, SortOrder::Asc),
            "ORDER BY degree ASC"
        );
    }

    #[test]
    fn build_order_by_created_at_asc() {
        let sort_field = Some(EntitySortField::CreatedAt);
        assert_eq!(
            build_order_by(sort_field, SortOrder::Asc),
            "ORDER BY e.created_at ASC"
        );
    }

    #[test]
    fn build_order_by_created_at_desc() {
        let sort_field = Some(EntitySortField::CreatedAt);
        assert_eq!(
            build_order_by(sort_field, SortOrder::Desc),
            "ORDER BY e.created_at DESC"
        );
    }

    // --- CLI parsing: --sort-by / --order --------------------------------------

    #[test]
    fn graph_entities_cli_accepts_sort_by_degree_desc() {
        let argv = [
            "sqlite-graphrag",
            "graph",
            "entities",
            "--sort-by",
            "degree",
            "--order",
            "desc",
        ];
        let cli = Cli::try_parse_from(argv)
            .expect("graph entities --sort-by degree --order desc must parse");
        match graph_subcommand(cli) {
            Some(GraphSubcommand::Entities(entities_args)) => {
                assert!(matches!(
                    entities_args.sort_by,
                    Some(EntitySortField::Degree)
                ));
                assert!(matches!(entities_args.order, SortOrder::Desc));
            }
            _ => unreachable!("unexpected subcommand"),
        }
    }

    #[test]
    fn graph_entities_cli_accepts_sort_by_created_at_asc() {
        let argv = [
            "sqlite-graphrag",
            "graph",
            "entities",
            "--sort-by",
            "created-at",
        ];
        let cli =
            Cli::try_parse_from(argv).expect("graph entities --sort-by created-at must parse");
        match graph_subcommand(cli) {
            Some(GraphSubcommand::Entities(entities_args)) => {
                assert!(matches!(
                    entities_args.sort_by,
                    Some(EntitySortField::CreatedAt)
                ));
                assert!(matches!(entities_args.order, SortOrder::Asc));
            }
            _ => unreachable!("unexpected subcommand"),
        }
    }

    #[test]
    fn graph_entities_cli_defaults_to_no_sort_by() {
        let argv = ["sqlite-graphrag", "graph", "entities"];
        let cli =
            Cli::try_parse_from(argv).expect("graph entities must parse without sort flags");
        match graph_subcommand(cli) {
            Some(GraphSubcommand::Entities(entities_args)) => {
                assert!(
                    entities_args.sort_by.is_none(),
                    "sort_by must default to None"
                );
                assert!(
                    matches!(entities_args.order, SortOrder::Asc),
                    "order must default to Asc"
                );
            }
            _ => unreachable!("unexpected subcommand"),
        }
    }
}