// sqlite_graphrag/commands/graph_export.rs

1use crate::cli::GraphExportFormat;
2use crate::errors::AppError;
3use crate::i18n::erros;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use crate::storage::entities;
8use serde::Serialize;
9use std::collections::HashMap;
10use std::fs;
11use std::path::PathBuf;
12use std::time::Instant;
13
// Optional sub-subcommands of the graph command. When none is given, `run`
// falls back to exporting the full entity snapshot (the pre-subcommand
// behavior).
// NOTE: the `///` comments in this enum are left verbatim because clap derive
// can surface doc comments as CLI help text; changing them would change the
// program's output.
/// Sub-subcomandos opcionais. Quando ausente, o comportamento padrão exporta
/// o snapshot completo de entidades (compatível com versões anteriores).
#[derive(clap::Subcommand)]
pub enum GraphSubcommand {
    /// Traverse relationships from a starting entity using BFS
    Traverse(GraphTraverseArgs),
    /// Show graph statistics (node/edge counts, degree distribution)
    Stats(GraphStatsArgs),
    /// List entities stored in the graph with optional filters
    Entities(GraphEntitiesArgs),
}
25
// Top-level arguments of the graph command.
// `namespace`, `format`, `output` and `db` are only consumed by `run` when no
// subcommand is given (snapshot export); each subcommand carries its own
// argument struct.
// NOTE: `///` comments are kept verbatim because clap derive uses them as
// CLI help text.
#[derive(clap::Args)]
pub struct GraphArgs {
    // Optional subcommand; without one, the entity snapshot is exported.
    /// Subcomando opcional; sem subcomando exporta snapshot de entidades.
    #[command(subcommand)]
    pub subcommand: Option<GraphSubcommand>,
    // Namespace filter for the snapshot (default: all namespaces).
    /// Filtra por namespace (padrão: todos).
    #[arg(long)]
    pub namespace: Option<String>,
    // Output format of the snapshot (defaults to "json").
    /// Formato de saída do snapshot.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphExportFormat,
    // File path to write the rendered snapshot to instead of stdout.
    /// Caminho de arquivo para gravar a saída (em vez de stdout).
    #[arg(long)]
    pub output: Option<PathBuf>,
    // Hidden flag accepted for compatibility; it is never read in this file.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; can also be supplied via SQLITE_GRAPHRAG_DB_PATH.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
45
// Arguments of the `traverse` subcommand (consumed by `run_traverse`).
// NOTE: `///` comments are kept verbatim because clap derive uses them as
// CLI help text.
#[derive(clap::Args)]
pub struct GraphTraverseArgs {
    // Name of the entity the traversal starts from.
    /// Nome da entidade de origem para a travessia
    #[arg(long)]
    pub from: String,
    // Maximum traversal depth (default: 2).
    /// Profundidade máxima de travessia (default: 2)
    #[arg(long, default_value_t = 2u32)]
    pub depth: u32,
    // Namespace to traverse; resolved through `crate::namespace::resolve_namespace`.
    #[arg(long)]
    pub namespace: Option<String>,
    // NOTE(review): `run_traverse` never reads this field and always emits
    // JSON — confirm whether other formats were meant to be supported.
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphExportFormat,
    // Hidden flag accepted for compatibility; it is never read in this file.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; can also be supplied via SQLITE_GRAPHRAG_DB_PATH.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
63
// Arguments of the `stats` subcommand (consumed by `run_stats`).
// NOTE: `///` comments are kept verbatim because clap derive uses them as
// CLI help text.
#[derive(clap::Args)]
pub struct GraphStatsArgs {
    // Optional namespace filter; when absent, statistics cover every namespace.
    #[arg(long)]
    pub namespace: Option<String>,
    // Output format. `run_stats` emits JSON for `Json` and a single-line text
    // summary for both `Dot` and `Mermaid`.
    /// Formato de saída (json ou text).
    #[arg(long, value_enum, default_value = "json")]
    pub format: GraphExportFormat,
    // Hidden flag accepted for compatibility; it is never read in this file.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; can also be supplied via SQLITE_GRAPHRAG_DB_PATH.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
76
// Arguments of the `entities` subcommand (consumed by `run_entities`).
// NOTE: `///` comments are kept verbatim because clap derive uses them as
// CLI help text.
#[derive(clap::Args)]
pub struct GraphEntitiesArgs {
    // Optional namespace filter; when absent, entities of every namespace are listed.
    #[arg(long)]
    pub namespace: Option<String>,
    // Filter by entity type (e.g. person, concept, agent); matched against the
    // `type` column of the `entities` table.
    /// Filtrar por tipo de entidade (ex: person, concept, agent).
    #[arg(long)]
    pub entity_type: Option<String>,
    // Maximum number of results to return.
    /// Número máximo de resultados a retornar.
    #[arg(long, default_value_t = crate::constants::K_GRAPH_ENTITIES_DEFAULT_LIMIT)]
    pub limit: usize,
    // Number of results to skip (pagination).
    /// Número de resultados a pular (paginação).
    #[arg(long, default_value_t = 0usize)]
    pub offset: usize,
    // Hidden flag accepted for compatibility; it is never read in this file.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; can also be supplied via SQLITE_GRAPHRAG_DB_PATH.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
95
/// Serialized form of one graph node (entity) in the snapshot export.
#[derive(Serialize)]
struct NodeOut {
    /// Entity id as returned by the storage layer.
    id: i64,
    /// Entity name; also used to derive node identifiers in DOT/Mermaid output.
    name: String,
    /// Namespace the entity belongs to.
    namespace: String,
    /// Entity kind.
    kind: String,
    /// Duplicate of `kind`, kept for compatibility with docs that use `type`.
    #[serde(rename = "type")]
    r#type: String,
}
106
/// Serialized form of one relationship in the snapshot export.
#[derive(Serialize)]
struct EdgeOut {
    /// Name of the source entity (resolved from the relationship's `source_id`).
    from: String,
    /// Name of the target entity (resolved from the relationship's `target_id`).
    to: String,
    /// Relation label.
    relation: String,
    /// Relationship weight as stored.
    weight: f64,
}
114
/// JSON payload of the default snapshot export: all nodes and edges.
#[derive(Serialize)]
struct GraphSnapshot {
    /// Exported entities.
    nodes: Vec<NodeOut>,
    /// Exported relationships whose two endpoints are both among `nodes`.
    edges: Vec<EdgeOut>,
    /// Milliseconds elapsed between command start and rendering.
    elapsed_ms: u64,
}
121
/// One relationship encountered while traversing the graph.
#[derive(Serialize)]
struct TraverseHop {
    /// Name of the entity on the far side of the relationship.
    entity: String,
    /// Relation label.
    relation: String,
    /// `"outbound"` when the expanded node is the relationship's source,
    /// `"inbound"` when it is the target.
    direction: String,
    /// Relationship weight as stored.
    weight: f64,
    /// Depth of `entity` relative to the start node (start node = 0).
    depth: u32,
}
130
/// JSON response of the `traverse` subcommand.
#[derive(Serialize)]
struct GraphTraverseResponse {
    /// Name of the start entity, as given on the command line.
    from: String,
    /// Resolved namespace the traversal ran in.
    namespace: String,
    /// Requested maximum depth.
    depth: u32,
    /// Relationships encountered during the traversal.
    hops: Vec<TraverseHop>,
    /// Milliseconds elapsed since the command started.
    elapsed_ms: u64,
}
139
/// JSON response of the `stats` subcommand.
#[derive(Serialize)]
struct GraphStatsResponse {
    /// Namespace filter that was applied, if any.
    namespace: Option<String>,
    /// Number of rows in `entities` (within the namespace when filtered).
    node_count: i64,
    /// Number of rows in `relationships` (filtered by the source entity's
    /// namespace when a namespace is given).
    edge_count: i64,
    /// Average of the `degree` column over the selected entities (0.0 when empty).
    avg_degree: f64,
    /// Maximum of the `degree` column over the selected entities (0 when empty).
    max_degree: i64,
    /// Milliseconds elapsed since the command started.
    elapsed_ms: u64,
}
149
/// One entity row in the `entities` subcommand response.
#[derive(Serialize)]
struct EntityItem {
    /// Entity id.
    id: i64,
    /// Entity name.
    name: String,
    /// Value of the `type` column.
    entity_type: String,
    /// Namespace the entity belongs to.
    namespace: String,
    /// Creation time formatted as `%Y-%m-%dT%H:%M:%SZ`, derived from the
    /// integer `created_at` column interpreted as Unix seconds.
    created_at: String,
}
158
/// JSON response of the `entities` subcommand (one page of results).
#[derive(Serialize)]
struct GraphEntitiesResponse {
    /// Entities on the requested page.
    items: Vec<EntityItem>,
    /// Total number of entities matching the filters, ignoring pagination.
    total_count: i64,
    /// Page size that was requested.
    limit: usize,
    /// Number of rows that were skipped.
    offset: usize,
    /// Namespace filter that was applied, if any.
    namespace: Option<String>,
    /// Milliseconds elapsed since the command started.
    elapsed_ms: u64,
}
168
169pub fn run(args: GraphArgs) -> Result<(), AppError> {
170    match args.subcommand {
171        None => run_entities_snapshot(
172            args.db.as_deref(),
173            args.namespace.as_deref(),
174            args.format,
175            args.output.as_deref(),
176        ),
177        Some(GraphSubcommand::Traverse(a)) => run_traverse(a),
178        Some(GraphSubcommand::Stats(a)) => run_stats(a),
179        Some(GraphSubcommand::Entities(a)) => run_entities(a),
180    }
181}
182
183fn run_entities_snapshot(
184    db: Option<&str>,
185    namespace: Option<&str>,
186    format: GraphExportFormat,
187    output_path: Option<&std::path::Path>,
188) -> Result<(), AppError> {
189    let inicio = Instant::now();
190    let paths = AppPaths::resolve(db)?;
191
192    if !paths.db.exists() {
193        return Err(AppError::NotFound(erros::banco_nao_encontrado(
194            &paths.db.display().to_string(),
195        )));
196    }
197
198    let conn = open_ro(&paths.db)?;
199
200    let nodes_raw = entities::list_entities(&conn, namespace)?;
201    let edges_raw = entities::list_relationships_by_namespace(&conn, namespace)?;
202
203    let id_to_name: HashMap<i64, String> =
204        nodes_raw.iter().map(|n| (n.id, n.name.clone())).collect();
205
206    let nodes: Vec<NodeOut> = nodes_raw
207        .into_iter()
208        .map(|n| NodeOut {
209            id: n.id,
210            name: n.name,
211            namespace: n.namespace,
212            r#type: n.kind.clone(),
213            kind: n.kind,
214        })
215        .collect();
216
217    let mut edges: Vec<EdgeOut> = Vec::with_capacity(edges_raw.len());
218    for r in edges_raw {
219        let from = match id_to_name.get(&r.source_id) {
220            Some(n) => n.clone(),
221            None => continue,
222        };
223        let to = match id_to_name.get(&r.target_id) {
224            Some(n) => n.clone(),
225            None => continue,
226        };
227        edges.push(EdgeOut {
228            from,
229            to,
230            relation: r.relation,
231            weight: r.weight,
232        });
233    }
234
235    let rendered = match format {
236        GraphExportFormat::Json => render_json(&GraphSnapshot {
237            nodes,
238            edges,
239            elapsed_ms: inicio.elapsed().as_millis() as u64,
240        })?,
241        GraphExportFormat::Dot => render_dot(&nodes, &edges),
242        GraphExportFormat::Mermaid => render_mermaid(&nodes, &edges),
243    };
244
245    if let Some(path) = output_path {
246        fs::write(path, &rendered)?;
247        output::emit_progress(&format!("wrote {}", path.display()));
248    } else {
249        output::emit_text(&rendered);
250    }
251
252    Ok(())
253}
254
255fn run_traverse(args: GraphTraverseArgs) -> Result<(), AppError> {
256    let inicio = Instant::now();
257    let paths = AppPaths::resolve(args.db.as_deref())?;
258
259    if !paths.db.exists() {
260        return Err(AppError::NotFound(erros::banco_nao_encontrado(
261            &paths.db.display().to_string(),
262        )));
263    }
264
265    let conn = open_ro(&paths.db)?;
266    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
267
268    let from_id = entities::find_entity_id(&conn, &namespace, &args.from)?
269        .ok_or_else(|| AppError::NotFound(format!("entity '{}' not found", args.from)))?;
270
271    let all_rels = entities::list_relationships_by_namespace(&conn, Some(&namespace))?;
272    let all_entities = entities::list_entities(&conn, Some(&namespace))?;
273    let id_to_name: HashMap<i64, String> = all_entities
274        .iter()
275        .map(|e| (e.id, e.name.clone()))
276        .collect();
277
278    let mut hops: Vec<TraverseHop> = Vec::new();
279    let mut visited: std::collections::HashSet<i64> = std::collections::HashSet::new();
280    let mut frontier: Vec<(i64, u32)> = vec![(from_id, 0)];
281
282    while let Some((current_id, current_depth)) = frontier.pop() {
283        if current_depth >= args.depth || visited.contains(&current_id) {
284            continue;
285        }
286        visited.insert(current_id);
287
288        for rel in &all_rels {
289            if rel.source_id == current_id {
290                if let Some(target_name) = id_to_name.get(&rel.target_id) {
291                    hops.push(TraverseHop {
292                        entity: target_name.clone(),
293                        relation: rel.relation.clone(),
294                        direction: "outbound".to_string(),
295                        weight: rel.weight,
296                        depth: current_depth + 1,
297                    });
298                    frontier.push((rel.target_id, current_depth + 1));
299                }
300            } else if rel.target_id == current_id {
301                if let Some(source_name) = id_to_name.get(&rel.source_id) {
302                    hops.push(TraverseHop {
303                        entity: source_name.clone(),
304                        relation: rel.relation.clone(),
305                        direction: "inbound".to_string(),
306                        weight: rel.weight,
307                        depth: current_depth + 1,
308                    });
309                    frontier.push((rel.source_id, current_depth + 1));
310                }
311            }
312        }
313    }
314
315    output::emit_json(&GraphTraverseResponse {
316        from: args.from,
317        namespace,
318        depth: args.depth,
319        hops,
320        elapsed_ms: inicio.elapsed().as_millis() as u64,
321    })?;
322
323    Ok(())
324}
325
326fn run_stats(args: GraphStatsArgs) -> Result<(), AppError> {
327    let inicio = Instant::now();
328    let paths = AppPaths::resolve(args.db.as_deref())?;
329
330    if !paths.db.exists() {
331        return Err(AppError::NotFound(erros::banco_nao_encontrado(
332            &paths.db.display().to_string(),
333        )));
334    }
335
336    let conn = open_ro(&paths.db)?;
337    let ns = args.namespace.as_deref();
338
339    let node_count: i64 = if let Some(n) = ns {
340        conn.query_row(
341            "SELECT COUNT(*) FROM entities WHERE namespace = ?1",
342            rusqlite::params![n],
343            |r| r.get(0),
344        )?
345    } else {
346        conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?
347    };
348
349    let edge_count: i64 = if let Some(n) = ns {
350        conn.query_row(
351            "SELECT COUNT(*) FROM relationships r
352             JOIN entities s ON s.id = r.source_id
353             WHERE s.namespace = ?1",
354            rusqlite::params![n],
355            |r| r.get(0),
356        )?
357    } else {
358        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?
359    };
360
361    let (avg_degree, max_degree): (f64, i64) = if let Some(n) = ns {
362        conn.query_row(
363            "SELECT COALESCE(AVG(degree), 0.0), COALESCE(MAX(degree), 0) FROM entities WHERE namespace = ?1",
364            rusqlite::params![n],
365            |r| Ok((r.get::<_, f64>(0)?, r.get::<_, i64>(1)?)),
366        )?
367    } else {
368        conn.query_row(
369            "SELECT COALESCE(AVG(degree), 0.0), COALESCE(MAX(degree), 0) FROM entities",
370            [],
371            |r| Ok((r.get::<_, f64>(0)?, r.get::<_, i64>(1)?)),
372        )?
373    };
374
375    let resp = GraphStatsResponse {
376        namespace: args.namespace,
377        node_count,
378        edge_count,
379        avg_degree,
380        max_degree,
381        elapsed_ms: inicio.elapsed().as_millis() as u64,
382    };
383
384    match args.format {
385        GraphExportFormat::Json => output::emit_json(&resp)?,
386        GraphExportFormat::Dot | GraphExportFormat::Mermaid => {
387            output::emit_text(&format!(
388                "nodes={} edges={} avg_degree={:.2} max_degree={} namespace={}",
389                resp.node_count,
390                resp.edge_count,
391                resp.avg_degree,
392                resp.max_degree,
393                resp.namespace.as_deref().unwrap_or("all"),
394            ));
395        }
396    }
397
398    Ok(())
399}
400
401fn run_entities(args: GraphEntitiesArgs) -> Result<(), AppError> {
402    let inicio = Instant::now();
403    let paths = AppPaths::resolve(args.db.as_deref())?;
404
405    if !paths.db.exists() {
406        return Err(AppError::NotFound(erros::banco_nao_encontrado(
407            &paths.db.display().to_string(),
408        )));
409    }
410
411    let conn = open_ro(&paths.db)?;
412
413    let row_to_item = |r: &rusqlite::Row<'_>| -> rusqlite::Result<EntityItem> {
414        let ts: i64 = r.get(4)?;
415        let created_at = chrono::DateTime::from_timestamp(ts, 0)
416            .unwrap_or_default()
417            .format("%Y-%m-%dT%H:%M:%SZ")
418            .to_string();
419        Ok(EntityItem {
420            id: r.get(0)?,
421            name: r.get(1)?,
422            entity_type: r.get(2)?,
423            namespace: r.get(3)?,
424            created_at,
425        })
426    };
427
428    let limit_i = args.limit as i64;
429    let offset_i = args.offset as i64;
430
431    let (total_count, items) = match (args.namespace.as_deref(), args.entity_type.as_deref()) {
432        (Some(ns), Some(et)) => {
433            let count: i64 = conn.query_row(
434                "SELECT COUNT(*) FROM entities WHERE namespace = ?1 AND type = ?2",
435                rusqlite::params![ns, et],
436                |r| r.get(0),
437            )?;
438            let mut stmt = conn.prepare(
439                "SELECT id, name, type, namespace, created_at FROM entities
440                 WHERE namespace = ?1 AND type = ?2
441                 ORDER BY name ASC LIMIT ?3 OFFSET ?4",
442            )?;
443            let rows = stmt
444                .query_map(rusqlite::params![ns, et, limit_i, offset_i], row_to_item)?
445                .collect::<rusqlite::Result<Vec<_>>>()?;
446            (count, rows)
447        }
448        (Some(ns), None) => {
449            let count: i64 = conn.query_row(
450                "SELECT COUNT(*) FROM entities WHERE namespace = ?1",
451                rusqlite::params![ns],
452                |r| r.get(0),
453            )?;
454            let mut stmt = conn.prepare(
455                "SELECT id, name, type, namespace, created_at FROM entities
456                 WHERE namespace = ?1
457                 ORDER BY name ASC LIMIT ?2 OFFSET ?3",
458            )?;
459            let rows = stmt
460                .query_map(rusqlite::params![ns, limit_i, offset_i], row_to_item)?
461                .collect::<rusqlite::Result<Vec<_>>>()?;
462            (count, rows)
463        }
464        (None, Some(et)) => {
465            let count: i64 = conn.query_row(
466                "SELECT COUNT(*) FROM entities WHERE type = ?1",
467                rusqlite::params![et],
468                |r| r.get(0),
469            )?;
470            let mut stmt = conn.prepare(
471                "SELECT id, name, type, namespace, created_at FROM entities
472                 WHERE type = ?1
473                 ORDER BY name ASC LIMIT ?2 OFFSET ?3",
474            )?;
475            let rows = stmt
476                .query_map(rusqlite::params![et, limit_i, offset_i], row_to_item)?
477                .collect::<rusqlite::Result<Vec<_>>>()?;
478            (count, rows)
479        }
480        (None, None) => {
481            let count: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
482            let mut stmt = conn.prepare(
483                "SELECT id, name, type, namespace, created_at FROM entities
484                 ORDER BY name ASC LIMIT ?1 OFFSET ?2",
485            )?;
486            let rows = stmt
487                .query_map(rusqlite::params![limit_i, offset_i], row_to_item)?
488                .collect::<rusqlite::Result<Vec<_>>>()?;
489            (count, rows)
490        }
491    };
492
493    output::emit_json(&GraphEntitiesResponse {
494        items,
495        total_count,
496        limit: args.limit,
497        offset: args.offset,
498        namespace: args.namespace,
499        elapsed_ms: inicio.elapsed().as_millis() as u64,
500    })
501}
502
503fn render_json(snapshot: &GraphSnapshot) -> Result<String, AppError> {
504    Ok(serde_json::to_string_pretty(snapshot)?)
505}
506
/// Turns an arbitrary entity name into a valid unquoted DOT identifier.
///
/// Every character other than an ASCII letter, ASCII digit or `_` becomes
/// `_`. Because an unquoted DOT ID must be non-empty and must not start with
/// a digit, a leading `_` is added when the result would otherwise be empty
/// or digit-initial. Names that already form a valid identifier are returned
/// unchanged.
fn sanitize_dot_id(raw: &str) -> String {
    // +1 leaves room for the optional leading underscore.
    let mut id = String::with_capacity(raw.len() + 1);
    for c in raw.chars() {
        id.push(if c.is_ascii_alphanumeric() || c == '_' {
            c
        } else {
            '_'
        });
    }
    // True for the empty string and for a digit-initial identifier.
    let needs_prefix = match id.chars().next() {
        Some(first) => first.is_ascii_digit(),
        None => true,
    };
    if needs_prefix {
        id.insert(0, '_');
    }
    id
}
518
519fn render_dot(nodes: &[NodeOut], edges: &[EdgeOut]) -> String {
520    let mut out = String::new();
521    out.push_str("digraph sqlite-graphrag {\n");
522    for node in nodes {
523        let node_id = sanitize_dot_id(&node.name);
524        let escaped = node.name.replace('"', "\\\"");
525        out.push_str(&format!("  {node_id} [label=\"{escaped}\"];\n"));
526    }
527    for edge in edges {
528        let from = sanitize_dot_id(&edge.from);
529        let to = sanitize_dot_id(&edge.to);
530        let label = edge.relation.replace('"', "\\\"");
531        out.push_str(&format!("  {from} -> {to} [label=\"{label}\"];\n"));
532    }
533    out.push_str("}\n");
534    out
535}
536
/// Maps an entity name to a Mermaid node identifier by replacing every
/// character that is not an ASCII letter, ASCII digit or `_` with `_`.
///
/// The result has exactly as many characters as the input; an empty name
/// yields an empty identifier.
fn sanitize_mermaid_id(raw: &str) -> String {
    let mut id = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let allowed = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_');
        id.push(if allowed { ch } else { '_' });
    }
    id
}
548
549fn render_mermaid(nodes: &[NodeOut], edges: &[EdgeOut]) -> String {
550    let mut out = String::new();
551    out.push_str("graph LR\n");
552    for node in nodes {
553        let id = sanitize_mermaid_id(&node.name);
554        let escaped = node.name.replace('"', "\\\"");
555        out.push_str(&format!("  {id}[\"{escaped}\"]\n"));
556    }
557    for edge in edges {
558        let from = sanitize_mermaid_id(&edge.from);
559        let to = sanitize_mermaid_id(&edge.to);
560        let label = edge.relation.replace('|', "\\|");
561        out.push_str(&format!("  {from} -->|{label}| {to}\n"));
562    }
563    out
564}
565
// Unit tests for the JSON shape of the response structs defined in this file.
// They only exercise serialization; no database is opened.
#[cfg(test)]
mod testes {
    use super::*;

    /// Builds a `NodeOut` fixture whose `kind` and `type` both equal `kind`.
    fn cria_node(kind: &str) -> NodeOut {
        NodeOut {
            id: 1,
            name: "entidade-teste".to_string(),
            namespace: "default".to_string(),
            kind: kind.to_string(),
            r#type: kind.to_string(),
        }
    }

    /// The serialized `type` key mirrors `kind`.
    #[test]
    fn node_out_type_duplica_kind() {
        let node = cria_node("agent");
        let json = serde_json::to_value(&node).expect("serialização deve funcionar");
        assert_eq!(json["kind"], json["type"]);
        assert_eq!(json["kind"], "agent");
        assert_eq!(json["type"], "agent");
    }

    /// Every `NodeOut` field is present in the serialized object.
    #[test]
    fn node_out_serializa_todos_campos() {
        let node = cria_node("document");
        let json = serde_json::to_value(&node).expect("serialização deve funcionar");
        assert!(json.get("id").is_some());
        assert!(json.get("name").is_some());
        assert!(json.get("namespace").is_some());
        assert!(json.get("kind").is_some());
        assert!(json.get("type").is_some());
    }

    /// `render_json` output parses back and keeps `kind`/`type` on each node.
    #[test]
    fn graph_snapshot_serializa_nodes_com_type() {
        let node = cria_node("concept");
        let snapshot = GraphSnapshot {
            nodes: vec![node],
            edges: vec![],
            elapsed_ms: 0,
        };
        let json_str = render_json(&snapshot).expect("renderização deve funcionar");
        let json: serde_json::Value = serde_json::from_str(&json_str).expect("json válido");
        let primeiro_node = &json["nodes"][0];
        assert_eq!(primeiro_node["kind"], primeiro_node["type"]);
        assert_eq!(primeiro_node["type"], "concept");
    }

    /// Traverse response exposes `from`, `depth` and an array of hops.
    #[test]
    fn graph_traverse_response_serializa_corretamente() {
        let resp = GraphTraverseResponse {
            from: "entity-a".to_string(),
            namespace: "global".to_string(),
            depth: 2,
            hops: vec![TraverseHop {
                entity: "entity-b".to_string(),
                relation: "uses".to_string(),
                direction: "outbound".to_string(),
                weight: 1.0,
                depth: 1,
            }],
            elapsed_ms: 5,
        };
        let json = serde_json::to_value(&resp).unwrap();
        assert_eq!(json["from"], "entity-a");
        assert_eq!(json["depth"], 2);
        assert!(json["hops"].is_array());
        assert_eq!(json["hops"][0]["direction"], "outbound");
    }

    /// Stats response serializes its counters and degree figures unchanged.
    #[test]
    fn graph_stats_response_serializa_corretamente() {
        let resp = GraphStatsResponse {
            namespace: Some("global".to_string()),
            node_count: 10,
            edge_count: 15,
            avg_degree: 3.0,
            max_degree: 7,
            elapsed_ms: 2,
        };
        let json = serde_json::to_value(&resp).unwrap();
        assert_eq!(json["node_count"], 10);
        assert_eq!(json["edge_count"], 15);
        assert_eq!(json["avg_degree"], 3.0);
        assert_eq!(json["max_degree"], 7);
    }

    /// Entities response carries the items plus the pagination metadata.
    #[test]
    fn graph_entities_response_serializa_campos_obrigatorios() {
        let resp = GraphEntitiesResponse {
            items: vec![EntityItem {
                id: 1,
                name: "claude-code".to_string(),
                entity_type: "agent".to_string(),
                namespace: "global".to_string(),
                created_at: "2026-01-01T00:00:00Z".to_string(),
            }],
            total_count: 1,
            limit: 50,
            offset: 0,
            namespace: Some("global".to_string()),
            elapsed_ms: 3,
        };
        let json = serde_json::to_value(&resp).unwrap();
        assert!(json["items"].is_array());
        assert_eq!(json["items"][0]["name"], "claude-code");
        assert_eq!(json["items"][0]["entity_type"], "agent");
        assert_eq!(json["total_count"], 1);
        assert_eq!(json["limit"], 50);
        assert_eq!(json["offset"], 0);
        assert_eq!(json["namespace"], "global");
    }

    /// Every `EntityItem` field is serialized under its own name.
    #[test]
    fn entity_item_serializa_todos_campos() {
        let item = EntityItem {
            id: 42,
            name: "test-entity".to_string(),
            entity_type: "concept".to_string(),
            namespace: "project-a".to_string(),
            created_at: "2026-04-19T12:00:00Z".to_string(),
        };
        let json = serde_json::to_value(&item).unwrap();
        assert_eq!(json["id"], 42);
        assert_eq!(json["name"], "test-entity");
        assert_eq!(json["entity_type"], "concept");
        assert_eq!(json["namespace"], "project-a");
        assert_eq!(json["created_at"], "2026-04-19T12:00:00Z");
    }
}