Skip to main content

sqlite_graphrag/commands/
read.rs

1//! Handler for the `read` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use crate::storage::memories;
8use serde::Serialize;
9
// Command-line arguments accepted by the `read` subcommand.
//
// Exactly one lookup key is expected: a memory name (positional `NAME` or
// `--name`) or a numeric `--id`. The two name forms conflict with each other
// and `--id` conflicts with both, so clap rejects combinations up front.
// NOTE: explanatory comments added here are plain `//` on purpose; `///`
// comments on clap items are picked up as help text.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Read a memory by name (positional)\n  \
    sqlite-graphrag read onboarding\n\n  \
    # Read using the named flag form\n  \
    sqlite-graphrag read --name onboarding\n\n  \
    # Read by memory ID (integer emitted in JSON output of most commands)\n  \
    sqlite-graphrag read --id 42 --json\n\n  \
    # Read from a specific namespace\n  \
    sqlite-graphrag read onboarding --namespace my-project")]
pub struct ReadArgs {
    /// Memory name as a positional argument. Alternative to `--name`.
    // `run` prefers this value over `--name`; clap's `conflicts_with` means
    // both can never be set at the same time anyway.
    #[arg(
        value_name = "NAME",
        conflicts_with = "name",
        help = "Memory name (kebab-case slug); alternative to --name"
    )]
    pub name_positional: Option<String>,
    /// Memory name to read. Returns NotFound (exit 4) if missing or soft-deleted.
    #[arg(long)]
    pub name: Option<String>,
    /// Memory ID (integer) for direct lookup. Conflicts with --name and positional NAME.
    // When set, `run` looks the memory up by id and then verifies that the
    // row belongs to the resolved namespace.
    #[arg(
        long,
        conflicts_with_all = ["name", "name_positional"],
        help = "Memory ID (integer) for direct lookup"
    )]
    pub id: Option<i64>,
    // Optional namespace override; `run` hands it to
    // `crate::namespace::resolve_namespace`, which decides the effective value.
    #[arg(
        long,
        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
    )]
    pub namespace: Option<String>,
    /// Include linked entities and relationships in the response.
    // Only affects the JSON envelope: `--format raw` returns before any graph
    // query is issued.
    #[arg(
        long,
        help = "Include graph context (entities + relationships) in response"
    )]
    pub with_graph: bool,
    /// Output format: `json` (default, full envelope) or `raw` (the pure memory
    /// body to stdout, no JSON wrapper). GAP-SG-50: `raw` lets the body be piped
    /// without a `jaq -r '.body'` round-trip.
    #[arg(
        long,
        value_enum,
        default_value_t = ReadFormat::Json,
        help = "Output format: json (default) or raw (pure body to stdout)"
    )]
    pub format: ReadFormat,
    // Hidden compatibility flag: it is parsed but `run` never reads it.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; clap falls back to the SQLITE_GRAPHRAG_DB_PATH
    // environment variable, and `run` passes the result to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
64
/// GAP-SG-50: output format for `read`. `Raw` emits the pure body; `Json`
/// emits the full structured envelope.
// Variant notes below are plain `//` comments so they are not picked up as
// help text for the possible values. `rename_all = "lowercase"` makes the
// accepted CLI spellings `json` and `raw`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum, Default)]
#[value(rename_all = "lowercase")]
pub enum ReadFormat {
    // Default: `run` builds a `ReadResponse` and emits it via `output::emit_json`.
    #[default]
    Json,
    // `run` writes only the memory body bytes via `output::emit_raw`.
    Raw,
}
74
/// JSON envelope emitted by `read` when the output format is `json`.
///
/// Most fields are copied straight from the storage row returned by the
/// lookup; `id`/`memory_id` and `type`/`memory_type` are deliberate aliases
/// carrying the same value under two keys.
#[derive(Serialize)]
struct ReadResponse {
    /// Canonical storage field. Preserved for compatibility with v2.0.0 clients.
    id: i64,
    /// Semantic alias of `id` for the contract documented in SKILL.md.
    memory_id: i64,
    /// Namespace the memory row belongs to.
    namespace: String,
    /// Memory name as stored in the row.
    name: String,
    /// Semantic alias of `memory_type` for the documented contract.
    #[serde(rename = "type")]
    type_alias: String,
    /// Memory type as stored in the row; duplicated under the `type` key.
    memory_type: String,
    /// Description copied from the storage row.
    description: String,
    /// Memory body; the same text `--format raw` prints on its own.
    body: String,
    /// Body hash copied from the storage row.
    /// NOTE(review): the hash algorithm is not visible from this file.
    body_hash: String,
    /// Optional session identifier; serializes as `null` when absent.
    session_id: Option<String>,
    /// Source value copied from the storage row.
    source: String,
    /// Metadata parsed from the row's JSON text; `null` when that text is
    /// not valid JSON.
    metadata: serde_json::Value,
    /// Most recent memory version, useful for optimistic control via `--expected-updated-at`.
    version: i64,
    /// Creation time as an integer epoch (seconds, per the conversions
    /// exercised by this file's tests).
    created_at: i64,
    /// RFC 3339 UTC timestamp parallel to `created_at` for ISO 8601 parsers.
    created_at_iso: String,
    /// Last-update time as an integer epoch, same unit as `created_at`.
    updated_at: i64,
    /// RFC 3339 UTC timestamp parallel to `updated_at` for ISO 8601 parsers.
    updated_at_iso: String,
    /// Linked entities (opt-in via --with-graph).
    #[serde(skip_serializing_if = "Option::is_none")]
    entities: Option<Vec<ReadEntityBinding>>,
    /// Relationships from linked entities (opt-in via --with-graph).
    #[serde(skip_serializing_if = "Option::is_none")]
    relationships: Option<Vec<ReadRelationshipBinding>>,
    /// Total execution time in milliseconds from handler start to serialisation.
    elapsed_ms: u64,
}
110
/// One entity linked to the memory, as listed under `entities` when
/// `--with-graph` is set.
#[derive(Serialize)]
struct ReadEntityBinding {
    /// Primary key of the entity (`entities.id`).
    entity_id: i64,
    /// Entity name (`entities.name`).
    name: String,
    /// Entity type (`entities.type`).
    entity_type: String,
}
117
/// One relationship touching a linked entity, as listed under
/// `relationships` when `--with-graph` is set.
#[derive(Serialize)]
struct ReadRelationshipBinding {
    /// Name of the entity referenced by `relationships.source_id`.
    from: String,
    /// Name of the entity referenced by `relationships.target_id`.
    to: String,
    /// Relation label (`relationships.relation`).
    relation: String,
    /// Relationship weight (`relationships.weight`).
    weight: f64,
}
125
/// Formats an integer epoch timestamp as a string for the `*_iso` response
/// fields by delegating to `crate::tz::epoch_to_iso`.
///
/// The tests in this file feed it epoch seconds and parse the result as
/// RFC 3339. NOTE(review): the offset used in the output is decided by
/// `crate::tz` (the test comments mention a display-timezone setting) —
/// confirm there before relying on a fixed `Z` suffix.
fn epoch_to_iso(epoch: i64) -> String {
    crate::tz::epoch_to_iso(epoch)
}
129
130pub fn run(args: ReadArgs) -> Result<(), AppError> {
131    let start = std::time::Instant::now();
132    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
133    let paths = AppPaths::resolve(args.db.as_deref())?;
134    crate::storage::connection::ensure_db_ready(&paths)?;
135    let conn = open_ro(&paths.db)?;
136
137    let row_opt = if let Some(id) = args.id {
138        let r = memories::read_full(&conn, id)?;
139        if let Some(ref row) = r {
140            if row.namespace != namespace {
141                return Err(AppError::NotFound(format!(
142                    "memory id {id} exists but belongs to namespace '{}', not '{namespace}'",
143                    row.namespace
144                )));
145            }
146        }
147        if r.is_none() {
148            // G55 S2: surface the requested id structurally so the message
149            // never drops it for the legacy `unknown` literal.
150            return Err(AppError::MemoryNotFoundById { id });
151        }
152        r
153    } else {
154        let name = args
155            .name_positional
156            .clone()
157            .or(args.name.clone())
158            .ok_or_else(|| {
159                AppError::Validation(
160                "name or --id required: pass name as positional argument, via --name, or use --id"
161                    .to_string(),
162            )
163            })?;
164        memories::read_by_name(&conn, &namespace, &name)?
165    };
166
167    match row_opt {
168        Some(row) => {
169            // GAP-SG-50: `--format raw` emits the pure body and returns early,
170            // before building the JSON envelope. The body is written verbatim so
171            // it can be redirected to a file or piped without parsing.
172            if args.format == ReadFormat::Raw {
173                output::emit_raw(row.body.as_bytes());
174                return Ok(());
175            }
176            // Resolve current version via memory_versions table (highest version for this memory_id).
177            let version: i64 = conn
178                .query_row(
179                    "SELECT COALESCE(MAX(version), 1) FROM memory_versions WHERE memory_id=?1",
180                    rusqlite::params![row.id],
181                    |r| r.get(0),
182                )
183                .unwrap_or(1);
184
185            // G22: optional graph context
186            let (entities, relationships) = if args.with_graph {
187                let mut ent_stmt = conn.prepare_cached(
188                    "SELECT e.id, e.name, e.type FROM memory_entities me \
189                     JOIN entities e ON e.id = me.entity_id \
190                     WHERE me.memory_id = ?1",
191                )?;
192                let ents: Vec<ReadEntityBinding> = ent_stmt
193                    .query_map(rusqlite::params![row.id], |r| {
194                        Ok(ReadEntityBinding {
195                            entity_id: r.get(0)?,
196                            name: r.get(1)?,
197                            entity_type: r.get(2)?,
198                        })
199                    })?
200                    .filter_map(|r| r.ok())
201                    .collect();
202                drop(ent_stmt);
203
204                let entity_ids: Vec<i64> = ents.iter().map(|e| e.entity_id).collect();
205                let rels: Vec<ReadRelationshipBinding> = if !entity_ids.is_empty() {
206                    let placeholders: String = entity_ids
207                        .iter()
208                        .map(|id| id.to_string())
209                        .collect::<Vec<_>>()
210                        .join(",");
211                    let sql = format!(
212                        "SELECT e1.name, e2.name, r.relation, r.weight \
213                         FROM relationships r \
214                         JOIN entities e1 ON e1.id = r.source_id \
215                         JOIN entities e2 ON e2.id = r.target_id \
216                         WHERE r.source_id IN ({placeholders}) OR r.target_id IN ({placeholders})"
217                    );
218                    let mut rel_stmt = conn.prepare(&sql)?;
219                    let result: Vec<ReadRelationshipBinding> = rel_stmt
220                        .query_map([], |r| {
221                            Ok(ReadRelationshipBinding {
222                                from: r.get(0)?,
223                                to: r.get(1)?,
224                                relation: r.get(2)?,
225                                weight: r.get(3)?,
226                            })
227                        })?
228                        .filter_map(|r| r.ok())
229                        .collect();
230                    drop(rel_stmt);
231                    result
232                } else {
233                    vec![]
234                };
235                (Some(ents), Some(rels))
236            } else {
237                (None, None)
238            };
239
240            let response = ReadResponse {
241                id: row.id,
242                memory_id: row.id,
243                namespace: row.namespace,
244                name: row.name,
245                type_alias: row.memory_type.clone(),
246                memory_type: row.memory_type,
247                description: row.description,
248                body: row.body,
249                body_hash: row.body_hash,
250                session_id: row.session_id,
251                source: row.source,
252                metadata: serde_json::from_str::<serde_json::Value>(&row.metadata)
253                    .unwrap_or(serde_json::Value::Null),
254                version,
255                created_at: row.created_at,
256                created_at_iso: epoch_to_iso(row.created_at),
257                updated_at: row.updated_at,
258                updated_at_iso: epoch_to_iso(row.updated_at),
259                entities,
260                relationships,
261                elapsed_ms: start.elapsed().as_millis() as u64,
262            };
263            output::emit_json(&response)?;
264        }
265        None => {
266            // G55 S2: when the lookup target is a name, use the structural
267            // `MemoryNotFound { name, namespace }` variant so the message is
268            // guaranteed to carry the requested identifier. The legacy
269            // `NotFound(String)` path is only reached via the `--id` branch
270            // (which now emits `MemoryNotFoundById` structurally a few lines
271            // above) or when a future caller needs ad-hoc messages.
272            if let Some(name) = args.name_positional.as_deref().or(args.name.as_deref()) {
273                return Err(AppError::MemoryNotFound {
274                    name: name.to_string(),
275                    namespace: namespace.clone(),
276                });
277            }
278            // Fallback: id lookup that did not match (defensive — the
279            // MemoryNotFoundById branch above already returned in the
280            // normal id-miss path).
281            if let Some(id) = args.id {
282                return Err(AppError::MemoryNotFoundById { id });
283            }
284            // Unreachable: the `else` branch above already validated that
285            // one of name/id is set. Keep a defensive message for future
286            // refactors that may restructure the lookup arms.
287            return Err(AppError::Validation(
288                "internal: read reached NotFound without name or id".to_string(),
289            ));
290        }
291    }
292
293    Ok(())
294}
295
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fully populated envelope. Serialization tests start from this
    /// value and override only the fields they assert on, using struct-update
    /// syntax, instead of repeating the whole literal.
    fn sample_response() -> ReadResponse {
        ReadResponse {
            id: 42,
            memory_id: 42,
            namespace: "global".to_string(),
            name: "my-mem".to_string(),
            type_alias: "fact".to_string(),
            memory_type: "fact".to_string(),
            description: "desc".to_string(),
            body: "body".to_string(),
            body_hash: "abc123".to_string(),
            session_id: None,
            source: "agent".to_string(),
            metadata: serde_json::json!({}),
            version: 1,
            created_at: 1_705_320_000,
            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
            updated_at: 1_705_320_000,
            updated_at_iso: "2024-01-15T12:00:00Z".to_string(),
            entities: None,
            relationships: None,
            elapsed_ms: 5,
        }
    }

    // GAP-SG-50: `read --format raw` must parse to ReadFormat::Raw; default is Json.
    #[test]
    fn read_format_flag_parses_raw_and_defaults_json() {
        use crate::cli::{Cli, Commands};
        use clap::Parser;

        let raw = Cli::try_parse_from(["sqlite-graphrag", "read", "my-mem", "--format", "raw"])
            .expect("parse raw");
        match raw.command {
            Some(Commands::Read(a)) => assert_eq!(a.format, ReadFormat::Raw),
            other => panic!("expected read, got {other:?}"),
        }

        let dflt = Cli::try_parse_from(["sqlite-graphrag", "read", "my-mem"]).expect("parse");
        match dflt.command {
            Some(Commands::Read(a)) => assert_eq!(a.format, ReadFormat::Json),
            other => panic!("expected read, got {other:?}"),
        }
    }

    #[test]
    fn epoch_to_iso_converts_zero_to_unix_epoch() {
        // v1.0.68 (test fix): parse the ISO back into a DateTime<FixedOffset>
        // and compare with chrono::DateTime::UNIX_EPOCH so the assertion is
        // timezone-agnostic.  The previous `starts_with("1970-01-01T00:00:00")`
        // assertion leaked the global SQLITE_GRAPHRAG_DISPLAY_TZ from sibling
        // tests in the same process and failed on hosts where the default
        // timezone is non-UTC.
        let result = epoch_to_iso(0);
        let parsed = chrono::DateTime::parse_from_rfc3339(&result)
            .unwrap_or_else(|e| panic!("epoch_to_iso(0) returned non-RFC3339 `{result}`: {e}"));
        assert_eq!(
            parsed.timestamp(),
            chrono::DateTime::UNIX_EPOCH.timestamp(),
            "epoch 0 must map to the Unix epoch instant, got: {result}"
        );
    }

    #[test]
    fn epoch_to_iso_converts_known_timestamp() {
        // v1.0.68 (test fix): 1_705_320_000 = 2024-01-15T12:00:00Z, not
        // 2024-01-15T00:00:00Z (the previous test asserted the wrong instant).
        // The fix uses parse + timestamp compare to be timezone-agnostic and
        // to catch wrong-epoch regressions regardless of host TZ.
        let result = epoch_to_iso(1_705_320_000);
        let parsed = chrono::DateTime::parse_from_rfc3339(&result).unwrap_or_else(|e| {
            panic!("epoch_to_iso(1705320000) returned non-RFC3339 `{result}`: {e}")
        });
        let expected = chrono::DateTime::parse_from_rfc3339("2024-01-15T12:00:00+00:00")
            .expect("static RFC3339 is valid");
        assert_eq!(
            parsed.timestamp(),
            expected.timestamp(),
            "timestamp 1705320000 must map to 2024-01-15T12:00:00Z, got: {result}"
        );
    }

    #[test]
    fn epoch_to_iso_returns_fallback_for_invalid_negative_epoch() {
        let result = epoch_to_iso(i64::MIN);
        assert!(
            !result.is_empty(),
            "must return a non-empty string even for invalid epoch"
        );
    }

    #[test]
    fn read_response_serializes_id_and_memory_id_aliases() {
        let resp = sample_response();

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["id"], 42);
        assert_eq!(json["memory_id"], 42);
        assert_eq!(json["type"], "fact");
        assert_eq!(json["memory_type"], "fact");
        assert_eq!(json["elapsed_ms"], 5u64);
        assert!(
            json["session_id"].is_null(),
            "session_id None must serialize as null"
        );
        // metadata must serialize as a JSON object, not as an escaped string
        assert!(
            json["metadata"].is_object(),
            "metadata must be a JSON object"
        );
    }

    #[test]
    fn read_response_session_id_some_serializes_string() {
        let resp = ReadResponse {
            session_id: Some("sess-123".to_string()),
            ..sample_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["session_id"], "sess-123");
    }

    #[test]
    fn read_response_elapsed_ms_is_present() {
        let resp = ReadResponse {
            elapsed_ms: 123,
            ..sample_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["elapsed_ms"], 123u64);
        assert!(json["created_at_iso"].is_string());
        assert!(json["updated_at_iso"].is_string());
    }

    #[test]
    fn read_response_metadata_object_not_escaped_string() {
        // P2-A: metadata must serialize as a JSON object, not as an escaped string.
        let resp = ReadResponse {
            metadata: serde_json::json!({"key": "value", "number": 42}),
            ..sample_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        // Must be object, not a JSON string containing escaped JSON.
        assert!(json["metadata"].is_object());
        assert_eq!(json["metadata"]["key"], "value");
        assert_eq!(json["metadata"]["number"], 42);
    }

    #[test]
    fn read_response_metadata_fallback_to_null_for_invalid_json() {
        // P2-A: fallback when metadata is an invalid string. This mirrors the
        // `from_str(..).unwrap_or(Null)` expression used by `run`.
        let raw = "invalid-json{{{";
        let parsed =
            serde_json::from_str::<serde_json::Value>(raw).unwrap_or(serde_json::Value::Null);
        assert!(parsed.is_null());
    }

    // G55 S2 (v1.0.80): the structural `MemoryNotFound` variant must include
    // the requested name and namespace in the message — never the legacy
    // `unknown` literal that masked which lookup target failed.
    #[test]
    fn memory_not_found_structural_includes_name_and_namespace() {
        let err = AppError::MemoryNotFound {
            name: "atomwrite-projeto-contexto".to_string(),
            namespace: "global".to_string(),
        };
        let msg = err.to_string();
        assert!(msg.contains("atomwrite-projeto-contexto"), "got: {msg}");
        assert!(msg.contains("global"), "got: {msg}");
        assert!(
            !msg.contains("unknown"),
            "must not contain 'unknown': {msg}"
        );
        assert_eq!(err.exit_code(), 4);
        assert!(err.is_permanent());
    }

    #[test]
    fn memory_not_found_by_id_structural_includes_id() {
        let err = AppError::MemoryNotFoundById { id: 42 };
        let msg = err.to_string();
        assert!(msg.contains("42"), "got: {msg}");
        assert!(msg.contains("id=42"), "got: {msg}");
        assert_eq!(err.exit_code(), 4);
    }

    #[test]
    fn memory_not_found_pt_br_drops_english_fragments() {
        // The pt-BR translation must not contain leftover English fragments
        // like "not found" — that was the original G55 bug.
        use crate::i18n::Language;
        let err = AppError::MemoryNotFound {
            name: "mem-fantasma".to_string(),
            namespace: "global".to_string(),
        };
        let pt = err.localized_message_for(Language::Portuguese);
        assert!(!pt.contains("not found"), "pt-BR fragment leaked: {pt}");
        assert!(pt.contains("mem-fantasma"), "name missing in pt: {pt}");
        assert!(pt.contains("global"), "namespace missing in pt: {pt}");
    }
}
553}