Skip to main content

sqlite_graphrag/commands/
read.rs

1//! Handler for the `read` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use crate::storage::memories;
8use serde::Serialize;
9
// Command-line arguments accepted by the `read` subcommand.
//
// A memory is selected either by name (positional NAME or `--name`) or by
// `--id`; the `conflicts_with*` attributes below make those selectors mutually
// exclusive. When none of them is supplied, `run` returns a validation error.
//
// NOTE: clap's derive picks up `///` doc comments as help text, so notes that
// are meant for maintainers only are written as plain `//` comments here.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Read a memory by name (positional)\n  \
    sqlite-graphrag read onboarding\n\n  \
    # Read using the named flag form\n  \
    sqlite-graphrag read --name onboarding\n\n  \
    # Read by memory ID (integer emitted in JSON output of most commands)\n  \
    sqlite-graphrag read --id 42 --json\n\n  \
    # Read from a specific namespace\n  \
    sqlite-graphrag read onboarding --namespace my-project")]
pub struct ReadArgs {
    /// Memory name as a positional argument. Alternative to `--name`.
    // `run` prefers this value over `--name`; clap already rejects passing both.
    #[arg(
        value_name = "NAME",
        conflicts_with = "name",
        help = "Memory name (kebab-case slug); alternative to --name"
    )]
    pub name_positional: Option<String>,
    /// Memory name to read. Returns NotFound (exit 4) if missing or soft-deleted.
    #[arg(long)]
    pub name: Option<String>,
    /// Memory ID (integer) for direct lookup. Conflicts with --name and positional NAME.
    // `run` still checks that the row found by id lives in the resolved namespace.
    #[arg(
        long,
        conflicts_with_all = ["name", "name_positional"],
        help = "Memory ID (integer) for direct lookup"
    )]
    pub id: Option<i64>,
    // Namespace to read from. `run` forwards it (or `None` when the flag is
    // absent) to `crate::namespace::resolve_namespace`.
    #[arg(
        long,
        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
    )]
    pub namespace: Option<String>,
    /// Include linked entities and relationships in the response.
    #[arg(
        long,
        help = "Include graph context (entities + relationships) in response"
    )]
    pub with_graph: bool,
    // Hidden from help output; `run` never reads this flag.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Optional database path, also taken from the `SQLITE_GRAPHRAG_DB_PATH`
    // environment variable; `run` passes it to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
54
/// JSON payload emitted on stdout by a successful `read`.
///
/// Some values are deliberately exposed under two keys (`id`/`memory_id` and
/// `type`/`memory_type`); `run` fills both members of each pair from the same
/// column of the stored row.
#[derive(Serialize)]
struct ReadResponse {
    /// Canonical storage field. Preserved for compatibility with v2.0.0 clients.
    id: i64,
    /// Semantic alias of `id` for the contract documented in SKILL.md.
    memory_id: i64,
    /// Namespace recorded on the stored row.
    namespace: String,
    /// Name of the memory as stored.
    name: String,
    /// Semantic alias of `memory_type` for the documented contract.
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    type_alias: String,
    /// Memory type as stored on the row; same value as the `type` key.
    memory_type: String,
    /// Description column of the stored row.
    description: String,
    /// Body column of the stored row.
    body: String,
    /// Body hash column of the stored row, passed through unchanged.
    body_hash: String,
    /// Session identifier of the row; serialized as `null` when absent.
    session_id: Option<String>,
    /// Source column of the stored row.
    source: String,
    /// Row metadata parsed into a JSON value. `run` falls back to JSON `null`
    /// when the stored text is not valid JSON.
    metadata: serde_json::Value,
    /// Most recent memory version, useful for optimistic control via `--expected-updated-at`.
    /// `run` derives it from the highest `version` in `memory_versions`, defaulting to 1.
    version: i64,
    /// Creation time as an integer epoch value (the same number fed to `epoch_to_iso`).
    created_at: i64,
    /// RFC 3339 timestamp parallel to `created_at` for ISO 8601 parsers.
    /// NOTE(review): tests in this file suggest the offset follows a display
    /// timezone setting rather than always being UTC — confirm in `crate::tz`.
    created_at_iso: String,
    /// Last-update time as an integer epoch value (the same number fed to `epoch_to_iso`).
    updated_at: i64,
    /// RFC 3339 timestamp parallel to `updated_at` for ISO 8601 parsers.
    /// NOTE(review): same timezone caveat as `created_at_iso`.
    updated_at_iso: String,
    /// Linked entities (opt-in via --with-graph).
    /// The key is omitted from the JSON entirely when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    entities: Option<Vec<ReadEntityBinding>>,
    /// Relationships from linked entities (opt-in via --with-graph).
    /// The key is omitted from the JSON entirely when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    relationships: Option<Vec<ReadRelationshipBinding>>,
    /// Elapsed time in milliseconds from handler start until this struct is
    /// built, i.e. just before serialisation.
    elapsed_ms: u64,
}
90
/// One entity linked to the memory, as listed under `entities` in the
/// `read --with-graph` output.
#[derive(Serialize)]
struct ReadEntityBinding {
    /// Primary key of the entity (`entities.id`).
    entity_id: i64,
    /// Entity name (`entities.name`).
    name: String,
    /// Entity type (`entities.type`).
    entity_type: String,
}
97
/// One relationship touching an entity linked to the memory, as listed under
/// `relationships` in the `read --with-graph` output.
#[derive(Serialize)]
struct ReadRelationshipBinding {
    /// Name of the entity referenced by `relationships.source_id`.
    from: String,
    /// Name of the entity referenced by `relationships.target_id`.
    to: String,
    /// Relation label (`relationships.relation`).
    relation: String,
    /// Relationship weight (`relationships.weight`).
    weight: f64,
}
105
/// Formats an integer epoch timestamp as a string by delegating to
/// `crate::tz::epoch_to_iso`.
///
/// Kept as a local wrapper so the handler and the tests in this module share
/// one entry point. The tests below expect the result to parse as RFC 3339 for
/// ordinary inputs and to be non-empty even for `i64::MIN`.
fn epoch_to_iso(epoch: i64) -> String {
    crate::tz::epoch_to_iso(epoch)
}
109
110pub fn run(args: ReadArgs) -> Result<(), AppError> {
111    let start = std::time::Instant::now();
112    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
113    let paths = AppPaths::resolve(args.db.as_deref())?;
114    crate::storage::connection::ensure_db_ready(&paths)?;
115    let conn = open_ro(&paths.db)?;
116
117    let row_opt = if let Some(id) = args.id {
118        let r = memories::read_full(&conn, id)?;
119        if let Some(ref row) = r {
120            if row.namespace != namespace {
121                return Err(AppError::NotFound(format!(
122                    "memory id {id} exists but belongs to namespace '{}', not '{namespace}'",
123                    row.namespace
124                )));
125            }
126        }
127        if r.is_none() {
128            // G55 S2: surface the requested id structurally so the message
129            // never drops it for the legacy `unknown` literal.
130            return Err(AppError::MemoryNotFoundById { id });
131        }
132        r
133    } else {
134        let name = args
135            .name_positional
136            .clone()
137            .or(args.name.clone())
138            .ok_or_else(|| {
139                AppError::Validation(
140                "name or --id required: pass name as positional argument, via --name, or use --id"
141                    .to_string(),
142            )
143            })?;
144        memories::read_by_name(&conn, &namespace, &name)?
145    };
146
147    match row_opt {
148        Some(row) => {
149            // Resolve current version via memory_versions table (highest version for this memory_id).
150            let version: i64 = conn
151                .query_row(
152                    "SELECT COALESCE(MAX(version), 1) FROM memory_versions WHERE memory_id=?1",
153                    rusqlite::params![row.id],
154                    |r| r.get(0),
155                )
156                .unwrap_or(1);
157
158            // G22: optional graph context
159            let (entities, relationships) = if args.with_graph {
160                let mut ent_stmt = conn.prepare_cached(
161                    "SELECT e.id, e.name, e.type FROM memory_entities me \
162                     JOIN entities e ON e.id = me.entity_id \
163                     WHERE me.memory_id = ?1",
164                )?;
165                let ents: Vec<ReadEntityBinding> = ent_stmt
166                    .query_map(rusqlite::params![row.id], |r| {
167                        Ok(ReadEntityBinding {
168                            entity_id: r.get(0)?,
169                            name: r.get(1)?,
170                            entity_type: r.get(2)?,
171                        })
172                    })?
173                    .filter_map(|r| r.ok())
174                    .collect();
175                drop(ent_stmt);
176
177                let entity_ids: Vec<i64> = ents.iter().map(|e| e.entity_id).collect();
178                let rels: Vec<ReadRelationshipBinding> = if !entity_ids.is_empty() {
179                    let placeholders: String = entity_ids
180                        .iter()
181                        .map(|id| id.to_string())
182                        .collect::<Vec<_>>()
183                        .join(",");
184                    let sql = format!(
185                        "SELECT e1.name, e2.name, r.relation, r.weight \
186                         FROM relationships r \
187                         JOIN entities e1 ON e1.id = r.source_id \
188                         JOIN entities e2 ON e2.id = r.target_id \
189                         WHERE r.source_id IN ({placeholders}) OR r.target_id IN ({placeholders})"
190                    );
191                    let mut rel_stmt = conn.prepare(&sql)?;
192                    let result: Vec<ReadRelationshipBinding> = rel_stmt
193                        .query_map([], |r| {
194                            Ok(ReadRelationshipBinding {
195                                from: r.get(0)?,
196                                to: r.get(1)?,
197                                relation: r.get(2)?,
198                                weight: r.get(3)?,
199                            })
200                        })?
201                        .filter_map(|r| r.ok())
202                        .collect();
203                    drop(rel_stmt);
204                    result
205                } else {
206                    vec![]
207                };
208                (Some(ents), Some(rels))
209            } else {
210                (None, None)
211            };
212
213            let response = ReadResponse {
214                id: row.id,
215                memory_id: row.id,
216                namespace: row.namespace,
217                name: row.name,
218                type_alias: row.memory_type.clone(),
219                memory_type: row.memory_type,
220                description: row.description,
221                body: row.body,
222                body_hash: row.body_hash,
223                session_id: row.session_id,
224                source: row.source,
225                metadata: serde_json::from_str::<serde_json::Value>(&row.metadata)
226                    .unwrap_or(serde_json::Value::Null),
227                version,
228                created_at: row.created_at,
229                created_at_iso: epoch_to_iso(row.created_at),
230                updated_at: row.updated_at,
231                updated_at_iso: epoch_to_iso(row.updated_at),
232                entities,
233                relationships,
234                elapsed_ms: start.elapsed().as_millis() as u64,
235            };
236            output::emit_json(&response)?;
237        }
238        None => {
239            // G55 S2: when the lookup target is a name, use the structural
240            // `MemoryNotFound { name, namespace }` variant so the message is
241            // guaranteed to carry the requested identifier. The legacy
242            // `NotFound(String)` path is only reached via the `--id` branch
243            // (which now emits `MemoryNotFoundById` structurally a few lines
244            // above) or when a future caller needs ad-hoc messages.
245            if let Some(name) = args.name_positional.as_deref().or(args.name.as_deref()) {
246                return Err(AppError::MemoryNotFound {
247                    name: name.to_string(),
248                    namespace: namespace.clone(),
249                });
250            }
251            // Fallback: id lookup that did not match (defensive — the
252            // MemoryNotFoundById branch above already returned in the
253            // normal id-miss path).
254            if let Some(id) = args.id {
255                return Err(AppError::MemoryNotFoundById { id });
256            }
257            // Unreachable: the `else` branch above already validated that
258            // one of name/id is set. Keep a defensive message for future
259            // refactors that may restructure the lookup arms.
260            return Err(AppError::Validation(
261                "internal: read reached NotFound without name or id".to_string(),
262            ));
263        }
264    }
265
266    Ok(())
267}
268
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline response shared by the serialization tests.
    ///
    /// Each test overrides only the fields it asserts on via struct-update
    /// syntax, so adding a field to `ReadResponse` means touching one place.
    fn sample_response() -> ReadResponse {
        ReadResponse {
            id: 1,
            memory_id: 1,
            namespace: "global".to_string(),
            name: "mem".to_string(),
            type_alias: "fact".to_string(),
            memory_type: "fact".to_string(),
            description: "d".to_string(),
            body: "b".to_string(),
            body_hash: "h".to_string(),
            session_id: None,
            source: "agent".to_string(),
            metadata: serde_json::json!({}),
            version: 1,
            created_at: 0,
            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
            updated_at: 0,
            updated_at_iso: "1970-01-01T00:00:00Z".to_string(),
            entities: None,
            relationships: None,
            elapsed_ms: 0,
        }
    }

    #[test]
    fn epoch_to_iso_converts_zero_to_unix_epoch() {
        // v1.0.68 (test fix): parse the ISO string back and compare instants so
        // the assertion is timezone-agnostic. The previous
        // `starts_with("1970-01-01T00:00:00")` assertion leaked the global
        // SQLITE_GRAPHRAG_DISPLAY_TZ from sibling tests in the same process and
        // failed on hosts where the default timezone is non-UTC.
        let result = epoch_to_iso(0);
        let parsed = chrono::DateTime::parse_from_rfc3339(&result)
            .unwrap_or_else(|e| panic!("epoch_to_iso(0) returned non-RFC3339 `{result}`: {e}"));
        assert_eq!(
            parsed.timestamp(),
            chrono::DateTime::UNIX_EPOCH.timestamp(),
            "epoch 0 must map to the Unix epoch instant, got: {result}"
        );
    }

    #[test]
    fn epoch_to_iso_converts_known_timestamp() {
        // v1.0.68 (test fix): 1_705_320_000 = 2024-01-15T12:00:00Z, not
        // 2024-01-15T00:00:00Z (the previous test asserted the wrong instant).
        // Comparing parsed timestamps keeps the check timezone-agnostic and
        // catches wrong-epoch regressions regardless of host TZ.
        let result = epoch_to_iso(1_705_320_000);
        let parsed = chrono::DateTime::parse_from_rfc3339(&result).unwrap_or_else(|e| {
            panic!("epoch_to_iso(1705320000) returned non-RFC3339 `{result}`: {e}")
        });
        let expected = chrono::DateTime::parse_from_rfc3339("2024-01-15T12:00:00+00:00")
            .expect("static RFC3339 is valid");
        assert_eq!(
            parsed.timestamp(),
            expected.timestamp(),
            "timestamp 1705320000 must map to 2024-01-15T12:00:00Z, got: {result}"
        );
    }

    #[test]
    fn epoch_to_iso_returns_fallback_for_invalid_negative_epoch() {
        let result = epoch_to_iso(i64::MIN);
        assert!(
            !result.is_empty(),
            "must return a non-empty string even for invalid epoch"
        );
    }

    #[test]
    fn read_response_serializes_id_and_memory_id_aliases() {
        let resp = ReadResponse {
            id: 42,
            memory_id: 42,
            type_alias: "fact".to_string(),
            memory_type: "fact".to_string(),
            session_id: None,
            metadata: serde_json::json!({}),
            elapsed_ms: 5,
            ..sample_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["id"], 42);
        assert_eq!(json["memory_id"], 42);
        assert_eq!(json["type"], "fact");
        assert_eq!(json["memory_type"], "fact");
        assert_eq!(json["elapsed_ms"], 5u64);
        assert!(
            json["session_id"].is_null(),
            "session_id None must serialize as null"
        );
        // metadata must serialize as a JSON object, not as an escaped string
        assert!(
            json["metadata"].is_object(),
            "metadata must be a JSON object"
        );
    }

    #[test]
    fn read_response_session_id_some_serializes_string() {
        let resp = ReadResponse {
            session_id: Some("sess-123".to_string()),
            ..sample_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["session_id"], "sess-123");
    }

    #[test]
    fn read_response_elapsed_ms_is_present() {
        let resp = ReadResponse {
            created_at: 1000,
            created_at_iso: "1970-01-01T00:16:40Z".to_string(),
            updated_at: 2000,
            updated_at_iso: "1970-01-01T00:33:20Z".to_string(),
            elapsed_ms: 123,
            ..sample_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["elapsed_ms"], 123u64);
        assert!(json["created_at_iso"].is_string());
        assert!(json["updated_at_iso"].is_string());
    }

    #[test]
    fn read_response_metadata_object_not_escaped_string() {
        // P2-A: metadata must serialize as a JSON object, not as an escaped string.
        let resp = ReadResponse {
            metadata: serde_json::json!({"key": "value", "number": 42}),
            ..sample_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        // Must be object, not a JSON string containing escaped JSON.
        assert!(json["metadata"].is_object());
        assert_eq!(json["metadata"]["key"], "value");
        assert_eq!(json["metadata"]["number"], 42);
    }

    #[test]
    fn read_response_metadata_fallback_to_null_for_invalid_json() {
        // P2-A: mirrors the fallback expression used by `run` when the stored
        // metadata text is not valid JSON.
        let raw = "invalid-json{{{";
        let parsed =
            serde_json::from_str::<serde_json::Value>(raw).unwrap_or(serde_json::Value::Null);
        assert!(parsed.is_null());
    }

    #[test]
    fn read_response_graph_fields_omitted_unless_populated() {
        // `entities` / `relationships` carry `skip_serializing_if = "Option::is_none"`:
        // the keys must be absent without graph context and present with it.
        let plain = serde_json::to_value(sample_response()).expect("serialization failed");
        assert!(
            plain.get("entities").is_none(),
            "entities must be omitted when None"
        );
        assert!(
            plain.get("relationships").is_none(),
            "relationships must be omitted when None"
        );

        let with_graph = ReadResponse {
            entities: Some(vec![ReadEntityBinding {
                entity_id: 7,
                name: "rust".to_string(),
                entity_type: "tool".to_string(),
            }]),
            relationships: Some(vec![ReadRelationshipBinding {
                from: "rust".to_string(),
                to: "cargo".to_string(),
                relation: "uses".to_string(),
                weight: 0.5,
            }]),
            ..sample_response()
        };
        let json = serde_json::to_value(&with_graph).expect("serialization failed");
        assert_eq!(json["entities"][0]["entity_id"], 7);
        assert_eq!(json["entities"][0]["name"], "rust");
        assert_eq!(json["entities"][0]["entity_type"], "tool");
        assert_eq!(json["relationships"][0]["from"], "rust");
        assert_eq!(json["relationships"][0]["to"], "cargo");
        assert_eq!(json["relationships"][0]["relation"], "uses");
        assert_eq!(json["relationships"][0]["weight"], 0.5);
    }

    // G55 S2 (v1.0.80): the structural `MemoryNotFound` variant must include
    // the requested name and namespace in the message — never the legacy
    // `unknown` literal that masked which lookup target failed.
    #[test]
    fn memory_not_found_structural_includes_name_and_namespace() {
        let err = AppError::MemoryNotFound {
            name: "atomwrite-projeto-contexto".to_string(),
            namespace: "global".to_string(),
        };
        let msg = err.to_string();
        assert!(msg.contains("atomwrite-projeto-contexto"), "got: {msg}");
        assert!(msg.contains("global"), "got: {msg}");
        assert!(
            !msg.contains("unknown"),
            "must not contain 'unknown': {msg}"
        );
        assert_eq!(err.exit_code(), 4);
        assert!(err.is_permanent());
    }

    #[test]
    fn memory_not_found_by_id_structural_includes_id() {
        let err = AppError::MemoryNotFoundById { id: 42 };
        let msg = err.to_string();
        assert!(msg.contains("42"), "got: {msg}");
        assert!(msg.contains("id=42"), "got: {msg}");
        assert_eq!(err.exit_code(), 4);
    }

    #[test]
    fn memory_not_found_pt_br_drops_english_fragments() {
        // The pt-BR translation must not contain leftover English fragments
        // like "not found" — that was the original G55 bug.
        use crate::i18n::Language;
        let err = AppError::MemoryNotFound {
            name: "mem-fantasma".to_string(),
            namespace: "global".to_string(),
        };
        let pt = err.localized_message_for(Language::Portuguese);
        assert!(!pt.contains("not found"), "pt-BR fragment leaked: {pt}");
        assert!(pt.contains("mem-fantasma"), "name missing in pt: {pt}");
        assert!(pt.contains("global"), "namespace missing in pt: {pt}");
    }
}