sqlite_graphrag/output.rs
1//! Single point of terminal I/O for the CLI (stdout JSON, stderr human).
2//!
3//! All user-visible output must go through this module; direct `println!` in
4//! other modules is forbidden.
5
6use crate::errors::AppError;
7use serde::Serialize;
8
9/// Output format variants accepted by `--format` CLI flags.
10#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
11pub enum OutputFormat {
12 #[default]
13 Json,
14 Text,
15 Markdown,
16}
17
18/// Restricted JSON-only format for commands that always emit JSON.
19#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
20pub enum JsonOutputFormat {
21 #[default]
22 Json,
23}
24
25/// Serializes `value` as pretty-printed JSON and writes it to stdout with a trailing newline.
26///
27/// Flushes stdout after writing. A `BrokenPipe` error is silenced so that
28/// piping to consumers that close early (e.g. `head`) does not surface an error.
29///
30/// # Errors
31/// Returns `Err` when serialization fails or when a non-`BrokenPipe` I/O error occurs.
32pub fn emit_json<T: Serialize>(value: &T) -> Result<(), AppError> {
33 let json = serde_json::to_string_pretty(value)?;
34 let mut out = std::io::stdout().lock();
35 if let Err(e) = std::io::Write::write_all(&mut out, json.as_bytes())
36 .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
37 .and_then(|()| std::io::Write::flush(&mut out))
38 {
39 if e.kind() == std::io::ErrorKind::BrokenPipe {
40 return Ok(());
41 }
42 return Err(AppError::Io(e));
43 }
44 Ok(())
45}
46
47/// Serializes `value` as compact (single-line) JSON and writes it to stdout with a trailing newline.
48///
49/// Flushes stdout after writing. A `BrokenPipe` error is silenced.
50///
51/// # Errors
52/// Returns `Err` when serialization fails or when a non-`BrokenPipe` I/O error occurs.
53pub fn emit_json_compact<T: Serialize>(value: &T) -> Result<(), AppError> {
54 let json = serde_json::to_string(value)?;
55 let mut out = std::io::stdout().lock();
56 if let Err(e) = std::io::Write::write_all(&mut out, json.as_bytes())
57 .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
58 .and_then(|()| std::io::Write::flush(&mut out))
59 {
60 if e.kind() == std::io::ErrorKind::BrokenPipe {
61 return Ok(());
62 }
63 return Err(AppError::Io(e));
64 }
65 Ok(())
66}
67
/// Writes `msg` followed by a newline to stdout and flushes.
///
/// Best-effort: the sequence stops at the first failing step and the error is
/// discarded. This covers `BrokenPipe` as well as every other I/O error, since
/// the function has no way to report failure to its caller.
pub fn emit_text(msg: &str) {
    use std::io::Write;

    let mut stdout = std::io::stdout().lock();
    if stdout.write_all(msg.as_bytes()).is_err() {
        return;
    }
    if stdout.write_all(b"\n").is_err() {
        return;
    }
    // A failed flush is ignored just like a failed write.
    let _ = stdout.flush();
}
77
78/// Logs `msg` as a structured `tracing::info!` event (does not write to stdout).
79pub fn emit_progress(msg: &str) {
80 tracing::info!(message = msg);
81}
82
83/// Emits a bilingual progress message honouring `--lang` or `SQLITE_GRAPHRAG_LANG`.
84/// Usage: `output::emit_progress_i18n("Computing embedding...", "Calculando embedding...")`.
85pub fn emit_progress_i18n(en: &str, pt: &str) {
86 use crate::i18n::{current, Language};
87 match current() {
88 Language::English => tracing::info!(message = en),
89 Language::Portuguese => tracing::info!(message = pt),
90 }
91}
92
93/// Emits a JSON error envelope to stdout for machine consumers.
94///
95/// Ensures the stdout JSON contract is honoured even on error paths:
96/// `{"error": true, "code": <exit_code>, "message": "<localized_msg>"}`.
97/// A `BrokenPipe` error is silenced so piping to early-closing consumers
98/// does not surface a secondary error.
99pub fn emit_error_json(code: i32, message: &str) {
100 #[derive(serde::Serialize)]
101 struct ErrorEnvelope<'a> {
102 error: bool,
103 code: i32,
104 message: &'a str,
105 }
106 let envelope = ErrorEnvelope {
107 error: true,
108 code,
109 message,
110 };
111 let _ = emit_json(&envelope);
112}
113
114/// Emits a localised error message to stderr with the `Error:`/`Erro:` prefix.
115///
116/// Centralises human-readable error output following Pattern 5 (`output.rs` is the
117/// SOLE I/O point of the CLI). Does not log via `tracing` — call `tracing::error!`
118/// explicitly before this function when structured observability is desired.
119pub fn emit_error(localized_msg: &str) {
120 eprintln!("{}: {}", crate::i18n::error_prefix(), localized_msg);
121}
122
123/// Emits a bilingual error to stderr honouring `--lang` or `SQLITE_GRAPHRAG_LANG`.
124/// Usage: `output::emit_error_i18n("invariant violated", "invariante violado")`.
125pub fn emit_error_i18n(en: &str, pt: &str) {
126 use crate::i18n::{current, Language};
127 let msg = match current() {
128 Language::English => en,
129 Language::Portuguese => pt,
130 };
131 emit_error(msg);
132}
133
134/// JSON payload emitted by the `remember` subcommand.
135///
136/// All fields are required by the JSON contract (see `docs/schemas/remember.schema.json`).
137/// `operation` is an alias of `action` for compatibility with clients using the old field name.
138///
139/// # Examples
140///
141/// ```
142/// use sqlite_graphrag::output::RememberResponse;
143///
144/// let resp = RememberResponse {
145/// memory_id: 1,
146/// name: "nota-inicial".into(),
147/// namespace: "global".into(),
148/// action: "created".into(),
149/// operation: "created".into(),
150/// version: 1,
151/// entities_persisted: 0,
152/// relationships_persisted: 0,
153/// relationships_truncated: false,
154/// chunks_created: 1,
155/// chunks_persisted: 0,
156/// urls_persisted: 0,
157/// extraction_method: None,
158/// merged_into_memory_id: None,
159/// warnings: vec![],
160/// created_at: 1_700_000_000,
161/// created_at_iso: "2023-11-14T22:13:20Z".into(),
162/// elapsed_ms: 42,
163/// name_was_normalized: false,
164/// original_name: None,
165/// };
166///
167/// let json = serde_json::to_string(&resp).unwrap();
168/// assert!(json.contains("\"memory_id\":1"));
169/// assert!(json.contains("\"elapsed_ms\":42"));
170/// assert!(json.contains("\"merged_into_memory_id\":null"));
171/// assert!(json.contains("\"urls_persisted\":0"));
172/// assert!(json.contains("\"relationships_truncated\":false"));
173/// ```
174#[derive(Serialize)]
175pub struct RememberResponse {
176 pub memory_id: i64,
177 pub name: String,
178 pub namespace: String,
179 pub action: String,
180 /// Semantic alias of `action` for compatibility with the contract documented in SKILL.md and AGENT_PROTOCOL.md.
181 pub operation: String,
182 pub version: i64,
183 pub entities_persisted: usize,
184 pub relationships_persisted: usize,
185 /// True when the relationship builder hit the cap before covering all entity pairs.
186 /// Callers can use this to decide whether to increase GRAPHRAG_MAX_RELATIONSHIPS_PER_MEMORY.
187 pub relationships_truncated: bool,
188 /// Total number of chunks the body was split into BEFORE dedup.
189 ///
190 /// For single-chunk bodies this equals 1 even though no row is added to
191 /// the `memory_chunks` table — the memory row itself acts as the chunk.
192 /// Use `chunks_persisted` to know how many rows were actually written.
193 pub chunks_created: usize,
194 /// Number of chunks actually written to chunks/embeddings tables. Always <= chunks_created.
195 ///
196 /// Equal when no chunk had identical normalized text already in DB; less when dedup skipped
197 /// some. Equals zero for single-chunk bodies (the memory row is the chunk) and equals
198 /// `chunks_created` for multi-chunk bodies. Added in v1.0.23 to disambiguate from
199 /// `chunks_created` and reflect database state precisely.
200 pub chunks_persisted: usize,
201 /// Number of unique URLs inserted into `memory_urls` for this memory.
202 /// Added in v1.0.24 — split URLs out of the entity graph (P0-2 fix).
203 #[serde(default)]
204 pub urls_persisted: usize,
205 /// Extraction method used: "gliner-{variant}+regex" or "regex-only". None when NER is not enabled.
206 #[serde(skip_serializing_if = "Option::is_none")]
207 pub extraction_method: Option<String>,
208 pub merged_into_memory_id: Option<i64>,
209 pub warnings: Vec<String>,
210 /// Timestamp Unix epoch seconds.
211 pub created_at: i64,
212 /// RFC 3339 UTC timestamp string parallel to `created_at` for ISO 8601 parsers.
213 pub created_at_iso: String,
214 /// Total execution time in milliseconds from handler start to serialisation.
215 pub elapsed_ms: u64,
216 /// True when the user-supplied `--name` differed from the persisted slug
217 /// (i.e. kebab-case normalization changed the value). Added in v1.0.32 so
218 /// callers can detect normalization without parsing stderr WARN logs.
219 #[serde(default)]
220 pub name_was_normalized: bool,
221 /// Original user-supplied `--name` value before normalization.
222 /// Present only when `name_was_normalized == true`; omitted otherwise to
223 /// keep the common (already-kebab) payload small.
224 #[serde(skip_serializing_if = "Option::is_none")]
225 pub original_name: Option<String>,
226}
227
228/// Individual item returned by the `recall` query.
229///
230/// The `memory_type` field is serialised as `"type"` in JSON to maintain
231/// compatibility with external clients — the Rust name uses `memory_type`
232/// to avoid conflict with the reserved keyword.
233///
234/// # Examples
235///
236/// ```
237/// use sqlite_graphrag::output::RecallItem;
238///
239/// let item = RecallItem {
240/// memory_id: 7,
241/// name: "nota-rust".into(),
242/// namespace: "global".into(),
243/// memory_type: "user".into(),
244/// description: "aprendizado de Rust".into(),
245/// snippet: "ownership e borrowing".into(),
246/// distance: 0.12,
247/// score: 0.88,
248/// source: "direct".into(),
249/// graph_depth: None,
250/// };
251///
252/// let json = serde_json::to_string(&item).unwrap();
253/// // Rust field `memory_type` appears as `"type"` in JSON.
254/// assert!(json.contains("\"type\":\"user\""));
255/// assert!(!json.contains("memory_type"));
256/// assert!(json.contains("\"distance\":0.12"));
257/// ```
258#[derive(Serialize, Clone)]
259pub struct RecallItem {
260 pub memory_id: i64,
261 pub name: String,
262 pub namespace: String,
263 #[serde(rename = "type")]
264 pub memory_type: String,
265 pub description: String,
266 pub snippet: String,
267 pub distance: f32,
268 /// Cosine similarity in `[0.0, 1.0]` derived as `1.0 - distance` and clamped
269 /// to that interval. Always populated to satisfy the documented contract
270 /// (M-A5 in v1.0.40); higher means more similar. For graph hits the value
271 /// reflects the hop-derived distance proxy and should be interpreted
272 /// alongside `graph_depth` rather than as a true cosine score.
273 pub score: f32,
274 pub source: String,
275 /// Number of graph hops between this match and the seed memories.
276 ///
277 /// Set to `None` for direct vector matches (where `distance` is meaningful)
278 /// and to `Some(N)` for traversal results, with `N=0` when the depth could
279 /// not be tracked precisely. Added in v1.0.23 to disambiguate graph results
280 /// from the `distance: 0.0` placeholder previously used for graph entries.
281 /// Field is omitted from JSON output when `None`.
282 #[serde(skip_serializing_if = "Option::is_none")]
283 pub graph_depth: Option<u32>,
284}
285
286impl RecallItem {
287 /// Computes the similarity score from a vector distance, clamped to
288 /// `[0.0, 1.0]`. Cosine distance returned by sqlite-vec lives in `[0, 2]`
289 /// in theory but the embedder produces unit-norm vectors so the practical
290 /// range is `[0, 1]`. Centralized so every constructor keeps the contract.
291 pub fn score_from_distance(distance: f32) -> f32 {
292 let raw = 1.0 - distance;
293 if raw.is_nan() {
294 0.0
295 } else {
296 raw.clamp(0.0, 1.0)
297 }
298 }
299}
300
301/// Full response envelope returned by the `recall` subcommand.
302///
303/// Contains both direct vector matches and graph-traversal matches, plus the
304/// aggregated `results` list that merges both for callers that do not need
305/// to distinguish the source.
306#[derive(Serialize)]
307pub struct RecallResponse {
308 pub query: String,
309 pub k: usize,
310 pub direct_matches: Vec<RecallItem>,
311 pub graph_matches: Vec<RecallItem>,
312 /// Aggregated alias of `direct_matches` + `graph_matches` for the contract documented in SKILL.md.
313 pub results: Vec<RecallItem>,
314 /// Total execution time in milliseconds from handler start to serialisation.
315 pub elapsed_ms: u64,
316}
317
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Serialize;

    #[derive(Serialize)]
    struct Dummy {
        val: u32,
    }

    // Implements `Serialize` but always fails, to drive the serialization
    // error path of the JSON emitters.
    struct NotSerializable;
    impl Serialize for NotSerializable {
        fn serialize<S: serde::Serializer>(&self, _: S) -> Result<S::Ok, S::Error> {
            Err(serde::ser::Error::custom(
                "intentional serialization failure",
            ))
        }
    }

    /// Baseline `RememberResponse` with both optional (skippable) fields unset.
    fn sample_remember_response() -> RememberResponse {
        RememberResponse {
            memory_id: 1,
            name: "teste".to_string(),
            namespace: "ns".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 2,
            relationships_persisted: 3,
            relationships_truncated: false,
            chunks_created: 4,
            chunks_persisted: 4,
            urls_persisted: 2,
            extraction_method: None,
            merged_into_memory_id: None,
            warnings: vec!["aviso".to_string()],
            created_at: 1776569715,
            created_at_iso: "2026-04-19T03:34:15Z".to_string(),
            elapsed_ms: 123,
            name_was_normalized: false,
            original_name: None,
        }
    }

    #[test]
    fn emit_json_returns_ok_for_valid_value() {
        let v = Dummy { val: 42 };
        assert!(emit_json(&v).is_ok());
    }

    #[test]
    fn emit_json_returns_err_for_non_serializable_value() {
        let v = NotSerializable;
        assert!(emit_json(&v).is_err());
    }

    #[test]
    fn emit_json_compact_returns_ok_for_valid_value() {
        let v = Dummy { val: 7 };
        assert!(emit_json_compact(&v).is_ok());
    }

    #[test]
    fn emit_json_compact_returns_err_for_non_serializable_value() {
        let v = NotSerializable;
        assert!(emit_json_compact(&v).is_err());
    }

    #[test]
    fn emit_text_does_not_panic() {
        emit_text("mensagem de teste");
    }

    #[test]
    fn emit_progress_does_not_panic() {
        emit_progress("progresso de teste");
    }

    #[test]
    fn remember_response_serializes_correctly() {
        let r = sample_remember_response();
        let json = serde_json::to_string(&r).unwrap();
        assert!(json.contains("memory_id"));
        assert!(json.contains("aviso"));
        assert!(json.contains("\"namespace\""));
        assert!(json.contains("\"merged_into_memory_id\""));
        assert!(json.contains("\"operation\""));
        assert!(json.contains("\"created_at\""));
        assert!(json.contains("\"created_at_iso\""));
        assert!(json.contains("\"elapsed_ms\""));
        assert!(json.contains("\"urls_persisted\""));
        assert!(json.contains("\"relationships_truncated\":false"));
    }

    #[test]
    fn remember_response_omits_unset_optional_fields() {
        let json = serde_json::to_string(&sample_remember_response()).unwrap();
        // `skip_serializing_if` fields disappear when `None`...
        assert!(!json.contains("extraction_method"));
        assert!(!json.contains("original_name"));
        // ...while a plain `Option` field is still emitted as `null`.
        assert!(json.contains("\"merged_into_memory_id\":null"));
    }

    #[test]
    fn remember_response_includes_set_optional_fields() {
        let r = RememberResponse {
            extraction_method: Some("regex-only".to_string()),
            name_was_normalized: true,
            original_name: Some("My Note".to_string()),
            ..sample_remember_response()
        };
        let json = serde_json::to_string(&r).unwrap();
        assert!(json.contains("\"extraction_method\":\"regex-only\""));
        assert!(json.contains("\"name_was_normalized\":true"));
        assert!(json.contains("\"original_name\":\"My Note\""));
    }

    #[test]
    fn recall_item_serializes_renamed_type_field() {
        let item = RecallItem {
            memory_id: 10,
            name: "entidade".to_string(),
            namespace: "ns".to_string(),
            memory_type: "entity".to_string(),
            description: "desc".to_string(),
            snippet: "trecho".to_string(),
            distance: 0.5,
            score: RecallItem::score_from_distance(0.5),
            source: "db".to_string(),
            graph_depth: None,
        };
        let json = serde_json::to_string(&item).unwrap();
        assert!(json.contains("\"type\""));
        assert!(!json.contains("memory_type"));
        // Field is omitted from JSON when None.
        assert!(!json.contains("graph_depth"));
        assert!(json.contains("\"score\":0.5"));
    }

    #[test]
    fn recall_item_serializes_graph_depth_when_set() {
        let item = RecallItem {
            memory_id: 11,
            name: "vizinho".to_string(),
            namespace: "ns".to_string(),
            memory_type: "entity".to_string(),
            description: "desc".to_string(),
            snippet: "trecho".to_string(),
            distance: 0.5,
            score: RecallItem::score_from_distance(0.5),
            source: "graph".to_string(),
            graph_depth: Some(2),
        };
        let json = serde_json::to_string(&item).unwrap();
        assert!(json.contains("\"graph_depth\":2"));
    }

    #[test]
    fn score_from_distance_clamps_to_unit_interval() {
        assert_eq!(RecallItem::score_from_distance(0.0), 1.0);
        assert_eq!(RecallItem::score_from_distance(0.25), 0.75);
        assert_eq!(RecallItem::score_from_distance(1.0), 0.0);
        // Out-of-range distances saturate instead of leaving [0, 1].
        assert_eq!(RecallItem::score_from_distance(2.0), 0.0);
        assert_eq!(RecallItem::score_from_distance(-1.0), 1.0);
    }

    #[test]
    fn score_from_distance_maps_nan_to_zero() {
        assert_eq!(RecallItem::score_from_distance(f32::NAN), 0.0);
    }

    #[test]
    fn recall_response_serializes_with_lists() {
        let resp = RecallResponse {
            query: "busca".to_string(),
            k: 10,
            direct_matches: vec![],
            graph_matches: vec![],
            results: vec![],
            elapsed_ms: 42,
        };
        let json = serde_json::to_string(&resp).unwrap();
        assert!(json.contains("direct_matches"));
        assert!(json.contains("graph_matches"));
        assert!(json.contains("\"k\":"));
        assert!(json.contains("\"results\""));
        assert!(json.contains("\"elapsed_ms\""));
    }

    // NOTE(review): this checks a local copy of the envelope shape, not the
    // private struct inside `emit_error_json`; keep the two in sync by hand.
    #[test]
    fn error_envelope_serializes_correctly() {
        #[derive(serde::Serialize)]
        struct ErrorEnvelope<'a> {
            error: bool,
            code: i32,
            message: &'a str,
        }
        let envelope = ErrorEnvelope {
            error: true,
            code: 10,
            message: "database disk image is malformed",
        };
        let json = serde_json::to_value(&envelope).unwrap();
        assert_eq!(json["error"], true);
        assert_eq!(json["code"], 10);
        assert_eq!(json["message"], "database disk image is malformed");
    }

    #[test]
    fn output_format_default_is_json() {
        let fmt = OutputFormat::default();
        assert!(matches!(fmt, OutputFormat::Json));
    }

    #[test]
    fn json_output_format_default_is_json() {
        let fmt = JsonOutputFormat::default();
        assert!(matches!(fmt, JsonOutputFormat::Json));
    }

    #[test]
    fn output_format_variants_exist() {
        let _text = OutputFormat::Text;
        let _md = OutputFormat::Markdown;
        let _json = OutputFormat::Json;
    }

    #[test]
    fn recall_item_clone_produces_equal_value() {
        let item = RecallItem {
            memory_id: 99,
            name: "clone".to_string(),
            namespace: "ns".to_string(),
            memory_type: "relation".to_string(),
            description: "d".to_string(),
            snippet: "s".to_string(),
            distance: 0.1,
            score: RecallItem::score_from_distance(0.1),
            source: "src".to_string(),
            graph_depth: Some(2),
        };
        let cloned = item.clone();
        assert_eq!(cloned.memory_id, item.memory_id);
        assert_eq!(cloned.name, item.name);
        assert_eq!(cloned.graph_depth, Some(2));
    }
}
500}