sqlite_graphrag/output.rs
1//! Single point of terminal I/O for the CLI (stdout JSON, stderr human).
2//!
3//! All user-visible output must go through this module; direct `println!` in
4//! other modules is forbidden.
5
6use crate::errors::AppError;
7use serde::Serialize;
8
9/// Output format variants accepted by `--format` CLI flags.
10#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
11pub enum OutputFormat {
12 #[default]
13 Json,
14 Text,
15 Markdown,
16}
17
18/// Restricted JSON-only format for commands that always emit JSON.
19#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
20pub enum JsonOutputFormat {
21 #[default]
22 Json,
23}
24
25/// Serializes `value` as pretty-printed JSON and writes it to stdout with a trailing newline.
26///
27/// Flushes stdout after writing. A `BrokenPipe` error is silenced so that
28/// piping to consumers that close early (e.g. `head`) does not surface an error.
29///
30/// # Errors
31/// Returns `Err` when serialization fails or when a non-`BrokenPipe` I/O error occurs.
32#[inline]
33pub fn emit_json<T: Serialize>(value: &T) -> Result<(), AppError> {
34 let json = serde_json::to_string_pretty(value)?;
35 let mut out = std::io::stdout().lock();
36 if let Err(e) = std::io::Write::write_all(&mut out, json.as_bytes())
37 .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
38 .and_then(|()| std::io::Write::flush(&mut out))
39 {
40 if e.kind() == std::io::ErrorKind::BrokenPipe {
41 return Ok(());
42 }
43 return Err(AppError::Io(e));
44 }
45 Ok(())
46}
47
48/// Serializes `value` as compact (single-line) JSON and writes it to stdout with a trailing newline.
49///
50/// Flushes stdout after writing. A `BrokenPipe` error is silenced.
51///
52/// # Errors
53/// Returns `Err` when serialization fails or when a non-`BrokenPipe` I/O error occurs.
54#[inline]
55pub fn emit_json_compact<T: Serialize>(value: &T) -> Result<(), AppError> {
56 let json = serde_json::to_string(value)?;
57 let mut out = std::io::stdout().lock();
58 if let Err(e) = std::io::Write::write_all(&mut out, json.as_bytes())
59 .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
60 .and_then(|()| std::io::Write::flush(&mut out))
61 {
62 if e.kind() == std::io::ErrorKind::BrokenPipe {
63 return Ok(());
64 }
65 return Err(AppError::Io(e));
66 }
67 Ok(())
68}
69
70/// Writes compact JSON to stdout, silently ignoring serialization and I/O errors.
71/// Designed for NDJSON streaming where partial output is acceptable.
72#[inline]
73pub fn emit_json_line<T: Serialize>(value: &T) {
74 if let Ok(json) = serde_json::to_string(value) {
75 let mut out = std::io::stdout().lock();
76 let _ = std::io::Write::write_all(&mut out, json.as_bytes());
77 let _ = std::io::Write::write_all(&mut out, b"\n");
78 let _ = std::io::Write::flush(&mut out);
79 }
80}
81
/// Writes `msg` followed by a newline to stdout and flushes.
///
/// Best-effort: any I/O error (including, but not limited to, `BrokenPipe`)
/// is discarded. The steps short-circuit, so a failed write skips the
/// newline and the flush.
#[inline]
pub fn emit_text(msg: &str) {
    use std::io::Write;

    /// Runs the write / newline / flush sequence, stopping at the first error.
    fn write_line(out: &mut impl Write, text: &str) -> std::io::Result<()> {
        out.write_all(text.as_bytes())?;
        out.write_all(b"\n")?;
        out.flush()
    }

    let mut stdout = std::io::stdout().lock();
    // Deliberately ignored: text output has no error channel to the caller.
    let _ = write_line(&mut stdout, msg);
}
92
93/// Logs `msg` as a structured `tracing::info!` event (does not write to stdout).
94#[inline]
95pub fn emit_progress(msg: &str) {
96 tracing::info!(target: "output", message = msg);
97}
98
99/// Emits a bilingual progress message honouring `--lang` or `SQLITE_GRAPHRAG_LANG`.
100/// Usage: `output::emit_progress_i18n("Computing embedding...", "Calculando embedding...")`.
101pub fn emit_progress_i18n(en: &str, pt: &str) {
102 use crate::i18n::{current, Language};
103 match current() {
104 Language::English => tracing::info!(target: "output", message = en),
105 Language::Portuguese => tracing::info!(target: "output", message = pt),
106 }
107}
108
109/// Emits a JSON error envelope to stdout for machine consumers.
110///
111/// Ensures the stdout JSON contract is honoured even on error paths:
112/// `{"error": true, "code": <exit_code>, "message": "<localized_msg>"}`.
113/// A `BrokenPipe` error is silenced so piping to early-closing consumers
114/// does not surface a secondary error.
115#[cold]
116#[inline(never)]
117pub fn emit_error_json(code: i32, message: &str) {
118 #[derive(serde::Serialize)]
119 struct ErrorEnvelope<'a> {
120 error: bool,
121 code: i32,
122 message: &'a str,
123 }
124 let envelope = ErrorEnvelope {
125 error: true,
126 code,
127 message,
128 };
129 if emit_json(&envelope).is_err() {
130 use std::io::Write;
131 let escaped = message.replace('\\', "\\\\").replace('"', "\\\"");
132 let _ = writeln!(
133 std::io::stdout().lock(),
134 r#"{{"error":true,"code":{code},"message":"{escaped}"}}"#
135 );
136 }
137}
138
139/// Emits a localised error message to stderr via the `tracing` subscriber.
140///
141/// ADR-0047 / BUG-12 v1.0.88: prior implementation also called `eprintln!`
142/// which produced a SECOND stderr line (Error:/Erro: prefix) for the same
143/// error, on top of the structured `tracing::error!` line. Operators and
144/// log parsers observed duplicated stderr lines.
145///
146/// The tracing subscriber is configured for stderr at `main.rs:115`, so a
147/// single `tracing::error!` call already produces the human-readable line.
148/// Callers that want a plain stderr line without tracing (e.g. one-shot
149/// scripts) should use `eprintln!` directly instead of this helper.
150///
151/// Centralises human-readable error output following Pattern 5 (`output.rs` is
152/// the SOLE I/O point of the CLI).
153#[cold]
154#[inline(never)]
155pub fn emit_error(localized_msg: &str) {
156 tracing::error!(target: "output", message = localized_msg);
157}
158
159/// Emits a bilingual error to stderr honouring `--lang` or `SQLITE_GRAPHRAG_LANG`.
160/// Usage: `output::emit_error_i18n("invariant violated", "invariante violado")`.
161#[cold]
162#[inline(never)]
163pub fn emit_error_i18n(en: &str, pt: &str) {
164 use crate::i18n::{current, Language};
165 let msg = match current() {
166 Language::English => en,
167 Language::Portuguese => pt,
168 };
169 emit_error(msg);
170}
171
172/// JSON payload emitted by the `remember` subcommand.
173///
174/// All fields are required by the JSON contract (see `docs/schemas/remember.schema.json`).
175/// `operation` is an alias of `action` for compatibility with clients using the old field name.
176///
177/// # Examples
178///
179/// ```
180/// use sqlite_graphrag::output::RememberResponse;
181///
182/// let resp = RememberResponse {
183/// memory_id: 1,
184/// name: "nota-inicial".into(),
185/// namespace: "global".into(),
186/// action: "created".into(),
187/// operation: "created".into(),
188/// version: 1,
189/// entities_persisted: 0,
190/// relationships_persisted: 0,
191/// relationships_truncated: false,
192/// chunks_created: 1,
193/// chunks_persisted: 0,
194/// urls_persisted: 0,
195/// extraction_method: None,
196/// merged_into_memory_id: None,
197/// warnings: vec![],
198/// created_at: 1_700_000_000,
199/// created_at_iso: "2023-11-14T22:13:20Z".into(),
200/// elapsed_ms: 42,
201/// name_was_normalized: false,
202/// original_name: None,
203/// backend_invoked: None,
204/// };
205///
206/// let json = serde_json::to_string(&resp).unwrap();
207/// assert!(json.contains("\"memory_id\":1"));
208/// assert!(json.contains("\"elapsed_ms\":42"));
209/// assert!(json.contains("\"merged_into_memory_id\":null"));
210/// assert!(json.contains("\"urls_persisted\":0"));
211/// assert!(json.contains("\"relationships_truncated\":false"));
212/// ```
213#[derive(Serialize)]
214pub struct RememberResponse {
215 pub memory_id: i64,
216 pub name: String,
217 pub namespace: String,
218 pub action: String,
219 /// Semantic alias of `action` for compatibility with the contract documented in SKILL.md.
220 pub operation: String,
221 pub version: i64,
222 pub entities_persisted: usize,
223 pub relationships_persisted: usize,
224 /// True when the relationship builder hit the cap before covering all entity pairs.
225 /// Callers can use this to decide whether to increase GRAPHRAG_MAX_RELATIONSHIPS_PER_MEMORY.
226 pub relationships_truncated: bool,
227 /// Total number of chunks the body was split into BEFORE dedup.
228 ///
229 /// For single-chunk bodies this equals 1 even though no row is added to
230 /// the `memory_chunks` table — the memory row itself acts as the chunk.
231 /// Use `chunks_persisted` to know how many rows were actually written.
232 pub chunks_created: usize,
233 /// Number of chunks actually written to chunks/embeddings tables. Always <= chunks_created.
234 ///
235 /// Equal when no chunk had identical normalized text already in DB; less when dedup skipped
236 /// some. Equals zero for single-chunk bodies (the memory row is the chunk) and equals
237 /// `chunks_created` for multi-chunk bodies. Added in v1.0.23 to disambiguate from
238 /// `chunks_created` and reflect database state precisely.
239 pub chunks_persisted: usize,
240 /// Number of unique URLs inserted into `memory_urls` for this memory.
241 /// Added in v1.0.24 — split URLs out of the entity graph (P0-2 fix).
242 #[serde(default)]
243 pub urls_persisted: usize,
244 /// Extraction method used: "gliner-{variant}+regex" or "regex-only". None when NER is not enabled.
245 #[serde(skip_serializing_if = "Option::is_none")]
246 pub extraction_method: Option<String>,
247 pub merged_into_memory_id: Option<i64>,
248 pub warnings: Vec<String>,
249 /// Timestamp Unix epoch seconds.
250 pub created_at: i64,
251 /// RFC 3339 UTC timestamp string parallel to `created_at` for ISO 8601 parsers.
252 pub created_at_iso: String,
253 /// Total execution time in milliseconds from handler start to serialisation.
254 pub elapsed_ms: u64,
255 /// True when the user-supplied `--name` differed from the persisted slug
256 /// (i.e. kebab-case normalization changed the value). Added in v1.0.32 so
257 /// callers can detect normalization without parsing stderr WARN logs.
258 #[serde(default)]
259 pub name_was_normalized: bool,
260 /// Original user-supplied `--name` value before normalization.
261 /// Present only when `name_was_normalized == true`; omitted otherwise to
262 /// keep the common (already-kebab) payload small.
263 #[serde(skip_serializing_if = "Option::is_none")]
264 pub original_name: Option<String>,
265 /// v1.0.84 (ADR-0042): discriminador do backend LLM que efetivamente
266 /// executou o embedding da passagem. `"claude" | "codex" | "none"`.
267 /// Absent on the wire when `None` (kept for happy-path envelope cleanliness).
268 #[serde(skip_serializing_if = "Option::is_none")]
269 pub backend_invoked: Option<&'static str>,
270}
271
272/// Individual item returned by the `recall` query.
273///
274/// The `memory_type` field is serialised as `"type"` in JSON to maintain
275/// compatibility with external clients — the Rust name uses `memory_type`
276/// to avoid conflict with the reserved keyword.
277///
278/// # Examples
279///
280/// ```
281/// use sqlite_graphrag::output::RecallItem;
282///
283/// let item = RecallItem {
284/// memory_id: 7,
285/// name: "nota-rust".into(),
286/// namespace: "global".into(),
287/// memory_type: "user".into(),
288/// description: "aprendizado de Rust".into(),
289/// snippet: "ownership e borrowing".into(),
290/// distance: 0.12,
291/// score: 0.88,
292/// source: "direct".into(),
293/// graph_depth: None,
294/// };
295///
296/// let json = serde_json::to_string(&item).unwrap();
297/// // Rust field `memory_type` appears as `"type"` in JSON.
298/// assert!(json.contains("\"type\":\"user\""));
299/// assert!(!json.contains("memory_type"));
300/// assert!(json.contains("\"distance\":0.12"));
301/// ```
302#[derive(Serialize, Clone)]
303pub struct RecallItem {
304 pub memory_id: i64,
305 pub name: String,
306 pub namespace: String,
307 #[serde(rename = "type")]
308 pub memory_type: String,
309 pub description: String,
310 pub snippet: String,
311 pub distance: f32,
312 /// Cosine similarity in `[0.0, 1.0]` derived as `1.0 - distance` and clamped
313 /// to that interval. Always populated to satisfy the documented contract
314 /// (M-A5 in v1.0.40); higher means more similar. For graph hits the value
315 /// reflects the hop-derived distance proxy and should be interpreted
316 /// alongside `graph_depth` rather than as a true cosine score.
317 pub score: f32,
318 pub source: String,
319 /// Number of graph hops between this match and the seed memories.
320 ///
321 /// Set to `None` for direct vector matches (where `distance` is meaningful)
322 /// and to `Some(N)` for traversal results, with `N=0` when the depth could
323 /// not be tracked precisely. Added in v1.0.23 to disambiguate graph results
324 /// from the `distance: 0.0` placeholder previously used for graph entries.
325 /// Field is omitted from JSON output when `None`.
326 #[serde(skip_serializing_if = "Option::is_none")]
327 pub graph_depth: Option<u32>,
328}
329
330impl RecallItem {
331 /// Computes the similarity score from a vector distance, clamped to
332 /// `[0.0, 1.0]`. Cosine distance returned by sqlite-vec lives in `[0, 2]`
333 /// in theory but the embedder produces unit-norm vectors so the practical
334 /// range is `[0, 1]`. Centralized so every constructor keeps the contract.
335 #[inline]
336 pub fn score_from_distance(distance: f32) -> f32 {
337 let raw = 1.0 - distance;
338 if raw.is_nan() {
339 0.0
340 } else {
341 raw.clamp(0.0, 1.0)
342 }
343 }
344}
345
346/// Full response envelope returned by the `recall` subcommand.
347///
348/// Contains both direct vector matches and graph-traversal matches, plus the
349/// aggregated `results` list that merges both for callers that do not need
350/// to distinguish the source.
351#[derive(Serialize)]
352pub struct RecallResponse {
353 pub query: String,
354 pub k: usize,
355 pub direct_matches: Vec<RecallItem>,
356 pub graph_matches: Vec<RecallItem>,
357 /// Aggregated alias of `direct_matches` + `graph_matches` for the contract documented in SKILL.md.
358 pub results: Vec<RecallItem>,
359 /// Total execution time in milliseconds from handler start to serialisation.
360 pub elapsed_ms: u64,
361 /// G58 (v1.0.80): `true` when the live query embedding failed and the
362 /// handler fell back to FTS5 BM25 + LIKE prefix. Symmetric to
363 /// `fts_degraded` in `hybrid-search`. Absent on the wire when false.
364 #[serde(skip_serializing_if = "std::ops::Not::not", default)]
365 pub vec_degraded: bool,
366 /// G58 (v1.0.80): human-readable description of the embedding failure
367 /// that triggered the fallback. Absent on the wire when `vec_degraded`
368 /// is false or the failure had no message.
369 #[serde(skip_serializing_if = "std::option::Option::is_none")]
370 pub vec_error: Option<String>,
371 /// G58 (v1.0.80): advisory warning echoed for callers that branch on
372 /// top-level status. Distinguishes a FTS5-only fallback from a clean
373 /// hybrid response so downstream pipelines can lower their confidence.
374 #[serde(skip_serializing_if = "std::option::Option::is_none")]
375 pub warning: Option<String>,
376 /// v1.0.84 (ADR-0042): discriminador do backend LLM que efetivamente
377 /// executou o embedding live. `"claude" | "codex" | "none"`. Absent
378 /// on the wire when `None` (kept for happy-path envelope cleanliness).
379 #[serde(skip_serializing_if = "std::option::Option::is_none")]
380 pub backend_invoked: Option<&'static str>,
381 /// v1.0.84 (ADR-0042): reason code discriminador de degradação
382 /// (`"embedding_failed" | "cancelled" | "timeout"`). Absent when
383 /// `vec_degraded` is false.
384 #[serde(skip_serializing_if = "std::option::Option::is_none")]
385 pub vec_degraded_reason: Option<String>,
386}
387
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Serialize;

    /// Minimal serializable fixture for the happy-path emit tests.
    #[derive(Serialize)]
    struct Dummy {
        val: u32,
    }

    /// Fixture whose `Serialize` impl always fails, forcing the JSON
    /// serialization error path.
    struct NotSerializable;
    impl Serialize for NotSerializable {
        fn serialize<S: serde::Serializer>(&self, _: S) -> Result<S::Ok, S::Error> {
            Err(serde::ser::Error::custom(
                "intentional serialization failure",
            ))
        }
    }

    /// Clean (non-degraded) recall envelope with empty result lists.
    fn clean_recall_response() -> RecallResponse {
        RecallResponse {
            query: "busca".to_string(),
            k: 10,
            direct_matches: vec![],
            graph_matches: vec![],
            results: vec![],
            elapsed_ms: 42,
            vec_degraded: false,
            vec_error: None,
            warning: None,
            backend_invoked: None,
            vec_degraded_reason: None,
        }
    }

    #[test]
    fn emit_json_returns_ok_for_valid_value() {
        let v = Dummy { val: 42 };
        assert!(emit_json(&v).is_ok());
    }

    #[test]
    fn emit_json_returns_err_for_non_serializable_value() {
        let v = NotSerializable;
        assert!(emit_json(&v).is_err());
    }

    #[test]
    fn emit_json_compact_returns_ok_for_valid_value() {
        let v = Dummy { val: 7 };
        assert!(emit_json_compact(&v).is_ok());
    }

    #[test]
    fn emit_json_compact_returns_err_for_non_serializable_value() {
        let v = NotSerializable;
        assert!(emit_json_compact(&v).is_err());
    }

    #[test]
    fn emit_json_line_does_not_panic_on_serialization_failure() {
        // Best-effort API: both outcomes must be silent.
        emit_json_line(&Dummy { val: 1 });
        emit_json_line(&NotSerializable);
    }

    #[test]
    fn emit_text_does_not_panic() {
        emit_text("mensagem de teste");
    }

    #[test]
    fn emit_progress_does_not_panic() {
        emit_progress("progresso de teste");
    }

    #[test]
    fn remember_response_serializes_correctly() {
        let r = RememberResponse {
            memory_id: 1,
            name: "teste".to_string(),
            namespace: "ns".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 2,
            relationships_persisted: 3,
            relationships_truncated: false,
            chunks_created: 4,
            chunks_persisted: 4,
            urls_persisted: 2,
            extraction_method: None,
            merged_into_memory_id: None,
            warnings: vec!["aviso".to_string()],
            created_at: 1776569715,
            created_at_iso: "2026-04-19T03:34:15Z".to_string(),
            elapsed_ms: 123,
            name_was_normalized: false,
            original_name: None,
            backend_invoked: None,
        };
        let json = serde_json::to_string(&r).unwrap();
        assert!(json.contains("memory_id"));
        assert!(json.contains("aviso"));
        assert!(json.contains("\"namespace\""));
        assert!(json.contains("\"merged_into_memory_id\""));
        assert!(json.contains("\"operation\""));
        assert!(json.contains("\"created_at\""));
        assert!(json.contains("\"created_at_iso\""));
        assert!(json.contains("\"elapsed_ms\""));
        assert!(json.contains("\"urls_persisted\""));
        assert!(json.contains("\"relationships_truncated\":false"));
        // `merged_into_memory_id` has no skip attribute: it stays as null.
        assert!(json.contains("\"merged_into_memory_id\":null"));
        // Optional fields guarded by `skip_serializing_if` vanish when None.
        assert!(!json.contains("extraction_method"));
        assert!(!json.contains("original_name"));
        assert!(!json.contains("backend_invoked"));
    }

    #[test]
    fn recall_item_serializes_renamed_type_field() {
        let item = RecallItem {
            memory_id: 10,
            name: "entidade".to_string(),
            namespace: "ns".to_string(),
            memory_type: "entity".to_string(),
            description: "desc".to_string(),
            snippet: "trecho".to_string(),
            distance: 0.5,
            score: RecallItem::score_from_distance(0.5),
            source: "db".to_string(),
            graph_depth: None,
        };
        let json = serde_json::to_string(&item).unwrap();
        assert!(json.contains("\"type\""));
        assert!(!json.contains("memory_type"));
        // Field is omitted from JSON when None.
        assert!(!json.contains("graph_depth"));
        assert!(json.contains("\"score\":0.5"));
    }

    #[test]
    fn score_from_distance_clamps_and_handles_non_finite_input() {
        // In-range distances map linearly.
        assert_eq!(RecallItem::score_from_distance(0.0), 1.0);
        assert_eq!(RecallItem::score_from_distance(0.5), 0.5);
        assert_eq!(RecallItem::score_from_distance(1.0), 0.0);
        // Out-of-range distances are clamped into [0.0, 1.0].
        assert_eq!(RecallItem::score_from_distance(2.0), 0.0);
        assert_eq!(RecallItem::score_from_distance(-0.5), 1.0);
        assert_eq!(RecallItem::score_from_distance(f32::INFINITY), 0.0);
        assert_eq!(RecallItem::score_from_distance(f32::NEG_INFINITY), 1.0);
        // NaN must never leak into the JSON payload.
        assert_eq!(RecallItem::score_from_distance(f32::NAN), 0.0);
    }

    #[test]
    fn recall_response_serializes_with_lists() {
        let resp = clean_recall_response();
        let json = serde_json::to_string(&resp).unwrap();
        assert!(json.contains("direct_matches"));
        assert!(json.contains("graph_matches"));
        assert!(json.contains("\"k\":"));
        assert!(json.contains("\"results\""));
        assert!(json.contains("\"elapsed_ms\""));
        // G58: clean response must NOT carry the degradation fields.
        // (`vec_degraded` also covers `vec_degraded_reason` as a substring.)
        assert!(!json.contains("vec_degraded"));
        assert!(!json.contains("vec_error"));
        assert!(!json.contains("warning"));
        assert!(!json.contains("backend_invoked"));
    }

    #[test]
    fn recall_response_serializes_vec_degraded_when_fallback_fired() {
        let resp = RecallResponse {
            vec_degraded: true,
            vec_error: Some("embedding cancelled by external signal".to_string()),
            warning: Some("live query embedding unavailable; results are FTS5 BM25 only (semantic relevance reduced)".to_string()),
            // Reason codes are short discriminators, not free-form messages.
            vec_degraded_reason: Some("cancelled".to_string()),
            ..clean_recall_response()
        };
        let json = serde_json::to_string(&resp).unwrap();
        assert!(json.contains("\"vec_degraded\":true"));
        assert!(json.contains("\"vec_error\":\"embedding cancelled by external signal\""));
        assert!(json.contains("\"warning\":\"live query embedding unavailable"));
        assert!(json.contains("\"vec_degraded_reason\":\"cancelled\""));
    }

    #[test]
    fn error_envelope_serializes_correctly() {
        // Mirrors the private envelope declared inside `emit_error_json`.
        #[derive(serde::Serialize)]
        struct ErrorEnvelope<'a> {
            error: bool,
            code: i32,
            message: &'a str,
        }
        let envelope = ErrorEnvelope {
            error: true,
            code: 10,
            message: "database disk image is malformed",
        };
        let json = serde_json::to_value(&envelope).unwrap();
        assert_eq!(json["error"], true);
        assert_eq!(json["code"], 10);
        assert_eq!(json["message"], "database disk image is malformed");
    }

    #[test]
    fn output_format_default_is_json() {
        let fmt = OutputFormat::default();
        assert!(matches!(fmt, OutputFormat::Json));
    }

    #[test]
    fn output_format_variants_exist() {
        let _text = OutputFormat::Text;
        let _md = OutputFormat::Markdown;
        let _json = OutputFormat::Json;
    }

    #[test]
    fn recall_item_clone_produces_equal_value() {
        let item = RecallItem {
            memory_id: 99,
            name: "clone".to_string(),
            namespace: "ns".to_string(),
            memory_type: "relation".to_string(),
            description: "d".to_string(),
            snippet: "s".to_string(),
            distance: 0.1,
            score: RecallItem::score_from_distance(0.1),
            source: "src".to_string(),
            graph_depth: Some(2),
        };
        let cloned = item.clone();
        assert_eq!(cloned.memory_id, item.memory_id);
        assert_eq!(cloned.name, item.name);
        assert_eq!(cloned.graph_depth, Some(2));
    }
}
601}