Skip to main content

sqlite_graphrag/
output.rs

1//! Single point of terminal I/O for the CLI (stdout JSON, stderr human).
2//!
3//! All user-visible output must go through this module; direct `println!` in
4//! other modules is forbidden.
5
6use crate::errors::AppError;
7use serde::Serialize;
8
9/// Output format variants accepted by `--format` CLI flags.
10#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
11pub enum OutputFormat {
12    #[default]
13    Json,
14    Text,
15    Markdown,
16}
17
18/// Restricted JSON-only format for commands that always emit JSON.
19#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
20pub enum JsonOutputFormat {
21    #[default]
22    Json,
23}
24
25/// Serializes `value` as pretty-printed JSON and writes it to stdout with a trailing newline.
26///
27/// Flushes stdout after writing. A `BrokenPipe` error is silenced so that
28/// piping to consumers that close early (e.g. `head`) does not surface an error.
29///
30/// # Errors
31/// Returns `Err` when serialization fails or when a non-`BrokenPipe` I/O error occurs.
32pub fn emit_json<T: Serialize>(value: &T) -> Result<(), AppError> {
33    let json = serde_json::to_string_pretty(value)?;
34    let mut out = std::io::stdout().lock();
35    if let Err(e) = std::io::Write::write_all(&mut out, json.as_bytes())
36        .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
37        .and_then(|()| std::io::Write::flush(&mut out))
38    {
39        if e.kind() == std::io::ErrorKind::BrokenPipe {
40            return Ok(());
41        }
42        return Err(AppError::Io(e));
43    }
44    Ok(())
45}
46
47/// Serializes `value` as compact (single-line) JSON and writes it to stdout with a trailing newline.
48///
49/// Flushes stdout after writing. A `BrokenPipe` error is silenced.
50///
51/// # Errors
52/// Returns `Err` when serialization fails or when a non-`BrokenPipe` I/O error occurs.
53pub fn emit_json_compact<T: Serialize>(value: &T) -> Result<(), AppError> {
54    let json = serde_json::to_string(value)?;
55    let mut out = std::io::stdout().lock();
56    if let Err(e) = std::io::Write::write_all(&mut out, json.as_bytes())
57        .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
58        .and_then(|()| std::io::Write::flush(&mut out))
59    {
60        if e.kind() == std::io::ErrorKind::BrokenPipe {
61            return Ok(());
62        }
63        return Err(AppError::Io(e));
64    }
65    Ok(())
66}
67
68/// Writes `msg` followed by a newline to stdout and flushes.
69///
70/// A `BrokenPipe` error is silenced gracefully.
71pub fn emit_text(msg: &str) {
72    let mut out = std::io::stdout().lock();
73    let _ = std::io::Write::write_all(&mut out, msg.as_bytes())
74        .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
75        .and_then(|()| std::io::Write::flush(&mut out));
76}
77
78/// Logs `msg` as a structured `tracing::info!` event (does not write to stdout).
79pub fn emit_progress(msg: &str) {
80    tracing::info!(message = msg);
81}
82
83/// Emits a bilingual progress message honouring `--lang` or `SQLITE_GRAPHRAG_LANG`.
84/// Usage: `output::emit_progress_i18n("Computing embedding...", "Calculando embedding...")`.
85pub fn emit_progress_i18n(en: &str, pt: &str) {
86    use crate::i18n::{current, Language};
87    match current() {
88        Language::English => tracing::info!(message = en),
89        Language::Portuguese => tracing::info!(message = pt),
90    }
91}
92
93/// Emits a JSON error envelope to stdout for machine consumers.
94///
95/// Ensures the stdout JSON contract is honoured even on error paths:
96/// `{"error": true, "code": <exit_code>, "message": "<localized_msg>"}`.
97/// A `BrokenPipe` error is silenced so piping to early-closing consumers
98/// does not surface a secondary error.
99pub fn emit_error_json(code: i32, message: &str) {
100    #[derive(serde::Serialize)]
101    struct ErrorEnvelope<'a> {
102        error: bool,
103        code: i32,
104        message: &'a str,
105    }
106    let envelope = ErrorEnvelope {
107        error: true,
108        code,
109        message,
110    };
111    if emit_json(&envelope).is_err() {
112        use std::io::Write;
113        let escaped = message.replace('\\', "\\\\").replace('"', "\\\"");
114        let _ = writeln!(
115            std::io::stdout().lock(),
116            r#"{{"error":true,"code":{code},"message":"{escaped}"}}"#
117        );
118    }
119}
120
121/// Emits a localised error message to stderr with the `Error:`/`Erro:` prefix.
122///
123/// Centralises human-readable error output following Pattern 5 (`output.rs` is the
124/// SOLE I/O point of the CLI). Does not log via `tracing` — call `tracing::error!`
125/// explicitly before this function when structured observability is desired.
126pub fn emit_error(localized_msg: &str) {
127    eprintln!("{}: {}", crate::i18n::error_prefix(), localized_msg);
128}
129
130/// Emits a bilingual error to stderr honouring `--lang` or `SQLITE_GRAPHRAG_LANG`.
131/// Usage: `output::emit_error_i18n("invariant violated", "invariante violado")`.
132pub fn emit_error_i18n(en: &str, pt: &str) {
133    use crate::i18n::{current, Language};
134    let msg = match current() {
135        Language::English => en,
136        Language::Portuguese => pt,
137    };
138    emit_error(msg);
139}
140
141/// JSON payload emitted by the `remember` subcommand.
142///
143/// All fields are required by the JSON contract (see `docs/schemas/remember.schema.json`).
144/// `operation` is an alias of `action` for compatibility with clients using the old field name.
145///
146/// # Examples
147///
148/// ```
149/// use sqlite_graphrag::output::RememberResponse;
150///
151/// let resp = RememberResponse {
152///     memory_id: 1,
153///     name: "nota-inicial".into(),
154///     namespace: "global".into(),
155///     action: "created".into(),
156///     operation: "created".into(),
157///     version: 1,
158///     entities_persisted: 0,
159///     relationships_persisted: 0,
160///     relationships_truncated: false,
161///     chunks_created: 1,
162///     chunks_persisted: 0,
163///     urls_persisted: 0,
164///     extraction_method: None,
165///     merged_into_memory_id: None,
166///     warnings: vec![],
167///     created_at: 1_700_000_000,
168///     created_at_iso: "2023-11-14T22:13:20Z".into(),
169///     elapsed_ms: 42,
170///     name_was_normalized: false,
171///     original_name: None,
172/// };
173///
174/// let json = serde_json::to_string(&resp).unwrap();
175/// assert!(json.contains("\"memory_id\":1"));
176/// assert!(json.contains("\"elapsed_ms\":42"));
177/// assert!(json.contains("\"merged_into_memory_id\":null"));
178/// assert!(json.contains("\"urls_persisted\":0"));
179/// assert!(json.contains("\"relationships_truncated\":false"));
180/// ```
181#[derive(Serialize)]
182pub struct RememberResponse {
183    pub memory_id: i64,
184    pub name: String,
185    pub namespace: String,
186    pub action: String,
187    /// Semantic alias of `action` for compatibility with the contract documented in SKILL.md and AGENT_PROTOCOL.md.
188    pub operation: String,
189    pub version: i64,
190    pub entities_persisted: usize,
191    pub relationships_persisted: usize,
192    /// True when the relationship builder hit the cap before covering all entity pairs.
193    /// Callers can use this to decide whether to increase GRAPHRAG_MAX_RELATIONSHIPS_PER_MEMORY.
194    pub relationships_truncated: bool,
195    /// Total number of chunks the body was split into BEFORE dedup.
196    ///
197    /// For single-chunk bodies this equals 1 even though no row is added to
198    /// the `memory_chunks` table — the memory row itself acts as the chunk.
199    /// Use `chunks_persisted` to know how many rows were actually written.
200    pub chunks_created: usize,
201    /// Number of chunks actually written to chunks/embeddings tables. Always <= chunks_created.
202    ///
203    /// Equal when no chunk had identical normalized text already in DB; less when dedup skipped
204    /// some. Equals zero for single-chunk bodies (the memory row is the chunk) and equals
205    /// `chunks_created` for multi-chunk bodies. Added in v1.0.23 to disambiguate from
206    /// `chunks_created` and reflect database state precisely.
207    pub chunks_persisted: usize,
208    /// Number of unique URLs inserted into `memory_urls` for this memory.
209    /// Added in v1.0.24 — split URLs out of the entity graph (P0-2 fix).
210    #[serde(default)]
211    pub urls_persisted: usize,
212    /// Extraction method used: "gliner-{variant}+regex" or "regex-only". None when NER is not enabled.
213    #[serde(skip_serializing_if = "Option::is_none")]
214    pub extraction_method: Option<String>,
215    pub merged_into_memory_id: Option<i64>,
216    pub warnings: Vec<String>,
217    /// Timestamp Unix epoch seconds.
218    pub created_at: i64,
219    /// RFC 3339 UTC timestamp string parallel to `created_at` for ISO 8601 parsers.
220    pub created_at_iso: String,
221    /// Total execution time in milliseconds from handler start to serialisation.
222    pub elapsed_ms: u64,
223    /// True when the user-supplied `--name` differed from the persisted slug
224    /// (i.e. kebab-case normalization changed the value). Added in v1.0.32 so
225    /// callers can detect normalization without parsing stderr WARN logs.
226    #[serde(default)]
227    pub name_was_normalized: bool,
228    /// Original user-supplied `--name` value before normalization.
229    /// Present only when `name_was_normalized == true`; omitted otherwise to
230    /// keep the common (already-kebab) payload small.
231    #[serde(skip_serializing_if = "Option::is_none")]
232    pub original_name: Option<String>,
233}
234
235/// Individual item returned by the `recall` query.
236///
237/// The `memory_type` field is serialised as `"type"` in JSON to maintain
238/// compatibility with external clients — the Rust name uses `memory_type`
239/// to avoid conflict with the reserved keyword.
240///
241/// # Examples
242///
243/// ```
244/// use sqlite_graphrag::output::RecallItem;
245///
246/// let item = RecallItem {
247///     memory_id: 7,
248///     name: "nota-rust".into(),
249///     namespace: "global".into(),
250///     memory_type: "user".into(),
251///     description: "aprendizado de Rust".into(),
252///     snippet: "ownership e borrowing".into(),
253///     distance: 0.12,
254///     score: 0.88,
255///     source: "direct".into(),
256///     graph_depth: None,
257/// };
258///
259/// let json = serde_json::to_string(&item).unwrap();
260/// // Rust field `memory_type` appears as `"type"` in JSON.
261/// assert!(json.contains("\"type\":\"user\""));
262/// assert!(!json.contains("memory_type"));
263/// assert!(json.contains("\"distance\":0.12"));
264/// ```
265#[derive(Serialize, Clone)]
266pub struct RecallItem {
267    pub memory_id: i64,
268    pub name: String,
269    pub namespace: String,
270    #[serde(rename = "type")]
271    pub memory_type: String,
272    pub description: String,
273    pub snippet: String,
274    pub distance: f32,
275    /// Cosine similarity in `[0.0, 1.0]` derived as `1.0 - distance` and clamped
276    /// to that interval. Always populated to satisfy the documented contract
277    /// (M-A5 in v1.0.40); higher means more similar. For graph hits the value
278    /// reflects the hop-derived distance proxy and should be interpreted
279    /// alongside `graph_depth` rather than as a true cosine score.
280    pub score: f32,
281    pub source: String,
282    /// Number of graph hops between this match and the seed memories.
283    ///
284    /// Set to `None` for direct vector matches (where `distance` is meaningful)
285    /// and to `Some(N)` for traversal results, with `N=0` when the depth could
286    /// not be tracked precisely. Added in v1.0.23 to disambiguate graph results
287    /// from the `distance: 0.0` placeholder previously used for graph entries.
288    /// Field is omitted from JSON output when `None`.
289    #[serde(skip_serializing_if = "Option::is_none")]
290    pub graph_depth: Option<u32>,
291}
292
293impl RecallItem {
294    /// Computes the similarity score from a vector distance, clamped to
295    /// `[0.0, 1.0]`. Cosine distance returned by sqlite-vec lives in `[0, 2]`
296    /// in theory but the embedder produces unit-norm vectors so the practical
297    /// range is `[0, 1]`. Centralized so every constructor keeps the contract.
298    pub fn score_from_distance(distance: f32) -> f32 {
299        let raw = 1.0 - distance;
300        if raw.is_nan() {
301            0.0
302        } else {
303            raw.clamp(0.0, 1.0)
304        }
305    }
306}
307
308/// Full response envelope returned by the `recall` subcommand.
309///
310/// Contains both direct vector matches and graph-traversal matches, plus the
311/// aggregated `results` list that merges both for callers that do not need
312/// to distinguish the source.
313#[derive(Serialize)]
314pub struct RecallResponse {
315    pub query: String,
316    pub k: usize,
317    pub direct_matches: Vec<RecallItem>,
318    pub graph_matches: Vec<RecallItem>,
319    /// Aggregated alias of `direct_matches` + `graph_matches` for the contract documented in SKILL.md.
320    pub results: Vec<RecallItem>,
321    /// Total execution time in milliseconds from handler start to serialisation.
322    pub elapsed_ms: u64,
323}
324
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Serialize;

    /// Smallest serialisable payload, used by the happy-path emit tests.
    #[derive(Serialize)]
    struct Dummy {
        val: u32,
    }

    /// Payload whose `Serialize` impl always fails, to drive the error path
    /// of the JSON emitters.
    struct NotSerializable;

    impl Serialize for NotSerializable {
        fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            Err(<S::Error as serde::ser::Error>::custom(
                "intentional serialization failure",
            ))
        }
    }

    #[test]
    fn emit_json_returns_ok_for_valid_value() {
        let outcome = emit_json(&Dummy { val: 42 });
        assert!(outcome.is_ok());
    }

    #[test]
    fn emit_json_returns_err_for_non_serializable_value() {
        let outcome = emit_json(&NotSerializable);
        assert!(outcome.is_err());
    }

    #[test]
    fn emit_json_compact_returns_ok_for_valid_value() {
        let outcome = emit_json_compact(&Dummy { val: 7 });
        assert!(outcome.is_ok());
    }

    #[test]
    fn emit_json_compact_returns_err_for_non_serializable_value() {
        let outcome = emit_json_compact(&NotSerializable);
        assert!(outcome.is_err());
    }

    #[test]
    fn emit_text_does_not_panic() {
        emit_text("mensagem de teste");
    }

    #[test]
    fn emit_progress_does_not_panic() {
        emit_progress("progresso de teste");
    }

    #[test]
    fn remember_response_serializes_correctly() {
        let response = RememberResponse {
            memory_id: 1,
            name: String::from("teste"),
            namespace: String::from("ns"),
            action: String::from("created"),
            operation: String::from("created"),
            version: 1,
            entities_persisted: 2,
            relationships_persisted: 3,
            relationships_truncated: false,
            chunks_created: 4,
            chunks_persisted: 4,
            urls_persisted: 2,
            extraction_method: None,
            merged_into_memory_id: None,
            warnings: vec![String::from("aviso")],
            created_at: 1776569715,
            created_at_iso: String::from("2026-04-19T03:34:15Z"),
            elapsed_ms: 123,
            name_was_normalized: false,
            original_name: None,
        };
        let json = serde_json::to_string(&response).unwrap();

        // Each needle must appear somewhere in the serialised payload.
        let needles = [
            "memory_id",
            "aviso",
            "\"namespace\"",
            "\"merged_into_memory_id\"",
            "\"operation\"",
            "\"created_at\"",
            "\"created_at_iso\"",
            "\"elapsed_ms\"",
            "\"urls_persisted\"",
            "\"relationships_truncated\":false",
        ];
        for needle in needles {
            assert!(json.contains(needle), "missing {needle} in {json}");
        }
    }

    #[test]
    fn recall_item_serializes_renamed_type_field() {
        let distance = 0.5;
        let item = RecallItem {
            memory_id: 10,
            name: String::from("entidade"),
            namespace: String::from("ns"),
            memory_type: String::from("entity"),
            description: String::from("desc"),
            snippet: String::from("trecho"),
            distance,
            score: RecallItem::score_from_distance(distance),
            source: String::from("db"),
            graph_depth: None,
        };
        let json = serde_json::to_string(&item).unwrap();

        // The Rust name must never leak; only the renamed key is emitted.
        assert!(json.contains("\"type\""));
        assert!(!json.contains("memory_type"));
        // A `None` depth is skipped entirely rather than written as null.
        assert!(!json.contains("graph_depth"));
        assert!(json.contains("\"score\":0.5"));
    }

    #[test]
    fn recall_response_serializes_with_lists() {
        let response = RecallResponse {
            query: String::from("busca"),
            k: 10,
            direct_matches: Vec::new(),
            graph_matches: Vec::new(),
            results: Vec::new(),
            elapsed_ms: 42,
        };
        let json = serde_json::to_string(&response).unwrap();

        let needles = [
            "direct_matches",
            "graph_matches",
            "\"k\":",
            "\"results\"",
            "\"elapsed_ms\"",
        ];
        for needle in needles {
            assert!(json.contains(needle), "missing {needle} in {json}");
        }
    }

    #[test]
    fn error_envelope_serializes_correctly() {
        // Local mirror of the envelope shape built inside `emit_error_json`.
        #[derive(serde::Serialize)]
        struct ErrorEnvelope<'a> {
            error: bool,
            code: i32,
            message: &'a str,
        }

        let message = "database disk image is malformed";
        let value = serde_json::to_value(&ErrorEnvelope {
            error: true,
            code: 10,
            message,
        })
        .unwrap();

        assert_eq!(value["error"], true);
        assert_eq!(value["code"], 10);
        assert_eq!(value["message"], "database disk image is malformed");
    }

    #[test]
    fn output_format_default_is_json() {
        assert!(matches!(OutputFormat::default(), OutputFormat::Json));
    }

    #[test]
    fn output_format_variants_exist() {
        // Compile-time check that all three variants are still defined.
        let _all = [
            OutputFormat::Text,
            OutputFormat::Markdown,
            OutputFormat::Json,
        ];
    }

    #[test]
    fn recall_item_clone_produces_equal_value() {
        let distance = 0.1;
        let original = RecallItem {
            memory_id: 99,
            name: String::from("clone"),
            namespace: String::from("ns"),
            memory_type: String::from("relation"),
            description: String::from("d"),
            snippet: String::from("s"),
            distance,
            score: RecallItem::score_from_distance(distance),
            source: String::from("src"),
            graph_depth: Some(2),
        };
        let copy = original.clone();

        assert_eq!(copy.memory_id, original.memory_id);
        assert_eq!(copy.name, original.name);
        assert_eq!(copy.graph_depth, Some(2));
    }
}
507}