Skip to main content

sqlite_graphrag/
output.rs

1//! Single point of terminal I/O for the CLI (stdout JSON, stderr human).
2//!
3//! All user-visible output must go through this module; direct `println!` in
4//! other modules is forbidden.
5
6use crate::errors::AppError;
7use serde::Serialize;
8
9/// Output format variants accepted by `--format` CLI flags.
10#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
11pub enum OutputFormat {
12    #[default]
13    Json,
14    Text,
15    Markdown,
16}
17
18/// Restricted JSON-only format for commands that always emit JSON.
19#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
20pub enum JsonOutputFormat {
21    #[default]
22    Json,
23}
24
25/// Serializes `value` as pretty-printed JSON and writes it to stdout with a trailing newline.
26///
27/// Flushes stdout after writing. A `BrokenPipe` error is silenced so that
28/// piping to consumers that close early (e.g. `head`) does not surface an error.
29///
30/// # Errors
31/// Returns `Err` when serialization fails or when a non-`BrokenPipe` I/O error occurs.
32#[inline]
33pub fn emit_json<T: Serialize>(value: &T) -> Result<(), AppError> {
34    let json = serde_json::to_string_pretty(value)?;
35    let mut out = std::io::stdout().lock();
36    if let Err(e) = std::io::Write::write_all(&mut out, json.as_bytes())
37        .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
38        .and_then(|()| std::io::Write::flush(&mut out))
39    {
40        if e.kind() == std::io::ErrorKind::BrokenPipe {
41            return Ok(());
42        }
43        return Err(AppError::Io(e));
44    }
45    Ok(())
46}
47
48/// Serializes `value` as compact (single-line) JSON and writes it to stdout with a trailing newline.
49///
50/// Flushes stdout after writing. A `BrokenPipe` error is silenced.
51///
52/// # Errors
53/// Returns `Err` when serialization fails or when a non-`BrokenPipe` I/O error occurs.
54#[inline]
55pub fn emit_json_compact<T: Serialize>(value: &T) -> Result<(), AppError> {
56    let json = serde_json::to_string(value)?;
57    let mut out = std::io::stdout().lock();
58    if let Err(e) = std::io::Write::write_all(&mut out, json.as_bytes())
59        .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
60        .and_then(|()| std::io::Write::flush(&mut out))
61    {
62        if e.kind() == std::io::ErrorKind::BrokenPipe {
63            return Ok(());
64        }
65        return Err(AppError::Io(e));
66    }
67    Ok(())
68}
69
70/// Writes compact JSON to stdout, silently ignoring serialization and I/O errors.
71/// Designed for NDJSON streaming where partial output is acceptable.
72#[inline]
73pub fn emit_json_line<T: Serialize>(value: &T) {
74    if let Ok(json) = serde_json::to_string(value) {
75        let mut out = std::io::stdout().lock();
76        let _ = std::io::Write::write_all(&mut out, json.as_bytes());
77        let _ = std::io::Write::write_all(&mut out, b"\n");
78        let _ = std::io::Write::flush(&mut out);
79    }
80}
81
/// Prints `msg` and a trailing newline to stdout, then flushes.
///
/// This is a best-effort helper: it returns nothing and every I/O error
/// (including `BrokenPipe`) is discarded. The steps are sequential — the
/// newline is only written if the message was, and the flush only happens
/// if both writes succeeded.
#[inline]
pub fn emit_text(msg: &str) {
    use std::io::Write;

    let mut stdout = std::io::stdout().lock();
    let wrote_line = stdout.write_all(msg.as_bytes()).is_ok() && stdout.write_all(b"\n").is_ok();
    if wrote_line {
        let _ = stdout.flush();
    }
}
92
93/// Logs `msg` as a structured `tracing::info!` event (does not write to stdout).
94/// v1.0.89: suppressed when stderr is not a terminal (pipe) to avoid
95/// polluting JSON pipelines when the user redirects stderr with `2>&1`.
96#[inline]
97pub fn emit_progress(msg: &str) {
98    if std::io::IsTerminal::is_terminal(&std::io::stderr()) {
99        tracing::info!(target: "output", message = msg);
100    }
101}
102
103/// Emits a bilingual progress message honouring `--lang` or `SQLITE_GRAPHRAG_LANG`.
104/// v1.0.89: suppressed when stderr is not a terminal (pipe).
105pub fn emit_progress_i18n(en: &str, pt: &str) {
106    if !std::io::IsTerminal::is_terminal(&std::io::stderr()) {
107        return;
108    }
109    use crate::i18n::{current, Language};
110    match current() {
111        Language::English => tracing::info!(target: "output", message = en),
112        Language::Portuguese => tracing::info!(target: "output", message = pt),
113    }
114}
115
116/// Emits a JSON error envelope to stdout for machine consumers.
117///
118/// Ensures the stdout JSON contract is honoured even on error paths:
119/// `{"error": true, "code": <exit_code>, "message": "<localized_msg>"}`.
120/// A `BrokenPipe` error is silenced so piping to early-closing consumers
121/// does not surface a secondary error.
122#[cold]
123#[inline(never)]
124pub fn emit_error_json(code: i32, message: &str) {
125    #[derive(serde::Serialize)]
126    struct ErrorEnvelope<'a> {
127        error: bool,
128        code: i32,
129        message: &'a str,
130    }
131    let envelope = ErrorEnvelope {
132        error: true,
133        code,
134        message,
135    };
136    if emit_json(&envelope).is_err() {
137        use std::io::Write;
138        let escaped = message.replace('\\', "\\\\").replace('"', "\\\"");
139        let _ = writeln!(
140            std::io::stdout().lock(),
141            r#"{{"error":true,"code":{code},"message":"{escaped}"}}"#
142        );
143    }
144}
145
146/// Emits a localised error message to stderr via the `tracing` subscriber.
147///
148/// ADR-0047 / BUG-12 v1.0.88: prior implementation also called `eprintln!`
149/// which produced a SECOND stderr line (Error:/Erro: prefix) for the same
150/// error, on top of the structured `tracing::error!` line. Operators and
151/// log parsers observed duplicated stderr lines.
152///
153/// The tracing subscriber is configured for stderr at `main.rs:115`, so a
154/// single `tracing::error!` call already produces the human-readable line.
155/// Callers that want a plain stderr line without tracing (e.g. one-shot
156/// scripts) should use `eprintln!` directly instead of this helper.
157///
158/// Centralises human-readable error output following Pattern 5 (`output.rs` is
159/// the SOLE I/O point of the CLI).
160#[cold]
161#[inline(never)]
162pub fn emit_error(localized_msg: &str) {
163    tracing::error!(target: "output", message = localized_msg);
164}
165
166/// Emits a bilingual error to stderr honouring `--lang` or `SQLITE_GRAPHRAG_LANG`.
167/// Usage: `output::emit_error_i18n("invariant violated", "invariante violado")`.
168#[cold]
169#[inline(never)]
170pub fn emit_error_i18n(en: &str, pt: &str) {
171    use crate::i18n::{current, Language};
172    let msg = match current() {
173        Language::English => en,
174        Language::Portuguese => pt,
175    };
176    emit_error(msg);
177}
178
179/// JSON payload emitted by the `remember` subcommand.
180///
181/// All fields are required by the JSON contract (see `docs/schemas/remember.schema.json`).
182/// `operation` is an alias of `action` for compatibility with clients using the old field name.
183///
184/// # Examples
185///
186/// ```
187/// use sqlite_graphrag::output::RememberResponse;
188///
189/// let resp = RememberResponse {
190///     memory_id: 1,
191///     name: "nota-inicial".into(),
192///     namespace: "global".into(),
193///     action: "created".into(),
194///     operation: "created".into(),
195///     version: 1,
196///     entities_persisted: 0,
197///     relationships_persisted: 0,
198///     relationships_truncated: false,
199///     chunks_created: 1,
200///     chunks_persisted: 0,
201///     urls_persisted: 0,
202///     extraction_method: None,
203///     merged_into_memory_id: None,
204///     warnings: vec![],
205///     created_at: 1_700_000_000,
206///     created_at_iso: "2023-11-14T22:13:20Z".into(),
207///     elapsed_ms: 42,
208///     name_was_normalized: false,
209///     original_name: None,
210///     backend_invoked: None,
211/// };
212///
213/// let json = serde_json::to_string(&resp).unwrap();
214/// assert!(json.contains("\"memory_id\":1"));
215/// assert!(json.contains("\"elapsed_ms\":42"));
216/// assert!(json.contains("\"merged_into_memory_id\":null"));
217/// assert!(json.contains("\"urls_persisted\":0"));
218/// assert!(json.contains("\"relationships_truncated\":false"));
219/// ```
220#[derive(Serialize)]
221pub struct RememberResponse {
222    pub memory_id: i64,
223    pub name: String,
224    pub namespace: String,
225    pub action: String,
226    /// Semantic alias of `action` for compatibility with the contract documented in SKILL.md.
227    pub operation: String,
228    pub version: i64,
229    pub entities_persisted: usize,
230    pub relationships_persisted: usize,
231    /// True when the relationship builder hit the cap before covering all entity pairs.
232    /// Callers can use this to decide whether to increase GRAPHRAG_MAX_RELATIONSHIPS_PER_MEMORY.
233    pub relationships_truncated: bool,
234    /// Total number of chunks the body was split into BEFORE dedup.
235    ///
236    /// For single-chunk bodies this equals 1 even though no row is added to
237    /// the `memory_chunks` table — the memory row itself acts as the chunk.
238    /// Use `chunks_persisted` to know how many rows were actually written.
239    pub chunks_created: usize,
240    /// Number of chunks actually written to chunks/embeddings tables. Always <= chunks_created.
241    ///
242    /// Equal when no chunk had identical normalized text already in DB; less when dedup skipped
243    /// some. Equals zero for single-chunk bodies (the memory row is the chunk) and equals
244    /// `chunks_created` for multi-chunk bodies. Added in v1.0.23 to disambiguate from
245    /// `chunks_created` and reflect database state precisely.
246    pub chunks_persisted: usize,
247    /// Number of unique URLs inserted into `memory_urls` for this memory.
248    /// Added in v1.0.24 — split URLs out of the entity graph (P0-2 fix).
249    #[serde(default)]
250    pub urls_persisted: usize,
251    /// Extraction method used: "gliner-{variant}+regex" or "regex-only". None when NER is not enabled.
252    #[serde(skip_serializing_if = "Option::is_none")]
253    pub extraction_method: Option<String>,
254    pub merged_into_memory_id: Option<i64>,
255    pub warnings: Vec<String>,
256    /// Timestamp Unix epoch seconds.
257    pub created_at: i64,
258    /// RFC 3339 UTC timestamp string parallel to `created_at` for ISO 8601 parsers.
259    pub created_at_iso: String,
260    /// Total execution time in milliseconds from handler start to serialisation.
261    pub elapsed_ms: u64,
262    /// True when the user-supplied `--name` differed from the persisted slug
263    /// (i.e. kebab-case normalization changed the value). Added in v1.0.32 so
264    /// callers can detect normalization without parsing stderr WARN logs.
265    #[serde(default)]
266    pub name_was_normalized: bool,
267    /// Original user-supplied `--name` value before normalization.
268    /// Present only when `name_was_normalized == true`; omitted otherwise to
269    /// keep the common (already-kebab) payload small.
270    #[serde(skip_serializing_if = "Option::is_none")]
271    pub original_name: Option<String>,
272    /// v1.0.84 (ADR-0042): discriminador do backend LLM que efetivamente
273    /// executou o embedding da passagem. `"claude" | "codex" | "none"`.
274    /// Absent on the wire when `None` (kept for happy-path envelope cleanliness).
275    #[serde(skip_serializing_if = "Option::is_none")]
276    pub backend_invoked: Option<&'static str>,
277}
278
279/// Individual item returned by the `recall` query.
280///
281/// The `memory_type` field is serialised as `"type"` in JSON to maintain
282/// compatibility with external clients — the Rust name uses `memory_type`
283/// to avoid conflict with the reserved keyword.
284///
285/// # Examples
286///
287/// ```
288/// use sqlite_graphrag::output::RecallItem;
289///
290/// let item = RecallItem {
291///     memory_id: 7,
292///     name: "nota-rust".into(),
293///     namespace: "global".into(),
294///     memory_type: "user".into(),
295///     description: "aprendizado de Rust".into(),
296///     snippet: "ownership e borrowing".into(),
297///     distance: 0.12,
298///     score: 0.88,
299///     source: "direct".into(),
300///     graph_depth: None,
301/// };
302///
303/// let json = serde_json::to_string(&item).unwrap();
304/// // Rust field `memory_type` appears as `"type"` in JSON.
305/// assert!(json.contains("\"type\":\"user\""));
306/// assert!(!json.contains("memory_type"));
307/// assert!(json.contains("\"distance\":0.12"));
308/// ```
309#[derive(Serialize, Clone)]
310pub struct RecallItem {
311    pub memory_id: i64,
312    pub name: String,
313    pub namespace: String,
314    #[serde(rename = "type")]
315    pub memory_type: String,
316    pub description: String,
317    pub snippet: String,
318    pub distance: f32,
319    /// Cosine similarity in `[0.0, 1.0]` derived as `1.0 - distance` and clamped
320    /// to that interval. Always populated to satisfy the documented contract
321    /// (M-A5 in v1.0.40); higher means more similar. For graph hits the value
322    /// reflects the hop-derived distance proxy and should be interpreted
323    /// alongside `graph_depth` rather than as a true cosine score.
324    pub score: f32,
325    pub source: String,
326    /// Number of graph hops between this match and the seed memories.
327    ///
328    /// Set to `None` for direct vector matches (where `distance` is meaningful)
329    /// and to `Some(N)` for traversal results, with `N=0` when the depth could
330    /// not be tracked precisely. Added in v1.0.23 to disambiguate graph results
331    /// from the `distance: 0.0` placeholder previously used for graph entries.
332    /// Field is omitted from JSON output when `None`.
333    #[serde(skip_serializing_if = "Option::is_none")]
334    pub graph_depth: Option<u32>,
335}
336
337impl RecallItem {
338    /// Computes the similarity score from a vector distance, clamped to
339    /// `[0.0, 1.0]`. Cosine distance returned by sqlite-vec lives in `[0, 2]`
340    /// in theory but the embedder produces unit-norm vectors so the practical
341    /// range is `[0, 1]`. Centralized so every constructor keeps the contract.
342    #[inline]
343    pub fn score_from_distance(distance: f32) -> f32 {
344        let raw = 1.0 - distance;
345        if raw.is_nan() {
346            0.0
347        } else {
348            raw.clamp(0.0, 1.0)
349        }
350    }
351}
352
353/// Full response envelope returned by the `recall` subcommand.
354///
355/// Contains both direct vector matches and graph-traversal matches, plus the
356/// aggregated `results` list that merges both for callers that do not need
357/// to distinguish the source.
358#[derive(Serialize)]
359pub struct RecallResponse {
360    pub query: String,
361    pub k: usize,
362    pub direct_matches: Vec<RecallItem>,
363    pub graph_matches: Vec<RecallItem>,
364    /// Aggregated alias of `direct_matches` + `graph_matches` for the contract documented in SKILL.md.
365    pub results: Vec<RecallItem>,
366    /// Total execution time in milliseconds from handler start to serialisation.
367    pub elapsed_ms: u64,
368    /// G58 (v1.0.80): `true` when the live query embedding failed and the
369    /// handler fell back to FTS5 BM25 + LIKE prefix. Symmetric to
370    /// `fts_degraded` in `hybrid-search`. Absent on the wire when false.
371    #[serde(skip_serializing_if = "std::ops::Not::not", default)]
372    pub vec_degraded: bool,
373    /// G58 (v1.0.80): human-readable description of the embedding failure
374    /// that triggered the fallback. Absent on the wire when `vec_degraded`
375    /// is false or the failure had no message.
376    #[serde(skip_serializing_if = "std::option::Option::is_none")]
377    pub vec_error: Option<String>,
378    /// G58 (v1.0.80): advisory warning echoed for callers that branch on
379    /// top-level status. Distinguishes a FTS5-only fallback from a clean
380    /// hybrid response so downstream pipelines can lower their confidence.
381    #[serde(skip_serializing_if = "std::option::Option::is_none")]
382    pub warning: Option<String>,
383    /// v1.0.84 (ADR-0042): discriminador do backend LLM que efetivamente
384    /// executou o embedding live. `"claude" | "codex" | "none"`. Absent
385    /// on the wire when `None` (kept for happy-path envelope cleanliness).
386    #[serde(skip_serializing_if = "std::option::Option::is_none")]
387    pub backend_invoked: Option<&'static str>,
388    /// v1.0.84 (ADR-0042): reason code discriminador de degradação
389    /// (`"embedding_failed" | "cancelled" | "timeout"`). Absent when
390    /// `vec_degraded` is false.
391    #[serde(skip_serializing_if = "std::option::Option::is_none")]
392    pub vec_degraded_reason: Option<String>,
393}
394
/// Unit tests for the output helpers and the JSON response payloads.
///
/// The `emit_*` tests write to the real stdout of the test process; they
/// only check the returned `Result` (or the absence of a panic), not the
/// bytes written.
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Serialize;

    /// Minimal serialisable payload for the happy-path `emit_*` tests.
    #[derive(Serialize)]
    struct Dummy {
        val: u32,
    }

    /// Type whose `Serialize` impl always fails, used to drive the
    /// serialization-error path of the `emit_json*` functions.
    struct NotSerializable;
    impl Serialize for NotSerializable {
        fn serialize<S: serde::Serializer>(&self, _: S) -> Result<S::Ok, S::Error> {
            Err(serde::ser::Error::custom(
                "intentional serialization failure",
            ))
        }
    }

    #[test]
    fn emit_json_returns_ok_for_valid_value() {
        let v = Dummy { val: 42 };
        assert!(emit_json(&v).is_ok());
    }

    #[test]
    fn emit_json_returns_err_for_non_serializable_value() {
        let v = NotSerializable;
        assert!(emit_json(&v).is_err());
    }

    #[test]
    fn emit_json_compact_returns_ok_for_valid_value() {
        let v = Dummy { val: 7 };
        assert!(emit_json_compact(&v).is_ok());
    }

    #[test]
    fn emit_json_compact_returns_err_for_non_serializable_value() {
        let v = NotSerializable;
        assert!(emit_json_compact(&v).is_err());
    }

    #[test]
    fn emit_text_does_not_panic() {
        emit_text("mensagem de teste");
    }

    #[test]
    fn emit_progress_does_not_panic() {
        emit_progress("progresso de teste");
    }

    #[test]
    fn remember_response_serializes_correctly() {
        let r = RememberResponse {
            memory_id: 1,
            name: "teste".to_string(),
            namespace: "ns".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 2,
            relationships_persisted: 3,
            relationships_truncated: false,
            chunks_created: 4,
            chunks_persisted: 4,
            urls_persisted: 2,
            extraction_method: None,
            merged_into_memory_id: None,
            warnings: vec!["aviso".to_string()],
            created_at: 1776569715,
            created_at_iso: "2026-04-19T03:34:15Z".to_string(),
            elapsed_ms: 123,
            name_was_normalized: false,
            original_name: None,
            backend_invoked: None,
        };
        let json = serde_json::to_string(&r).unwrap();
        assert!(json.contains("memory_id"));
        assert!(json.contains("aviso"));
        assert!(json.contains("\"namespace\""));
        assert!(json.contains("\"merged_into_memory_id\""));
        assert!(json.contains("\"operation\""));
        assert!(json.contains("\"created_at\""));
        assert!(json.contains("\"created_at_iso\""));
        assert!(json.contains("\"elapsed_ms\""));
        assert!(json.contains("\"urls_persisted\""));
        assert!(json.contains("\"relationships_truncated\":false"));
        // Fields marked `skip_serializing_if = "Option::is_none"` must be absent when `None`.
        assert!(!json.contains("extraction_method"));
        assert!(!json.contains("original_name"));
        assert!(!json.contains("backend_invoked"));
    }

    #[test]
    fn recall_item_serializes_renamed_type_field() {
        let item = RecallItem {
            memory_id: 10,
            name: "entidade".to_string(),
            namespace: "ns".to_string(),
            memory_type: "entity".to_string(),
            description: "desc".to_string(),
            snippet: "trecho".to_string(),
            distance: 0.5,
            score: RecallItem::score_from_distance(0.5),
            source: "db".to_string(),
            graph_depth: None,
        };
        let json = serde_json::to_string(&item).unwrap();
        assert!(json.contains("\"type\""));
        assert!(!json.contains("memory_type"));
        // Field is omitted from JSON when None.
        assert!(!json.contains("graph_depth"));
        assert!(json.contains("\"score\":0.5"));
    }

    #[test]
    fn score_from_distance_clamps_and_handles_nan() {
        // In-range distances map to `1.0 - distance`.
        assert_eq!(RecallItem::score_from_distance(0.0), 1.0);
        assert_eq!(RecallItem::score_from_distance(1.0), 0.0);
        // Out-of-range inputs are clamped into `[0.0, 1.0]`.
        assert_eq!(RecallItem::score_from_distance(2.0), 0.0);
        assert_eq!(RecallItem::score_from_distance(-0.5), 1.0);
        // NaN must never leak into the JSON payload.
        assert_eq!(RecallItem::score_from_distance(f32::NAN), 0.0);
    }

    #[test]
    fn recall_response_serializes_with_lists() {
        let resp = RecallResponse {
            query: "busca".to_string(),
            k: 10,
            direct_matches: vec![],
            graph_matches: vec![],
            results: vec![],
            elapsed_ms: 42,
            vec_degraded: false,
            vec_error: None,
            warning: None,
            backend_invoked: None,
            vec_degraded_reason: None,
        };
        let json = serde_json::to_string(&resp).unwrap();
        assert!(json.contains("direct_matches"));
        assert!(json.contains("graph_matches"));
        assert!(json.contains("\"k\":"));
        assert!(json.contains("\"results\""));
        assert!(json.contains("\"elapsed_ms\""));
        // G58: clean response must NOT carry the degradation fields
        // (the `vec_degraded` check also covers `vec_degraded_reason`).
        assert!(!json.contains("vec_degraded"));
        assert!(!json.contains("vec_error"));
        assert!(!json.contains("warning"));
        assert!(!json.contains("backend_invoked"));
    }

    #[test]
    fn recall_response_serializes_vec_degraded_when_fallback_fired() {
        let resp = RecallResponse {
            query: "busca".to_string(),
            k: 10,
            direct_matches: vec![],
            graph_matches: vec![],
            results: vec![],
            elapsed_ms: 42,
            vec_degraded: true,
            vec_error: Some("embedding cancelled by external signal".to_string()),
            warning: Some("live query embedding unavailable; results are FTS5 BM25 only (semantic relevance reduced)".to_string()),
            backend_invoked: None,
            // Reason *code* (one of the documented values), not the free-text message.
            vec_degraded_reason: Some("cancelled".to_string()),
        };
        let json = serde_json::to_string(&resp).unwrap();
        assert!(json.contains("\"vec_degraded\":true"));
        assert!(json.contains("\"vec_error\":\"embedding cancelled by external signal\""));
        assert!(json.contains("\"warning\":\"live query embedding unavailable"));
        assert!(json.contains("\"vec_degraded_reason\":\"cancelled\""));
    }

    #[test]
    fn error_envelope_serializes_correctly() {
        // Local mirror of the private envelope built inside `emit_error_json`.
        #[derive(serde::Serialize)]
        struct ErrorEnvelope<'a> {
            error: bool,
            code: i32,
            message: &'a str,
        }
        let envelope = ErrorEnvelope {
            error: true,
            code: 10,
            message: "database disk image is malformed",
        };
        let json = serde_json::to_value(&envelope).unwrap();
        assert_eq!(json["error"], true);
        assert_eq!(json["code"], 10);
        assert_eq!(json["message"], "database disk image is malformed");
    }

    #[test]
    fn output_format_default_is_json() {
        let fmt = OutputFormat::default();
        assert!(matches!(fmt, OutputFormat::Json));
    }

    #[test]
    fn json_output_format_default_is_json() {
        let fmt = JsonOutputFormat::default();
        assert!(matches!(fmt, JsonOutputFormat::Json));
    }

    #[test]
    fn output_format_variants_exist() {
        let _text = OutputFormat::Text;
        let _md = OutputFormat::Markdown;
        let _json = OutputFormat::Json;
    }

    #[test]
    fn recall_item_clone_produces_equal_value() {
        let item = RecallItem {
            memory_id: 99,
            name: "clone".to_string(),
            namespace: "ns".to_string(),
            memory_type: "relation".to_string(),
            description: "d".to_string(),
            snippet: "s".to_string(),
            distance: 0.1,
            score: RecallItem::score_from_distance(0.1),
            source: "src".to_string(),
            graph_depth: Some(2),
        };
        let cloned = item.clone();
        assert_eq!(cloned.memory_id, item.memory_id);
        assert_eq!(cloned.name, item.name);
        assert_eq!(cloned.graph_depth, Some(2));
    }
}