Skip to main content

sqlite_graphrag/
output.rs

//! Single point of terminal I/O for the CLI (JSON on stdout, human-readable messages on stderr).
//!
//! All user-visible output must go through this module; direct `println!` in
//! other modules is forbidden.
5
6use crate::errors::AppError;
7use serde::Serialize;
8
9/// Output format variants accepted by `--format` CLI flags.
10#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
11pub enum OutputFormat {
12    #[default]
13    Json,
14    Text,
15    Markdown,
16}
17
18/// Restricted JSON-only format for commands that always emit JSON.
19#[derive(Debug, Clone, Copy, clap::ValueEnum, Default)]
20pub enum JsonOutputFormat {
21    #[default]
22    Json,
23}
24
25/// Serializes `value` as pretty-printed JSON and writes it to stdout with a trailing newline.
26///
27/// Flushes stdout after writing. A `BrokenPipe` error is silenced so that
28/// piping to consumers that close early (e.g. `head`) does not surface an error.
29///
30/// # Errors
31/// Returns `Err` when serialization fails or when a non-`BrokenPipe` I/O error occurs.
32#[inline]
33pub fn emit_json<T: Serialize>(value: &T) -> Result<(), AppError> {
34    let json = serde_json::to_string_pretty(value)?;
35    let mut out = std::io::stdout().lock();
36    if let Err(e) = std::io::Write::write_all(&mut out, json.as_bytes())
37        .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
38        .and_then(|()| std::io::Write::flush(&mut out))
39    {
40        if e.kind() == std::io::ErrorKind::BrokenPipe {
41            return Ok(());
42        }
43        return Err(AppError::Io(e));
44    }
45    Ok(())
46}
47
48/// Serializes `value` as compact (single-line) JSON and writes it to stdout with a trailing newline.
49///
50/// Flushes stdout after writing. A `BrokenPipe` error is silenced.
51///
52/// # Errors
53/// Returns `Err` when serialization fails or when a non-`BrokenPipe` I/O error occurs.
54#[inline]
55pub fn emit_json_compact<T: Serialize>(value: &T) -> Result<(), AppError> {
56    let json = serde_json::to_string(value)?;
57    let mut out = std::io::stdout().lock();
58    if let Err(e) = std::io::Write::write_all(&mut out, json.as_bytes())
59        .and_then(|()| std::io::Write::write_all(&mut out, b"\n"))
60        .and_then(|()| std::io::Write::flush(&mut out))
61    {
62        if e.kind() == std::io::ErrorKind::BrokenPipe {
63            return Ok(());
64        }
65        return Err(AppError::Io(e));
66    }
67    Ok(())
68}
69
70/// Writes compact JSON to stdout, silently ignoring serialization and I/O errors.
71/// Designed for NDJSON streaming where partial output is acceptable.
72#[inline]
73pub fn emit_json_line<T: Serialize>(value: &T) {
74    if let Ok(json) = serde_json::to_string(value) {
75        let mut out = std::io::stdout().lock();
76        let _ = std::io::Write::write_all(&mut out, json.as_bytes());
77        let _ = std::io::Write::write_all(&mut out, b"\n");
78        let _ = std::io::Write::flush(&mut out);
79    }
80}
81
/// Prints `msg` and a trailing newline to stdout, then flushes.
///
/// Output is best-effort: any I/O error (including `BrokenPipe`) is discarded,
/// and a failed step skips the remaining ones.
#[inline]
pub fn emit_text(msg: &str) {
    use std::io::Write;

    let mut stdout = std::io::stdout().lock();
    let _ = stdout
        .write_all(msg.as_bytes())
        .and_then(|()| stdout.write_all(b"\n"))
        .and_then(|()| stdout.flush());
}
92
93/// Logs `msg` as a structured `tracing::info!` event (does not write to stdout).
94#[inline]
95pub fn emit_progress(msg: &str) {
96    tracing::info!(target: "output", message = msg);
97}
98
99/// Emits a bilingual progress message honouring `--lang` or `SQLITE_GRAPHRAG_LANG`.
100/// Usage: `output::emit_progress_i18n("Computing embedding...", "Calculando embedding...")`.
101pub fn emit_progress_i18n(en: &str, pt: &str) {
102    use crate::i18n::{current, Language};
103    match current() {
104        Language::English => tracing::info!(target: "output", message = en),
105        Language::Portuguese => tracing::info!(target: "output", message = pt),
106    }
107}
108
109/// Emits a JSON error envelope to stdout for machine consumers.
110///
111/// Ensures the stdout JSON contract is honoured even on error paths:
112/// `{"error": true, "code": <exit_code>, "message": "<localized_msg>"}`.
113/// A `BrokenPipe` error is silenced so piping to early-closing consumers
114/// does not surface a secondary error.
115#[cold]
116#[inline(never)]
117pub fn emit_error_json(code: i32, message: &str) {
118    #[derive(serde::Serialize)]
119    struct ErrorEnvelope<'a> {
120        error: bool,
121        code: i32,
122        message: &'a str,
123    }
124    let envelope = ErrorEnvelope {
125        error: true,
126        code,
127        message,
128    };
129    if emit_json(&envelope).is_err() {
130        use std::io::Write;
131        let escaped = message.replace('\\', "\\\\").replace('"', "\\\"");
132        let _ = writeln!(
133            std::io::stdout().lock(),
134            r#"{{"error":true,"code":{code},"message":"{escaped}"}}"#
135        );
136    }
137}
138
139/// Emits a localised error message to stderr with the `Error:`/`Erro:` prefix.
140///
141/// Centralises human-readable error output following Pattern 5 (`output.rs` is the
142/// SOLE I/O point of the CLI). Does not log via `tracing` — call `tracing::error!`
143/// explicitly before this function when structured observability is desired.
144#[cold]
145#[inline(never)]
146pub fn emit_error(localized_msg: &str) {
147    tracing::error!(target: "output", message = localized_msg);
148    eprintln!("{}: {}", crate::i18n::error_prefix(), localized_msg);
149}
150
151/// Emits a bilingual error to stderr honouring `--lang` or `SQLITE_GRAPHRAG_LANG`.
152/// Usage: `output::emit_error_i18n("invariant violated", "invariante violado")`.
153#[cold]
154#[inline(never)]
155pub fn emit_error_i18n(en: &str, pt: &str) {
156    use crate::i18n::{current, Language};
157    let msg = match current() {
158        Language::English => en,
159        Language::Portuguese => pt,
160    };
161    emit_error(msg);
162}
163
164/// JSON payload emitted by the `remember` subcommand.
165///
166/// All fields are required by the JSON contract (see `docs/schemas/remember.schema.json`).
167/// `operation` is an alias of `action` for compatibility with clients using the old field name.
168///
169/// # Examples
170///
171/// ```
172/// use sqlite_graphrag::output::RememberResponse;
173///
174/// let resp = RememberResponse {
175///     memory_id: 1,
176///     name: "nota-inicial".into(),
177///     namespace: "global".into(),
178///     action: "created".into(),
179///     operation: "created".into(),
180///     version: 1,
181///     entities_persisted: 0,
182///     relationships_persisted: 0,
183///     relationships_truncated: false,
184///     chunks_created: 1,
185///     chunks_persisted: 0,
186///     urls_persisted: 0,
187///     extraction_method: None,
188///     merged_into_memory_id: None,
189///     warnings: vec![],
190///     created_at: 1_700_000_000,
191///     created_at_iso: "2023-11-14T22:13:20Z".into(),
192///     elapsed_ms: 42,
193///     name_was_normalized: false,
194///     original_name: None,
195/// };
196///
197/// let json = serde_json::to_string(&resp).unwrap();
198/// assert!(json.contains("\"memory_id\":1"));
199/// assert!(json.contains("\"elapsed_ms\":42"));
200/// assert!(json.contains("\"merged_into_memory_id\":null"));
201/// assert!(json.contains("\"urls_persisted\":0"));
202/// assert!(json.contains("\"relationships_truncated\":false"));
203/// ```
204#[derive(Serialize)]
205pub struct RememberResponse {
206    pub memory_id: i64,
207    pub name: String,
208    pub namespace: String,
209    pub action: String,
210    /// Semantic alias of `action` for compatibility with the contract documented in SKILL.md.
211    pub operation: String,
212    pub version: i64,
213    pub entities_persisted: usize,
214    pub relationships_persisted: usize,
215    /// True when the relationship builder hit the cap before covering all entity pairs.
216    /// Callers can use this to decide whether to increase GRAPHRAG_MAX_RELATIONSHIPS_PER_MEMORY.
217    pub relationships_truncated: bool,
218    /// Total number of chunks the body was split into BEFORE dedup.
219    ///
220    /// For single-chunk bodies this equals 1 even though no row is added to
221    /// the `memory_chunks` table — the memory row itself acts as the chunk.
222    /// Use `chunks_persisted` to know how many rows were actually written.
223    pub chunks_created: usize,
224    /// Number of chunks actually written to chunks/embeddings tables. Always <= chunks_created.
225    ///
226    /// Equal when no chunk had identical normalized text already in DB; less when dedup skipped
227    /// some. Equals zero for single-chunk bodies (the memory row is the chunk) and equals
228    /// `chunks_created` for multi-chunk bodies. Added in v1.0.23 to disambiguate from
229    /// `chunks_created` and reflect database state precisely.
230    pub chunks_persisted: usize,
231    /// Number of unique URLs inserted into `memory_urls` for this memory.
232    /// Added in v1.0.24 — split URLs out of the entity graph (P0-2 fix).
233    #[serde(default)]
234    pub urls_persisted: usize,
235    /// Extraction method used: "gliner-{variant}+regex" or "regex-only". None when NER is not enabled.
236    #[serde(skip_serializing_if = "Option::is_none")]
237    pub extraction_method: Option<String>,
238    pub merged_into_memory_id: Option<i64>,
239    pub warnings: Vec<String>,
240    /// Timestamp Unix epoch seconds.
241    pub created_at: i64,
242    /// RFC 3339 UTC timestamp string parallel to `created_at` for ISO 8601 parsers.
243    pub created_at_iso: String,
244    /// Total execution time in milliseconds from handler start to serialisation.
245    pub elapsed_ms: u64,
246    /// True when the user-supplied `--name` differed from the persisted slug
247    /// (i.e. kebab-case normalization changed the value). Added in v1.0.32 so
248    /// callers can detect normalization without parsing stderr WARN logs.
249    #[serde(default)]
250    pub name_was_normalized: bool,
251    /// Original user-supplied `--name` value before normalization.
252    /// Present only when `name_was_normalized == true`; omitted otherwise to
253    /// keep the common (already-kebab) payload small.
254    #[serde(skip_serializing_if = "Option::is_none")]
255    pub original_name: Option<String>,
256    /// v1.0.84 (ADR-0042): discriminador do backend LLM que efetivamente
257    /// executou o embedding da passagem. `"claude" | "codex" | "none"`.
258    /// Absent on the wire when `None` (kept for happy-path envelope cleanliness).
259    #[serde(skip_serializing_if = "Option::is_none")]
260    pub backend_invoked: Option<&'static str>,
261}
262
263/// Individual item returned by the `recall` query.
264///
265/// The `memory_type` field is serialised as `"type"` in JSON to maintain
266/// compatibility with external clients — the Rust name uses `memory_type`
267/// to avoid conflict with the reserved keyword.
268///
269/// # Examples
270///
271/// ```
272/// use sqlite_graphrag::output::RecallItem;
273///
274/// let item = RecallItem {
275///     memory_id: 7,
276///     name: "nota-rust".into(),
277///     namespace: "global".into(),
278///     memory_type: "user".into(),
279///     description: "aprendizado de Rust".into(),
280///     snippet: "ownership e borrowing".into(),
281///     distance: 0.12,
282///     score: 0.88,
283///     source: "direct".into(),
284///     graph_depth: None,
285/// };
286///
287/// let json = serde_json::to_string(&item).unwrap();
288/// // Rust field `memory_type` appears as `"type"` in JSON.
289/// assert!(json.contains("\"type\":\"user\""));
290/// assert!(!json.contains("memory_type"));
291/// assert!(json.contains("\"distance\":0.12"));
292/// ```
293#[derive(Serialize, Clone)]
294pub struct RecallItem {
295    pub memory_id: i64,
296    pub name: String,
297    pub namespace: String,
298    #[serde(rename = "type")]
299    pub memory_type: String,
300    pub description: String,
301    pub snippet: String,
302    pub distance: f32,
303    /// Cosine similarity in `[0.0, 1.0]` derived as `1.0 - distance` and clamped
304    /// to that interval. Always populated to satisfy the documented contract
305    /// (M-A5 in v1.0.40); higher means more similar. For graph hits the value
306    /// reflects the hop-derived distance proxy and should be interpreted
307    /// alongside `graph_depth` rather than as a true cosine score.
308    pub score: f32,
309    pub source: String,
310    /// Number of graph hops between this match and the seed memories.
311    ///
312    /// Set to `None` for direct vector matches (where `distance` is meaningful)
313    /// and to `Some(N)` for traversal results, with `N=0` when the depth could
314    /// not be tracked precisely. Added in v1.0.23 to disambiguate graph results
315    /// from the `distance: 0.0` placeholder previously used for graph entries.
316    /// Field is omitted from JSON output when `None`.
317    #[serde(skip_serializing_if = "Option::is_none")]
318    pub graph_depth: Option<u32>,
319}
320
321impl RecallItem {
322    /// Computes the similarity score from a vector distance, clamped to
323    /// `[0.0, 1.0]`. Cosine distance returned by sqlite-vec lives in `[0, 2]`
324    /// in theory but the embedder produces unit-norm vectors so the practical
325    /// range is `[0, 1]`. Centralized so every constructor keeps the contract.
326    #[inline]
327    pub fn score_from_distance(distance: f32) -> f32 {
328        let raw = 1.0 - distance;
329        if raw.is_nan() {
330            0.0
331        } else {
332            raw.clamp(0.0, 1.0)
333        }
334    }
335}
336
337/// Full response envelope returned by the `recall` subcommand.
338///
339/// Contains both direct vector matches and graph-traversal matches, plus the
340/// aggregated `results` list that merges both for callers that do not need
341/// to distinguish the source.
342#[derive(Serialize)]
343pub struct RecallResponse {
344    pub query: String,
345    pub k: usize,
346    pub direct_matches: Vec<RecallItem>,
347    pub graph_matches: Vec<RecallItem>,
348    /// Aggregated alias of `direct_matches` + `graph_matches` for the contract documented in SKILL.md.
349    pub results: Vec<RecallItem>,
350    /// Total execution time in milliseconds from handler start to serialisation.
351    pub elapsed_ms: u64,
352    /// G58 (v1.0.80): `true` when the live query embedding failed and the
353    /// handler fell back to FTS5 BM25 + LIKE prefix. Symmetric to
354    /// `fts_degraded` in `hybrid-search`. Absent on the wire when false.
355    #[serde(skip_serializing_if = "std::ops::Not::not", default)]
356    pub vec_degraded: bool,
357    /// G58 (v1.0.80): human-readable description of the embedding failure
358    /// that triggered the fallback. Absent on the wire when `vec_degraded`
359    /// is false or the failure had no message.
360    #[serde(skip_serializing_if = "std::option::Option::is_none")]
361    pub vec_error: Option<String>,
362    /// G58 (v1.0.80): advisory warning echoed for callers that branch on
363    /// top-level status. Distinguishes a FTS5-only fallback from a clean
364    /// hybrid response so downstream pipelines can lower their confidence.
365    #[serde(skip_serializing_if = "std::option::Option::is_none")]
366    pub warning: Option<String>,
367    /// v1.0.84 (ADR-0042): discriminador do backend LLM que efetivamente
368    /// executou o embedding live. `"claude" | "codex" | "none"`. Absent
369    /// on the wire when `None` (kept for happy-path envelope cleanliness).
370    #[serde(skip_serializing_if = "std::option::Option::is_none")]
371    pub backend_invoked: Option<&'static str>,
372    /// v1.0.84 (ADR-0042): reason code discriminador de degradação
373    /// (`"embedding_failed" | "cancelled" | "timeout"`). Absent when
374    /// `vec_degraded` is false.
375    #[serde(skip_serializing_if = "std::option::Option::is_none")]
376    pub vec_degraded_reason: Option<String>,
377}
378
#[cfg(test)]
mod tests {
    //! Unit tests for the emit helpers and the JSON shape of the response types.

    use super::*;
    use serde::Serialize;

    /// Minimal serializable payload for the happy-path emit tests.
    #[derive(Serialize)]
    struct Dummy {
        val: u32,
    }

    /// Payload whose `Serialize` impl always fails, to force a JSON serialization error.
    struct NotSerializable;
    impl Serialize for NotSerializable {
        fn serialize<S: serde::Serializer>(&self, _: S) -> Result<S::Ok, S::Error> {
            Err(serde::ser::Error::custom(
                "intentional serialization failure",
            ))
        }
    }

    /// Fully populated `RememberResponse` with every optional field left as `None`.
    fn sample_remember_response() -> RememberResponse {
        RememberResponse {
            memory_id: 1,
            name: "teste".to_string(),
            namespace: "ns".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 2,
            relationships_persisted: 3,
            relationships_truncated: false,
            chunks_created: 4,
            chunks_persisted: 4,
            urls_persisted: 2,
            extraction_method: None,
            merged_into_memory_id: None,
            warnings: vec!["aviso".to_string()],
            created_at: 1776569715,
            created_at_iso: "2026-04-19T03:34:15Z".to_string(),
            elapsed_ms: 123,
            name_was_normalized: false,
            original_name: None,
            backend_invoked: None,
        }
    }

    /// `RecallResponse` with empty result lists and no degradation metadata.
    fn clean_recall_response() -> RecallResponse {
        RecallResponse {
            query: "busca".to_string(),
            k: 10,
            direct_matches: vec![],
            graph_matches: vec![],
            results: vec![],
            elapsed_ms: 42,
            vec_degraded: false,
            vec_error: None,
            warning: None,
            backend_invoked: None,
            vec_degraded_reason: None,
        }
    }

    #[test]
    fn emit_json_returns_ok_for_valid_value() {
        let v = Dummy { val: 42 };
        assert!(emit_json(&v).is_ok());
    }

    #[test]
    fn emit_json_returns_err_for_non_serializable_value() {
        let v = NotSerializable;
        assert!(emit_json(&v).is_err());
    }

    #[test]
    fn emit_json_compact_returns_ok_for_valid_value() {
        let v = Dummy { val: 7 };
        assert!(emit_json_compact(&v).is_ok());
    }

    #[test]
    fn emit_json_compact_returns_err_for_non_serializable_value() {
        let v = NotSerializable;
        assert!(emit_json_compact(&v).is_err());
    }

    #[test]
    fn emit_text_does_not_panic() {
        emit_text("mensagem de teste");
    }

    #[test]
    fn emit_progress_does_not_panic() {
        emit_progress("progresso de teste");
    }

    #[test]
    fn remember_response_serializes_correctly() {
        let r = sample_remember_response();
        let json = serde_json::to_string(&r).unwrap();
        assert!(json.contains("memory_id"));
        assert!(json.contains("aviso"));
        assert!(json.contains("\"namespace\""));
        assert!(json.contains("\"merged_into_memory_id\""));
        assert!(json.contains("\"operation\""));
        assert!(json.contains("\"created_at\""));
        assert!(json.contains("\"created_at_iso\""));
        assert!(json.contains("\"elapsed_ms\""));
        assert!(json.contains("\"urls_persisted\""));
        assert!(json.contains("\"relationships_truncated\":false"));
        // Optional fields flagged `skip_serializing_if` are absent when `None`.
        assert!(!json.contains("extraction_method"));
        assert!(!json.contains("original_name"));
        assert!(!json.contains("backend_invoked"));
    }

    #[test]
    fn remember_response_serializes_backend_invoked_when_set() {
        let r = RememberResponse {
            backend_invoked: Some("codex"),
            ..sample_remember_response()
        };
        let json = serde_json::to_string(&r).unwrap();
        assert!(json.contains("\"backend_invoked\":\"codex\""));
    }

    #[test]
    fn recall_item_serializes_renamed_type_field() {
        let item = RecallItem {
            memory_id: 10,
            name: "entidade".to_string(),
            namespace: "ns".to_string(),
            memory_type: "entity".to_string(),
            description: "desc".to_string(),
            snippet: "trecho".to_string(),
            distance: 0.5,
            score: RecallItem::score_from_distance(0.5),
            source: "db".to_string(),
            graph_depth: None,
        };
        let json = serde_json::to_string(&item).unwrap();
        assert!(json.contains("\"type\""));
        assert!(!json.contains("memory_type"));
        // Field is omitted from JSON when None.
        assert!(!json.contains("graph_depth"));
        assert!(json.contains("\"score\":0.5"));
    }

    #[test]
    fn score_from_distance_clamps_and_handles_nan() {
        // In-range distances map to `1.0 - distance`.
        assert_eq!(RecallItem::score_from_distance(0.0), 1.0);
        assert_eq!(RecallItem::score_from_distance(0.25), 0.75);
        assert_eq!(RecallItem::score_from_distance(1.0), 0.0);
        // Out-of-range distances are clamped into `[0.0, 1.0]`.
        assert_eq!(RecallItem::score_from_distance(2.0), 0.0);
        assert_eq!(RecallItem::score_from_distance(-0.5), 1.0);
        assert_eq!(RecallItem::score_from_distance(f32::INFINITY), 0.0);
        // NaN never leaks into the score.
        assert_eq!(RecallItem::score_from_distance(f32::NAN), 0.0);
    }

    #[test]
    fn recall_response_serializes_with_lists() {
        let resp = clean_recall_response();
        let json = serde_json::to_string(&resp).unwrap();
        assert!(json.contains("direct_matches"));
        assert!(json.contains("graph_matches"));
        assert!(json.contains("\"k\":"));
        assert!(json.contains("\"results\""));
        assert!(json.contains("\"elapsed_ms\""));
        // G58: clean response must NOT carry the degradation fields.
        // (`vec_degraded` also covers `vec_degraded_reason` as a substring.)
        assert!(!json.contains("vec_degraded"));
        assert!(!json.contains("vec_error"));
        assert!(!json.contains("warning"));
        assert!(!json.contains("backend_invoked"));
    }

    #[test]
    fn recall_response_serializes_vec_degraded_when_fallback_fired() {
        let resp = RecallResponse {
            vec_degraded: true,
            vec_error: Some("embedding cancelled by external signal".to_string()),
            warning: Some("live query embedding unavailable; results are FTS5 BM25 only (semantic relevance reduced)".to_string()),
            // Reason codes are short discriminators, not free-text messages.
            vec_degraded_reason: Some("cancelled".to_string()),
            ..clean_recall_response()
        };
        let json = serde_json::to_string(&resp).unwrap();
        assert!(json.contains("\"vec_degraded\":true"));
        assert!(json.contains("\"vec_error\":\"embedding cancelled by external signal\""));
        assert!(json.contains("\"warning\":\"live query embedding unavailable"));
        assert!(json.contains("\"vec_degraded_reason\":\"cancelled\""));
    }

    #[test]
    fn recall_response_serializes_backend_invoked_when_set() {
        let resp = RecallResponse {
            backend_invoked: Some("claude"),
            ..clean_recall_response()
        };
        let json = serde_json::to_string(&resp).unwrap();
        assert!(json.contains("\"backend_invoked\":\"claude\""));
    }

    #[test]
    fn error_envelope_serializes_correctly() {
        // Local mirror of the envelope built inside `emit_error_json`, whose
        // struct is private to that function.
        #[derive(serde::Serialize)]
        struct ErrorEnvelope<'a> {
            error: bool,
            code: i32,
            message: &'a str,
        }
        let envelope = ErrorEnvelope {
            error: true,
            code: 10,
            message: "database disk image is malformed",
        };
        let json = serde_json::to_value(&envelope).unwrap();
        assert_eq!(json["error"], true);
        assert_eq!(json["code"], 10);
        assert_eq!(json["message"], "database disk image is malformed");
    }

    #[test]
    fn output_format_default_is_json() {
        let fmt = OutputFormat::default();
        assert!(matches!(fmt, OutputFormat::Json));
    }

    #[test]
    fn output_format_variants_exist() {
        let _text = OutputFormat::Text;
        let _md = OutputFormat::Markdown;
        let _json = OutputFormat::Json;
    }

    #[test]
    fn recall_item_clone_produces_equal_value() {
        let item = RecallItem {
            memory_id: 99,
            name: "clone".to_string(),
            namespace: "ns".to_string(),
            memory_type: "relation".to_string(),
            description: "d".to_string(),
            snippet: "s".to_string(),
            distance: 0.1,
            score: RecallItem::score_from_distance(0.1),
            source: "src".to_string(),
            graph_depth: Some(2),
        };
        let cloned = item.clone();
        assert_eq!(cloned.memory_id, item.memory_id);
        assert_eq!(cloned.name, item.name);
        assert_eq!(cloned.graph_depth, Some(2));
    }
}