sqlite_graphrag/commands/
recall.rs

1//! Handler for the `recall` CLI subcommand.
2
3use crate::cli::MemoryType;
4use crate::errors::AppError;
5use crate::graph::traverse_from_memories_with_hops;
6use crate::i18n::errors_msg;
7use crate::output::{self, JsonOutputFormat, RecallItem, RecallResponse};
8use crate::paths::AppPaths;
9use crate::storage::connection::open_ro;
10use crate::storage::entities;
11use crate::storage::memories;
12
/// Arguments for the `recall` subcommand.
///
/// When `--namespace` is omitted the query runs against the `global` namespace,
/// which is the default namespace used by `remember` when no `--namespace` flag
/// is provided. Pass an explicit `--namespace` value to search a different
/// isolated namespace.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Semantic search for top 5 matches\n  \
    sqlite-graphrag recall \"authentication design\" --k 5\n\n  \
    # Disable automatic graph expansion\n  \
    sqlite-graphrag recall \"JWT tokens\" --k 3 --no-graph\n\n  \
    # Limit graph traversal depth and minimum edge weight\n  \
    sqlite-graphrag recall \"auth\" --k 5 --max-hops 2 --min-weight 0.3\n\n  \
    # Filter by memory type\n  \
    sqlite-graphrag recall \"deployment\" --type decision --k 10\n\n  \
    # Cap results by distance threshold\n  \
    sqlite-graphrag recall \"API design\" --k 5 --max-distance 0.8\n\n  \
NOTES:\n  \
    When --no-graph is active, graph traversal is skipped and every result has\n  \
    source=\"direct\". The source field is therefore redundant with --no-graph and\n  \
    may be ignored by callers in that mode.")]
pub struct RecallArgs {
    // Positional query text. `run` rejects it with a validation error when it
    // is empty after trimming.
    #[arg(
        allow_hyphen_values = true,
        help = "Search query string (semantic vector search via sqlite-vec)"
    )]
    pub query: String,
    /// Maximum number of direct vector matches to return.
    ///
    /// Note: this flag controls only `direct_matches`. Graph traversal results
    /// (`graph_matches`) are unbounded by default; use `--max-graph-results` to
    /// cap them independently. The `results` field aggregates both lists.
    /// Validated to the inclusive range `1..=4096` (the upper bound matches
    /// `sqlite-vec`'s knn limit; out-of-range values are rejected at parse time).
    #[arg(short = 'k', long, aliases = ["limit", "top-k"], default_value = "10", value_parser = crate::parsers::parse_k_range)]
    pub k: usize,
    /// Filter by memory.type. Note: distinct from graph entity_type
    /// (project/tool/person/file/concept/incident/decision/memory/dashboard/issue_tracker/organization/location/date)
    /// used in --entities-file.
    #[arg(long, value_enum)]
    pub r#type: Option<MemoryType>,
    // Optional namespace; `run` resolves it through
    // `crate::namespace::resolve_namespace` unless `--all-namespaces` is set.
    #[arg(long)]
    pub namespace: Option<String>,
    // Skips graph expansion. `run` additionally rejects a non-default
    // `--max-hops` or `--min-weight` when this flag is present.
    #[arg(long)]
    pub no_graph: bool,
    // NOTE(review): in this mode `run` substitutes k = 100_000, which is above
    // the 4096 bound documented on `k`; confirm `memories::knn_search` accepts it.
    /// Disable -k cap and return all direct matches without truncation.
    ///
    /// When set, the `-k`/`--k` flag is ignored for `direct_matches` and the
    /// response includes every match above the distance threshold. Useful when
    /// callers need the complete set rather than a top-N preview.
    #[arg(long)]
    pub precise: bool,
    // Hop limit forwarded to `traverse_from_memories_with_hops`. `run` compares
    // against the literal 2 to detect explicit use together with `--no-graph`,
    // so the default here and that literal must stay in sync.
    #[arg(long, default_value = "2")]
    pub max_hops: u32,
    // Minimum edge weight forwarded to `traverse_from_memories_with_hops`.
    // `run` compares against the literal 0.3 for the `--no-graph` conflict
    // check, so the default here and that literal must stay in sync.
    #[arg(long, default_value = "0.3")]
    pub min_weight: f64,
    /// Cap the size of `graph_matches` to at most N entries.
    ///
    /// Defaults to unbounded (`None`) so existing pipelines see the same shape
    /// as in v1.0.22 and earlier. Set this when a query touches a dense graph
    /// neighbourhood and the caller only needs a top-N preview. Added in v1.0.23.
    #[arg(long, value_name = "N")]
    pub max_graph_results: Option<usize>,
    /// Filter results by maximum distance. Results with distance greater than this value
    /// are excluded. If all matches exceed this threshold, the command exits with code 4
    /// (`not found`) per the documented public contract.
    /// Default `1.0` disables the filter and preserves the top-k behavior.
    #[arg(long, alias = "min-distance", default_value = "1.0")]
    pub max_distance: f32,
    // Parsed for CLI compatibility; `run` reads the value and discards it.
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    // Database path override, also settable through `SQLITE_GRAPHRAG_DB_PATH`;
    // `run` hands it to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    /// Accept `--json` as a no-op because output is already JSON by default.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    /// Search across all namespaces instead of a single namespace.
    ///
    /// Cannot be combined with `--namespace`. When set, the query runs against
    /// every namespace and results include a `namespace` field to identify origin.
    #[arg(long, conflicts_with = "namespace")]
    pub all_namespaces: bool,
    /// G58 (v1.0.80): skip the live query embedding and use FTS5 BM25 +
    /// LIKE prefix exclusively. Useful in CI/CD with tight OAuth quota and
    /// in deterministic regression tests that need stable ranking.
    #[arg(
        long,
        help = "Skip live query embedding; use FTS5 BM25 + LIKE prefix only"
    )]
    pub fallback_fts_only: bool,
}
105
106#[tracing::instrument(skip_all, level = "debug", name = "recall")]
107pub fn run(
108    args: RecallArgs,
109    llm_backend: crate::cli::LlmBackendChoice,
110    embedding_backend: crate::cli::EmbeddingBackendChoice,
111) -> Result<(), AppError> {
112    let start = std::time::Instant::now();
113    let _ = args.format;
114    tracing::debug!(target: "recall", query = %args.query, k = args.k, "searching");
115
116    // G20: reject graph-specific flags when --no-graph is active
117    if args.no_graph {
118        if args.max_hops != 2 {
119            return Err(AppError::Validation(
120                "--max-hops has no effect with --no-graph; remove one".to_string(),
121            ));
122        }
123        if (args.min_weight - 0.3).abs() > f64::EPSILON {
124            return Err(AppError::Validation(
125                "--min-weight has no effect with --no-graph; remove one".to_string(),
126            ));
127        }
128    }
129
130    if args.query.trim().is_empty() {
131        return Err(AppError::Validation(crate::i18n::validation::empty_query()));
132    }
133    // Resolve the list of namespaces to search:
134    // - empty vec  => all namespaces (sentinel used by knn_search)
135    // - single vec => one namespace (default or --namespace value)
136    let namespaces: Vec<String> = if args.all_namespaces {
137        Vec::new()
138    } else {
139        vec![crate::namespace::resolve_namespace(
140            args.namespace.as_deref(),
141        )?]
142    };
143    // Single namespace string used for graph traversal and error messages.
144    let namespace_for_graph = namespaces
145        .first()
146        .cloned()
147        .unwrap_or_else(|| "global".to_string());
148    let paths = AppPaths::resolve(args.db.as_deref())?;
149
150    crate::storage::connection::ensure_db_ready(&paths)?;
151
152    output::emit_progress_i18n(
153        "Computing query embedding...",
154        "Calculando embedding da consulta...",
155    );
156    let conn = open_ro(&paths.db)?;
157    // G58 (v1.0.80): when the live embedding fails (timeout, OAuth contention,
158    // rate limit, missing CLI), fall back to FTS5 BM25 + LIKE prefix and
159    // surface the degradation through `vec_degraded` + `vec_error` + `warning`
160    // on the response envelope. The `--fallback-fts-only` flag forces the
161    // skip without even attempting the embedding subprocess.
162    // v1.0.84 (ADR-0042): tuple de 4 elementos. `backend_invoked` carrega
163    // o discriminador do backend que efetivamente invocou o LLM (ou `None`
164    // quando o caller pediu `--fallback-fts-only` e nunca chamou o subprocesso).
165    let (embedding, vec_degraded, vec_error, backend_invoked) = if args.fallback_fts_only {
166        (
167            None,
168            true,
169            Some("fallback_fts_only requested".to_string()),
170            None,
171        )
172    } else {
173        // v1.0.82 (GAP-003): forward --llm-backend to embed_with_fallback.
174        // v1.0.84 (ADR-0042): extrai o backend que efetivamente invocou o
175        // LLM para popular `backend_invoked` no envelope de resposta.
176        // v1.0.85 (G58 / ADR-0043): retry determinístico em OAuthQuota
177        // (codex ↔ claude) e backoff 750ms em SlotExhausted antes de
178        // aceitar a degradação para FTS5-puro.
179        match crate::embedder::try_embed_query_with_embedding_choice(
180            &paths.models,
181            &args.query,
182            embedding_backend,
183            llm_backend,
184        ) {
185            Ok((v, backend)) => (Some(v), false, None, Some(backend.as_str())),
186            Err(reason) => {
187                let msg = reason.to_string();
188                tracing::warn!(target: "recall", fallback_reason = %msg, reason_code = %reason.reason_code(), "live embedding failed; falling back to FTS5");
189                (None, true, Some(msg), None)
190            }
191        }
192    };
193
194    let memory_type_str = args.r#type.map(|t| t.as_str());
195    // When --precise is set, lift the -k cap so every match is returned; the
196    // max_distance filter below will trim irrelevant results instead.
197    let effective_k = if args.precise { 100_000 } else { args.k };
198
199    // G58: if the embedding is unavailable, route the entire direct path
200    // through FTS5 BM25 + LIKE prefix. Graph traversal is suppressed because
201    // it depends on the KNN results to seed the expansion; without the
202    // embedding, no seed exists.
203    let (direct_matches, memory_ids): (Vec<RecallItem>, Vec<i64>) =
204        if let Some(emb) = embedding.as_ref() {
205            let knn_results =
206                memories::knn_search(&conn, emb, &namespaces, memory_type_str, effective_k)?;
207            let mut items: Vec<RecallItem> = Vec::with_capacity(knn_results.len());
208            let mut memory_ids: Vec<i64> = Vec::with_capacity(knn_results.len());
209            for (memory_id, distance) in knn_results {
210                let row = {
211                    let mut stmt = conn.prepare_cached(
212                        "SELECT id, namespace, name, type, description, body, body_hash,
213                            session_id, source, metadata, created_at, updated_at
214                     FROM memories WHERE id=?1 AND deleted_at IS NULL",
215                    )?;
216                    stmt.query_row(rusqlite::params![memory_id], |r| {
217                        Ok(memories::MemoryRow {
218                            id: r.get(0)?,
219                            namespace: r.get(1)?,
220                            name: r.get(2)?,
221                            memory_type: r.get(3)?,
222                            description: r.get(4)?,
223                            body: r.get(5)?,
224                            body_hash: r.get(6)?,
225                            session_id: r.get(7)?,
226                            source: r.get(8)?,
227                            metadata: r.get(9)?,
228                            created_at: r.get(10)?,
229                            updated_at: r.get(11)?,
230                            deleted_at: None,
231                        })
232                    })
233                    .ok()
234                };
235                if let Some(row) = row {
236                    let snippet: String = row.body.chars().take(300).collect();
237                    items.push(RecallItem {
238                        memory_id: row.id,
239                        name: row.name,
240                        namespace: row.namespace,
241                        memory_type: row.memory_type,
242                        description: row.description,
243                        snippet,
244                        distance,
245                        score: RecallItem::score_from_distance(distance),
246                        source: "direct".to_string(),
247                        graph_depth: None,
248                    });
249                    memory_ids.push(memory_id);
250                }
251            }
252            (items, memory_ids)
253        } else {
254            // FTS5 BM25 + LIKE prefix fallback path. The same `fts_search` helper
255            // is used as in `hybrid-search`; distance is approximated by
256            // 1.0 / (rank + 1) so the score is in (0, 1] and comparable to the
257            // vector path's `1.0 - distance`. Note: only the FIRST effective_k
258            // results are kept to preserve the top-N contract.
259            let fts_rows = memories::fts_search(
260                &conn,
261                &args.query,
262                &namespace_for_graph,
263                memory_type_str,
264                effective_k,
265            )?;
266            let mut items: Vec<RecallItem> = Vec::with_capacity(fts_rows.len());
267            for (rank, row) in fts_rows.into_iter().enumerate() {
268                let dist = 1.0 - 1.0 / (rank as f32 + 1.0);
269                let snippet: String = row.body.chars().take(300).collect();
270                items.push(RecallItem {
271                    memory_id: row.id,
272                    name: row.name,
273                    namespace: row.namespace,
274                    memory_type: row.memory_type,
275                    description: row.description,
276                    snippet,
277                    distance: dist,
278                    score: RecallItem::score_from_distance(dist),
279                    source: "fts_fallback".to_string(),
280                    graph_depth: None,
281                });
282            }
283            (items, Vec::new())
284        };
285
286    let mut graph_matches = Vec::with_capacity(8);
287    if let Some(emb) = (!args.no_graph).then_some(()).and(embedding.as_ref()) {
288        let entity_knn = entities::knn_search(&conn, emb, &namespace_for_graph, 5)?;
289        let entity_ids: Vec<i64> = entity_knn.iter().map(|(id, _)| *id).collect();
290
291        let all_seed_ids: Vec<i64> = memory_ids
292            .iter()
293            .chain(entity_ids.iter())
294            .copied()
295            .collect();
296
297        if !all_seed_ids.is_empty() {
298            let graph_memory_ids = traverse_from_memories_with_hops(
299                &conn,
300                &all_seed_ids,
301                &namespace_for_graph,
302                args.min_weight,
303                args.max_hops,
304            )?;
305
306            for (graph_mem_id, hop) in graph_memory_ids {
307                // v1.0.23: respect the optional cap on graph results so dense
308                // neighbourhoods do not flood the response unintentionally.
309                if let Some(cap) = args.max_graph_results {
310                    if graph_matches.len() >= cap {
311                        break;
312                    }
313                }
314                let row = {
315                    let mut stmt = conn.prepare_cached(
316                        "SELECT id, namespace, name, type, description, body, body_hash,
317                                session_id, source, metadata, created_at, updated_at
318                         FROM memories WHERE id=?1 AND deleted_at IS NULL",
319                    )?;
320                    stmt.query_row(rusqlite::params![graph_mem_id], |r| {
321                        Ok(memories::MemoryRow {
322                            id: r.get(0)?,
323                            namespace: r.get(1)?,
324                            name: r.get(2)?,
325                            memory_type: r.get(3)?,
326                            description: r.get(4)?,
327                            body: r.get(5)?,
328                            body_hash: r.get(6)?,
329                            session_id: r.get(7)?,
330                            source: r.get(8)?,
331                            metadata: r.get(9)?,
332                            created_at: r.get(10)?,
333                            updated_at: r.get(11)?,
334                            deleted_at: None,
335                        })
336                    })
337                    .ok()
338                };
339                if let Some(row) = row {
340                    let snippet: String = row.body.chars().take(300).collect();
341                    let graph_distance = 1.0 - 1.0 / (hop as f32 + 1.0);
342                    graph_matches.push(RecallItem {
343                        memory_id: row.id,
344                        name: row.name,
345                        namespace: row.namespace,
346                        memory_type: row.memory_type,
347                        description: row.description,
348                        snippet,
349                        distance: graph_distance,
350                        score: RecallItem::score_from_distance(graph_distance),
351                        source: "graph".to_string(),
352                        graph_depth: Some(hop),
353                    });
354                }
355            }
356        }
357    }
358
359    // Filtrar por max_distance se < 1.0 (ativado). Se nenhum hit dentro do threshold, exit 4.
360    if args.max_distance < 1.0 && !vec_degraded {
361        let has_relevant = direct_matches
362            .iter()
363            .any(|item| item.distance <= args.max_distance);
364        if !has_relevant {
365            return Err(AppError::NotFound(errors_msg::no_recall_results(
366                args.max_distance,
367                &args.query,
368                &namespace_for_graph,
369            )));
370        }
371    }
372
373    let results: Vec<RecallItem> = direct_matches
374        .iter()
375        .cloned()
376        .chain(graph_matches.iter().cloned())
377        .collect();
378
379    let warning = if vec_degraded {
380        Some(
381            "live query embedding unavailable; results are FTS5 BM25 only (semantic relevance reduced)"
382                .to_string(),
383        )
384    } else {
385        None
386    };
387
388    output::emit_json(&RecallResponse {
389        query: args.query,
390        k: args.k,
391        direct_matches,
392        graph_matches,
393        results,
394        elapsed_ms: start.elapsed().as_millis() as u64,
395        vec_degraded,
396        vec_error: vec_error.clone(),
397        warning,
398        backend_invoked,
399        vec_degraded_reason: if vec_degraded { vec_error } else { None },
400    })?;
401
402    Ok(())
403}
404
#[cfg(test)]
mod tests {
    use crate::output::{RecallItem, RecallResponse};

    /// Fixture: a `RecallItem` with fixed metadata. Items whose source is
    /// `"graph"` get a depth of 0, every other source gets no depth.
    fn make_item(name: &str, distance: f32, source: &str) -> RecallItem {
        let graph_depth = match source {
            "graph" => Some(0),
            _ => None,
        };
        RecallItem {
            memory_id: 1,
            name: String::from(name),
            namespace: String::from("global"),
            memory_type: String::from("fact"),
            description: String::from("desc"),
            snippet: String::from("snippet"),
            distance,
            score: RecallItem::score_from_distance(distance),
            source: String::from(source),
            graph_depth,
        }
    }

    /// Fixture: a non-degraded response whose `results` list is the direct
    /// matches followed by the graph matches.
    fn make_response(
        query: &str,
        k: usize,
        elapsed_ms: u64,
        direct: Vec<RecallItem>,
        graph: Vec<RecallItem>,
    ) -> RecallResponse {
        let results: Vec<RecallItem> = direct.iter().chain(graph.iter()).cloned().collect();
        RecallResponse {
            query: String::from(query),
            k,
            direct_matches: direct,
            graph_matches: graph,
            results,
            elapsed_ms,
            vec_degraded: false,
            vec_error: None,
            warning: None,
            backend_invoked: None,
            vec_degraded_reason: None,
        }
    }

    /// Length of the JSON array stored under `key`; panics when it is not an array.
    fn array_len(value: &serde_json::Value, key: &str) -> usize {
        value[key].as_array().unwrap().len()
    }

    // Bug M-A5: every RecallItem carries a non-null cosine similarity score.
    #[test]
    fn recall_item_score_is_present_and_finite_for_direct_match() {
        let value =
            serde_json::to_value(&make_item("mem", 0.25, "direct")).expect("serialization failed");
        let score = value["score"].as_f64().expect("score must be a number");
        let within_unit_range = (0.0..=1.0).contains(&score);
        assert!(within_unit_range, "score must be in [0, 1], got {score}");
        let deviation = (score - 0.75).abs();
        assert!(
            deviation < 1e-6,
            "score must equal 1 - distance for canonical case"
        );
    }

    #[test]
    fn recall_item_score_clamps_distance_outside_unit_range() {
        // Out-of-range or NaN distances must still map to a score inside [0, 1].
        let too_far = RecallItem::score_from_distance(2.0);
        let negative = RecallItem::score_from_distance(-0.5);
        let not_a_number = RecallItem::score_from_distance(f32::NAN);
        assert_eq!(too_far, 0.0);
        assert_eq!(negative, 1.0);
        assert_eq!(not_a_number, 0.0);
    }

    #[test]
    fn recall_response_serializes_required_fields() {
        let response = make_response(
            "rust memory",
            5,
            42,
            vec![make_item("mem-a", 0.12, "direct")],
            vec![],
        );

        let value = serde_json::to_value(&response).expect("serialization failed");
        assert_eq!(value["query"], "rust memory");
        assert_eq!(value["k"], 5);
        assert_eq!(value["elapsed_ms"], 42u64);
        assert!(value["direct_matches"].is_array());
        assert!(value["graph_matches"].is_array());
        assert!(value["results"].is_array());
    }

    #[test]
    fn recall_item_serializes_renamed_type() {
        let value = serde_json::to_value(&make_item("mem-test", 0.25, "direct"))
            .expect("serialization failed");

        // `memory_type` is exposed under the JSON key "type".
        assert_eq!(value["type"], "fact");
        assert_eq!(value["distance"], 0.25f32);
        assert_eq!(value["source"], "direct");
    }

    #[test]
    fn recall_response_results_contains_direct_and_graph() {
        let response = make_response(
            "query",
            10,
            10,
            vec![make_item("d-mem", 0.10, "direct")],
            vec![make_item("g-mem", 0.0, "graph")],
        );

        let value = serde_json::to_value(&response).expect("serialization failed");
        assert_eq!(array_len(&value, "direct_matches"), 1);
        assert_eq!(array_len(&value, "graph_matches"), 1);
        assert_eq!(array_len(&value, "results"), 2);
        assert_eq!(value["results"][0]["source"], "direct");
        assert_eq!(value["results"][1]["source"], "graph");
    }

    #[test]
    fn recall_response_empty_serializes_empty_arrays() {
        let response = make_response("nothing", 3, 1, vec![], vec![]);

        let value = serde_json::to_value(&response).expect("serialization failed");
        assert_eq!(array_len(&value, "direct_matches"), 0);
        assert_eq!(array_len(&value, "results"), 0);
    }

    #[test]
    fn graph_matches_distance_uses_hop_count_proxy() {
        // Hop-count proxy: distance = 1.0 - 1.0 / (hop + 1.0)
        //   hop=0 -> 0.0, hop=1 -> 0.5, hop=2 -> ~0.667, hop=3 -> 0.75
        let table: [(u32, f32); 4] = [(0, 0.0), (1, 0.5), (2, 0.6667), (3, 0.75)];
        table.iter().for_each(|&(hop, expected)| {
            let d = 1.0_f32 - 1.0 / (hop as f32 + 1.0);
            let error = (d - expected).abs();
            assert!(error < 0.001, "hop={hop} expected={expected} got={d}");
        });
    }
}
sqlite_graphrag/commands/recall.rs

sqlite_graphrag/commands/
recall.rs