1use crate::cli::MemoryType;
4use crate::errors::AppError;
5use crate::graph::traverse_from_memories_with_hops;
6use crate::i18n::errors_msg;
7use crate::output::{self, JsonOutputFormat, RecallItem, RecallResponse};
8use crate::paths::AppPaths;
9use crate::storage::connection::open_ro;
10use crate::storage::entities;
11use crate::storage::memories;
12
13#[derive(clap::Args)]
20#[command(after_long_help = "EXAMPLES:\n \
21 # Semantic search for top 5 matches\n \
22 sqlite-graphrag recall \"authentication design\" --k 5\n\n \
23 # Disable automatic graph expansion\n \
24 sqlite-graphrag recall \"JWT tokens\" --k 3 --no-graph\n\n \
25 # Limit graph traversal depth and minimum edge weight\n \
26 sqlite-graphrag recall \"auth\" --k 5 --max-hops 2 --min-weight 0.3\n\n \
27 # Filter by memory type\n \
28 sqlite-graphrag recall \"deployment\" --type decision --k 10\n\n \
29 # Cap results by distance threshold\n \
30 sqlite-graphrag recall \"API design\" --k 5 --max-distance 0.8\n\n \
31NOTES:\n \
32 When --no-graph is active, graph traversal is skipped and every result has\n \
33 source=\"direct\". The source field is therefore redundant with --no-graph and\n \
34 may be ignored by callers in that mode.")]
35pub struct RecallArgs {
36 #[arg(
37 allow_hyphen_values = true,
38 help = "Search query string (semantic vector search via sqlite-vec)"
39 )]
40 pub query: String,
41 #[arg(short = 'k', long, aliases = ["limit", "top-k"], default_value = "10", value_parser = crate::parsers::parse_k_range)]
49 pub k: usize,
50 #[arg(long, value_enum)]
54 pub r#type: Option<MemoryType>,
55 #[arg(long)]
56 pub namespace: Option<String>,
57 #[arg(long)]
58 pub no_graph: bool,
59 #[arg(long)]
65 pub precise: bool,
66 #[arg(long, default_value = "2")]
67 pub max_hops: u32,
68 #[arg(long, default_value = "0.3")]
69 pub min_weight: f64,
70 #[arg(long, value_name = "N")]
76 pub max_graph_results: Option<usize>,
77 #[arg(long, alias = "min-distance", default_value = "1.0")]
82 pub max_distance: f32,
83 #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
84 pub format: JsonOutputFormat,
85 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
86 pub db: Option<String>,
87 #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
89 pub json: bool,
90 #[arg(long, conflicts_with = "namespace")]
95 pub all_namespaces: bool,
96 #[command(flatten)]
97 pub daemon: crate::cli::DaemonOpts,
98}
99
100#[tracing::instrument(skip_all, level = "debug", name = "recall")]
101pub fn run(args: RecallArgs) -> Result<(), AppError> {
102 let start = std::time::Instant::now();
103 let _ = args.format;
104 tracing::debug!(target: "recall", query = %args.query, k = args.k, "searching");
105
106 if args.no_graph {
108 if args.max_hops != 2 {
109 return Err(AppError::Validation(
110 "--max-hops has no effect with --no-graph; remove one".to_string(),
111 ));
112 }
113 if (args.min_weight - 0.3).abs() > f64::EPSILON {
114 return Err(AppError::Validation(
115 "--min-weight has no effect with --no-graph; remove one".to_string(),
116 ));
117 }
118 }
119
120 if args.query.trim().is_empty() {
121 return Err(AppError::Validation(crate::i18n::validation::empty_query()));
122 }
123 let namespaces: Vec<String> = if args.all_namespaces {
127 Vec::new()
128 } else {
129 vec![crate::namespace::resolve_namespace(
130 args.namespace.as_deref(),
131 )?]
132 };
133 let namespace_for_graph = namespaces
135 .first()
136 .cloned()
137 .unwrap_or_else(|| "global".to_string());
138 let paths = AppPaths::resolve(args.db.as_deref())?;
139
140 crate::storage::connection::ensure_db_ready(&paths)?;
141
142 output::emit_progress_i18n(
143 "Computing query embedding...",
144 "Calculando embedding da consulta...",
145 );
146 let embedding = crate::daemon::embed_query_or_local(
147 &paths.models,
148 &args.query,
149 args.daemon.autostart_daemon,
150 )?;
151
152 let conn = open_ro(&paths.db)?;
153
154 let memory_type_str = args.r#type.map(|t| t.as_str());
155 let effective_k = if args.precise { 100_000 } else { args.k };
158 let knn_results =
159 memories::knn_search(&conn, &embedding, &namespaces, memory_type_str, effective_k)?;
160
161 let mut direct_matches = Vec::with_capacity(effective_k);
162 let mut memory_ids: Vec<i64> = Vec::with_capacity(effective_k);
163 for (memory_id, distance) in knn_results {
164 let row = {
165 let mut stmt = conn.prepare_cached(
166 "SELECT id, namespace, name, type, description, body, body_hash,
167 session_id, source, metadata, created_at, updated_at
168 FROM memories WHERE id=?1 AND deleted_at IS NULL",
169 )?;
170 stmt.query_row(rusqlite::params![memory_id], |r| {
171 Ok(memories::MemoryRow {
172 id: r.get(0)?,
173 namespace: r.get(1)?,
174 name: r.get(2)?,
175 memory_type: r.get(3)?,
176 description: r.get(4)?,
177 body: r.get(5)?,
178 body_hash: r.get(6)?,
179 session_id: r.get(7)?,
180 source: r.get(8)?,
181 metadata: r.get(9)?,
182 created_at: r.get(10)?,
183 updated_at: r.get(11)?,
184 deleted_at: None,
185 })
186 })
187 .ok()
188 };
189 if let Some(row) = row {
190 let snippet: String = row.body.chars().take(300).collect();
191 direct_matches.push(RecallItem {
192 memory_id: row.id,
193 name: row.name,
194 namespace: row.namespace,
195 memory_type: row.memory_type,
196 description: row.description,
197 snippet,
198 distance,
199 score: RecallItem::score_from_distance(distance),
200 source: "direct".to_string(),
201 graph_depth: None,
203 });
204 memory_ids.push(memory_id);
205 }
206 }
207
208 let mut graph_matches = Vec::with_capacity(8);
209 if !args.no_graph {
210 let entity_knn = entities::knn_search(&conn, &embedding, &namespace_for_graph, 5)?;
211 let entity_ids: Vec<i64> = entity_knn.iter().map(|(id, _)| *id).collect();
212
213 let all_seed_ids: Vec<i64> = memory_ids
214 .iter()
215 .chain(entity_ids.iter())
216 .copied()
217 .collect();
218
219 if !all_seed_ids.is_empty() {
220 let graph_memory_ids = traverse_from_memories_with_hops(
221 &conn,
222 &all_seed_ids,
223 &namespace_for_graph,
224 args.min_weight,
225 args.max_hops,
226 )?;
227
228 for (graph_mem_id, hop) in graph_memory_ids {
229 if let Some(cap) = args.max_graph_results {
232 if graph_matches.len() >= cap {
233 break;
234 }
235 }
236 let row = {
237 let mut stmt = conn.prepare_cached(
238 "SELECT id, namespace, name, type, description, body, body_hash,
239 session_id, source, metadata, created_at, updated_at
240 FROM memories WHERE id=?1 AND deleted_at IS NULL",
241 )?;
242 stmt.query_row(rusqlite::params![graph_mem_id], |r| {
243 Ok(memories::MemoryRow {
244 id: r.get(0)?,
245 namespace: r.get(1)?,
246 name: r.get(2)?,
247 memory_type: r.get(3)?,
248 description: r.get(4)?,
249 body: r.get(5)?,
250 body_hash: r.get(6)?,
251 session_id: r.get(7)?,
252 source: r.get(8)?,
253 metadata: r.get(9)?,
254 created_at: r.get(10)?,
255 updated_at: r.get(11)?,
256 deleted_at: None,
257 })
258 })
259 .ok()
260 };
261 if let Some(row) = row {
262 let snippet: String = row.body.chars().take(300).collect();
263 let graph_distance = 1.0 - 1.0 / (hop as f32 + 1.0);
269 graph_matches.push(RecallItem {
270 memory_id: row.id,
271 name: row.name,
272 namespace: row.namespace,
273 memory_type: row.memory_type,
274 description: row.description,
275 snippet,
276 distance: graph_distance,
277 score: RecallItem::score_from_distance(graph_distance),
278 source: "graph".to_string(),
279 graph_depth: Some(hop),
280 });
281 }
282 }
283 }
284 }
285
286 if args.max_distance < 1.0 {
288 let has_relevant = direct_matches
289 .iter()
290 .any(|item| item.distance <= args.max_distance);
291 if !has_relevant {
292 return Err(AppError::NotFound(errors_msg::no_recall_results(
293 args.max_distance,
294 &args.query,
295 &namespace_for_graph,
296 )));
297 }
298 }
299
300 let results: Vec<RecallItem> = direct_matches
301 .iter()
302 .cloned()
303 .chain(graph_matches.iter().cloned())
304 .collect();
305
306 output::emit_json(&RecallResponse {
307 query: args.query,
308 k: args.k,
309 direct_matches,
310 graph_matches,
311 results,
312 elapsed_ms: start.elapsed().as_millis() as u64,
313 })?;
314
315 Ok(())
316}
317
#[cfg(test)]
mod tests {
    use crate::output::{RecallItem, RecallResponse};

    /// Builds a `RecallItem` fixture with fixed metadata; only the name, distance and
    /// source vary. Items whose source is "graph" get `graph_depth = Some(0)`.
    fn make_item(name: &str, distance: f32, source: &str) -> RecallItem {
        let graph_depth = match source {
            "graph" => Some(0),
            _ => None,
        };
        RecallItem {
            memory_id: 1,
            name: String::from(name),
            namespace: String::from("global"),
            memory_type: String::from("fact"),
            description: String::from("desc"),
            snippet: String::from("snippet"),
            distance,
            score: RecallItem::score_from_distance(distance),
            source: String::from(source),
            graph_depth,
        }
    }

    /// Length of the JSON array stored under `key`; panics when it is not an array.
    fn array_len(json: &serde_json::Value, key: &str) -> usize {
        json[key].as_array().unwrap().len()
    }

    // A direct match must serialize a numeric score inside [0, 1] equal to 1 - distance.
    #[test]
    fn recall_item_score_is_present_and_finite_for_direct_match() {
        let json = serde_json::to_value(make_item("mem", 0.25, "direct"))
            .expect("serialization failed");
        let score = json["score"].as_f64().expect("score must be a number");

        let in_unit_range = (0.0..=1.0).contains(&score);
        assert!(in_unit_range, "score must be in [0, 1], got {score}");

        let delta = (score - 0.75).abs();
        assert!(
            delta < 1e-6,
            "score must equal 1 - distance for canonical case"
        );
    }

    // Distances outside [0, 1], and NaN, are clamped to the ends of the score range.
    #[test]
    fn recall_item_score_clamps_distance_outside_unit_range() {
        let clamp = RecallItem::score_from_distance;
        assert_eq!(clamp(2.0), 0.0);
        assert_eq!(clamp(-0.5), 1.0);
        assert_eq!(clamp(f32::NAN), 0.0);
    }

    // Every top-level field of the response is present with the expected value or shape.
    #[test]
    fn recall_response_serializes_required_fields() {
        let only_hit = || make_item("mem-a", 0.12, "direct");
        let resp = RecallResponse {
            query: String::from("rust memory"),
            k: 5,
            direct_matches: vec![only_hit()],
            graph_matches: Vec::new(),
            results: vec![only_hit()],
            elapsed_ms: 42,
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["query"], "rust memory");
        assert_eq!(json["k"], 5);
        assert_eq!(json["elapsed_ms"], 42u64);
        for key in ["direct_matches", "graph_matches", "results"].iter() {
            assert!(json[*key].is_array());
        }
    }

    // `memory_type` is exposed under the JSON key `type`.
    #[test]
    fn recall_item_serializes_renamed_type() {
        let json = serde_json::to_value(make_item("mem-test", 0.25, "direct"))
            .expect("serialization failed");

        assert_eq!(json["type"], "fact");
        assert_eq!(json["distance"], 0.25f32);
        assert_eq!(json["source"], "direct");
    }

    // `results` carries the direct matches first, then the graph matches.
    #[test]
    fn recall_response_results_contains_direct_and_graph() {
        let direct = make_item("d-mem", 0.10, "direct");
        let graph = make_item("g-mem", 0.0, "graph");
        let combined = vec![direct.clone(), graph.clone()];

        let resp = RecallResponse {
            query: String::from("query"),
            k: 10,
            direct_matches: vec![direct],
            graph_matches: vec![graph],
            results: combined,
            elapsed_ms: 10,
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(array_len(&json, "direct_matches"), 1);
        assert_eq!(array_len(&json, "graph_matches"), 1);
        assert_eq!(array_len(&json, "results"), 2);

        let sources: Vec<&str> = json["results"]
            .as_array()
            .unwrap()
            .iter()
            .map(|item| item["source"].as_str().unwrap())
            .collect();
        assert_eq!(sources, ["direct", "graph"]);
    }

    // An empty response still serializes its lists as (empty) arrays.
    #[test]
    fn recall_response_empty_serializes_empty_arrays() {
        let resp = RecallResponse {
            query: String::from("nothing"),
            k: 3,
            direct_matches: Vec::new(),
            graph_matches: Vec::new(),
            results: Vec::new(),
            elapsed_ms: 1,
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(array_len(&json, "direct_matches"), 0);
        assert_eq!(array_len(&json, "results"), 0);
    }

    // Pins the hop-count distance proxy 1 - 1 / (hop + 1) for the first few depths.
    #[test]
    fn graph_matches_distance_uses_hop_count_proxy() {
        let expected_by_hop: [f32; 4] = [0.0, 0.5, 0.6667, 0.75];
        for (hop, expected) in (0u32..).zip(expected_by_hop.iter().copied()) {
            let d = 1.0_f32 - 1.0 / (hop as f32 + 1.0);
            let close_enough = (d - expected).abs() < 0.001;
            assert!(close_enough, "hop={hop} expected={expected} got={d}");
        }
    }
}