Skip to main content

sqlite_graphrag/commands/
remember.rs

1use crate::chunking;
2use crate::cli::MemoryType;
3use crate::errors::AppError;
4use crate::i18n::erros;
5use crate::output::{self, OutputFormat, RememberResponse};
6use crate::paths::AppPaths;
7use crate::storage::chunks as storage_chunks;
8use crate::storage::connection::open_rw;
9use crate::storage::entities::{NewEntity, NewRelationship};
10use crate::storage::memories::NewMemory;
11use crate::storage::{entities, memories, versions};
12use serde::Deserialize;
13use std::io::Read as _;
14
// Command-line arguments of the `remember` subcommand, consumed by `run` below.
//
// Plain `//` comments are used on purpose: clap's derive turns `///` doc comments
// into help text, which would change the generated `--help` output.
#[derive(clap::Args)]
pub struct RememberArgs {
    // Memory name. `run` rejects it when empty, longer than `MAX_MEMORY_NAME_LEN`,
    // starting with "__", or not matching `NAME_SLUG_REGEX`.
    #[arg(long)]
    pub name: String,
    // Memory type; stored through `MemoryType::as_str`.
    #[arg(long, value_enum)]
    pub r#type: MemoryType,
    // Description; `run` rejects it when longer than `MAX_MEMORY_DESCRIPTION_LEN`.
    #[arg(long)]
    pub description: String,
    // Body sources, in the precedence applied by `run`:
    // `--body`, then `--body-file`, then stdin (`--body-stdin` or `--graph-stdin`).
    // With none of them the body is the empty string.
    #[arg(long)]
    pub body: Option<String>,
    #[arg(long)]
    pub body_file: Option<std::path::PathBuf>,
    #[arg(long)]
    pub body_stdin: bool,
    // JSON file deserialized into the list of entities (ignored with `--skip-extraction`).
    #[arg(long)]
    pub entities_file: Option<std::path::PathBuf>,
    // JSON file deserialized into the list of relationships (ignored with `--skip-extraction`).
    #[arg(long)]
    pub relationships_file: Option<std::path::PathBuf>,
    // When set, `run` tries to parse the collected body text as a `GraphInput` JSON document;
    // on success it replaces the graph loaded from files and the body becomes empty.
    #[arg(long)]
    pub graph_stdin: bool,
    // NOTE(review): because of `default_value`, clap always fills this with `Some(..)`,
    // so `resolve_namespace` presumably never sees `None` from the CLI — confirm that
    // any fallback inside `resolve_namespace` is not meant to apply here.
    #[arg(long, default_value = "global")]
    pub namespace: Option<String>,
    // Inline JSON metadata; takes precedence over `--metadata-file`.
    // When neither is given, `run` stores an empty JSON object.
    #[arg(long)]
    pub metadata: Option<String>,
    #[arg(long)]
    pub metadata_file: Option<std::path::PathBuf>,
    // Update the existing memory with the same name instead of failing with `AppError::Duplicate`.
    #[arg(long)]
    pub force_merge: bool,
    // Forwarded to `memories::update` on the update path only.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    // Skips loading entities/relationships from files and from stdin.
    #[arg(long)]
    pub skip_extraction: bool,
    // Optional session identifier stored with the memory.
    #[arg(long)]
    pub session_id: Option<String>,
    // NOTE(review): not read by `run` in this file; the response is always emitted via `emit_json`.
    #[arg(long, value_enum, default_value = "json")]
    pub format: OutputFormat,
    // Hidden compatibility flag; has no effect.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location override; also read from `SQLITE_GRAPHRAG_DB_PATH`.
    // Passed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
62
/// Graph payload attached to a memory: the entities and relationships to persist.
///
/// `run` fills it from `--entities-file` / `--relationships-file`, or deserializes the
/// whole structure from the body text when `--graph-stdin` is set. Either key may be
/// omitted in the JSON; a missing key yields an empty list.
#[derive(Deserialize, Default)]
struct GraphInput {
    /// Entities to upsert and link to the memory.
    #[serde(default)]
    entities: Vec<NewEntity>,
    /// Relationships to upsert and link to the memory.
    #[serde(default)]
    relationships: Vec<NewRelationship>,
}
70
71pub fn run(args: RememberArgs) -> Result<(), AppError> {
72    use crate::constants::*;
73
74    let inicio = std::time::Instant::now();
75    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
76
77    if args.name.is_empty() || args.name.len() > MAX_MEMORY_NAME_LEN {
78        return Err(AppError::Validation(
79            crate::i18n::validacao::nome_comprimento(MAX_MEMORY_NAME_LEN),
80        ));
81    }
82
83    if args.name.starts_with("__") {
84        return Err(AppError::Validation(
85            crate::i18n::validacao::nome_reservado(),
86        ));
87    }
88
89    {
90        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
91            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
92        if !slug_re.is_match(&args.name) {
93            return Err(AppError::Validation(crate::i18n::validacao::nome_kebab(
94                &args.name,
95            )));
96        }
97    }
98
99    if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
100        return Err(AppError::Validation(
101            crate::i18n::validacao::descricao_excede(MAX_MEMORY_DESCRIPTION_LEN),
102        ));
103    }
104
105    let mut raw_body = if let Some(b) = args.body {
106        b
107    } else if let Some(path) = args.body_file {
108        std::fs::read_to_string(&path).map_err(AppError::Io)?
109    } else if args.body_stdin || args.graph_stdin {
110        let mut buf = String::new();
111        std::io::stdin()
112            .read_to_string(&mut buf)
113            .map_err(AppError::Io)?;
114        buf
115    } else {
116        String::new()
117    };
118
119    let mut graph = GraphInput::default();
120    if !args.skip_extraction {
121        if let Some(path) = args.entities_file {
122            let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
123            graph.entities = serde_json::from_str(&content)?;
124        }
125        if let Some(path) = args.relationships_file {
126            let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
127            graph.relationships = serde_json::from_str(&content)?;
128        }
129        if args.graph_stdin {
130            if let Ok(g) = serde_json::from_str::<GraphInput>(&raw_body) {
131                graph = g;
132                raw_body = String::new();
133            }
134        }
135    }
136
137    if graph.entities.len() > MAX_ENTITIES_PER_MEMORY {
138        return Err(AppError::LimitExceeded(erros::limite_entidades(
139            MAX_ENTITIES_PER_MEMORY,
140        )));
141    }
142    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
143        return Err(AppError::LimitExceeded(erros::limite_relacionamentos(
144            MAX_RELATIONSHIPS_PER_MEMORY,
145        )));
146    }
147
148    if raw_body.len() > MAX_MEMORY_BODY_LEN {
149        return Err(AppError::LimitExceeded(
150            crate::i18n::validacao::body_excede(MAX_MEMORY_BODY_LEN),
151        ));
152    }
153
154    let metadata: serde_json::Value = if let Some(m) = args.metadata {
155        serde_json::from_str(&m)?
156    } else if let Some(path) = args.metadata_file {
157        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
158        serde_json::from_str(&content)?
159    } else {
160        serde_json::json!({})
161    };
162
163    let paths = AppPaths::resolve(args.db.as_deref())?;
164    let mut conn = open_rw(&paths.db)?;
165
166    {
167        use crate::constants::MAX_NAMESPACES_ACTIVE;
168        let active_count: u32 = conn.query_row(
169            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
170            [],
171            |r| r.get::<_, i64>(0).map(|v| v as u32),
172        )?;
173        let ns_exists: bool = conn.query_row(
174            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
175            rusqlite::params![namespace],
176            |r| r.get::<_, i64>(0).map(|v| v > 0),
177        )?;
178        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
179            return Err(AppError::NamespaceError(format!(
180                "limite de {MAX_NAMESPACES_ACTIVE} namespaces ativos excedido ao tentar criar '{namespace}'"
181            )));
182        }
183    }
184
185    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
186    let embedder = crate::embedder::get_embedder(&paths.models)?;
187
188    let chunks_info = chunking::split_into_chunks(&raw_body);
189    let chunks_created = chunks_info.len();
190
191    let (body_for_storage, embedding) = if chunks_info.len() == 1 {
192        (
193            raw_body.clone(),
194            crate::embedder::embed_passage(embedder, &raw_body)?,
195        )
196    } else {
197        output::emit_progress_i18n(
198            &format!("Embedding {} chunks...", chunks_info.len()),
199            &format!("Embedando {} chunks...", chunks_info.len()),
200        );
201        let texts: Vec<String> = chunks_info.iter().map(|c| c.text.clone()).collect();
202        let chunk_embeddings = crate::embedder::embed_passages_batch(embedder, &texts)?;
203        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
204        (raw_body.clone(), aggregated)
205    };
206
207    let body_hash = blake3::hash(body_for_storage.as_bytes())
208        .to_hex()
209        .to_string();
210    let snippet: String = body_for_storage.chars().take(200).collect();
211
212    let memory_type = args.r#type.as_str();
213    let new_memory = NewMemory {
214        namespace: namespace.clone(),
215        name: args.name.clone(),
216        memory_type: memory_type.to_string(),
217        description: args.description.clone(),
218        body: body_for_storage.clone(),
219        body_hash: body_hash.clone(),
220        session_id: args.session_id.clone(),
221        source: "agent".to_string(),
222        metadata,
223    };
224
225    let mut warnings = Vec::new();
226
227    let (memory_id, action, version) = match memories::find_by_name(&conn, &namespace, &args.name)?
228    {
229        Some((existing_id, _updated_at, _current_version)) => {
230            if !args.force_merge {
231                return Err(AppError::Duplicate(erros::memoria_duplicada(
232                    &args.name, &namespace,
233                )));
234            }
235            if let Some(hash_id) = memories::find_by_hash(&conn, &namespace, &body_hash)? {
236                if hash_id != existing_id {
237                    warnings.push(format!(
238                        "identical body already exists as memory id {hash_id}"
239                    ));
240                }
241            }
242            let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
243
244            if chunks_info.len() > 1 {
245                storage_chunks::delete_chunks(&tx, existing_id)?;
246            }
247
248            let next_v = versions::next_version(&tx, existing_id)?;
249            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
250            versions::insert_version(
251                &tx,
252                existing_id,
253                next_v,
254                &args.name,
255                memory_type,
256                &args.description,
257                &body_for_storage,
258                &serde_json::to_string(&new_memory.metadata)?,
259                None,
260                "edit",
261            )?;
262            memories::upsert_vec(
263                &tx,
264                existing_id,
265                &namespace,
266                memory_type,
267                &embedding,
268                &args.name,
269                &snippet,
270            )?;
271            tx.commit()?;
272            (existing_id, "updated".to_string(), next_v)
273        }
274        None => {
275            if let Some(hash_id) = memories::find_by_hash(&conn, &namespace, &body_hash)? {
276                warnings.push(format!(
277                    "identical body already exists as memory id {hash_id}"
278                ));
279            }
280            let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
281            let id = memories::insert(&tx, &new_memory)?;
282            versions::insert_version(
283                &tx,
284                id,
285                1,
286                &args.name,
287                memory_type,
288                &args.description,
289                &body_for_storage,
290                &serde_json::to_string(&new_memory.metadata)?,
291                None,
292                "create",
293            )?;
294            memories::upsert_vec(
295                &tx,
296                id,
297                &namespace,
298                memory_type,
299                &embedding,
300                &args.name,
301                &snippet,
302            )?;
303            tx.commit()?;
304            (id, "created".to_string(), 1)
305        }
306    };
307
308    if chunks_info.len() > 1 {
309        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
310        let chunks: Vec<storage_chunks::Chunk> = chunks_info
311            .iter()
312            .enumerate()
313            .map(|(i, c)| storage_chunks::Chunk {
314                memory_id,
315                chunk_idx: i as i32,
316                chunk_text: c.text.clone(),
317                start_offset: c.start_offset as i32,
318                end_offset: c.end_offset as i32,
319                token_count: c.token_count_approx as i32,
320            })
321            .collect();
322        storage_chunks::insert_chunks(&tx, &chunks)?;
323
324        let texts: Vec<String> = chunks_info.iter().map(|c| c.text.clone()).collect();
325        let chunk_embeddings = crate::embedder::embed_passages_batch(embedder, &texts)?;
326
327        for (i, emb) in chunk_embeddings.iter().enumerate() {
328            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
329        }
330        tx.commit()?;
331    }
332
333    let mut entities_persisted = 0usize;
334    let mut relationships_persisted = 0usize;
335
336    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
337        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
338        for entity in &graph.entities {
339            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
340            let entity_text = match &entity.description {
341                Some(desc) => format!("{} {}", entity.name, desc),
342                None => entity.name.clone(),
343            };
344            let entity_embedding = crate::embedder::embed_passage(embedder, &entity_text)?;
345            entities::upsert_entity_vec(
346                &tx,
347                entity_id,
348                &namespace,
349                &entity.entity_type,
350                &entity_embedding,
351                &entity.name,
352            )?;
353            entities::link_memory_entity(&tx, memory_id, entity_id)?;
354            entities::increment_degree(&tx, entity_id)?;
355            entities_persisted += 1;
356        }
357        for rel in &graph.relationships {
358            let source_entity = NewEntity {
359                name: rel.source.clone(),
360                entity_type: "concept".to_string(),
361                description: None,
362            };
363            let target_entity = NewEntity {
364                name: rel.target.clone(),
365                entity_type: "concept".to_string(),
366                description: None,
367            };
368            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
369            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
370            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
371            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
372            relationships_persisted += 1;
373        }
374        tx.commit()?;
375    }
376
377    let created_at_epoch = chrono::Utc::now().timestamp();
378    let created_at_iso = crate::tz::formatar_iso(chrono::Utc::now());
379
380    output::emit_json(&RememberResponse {
381        memory_id,
382        name: args.name,
383        namespace,
384        action: action.clone(),
385        operation: action,
386        version,
387        entities_persisted,
388        relationships_persisted,
389        chunks_created,
390        merged_into_memory_id: None,
391        warnings,
392        created_at: created_at_epoch,
393        created_at_iso,
394        elapsed_ms: inicio.elapsed().as_millis() as u64,
395    })?;
396
397    Ok(())
398}
399
#[cfg(test)]
mod testes {
    use crate::errors::AppError;
    use crate::output::RememberResponse;

    /// Baseline response used as the starting point of every serialization test;
    /// each test overrides only the fields it cares about.
    fn base_response() -> RememberResponse {
        RememberResponse {
            memory_id: 1,
            name: "mem".to_string(),
            namespace: "global".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 0,
            relationships_persisted: 0,
            chunks_created: 1,
            merged_into_memory_id: None,
            warnings: vec![],
            created_at: 0,
            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
            elapsed_ms: 0,
        }
    }

    /// Serializes a response into a JSON value, failing the test on error.
    fn to_json(resp: &RememberResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialização falhou")
    }

    #[test]
    fn remember_response_serializa_campos_obrigatorios() {
        let json = to_json(&RememberResponse {
            memory_id: 42,
            name: "minha-mem".to_string(),
            created_at: 1_705_320_000,
            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
            elapsed_ms: 55,
            ..base_response()
        });

        assert_eq!(json["memory_id"], 42);
        assert_eq!(json["action"], "created");
        assert_eq!(json["operation"], "created");
        assert_eq!(json["version"], 1);
        assert_eq!(json["elapsed_ms"], 55u64);
        assert!(json["warnings"].is_array());
        assert!(json["merged_into_memory_id"].is_null());
    }

    #[test]
    fn remember_response_action_e_operation_sao_aliases() {
        let json = to_json(&RememberResponse {
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 2,
            entities_persisted: 3,
            relationships_persisted: 1,
            chunks_created: 2,
            ..base_response()
        });

        assert_eq!(
            json["action"], json["operation"],
            "action e operation devem ser iguais"
        );
        assert_eq!(json["entities_persisted"], 3);
        assert_eq!(json["relationships_persisted"], 1);
        assert_eq!(json["chunks_created"], 2);
    }

    #[test]
    fn remember_response_warnings_lista_mensagens() {
        let json = to_json(&RememberResponse {
            memory_id: 5,
            name: "dup-mem".to_string(),
            warnings: vec!["identical body already exists as memory id 3".to_string()],
            elapsed_ms: 10,
            ..base_response()
        });

        let warnings = json["warnings"]
            .as_array()
            .expect("warnings deve ser array");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].as_str().unwrap().contains("identical body"));
    }

    #[test]
    fn nome_invalido_prefixo_reservado_retorna_validation_error() {
        // Exercises the rejection rule for names with the "__" prefix directly.
        let candidate = "__reservado";
        let outcome: Result<(), AppError> = match candidate.strip_prefix("__") {
            Some(_) => Err(AppError::Validation(
                crate::i18n::validacao::nome_reservado(),
            )),
            None => Ok(()),
        };
        assert!(outcome.is_err());
        if let Err(AppError::Validation(msg)) = outcome {
            assert!(!msg.is_empty());
        }
    }

    #[test]
    fn nome_muito_longo_retorna_validation_error() {
        let limit = crate::constants::MAX_MEMORY_NAME_LEN;
        let too_long = "a".repeat(limit + 1);
        let out_of_bounds = too_long.is_empty() || too_long.len() > limit;
        let outcome: Result<(), AppError> = if out_of_bounds {
            Err(AppError::Validation(
                crate::i18n::validacao::nome_comprimento(limit),
            ))
        } else {
            Ok(())
        };
        assert!(outcome.is_err());
    }

    #[test]
    fn remember_response_merged_into_memory_id_some_serializa_inteiro() {
        let json = to_json(&RememberResponse {
            memory_id: 10,
            name: "mem-mergeada".to_string(),
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 3,
            merged_into_memory_id: Some(7),
            ..base_response()
        });

        assert_eq!(json["merged_into_memory_id"], 7);
    }
}