Skip to main content

sqlite_graphrag/commands/
remember.rs

1use crate::chunking;
2use crate::cli::MemoryType;
3use crate::errors::AppError;
4use crate::i18n::erros;
5use crate::output::{self, JsonOutputFormat, RememberResponse};
6use crate::paths::AppPaths;
7use crate::storage::chunks as storage_chunks;
8use crate::storage::connection::open_rw;
9use crate::storage::entities::{NewEntity, NewRelationship};
10use crate::storage::memories::NewMemory;
11use crate::storage::{entities, memories, versions};
12use serde::Deserialize;
13use std::io::Read as _;
14
// Command-line arguments for the `remember` subcommand, consumed by `run`.
//
// NOTE: plain `//` comments are used on purpose; clap's derive turns `///`
// doc comments into help text, which would change the CLI output.
#[derive(clap::Args)]
pub struct RememberArgs {
    // Memory name. `run` rejects it when empty, longer than
    // MAX_MEMORY_NAME_LEN, starting with "__", or not matching NAME_SLUG_REGEX.
    #[arg(long)]
    pub name: String,
    // Memory type; stored through `MemoryType::as_str`.
    #[arg(long, value_enum)]
    pub r#type: MemoryType,
    // Description; `run` rejects it when longer than MAX_MEMORY_DESCRIPTION_LEN.
    #[arg(long)]
    pub description: String,
    // Inline body text. Takes precedence over `body_file` and stdin.
    #[arg(long)]
    pub body: Option<String>,
    // File to read the body from; used only when `body` is absent.
    #[arg(long)]
    pub body_file: Option<std::path::PathBuf>,
    // Read the body from stdin; used only when `body` and `body_file` are absent.
    #[arg(long)]
    pub body_stdin: bool,
    // JSON file deserialized into the entity list (ignored with `skip_extraction`).
    #[arg(long)]
    pub entities_file: Option<std::path::PathBuf>,
    // JSON file deserialized into the relationship list (ignored with `skip_extraction`).
    #[arg(long)]
    pub relationships_file: Option<std::path::PathBuf>,
    // Try to parse the raw input as a graph JSON object; when it parses, it
    // replaces the graph loaded from files and the stored body becomes empty.
    #[arg(long)]
    pub graph_stdin: bool,
    // Namespace, resolved by `crate::namespace::resolve_namespace`.
    #[arg(long, default_value = "global")]
    pub namespace: Option<String>,
    // Inline JSON metadata. Takes precedence over `metadata_file`.
    #[arg(long)]
    pub metadata: Option<String>,
    // File containing JSON metadata; used only when `metadata` is absent.
    #[arg(long)]
    pub metadata_file: Option<std::path::PathBuf>,
    // When a memory with the same name exists in the namespace, update it
    // instead of failing with a duplicate error.
    #[arg(long)]
    pub force_merge: bool,
    // Optimistic-lock value forwarded to `memories::update` on the update path.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    // Skip loading entities/relationships from files and from the raw input.
    #[arg(long)]
    pub skip_extraction: bool,
    // Optional session identifier stored alongside the memory.
    #[arg(long)]
    pub session_id: Option<String>,
    // Output format flag; `run` reads it and discards it.
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    // Hidden compatibility flag with no effect (see its help text).
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override, also read from SQLITE_GRAPHRAG_DB_PATH;
    // forwarded to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
62
63#[derive(Deserialize, Default)]
64struct GraphInput {
65    #[serde(default)]
66    entities: Vec<NewEntity>,
67    #[serde(default)]
68    relationships: Vec<NewRelationship>,
69}
70
71pub fn run(args: RememberArgs) -> Result<(), AppError> {
72    use crate::constants::*;
73
74    let inicio = std::time::Instant::now();
75    let _ = args.format;
76    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
77
78    if args.name.is_empty() || args.name.len() > MAX_MEMORY_NAME_LEN {
79        return Err(AppError::Validation(
80            crate::i18n::validacao::nome_comprimento(MAX_MEMORY_NAME_LEN),
81        ));
82    }
83
84    if args.name.starts_with("__") {
85        return Err(AppError::Validation(
86            crate::i18n::validacao::nome_reservado(),
87        ));
88    }
89
90    {
91        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
92            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
93        if !slug_re.is_match(&args.name) {
94            return Err(AppError::Validation(crate::i18n::validacao::nome_kebab(
95                &args.name,
96            )));
97        }
98    }
99
100    if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
101        return Err(AppError::Validation(
102            crate::i18n::validacao::descricao_excede(MAX_MEMORY_DESCRIPTION_LEN),
103        ));
104    }
105
106    let mut raw_body = if let Some(b) = args.body {
107        b
108    } else if let Some(path) = args.body_file {
109        std::fs::read_to_string(&path).map_err(AppError::Io)?
110    } else if args.body_stdin || args.graph_stdin {
111        let mut buf = String::new();
112        std::io::stdin()
113            .read_to_string(&mut buf)
114            .map_err(AppError::Io)?;
115        buf
116    } else {
117        String::new()
118    };
119
120    let mut graph = GraphInput::default();
121    if !args.skip_extraction {
122        if let Some(path) = args.entities_file {
123            let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
124            graph.entities = serde_json::from_str(&content)?;
125        }
126        if let Some(path) = args.relationships_file {
127            let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
128            graph.relationships = serde_json::from_str(&content)?;
129        }
130        if args.graph_stdin {
131            if let Ok(g) = serde_json::from_str::<GraphInput>(&raw_body) {
132                graph = g;
133                raw_body = String::new();
134            }
135        }
136    }
137
138    if graph.entities.len() > MAX_ENTITIES_PER_MEMORY {
139        return Err(AppError::LimitExceeded(erros::limite_entidades(
140            MAX_ENTITIES_PER_MEMORY,
141        )));
142    }
143    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
144        return Err(AppError::LimitExceeded(erros::limite_relacionamentos(
145            MAX_RELATIONSHIPS_PER_MEMORY,
146        )));
147    }
148
149    if raw_body.len() > MAX_MEMORY_BODY_LEN {
150        return Err(AppError::LimitExceeded(
151            crate::i18n::validacao::body_excede(MAX_MEMORY_BODY_LEN),
152        ));
153    }
154
155    let metadata: serde_json::Value = if let Some(m) = args.metadata {
156        serde_json::from_str(&m)?
157    } else if let Some(path) = args.metadata_file {
158        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
159        serde_json::from_str(&content)?
160    } else {
161        serde_json::json!({})
162    };
163
164    let paths = AppPaths::resolve(args.db.as_deref())?;
165    let mut conn = open_rw(&paths.db)?;
166
167    {
168        use crate::constants::MAX_NAMESPACES_ACTIVE;
169        let active_count: u32 = conn.query_row(
170            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
171            [],
172            |r| r.get::<_, i64>(0).map(|v| v as u32),
173        )?;
174        let ns_exists: bool = conn.query_row(
175            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
176            rusqlite::params![namespace],
177            |r| r.get::<_, i64>(0).map(|v| v > 0),
178        )?;
179        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
180            return Err(AppError::NamespaceError(format!(
181                "limite de {MAX_NAMESPACES_ACTIVE} namespaces ativos excedido ao tentar criar '{namespace}'"
182            )));
183        }
184    }
185
186    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
187    let embedder = crate::embedder::get_embedder(&paths.models)?;
188
189    let chunks_info = chunking::split_into_chunks(&raw_body);
190    let chunks_created = chunks_info.len();
191
192    let (body_for_storage, embedding) = if chunks_info.len() == 1 {
193        (
194            raw_body.clone(),
195            crate::embedder::embed_passage(embedder, &raw_body)?,
196        )
197    } else {
198        output::emit_progress_i18n(
199            &format!("Embedding {} chunks...", chunks_info.len()),
200            &format!("Embedando {} chunks...", chunks_info.len()),
201        );
202        let texts: Vec<String> = chunks_info.iter().map(|c| c.text.clone()).collect();
203        let chunk_embeddings = crate::embedder::embed_passages_batch(embedder, &texts)?;
204        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
205        (raw_body.clone(), aggregated)
206    };
207
208    let body_hash = blake3::hash(body_for_storage.as_bytes())
209        .to_hex()
210        .to_string();
211    let snippet: String = body_for_storage.chars().take(200).collect();
212
213    let memory_type = args.r#type.as_str();
214    let new_memory = NewMemory {
215        namespace: namespace.clone(),
216        name: args.name.clone(),
217        memory_type: memory_type.to_string(),
218        description: args.description.clone(),
219        body: body_for_storage.clone(),
220        body_hash: body_hash.clone(),
221        session_id: args.session_id.clone(),
222        source: "agent".to_string(),
223        metadata,
224    };
225
226    let mut warnings = Vec::new();
227
228    let (memory_id, action, version) = match memories::find_by_name(&conn, &namespace, &args.name)?
229    {
230        Some((existing_id, _updated_at, _current_version)) => {
231            if !args.force_merge {
232                return Err(AppError::Duplicate(erros::memoria_duplicada(
233                    &args.name, &namespace,
234                )));
235            }
236            if let Some(hash_id) = memories::find_by_hash(&conn, &namespace, &body_hash)? {
237                if hash_id != existing_id {
238                    warnings.push(format!(
239                        "identical body already exists as memory id {hash_id}"
240                    ));
241                }
242            }
243            let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
244
245            if chunks_info.len() > 1 {
246                storage_chunks::delete_chunks(&tx, existing_id)?;
247            }
248
249            let next_v = versions::next_version(&tx, existing_id)?;
250            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
251            versions::insert_version(
252                &tx,
253                existing_id,
254                next_v,
255                &args.name,
256                memory_type,
257                &args.description,
258                &body_for_storage,
259                &serde_json::to_string(&new_memory.metadata)?,
260                None,
261                "edit",
262            )?;
263            memories::upsert_vec(
264                &tx,
265                existing_id,
266                &namespace,
267                memory_type,
268                &embedding,
269                &args.name,
270                &snippet,
271            )?;
272            tx.commit()?;
273            (existing_id, "updated".to_string(), next_v)
274        }
275        None => {
276            if let Some(hash_id) = memories::find_by_hash(&conn, &namespace, &body_hash)? {
277                warnings.push(format!(
278                    "identical body already exists as memory id {hash_id}"
279                ));
280            }
281            let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
282            let id = memories::insert(&tx, &new_memory)?;
283            versions::insert_version(
284                &tx,
285                id,
286                1,
287                &args.name,
288                memory_type,
289                &args.description,
290                &body_for_storage,
291                &serde_json::to_string(&new_memory.metadata)?,
292                None,
293                "create",
294            )?;
295            memories::upsert_vec(
296                &tx,
297                id,
298                &namespace,
299                memory_type,
300                &embedding,
301                &args.name,
302                &snippet,
303            )?;
304            tx.commit()?;
305            (id, "created".to_string(), 1)
306        }
307    };
308
309    if chunks_info.len() > 1 {
310        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
311        let chunks: Vec<storage_chunks::Chunk> = chunks_info
312            .iter()
313            .enumerate()
314            .map(|(i, c)| storage_chunks::Chunk {
315                memory_id,
316                chunk_idx: i as i32,
317                chunk_text: c.text.clone(),
318                start_offset: c.start_offset as i32,
319                end_offset: c.end_offset as i32,
320                token_count: c.token_count_approx as i32,
321            })
322            .collect();
323        storage_chunks::insert_chunks(&tx, &chunks)?;
324
325        let texts: Vec<String> = chunks_info.iter().map(|c| c.text.clone()).collect();
326        let chunk_embeddings = crate::embedder::embed_passages_batch(embedder, &texts)?;
327
328        for (i, emb) in chunk_embeddings.iter().enumerate() {
329            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
330        }
331        tx.commit()?;
332    }
333
334    let mut entities_persisted = 0usize;
335    let mut relationships_persisted = 0usize;
336
337    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
338        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
339        for entity in &graph.entities {
340            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
341            let entity_text = match &entity.description {
342                Some(desc) => format!("{} {}", entity.name, desc),
343                None => entity.name.clone(),
344            };
345            let entity_embedding = crate::embedder::embed_passage(embedder, &entity_text)?;
346            entities::upsert_entity_vec(
347                &tx,
348                entity_id,
349                &namespace,
350                &entity.entity_type,
351                &entity_embedding,
352                &entity.name,
353            )?;
354            entities::link_memory_entity(&tx, memory_id, entity_id)?;
355            entities::increment_degree(&tx, entity_id)?;
356            entities_persisted += 1;
357        }
358        for rel in &graph.relationships {
359            let source_entity = NewEntity {
360                name: rel.source.clone(),
361                entity_type: "concept".to_string(),
362                description: None,
363            };
364            let target_entity = NewEntity {
365                name: rel.target.clone(),
366                entity_type: "concept".to_string(),
367                description: None,
368            };
369            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
370            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
371            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
372            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
373            relationships_persisted += 1;
374        }
375        tx.commit()?;
376    }
377
378    let created_at_epoch = chrono::Utc::now().timestamp();
379    let created_at_iso = crate::tz::formatar_iso(chrono::Utc::now());
380
381    output::emit_json(&RememberResponse {
382        memory_id,
383        name: args.name,
384        namespace,
385        action: action.clone(),
386        operation: action,
387        version,
388        entities_persisted,
389        relationships_persisted,
390        chunks_created,
391        merged_into_memory_id: None,
392        warnings,
393        created_at: created_at_epoch,
394        created_at_iso,
395        elapsed_ms: inicio.elapsed().as_millis() as u64,
396    })?;
397
398    Ok(())
399}
400
#[cfg(test)]
mod testes {
    use crate::errors::AppError;
    use crate::output::RememberResponse;

    /// Baseline response shared by the serialization tests; each test
    /// overrides only the fields it cares about via struct-update syntax.
    fn resposta_base() -> RememberResponse {
        RememberResponse {
            memory_id: 0,
            name: String::new(),
            namespace: "global".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 0,
            relationships_persisted: 0,
            chunks_created: 1,
            merged_into_memory_id: None,
            warnings: vec![],
            created_at: 0,
            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
            elapsed_ms: 0,
        }
    }

    /// Required fields appear in the JSON with the expected values and types.
    #[test]
    fn remember_response_serializa_campos_obrigatorios() {
        let resp = RememberResponse {
            memory_id: 42,
            name: "minha-mem".to_string(),
            created_at: 1_705_320_000,
            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
            elapsed_ms: 55,
            ..resposta_base()
        };

        let json = serde_json::to_value(&resp).expect("serialização falhou");
        assert_eq!(json["memory_id"], 42);
        assert_eq!(json["action"], "created");
        assert_eq!(json["operation"], "created");
        assert_eq!(json["version"], 1);
        assert_eq!(json["elapsed_ms"], 55u64);
        assert!(json["warnings"].is_array());
        assert!(json["merged_into_memory_id"].is_null());
    }

    /// `action` and `operation` serialize to the same value; counters round-trip.
    #[test]
    fn remember_response_action_e_operation_sao_aliases() {
        let resp = RememberResponse {
            memory_id: 1,
            name: "mem".to_string(),
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 2,
            entities_persisted: 3,
            relationships_persisted: 1,
            chunks_created: 2,
            ..resposta_base()
        };

        let json = serde_json::to_value(&resp).expect("serialização falhou");
        assert_eq!(
            json["action"], json["operation"],
            "action e operation devem ser iguais"
        );
        assert_eq!(json["entities_persisted"], 3);
        assert_eq!(json["relationships_persisted"], 1);
        assert_eq!(json["chunks_created"], 2);
    }

    /// Warning messages are serialized as a JSON array of strings.
    #[test]
    fn remember_response_warnings_lista_mensagens() {
        let resp = RememberResponse {
            memory_id: 5,
            name: "dup-mem".to_string(),
            warnings: vec!["identical body already exists as memory id 3".to_string()],
            elapsed_ms: 10,
            ..resposta_base()
        };

        let json = serde_json::to_value(&resp).expect("serialização falhou");
        let warnings = json["warnings"]
            .as_array()
            .expect("warnings deve ser array");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].as_str().unwrap().contains("identical body"));
    }

    /// Mirrors the reserved-prefix ("__") rejection rule and checks that it
    /// yields a non-empty validation message.
    #[test]
    fn nome_invalido_prefixo_reservado_retorna_validation_error() {
        let nome = "__reservado";
        let resultado: Result<(), AppError> = if !nome.starts_with("__") {
            Ok(())
        } else {
            Err(AppError::Validation(
                crate::i18n::validacao::nome_reservado(),
            ))
        };
        assert!(resultado.is_err());
        if let Err(AppError::Validation(msg)) = resultado {
            assert!(!msg.is_empty());
        }
    }

    /// Mirrors the name-length rule: one byte over the limit is rejected.
    #[test]
    fn nome_muito_longo_retorna_validation_error() {
        let limite = crate::constants::MAX_MEMORY_NAME_LEN;
        let nome_longo = "a".repeat(limite + 1);
        let fora_do_limite = nome_longo.is_empty() || nome_longo.len() > limite;
        let resultado: Result<(), AppError> = if fora_do_limite {
            Err(AppError::Validation(
                crate::i18n::validacao::nome_comprimento(limite),
            ))
        } else {
            Ok(())
        };
        assert!(resultado.is_err());
    }

    /// A populated `merged_into_memory_id` serializes as a plain integer.
    #[test]
    fn remember_response_merged_into_memory_id_some_serializa_inteiro() {
        let resp = RememberResponse {
            memory_id: 10,
            name: "mem-mergeada".to_string(),
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 3,
            merged_into_memory_id: Some(7),
            ..resposta_base()
        };

        let json = serde_json::to_value(&resp).expect("serialização falhou");
        assert_eq!(json["merged_into_memory_id"], 7);
    }
}