// sqlite_graphrag/commands/remember.rs

1use crate::chunking;
2use crate::cli::MemoryType;
3use crate::errors::AppError;
4use crate::i18n::erros;
5use crate::output::{self, JsonOutputFormat, RememberResponse};
6use crate::paths::AppPaths;
7use crate::storage::chunks as storage_chunks;
8use crate::storage::connection::open_rw;
9use crate::storage::entities::{NewEntity, NewRelationship};
10use crate::storage::memories::NewMemory;
11use crate::storage::{entities, memories, versions};
12use serde::Deserialize;
13use std::io::Read as _;
14
15#[derive(clap::Args)]
16pub struct RememberArgs {
17    #[arg(long)]
18    pub name: String,
19    #[arg(long, value_enum)]
20    pub r#type: MemoryType,
21    #[arg(long)]
22    pub description: String,
23    #[arg(long)]
24    pub body: Option<String>,
25    #[arg(long)]
26    pub body_file: Option<std::path::PathBuf>,
27    #[arg(long)]
28    pub body_stdin: bool,
29    #[arg(long)]
30    pub entities_file: Option<std::path::PathBuf>,
31    #[arg(long)]
32    pub relationships_file: Option<std::path::PathBuf>,
33    #[arg(long)]
34    pub graph_stdin: bool,
35    #[arg(long, default_value = "global")]
36    pub namespace: Option<String>,
37    #[arg(long)]
38    pub metadata: Option<String>,
39    #[arg(long)]
40    pub metadata_file: Option<std::path::PathBuf>,
41    #[arg(long)]
42    pub force_merge: bool,
43    #[arg(
44        long,
45        value_name = "EPOCH_OR_RFC3339",
46        value_parser = crate::parsers::parse_expected_updated_at,
47        long_help = "Optimistic lock: reject if updated_at does not match. \
48Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
49    )]
50    pub expected_updated_at: Option<i64>,
51    #[arg(long)]
52    pub skip_extraction: bool,
53    #[arg(long)]
54    pub session_id: Option<String>,
55    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
56    pub format: JsonOutputFormat,
57    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
58    pub json: bool,
59    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
60    pub db: Option<String>,
61}
62
63#[derive(Deserialize, Default)]
64struct GraphInput {
65    #[serde(default)]
66    entities: Vec<NewEntity>,
67    #[serde(default)]
68    relationships: Vec<NewRelationship>,
69}
70
71pub fn run(args: RememberArgs) -> Result<(), AppError> {
72    use crate::constants::*;
73
74    let inicio = std::time::Instant::now();
75    let _ = args.format;
76    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
77
78    if args.name.is_empty() || args.name.len() > MAX_MEMORY_NAME_LEN {
79        return Err(AppError::Validation(
80            crate::i18n::validacao::nome_comprimento(MAX_MEMORY_NAME_LEN),
81        ));
82    }
83
84    if args.name.starts_with("__") {
85        return Err(AppError::Validation(
86            crate::i18n::validacao::nome_reservado(),
87        ));
88    }
89
90    {
91        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
92            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
93        if !slug_re.is_match(&args.name) {
94            return Err(AppError::Validation(crate::i18n::validacao::nome_kebab(
95                &args.name,
96            )));
97        }
98    }
99
100    if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
101        return Err(AppError::Validation(
102            crate::i18n::validacao::descricao_excede(MAX_MEMORY_DESCRIPTION_LEN),
103        ));
104    }
105
106    let mut raw_body = if let Some(b) = args.body {
107        if b.len() > REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES {
108            return Err(AppError::LimitExceeded(format!(
109                "documento tem {} bytes; limite operacional seguro atual é {REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES} bytes; reduza ou divida o documento antes de usar remember",
110                b.len()
111            )));
112        }
113        b
114    } else if let Some(path) = args.body_file {
115        let file_len = std::fs::metadata(&path).map_err(AppError::Io)?.len() as usize;
116        if file_len > REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES {
117            return Err(AppError::LimitExceeded(format!(
118                "arquivo tem {file_len} bytes; limite operacional seguro atual é {REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES} bytes; reduza ou divida o documento antes de usar remember"
119            )));
120        }
121        std::fs::read_to_string(&path).map_err(AppError::Io)?
122    } else if args.body_stdin || args.graph_stdin {
123        let mut buf = String::new();
124        std::io::stdin()
125            .read_to_string(&mut buf)
126            .map_err(AppError::Io)?;
127        if buf.len() > REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES {
128            return Err(AppError::LimitExceeded(format!(
129                "entrada stdin tem {} bytes; limite operacional seguro atual é {REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES} bytes; reduza ou divida o documento antes de usar remember",
130                buf.len()
131            )));
132        }
133        buf
134    } else {
135        String::new()
136    };
137
138    let mut graph = GraphInput::default();
139    if !args.skip_extraction {
140        if let Some(path) = args.entities_file {
141            let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
142            graph.entities = serde_json::from_str(&content)?;
143        }
144        if let Some(path) = args.relationships_file {
145            let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
146            graph.relationships = serde_json::from_str(&content)?;
147        }
148        if args.graph_stdin {
149            if let Ok(g) = serde_json::from_str::<GraphInput>(&raw_body) {
150                graph = g;
151                raw_body = String::new();
152            }
153        }
154    }
155
156    if graph.entities.len() > MAX_ENTITIES_PER_MEMORY {
157        return Err(AppError::LimitExceeded(erros::limite_entidades(
158            MAX_ENTITIES_PER_MEMORY,
159        )));
160    }
161    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
162        return Err(AppError::LimitExceeded(erros::limite_relacionamentos(
163            MAX_RELATIONSHIPS_PER_MEMORY,
164        )));
165    }
166
167    if raw_body.len() > MAX_MEMORY_BODY_LEN {
168        return Err(AppError::LimitExceeded(
169            crate::i18n::validacao::body_excede(MAX_MEMORY_BODY_LEN),
170        ));
171    }
172
173    let metadata: serde_json::Value = if let Some(m) = args.metadata {
174        serde_json::from_str(&m)?
175    } else if let Some(path) = args.metadata_file {
176        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
177        serde_json::from_str(&content)?
178    } else {
179        serde_json::json!({})
180    };
181
182    let body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
183    let snippet: String = raw_body.chars().take(200).collect();
184
185    let paths = AppPaths::resolve(args.db.as_deref())?;
186    let mut conn = open_rw(&paths.db)?;
187
188    {
189        use crate::constants::MAX_NAMESPACES_ACTIVE;
190        let active_count: u32 = conn.query_row(
191            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
192            [],
193            |r| r.get::<_, i64>(0).map(|v| v as u32),
194        )?;
195        let ns_exists: bool = conn.query_row(
196            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
197            rusqlite::params![namespace],
198            |r| r.get::<_, i64>(0).map(|v| v > 0),
199        )?;
200        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
201            return Err(AppError::NamespaceError(format!(
202                "limite de {MAX_NAMESPACES_ACTIVE} namespaces ativos excedido ao tentar criar '{namespace}'"
203            )));
204        }
205    }
206
207    let existing_memory = memories::find_by_name(&conn, &namespace, &args.name)?;
208    if existing_memory.is_some() && !args.force_merge {
209        return Err(AppError::Duplicate(erros::memoria_duplicada(
210            &args.name, &namespace,
211        )));
212    }
213
214    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
215
216    output::emit_progress_i18n(
217        &format!(
218            "Remember stage: validated input; available memory {} MB",
219            crate::memory_guard::available_memory_mb()
220        ),
221        &format!(
222            "Etapa remember: entrada validada; memória disponível {} MB",
223            crate::memory_guard::available_memory_mb()
224        ),
225    );
226
227    let chunks_info = chunking::split_into_chunks(&raw_body);
228    let chunks_created = chunks_info.len();
229
230    output::emit_progress_i18n(
231        &format!(
232            "Remember stage: chunking produced {} chunks; process RSS {} MB",
233            chunks_created,
234            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
235        ),
236        &format!(
237            "Etapa remember: chunking gerou {} chunks; RSS do processo {} MB",
238            chunks_created,
239            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
240        ),
241    );
242
243    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
244        return Err(AppError::LimitExceeded(format!(
245            "documento gera {chunks_created} chunks; limite operacional seguro atual é {} chunks; divida o documento antes de usar remember",
246            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
247        )));
248    }
249
250    if chunks_created > 1
251        && raw_body.len() > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES
252    {
253        return Err(AppError::LimitExceeded(format!(
254            "documento multi-chunk tem {} bytes; limite operacional seguro atual é {} bytes; reduza ou divida o documento antes de usar remember",
255            raw_body.len(),
256            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES
257        )));
258    }
259
260    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
261    let embedder = crate::embedder::get_embedder(&paths.models)?;
262
263    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
264
265    let embedding = if chunks_info.len() == 1 {
266        crate::embedder::embed_passage(embedder, &raw_body)?
267    } else {
268        output::emit_progress_i18n(
269            &format!("Embedding {} chunks...", chunks_info.len()),
270            &format!("Embedando {} chunks...", chunks_info.len()),
271        );
272        let chunk_embeddings = crate::embedder::embed_passages_serial(
273            embedder,
274            chunks_info
275                .iter()
276                .map(|c| chunking::chunk_text(&raw_body, c)),
277        )?;
278        output::emit_progress_i18n(
279            &format!(
280                "Remember stage: chunk embeddings complete; process RSS {} MB",
281                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
282            ),
283            &format!(
284                "Etapa remember: embeddings dos chunks concluídos; RSS do processo {} MB",
285                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
286            ),
287        );
288        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
289        chunk_embeddings_cache = Some(chunk_embeddings);
290        aggregated
291    };
292    let body_for_storage = raw_body;
293
294    let memory_type = args.r#type.as_str();
295    let new_memory = NewMemory {
296        namespace: namespace.clone(),
297        name: args.name.clone(),
298        memory_type: memory_type.to_string(),
299        description: args.description.clone(),
300        body: body_for_storage,
301        body_hash: body_hash.clone(),
302        session_id: args.session_id.clone(),
303        source: "agent".to_string(),
304        metadata,
305    };
306
307    let mut warnings = Vec::new();
308
309    let (memory_id, action, version) = match existing_memory {
310        Some((existing_id, _updated_at, _current_version)) => {
311            if let Some(hash_id) = duplicate_hash_id {
312                if hash_id != existing_id {
313                    warnings.push(format!(
314                        "identical body already exists as memory id {hash_id}"
315                    ));
316                }
317            }
318            let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
319
320            if chunks_info.len() > 1 {
321                storage_chunks::delete_chunks(&tx, existing_id)?;
322            }
323
324            let next_v = versions::next_version(&tx, existing_id)?;
325            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
326            versions::insert_version(
327                &tx,
328                existing_id,
329                next_v,
330                &args.name,
331                memory_type,
332                &args.description,
333                &new_memory.body,
334                &serde_json::to_string(&new_memory.metadata)?,
335                None,
336                "edit",
337            )?;
338            memories::upsert_vec(
339                &tx,
340                existing_id,
341                &namespace,
342                memory_type,
343                &embedding,
344                &args.name,
345                &snippet,
346            )?;
347            tx.commit()?;
348            (existing_id, "updated".to_string(), next_v)
349        }
350        None => {
351            if let Some(hash_id) = duplicate_hash_id {
352                warnings.push(format!(
353                    "identical body already exists as memory id {hash_id}"
354                ));
355            }
356            let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
357            let id = memories::insert(&tx, &new_memory)?;
358            versions::insert_version(
359                &tx,
360                id,
361                1,
362                &args.name,
363                memory_type,
364                &args.description,
365                &new_memory.body,
366                &serde_json::to_string(&new_memory.metadata)?,
367                None,
368                "create",
369            )?;
370            memories::upsert_vec(
371                &tx,
372                id,
373                &namespace,
374                memory_type,
375                &embedding,
376                &args.name,
377                &snippet,
378            )?;
379            tx.commit()?;
380            (id, "created".to_string(), 1)
381        }
382    };
383
384    if chunks_info.len() > 1 {
385        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
386        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
387
388        let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
389            AppError::Internal(anyhow::anyhow!(
390                "chunk embeddings cache missing for multi-chunk remember path"
391            ))
392        })?;
393
394        for (i, emb) in chunk_embeddings.iter().enumerate() {
395            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
396        }
397        tx.commit()?;
398        output::emit_progress_i18n(
399            &format!(
400                "Remember stage: persisted chunk vectors; process RSS {} MB",
401                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
402            ),
403            &format!(
404                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
405                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
406            ),
407        );
408    }
409
410    let mut entities_persisted = 0usize;
411    let mut relationships_persisted = 0usize;
412
413    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
414        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
415        for entity in &graph.entities {
416            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
417            let entity_text = match &entity.description {
418                Some(desc) => format!("{} {}", entity.name, desc),
419                None => entity.name.clone(),
420            };
421            let entity_embedding = crate::embedder::embed_passage(embedder, &entity_text)?;
422            entities::upsert_entity_vec(
423                &tx,
424                entity_id,
425                &namespace,
426                &entity.entity_type,
427                &entity_embedding,
428                &entity.name,
429            )?;
430            entities::link_memory_entity(&tx, memory_id, entity_id)?;
431            entities::increment_degree(&tx, entity_id)?;
432            entities_persisted += 1;
433        }
434        for rel in &graph.relationships {
435            let source_entity = NewEntity {
436                name: rel.source.clone(),
437                entity_type: "concept".to_string(),
438                description: None,
439            };
440            let target_entity = NewEntity {
441                name: rel.target.clone(),
442                entity_type: "concept".to_string(),
443                description: None,
444            };
445            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
446            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
447            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
448            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
449            relationships_persisted += 1;
450        }
451        tx.commit()?;
452    }
453
454    let created_at_epoch = chrono::Utc::now().timestamp();
455    let created_at_iso = crate::tz::formatar_iso(chrono::Utc::now());
456
457    output::emit_json(&RememberResponse {
458        memory_id,
459        name: args.name,
460        namespace,
461        action: action.clone(),
462        operation: action,
463        version,
464        entities_persisted,
465        relationships_persisted,
466        chunks_created,
467        merged_into_memory_id: None,
468        warnings,
469        created_at: created_at_epoch,
470        created_at_iso,
471        elapsed_ms: inicio.elapsed().as_millis() as u64,
472    })?;
473
474    Ok(())
475}
476
#[cfg(test)]
mod testes {
    use crate::constants::MAX_MEMORY_NAME_LEN;
    use crate::errors::AppError;
    use crate::output::RememberResponse;

    /// Neutral "created" response; each test overrides only the fields it
    /// cares about through struct-update syntax.
    fn resposta_base() -> RememberResponse {
        RememberResponse {
            memory_id: 1,
            name: "mem".to_string(),
            namespace: "global".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 0,
            relationships_persisted: 0,
            chunks_created: 1,
            merged_into_memory_id: None,
            warnings: vec![],
            created_at: 0,
            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
            elapsed_ms: 0,
        }
    }

    /// Serializes a response into a JSON value, failing the test on error.
    fn para_json(resp: &RememberResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialização falhou")
    }

    /// The mandatory fields appear in the serialized JSON with their values.
    #[test]
    fn remember_response_serializa_campos_obrigatorios() {
        let json = para_json(&RememberResponse {
            memory_id: 42,
            name: "minha-mem".to_string(),
            created_at: 1_705_320_000,
            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
            elapsed_ms: 55,
            ..resposta_base()
        });

        assert_eq!(json["memory_id"], 42);
        assert_eq!(json["action"], "created");
        assert_eq!(json["operation"], "created");
        assert_eq!(json["version"], 1);
        assert_eq!(json["elapsed_ms"], 55u64);
        assert!(json["warnings"].is_array());
        assert!(json["merged_into_memory_id"].is_null());
    }

    /// `action` and `operation` serialize to the same value.
    #[test]
    fn remember_response_action_e_operation_sao_aliases() {
        let json = para_json(&RememberResponse {
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 2,
            entities_persisted: 3,
            relationships_persisted: 1,
            chunks_created: 2,
            ..resposta_base()
        });

        assert_eq!(
            json["action"], json["operation"],
            "action e operation devem ser iguais"
        );
        assert_eq!(json["entities_persisted"], 3);
        assert_eq!(json["relationships_persisted"], 1);
        assert_eq!(json["chunks_created"], 2);
    }

    /// Warning messages are serialized as an array of strings.
    #[test]
    fn remember_response_warnings_lista_mensagens() {
        let json = para_json(&RememberResponse {
            memory_id: 5,
            name: "dup-mem".to_string(),
            warnings: vec!["identical body already exists as memory id 3".to_string()],
            elapsed_ms: 10,
            ..resposta_base()
        });

        let avisos = json["warnings"]
            .as_array()
            .expect("warnings deve ser array");
        assert_eq!(avisos.len(), 1);
        assert!(avisos[0].as_str().unwrap().contains("identical body"));
    }

    /// Mirrors the reserved-prefix rule: names starting with "__" are rejected.
    #[test]
    fn nome_invalido_prefixo_reservado_retorna_validation_error() {
        // Exercises the "__" prefix rejection logic directly.
        let nome = "__reservado";
        let resultado: Result<(), AppError> = match nome.starts_with("__") {
            true => Err(AppError::Validation(
                crate::i18n::validacao::nome_reservado(),
            )),
            false => Ok(()),
        };

        assert!(resultado.is_err());
        if let Err(AppError::Validation(msg)) = resultado {
            assert!(!msg.is_empty());
        }
    }

    /// Mirrors the length rule: a name one byte over the limit is rejected.
    #[test]
    fn nome_muito_longo_retorna_validation_error() {
        let nome_longo = "a".repeat(MAX_MEMORY_NAME_LEN + 1);
        let comprimento_invalido =
            nome_longo.is_empty() || nome_longo.len() > MAX_MEMORY_NAME_LEN;

        let resultado: Result<(), AppError> = if comprimento_invalido {
            Err(AppError::Validation(
                crate::i18n::validacao::nome_comprimento(MAX_MEMORY_NAME_LEN),
            ))
        } else {
            Ok(())
        };

        assert!(resultado.is_err());
    }

    /// A populated `merged_into_memory_id` serializes as a plain integer.
    #[test]
    fn remember_response_merged_into_memory_id_some_serializa_inteiro() {
        let json = para_json(&RememberResponse {
            memory_id: 10,
            name: "mem-mergeada".to_string(),
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 3,
            merged_into_memory_id: Some(7),
            ..resposta_base()
        });

        assert_eq!(json["merged_into_memory_id"], 7);
    }
}