1use crate::chunking;
2use crate::cli::MemoryType;
3use crate::errors::AppError;
4use crate::i18n::erros;
5use crate::output::{self, JsonOutputFormat, RememberResponse};
6use crate::paths::AppPaths;
7use crate::storage::chunks as storage_chunks;
8use crate::storage::connection::open_rw;
9use crate::storage::entities::{NewEntity, NewRelationship};
10use crate::storage::memories::NewMemory;
11use crate::storage::{entities, memories, versions};
12use serde::Deserialize;
13use std::io::Read as _;
14
15#[derive(clap::Args)]
16pub struct RememberArgs {
17 #[arg(long)]
18 pub name: String,
19 #[arg(long, value_enum)]
20 pub r#type: MemoryType,
21 #[arg(long)]
22 pub description: String,
23 #[arg(long)]
24 pub body: Option<String>,
25 #[arg(long)]
26 pub body_file: Option<std::path::PathBuf>,
27 #[arg(long)]
28 pub body_stdin: bool,
29 #[arg(long)]
30 pub entities_file: Option<std::path::PathBuf>,
31 #[arg(long)]
32 pub relationships_file: Option<std::path::PathBuf>,
33 #[arg(long)]
34 pub graph_stdin: bool,
35 #[arg(long, default_value = "global")]
36 pub namespace: Option<String>,
37 #[arg(long)]
38 pub metadata: Option<String>,
39 #[arg(long)]
40 pub metadata_file: Option<std::path::PathBuf>,
41 #[arg(long)]
42 pub force_merge: bool,
43 #[arg(
44 long,
45 value_name = "EPOCH_OR_RFC3339",
46 value_parser = crate::parsers::parse_expected_updated_at,
47 long_help = "Optimistic lock: reject if updated_at does not match. \
48Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
49 )]
50 pub expected_updated_at: Option<i64>,
51 #[arg(long)]
52 pub skip_extraction: bool,
53 #[arg(long)]
54 pub session_id: Option<String>,
55 #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
56 pub format: JsonOutputFormat,
57 #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
58 pub json: bool,
59 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
60 pub db: Option<String>,
61}
62
63#[derive(Deserialize, Default)]
64struct GraphInput {
65 #[serde(default)]
66 entities: Vec<NewEntity>,
67 #[serde(default)]
68 relationships: Vec<NewRelationship>,
69}
70
71pub fn run(args: RememberArgs) -> Result<(), AppError> {
72 use crate::constants::*;
73
74 let inicio = std::time::Instant::now();
75 let _ = args.format;
76 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
77
78 if args.name.is_empty() || args.name.len() > MAX_MEMORY_NAME_LEN {
79 return Err(AppError::Validation(
80 crate::i18n::validacao::nome_comprimento(MAX_MEMORY_NAME_LEN),
81 ));
82 }
83
84 if args.name.starts_with("__") {
85 return Err(AppError::Validation(
86 crate::i18n::validacao::nome_reservado(),
87 ));
88 }
89
90 {
91 let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
92 .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
93 if !slug_re.is_match(&args.name) {
94 return Err(AppError::Validation(crate::i18n::validacao::nome_kebab(
95 &args.name,
96 )));
97 }
98 }
99
100 if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
101 return Err(AppError::Validation(
102 crate::i18n::validacao::descricao_excede(MAX_MEMORY_DESCRIPTION_LEN),
103 ));
104 }
105
106 let mut raw_body = if let Some(b) = args.body {
107 if b.len() > REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES {
108 return Err(AppError::LimitExceeded(format!(
109 "documento tem {} bytes; limite operacional seguro atual é {REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES} bytes; reduza ou divida o documento antes de usar remember",
110 b.len()
111 )));
112 }
113 b
114 } else if let Some(path) = args.body_file {
115 let file_len = std::fs::metadata(&path).map_err(AppError::Io)?.len() as usize;
116 if file_len > REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES {
117 return Err(AppError::LimitExceeded(format!(
118 "arquivo tem {file_len} bytes; limite operacional seguro atual é {REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES} bytes; reduza ou divida o documento antes de usar remember"
119 )));
120 }
121 std::fs::read_to_string(&path).map_err(AppError::Io)?
122 } else if args.body_stdin || args.graph_stdin {
123 let mut buf = String::new();
124 std::io::stdin()
125 .read_to_string(&mut buf)
126 .map_err(AppError::Io)?;
127 if buf.len() > REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES {
128 return Err(AppError::LimitExceeded(format!(
129 "entrada stdin tem {} bytes; limite operacional seguro atual é {REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES} bytes; reduza ou divida o documento antes de usar remember",
130 buf.len()
131 )));
132 }
133 buf
134 } else {
135 String::new()
136 };
137
138 let mut graph = GraphInput::default();
139 if !args.skip_extraction {
140 if let Some(path) = args.entities_file {
141 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
142 graph.entities = serde_json::from_str(&content)?;
143 }
144 if let Some(path) = args.relationships_file {
145 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
146 graph.relationships = serde_json::from_str(&content)?;
147 }
148 if args.graph_stdin {
149 if let Ok(g) = serde_json::from_str::<GraphInput>(&raw_body) {
150 graph = g;
151 raw_body = String::new();
152 }
153 }
154 }
155
156 if graph.entities.len() > MAX_ENTITIES_PER_MEMORY {
157 return Err(AppError::LimitExceeded(erros::limite_entidades(
158 MAX_ENTITIES_PER_MEMORY,
159 )));
160 }
161 if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
162 return Err(AppError::LimitExceeded(erros::limite_relacionamentos(
163 MAX_RELATIONSHIPS_PER_MEMORY,
164 )));
165 }
166
167 if raw_body.len() > MAX_MEMORY_BODY_LEN {
168 return Err(AppError::LimitExceeded(
169 crate::i18n::validacao::body_excede(MAX_MEMORY_BODY_LEN),
170 ));
171 }
172
173 let metadata: serde_json::Value = if let Some(m) = args.metadata {
174 serde_json::from_str(&m)?
175 } else if let Some(path) = args.metadata_file {
176 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
177 serde_json::from_str(&content)?
178 } else {
179 serde_json::json!({})
180 };
181
182 let body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
183 let snippet: String = raw_body.chars().take(200).collect();
184
185 let paths = AppPaths::resolve(args.db.as_deref())?;
186 let mut conn = open_rw(&paths.db)?;
187
188 {
189 use crate::constants::MAX_NAMESPACES_ACTIVE;
190 let active_count: u32 = conn.query_row(
191 "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
192 [],
193 |r| r.get::<_, i64>(0).map(|v| v as u32),
194 )?;
195 let ns_exists: bool = conn.query_row(
196 "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
197 rusqlite::params![namespace],
198 |r| r.get::<_, i64>(0).map(|v| v > 0),
199 )?;
200 if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
201 return Err(AppError::NamespaceError(format!(
202 "limite de {MAX_NAMESPACES_ACTIVE} namespaces ativos excedido ao tentar criar '{namespace}'"
203 )));
204 }
205 }
206
207 let existing_memory = memories::find_by_name(&conn, &namespace, &args.name)?;
208 if existing_memory.is_some() && !args.force_merge {
209 return Err(AppError::Duplicate(erros::memoria_duplicada(
210 &args.name, &namespace,
211 )));
212 }
213
214 let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
215
216 output::emit_progress_i18n(
217 &format!(
218 "Remember stage: validated input; available memory {} MB",
219 crate::memory_guard::available_memory_mb()
220 ),
221 &format!(
222 "Etapa remember: entrada validada; memória disponível {} MB",
223 crate::memory_guard::available_memory_mb()
224 ),
225 );
226
227 let chunks_info = chunking::split_into_chunks(&raw_body);
228 let chunks_created = chunks_info.len();
229
230 output::emit_progress_i18n(
231 &format!(
232 "Remember stage: chunking produced {} chunks; process RSS {} MB",
233 chunks_created,
234 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
235 ),
236 &format!(
237 "Etapa remember: chunking gerou {} chunks; RSS do processo {} MB",
238 chunks_created,
239 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
240 ),
241 );
242
243 if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
244 return Err(AppError::LimitExceeded(format!(
245 "documento gera {chunks_created} chunks; limite operacional seguro atual é {} chunks; divida o documento antes de usar remember",
246 crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
247 )));
248 }
249
250 if chunks_created > 1
251 && raw_body.len() > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES
252 {
253 return Err(AppError::LimitExceeded(format!(
254 "documento multi-chunk tem {} bytes; limite operacional seguro atual é {} bytes; reduza ou divida o documento antes de usar remember",
255 raw_body.len(),
256 crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES
257 )));
258 }
259
260 output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
261 let embedder = crate::embedder::get_embedder(&paths.models)?;
262
263 let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
264
265 let embedding = if chunks_info.len() == 1 {
266 crate::embedder::embed_passage(embedder, &raw_body)?
267 } else {
268 output::emit_progress_i18n(
269 &format!("Embedding {} chunks...", chunks_info.len()),
270 &format!("Embedando {} chunks...", chunks_info.len()),
271 );
272 let chunk_embeddings = crate::embedder::embed_passages_serial(
273 embedder,
274 chunks_info
275 .iter()
276 .map(|c| chunking::chunk_text(&raw_body, c)),
277 )?;
278 output::emit_progress_i18n(
279 &format!(
280 "Remember stage: chunk embeddings complete; process RSS {} MB",
281 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
282 ),
283 &format!(
284 "Etapa remember: embeddings dos chunks concluídos; RSS do processo {} MB",
285 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
286 ),
287 );
288 let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
289 chunk_embeddings_cache = Some(chunk_embeddings);
290 aggregated
291 };
292 let body_for_storage = raw_body;
293
294 let memory_type = args.r#type.as_str();
295 let new_memory = NewMemory {
296 namespace: namespace.clone(),
297 name: args.name.clone(),
298 memory_type: memory_type.to_string(),
299 description: args.description.clone(),
300 body: body_for_storage,
301 body_hash: body_hash.clone(),
302 session_id: args.session_id.clone(),
303 source: "agent".to_string(),
304 metadata,
305 };
306
307 let mut warnings = Vec::new();
308
309 let (memory_id, action, version) = match existing_memory {
310 Some((existing_id, _updated_at, _current_version)) => {
311 if let Some(hash_id) = duplicate_hash_id {
312 if hash_id != existing_id {
313 warnings.push(format!(
314 "identical body already exists as memory id {hash_id}"
315 ));
316 }
317 }
318 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
319
320 if chunks_info.len() > 1 {
321 storage_chunks::delete_chunks(&tx, existing_id)?;
322 }
323
324 let next_v = versions::next_version(&tx, existing_id)?;
325 memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
326 versions::insert_version(
327 &tx,
328 existing_id,
329 next_v,
330 &args.name,
331 memory_type,
332 &args.description,
333 &new_memory.body,
334 &serde_json::to_string(&new_memory.metadata)?,
335 None,
336 "edit",
337 )?;
338 memories::upsert_vec(
339 &tx,
340 existing_id,
341 &namespace,
342 memory_type,
343 &embedding,
344 &args.name,
345 &snippet,
346 )?;
347 tx.commit()?;
348 (existing_id, "updated".to_string(), next_v)
349 }
350 None => {
351 if let Some(hash_id) = duplicate_hash_id {
352 warnings.push(format!(
353 "identical body already exists as memory id {hash_id}"
354 ));
355 }
356 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
357 let id = memories::insert(&tx, &new_memory)?;
358 versions::insert_version(
359 &tx,
360 id,
361 1,
362 &args.name,
363 memory_type,
364 &args.description,
365 &new_memory.body,
366 &serde_json::to_string(&new_memory.metadata)?,
367 None,
368 "create",
369 )?;
370 memories::upsert_vec(
371 &tx,
372 id,
373 &namespace,
374 memory_type,
375 &embedding,
376 &args.name,
377 &snippet,
378 )?;
379 tx.commit()?;
380 (id, "created".to_string(), 1)
381 }
382 };
383
384 if chunks_info.len() > 1 {
385 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
386 storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
387
388 let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
389 AppError::Internal(anyhow::anyhow!(
390 "chunk embeddings cache missing for multi-chunk remember path"
391 ))
392 })?;
393
394 for (i, emb) in chunk_embeddings.iter().enumerate() {
395 storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
396 }
397 tx.commit()?;
398 output::emit_progress_i18n(
399 &format!(
400 "Remember stage: persisted chunk vectors; process RSS {} MB",
401 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
402 ),
403 &format!(
404 "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
405 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
406 ),
407 );
408 }
409
410 let mut entities_persisted = 0usize;
411 let mut relationships_persisted = 0usize;
412
413 if !graph.entities.is_empty() || !graph.relationships.is_empty() {
414 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
415 for entity in &graph.entities {
416 let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
417 let entity_text = match &entity.description {
418 Some(desc) => format!("{} {}", entity.name, desc),
419 None => entity.name.clone(),
420 };
421 let entity_embedding = crate::embedder::embed_passage(embedder, &entity_text)?;
422 entities::upsert_entity_vec(
423 &tx,
424 entity_id,
425 &namespace,
426 &entity.entity_type,
427 &entity_embedding,
428 &entity.name,
429 )?;
430 entities::link_memory_entity(&tx, memory_id, entity_id)?;
431 entities::increment_degree(&tx, entity_id)?;
432 entities_persisted += 1;
433 }
434 for rel in &graph.relationships {
435 let source_entity = NewEntity {
436 name: rel.source.clone(),
437 entity_type: "concept".to_string(),
438 description: None,
439 };
440 let target_entity = NewEntity {
441 name: rel.target.clone(),
442 entity_type: "concept".to_string(),
443 description: None,
444 };
445 let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
446 let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
447 let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
448 entities::link_memory_relationship(&tx, memory_id, rel_id)?;
449 relationships_persisted += 1;
450 }
451 tx.commit()?;
452 }
453
454 let created_at_epoch = chrono::Utc::now().timestamp();
455 let created_at_iso = crate::tz::formatar_iso(chrono::Utc::now());
456
457 output::emit_json(&RememberResponse {
458 memory_id,
459 name: args.name,
460 namespace,
461 action: action.clone(),
462 operation: action,
463 version,
464 entities_persisted,
465 relationships_persisted,
466 chunks_created,
467 merged_into_memory_id: None,
468 warnings,
469 created_at: created_at_epoch,
470 created_at_iso,
471 elapsed_ms: inicio.elapsed().as_millis() as u64,
472 })?;
473
474 Ok(())
475}
476
/// Unit tests for the `remember` response serialization and for the name
/// validation rules applied by `run`.
#[cfg(test)]
mod testes {
    use crate::errors::AppError;
    use crate::output::RememberResponse;

    /// Baseline response; each test overrides only the fields it cares about
    /// through struct-update syntax.
    fn resposta_base() -> RememberResponse {
        RememberResponse {
            memory_id: 1,
            name: "mem".to_string(),
            namespace: "global".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 0,
            relationships_persisted: 0,
            chunks_created: 1,
            merged_into_memory_id: None,
            warnings: vec![],
            created_at: 0,
            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
            elapsed_ms: 0,
        }
    }

    /// Asserts that `resultado` is a non-empty `AppError::Validation`. Any
    /// other outcome fails the test instead of being silently accepted.
    fn assert_validation_error(resultado: Result<(), AppError>) {
        match resultado {
            Err(AppError::Validation(msg)) => assert!(!msg.is_empty()),
            Err(_) => panic!("esperava AppError::Validation, obteve outra variante"),
            Ok(()) => panic!("esperava AppError::Validation, obteve Ok"),
        }
    }

    #[test]
    fn remember_response_serializa_campos_obrigatorios() {
        let resp = RememberResponse {
            memory_id: 42,
            name: "minha-mem".to_string(),
            created_at: 1_705_320_000,
            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
            elapsed_ms: 55,
            ..resposta_base()
        };

        let json = serde_json::to_value(&resp).expect("serialização falhou");
        assert_eq!(json["memory_id"], 42);
        assert_eq!(json["action"], "created");
        assert_eq!(json["operation"], "created");
        assert_eq!(json["version"], 1);
        assert_eq!(json["elapsed_ms"], 55u64);
        assert!(json["warnings"].is_array());
        assert!(json["merged_into_memory_id"].is_null());
    }

    #[test]
    fn remember_response_action_e_operation_sao_aliases() {
        let resp = RememberResponse {
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 2,
            entities_persisted: 3,
            relationships_persisted: 1,
            chunks_created: 2,
            ..resposta_base()
        };

        let json = serde_json::to_value(&resp).expect("serialização falhou");
        assert_eq!(
            json["action"], json["operation"],
            "action e operation devem ser iguais"
        );
        assert_eq!(json["entities_persisted"], 3);
        assert_eq!(json["relationships_persisted"], 1);
        assert_eq!(json["chunks_created"], 2);
    }

    #[test]
    fn remember_response_warnings_lista_mensagens() {
        let resp = RememberResponse {
            memory_id: 5,
            name: "dup-mem".to_string(),
            warnings: vec!["identical body already exists as memory id 3".to_string()],
            elapsed_ms: 10,
            ..resposta_base()
        };

        let json = serde_json::to_value(&resp).expect("serialização falhou");
        let warnings = json["warnings"]
            .as_array()
            .expect("warnings deve ser array");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0]
            .as_str()
            .expect("warning deve ser string")
            .contains("identical body"));
    }

    #[test]
    fn nome_invalido_prefixo_reservado_retorna_validation_error() {
        // Mirrors the reserved-prefix guard in `run`.
        let nome = "__reservado";
        let resultado: Result<(), AppError> = if nome.starts_with("__") {
            Err(AppError::Validation(
                crate::i18n::validacao::nome_reservado(),
            ))
        } else {
            Ok(())
        };
        assert_validation_error(resultado);
    }

    #[test]
    fn nome_muito_longo_retorna_validation_error() {
        // Mirrors the length guard in `run`, one byte over the limit.
        let limite = crate::constants::MAX_MEMORY_NAME_LEN;
        let nome_longo = "a".repeat(limite + 1);
        let resultado: Result<(), AppError> = if nome_longo.is_empty() || nome_longo.len() > limite
        {
            Err(AppError::Validation(
                crate::i18n::validacao::nome_comprimento(limite),
            ))
        } else {
            Ok(())
        };
        assert_validation_error(resultado);
    }

    #[test]
    fn remember_response_merged_into_memory_id_some_serializa_inteiro() {
        let resp = RememberResponse {
            memory_id: 10,
            name: "mem-mergeada".to_string(),
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 3,
            merged_into_memory_id: Some(7),
            ..resposta_base()
        };

        let json = serde_json::to_value(&resp).expect("serialização falhou");
        assert_eq!(json["merged_into_memory_id"], 7);
    }
}