1use crate::chunking;
2use crate::cli::MemoryType;
3use crate::errors::AppError;
4use crate::i18n::erros;
5use crate::output::{self, JsonOutputFormat, RememberResponse};
6use crate::paths::AppPaths;
7use crate::storage::chunks as storage_chunks;
8use crate::storage::connection::open_rw;
9use crate::storage::entities::{NewEntity, NewRelationship};
10use crate::storage::memories::NewMemory;
11use crate::storage::{entities, memories, versions};
12use serde::Deserialize;
13use std::io::Read as _;
14
15#[derive(clap::Args)]
16pub struct RememberArgs {
17 #[arg(long)]
18 pub name: String,
19 #[arg(
20 long,
21 value_enum,
22 long_help = "Memory kind stored in `memories.type`. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill."
23 )]
24 pub r#type: MemoryType,
25 #[arg(long)]
26 pub description: String,
27 #[arg(long)]
28 pub body: Option<String>,
29 #[arg(long)]
30 pub body_file: Option<std::path::PathBuf>,
31 #[arg(long)]
32 pub body_stdin: bool,
33 #[arg(long)]
34 pub entities_file: Option<std::path::PathBuf>,
35 #[arg(long)]
36 pub relationships_file: Option<std::path::PathBuf>,
37 #[arg(long)]
38 pub graph_stdin: bool,
39 #[arg(long, default_value = "global")]
40 pub namespace: Option<String>,
41 #[arg(long)]
42 pub metadata: Option<String>,
43 #[arg(long)]
44 pub metadata_file: Option<std::path::PathBuf>,
45 #[arg(long)]
46 pub force_merge: bool,
47 #[arg(
48 long,
49 value_name = "EPOCH_OR_RFC3339",
50 value_parser = crate::parsers::parse_expected_updated_at,
51 long_help = "Optimistic lock: reject if updated_at does not match. \
52Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
53 )]
54 pub expected_updated_at: Option<i64>,
55 #[arg(long)]
56 pub skip_extraction: bool,
57 #[arg(long)]
58 pub session_id: Option<String>,
59 #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
60 pub format: JsonOutputFormat,
61 #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
62 pub json: bool,
63 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
64 pub db: Option<String>,
65}
66
67#[derive(Deserialize, Default)]
68struct GraphInput {
69 #[serde(default)]
70 entities: Vec<NewEntity>,
71 #[serde(default)]
72 relationships: Vec<NewRelationship>,
73}
74
75pub fn run(args: RememberArgs) -> Result<(), AppError> {
76 use crate::constants::*;
77
78 let inicio = std::time::Instant::now();
79 let _ = args.format;
80 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
81
82 if args.name.is_empty() || args.name.len() > MAX_MEMORY_NAME_LEN {
83 return Err(AppError::Validation(
84 crate::i18n::validacao::nome_comprimento(MAX_MEMORY_NAME_LEN),
85 ));
86 }
87
88 if args.name.starts_with("__") {
89 return Err(AppError::Validation(
90 crate::i18n::validacao::nome_reservado(),
91 ));
92 }
93
94 {
95 let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
96 .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
97 if !slug_re.is_match(&args.name) {
98 return Err(AppError::Validation(crate::i18n::validacao::nome_kebab(
99 &args.name,
100 )));
101 }
102 }
103
104 if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
105 return Err(AppError::Validation(
106 crate::i18n::validacao::descricao_excede(MAX_MEMORY_DESCRIPTION_LEN),
107 ));
108 }
109
110 let mut raw_body = if let Some(b) = args.body {
111 if b.len() > REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES {
112 return Err(AppError::LimitExceeded(format!(
113 "documento tem {} bytes; limite operacional seguro atual é {REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES} bytes; reduza ou divida o documento antes de usar remember",
114 b.len()
115 )));
116 }
117 b
118 } else if let Some(path) = args.body_file {
119 let file_len = std::fs::metadata(&path).map_err(AppError::Io)?.len() as usize;
120 if file_len > REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES {
121 return Err(AppError::LimitExceeded(format!(
122 "arquivo tem {file_len} bytes; limite operacional seguro atual é {REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES} bytes; reduza ou divida o documento antes de usar remember"
123 )));
124 }
125 std::fs::read_to_string(&path).map_err(AppError::Io)?
126 } else if args.body_stdin || args.graph_stdin {
127 let mut buf = String::new();
128 std::io::stdin()
129 .read_to_string(&mut buf)
130 .map_err(AppError::Io)?;
131 if buf.len() > REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES {
132 return Err(AppError::LimitExceeded(format!(
133 "entrada stdin tem {} bytes; limite operacional seguro atual é {REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES} bytes; reduza ou divida o documento antes de usar remember",
134 buf.len()
135 )));
136 }
137 buf
138 } else {
139 String::new()
140 };
141
142 let mut graph = GraphInput::default();
143 if !args.skip_extraction {
144 if let Some(path) = args.entities_file {
145 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
146 graph.entities = serde_json::from_str(&content)?;
147 }
148 if let Some(path) = args.relationships_file {
149 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
150 graph.relationships = serde_json::from_str(&content)?;
151 }
152 if args.graph_stdin {
153 if let Ok(g) = serde_json::from_str::<GraphInput>(&raw_body) {
154 graph = g;
155 raw_body = String::new();
156 }
157 }
158 }
159
160 if graph.entities.len() > MAX_ENTITIES_PER_MEMORY {
161 return Err(AppError::LimitExceeded(erros::limite_entidades(
162 MAX_ENTITIES_PER_MEMORY,
163 )));
164 }
165 if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
166 return Err(AppError::LimitExceeded(erros::limite_relacionamentos(
167 MAX_RELATIONSHIPS_PER_MEMORY,
168 )));
169 }
170
171 if raw_body.len() > MAX_MEMORY_BODY_LEN {
172 return Err(AppError::LimitExceeded(
173 crate::i18n::validacao::body_excede(MAX_MEMORY_BODY_LEN),
174 ));
175 }
176
177 let metadata: serde_json::Value = if let Some(m) = args.metadata {
178 serde_json::from_str(&m)?
179 } else if let Some(path) = args.metadata_file {
180 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
181 serde_json::from_str(&content)?
182 } else {
183 serde_json::json!({})
184 };
185
186 let body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
187 let snippet: String = raw_body.chars().take(200).collect();
188
189 let paths = AppPaths::resolve(args.db.as_deref())?;
190 let mut conn = open_rw(&paths.db)?;
191
192 {
193 use crate::constants::MAX_NAMESPACES_ACTIVE;
194 let active_count: u32 = conn.query_row(
195 "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
196 [],
197 |r| r.get::<_, i64>(0).map(|v| v as u32),
198 )?;
199 let ns_exists: bool = conn.query_row(
200 "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
201 rusqlite::params![namespace],
202 |r| r.get::<_, i64>(0).map(|v| v > 0),
203 )?;
204 if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
205 return Err(AppError::NamespaceError(format!(
206 "limite de {MAX_NAMESPACES_ACTIVE} namespaces ativos excedido ao tentar criar '{namespace}'"
207 )));
208 }
209 }
210
211 let existing_memory = memories::find_by_name(&conn, &namespace, &args.name)?;
212 if existing_memory.is_some() && !args.force_merge {
213 return Err(AppError::Duplicate(erros::memoria_duplicada(
214 &args.name, &namespace,
215 )));
216 }
217
218 let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
219
220 output::emit_progress_i18n(
221 &format!(
222 "Remember stage: validated input; available memory {} MB",
223 crate::memory_guard::available_memory_mb()
224 ),
225 &format!(
226 "Etapa remember: entrada validada; memória disponível {} MB",
227 crate::memory_guard::available_memory_mb()
228 ),
229 );
230
231 let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
232 let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
233 let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
234 let token_offsets = crate::tokenizer::passage_token_offsets(tokenizer, &raw_body)?;
235 let chunks_info = chunking::split_into_chunks_by_token_offsets(&raw_body, &token_offsets);
236 let chunks_created = chunks_info.len();
237
238 output::emit_progress_i18n(
239 &format!(
240 "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
241 chunks_created,
242 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
243 ),
244 &format!(
245 "Etapa remember: tokenizer contou {total_passage_tokens} tokens de passagem (máximo do modelo {model_max_length}); chunking gerou {} chunks; RSS do processo {} MB",
246 chunks_created,
247 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
248 ),
249 );
250
251 if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
252 return Err(AppError::LimitExceeded(format!(
253 "documento gera {chunks_created} chunks; limite operacional seguro atual é {} chunks; divida o documento antes de usar remember",
254 crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
255 )));
256 }
257
258 if chunks_created > 1
259 && raw_body.len() > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES
260 {
261 return Err(AppError::LimitExceeded(format!(
262 "documento multi-chunk tem {} bytes; limite operacional seguro atual é {} bytes; reduza ou divida o documento antes de usar remember",
263 raw_body.len(),
264 crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNK_BODY_BYTES
265 )));
266 }
267
268 output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
269 let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
270
271 let embedding = if chunks_info.len() == 1 {
272 crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
273 } else {
274 let chunk_texts: Vec<&str> = chunks_info
275 .iter()
276 .map(|c| chunking::chunk_text(&raw_body, c))
277 .collect();
278 let chunk_token_counts: Vec<usize> = chunk_texts
279 .iter()
280 .map(|text| crate::tokenizer::count_passage_tokens(tokenizer, text))
281 .collect::<Result<_, _>>()?;
282 let controlled_batches = crate::embedder::controlled_batch_count(&chunk_token_counts);
283 output::emit_progress_i18n(
284 &format!(
285 "Embedding {} chunks across {} controlled batches...",
286 chunks_info.len(),
287 controlled_batches
288 ),
289 &format!(
290 "Embedando {} chunks em {} batches controlados...",
291 chunks_info.len(),
292 controlled_batches
293 ),
294 );
295 let chunk_embeddings = crate::daemon::embed_passages_controlled_or_local(
296 &paths.models,
297 &chunk_texts,
298 &chunk_token_counts,
299 )?;
300 output::emit_progress_i18n(
301 &format!(
302 "Remember stage: chunk embeddings complete; process RSS {} MB",
303 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
304 ),
305 &format!(
306 "Etapa remember: embeddings dos chunks concluídos; RSS do processo {} MB",
307 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
308 ),
309 );
310 let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
311 chunk_embeddings_cache = Some(chunk_embeddings);
312 aggregated
313 };
314 let body_for_storage = raw_body;
315
316 let memory_type = args.r#type.as_str();
317 let new_memory = NewMemory {
318 namespace: namespace.clone(),
319 name: args.name.clone(),
320 memory_type: memory_type.to_string(),
321 description: args.description.clone(),
322 body: body_for_storage,
323 body_hash: body_hash.clone(),
324 session_id: args.session_id.clone(),
325 source: "agent".to_string(),
326 metadata,
327 };
328
329 let mut warnings = Vec::new();
330
331 let (memory_id, action, version) = match existing_memory {
332 Some((existing_id, _updated_at, _current_version)) => {
333 if let Some(hash_id) = duplicate_hash_id {
334 if hash_id != existing_id {
335 warnings.push(format!(
336 "identical body already exists as memory id {hash_id}"
337 ));
338 }
339 }
340 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
341
342 if chunks_info.len() > 1 {
343 storage_chunks::delete_chunks(&tx, existing_id)?;
344 }
345
346 let next_v = versions::next_version(&tx, existing_id)?;
347 memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
348 versions::insert_version(
349 &tx,
350 existing_id,
351 next_v,
352 &args.name,
353 memory_type,
354 &args.description,
355 &new_memory.body,
356 &serde_json::to_string(&new_memory.metadata)?,
357 None,
358 "edit",
359 )?;
360 memories::upsert_vec(
361 &tx,
362 existing_id,
363 &namespace,
364 memory_type,
365 &embedding,
366 &args.name,
367 &snippet,
368 )?;
369 tx.commit()?;
370 (existing_id, "updated".to_string(), next_v)
371 }
372 None => {
373 if let Some(hash_id) = duplicate_hash_id {
374 warnings.push(format!(
375 "identical body already exists as memory id {hash_id}"
376 ));
377 }
378 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
379 let id = memories::insert(&tx, &new_memory)?;
380 versions::insert_version(
381 &tx,
382 id,
383 1,
384 &args.name,
385 memory_type,
386 &args.description,
387 &new_memory.body,
388 &serde_json::to_string(&new_memory.metadata)?,
389 None,
390 "create",
391 )?;
392 memories::upsert_vec(
393 &tx,
394 id,
395 &namespace,
396 memory_type,
397 &embedding,
398 &args.name,
399 &snippet,
400 )?;
401 tx.commit()?;
402 (id, "created".to_string(), 1)
403 }
404 };
405
406 if chunks_info.len() > 1 {
407 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
408 storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
409
410 let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
411 AppError::Internal(anyhow::anyhow!(
412 "chunk embeddings cache missing for multi-chunk remember path"
413 ))
414 })?;
415
416 for (i, emb) in chunk_embeddings.iter().enumerate() {
417 storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
418 }
419 tx.commit()?;
420 output::emit_progress_i18n(
421 &format!(
422 "Remember stage: persisted chunk vectors; process RSS {} MB",
423 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
424 ),
425 &format!(
426 "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
427 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
428 ),
429 );
430 }
431
432 let mut entities_persisted = 0usize;
433 let mut relationships_persisted = 0usize;
434
435 if !graph.entities.is_empty() || !graph.relationships.is_empty() {
436 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
437 for entity in &graph.entities {
438 let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
439 let entity_text = match &entity.description {
440 Some(desc) => format!("{} {}", entity.name, desc),
441 None => entity.name.clone(),
442 };
443 let entity_embedding =
444 crate::daemon::embed_passage_or_local(&paths.models, &entity_text)?;
445 entities::upsert_entity_vec(
446 &tx,
447 entity_id,
448 &namespace,
449 &entity.entity_type,
450 &entity_embedding,
451 &entity.name,
452 )?;
453 entities::link_memory_entity(&tx, memory_id, entity_id)?;
454 entities::increment_degree(&tx, entity_id)?;
455 entities_persisted += 1;
456 }
457 for rel in &graph.relationships {
458 let source_entity = NewEntity {
459 name: rel.source.clone(),
460 entity_type: "concept".to_string(),
461 description: None,
462 };
463 let target_entity = NewEntity {
464 name: rel.target.clone(),
465 entity_type: "concept".to_string(),
466 description: None,
467 };
468 let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
469 let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
470 let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
471 entities::link_memory_relationship(&tx, memory_id, rel_id)?;
472 relationships_persisted += 1;
473 }
474 tx.commit()?;
475 }
476
477 let created_at_epoch = chrono::Utc::now().timestamp();
478 let created_at_iso = crate::tz::formatar_iso(chrono::Utc::now());
479
480 output::emit_json(&RememberResponse {
481 memory_id,
482 name: args.name,
483 namespace,
484 action: action.clone(),
485 operation: action,
486 version,
487 entities_persisted,
488 relationships_persisted,
489 chunks_created,
490 merged_into_memory_id: None,
491 warnings,
492 created_at: created_at_epoch,
493 created_at_iso,
494 elapsed_ms: inicio.elapsed().as_millis() as u64,
495 })?;
496
497 Ok(())
498}
499
#[cfg(test)]
mod testes {
    use crate::errors::AppError;
    use crate::output::RememberResponse;

    /// Baseline response shared by the serialization tests; each test
    /// overrides only the fields it cares about via struct-update syntax.
    fn resposta_base() -> RememberResponse {
        RememberResponse {
            memory_id: 1,
            name: "mem".to_string(),
            namespace: "global".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 0,
            relationships_persisted: 0,
            chunks_created: 1,
            merged_into_memory_id: None,
            warnings: vec![],
            created_at: 0,
            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
            elapsed_ms: 0,
        }
    }

    /// Serializes a response into a JSON value, panicking on failure.
    fn para_json(resp: &RememberResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialização falhou")
    }

    /// The mandatory fields appear in the JSON with the expected values.
    #[test]
    fn remember_response_serializa_campos_obrigatorios() {
        let json = para_json(&RememberResponse {
            memory_id: 42,
            name: "minha-mem".to_string(),
            created_at: 1_705_320_000,
            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
            elapsed_ms: 55,
            ..resposta_base()
        });

        assert_eq!(json["memory_id"], 42);
        assert_eq!(json["action"], "created");
        assert_eq!(json["operation"], "created");
        assert_eq!(json["version"], 1);
        assert_eq!(json["elapsed_ms"], 55u64);
        assert!(json["warnings"].is_array());
        assert!(json["merged_into_memory_id"].is_null());
    }

    /// `action` and `operation` serialize to the same value, and the counters
    /// are emitted as given.
    #[test]
    fn remember_response_action_e_operation_sao_aliases() {
        let json = para_json(&RememberResponse {
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 2,
            entities_persisted: 3,
            relationships_persisted: 1,
            chunks_created: 2,
            ..resposta_base()
        });

        assert_eq!(
            json["action"], json["operation"],
            "action e operation devem ser iguais"
        );
        assert_eq!(json["entities_persisted"], 3);
        assert_eq!(json["relationships_persisted"], 1);
        assert_eq!(json["chunks_created"], 2);
    }

    /// Warning messages are serialized as a JSON array of strings.
    #[test]
    fn remember_response_warnings_lista_mensagens() {
        let json = para_json(&RememberResponse {
            memory_id: 5,
            name: "dup-mem".to_string(),
            warnings: vec!["identical body already exists as memory id 3".to_string()],
            elapsed_ms: 10,
            ..resposta_base()
        });

        let warnings = json["warnings"]
            .as_array()
            .expect("warnings deve ser array");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].as_str().unwrap().contains("identical body"));
    }

    /// A name with the reserved `__` prefix maps to a non-empty validation
    /// message.
    #[test]
    fn nome_invalido_prefixo_reservado_retorna_validation_error() {
        let nome = "__reservado";
        let resultado: Result<(), AppError> = match nome.strip_prefix("__") {
            Some(_) => Err(AppError::Validation(
                crate::i18n::validacao::nome_reservado(),
            )),
            None => Ok(()),
        };

        assert!(resultado.is_err());
        match resultado {
            Err(AppError::Validation(msg)) => assert!(!msg.is_empty()),
            _ => {}
        }
    }

    /// A name one byte longer than the limit maps to a validation error.
    #[test]
    fn nome_muito_longo_retorna_validation_error() {
        let limite = crate::constants::MAX_MEMORY_NAME_LEN;
        let nome_longo = "a".repeat(limite + 1);
        let comprimento_valido = !nome_longo.is_empty() && nome_longo.len() <= limite;

        let resultado: Result<(), AppError> = if comprimento_valido {
            Ok(())
        } else {
            Err(AppError::Validation(
                crate::i18n::validacao::nome_comprimento(limite),
            ))
        };
        assert!(resultado.is_err());
    }

    /// A present `merged_into_memory_id` serializes as a plain integer.
    #[test]
    fn remember_response_merged_into_memory_id_some_serializa_inteiro() {
        let json = para_json(&RememberResponse {
            memory_id: 10,
            name: "mem-mergeada".to_string(),
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 3,
            merged_into_memory_id: Some(7),
            ..resposta_base()
        });

        assert_eq!(json["merged_into_memory_id"], 7);
    }
}