1use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Maps the number of chunks produced by chunking to the number reported as persisted.
///
/// Bodies that yield zero or one chunk report `0`; any larger count is reported unchanged.
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        multi => multi,
    }
}
32
// Command-line arguments accepted by the `remember` subcommand (consumed by `run`).
//
// NOTE: plain `//` comments are used deliberately in this struct. clap's derive macros turn
// `///` doc comments into help text, so adding them would change the CLI output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
    # Create a memory with inline body\n \
    sqlite-graphrag remember --name design-auth --type decision \\\n \
    --description \"auth design\" --body \"JWT for stateless auth\"\n\n \
    # Create with curated graph via --graph-stdin\n \
    echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n \
    sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n \
    # Enable automatic URL extraction with --graph-stdin (URL-regex only since v1.0.79)\n \
    echo '{\"body\":\"See https://docs.rs ...\",\"entities\":[],\"relationships\":[]}' | \\\n \
    sqlite-graphrag remember --name url-test --type note --description \"test\" \\\n \
    --graph-stdin --enable-ner\n\n \
    # Idempotent upsert with --force-merge\n \
    sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n \
    --body \"new content\" --force-merge\n\n\
NOTE:\n \
    remember does NOT accept positional arguments.\n \
    Use --body \"text\" for inline content\n \
    Use --body-file path for file content\n \
    Use --body-stdin for piped content\n \
    Use --graph-stdin for JSON with entities and relationships\n\n\
ENTITY TYPES (for --graph-stdin entities, NOT memory --type):\n \
    concept, tool, person, file, project, decision, incident,\n \
    organization, location, date, dashboard, issue_tracker, memory\n \
    WARNING: reference, skill, document, note, user, feedback are\n \
    MEMORY types only — NOT valid for entities.\n \
    Mapping: reference→concept, document→file, user→person")]
pub struct RememberArgs {
    // Memory name; `run` lower-cases it and converts '_' / ' ' to '-' before validating it.
    #[arg(long)]
    pub name: String,
    // Memory type; `run` requires it when creating and inherits the stored type on merge.
    #[arg(
        long,
        value_enum,
        long_help = "Memory kind stored in `memories.type`. Required when creating a new memory. Optional with --force-merge: if omitted the existing memory type is inherited. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
    )]
    pub r#type: Option<MemoryType>,
    // Description; `run` requires it when creating and inherits the stored one on merge.
    #[arg(long)]
    pub description: Option<String>,
    // Inline body. Mutually exclusive with the other body sources.
    #[arg(
        long,
        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
    )]
    pub body: Option<String>,
    // Path of a file whose content becomes the body.
    #[arg(
        long,
        help = "Read body from a file instead of --body",
        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
    )]
    pub body_file: Option<std::path::PathBuf>,
    // When set, `run` reads the body from stdin.
    #[arg(
        long,
        conflicts_with_all = ["body", "body_file", "graph_stdin"]
    )]
    pub body_stdin: bool,
    // JSON file that `run` deserializes into the entity list.
    #[arg(
        long,
        help = "JSON file containing entities to associate with this memory"
    )]
    pub entities_file: Option<std::path::PathBuf>,
    // JSON file that `run` deserializes into the relationship list.
    #[arg(
        long,
        help = "JSON file containing relationships to associate with this memory"
    )]
    pub relationships_file: Option<std::path::PathBuf>,
    // When set, stdin carries one JSON document with body, entities and relationships.
    #[arg(
        long,
        help = "Read graph JSON (body + entities + relationships) from stdin",
        conflicts_with_all = [
            "body",
            "body_file",
            "body_stdin",
            "entities_file",
            "relationships_file"
        ]
    )]
    pub graph_stdin: bool,
    // Namespace override; resolved through `crate::namespace::resolve_namespace` in `run`.
    #[arg(
        long,
        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
    )]
    pub namespace: Option<String>,
    // Inline metadata; `run` parses it as JSON and prefers it over `metadata_file`.
    #[arg(long)]
    pub metadata: Option<String>,
    // File whose content `run` parses as the metadata JSON when `metadata` is absent.
    #[arg(long, help = "JSON file containing metadata key-value pairs")]
    pub metadata_file: Option<std::path::PathBuf>,
    // Allows updating an existing memory (and reviving a soft-deleted one) instead of failing
    // with a duplicate error.
    #[arg(long)]
    pub force_merge: bool,
    // Optimistic-lock value forwarded to `memories::update`.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    // Turns on extraction in `run`; accepts an optional explicit boolean value.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_ENABLE_NER",
        value_parser = crate::parsers::parse_bool_flexible,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        default_missing_value = "true",
        default_value = "false",
        help = "Enable automatic URL-regex extraction from body (the GLiNER NER pipeline was removed in v1.0.79)"
    )]
    pub enable_ner: bool,
    // Deprecated; `run` logs a warning when the value is not "fp32".
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
        default_value = "fp32",
        help = "DEPRECATED: no effect since v1.0.79 (the GLiNER pipeline was removed); accepted for compatibility only"
    )]
    pub gliner_variant: String,
    // Hidden deprecated flag; `run` rejects it together with --enable-ner and otherwise warns.
    #[arg(long, hide = true)]
    pub skip_extraction: bool,
    // Lets an empty body replace the stored body during --force-merge.
    #[arg(
        long,
        default_value_t = false,
        help = "Explicitly clear body content during --force-merge (without this flag, an empty body is ignored and the existing body is kept)"
    )]
    pub clear_body: bool,
    // When set, `run` emits the planned action and returns before writing the memory.
    #[arg(
        long,
        default_value_t = false,
        help = "Validate input and report planned actions without persisting"
    )]
    pub dry_run: bool,
    // Optional session identifier copied into the stored memory row.
    #[arg(long)]
    pub session_id: Option<String>,
    // Output format selector; `run` reads it but does not branch on it.
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    // Hidden compatibility flag with no effect.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override passed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    // RSS ceiling checked by `run` before multi-chunk embedding starts.
    #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
        help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
    pub max_rss_mb: u64,
    // Entity degree above which `run` logs a warning; 0 disables the check.
    #[arg(long, default_value_t = 50, value_name = "N")]
    pub max_entity_degree: u32,
    // Parallelism passed to the embedding helpers; the value parser restricts it to 1..=32.
    #[arg(long, default_value_t = 4, value_name = "N",
        value_parser = clap::value_parser!(u64).range(1..=32),
        help = "Maximum simultaneous LLM embedding subprocesses (default: 4, clamp [1,32])")]
    pub llm_parallelism: u64,
}
199
200#[derive(Deserialize, Default)]
201#[serde(deny_unknown_fields)]
202struct GraphInput {
203 #[serde(default)]
204 body: Option<String>,
205 #[serde(default)]
206 entities: Vec<NewEntity>,
207 #[serde(default)]
208 relationships: Vec<NewRelationship>,
209}
210
211fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
212 for rel in &mut graph.relationships {
213 rel.relation = crate::parsers::normalize_relation(&rel.relation);
214 if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
215 return Err(AppError::Validation(format!(
216 "{e} for relationship '{}' -> '{}'",
217 rel.source, rel.target
218 )));
219 }
220 crate::parsers::warn_if_non_canonical(&rel.relation);
221 if !(0.0..=1.0).contains(&rel.strength) {
222 return Err(AppError::Validation(format!(
223 "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
224 rel.strength, rel.source, rel.target
225 )));
226 }
227 }
228
229 Ok(())
230}
231
/// Executes the `remember` command.
///
/// The function validates and normalizes the memory name, resolves the body from one of the
/// supported sources, loads the optional graph payload and metadata, optionally runs
/// extraction, computes embeddings, and then creates or (with `--force-merge`) updates the
/// memory together with its version row, chunks, entities and relationships inside a single
/// immediate transaction. Extracted URLs are stored after the commit. On success a
/// `RememberResponse` is emitted through `output::emit_json`.
///
/// With `--dry-run` the planned action is emitted and the function returns before any memory
/// row is written (the database is still opened and `ensure_schema` is still called).
///
/// # Errors
/// * `AppError::Validation` / `AppError::LimitExceeded` for invalid or oversized input.
/// * `AppError::Duplicate` when the name already exists (live or soft-deleted) and
///   `--force-merge` is not set.
/// * `AppError::NamespaceError` when a new namespace would exceed `MAX_NAMESPACES_ACTIVE`.
/// * `AppError::LowMemory` when process RSS exceeds `--max-rss-mb` before multi-chunk embedding.
/// * `AppError::NotFound` if the existing row disappears between lookups on the merge path.
/// * Any I/O, JSON, database or embedding error propagated from the helpers called here.
#[tracing::instrument(skip_all, level = "debug", name = "remember")]
pub fn run(
    args: RememberArgs,
    llm_backend: crate::cli::LlmBackendChoice,
    embedding_backend: crate::cli::EmbeddingBackendChoice,
) -> Result<(), AppError> {
    use crate::constants::*;

    // Start of the wall-clock measurement reported as `elapsed_ms`.
    let inicio = std::time::Instant::now();
    // `format` is read but not otherwise used in this function.
    let _ = args.format;
    tracing::debug!(target: "remember", name = %args.name, "persisting memory");
    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;

    // Kept so the response can report the name exactly as the caller typed it.
    let original_name = args.name.clone();

    // Normalize to kebab-case: lower-case, '_' and ' ' become '-', edge hyphens are trimmed.
    let normalized_name = {
        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
        let trimmed = lower.trim_matches('-').to_string();
        if trimmed != args.name {
            tracing::warn!(target: "remember",
                original = %args.name,
                normalized = %trimmed,
                "name auto-normalized to kebab-case"
            );
        }
        trimmed
    };
    let name_was_normalized = normalized_name != original_name;

    // --- Name validation -------------------------------------------------------------------
    if normalized_name.is_empty() {
        return Err(AppError::Validation(
            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
        ));
    }
    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
        return Err(AppError::LimitExceeded(
            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
        ));
    }

    // NOTE(review): every '_' was replaced by '-' during normalization above, so this prefix
    // check looks unreachable — confirm whether it should test the original name instead.
    if normalized_name.starts_with("__") {
        return Err(AppError::Validation(
            crate::i18n::validation::reserved_name(),
        ));
    }

    {
        let slug_re = crate::constants::name_slug_regex();
        if !slug_re.is_match(&normalized_name) {
            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
                &normalized_name,
            )));
        }
    }

    if let Some(ref desc) = args.description {
        if desc.len() > MAX_MEMORY_DESCRIPTION_LEN {
            return Err(AppError::Validation(
                crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
            ));
        }
    }

    // --- Body resolution -------------------------------------------------------------------
    // Priority: --body, then --body-file, then stdin (--body-stdin or --graph-stdin), else empty.
    let mut raw_body = if let Some(b) = args.body {
        b
    } else if let Some(ref path) = args.body_file {
        // Reject oversized files from their metadata before reading them into memory.
        let file_size = std::fs::metadata(path).map_err(AppError::Io)?.len();
        if file_size > MAX_MEMORY_BODY_LEN as u64 {
            return Err(AppError::LimitExceeded(
                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
            ));
        }
        match std::fs::read_to_string(path) {
            Ok(s) => s,
            // Invalid UTF-8: re-read as bytes and substitute replacement characters.
            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
                let bytes = std::fs::read(path).map_err(AppError::Io)?;
                tracing::warn!(target: "remember", "body file contains invalid UTF-8; replacing invalid sequences");
                String::from_utf8_lossy(&bytes).into_owned()
            }
            Err(e) => return Err(AppError::Io(e)),
        }
    } else if args.body_stdin || args.graph_stdin {
        // For --graph-stdin this is the raw JSON document; it is parsed further below.
        crate::stdin_helper::read_stdin_with_timeout(60)?
    } else {
        String::new()
    };

    // True when the caller supplied graph data, which permits an empty body further below.
    let mut entities_provided_externally =
        args.entities_file.is_some() || args.relationships_file.is_some();

    // --- Graph payload ---------------------------------------------------------------------
    // The entity/relationship files reuse the body size limit and its error message.
    let mut graph = GraphInput::default();
    if let Some(path) = args.entities_file {
        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
        if file_size > MAX_MEMORY_BODY_LEN as u64 {
            return Err(AppError::LimitExceeded(
                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
            ));
        }
        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
        graph.entities = serde_json::from_str(&content)?;
    }
    if let Some(path) = args.relationships_file {
        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
        if file_size > MAX_MEMORY_BODY_LEN as u64 {
            return Err(AppError::LimitExceeded(
                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
            ));
        }
        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
        graph.relationships = serde_json::from_str(&content)?;
    }
    if args.graph_stdin {
        // The stdin document replaces the whole graph; its `body` field becomes the body.
        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
        })?;
        raw_body = graph.body.take().unwrap_or_default();
    }
    if args.graph_stdin && !graph.entities.is_empty() {
        entities_provided_externally = true;
    }

    // Too many entities is a hard error; too many relationships are truncated with a warning.
    if graph.entities.len() > max_entities_per_memory() {
        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
            max_entities_per_memory(),
        )));
    }
    let mut relationships_truncated = false;
    let rel_cap = max_relationships_per_memory();
    if graph.relationships.len() > rel_cap {
        tracing::warn!(target: "remember",
            count = graph.relationships.len(),
            cap = rel_cap,
            "truncating relationships to cap"
        );
        graph.relationships.truncate(rel_cap);
        relationships_truncated = true;
    }
    normalize_and_validate_graph_input(&mut graph)?;

    // Final body size check; covers the inline, stdin and graph-stdin sources as well.
    if raw_body.len() > MAX_MEMORY_BODY_LEN {
        return Err(AppError::LimitExceeded(
            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
        ));
    }

    // An empty body on --force-merge without --clear-body means "keep the stored body".
    let body_will_be_preserved = args.force_merge && raw_body.trim().is_empty() && !args.clear_body;
    // Reject an empty body unless graph data was supplied, the stored body will be kept,
    // or the caller explicitly asked to clear it.
    if !entities_provided_externally
        && graph.entities.is_empty()
        && raw_body.trim().is_empty()
        && !body_will_be_preserved
        && !args.clear_body
    {
        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
    }

    // --- Metadata --------------------------------------------------------------------------
    // Inline --metadata wins over --metadata-file; the default is an empty JSON object.
    let metadata: serde_json::Value = if let Some(m) = args.metadata {
        serde_json::from_str(&m)?
    } else if let Some(path) = args.metadata_file {
        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
        if file_size > MAX_MEMORY_BODY_LEN as u64 {
            return Err(AppError::LimitExceeded(
                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
            ));
        }
        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
        serde_json::from_str(&content)?
    } else {
        serde_json::json!({})
    };

    // Content hash and a 200-character snippet; both are recomputed if the stored body is kept.
    let mut body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
    let mut snippet: String = raw_body.chars().take(200).collect();

    let paths = AppPaths::resolve(args.db.as_deref())?;
    paths.ensure_dirs()?;

    // --- Optional extraction ---------------------------------------------------------------
    let mut extraction_method: Option<String> = None;
    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
    if args.enable_ner && args.skip_extraction {
        return Err(AppError::Validation(
            "--enable-ner and --skip-extraction are mutually exclusive; remove one".to_string(),
        ));
    }
    // NOTE(review): `!args.enable_ner` is always true here because the combination with
    // `skip_extraction` returned just above.
    if args.skip_extraction && !args.enable_ner {
        tracing::warn!(
            "--skip-extraction is deprecated since v1.0.45 and has no effect (NER is disabled by default); remove this flag to silence the warning"
        );
    }
    if args.gliner_variant != "fp32" {
        tracing::warn!(
            "--gliner-variant is deprecated and has no effect since v1.0.79 (the GLiNER pipeline was removed); --enable-ner performs URL-regex extraction only"
        );
    }
    // The variant is still parsed because `extract_graph_auto` takes it as a parameter.
    let gliner_variant: crate::extraction::GlinerVariant = match args.gliner_variant.as_str() {
        "int8" => crate::extraction::GlinerVariant::Int8,
        _ => crate::extraction::GlinerVariant::Fp32,
    };
    // Extraction runs only when enabled, no entities were supplied, and the body is non-blank.
    if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
        match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
            Ok(extracted) => {
                extraction_method = Some("url-regex".to_string());
                extracted_urls = extracted.urls;
                // Extracted entities are stored as `Concept` entities without a description.
                graph.entities = extracted
                    .entities
                    .into_iter()
                    .map(|e| NewEntity {
                        name: e.name,
                        entity_type: crate::entity_type::EntityType::Concept,
                        description: None,
                    })
                    .collect();
                // Any caller-supplied relationships are discarded on this path.
                graph.relationships.clear();
                relationships_truncated = false;

                if graph.entities.len() > max_entities_per_memory() {
                    graph.entities.truncate(max_entities_per_memory());
                }
                // NOTE(review): relationships were cleared just above, so this cap check
                // cannot trigger as written.
                if graph.relationships.len() > max_relationships_per_memory() {
                    relationships_truncated = true;
                    graph.relationships.truncate(max_relationships_per_memory());
                }
                normalize_and_validate_graph_input(&mut graph)?;
            }
            Err(e) => {
                // Extraction failure is not fatal; it is recorded in `extraction_method`.
                tracing::warn!(target: "remember", error = %e, "auto-extraction failed, graceful degradation");
                extraction_method = Some("none:extraction-failed".to_string());
            }
        }
    }

    let mut conn = open_rw(&paths.db)?;
    ensure_schema(&mut conn)?;

    // --- Dry run ---------------------------------------------------------------------------
    if args.dry_run {
        let existing = memories::find_by_name(&conn, &namespace, &normalized_name)?;
        // "would_update" only when the memory exists and --force-merge is set.
        let planned_action = if existing.is_some() && args.force_merge {
            "would_update"
        } else {
            "would_create"
        };
        output::emit_json(&serde_json::json!({
            "dry_run": true,
            "name": normalized_name,
            "namespace": namespace,
            "planned_action": planned_action,
        }))?;
        return Ok(());
    }

    // --- Active namespace limit ------------------------------------------------------------
    // Only a namespace with no live memories counts against the limit.
    {
        use crate::constants::MAX_NAMESPACES_ACTIVE;
        let active_count: u32 = conn.query_row(
            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
            [],
            |r| r.get::<_, i64>(0).map(|v| v as u32),
        )?;
        let ns_exists: bool = conn.query_row(
            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
            rusqlite::params![namespace],
            |r| r.get::<_, i64>(0).map(|v| v > 0),
        )?;
        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
            return Err(AppError::NamespaceError(format!(
                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
            )));
        }
    }

    // --- Soft-deleted and duplicate handling -----------------------------------------------
    // The pattern matches only when the second tuple element is `true`; together with the
    // error message below this indicates a soft-deleted row. --force-merge revives it.
    if let Some((sd_id, true)) =
        memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
    {
        if args.force_merge {
            memories::clear_deleted_at(&conn, sd_id)?;
        } else {
            return Err(AppError::Duplicate(
                errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
            ));
        }
    }

    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
    if existing_memory.is_some() && !args.force_merge {
        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
            &normalized_name,
            &namespace,
        )));
    }

    // Creating requires --type and --description; merging inherits whichever was omitted.
    let (resolved_type, resolved_description) = if existing_memory.is_none() {
        let t = args.r#type.ok_or_else(|| {
            AppError::Validation(
                "--type and --description are required when creating a new memory".to_string(),
            )
        })?;
        let d = args.description.clone().ok_or_else(|| {
            AppError::Validation(
                "--type and --description are required when creating a new memory".to_string(),
            )
        })?;
        (t.as_str().to_string(), d)
    } else {
        let existing_row = memories::read_by_name(&conn, &namespace, &normalized_name)?
            .ok_or_else(|| {
                AppError::NotFound(format!(
                    "memory '{normalized_name}' not found in namespace '{namespace}'"
                ))
            })?;
        let t = args
            .r#type
            .map(|v| v.as_str().to_string())
            .unwrap_or_else(|| existing_row.memory_type.clone());
        let d = args
            .description
            .clone()
            .unwrap_or_else(|| existing_row.description.clone());
        (t, d)
    };

    // Keep the stored body when merging with an empty body; hash and snippet follow the body.
    if body_will_be_preserved {
        if let Some(existing_row) = memories::read_by_name(&conn, &namespace, &normalized_name)? {
            if !existing_row.body.is_empty() {
                tracing::debug!(target: "remember",
                    name = %normalized_name,
                    "GAP-08: empty body with --force-merge and no --clear-body; preserving existing body"
                );
                raw_body = existing_row.body;
                body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
                snippet = raw_body.chars().take(200).collect();
            }
        }
    }

    // Looked up now; used later only to emit an "identical body" warning.
    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;

    // `emit_progress_i18n` takes two variants of each message (presumably one per supported
    // language — several calls below pass Portuguese text as the second argument).
    output::emit_progress_i18n(
        &format!(
            "Remember stage: validated input; available memory {} MB",
            crate::memory_guard::available_memory_mb()
        ),
        &format!(
            "Stage remember: input validated; available memory {} MB",
            crate::memory_guard::available_memory_mb()
        ),
    );

    // --- Tokenization and chunking ---------------------------------------------------------
    let model_max_length = crate::tokenizer::get_model_max_length();
    let total_passage_tokens = crate::tokenizer::count_passage_tokens(&raw_body)?;
    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body);
    let chunks_created = chunks_info.len();
    let chunks_persisted = compute_chunks_persisted(chunks_info.len());

    output::emit_progress_i18n(
        &format!(
            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
            chunks_created,
            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
        ),
        &format!(
            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
            chunks_created,
            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
        ),
    );

    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
        return Err(AppError::LimitExceeded(format!(
            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
        )));
    }

    // --- Memory embedding ------------------------------------------------------------------
    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
    // Per-chunk embeddings, kept so the persistence step does not have to recompute them.
    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;

    // When true, embedding failures degrade to "persist without embedding" instead of aborting.
    let skip_embed = crate::embedder::should_skip_embedding_on_failure();
    // Exactly one chunk: embed the whole body directly. Any other count (including zero)
    // takes the batch path and aggregates the per-chunk embeddings.
    let (embedding, backend_invoked_passage): (Option<Vec<f32>>, Option<&str>) = if chunks_info
        .len()
        == 1
    {
        match crate::embedder::embed_passage_with_embedding_choice(
            &paths.models,
            &raw_body,
            embedding_backend,
            llm_backend,
        ) {
            Ok((v, k)) => (Some(v), Some(k.as_str())),
            // Validation errors are always fatal, even when skip-on-failure is active.
            Err(AppError::Validation(msg)) => return Err(AppError::Validation(msg)),
            Err(e) if skip_embed => {
                tracing::warn!(error = %e, "embedding failed; --skip-embedding-on-failure active, persisting without embedding");
                (None, None)
            }
            Err(e) => return Err(e),
        }
    } else {
        let chunk_texts: Vec<String> = chunks_info
            .iter()
            .map(|c| chunking::chunk_text(&raw_body, c).to_string())
            .collect();
        output::emit_progress_i18n(
            &format!(
                "Embedding {} chunks in parallel batches (parallelism {})...",
                chunks_info.len(),
                args.llm_parallelism
            ),
            &format!(
                "Embedding {} chunks em lotes paralelos (paralelismo {})...",
                chunks_info.len(),
                args.llm_parallelism
            ),
        );
        // Abort before the batch embedding if the process already exceeds the RSS ceiling.
        if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
            if rss > args.max_rss_mb {
                tracing::error!(target: "remember",
                    rss_mb = rss,
                    max_rss_mb = args.max_rss_mb,
                    "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
                );
                return Err(AppError::LowMemory {
                    available_mb: crate::memory_guard::available_memory_mb(),
                    required_mb: args.max_rss_mb,
                });
            }
        }
        match crate::embedder::embed_passages_parallel_with_embedding_choice(
            &paths.models,
            &chunk_texts,
            args.llm_parallelism as usize,
            crate::embedder::chunk_embed_batch_size(),
            embedding_backend,
            llm_backend,
        ) {
            Ok(chunk_embeddings) => {
                output::emit_progress_i18n(
                    &format!(
                        "Remember stage: chunk embeddings complete; process RSS {} MB",
                        crate::memory_guard::current_process_memory_mb().unwrap_or(0)
                    ),
                    &format!(
                        "Stage remember: chunk embeddings completed; process RSS {} MB",
                        crate::memory_guard::current_process_memory_mb().unwrap_or(0)
                    ),
                );
                // The memory-level vector is the aggregate of the chunk vectors.
                let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
                chunk_embeddings_cache = Some(chunk_embeddings);
                (Some(aggregated), None)
            }
            Err(e) if skip_embed => {
                tracing::warn!(error = %e, "chunk embedding failed; --skip-embedding-on-failure active, persisting without embedding");
                (None, None)
            }
            Err(e) => return Err(e),
        }
    };
    let body_for_storage = raw_body;

    let memory_type = resolved_type.as_str();
    let new_memory = NewMemory {
        namespace: namespace.clone(),
        name: normalized_name.clone(),
        memory_type: memory_type.to_string(),
        description: resolved_description.clone(),
        body: body_for_storage,
        body_hash: body_hash.clone(),
        session_id: args.session_id.clone(),
        source: "agent".to_string(),
        metadata,
    };

    let mut warnings = Vec::with_capacity(4);
    let mut entities_persisted = 0usize;
    let mut relationships_persisted = 0usize;

    // --- Entity embeddings (computed before the write transaction is opened) ----------------
    // The embedded text is "name description" when a description exists, otherwise the name.
    let entity_texts: Vec<String> = graph
        .entities
        .iter()
        .map(|entity| match &entity.description {
            Some(desc) => format!("{} {}", entity.name, desc),
            None => entity.name.clone(),
        })
        .collect();
    let (graph_entity_embeddings, embed_cache_stats) =
        match crate::embedder::embed_entity_texts_cached(
            &paths.models,
            &entity_texts,
            args.llm_parallelism as usize,
            embedding_backend,
            llm_backend,
        ) {
            Ok(r) => r,
            Err(e) if skip_embed => {
                tracing::warn!(error = %e, "entity embedding failed; --skip-embedding-on-failure active");
                // One empty vector per entity keeps the positional indexing below valid.
                let empty: Vec<Vec<f32>> = entity_texts.iter().map(|_| vec![]).collect();
                (empty, crate::embedder::EmbedCacheStats::default())
            }
            Err(e) => return Err(e),
        };
    if embed_cache_stats.hits > 0 {
        tracing::debug!(
            hits = embed_cache_stats.hits,
            misses = embed_cache_stats.misses,
            requested = embed_cache_stats.requested,
            "G56: entity embed cache hit (remember)"
        );
    }

    // --- Persistence (single immediate transaction) ----------------------------------------
    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;

    // Set when an update leaves the body hash unchanged; chunk rows are then left untouched.
    let mut skip_reindex = false;
    let (memory_id, action, version) = match existing_memory {
        // Update path (--force-merge on an existing memory).
        Some((existing_id, _updated_at, _current_version)) => {
            if let Some(hash_id) = duplicate_hash_id {
                // Matching its own previous body is not worth a warning.
                if hash_id != existing_id {
                    warnings.push(format!(
                        "identical body already exists as memory id {hash_id}"
                    ));
                }
            }

            // Old values are captured before the update; they are passed to the FTS sync below.
            let (old_fts_name, old_fts_desc, old_fts_body): (String, String, String) = tx
                .query_row(
                    "SELECT name, description, body FROM memories WHERE id = ?1",
                    rusqlite::params![existing_id],
                    |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
                )?;

            // A failed hash lookup is treated as "body changed" (errors are dropped by `.ok()`).
            let existing_body_hash: Option<String> = tx
                .query_row(
                    "SELECT body_hash FROM memories WHERE id = ?1",
                    rusqlite::params![existing_id],
                    |r| r.get(0),
                )
                .ok();
            let body_unchanged = existing_body_hash.as_deref() == Some(&body_hash);
            skip_reindex = body_unchanged;
            if !body_unchanged {
                storage_chunks::delete_chunks(&tx, existing_id)?;
            }

            let next_v = versions::next_version(&tx, existing_id)?;
            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;

            memories::sync_fts_after_update(
                &tx,
                existing_id,
                &old_fts_name,
                &old_fts_desc,
                &old_fts_body,
                &normalized_name,
                &resolved_description,
                &new_memory.body,
            )?;

            versions::insert_version(
                &tx,
                existing_id,
                next_v,
                &normalized_name,
                memory_type,
                &resolved_description,
                &new_memory.body,
                &serde_json::to_string(&new_memory.metadata)?,
                None,
                "edit",
            )?;
            // The memory vector is rewritten only when the body changed and an embedding exists.
            if !body_unchanged {
                if let Some(ref emb) = embedding {
                    memories::upsert_vec(
                        &tx,
                        existing_id,
                        &namespace,
                        memory_type,
                        emb,
                        &normalized_name,
                        &snippet,
                    )?;
                }
            }
            (existing_id, "updated".to_string(), next_v)
        }
        // Create path.
        None => {
            if let Some(hash_id) = duplicate_hash_id {
                warnings.push(format!(
                    "identical body already exists as memory id {hash_id}"
                ));
            }
            let id = memories::insert(&tx, &new_memory)?;
            versions::insert_version(
                &tx,
                id,
                1,
                &normalized_name,
                memory_type,
                &resolved_description,
                &new_memory.body,
                &serde_json::to_string(&new_memory.metadata)?,
                None,
                "create",
            )?;
            if let Some(ref emb) = embedding {
                memories::upsert_vec(
                    &tx,
                    id,
                    &namespace,
                    memory_type,
                    emb,
                    &normalized_name,
                    &snippet,
                )?;
            }
            (id, "created".to_string(), 1)
        }
    };

    // Chunk rows are written only for multi-chunk bodies whose content changed.
    if chunks_info.len() > 1 && !skip_reindex {
        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;

        // The cache is absent when chunk embedding failed with skip-on-failure active.
        if let Some(chunk_embeddings) = chunk_embeddings_cache.take() {
            for (i, emb) in chunk_embeddings.iter().enumerate() {
                // NOTE(review): the chunk position `i` is passed as both the second and the
                // fourth argument — confirm against `upsert_chunk_vec`'s signature.
                storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
            }
        }
        output::emit_progress_i18n(
            &format!(
                "Remember stage: persisted chunk vectors; process RSS {} MB",
                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
            ),
            &format!(
                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
            ),
        );
    }

    // --- Graph persistence -----------------------------------------------------------------
    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
        for entity in &graph.entities {
            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
            // `entities_persisted` doubles as the index into the positional embedding list.
            let entity_embedding = &graph_entity_embeddings[entities_persisted];
            entities::upsert_entity_vec(
                &tx,
                entity_id,
                &namespace,
                entity.entity_type,
                entity_embedding,
                &entity.name,
            )?;
            entities::link_memory_entity(&tx, memory_id, entity_id)?;
            entities_persisted += 1;
        }
        // Name -> type lookup so relationship endpoints reuse the declared entity type.
        let entity_types: std::collections::HashMap<&str, EntityType> = graph
            .entities
            .iter()
            .map(|entity| (entity.name.as_str(), entity.entity_type))
            .collect();

        // Entities whose degree is recalculated after all relationships are written.
        let mut affected_entity_ids: std::collections::HashSet<i64> =
            std::collections::HashSet::new();
        for entity in &graph.entities {
            if let Some(eid) = entities::find_entity_id(&tx, &namespace, &entity.name)? {
                affected_entity_ids.insert(eid);
            }
        }

        for rel in &graph.relationships {
            // Endpoints not declared as entities are upserted as `Concept` with no description.
            let source_entity = NewEntity {
                name: rel.source.clone(),
                entity_type: entity_types
                    .get(rel.source.as_str())
                    .copied()
                    .unwrap_or(EntityType::Concept),
                description: None,
            };
            let target_entity = NewEntity {
                name: rel.target.clone(),
                entity_type: entity_types
                    .get(rel.target.as_str())
                    .copied()
                    .unwrap_or(EntityType::Concept),
                description: None,
            };
            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
            affected_entity_ids.insert(source_id);
            affected_entity_ids.insert(target_id);
            relationships_persisted += 1;
        }

        for &eid in &affected_entity_ids {
            entities::recalculate_degree(&tx, eid)?;
        }
        // Exceeding the degree cap only logs a warning; nothing is rejected or removed here.
        if args.max_entity_degree > 0 {
            let cap = args.max_entity_degree as i64;
            for &eid in &affected_entity_ids {
                let degree: i64 = tx.query_row(
                    "SELECT degree FROM entities WHERE id = ?1",
                    rusqlite::params![eid],
                    |r| r.get(0),
                )?;
                if degree > cap {
                    let name: String = tx.query_row(
                        "SELECT name FROM entities WHERE id = ?1",
                        rusqlite::params![eid],
                        |r| r.get(0),
                    )?;
                    tracing::warn!(target: "remember",
                        entity = %name,
                        degree = degree,
                        cap = cap,
                        "entity degree cap exceeded"
                    );
                }
            }
        }
    }
    tx.commit()?;

    // URLs are stored after the commit, on the plain connection (outside the transaction).
    let urls_persisted = if !extracted_urls.is_empty() {
        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
            .into_iter()
            .map(|u| storage_urls::MemoryUrl {
                url: u.url,
                offset: Some(u.start as i64),
            })
            .collect();
        storage_urls::insert_urls(&conn, memory_id, &url_entries)
    } else {
        0
    };

    // Runs a WAL checkpoint in TRUNCATE mode once all writes are done.
    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;

    // Both timestamps are taken at response time, from two separate `Utc::now()` calls.
    let created_at_epoch = chrono::Utc::now().timestamp();
    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());

    output::emit_json(&RememberResponse {
        memory_id,
        name: normalized_name.clone(),
        namespace,
        // `action` and `operation` carry the same value.
        action: action.clone(),
        operation: action,
        version,
        entities_persisted,
        relationships_persisted,
        relationships_truncated,
        chunks_created,
        chunks_persisted,
        urls_persisted,
        extraction_method,
        merged_into_memory_id: None,
        warnings,
        created_at: created_at_epoch,
        created_at_iso,
        elapsed_ms: inicio.elapsed().as_millis() as u64,
        name_was_normalized,
        // The original name is reported only when normalization changed it.
        original_name: name_was_normalized.then_some(original_name),
        backend_invoked: backend_invoked_passage,
    })?;

    Ok(())
}
1061
1062#[cfg(test)]
1063mod tests {
1064 use super::compute_chunks_persisted;
1065 use crate::output::RememberResponse;
1066
1067 #[test]
1069 fn chunks_persisted_zero_for_zero_chunks() {
1070 assert_eq!(compute_chunks_persisted(0), 0);
1071 }
1072
1073 #[test]
1074 fn chunks_persisted_zero_for_single_chunk_body() {
1075 assert_eq!(compute_chunks_persisted(1), 0);
1078 }
1079
1080 #[test]
1081 fn chunks_persisted_equals_count_for_multi_chunk_body() {
1082 assert_eq!(compute_chunks_persisted(2), 2);
1084 assert_eq!(compute_chunks_persisted(7), 7);
1085 assert_eq!(compute_chunks_persisted(64), 64);
1086 }
1087
1088 #[test]
1089 fn remember_response_serializes_required_fields() {
1090 let resp = RememberResponse {
1091 memory_id: 42,
1092 name: "minha-mem".to_string(),
1093 namespace: "global".to_string(),
1094 action: "created".to_string(),
1095 operation: "created".to_string(),
1096 version: 1,
1097 entities_persisted: 0,
1098 relationships_persisted: 0,
1099 relationships_truncated: false,
1100 chunks_created: 1,
1101 chunks_persisted: 0,
1102 urls_persisted: 0,
1103 extraction_method: None,
1104 merged_into_memory_id: None,
1105 warnings: vec![],
1106 created_at: 1_705_320_000,
1107 created_at_iso: "2024-01-15T12:00:00Z".to_string(),
1108 elapsed_ms: 55,
1109 name_was_normalized: false,
1110 original_name: None,
1111 backend_invoked: None,
1112 };
1113
1114 let json = serde_json::to_value(&resp).expect("serialization failed");
1115 assert_eq!(json["memory_id"], 42);
1116 assert_eq!(json["action"], "created");
1117 assert_eq!(json["operation"], "created");
1118 assert_eq!(json["version"], 1);
1119 assert_eq!(json["elapsed_ms"], 55u64);
1120 assert!(json["warnings"].is_array());
1121 assert!(json["merged_into_memory_id"].is_null());
1122 }
1123
1124 #[test]
1125 fn remember_response_action_e_operation_sao_aliases() {
1126 let resp = RememberResponse {
1127 memory_id: 1,
1128 name: "mem".to_string(),
1129 namespace: "global".to_string(),
1130 action: "updated".to_string(),
1131 operation: "updated".to_string(),
1132 version: 2,
1133 entities_persisted: 3,
1134 relationships_persisted: 1,
1135 relationships_truncated: false,
1136 extraction_method: None,
1137 chunks_created: 2,
1138 chunks_persisted: 2,
1139 urls_persisted: 0,
1140 merged_into_memory_id: None,
1141 warnings: vec![],
1142 created_at: 0,
1143 created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1144 elapsed_ms: 0,
1145 name_was_normalized: false,
1146 original_name: None,
1147 backend_invoked: None,
1148 };
1149
1150 let json = serde_json::to_value(&resp).expect("serialization failed");
1151 assert_eq!(
1152 json["action"], json["operation"],
1153 "action e operation devem ser iguais"
1154 );
1155 assert_eq!(json["entities_persisted"], 3);
1156 assert_eq!(json["relationships_persisted"], 1);
1157 assert_eq!(json["chunks_created"], 2);
1158 }
1159
1160 #[test]
1161 fn remember_response_warnings_lista_mensagens() {
1162 let resp = RememberResponse {
1163 memory_id: 5,
1164 name: "dup-mem".to_string(),
1165 namespace: "global".to_string(),
1166 action: "created".to_string(),
1167 operation: "created".to_string(),
1168 version: 1,
1169 entities_persisted: 0,
1170 extraction_method: None,
1171 relationships_persisted: 0,
1172 relationships_truncated: false,
1173 chunks_created: 1,
1174 chunks_persisted: 0,
1175 urls_persisted: 0,
1176 merged_into_memory_id: None,
1177 warnings: vec!["identical body already exists as memory id 3".to_string()],
1178 created_at: 0,
1179 created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1180 elapsed_ms: 10,
1181 name_was_normalized: false,
1182 original_name: None,
1183 backend_invoked: None,
1184 };
1185
1186 let json = serde_json::to_value(&resp).expect("serialization failed");
1187 let warnings = json["warnings"]
1188 .as_array()
1189 .expect("warnings deve ser array");
1190 assert_eq!(warnings.len(), 1);
1191 assert!(warnings[0].as_str().unwrap().contains("identical body"));
1192 }
1193
1194 #[test]
1195 fn invalid_name_reserved_prefix_returns_validation_error() {
1196 use crate::errors::AppError;
1197 let nome = "__reservado";
1199 let resultado: Result<(), AppError> = if nome.starts_with("__") {
1200 Err(AppError::Validation(
1201 crate::i18n::validation::reserved_name(),
1202 ))
1203 } else {
1204 Ok(())
1205 };
1206 assert!(resultado.is_err());
1207 if let Err(AppError::Validation(msg)) = resultado {
1208 assert!(!msg.is_empty());
1209 }
1210 }
1211
1212 #[test]
1213 fn name_too_long_returns_validation_error() {
1214 use crate::errors::AppError;
1215 let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
1216 let resultado: Result<(), AppError> =
1217 if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
1218 Err(AppError::Validation(crate::i18n::validation::name_length(
1219 crate::constants::MAX_MEMORY_NAME_LEN,
1220 )))
1221 } else {
1222 Ok(())
1223 };
1224 assert!(resultado.is_err());
1225 }
1226
1227 #[test]
1228 fn remember_response_merged_into_memory_id_some_serializes_integer() {
1229 let resp = RememberResponse {
1230 memory_id: 10,
1231 name: "mem-mergeada".to_string(),
1232 namespace: "global".to_string(),
1233 action: "updated".to_string(),
1234 operation: "updated".to_string(),
1235 version: 3,
1236 extraction_method: None,
1237 entities_persisted: 0,
1238 relationships_persisted: 0,
1239 relationships_truncated: false,
1240 chunks_created: 1,
1241 chunks_persisted: 0,
1242 urls_persisted: 0,
1243 merged_into_memory_id: Some(7),
1244 warnings: vec![],
1245 created_at: 0,
1246 created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1247 elapsed_ms: 0,
1248 name_was_normalized: false,
1249 original_name: None,
1250 backend_invoked: None,
1251 };
1252
1253 let json = serde_json::to_value(&resp).expect("serialization failed");
1254 assert_eq!(json["merged_into_memory_id"], 7);
1255 }
1256
1257 #[test]
1258 fn remember_response_urls_persisted_serializes_field() {
1259 let resp = RememberResponse {
1261 memory_id: 3,
1262 name: "mem-com-urls".to_string(),
1263 namespace: "global".to_string(),
1264 action: "created".to_string(),
1265 operation: "created".to_string(),
1266 version: 1,
1267 entities_persisted: 0,
1268 relationships_persisted: 0,
1269 relationships_truncated: false,
1270 chunks_created: 1,
1271 chunks_persisted: 0,
1272 urls_persisted: 3,
1273 extraction_method: Some("regex-only".to_string()),
1274 merged_into_memory_id: None,
1275 warnings: vec![],
1276 created_at: 0,
1277 created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1278 elapsed_ms: 0,
1279 name_was_normalized: false,
1280 original_name: None,
1281 backend_invoked: None,
1282 };
1283 let json = serde_json::to_value(&resp).expect("serialization failed");
1284 assert_eq!(json["urls_persisted"], 3);
1285 }
1286
1287 #[test]
1288 fn empty_name_after_normalization_returns_specific_message() {
1289 use crate::errors::AppError;
1292 let normalized = "---".to_lowercase().replace(['_', ' '], "-");
1293 let normalized = normalized.trim_matches('-').to_string();
1294 let resultado: Result<(), AppError> = if normalized.is_empty() {
1295 Err(AppError::Validation(
1296 "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
1297 ))
1298 } else {
1299 Ok(())
1300 };
1301 assert!(resultado.is_err());
1302 if let Err(AppError::Validation(msg)) = resultado {
1303 assert!(
1304 msg.contains("empty after normalization"),
1305 "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
1306 );
1307 }
1308 }
1309
1310 #[test]
1311 fn name_only_underscores_after_normalization_returns_specific_message() {
1312 use crate::errors::AppError;
1314 let normalized = "___".to_lowercase().replace(['_', ' '], "-");
1315 let normalized = normalized.trim_matches('-').to_string();
1316 assert!(
1317 normalized.is_empty(),
1318 "underscores devem normalizar para string vazia"
1319 );
1320 let resultado: Result<(), AppError> = if normalized.is_empty() {
1321 Err(AppError::Validation(
1322 "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
1323 ))
1324 } else {
1325 Ok(())
1326 };
1327 assert!(resultado.is_err());
1328 if let Err(AppError::Validation(msg)) = resultado {
1329 assert!(
1330 msg.contains("empty after normalization"),
1331 "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
1332 );
1333 }
1334 }
1335
1336 #[test]
1337 fn remember_response_relationships_truncated_serializes_field() {
1338 let resp_false = RememberResponse {
1340 memory_id: 1,
1341 name: "test".to_string(),
1342 namespace: "global".to_string(),
1343 action: "created".to_string(),
1344 operation: "created".to_string(),
1345 version: 1,
1346 entities_persisted: 2,
1347 relationships_persisted: 1,
1348 relationships_truncated: false,
1349 chunks_created: 1,
1350 chunks_persisted: 0,
1351 urls_persisted: 0,
1352 extraction_method: None,
1353 merged_into_memory_id: None,
1354 warnings: vec![],
1355 created_at: 0,
1356 created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1357 elapsed_ms: 0,
1358 name_was_normalized: false,
1359 original_name: None,
1360 backend_invoked: None,
1361 };
1362 let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
1363 assert_eq!(json_false["relationships_truncated"], false);
1364
1365 let resp_true = RememberResponse {
1366 relationships_truncated: true,
1367 ..resp_false
1368 };
1369 let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
1370 assert_eq!(json_true["relationships_truncated"], true);
1371 }
1372
1373 fn should_preserve_body(force_merge: bool, raw_body_is_empty: bool, clear_body: bool) -> bool {
1382 force_merge && raw_body_is_empty && !clear_body
1383 }
1384
1385 #[test]
1386 fn gap08_empty_body_force_merge_no_clear_body_preserves() {
1387 assert!(
1390 should_preserve_body(true, true, false),
1391 "empty body + force-merge + no clear-body should trigger preservation"
1392 );
1393 }
1394
1395 #[test]
1396 fn gap08_empty_body_force_merge_with_clear_body_does_not_preserve() {
1397 assert!(
1399 !should_preserve_body(true, true, true),
1400 "--clear-body must bypass preservation"
1401 );
1402 }
1403
1404 #[test]
1405 fn gap08_non_empty_body_force_merge_does_not_preserve() {
1406 assert!(
1408 !should_preserve_body(true, false, false),
1409 "non-empty body must overwrite, not preserve"
1410 );
1411 }
1412
1413 #[test]
1414 fn gap08_empty_body_no_force_merge_does_not_preserve() {
1415 assert!(
1417 !should_preserve_body(false, true, false),
1418 "no --force-merge means no preservation logic applies"
1419 );
1420 }
1421}