1use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Maps the number of chunks produced for a body to the `chunks_persisted`
/// value reported in the response.
///
/// Returns `0` when `chunks_created` is `0` or `1`; for any larger count the
/// count itself is returned unchanged.
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        multi => multi,
    }
}
32
// Command-line arguments of the `remember` subcommand.
//
// NOTE: plain `//` comments are used throughout this struct on purpose. With
// clap's derive API, `///` doc comments are turned into help/about text, so
// adding them here would change the generated `--help` output.
//
// Body sources (`--body`, `--body-file`, `--body-stdin`, `--graph-stdin`) are
// declared mutually exclusive through `conflicts_with_all`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
    # Create a memory with inline body\n \
    sqlite-graphrag remember --name design-auth --type decision \\\n \
    --description \"auth design\" --body \"JWT for stateless auth\"\n\n \
    # Create with curated graph via --graph-stdin\n \
    echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n \
    sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n \
    # Enable GLiNER NER extraction with --graph-stdin\n \
    echo '{\"body\":\"Alice from Microsoft...\",\"entities\":[],\"relationships\":[]}' | \\\n \
    sqlite-graphrag remember --name ner-test --type note --description \"test\" \\\n \
    --graph-stdin --enable-ner --gliner-variant int8\n\n \
    # Idempotent upsert with --force-merge\n \
    sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n \
    --body \"new content\" --force-merge\n\n\
    NOTE:\n \
    remember does NOT accept positional arguments.\n \
    Use --body \"text\" for inline content\n \
    Use --body-file path for file content\n \
    Use --body-stdin for piped content\n \
    Use --graph-stdin for JSON with entities and relationships\n\n\
    ENTITY TYPES (for --graph-stdin entities, NOT memory --type):\n \
    concept, tool, person, file, project, decision, incident,\n \
    organization, location, date, dashboard, issue_tracker, memory\n \
    WARNING: reference, skill, document, note, user, feedback are\n \
    MEMORY types only — NOT valid for entities.\n \
    Mapping: reference→concept, document→file, user→person")]
pub struct RememberArgs {
    // Memory name. `run` lowercases it, replaces `_` and spaces with `-`,
    // trims leading/trailing hyphens and then validates the result.
    #[arg(long)]
    pub name: String,
    // Memory kind. `run` requires it when creating a new memory and inherits
    // the stored type when it is omitted on an update.
    #[arg(
        long,
        value_enum,
        long_help = "Memory kind stored in `memories.type`. Required when creating a new memory. Optional with --force-merge: if omitted the existing memory type is inherited. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
    )]
    pub r#type: Option<MemoryType>,
    // Description. Required by `run` for new memories; inherited from the
    // existing row when omitted on an update.
    #[arg(long)]
    pub description: Option<String>,
    // Inline body text.
    #[arg(
        long,
        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
    )]
    pub body: Option<String>,
    // Path of a file whose content becomes the body.
    #[arg(
        long,
        help = "Read body from a file instead of --body",
        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
    )]
    pub body_file: Option<std::path::PathBuf>,
    // When set, `run` reads the body from stdin.
    #[arg(
        long,
        conflicts_with_all = ["body", "body_file", "graph_stdin"]
    )]
    pub body_stdin: bool,
    // JSON file that `run` deserializes into the list of entities.
    #[arg(
        long,
        help = "JSON file containing entities to associate with this memory"
    )]
    pub entities_file: Option<std::path::PathBuf>,
    // JSON file that `run` deserializes into the list of relationships.
    #[arg(
        long,
        help = "JSON file containing relationships to associate with this memory"
    )]
    pub relationships_file: Option<std::path::PathBuf>,
    // When set, stdin carries one JSON document with body, entities and
    // relationships; it replaces every other body/graph input.
    #[arg(
        long,
        help = "Read graph JSON (body + entities + relationships) from stdin",
        conflicts_with_all = [
            "body",
            "body_file",
            "body_stdin",
            "entities_file",
            "relationships_file"
        ]
    )]
    pub graph_stdin: bool,
    // Namespace override; resolved by `crate::namespace::resolve_namespace`.
    #[arg(
        long,
        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
    )]
    pub namespace: Option<String>,
    // Inline metadata as a JSON string. `run` checks this before
    // `metadata_file`, so it wins when both are given.
    #[arg(long)]
    pub metadata: Option<String>,
    #[arg(long, help = "JSON file containing metadata key-value pairs")]
    pub metadata_file: Option<std::path::PathBuf>,
    // Update an existing memory of the same name instead of failing with a
    // duplicate error; `run` also un-deletes a soft-deleted memory of that name.
    #[arg(long)]
    pub force_merge: bool,
    // Expected `updated_at` of the existing row; forwarded to `memories::update`.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
            Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    // Takes zero or one value: a bare `--enable-ner` means "true", an absent
    // flag means "false"; the value can also come from the environment.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_ENABLE_NER",
        value_parser = crate::parsers::parse_bool_flexible,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        default_missing_value = "true",
        default_value = "false",
        help = "Enable automatic GLiNER NER entity/relationship extraction from body"
    )]
    pub enable_ner: bool,
    // Kept as a free-form string; `run` maps it onto `GlinerVariant`.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
        default_value = "fp32",
        help = "GLiNER model variant: fp32 (1.1GB, best quality), fp16 (580MB), int8 (349MB, fastest but may miss entities on short texts), q4, q4f16"
    )]
    pub gliner_variant: String,
    // Hidden flag; `run` only logs a deprecation warning for it and rejects
    // it when combined with `--enable-ner`.
    #[arg(long, hide = true)]
    pub skip_extraction: bool,
    #[arg(
        long,
        default_value_t = false,
        help = "Explicitly clear body content during --force-merge (without this flag, an empty body is ignored and the existing body is kept)"
    )]
    pub clear_body: bool,
    #[arg(
        long,
        default_value_t = false,
        help = "Validate input and report planned actions without persisting"
    )]
    pub dry_run: bool,
    // Optional session identifier stored on the memory row.
    #[arg(long)]
    pub session_id: Option<String>,
    // Output format selector; `run` reads it but does not act on its value.
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; handed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    // RSS ceiling checked by `run` before each chunk embedding in the
    // multi-chunk path.
    #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
        help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
    pub max_rss_mb: u64,
    // Entity degree threshold. `run` only logs a warning when an entity's
    // degree exceeds it; a value of 0 disables the check.
    #[arg(long, default_value_t = 50, value_name = "N")]
    pub max_entity_degree: u32,
}
192
193#[derive(Deserialize, Default)]
194#[serde(deny_unknown_fields)]
195struct GraphInput {
196 #[serde(default)]
197 body: Option<String>,
198 #[serde(default)]
199 entities: Vec<NewEntity>,
200 #[serde(default)]
201 relationships: Vec<NewRelationship>,
202}
203
204fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
205 for rel in &mut graph.relationships {
206 rel.relation = crate::parsers::normalize_relation(&rel.relation);
207 if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
208 return Err(AppError::Validation(format!(
209 "{e} for relationship '{}' -> '{}'",
210 rel.source, rel.target
211 )));
212 }
213 crate::parsers::warn_if_non_canonical(&rel.relation);
214 if !(0.0..=1.0).contains(&rel.strength) {
215 return Err(AppError::Validation(format!(
216 "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
217 rel.strength, rel.source, rel.target
218 )));
219 }
220 }
221
222 Ok(())
223}
224
225#[tracing::instrument(skip_all, level = "debug", name = "remember")]
226pub fn run(args: RememberArgs) -> Result<(), AppError> {
227 use crate::constants::*;
228
229 let inicio = std::time::Instant::now();
230 let _ = args.format;
231 tracing::debug!(target: "remember", name = %args.name, "persisting memory");
232 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
233
234 let original_name = args.name.clone();
238
239 let normalized_name = {
243 let lower = args.name.to_lowercase().replace(['_', ' '], "-");
244 let trimmed = lower.trim_matches('-').to_string();
245 if trimmed != args.name {
246 tracing::warn!(target: "remember",
247 original = %args.name,
248 normalized = %trimmed,
249 "name auto-normalized to kebab-case"
250 );
251 }
252 trimmed
253 };
254 let name_was_normalized = normalized_name != original_name;
255
256 if normalized_name.is_empty() {
257 return Err(AppError::Validation(
258 "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
259 ));
260 }
261 if normalized_name.len() > MAX_MEMORY_NAME_LEN {
262 return Err(AppError::LimitExceeded(
263 crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
264 ));
265 }
266
267 if normalized_name.starts_with("__") {
268 return Err(AppError::Validation(
269 crate::i18n::validation::reserved_name(),
270 ));
271 }
272
273 {
274 let slug_re = crate::constants::name_slug_regex();
275 if !slug_re.is_match(&normalized_name) {
276 return Err(AppError::Validation(crate::i18n::validation::name_kebab(
277 &normalized_name,
278 )));
279 }
280 }
281
282 if let Some(ref desc) = args.description {
283 if desc.len() > MAX_MEMORY_DESCRIPTION_LEN {
284 return Err(AppError::Validation(
285 crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
286 ));
287 }
288 }
289
290 let mut raw_body = if let Some(b) = args.body {
291 b
292 } else if let Some(ref path) = args.body_file {
293 let file_size = std::fs::metadata(path).map_err(AppError::Io)?.len();
294 if file_size > MAX_MEMORY_BODY_LEN as u64 {
295 return Err(AppError::LimitExceeded(
296 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
297 ));
298 }
299 match std::fs::read_to_string(path) {
300 Ok(s) => s,
301 Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
302 let bytes = std::fs::read(path).map_err(AppError::Io)?;
303 tracing::warn!(target: "remember", "body file contains invalid UTF-8; replacing invalid sequences");
304 String::from_utf8_lossy(&bytes).into_owned()
305 }
306 Err(e) => return Err(AppError::Io(e)),
307 }
308 } else if args.body_stdin || args.graph_stdin {
309 crate::stdin_helper::read_stdin_with_timeout(60)?
310 } else {
311 String::new()
312 };
313
314 let mut entities_provided_externally =
315 args.entities_file.is_some() || args.relationships_file.is_some();
316
317 let mut graph = GraphInput::default();
318 if let Some(path) = args.entities_file {
319 let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
320 if file_size > MAX_MEMORY_BODY_LEN as u64 {
321 return Err(AppError::LimitExceeded(
322 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
323 ));
324 }
325 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
326 graph.entities = serde_json::from_str(&content)?;
327 }
328 if let Some(path) = args.relationships_file {
329 let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
330 if file_size > MAX_MEMORY_BODY_LEN as u64 {
331 return Err(AppError::LimitExceeded(
332 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
333 ));
334 }
335 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
336 graph.relationships = serde_json::from_str(&content)?;
337 }
338 if args.graph_stdin {
339 graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
340 AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
341 })?;
342 raw_body = graph.body.take().unwrap_or_default();
343 }
344 if args.graph_stdin && !graph.entities.is_empty() {
345 entities_provided_externally = true;
346 }
347
348 if graph.entities.len() > max_entities_per_memory() {
349 return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
350 max_entities_per_memory(),
351 )));
352 }
353 let mut relationships_truncated = false;
354 let rel_cap = max_relationships_per_memory();
355 if graph.relationships.len() > rel_cap {
356 tracing::warn!(target: "remember",
357 count = graph.relationships.len(),
358 cap = rel_cap,
359 "truncating relationships to cap"
360 );
361 graph.relationships.truncate(rel_cap);
362 relationships_truncated = true;
363 }
364 normalize_and_validate_graph_input(&mut graph)?;
365
366 if raw_body.len() > MAX_MEMORY_BODY_LEN {
367 return Err(AppError::LimitExceeded(
368 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
369 ));
370 }
371
372 let body_will_be_preserved = args.force_merge && raw_body.trim().is_empty() && !args.clear_body;
377 if !entities_provided_externally
378 && graph.entities.is_empty()
379 && raw_body.trim().is_empty()
380 && !body_will_be_preserved
381 && !args.clear_body
382 {
383 return Err(AppError::Validation(crate::i18n::validation::empty_body()));
384 }
385
386 let metadata: serde_json::Value = if let Some(m) = args.metadata {
387 serde_json::from_str(&m)?
388 } else if let Some(path) = args.metadata_file {
389 let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
390 if file_size > MAX_MEMORY_BODY_LEN as u64 {
391 return Err(AppError::LimitExceeded(
392 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
393 ));
394 }
395 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
396 serde_json::from_str(&content)?
397 } else {
398 serde_json::json!({})
399 };
400
401 let mut body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
402 let mut snippet: String = raw_body.chars().take(200).collect();
403
404 let paths = AppPaths::resolve(args.db.as_deref())?;
405 paths.ensure_dirs()?;
406
407 let mut extraction_method: Option<String> = None;
409 let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
410 if args.enable_ner && args.skip_extraction {
411 return Err(AppError::Validation(
412 "--enable-ner and --skip-extraction are mutually exclusive; remove one".to_string(),
413 ));
414 }
415 if args.skip_extraction && !args.enable_ner {
416 tracing::warn!(
423 "--skip-extraction is deprecated since v1.0.45 and has no effect (NER is disabled by default); remove this flag to silence the warning"
424 );
425 }
426 let gliner_variant: crate::extraction::GlinerVariant = match args.gliner_variant.as_str() {
427 "int8" => crate::extraction::GlinerVariant::Int8,
428 _ => crate::extraction::GlinerVariant::Fp32,
429 };
430 if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
431 match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
432 Ok(extracted) => {
433 extraction_method = Some("url-regex".to_string());
437 extracted_urls = extracted.urls;
438 graph.entities = extracted
441 .entities
442 .into_iter()
443 .map(|e| NewEntity {
444 name: e.name,
445 entity_type: crate::entity_type::EntityType::Concept,
446 description: None,
447 })
448 .collect();
449 graph.relationships.clear();
450 relationships_truncated = false;
451
452 if graph.entities.len() > max_entities_per_memory() {
453 graph.entities.truncate(max_entities_per_memory());
454 }
455 if graph.relationships.len() > max_relationships_per_memory() {
456 relationships_truncated = true;
457 graph.relationships.truncate(max_relationships_per_memory());
458 }
459 normalize_and_validate_graph_input(&mut graph)?;
460 }
461 Err(e) => {
462 tracing::warn!(target: "remember", error = %e, "auto-extraction failed, graceful degradation");
463 extraction_method = Some("none:extraction-failed".to_string());
464 }
465 }
466 }
467
468 let mut conn = open_rw(&paths.db)?;
469 ensure_schema(&mut conn)?;
470
471 if args.dry_run {
473 let existing = memories::find_by_name(&conn, &namespace, &normalized_name)?;
474 let planned_action = if existing.is_some() && args.force_merge {
475 "would_update"
476 } else {
477 "would_create"
478 };
479 output::emit_json(&serde_json::json!({
480 "dry_run": true,
481 "name": normalized_name,
482 "namespace": namespace,
483 "planned_action": planned_action,
484 }))?;
485 return Ok(());
486 }
487
488 {
489 use crate::constants::MAX_NAMESPACES_ACTIVE;
490 let active_count: u32 = conn.query_row(
491 "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
492 [],
493 |r| r.get::<_, i64>(0).map(|v| v as u32),
494 )?;
495 let ns_exists: bool = conn.query_row(
496 "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
497 rusqlite::params![namespace],
498 |r| r.get::<_, i64>(0).map(|v| v > 0),
499 )?;
500 if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
501 return Err(AppError::NamespaceError(format!(
502 "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
503 )));
504 }
505 }
506
507 if let Some((sd_id, true)) =
509 memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
510 {
511 if args.force_merge {
512 memories::clear_deleted_at(&conn, sd_id)?;
513 } else {
514 return Err(AppError::Duplicate(
515 errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
516 ));
517 }
518 }
519
520 let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
521 if existing_memory.is_some() && !args.force_merge {
522 return Err(AppError::Duplicate(errors_msg::duplicate_memory(
523 &normalized_name,
524 &namespace,
525 )));
526 }
527
528 let (resolved_type, resolved_description) = if existing_memory.is_none() {
532 let t = args.r#type.ok_or_else(|| {
534 AppError::Validation(
535 "--type and --description are required when creating a new memory".to_string(),
536 )
537 })?;
538 let d = args.description.clone().ok_or_else(|| {
539 AppError::Validation(
540 "--type and --description are required when creating a new memory".to_string(),
541 )
542 })?;
543 (t.as_str().to_string(), d)
544 } else {
545 let existing_row = memories::read_by_name(&conn, &namespace, &normalized_name)?
547 .ok_or_else(|| {
548 AppError::NotFound(format!(
549 "memory '{normalized_name}' not found in namespace '{namespace}'"
550 ))
551 })?;
552 let t = args
553 .r#type
554 .map(|v| v.as_str().to_string())
555 .unwrap_or_else(|| existing_row.memory_type.clone());
556 let d = args
557 .description
558 .clone()
559 .unwrap_or_else(|| existing_row.description.clone());
560 (t, d)
561 };
562
563 if body_will_be_preserved {
568 if let Some(existing_row) = memories::read_by_name(&conn, &namespace, &normalized_name)? {
569 if !existing_row.body.is_empty() {
570 tracing::debug!(target: "remember",
571 name = %normalized_name,
572 "GAP-08: empty body with --force-merge and no --clear-body; preserving existing body"
573 );
574 raw_body = existing_row.body;
575 body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
576 snippet = raw_body.chars().take(200).collect();
577 }
578 }
579 }
580
581 let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
582
583 output::emit_progress_i18n(
584 &format!(
585 "Remember stage: validated input; available memory {} MB",
586 crate::memory_guard::available_memory_mb()
587 ),
588 &format!(
589 "Stage remember: input validated; available memory {} MB",
590 crate::memory_guard::available_memory_mb()
591 ),
592 );
593
594 let model_max_length = crate::tokenizer::get_model_max_length();
595 let total_passage_tokens = crate::tokenizer::count_passage_tokens(&raw_body)?;
596 let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body);
597 let chunks_created = chunks_info.len();
598 let chunks_persisted = compute_chunks_persisted(chunks_info.len());
602
603 output::emit_progress_i18n(
604 &format!(
605 "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
606 chunks_created,
607 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
608 ),
609 &format!(
610 "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
611 chunks_created,
612 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
613 ),
614 );
615
616 if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
617 return Err(AppError::LimitExceeded(format!(
618 "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
619 crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
620 )));
621 }
622
623 output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
624 let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
625
626 let embedding = if chunks_info.len() == 1 {
627 crate::embedder::embed_passage_local(&paths.models, &raw_body)?
628 } else {
629 let chunk_texts: Vec<&str> = chunks_info
630 .iter()
631 .map(|c| chunking::chunk_text(&raw_body, c))
632 .collect();
633 output::emit_progress_i18n(
634 &format!(
635 "Embedding {} chunks serially to keep memory bounded...",
636 chunks_info.len()
637 ),
638 &format!(
639 "Embedding {} chunks serially to keep memory bounded...",
640 chunks_info.len()
641 ),
642 );
643 let embed_cap = chunk_texts.len();
644 let mut chunk_embeddings = Vec::new();
645 chunk_embeddings.try_reserve(embed_cap).map_err(|_| {
646 AppError::LimitExceeded(format!(
647 "allocation of {embed_cap} chunk embeddings would exceed available memory"
648 ))
649 })?;
650 for chunk_text in &chunk_texts {
651 if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
652 if rss > args.max_rss_mb {
653 tracing::error!(target: "remember",
654 rss_mb = rss,
655 max_rss_mb = args.max_rss_mb,
656 "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
657 );
658 return Err(AppError::LowMemory {
659 available_mb: crate::memory_guard::available_memory_mb(),
660 required_mb: args.max_rss_mb,
661 });
662 }
663 }
664 chunk_embeddings.push(crate::embedder::embed_passage_local(
665 &paths.models,
666 chunk_text,
667 )?);
668 }
669 output::emit_progress_i18n(
670 &format!(
671 "Remember stage: chunk embeddings complete; process RSS {} MB",
672 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
673 ),
674 &format!(
675 "Stage remember: chunk embeddings completed; process RSS {} MB",
676 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
677 ),
678 );
679 let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
680 chunk_embeddings_cache = Some(chunk_embeddings);
681 aggregated
682 };
683 let body_for_storage = raw_body;
684
685 let memory_type = resolved_type.as_str();
686 let new_memory = NewMemory {
687 namespace: namespace.clone(),
688 name: normalized_name.clone(),
689 memory_type: memory_type.to_string(),
690 description: resolved_description.clone(),
691 body: body_for_storage,
692 body_hash: body_hash.clone(),
693 session_id: args.session_id.clone(),
694 source: "agent".to_string(),
695 metadata,
696 };
697
698 let mut warnings = Vec::with_capacity(4);
699 let mut entities_persisted = 0usize;
700 let mut relationships_persisted = 0usize;
701
702 let graph_entity_embeddings = graph
703 .entities
704 .iter()
705 .map(|entity| {
706 let entity_text = match &entity.description {
707 Some(desc) => format!("{} {}", entity.name, desc),
708 None => entity.name.clone(),
709 };
710 crate::embedder::embed_passage_local(&paths.models, &entity_text)
711 })
712 .collect::<Result<Vec<_>, _>>()?;
713
714 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
715
716 let mut skip_reindex = false;
717 let (memory_id, action, version) = match existing_memory {
718 Some((existing_id, _updated_at, _current_version)) => {
719 if let Some(hash_id) = duplicate_hash_id {
720 if hash_id != existing_id {
721 warnings.push(format!(
722 "identical body already exists as memory id {hash_id}"
723 ));
724 }
725 }
726
727 let (old_fts_name, old_fts_desc, old_fts_body): (String, String, String) = tx
729 .query_row(
730 "SELECT name, description, body FROM memories WHERE id = ?1",
731 rusqlite::params![existing_id],
732 |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
733 )?;
734
735 let existing_body_hash: Option<String> = tx
737 .query_row(
738 "SELECT body_hash FROM memories WHERE id = ?1",
739 rusqlite::params![existing_id],
740 |r| r.get(0),
741 )
742 .ok();
743 let body_unchanged = existing_body_hash.as_deref() == Some(&body_hash);
744 skip_reindex = body_unchanged;
745 if !body_unchanged {
746 storage_chunks::delete_chunks(&tx, existing_id)?;
747 }
748
749 let next_v = versions::next_version(&tx, existing_id)?;
750 memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
751
752 memories::sync_fts_after_update(
755 &tx,
756 existing_id,
757 &old_fts_name,
758 &old_fts_desc,
759 &old_fts_body,
760 &normalized_name,
761 &resolved_description,
762 &new_memory.body,
763 )?;
764
765 versions::insert_version(
766 &tx,
767 existing_id,
768 next_v,
769 &normalized_name,
770 memory_type,
771 &resolved_description,
772 &new_memory.body,
773 &serde_json::to_string(&new_memory.metadata)?,
774 None,
775 "edit",
776 )?;
777 if !body_unchanged {
778 memories::upsert_vec(
779 &tx,
780 existing_id,
781 &namespace,
782 memory_type,
783 &embedding,
784 &normalized_name,
785 &snippet,
786 )?;
787 }
788 (existing_id, "updated".to_string(), next_v)
789 }
790 None => {
791 if let Some(hash_id) = duplicate_hash_id {
792 warnings.push(format!(
793 "identical body already exists as memory id {hash_id}"
794 ));
795 }
796 let id = memories::insert(&tx, &new_memory)?;
797 versions::insert_version(
798 &tx,
799 id,
800 1,
801 &normalized_name,
802 memory_type,
803 &resolved_description,
804 &new_memory.body,
805 &serde_json::to_string(&new_memory.metadata)?,
806 None,
807 "create",
808 )?;
809 memories::upsert_vec(
810 &tx,
811 id,
812 &namespace,
813 memory_type,
814 &embedding,
815 &normalized_name,
816 &snippet,
817 )?;
818 (id, "created".to_string(), 1)
819 }
820 };
821
822 if chunks_info.len() > 1 && !skip_reindex {
823 storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
824
825 let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
826 AppError::Internal(anyhow::anyhow!(
827 "chunk embeddings cache missing in multi-chunk remember path"
828 ))
829 })?;
830
831 for (i, emb) in chunk_embeddings.iter().enumerate() {
832 storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
833 }
834 output::emit_progress_i18n(
835 &format!(
836 "Remember stage: persisted chunk vectors; process RSS {} MB",
837 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
838 ),
839 &format!(
840 "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
841 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
842 ),
843 );
844 }
845
846 if !graph.entities.is_empty() || !graph.relationships.is_empty() {
847 for entity in &graph.entities {
848 let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
849 let entity_embedding = &graph_entity_embeddings[entities_persisted];
850 entities::upsert_entity_vec(
851 &tx,
852 entity_id,
853 &namespace,
854 entity.entity_type,
855 entity_embedding,
856 &entity.name,
857 )?;
858 entities::link_memory_entity(&tx, memory_id, entity_id)?;
859 entities::increment_degree(&tx, entity_id)?;
860 if args.max_entity_degree > 0 {
862 let cap = args.max_entity_degree as i64;
863 let degree: i64 = tx.query_row(
864 "SELECT degree FROM entities WHERE id = ?1",
865 rusqlite::params![entity_id],
866 |r| r.get(0),
867 )?;
868 if degree > cap {
869 tracing::warn!(target: "remember",
870 entity = %entity.name,
871 degree = degree,
872 cap = cap,
873 "entity degree cap exceeded"
874 );
875 }
876 }
877 entities_persisted += 1;
878 }
879 let entity_types: std::collections::HashMap<&str, EntityType> = graph
880 .entities
881 .iter()
882 .map(|entity| (entity.name.as_str(), entity.entity_type))
883 .collect();
884
885 for rel in &graph.relationships {
886 let source_entity = NewEntity {
887 name: rel.source.clone(),
888 entity_type: entity_types
889 .get(rel.source.as_str())
890 .copied()
891 .unwrap_or(EntityType::Concept),
892 description: None,
893 };
894 let target_entity = NewEntity {
895 name: rel.target.clone(),
896 entity_type: entity_types
897 .get(rel.target.as_str())
898 .copied()
899 .unwrap_or(EntityType::Concept),
900 description: None,
901 };
902 let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
903 let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
904 let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
905 entities::link_memory_relationship(&tx, memory_id, rel_id)?;
906 relationships_persisted += 1;
907 }
908 }
909 tx.commit()?;
910
911 let urls_persisted = if !extracted_urls.is_empty() {
914 let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
915 .into_iter()
916 .map(|u| storage_urls::MemoryUrl {
917 url: u.url,
918 offset: Some(u.start as i64),
919 })
920 .collect();
921 storage_urls::insert_urls(&conn, memory_id, &url_entries)
922 } else {
923 0
924 };
925
926 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
927
928 let created_at_epoch = chrono::Utc::now().timestamp();
929 let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
930
931 output::emit_json(&RememberResponse {
932 memory_id,
933 name: normalized_name.clone(),
937 namespace,
938 action: action.clone(),
939 operation: action,
940 version,
941 entities_persisted,
942 relationships_persisted,
943 relationships_truncated,
944 chunks_created,
945 chunks_persisted,
946 urls_persisted,
947 extraction_method,
948 merged_into_memory_id: None,
949 warnings,
950 created_at: created_at_epoch,
951 created_at_iso,
952 elapsed_ms: inicio.elapsed().as_millis() as u64,
953 name_was_normalized,
954 original_name: name_was_normalized.then_some(original_name),
955 })?;
956
957 Ok(())
958}
959
960#[cfg(test)]
961mod tests {
962 use super::compute_chunks_persisted;
963 use crate::output::RememberResponse;
964
965 #[test]
967 fn chunks_persisted_zero_for_zero_chunks() {
968 assert_eq!(compute_chunks_persisted(0), 0);
969 }
970
971 #[test]
972 fn chunks_persisted_zero_for_single_chunk_body() {
973 assert_eq!(compute_chunks_persisted(1), 0);
976 }
977
978 #[test]
979 fn chunks_persisted_equals_count_for_multi_chunk_body() {
980 assert_eq!(compute_chunks_persisted(2), 2);
982 assert_eq!(compute_chunks_persisted(7), 7);
983 assert_eq!(compute_chunks_persisted(64), 64);
984 }
985
986 #[test]
987 fn remember_response_serializes_required_fields() {
988 let resp = RememberResponse {
989 memory_id: 42,
990 name: "minha-mem".to_string(),
991 namespace: "global".to_string(),
992 action: "created".to_string(),
993 operation: "created".to_string(),
994 version: 1,
995 entities_persisted: 0,
996 relationships_persisted: 0,
997 relationships_truncated: false,
998 chunks_created: 1,
999 chunks_persisted: 0,
1000 urls_persisted: 0,
1001 extraction_method: None,
1002 merged_into_memory_id: None,
1003 warnings: vec![],
1004 created_at: 1_705_320_000,
1005 created_at_iso: "2024-01-15T12:00:00Z".to_string(),
1006 elapsed_ms: 55,
1007 name_was_normalized: false,
1008 original_name: None,
1009 };
1010
1011 let json = serde_json::to_value(&resp).expect("serialization failed");
1012 assert_eq!(json["memory_id"], 42);
1013 assert_eq!(json["action"], "created");
1014 assert_eq!(json["operation"], "created");
1015 assert_eq!(json["version"], 1);
1016 assert_eq!(json["elapsed_ms"], 55u64);
1017 assert!(json["warnings"].is_array());
1018 assert!(json["merged_into_memory_id"].is_null());
1019 }
1020
/// Checks that `action` and `operation` serialize to the same JSON value for an
/// "updated" response, and that the entity, relationship and chunk counters keep
/// the values given in the struct literal.
#[test]
fn remember_response_action_e_operation_sao_aliases() {
    let response = RememberResponse {
        memory_id: 1,
        name: String::from("mem"),
        namespace: String::from("global"),
        action: String::from("updated"),
        operation: String::from("updated"),
        version: 2,
        entities_persisted: 3,
        relationships_persisted: 1,
        relationships_truncated: false,
        chunks_created: 2,
        chunks_persisted: 2,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: None,
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let payload = serde_json::to_value(&response).expect("serialization failed");

    // Both keys were populated with the same string, so they must compare equal.
    assert_eq!(
        payload["action"], payload["operation"],
        "action e operation devem ser iguais"
    );

    // Counters are emitted unchanged.
    assert_eq!(payload["entities_persisted"], 3);
    assert_eq!(payload["relationships_persisted"], 1);
    assert_eq!(payload["chunks_created"], 2);
}
1055
/// Checks that a single warning string placed in `warnings` is serialized as a
/// one-element JSON array whose entry contains the original text.
#[test]
fn remember_response_warnings_lista_mensagens() {
    let response = RememberResponse {
        memory_id: 5,
        name: String::from("dup-mem"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: None,
        warnings: vec![String::from("identical body already exists as memory id 3")],
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 10,
        name_was_normalized: false,
        original_name: None,
    };

    let payload = serde_json::to_value(&response).expect("serialization failed");

    let entries = payload["warnings"]
        .as_array()
        .expect("warnings deve ser array");
    assert_eq!(entries.len(), 1);

    // The only entry must be a string carrying the duplicate-body notice.
    let first = entries[0].as_str().unwrap();
    assert!(first.contains("identical body"));
}
1088
/// Checks that a name starting with the reserved `__` prefix is mapped to
/// `AppError::Validation` carrying a non-empty message.
///
/// The prefix check is reproduced inline in this test; it does not call the
/// command's own validation path.
#[test]
fn invalid_name_reserved_prefix_returns_validation_error() {
    use crate::errors::AppError;

    let nome = "__reservado";
    let resultado: Result<(), AppError> = if nome.starts_with("__") {
        Err(AppError::Validation(
            crate::i18n::validation::reserved_name(),
        ))
    } else {
        Ok(())
    };

    // Match on the exact variant so that `Ok` or any other error fails the test
    // instead of silently skipping the message assertion.
    match resultado {
        Err(AppError::Validation(msg)) => assert!(!msg.is_empty()),
        _ => panic!("expected Err(AppError::Validation(_))"),
    }
}
1106
/// Checks that a name one byte longer than `MAX_MEMORY_NAME_LEN` yields an error.
///
/// The length check is reproduced inline in this test; it does not call the
/// command's own validation path.
#[test]
fn name_too_long_returns_validation_error() {
    use crate::errors::AppError;

    let limit = crate::constants::MAX_MEMORY_NAME_LEN;
    let oversized = "a".repeat(limit + 1);

    // A name is acceptable only when it is non-empty and within the limit.
    let within_bounds = !oversized.is_empty() && oversized.len() <= limit;
    let outcome: Result<(), AppError> = if within_bounds {
        Ok(())
    } else {
        Err(AppError::Validation(crate::i18n::validation::name_length(
            limit,
        )))
    };

    assert!(outcome.is_err());
}
1121
/// Checks that `merged_into_memory_id: Some(7)` is serialized as the JSON
/// integer `7`.
#[test]
fn remember_response_merged_into_memory_id_some_serializes_integer() {
    let response = RememberResponse {
        memory_id: 10,
        name: String::from("mem-mergeada"),
        namespace: String::from("global"),
        action: String::from("updated"),
        operation: String::from("updated"),
        version: 3,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: Some(7),
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let payload = serde_json::to_value(&response).expect("serialization failed");

    assert_eq!(payload["merged_into_memory_id"], 7);
}
1150
/// Checks that the `urls_persisted` counter appears in the serialized JSON with
/// the value assigned in the struct literal.
#[test]
fn remember_response_urls_persisted_serializes_field() {
    let response = RememberResponse {
        memory_id: 3,
        name: String::from("mem-com-urls"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 3,
        extraction_method: Some(String::from("regex-only")),
        merged_into_memory_id: None,
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let payload = serde_json::to_value(&response).expect("serialization failed");

    assert_eq!(payload["urls_persisted"], 3);
}
1179
/// Checks that a name made only of hyphens normalizes to an empty string and
/// that the resulting `AppError::Validation` message mentions
/// "empty after normalization".
///
/// Both the normalization steps and the error message are built inline in this
/// test; it does not call the command's own validation path.
#[test]
fn empty_name_after_normalization_returns_specific_message() {
    use crate::errors::AppError;

    // Lowercase, map '_' and ' ' to '-', then strip leading/trailing hyphens.
    let normalized = "---".to_lowercase().replace(['_', ' '], "-");
    let normalized = normalized.trim_matches('-').to_string();
    let resultado: Result<(), AppError> = if normalized.is_empty() {
        Err(AppError::Validation(
            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
        ))
    } else {
        Ok(())
    };

    // Match on the exact variant so that `Ok` or any other error fails the test
    // instead of silently skipping the message assertion.
    match resultado {
        Err(AppError::Validation(msg)) => assert!(
            msg.contains("empty after normalization"),
            "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
        ),
        _ => panic!("expected Err(AppError::Validation(_))"),
    }
}
1202
/// Checks that a name made only of underscores normalizes to an empty string
/// and that the resulting `AppError::Validation` message mentions
/// "empty after normalization".
///
/// Both the normalization steps and the error message are built inline in this
/// test; it does not call the command's own validation path.
#[test]
fn name_only_underscores_after_normalization_returns_specific_message() {
    use crate::errors::AppError;

    // Underscores become hyphens, which are then trimmed away entirely.
    let normalized = "___".to_lowercase().replace(['_', ' '], "-");
    let normalized = normalized.trim_matches('-').to_string();
    assert!(
        normalized.is_empty(),
        "underscores devem normalizar para string vazia"
    );

    let resultado: Result<(), AppError> = if normalized.is_empty() {
        Err(AppError::Validation(
            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
        ))
    } else {
        Ok(())
    };

    // Match on the exact variant so that `Ok` or any other error fails the test
    // instead of silently skipping the message assertion.
    match resultado {
        Err(AppError::Validation(msg)) => assert!(
            msg.contains("empty after normalization"),
            "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
        ),
        _ => panic!("expected Err(AppError::Validation(_))"),
    }
}
1228
/// Checks that `relationships_truncated` is serialized as a JSON boolean for
/// both `false` and `true`.
#[test]
fn remember_response_relationships_truncated_serializes_field() {
    let not_truncated = RememberResponse {
        memory_id: 1,
        name: String::from("test"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        entities_persisted: 2,
        relationships_persisted: 1,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: None,
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };
    let payload = serde_json::to_value(&not_truncated).expect("serialization failed");
    assert_eq!(payload["relationships_truncated"], false);

    // Reuse every other field from the first response; only the flag flips.
    let truncated = RememberResponse {
        relationships_truncated: true,
        ..not_truncated
    };
    let payload = serde_json::to_value(&truncated).expect("serialization failed");
    assert_eq!(payload["relationships_truncated"], true);
}
1264
/// Test-local predicate: returns `true` only when `force_merge` is set, the raw
/// body is empty, and `clear_body` is not set; every other combination is `false`.
///
/// NOTE(review): presumably mirrors the body-preservation rule of the remember
/// command — confirm against the handler.
fn should_preserve_body(force_merge: bool, raw_body_is_empty: bool, clear_body: bool) -> bool {
    matches!(
        (force_merge, raw_body_is_empty, clear_body),
        (true, true, false)
    )
}
1276
/// Empty body with force-merge on and clear-body off: the predicate must report
/// that the body is preserved.
#[test]
fn gap08_empty_body_force_merge_no_clear_body_preserves() {
    let (force_merge, raw_body_is_empty, clear_body) = (true, true, false);
    let preserved = should_preserve_body(force_merge, raw_body_is_empty, clear_body);
    assert!(
        preserved,
        "empty body + force-merge + no clear-body should trigger preservation"
    );
}
1286
/// Empty body with force-merge on and clear-body on: the predicate must report
/// that the body is not preserved.
#[test]
fn gap08_empty_body_force_merge_with_clear_body_does_not_preserve() {
    let (force_merge, raw_body_is_empty, clear_body) = (true, true, true);
    let preserved = should_preserve_body(force_merge, raw_body_is_empty, clear_body);
    assert!(
        !preserved,
        "--clear-body must bypass preservation"
    );
}
1295
/// Non-empty body with force-merge on: the predicate must report that the body
/// is not preserved.
#[test]
fn gap08_non_empty_body_force_merge_does_not_preserve() {
    let (force_merge, raw_body_is_empty, clear_body) = (true, false, false);
    let preserved = should_preserve_body(force_merge, raw_body_is_empty, clear_body);
    assert!(
        !preserved,
        "non-empty body must overwrite, not preserve"
    );
}
1304
/// Empty body with force-merge off: the predicate must report that the body is
/// not preserved.
#[test]
fn gap08_empty_body_no_force_merge_does_not_preserve() {
    let (force_merge, raw_body_is_empty, clear_body) = (false, true, false);
    let preserved = should_preserve_body(force_merge, raw_body_is_empty, clear_body);
    assert!(
        !preserved,
        "no --force-merge means no preservation logic applies"
    );
}
1313}