1use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Maps the number of chunks produced for a body to the number of chunk rows
/// reported as persisted.
///
/// Bodies that yield zero or one chunk report `0`; any larger count is
/// returned unchanged.
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        multi => multi,
    }
}
32
// Command-line arguments of the `remember` subcommand, consumed by `run`.
//
// NOTE: the annotations in this struct deliberately use `//` instead of `///`.
// clap's derive macros turn doc comments into help text, so a `///` comment
// here would change the generated `--help` output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
    # Create a memory with inline body\n \
    sqlite-graphrag remember --name design-auth --type decision \\\n \
    --description \"auth design\" --body \"JWT for stateless auth\"\n\n \
    # Create with curated graph via --graph-stdin\n \
    echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n \
    sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n \
    # Enable GLiNER NER extraction with --graph-stdin\n \
    echo '{\"body\":\"Alice from Microsoft...\",\"entities\":[],\"relationships\":[]}' | \\\n \
    sqlite-graphrag remember --name ner-test --type note --description \"test\" \\\n \
    --graph-stdin --enable-ner --gliner-variant int8\n\n \
    # Idempotent upsert with --force-merge\n \
    sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n \
    --body \"new content\" --force-merge\n\n\
    NOTE:\n \
    remember does NOT accept positional arguments.\n \
    Use --body \"text\" for inline content\n \
    Use --body-file path for file content\n \
    Use --body-stdin for piped content\n \
    Use --graph-stdin for JSON with entities and relationships\n\n\
    ENTITY TYPES (for --graph-stdin entities, NOT memory --type):\n \
    concept, tool, person, file, project, decision, incident,\n \
    organization, location, date, dashboard, issue_tracker, memory\n \
    WARNING: reference, skill, document, note, user, feedback are\n \
    MEMORY types only — NOT valid for entities.\n \
    Mapping: reference→concept, document→file, user→person")]
pub struct RememberArgs {
    // Memory name as typed by the user; `run` lowercases it, converts `_` and
    // spaces to `-`, and trims surrounding hyphens before validating it.
    #[arg(long)]
    pub name: String,
    // Memory kind; `run` requires it for new memories and inherits the stored
    // value when it is omitted on an update.
    #[arg(
        long,
        value_enum,
        long_help = "Memory kind stored in `memories.type`. Required when creating a new memory. Optional with --force-merge: if omitted the existing memory type is inherited. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
    )]
    pub r#type: Option<MemoryType>,
    // Short description; `run` requires it for new memories and inherits the
    // stored value when it is omitted on an update.
    #[arg(long)]
    pub description: Option<String>,
    // Inline body text. Mutually exclusive with the other body sources.
    #[arg(
        long,
        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
    )]
    pub body: Option<String>,
    // Path of a file whose content becomes the body.
    #[arg(
        long,
        help = "Read body from a file instead of --body",
        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
    )]
    pub body_file: Option<std::path::PathBuf>,
    // When set, `run` reads the body from stdin.
    #[arg(
        long,
        conflicts_with_all = ["body", "body_file", "graph_stdin"]
    )]
    pub body_stdin: bool,
    // JSON file that `run` deserializes into the entity list.
    #[arg(
        long,
        help = "JSON file containing entities to associate with this memory"
    )]
    pub entities_file: Option<std::path::PathBuf>,
    // JSON file that `run` deserializes into the relationship list.
    #[arg(
        long,
        help = "JSON file containing relationships to associate with this memory"
    )]
    pub relationships_file: Option<std::path::PathBuf>,
    // When set, stdin carries one JSON document holding body, entities and
    // relationships; it therefore conflicts with every other input source.
    #[arg(
        long,
        help = "Read graph JSON (body + entities + relationships) from stdin",
        conflicts_with_all = [
            "body",
            "body_file",
            "body_stdin",
            "entities_file",
            "relationships_file"
        ]
    )]
    pub graph_stdin: bool,
    // Optional namespace override, resolved by `crate::namespace::resolve_namespace`.
    #[arg(
        long,
        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
    )]
    pub namespace: Option<String>,
    // Inline metadata; `run` parses it as JSON and prefers it over `metadata_file`.
    #[arg(long)]
    pub metadata: Option<String>,
    // File holding the metadata JSON, used only when `metadata` is absent.
    #[arg(long, help = "JSON file containing metadata key-value pairs")]
    pub metadata_file: Option<std::path::PathBuf>,
    // Allows `run` to update an existing (or soft-deleted) memory of the same
    // name instead of failing with a duplicate error.
    #[arg(long)]
    pub force_merge: bool,
    // Optimistic-lock timestamp forwarded to `memories::update`.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    // Opt-in automatic extraction; accepts a bare flag or an explicit value.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_ENABLE_NER",
        value_parser = crate::parsers::parse_bool_flexible,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        default_missing_value = "true",
        default_value = "false",
        help = "Enable automatic GLiNER NER entity/relationship extraction from body"
    )]
    pub enable_ner: bool,
    // Kept as a free-form string; `run` parses it and falls back to fp32 with
    // a warning when the value is not recognized.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
        default_value = "fp32",
        help = "GLiNER model variant: fp32 (1.1GB, best quality), fp16 (580MB), int8 (349MB, fastest but may miss entities on short texts), q4, q4f16"
    )]
    pub gliner_variant: String,
    // Hidden legacy flag; in `run` it only triggers warnings.
    #[arg(long, hide = true)]
    pub skip_extraction: bool,
    // Lets an empty body overwrite the stored one during `--force-merge`.
    #[arg(
        long,
        default_value_t = false,
        help = "Explicitly clear body content during --force-merge (without this flag, an empty body is ignored and the existing body is kept)"
    )]
    pub clear_body: bool,
    // Makes `run` report the planned action and return before writing a memory.
    #[arg(
        long,
        default_value_t = false,
        help = "Validate input and report planned actions without persisting"
    )]
    pub dry_run: bool,
    // Optional session identifier stored with the memory row.
    #[arg(long)]
    pub session_id: Option<String>,
    // Output format selector; `run` currently discards the value.
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    // Hidden compatibility flag with no effect.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override, forwarded to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    // RSS ceiling checked before each chunk embedding in the multi-chunk path.
    #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
        help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
    pub max_rss_mb: u64,
    // Entity degree above which `run` logs a warning; 0 disables the check.
    #[arg(long, default_value_t = 50, value_name = "N")]
    pub max_entity_degree: u32,
}
192
/// Graph payload attached to a memory: an optional body plus the entities and
/// relationships to persist with it.
///
/// `run` fills it from `--entities-file` / `--relationships-file`, from the
/// `--graph-stdin` JSON document, or from automatic extraction. Unknown JSON
/// keys are rejected (`deny_unknown_fields`); every field may be omitted.
#[derive(Deserialize, Default)]
#[serde(deny_unknown_fields)]
struct GraphInput {
    /// Body text carried inside a `--graph-stdin` document; `run` takes it out
    /// and uses it as the memory body.
    #[serde(default)]
    body: Option<String>,
    /// Entities to upsert and link to the memory.
    #[serde(default)]
    entities: Vec<NewEntity>,
    /// Relationships to upsert and link to the memory.
    #[serde(default)]
    relationships: Vec<NewRelationship>,
}
203
204fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
205 for rel in &mut graph.relationships {
206 rel.relation = crate::parsers::normalize_relation(&rel.relation);
207 if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
208 return Err(AppError::Validation(format!(
209 "{e} for relationship '{}' -> '{}'",
210 rel.source, rel.target
211 )));
212 }
213 crate::parsers::warn_if_non_canonical(&rel.relation);
214 if !(0.0..=1.0).contains(&rel.strength) {
215 return Err(AppError::Validation(format!(
216 "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
217 rel.strength, rel.source, rel.target
218 )));
219 }
220 }
221
222 Ok(())
223}
224
225pub fn run(args: RememberArgs) -> Result<(), AppError> {
226 use crate::constants::*;
227
228 let inicio = std::time::Instant::now();
229 let _ = args.format;
230 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
231
232 let original_name = args.name.clone();
236
237 let normalized_name = {
241 let lower = args.name.to_lowercase().replace(['_', ' '], "-");
242 let trimmed = lower.trim_matches('-').to_string();
243 if trimmed != args.name {
244 tracing::warn!(
245 original = %args.name,
246 normalized = %trimmed,
247 "name auto-normalized to kebab-case"
248 );
249 }
250 trimmed
251 };
252 let name_was_normalized = normalized_name != original_name;
253
254 if normalized_name.is_empty() {
255 return Err(AppError::Validation(
256 "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
257 ));
258 }
259 if normalized_name.len() > MAX_MEMORY_NAME_LEN {
260 return Err(AppError::LimitExceeded(
261 crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
262 ));
263 }
264
265 if normalized_name.starts_with("__") {
266 return Err(AppError::Validation(
267 crate::i18n::validation::reserved_name(),
268 ));
269 }
270
271 {
272 let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
273 .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
274 if !slug_re.is_match(&normalized_name) {
275 return Err(AppError::Validation(crate::i18n::validation::name_kebab(
276 &normalized_name,
277 )));
278 }
279 }
280
281 if let Some(ref desc) = args.description {
282 if desc.len() > MAX_MEMORY_DESCRIPTION_LEN {
283 return Err(AppError::Validation(
284 crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
285 ));
286 }
287 }
288
289 let mut raw_body = if let Some(b) = args.body {
290 b
291 } else if let Some(ref path) = args.body_file {
292 match std::fs::read_to_string(path) {
293 Ok(s) => s,
294 Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
295 let bytes = std::fs::read(path).map_err(AppError::Io)?;
296 tracing::warn!("body file contains invalid UTF-8; replacing invalid sequences");
297 String::from_utf8_lossy(&bytes).into_owned()
298 }
299 Err(e) => return Err(AppError::Io(e)),
300 }
301 } else if args.body_stdin || args.graph_stdin {
302 crate::stdin_helper::read_stdin_with_timeout(60)?
303 } else {
304 String::new()
305 };
306
307 let mut entities_provided_externally =
308 args.entities_file.is_some() || args.relationships_file.is_some();
309
310 let mut graph = GraphInput::default();
311 if let Some(path) = args.entities_file {
312 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
313 graph.entities = serde_json::from_str(&content)?;
314 }
315 if let Some(path) = args.relationships_file {
316 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
317 graph.relationships = serde_json::from_str(&content)?;
318 }
319 if args.graph_stdin {
320 graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
321 AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
322 })?;
323 raw_body = graph.body.take().unwrap_or_default();
324 }
325 if args.graph_stdin && !graph.entities.is_empty() {
326 entities_provided_externally = true;
327 }
328
329 if graph.entities.len() > max_entities_per_memory() {
330 return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
331 max_entities_per_memory(),
332 )));
333 }
334 let mut relationships_truncated = false;
335 let rel_cap = max_relationships_per_memory();
336 if graph.relationships.len() > rel_cap {
337 tracing::warn!(
338 count = graph.relationships.len(),
339 cap = rel_cap,
340 "truncating relationships to cap"
341 );
342 graph.relationships.truncate(rel_cap);
343 relationships_truncated = true;
344 }
345 normalize_and_validate_graph_input(&mut graph)?;
346
347 if raw_body.len() > MAX_MEMORY_BODY_LEN {
348 return Err(AppError::LimitExceeded(
349 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
350 ));
351 }
352
353 let body_will_be_preserved = args.force_merge && raw_body.trim().is_empty() && !args.clear_body;
358 if !entities_provided_externally
359 && graph.entities.is_empty()
360 && raw_body.trim().is_empty()
361 && !body_will_be_preserved
362 && !args.clear_body
363 {
364 return Err(AppError::Validation(crate::i18n::validation::empty_body()));
365 }
366
367 let metadata: serde_json::Value = if let Some(m) = args.metadata {
368 serde_json::from_str(&m)?
369 } else if let Some(path) = args.metadata_file {
370 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
371 serde_json::from_str(&content)?
372 } else {
373 serde_json::json!({})
374 };
375
376 let mut body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
377 let mut snippet: String = raw_body.chars().take(200).collect();
378
379 let paths = AppPaths::resolve(args.db.as_deref())?;
380 paths.ensure_dirs()?;
381
382 let mut extraction_method: Option<String> = None;
384 let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
385 if args.enable_ner && args.skip_extraction {
386 tracing::warn!(
387 "--enable-ner and --skip-extraction are contradictory; --enable-ner takes precedence"
388 );
389 }
390 if args.skip_extraction && !args.enable_ner {
391 tracing::warn!("--skip-extraction is deprecated and has no effect (NER is disabled by default since v1.0.45); remove this flag");
392 }
393 let gliner_variant: crate::extraction::GlinerVariant =
394 args.gliner_variant.parse().unwrap_or_else(|e| {
395 tracing::warn!("invalid --gliner-variant: {e}; using fp32");
396 crate::extraction::GlinerVariant::Fp32
397 });
398 if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
399 match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
400 Ok(extracted) => {
401 extraction_method = Some(extracted.extraction_method.clone());
402 extracted_urls = extracted.urls;
403 graph.entities = extracted.entities;
404 graph.relationships = extracted.relationships;
405 relationships_truncated = extracted.relationships_truncated;
406
407 if graph.entities.len() > max_entities_per_memory() {
408 graph.entities.truncate(max_entities_per_memory());
409 }
410 if graph.relationships.len() > max_relationships_per_memory() {
411 relationships_truncated = true;
412 graph.relationships.truncate(max_relationships_per_memory());
413 }
414 normalize_and_validate_graph_input(&mut graph)?;
415 }
416 Err(e) => {
417 tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
418 extraction_method = Some("none:extraction-failed".to_string());
419 }
420 }
421 }
422
423 let mut conn = open_rw(&paths.db)?;
424 ensure_schema(&mut conn)?;
425
426 if args.dry_run {
428 let existing = memories::find_by_name(&conn, &namespace, &normalized_name)?;
429 let planned_action = if existing.is_some() && args.force_merge {
430 "would_update"
431 } else {
432 "would_create"
433 };
434 output::emit_json(&serde_json::json!({
435 "dry_run": true,
436 "name": normalized_name,
437 "namespace": namespace,
438 "planned_action": planned_action,
439 }))?;
440 return Ok(());
441 }
442
443 {
444 use crate::constants::MAX_NAMESPACES_ACTIVE;
445 let active_count: u32 = conn.query_row(
446 "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
447 [],
448 |r| r.get::<_, i64>(0).map(|v| v as u32),
449 )?;
450 let ns_exists: bool = conn.query_row(
451 "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
452 rusqlite::params![namespace],
453 |r| r.get::<_, i64>(0).map(|v| v > 0),
454 )?;
455 if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
456 return Err(AppError::NamespaceError(format!(
457 "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
458 )));
459 }
460 }
461
462 if let Some((sd_id, true)) =
464 memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
465 {
466 if args.force_merge {
467 memories::clear_deleted_at(&conn, sd_id)?;
468 } else {
469 return Err(AppError::Duplicate(
470 errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
471 ));
472 }
473 }
474
475 let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
476 if existing_memory.is_some() && !args.force_merge {
477 return Err(AppError::Duplicate(errors_msg::duplicate_memory(
478 &normalized_name,
479 &namespace,
480 )));
481 }
482
483 let (resolved_type, resolved_description) = if existing_memory.is_none() {
487 let t = args.r#type.ok_or_else(|| {
489 AppError::Validation(
490 "--type and --description are required when creating a new memory".to_string(),
491 )
492 })?;
493 let d = args.description.clone().ok_or_else(|| {
494 AppError::Validation(
495 "--type and --description are required when creating a new memory".to_string(),
496 )
497 })?;
498 (t.as_str().to_string(), d)
499 } else {
500 let existing_row = memories::read_by_name(&conn, &namespace, &normalized_name)?
502 .ok_or_else(|| {
503 AppError::NotFound(format!(
504 "memory '{normalized_name}' not found in namespace '{namespace}'"
505 ))
506 })?;
507 let t = args
508 .r#type
509 .map(|v| v.as_str().to_string())
510 .unwrap_or_else(|| existing_row.memory_type.clone());
511 let d = args
512 .description
513 .clone()
514 .unwrap_or_else(|| existing_row.description.clone());
515 (t, d)
516 };
517
518 if body_will_be_preserved {
523 if let Some(existing_row) = memories::read_by_name(&conn, &namespace, &normalized_name)? {
524 if !existing_row.body.is_empty() {
525 tracing::debug!(
526 name = %normalized_name,
527 "GAP-08: empty body with --force-merge and no --clear-body; preserving existing body"
528 );
529 raw_body = existing_row.body;
530 body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
531 snippet = raw_body.chars().take(200).collect();
532 }
533 }
534 }
535
536 let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
537
538 output::emit_progress_i18n(
539 &format!(
540 "Remember stage: validated input; available memory {} MB",
541 crate::memory_guard::available_memory_mb()
542 ),
543 &format!(
544 "Stage remember: input validated; available memory {} MB",
545 crate::memory_guard::available_memory_mb()
546 ),
547 );
548
549 let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
550 let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
551 let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
552 let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
553 let chunks_created = chunks_info.len();
554 let chunks_persisted = compute_chunks_persisted(chunks_info.len());
558
559 output::emit_progress_i18n(
560 &format!(
561 "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
562 chunks_created,
563 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
564 ),
565 &format!(
566 "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
567 chunks_created,
568 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
569 ),
570 );
571
572 if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
573 return Err(AppError::LimitExceeded(format!(
574 "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
575 crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
576 )));
577 }
578
579 output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
580 let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
581
582 let embedding = if chunks_info.len() == 1 {
583 crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
584 } else {
585 let chunk_texts: Vec<&str> = chunks_info
586 .iter()
587 .map(|c| chunking::chunk_text(&raw_body, c))
588 .collect();
589 output::emit_progress_i18n(
590 &format!(
591 "Embedding {} chunks serially to keep memory bounded...",
592 chunks_info.len()
593 ),
594 &format!(
595 "Embedding {} chunks serially to keep memory bounded...",
596 chunks_info.len()
597 ),
598 );
599 let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
600 for chunk_text in &chunk_texts {
601 if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
602 if rss > args.max_rss_mb {
603 tracing::error!(
604 rss_mb = rss,
605 max_rss_mb = args.max_rss_mb,
606 "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
607 );
608 return Err(AppError::LowMemory {
609 available_mb: crate::memory_guard::available_memory_mb(),
610 required_mb: args.max_rss_mb,
611 });
612 }
613 }
614 chunk_embeddings.push(crate::daemon::embed_passage_or_local(
615 &paths.models,
616 chunk_text,
617 )?);
618 }
619 output::emit_progress_i18n(
620 &format!(
621 "Remember stage: chunk embeddings complete; process RSS {} MB",
622 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
623 ),
624 &format!(
625 "Stage remember: chunk embeddings completed; process RSS {} MB",
626 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
627 ),
628 );
629 let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
630 chunk_embeddings_cache = Some(chunk_embeddings);
631 aggregated
632 };
633 let body_for_storage = raw_body;
634
635 let memory_type = resolved_type.as_str();
636 let new_memory = NewMemory {
637 namespace: namespace.clone(),
638 name: normalized_name.clone(),
639 memory_type: memory_type.to_string(),
640 description: resolved_description.clone(),
641 body: body_for_storage,
642 body_hash: body_hash.clone(),
643 session_id: args.session_id.clone(),
644 source: "agent".to_string(),
645 metadata,
646 };
647
648 let mut warnings = Vec::with_capacity(4);
649 let mut entities_persisted = 0usize;
650 let mut relationships_persisted = 0usize;
651
652 let graph_entity_embeddings = graph
653 .entities
654 .iter()
655 .map(|entity| {
656 let entity_text = match &entity.description {
657 Some(desc) => format!("{} {}", entity.name, desc),
658 None => entity.name.clone(),
659 };
660 crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
661 })
662 .collect::<Result<Vec<_>, _>>()?;
663
664 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
665
666 let (memory_id, action, version) = match existing_memory {
667 Some((existing_id, _updated_at, _current_version)) => {
668 if let Some(hash_id) = duplicate_hash_id {
669 if hash_id != existing_id {
670 warnings.push(format!(
671 "identical body already exists as memory id {hash_id}"
672 ));
673 }
674 }
675
676 let (old_fts_name, old_fts_desc, old_fts_body): (String, String, String) = tx
678 .query_row(
679 "SELECT name, description, body FROM memories WHERE id = ?1",
680 rusqlite::params![existing_id],
681 |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
682 )?;
683
684 storage_chunks::delete_chunks(&tx, existing_id)?;
685
686 let next_v = versions::next_version(&tx, existing_id)?;
687 memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
688
689 memories::sync_fts_after_update(
692 &tx,
693 existing_id,
694 &old_fts_name,
695 &old_fts_desc,
696 &old_fts_body,
697 &normalized_name,
698 &resolved_description,
699 &new_memory.body,
700 )?;
701
702 versions::insert_version(
703 &tx,
704 existing_id,
705 next_v,
706 &normalized_name,
707 memory_type,
708 &resolved_description,
709 &new_memory.body,
710 &serde_json::to_string(&new_memory.metadata)?,
711 None,
712 "edit",
713 )?;
714 memories::upsert_vec(
715 &tx,
716 existing_id,
717 &namespace,
718 memory_type,
719 &embedding,
720 &normalized_name,
721 &snippet,
722 )?;
723 (existing_id, "updated".to_string(), next_v)
724 }
725 None => {
726 if let Some(hash_id) = duplicate_hash_id {
727 warnings.push(format!(
728 "identical body already exists as memory id {hash_id}"
729 ));
730 }
731 let id = memories::insert(&tx, &new_memory)?;
732 versions::insert_version(
733 &tx,
734 id,
735 1,
736 &normalized_name,
737 memory_type,
738 &resolved_description,
739 &new_memory.body,
740 &serde_json::to_string(&new_memory.metadata)?,
741 None,
742 "create",
743 )?;
744 memories::upsert_vec(
745 &tx,
746 id,
747 &namespace,
748 memory_type,
749 &embedding,
750 &normalized_name,
751 &snippet,
752 )?;
753 (id, "created".to_string(), 1)
754 }
755 };
756
757 if chunks_info.len() > 1 {
758 storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
759
760 let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
761 AppError::Internal(anyhow::anyhow!(
762 "chunk embeddings cache missing in multi-chunk remember path"
763 ))
764 })?;
765
766 for (i, emb) in chunk_embeddings.iter().enumerate() {
767 storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
768 }
769 output::emit_progress_i18n(
770 &format!(
771 "Remember stage: persisted chunk vectors; process RSS {} MB",
772 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
773 ),
774 &format!(
775 "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
776 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
777 ),
778 );
779 }
780
781 if !graph.entities.is_empty() || !graph.relationships.is_empty() {
782 for entity in &graph.entities {
783 let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
784 let entity_embedding = &graph_entity_embeddings[entities_persisted];
785 entities::upsert_entity_vec(
786 &tx,
787 entity_id,
788 &namespace,
789 entity.entity_type,
790 entity_embedding,
791 &entity.name,
792 )?;
793 entities::link_memory_entity(&tx, memory_id, entity_id)?;
794 entities::increment_degree(&tx, entity_id)?;
795 if args.max_entity_degree > 0 {
797 let cap = args.max_entity_degree as i64;
798 let degree: i64 = tx.query_row(
799 "SELECT degree FROM entities WHERE id = ?1",
800 rusqlite::params![entity_id],
801 |r| r.get(0),
802 )?;
803 if degree > cap {
804 tracing::warn!(
805 entity = %entity.name,
806 degree = degree,
807 cap = cap,
808 "entity degree cap exceeded"
809 );
810 }
811 }
812 entities_persisted += 1;
813 }
814 let entity_types: std::collections::HashMap<&str, EntityType> = graph
815 .entities
816 .iter()
817 .map(|entity| (entity.name.as_str(), entity.entity_type))
818 .collect();
819
820 for rel in &graph.relationships {
821 let source_entity = NewEntity {
822 name: rel.source.clone(),
823 entity_type: entity_types
824 .get(rel.source.as_str())
825 .copied()
826 .unwrap_or(EntityType::Concept),
827 description: None,
828 };
829 let target_entity = NewEntity {
830 name: rel.target.clone(),
831 entity_type: entity_types
832 .get(rel.target.as_str())
833 .copied()
834 .unwrap_or(EntityType::Concept),
835 description: None,
836 };
837 let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
838 let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
839 let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
840 entities::link_memory_relationship(&tx, memory_id, rel_id)?;
841 relationships_persisted += 1;
842 }
843 }
844 tx.commit()?;
845
846 let urls_persisted = if !extracted_urls.is_empty() {
849 let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
850 .into_iter()
851 .map(|u| storage_urls::MemoryUrl {
852 url: u.url,
853 offset: Some(u.offset as i64),
854 })
855 .collect();
856 storage_urls::insert_urls(&conn, memory_id, &url_entries)
857 } else {
858 0
859 };
860
861 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
862
863 let created_at_epoch = chrono::Utc::now().timestamp();
864 let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
865
866 output::emit_json(&RememberResponse {
867 memory_id,
868 name: normalized_name.clone(),
872 namespace,
873 action: action.clone(),
874 operation: action,
875 version,
876 entities_persisted,
877 relationships_persisted,
878 relationships_truncated,
879 chunks_created,
880 chunks_persisted,
881 urls_persisted,
882 extraction_method,
883 merged_into_memory_id: None,
884 warnings,
885 created_at: created_at_epoch,
886 created_at_iso,
887 elapsed_ms: inicio.elapsed().as_millis() as u64,
888 name_was_normalized,
889 original_name: name_was_normalized.then_some(original_name),
890 })?;
891
892 Ok(())
893}
894
895#[cfg(test)]
896mod tests {
897 use super::compute_chunks_persisted;
898 use crate::output::RememberResponse;
899
900 #[test]
902 fn chunks_persisted_zero_for_zero_chunks() {
903 assert_eq!(compute_chunks_persisted(0), 0);
904 }
905
906 #[test]
907 fn chunks_persisted_zero_for_single_chunk_body() {
908 assert_eq!(compute_chunks_persisted(1), 0);
911 }
912
913 #[test]
914 fn chunks_persisted_equals_count_for_multi_chunk_body() {
915 assert_eq!(compute_chunks_persisted(2), 2);
917 assert_eq!(compute_chunks_persisted(7), 7);
918 assert_eq!(compute_chunks_persisted(64), 64);
919 }
920
921 #[test]
922 fn remember_response_serializes_required_fields() {
923 let resp = RememberResponse {
924 memory_id: 42,
925 name: "minha-mem".to_string(),
926 namespace: "global".to_string(),
927 action: "created".to_string(),
928 operation: "created".to_string(),
929 version: 1,
930 entities_persisted: 0,
931 relationships_persisted: 0,
932 relationships_truncated: false,
933 chunks_created: 1,
934 chunks_persisted: 0,
935 urls_persisted: 0,
936 extraction_method: None,
937 merged_into_memory_id: None,
938 warnings: vec![],
939 created_at: 1_705_320_000,
940 created_at_iso: "2024-01-15T12:00:00Z".to_string(),
941 elapsed_ms: 55,
942 name_was_normalized: false,
943 original_name: None,
944 };
945
946 let json = serde_json::to_value(&resp).expect("serialization failed");
947 assert_eq!(json["memory_id"], 42);
948 assert_eq!(json["action"], "created");
949 assert_eq!(json["operation"], "created");
950 assert_eq!(json["version"], 1);
951 assert_eq!(json["elapsed_ms"], 55u64);
952 assert!(json["warnings"].is_array());
953 assert!(json["merged_into_memory_id"].is_null());
954 }
955
956 #[test]
957 fn remember_response_action_e_operation_sao_aliases() {
958 let resp = RememberResponse {
959 memory_id: 1,
960 name: "mem".to_string(),
961 namespace: "global".to_string(),
962 action: "updated".to_string(),
963 operation: "updated".to_string(),
964 version: 2,
965 entities_persisted: 3,
966 relationships_persisted: 1,
967 relationships_truncated: false,
968 extraction_method: None,
969 chunks_created: 2,
970 chunks_persisted: 2,
971 urls_persisted: 0,
972 merged_into_memory_id: None,
973 warnings: vec![],
974 created_at: 0,
975 created_at_iso: "1970-01-01T00:00:00Z".to_string(),
976 elapsed_ms: 0,
977 name_was_normalized: false,
978 original_name: None,
979 };
980
981 let json = serde_json::to_value(&resp).expect("serialization failed");
982 assert_eq!(
983 json["action"], json["operation"],
984 "action e operation devem ser iguais"
985 );
986 assert_eq!(json["entities_persisted"], 3);
987 assert_eq!(json["relationships_persisted"], 1);
988 assert_eq!(json["chunks_created"], 2);
989 }
990
991 #[test]
992 fn remember_response_warnings_lista_mensagens() {
993 let resp = RememberResponse {
994 memory_id: 5,
995 name: "dup-mem".to_string(),
996 namespace: "global".to_string(),
997 action: "created".to_string(),
998 operation: "created".to_string(),
999 version: 1,
1000 entities_persisted: 0,
1001 extraction_method: None,
1002 relationships_persisted: 0,
1003 relationships_truncated: false,
1004 chunks_created: 1,
1005 chunks_persisted: 0,
1006 urls_persisted: 0,
1007 merged_into_memory_id: None,
1008 warnings: vec!["identical body already exists as memory id 3".to_string()],
1009 created_at: 0,
1010 created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1011 elapsed_ms: 10,
1012 name_was_normalized: false,
1013 original_name: None,
1014 };
1015
1016 let json = serde_json::to_value(&resp).expect("serialization failed");
1017 let warnings = json["warnings"]
1018 .as_array()
1019 .expect("warnings deve ser array");
1020 assert_eq!(warnings.len(), 1);
1021 assert!(warnings[0].as_str().unwrap().contains("identical body"));
1022 }
1023
1024 #[test]
1025 fn invalid_name_reserved_prefix_returns_validation_error() {
1026 use crate::errors::AppError;
1027 let nome = "__reservado";
1029 let resultado: Result<(), AppError> = if nome.starts_with("__") {
1030 Err(AppError::Validation(
1031 crate::i18n::validation::reserved_name(),
1032 ))
1033 } else {
1034 Ok(())
1035 };
1036 assert!(resultado.is_err());
1037 if let Err(AppError::Validation(msg)) = resultado {
1038 assert!(!msg.is_empty());
1039 }
1040 }
1041
/// A name one byte longer than `MAX_MEMORY_NAME_LEN` must be rejected.
#[test]
fn name_too_long_returns_validation_error() {
    use crate::errors::AppError;
    let limit = crate::constants::MAX_MEMORY_NAME_LEN;
    let oversized = "a".repeat(limit + 1);

    // Same acceptance window as the original check: non-empty and at most `limit` bytes.
    let within_bounds = !oversized.is_empty() && oversized.len() <= limit;
    let outcome: Result<(), AppError> = if within_bounds {
        Ok(())
    } else {
        Err(AppError::Validation(crate::i18n::validation::name_length(
            crate::constants::MAX_MEMORY_NAME_LEN,
        )))
    };

    assert!(outcome.is_err());
}
1056
/// `merged_into_memory_id: Some(7)` must appear in the JSON output as the
/// plain integer `7`.
#[test]
fn remember_response_merged_into_memory_id_some_serializes_integer() {
    let merge_target = Some(7);
    let response = RememberResponse {
        memory_id: 10,
        name: String::from("mem-mergeada"),
        namespace: String::from("global"),
        action: String::from("updated"),
        operation: String::from("updated"),
        version: 3,
        extraction_method: None,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        merged_into_memory_id: merge_target,
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let serialized = serde_json::to_value(&response).expect("serialization failed");
    assert_eq!(serialized["merged_into_memory_id"], 7);
}
1085
/// The `urls_persisted` counter must be emitted in the serialized response
/// with the value it was given.
#[test]
fn remember_response_urls_persisted_serializes_field() {
    let method = Some(String::from("regex-only"));
    let response = RememberResponse {
        memory_id: 3,
        name: String::from("mem-com-urls"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        extraction_method: method,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 3,
        merged_into_memory_id: None,
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let serialized = serde_json::to_value(&response).expect("serialization failed");
    assert_eq!(serialized["urls_persisted"], 3);
}
1114
/// A hyphen-only name normalizes to the empty string and must yield an
/// `AppError::Validation` whose message mentions "empty after normalization".
///
/// NOTE(review): both the normalization steps and the error message are
/// written out inline in this test rather than obtained from production
/// code — TODO confirm they stay in sync with the real implementation.
#[test]
fn empty_name_after_normalization_returns_specific_message() {
    use crate::errors::AppError;
    let normalized = "---".to_lowercase().replace(['_', ' '], "-");
    // Borrow the trimmed slice; no owned copy is needed for the emptiness check.
    let normalized = normalized.trim_matches('-');
    let resultado: Result<(), AppError> = if normalized.is_empty() {
        Err(AppError::Validation(
            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
        ))
    } else {
        Ok(())
    };

    // Match exhaustively so a wrong variant or an `Ok` fails loudly instead of
    // skipping the message assertion.
    match resultado {
        Err(AppError::Validation(msg)) => assert!(
            msg.contains("empty after normalization"),
            "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
        ),
        Err(_) => panic!("expected AppError::Validation for a name that normalizes to empty"),
        Ok(()) => panic!("a name that normalizes to empty must be rejected"),
    }
}
1137
/// An underscore-only name is first mapped to hyphens, then trimmed to the
/// empty string, and must yield an `AppError::Validation` whose message
/// mentions "empty after normalization".
///
/// NOTE(review): both the normalization steps and the error message are
/// written out inline in this test rather than obtained from production
/// code — TODO confirm they stay in sync with the real implementation.
#[test]
fn name_only_underscores_after_normalization_returns_specific_message() {
    use crate::errors::AppError;
    let normalized = "___".to_lowercase().replace(['_', ' '], "-");
    // Borrow the trimmed slice; no owned copy is needed for the emptiness check.
    let normalized = normalized.trim_matches('-');
    assert!(
        normalized.is_empty(),
        "underscores devem normalizar para string vazia"
    );
    let resultado: Result<(), AppError> = if normalized.is_empty() {
        Err(AppError::Validation(
            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
        ))
    } else {
        Ok(())
    };

    // Match exhaustively so a wrong variant or an `Ok` fails loudly instead of
    // skipping the message assertion.
    match resultado {
        Err(AppError::Validation(msg)) => assert!(
            msg.contains("empty after normalization"),
            "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
        ),
        Err(_) => panic!("expected AppError::Validation for an underscore-only name"),
        Ok(()) => panic!("an underscore-only name must be rejected"),
    }
}
1163
/// `relationships_truncated` must be serialized as a JSON boolean for both
/// `false` and `true`.
#[test]
fn remember_response_relationships_truncated_serializes_field() {
    let mut response = RememberResponse {
        memory_id: 1,
        name: String::from("test"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        extraction_method: None,
        entities_persisted: 2,
        relationships_persisted: 1,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        merged_into_memory_id: None,
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    // First pass: flag cleared.
    let when_false = serde_json::to_value(&response).expect("serialization failed");
    assert_eq!(when_false["relationships_truncated"], false);

    // Second pass: same response with only the flag flipped.
    response.relationships_truncated = true;
    let when_true = serde_json::to_value(&response).expect("serialization failed");
    assert_eq!(when_true["relationships_truncated"], true);
}
1199
/// Test-local predicate: returns `true` only when `force_merge` is set, the
/// raw body is empty, and `clear_body` is not set.
///
/// NOTE(review): presumably mirrors the body-preservation decision made by
/// the `remember` command — verify against the production code path.
fn should_preserve_body(force_merge: bool, raw_body_is_empty: bool, clear_body: bool) -> bool {
    matches!(
        (force_merge, raw_body_is_empty, clear_body),
        (true, true, false)
    )
}
1211
/// Empty body with force-merge and without clear-body: the predicate reports
/// that the body should be preserved.
#[test]
fn gap08_empty_body_force_merge_no_clear_body_preserves() {
    let (force_merge, raw_body_is_empty, clear_body) = (true, true, false);
    let preserved = should_preserve_body(force_merge, raw_body_is_empty, clear_body);
    assert!(
        preserved,
        "empty body + force-merge + no clear-body should trigger preservation"
    );
}
1221
/// Empty body with force-merge and clear-body set: the predicate reports that
/// the body should not be preserved.
#[test]
fn gap08_empty_body_force_merge_with_clear_body_does_not_preserve() {
    let (force_merge, raw_body_is_empty, clear_body) = (true, true, true);
    let preserved = should_preserve_body(force_merge, raw_body_is_empty, clear_body);
    assert!(!preserved, "--clear-body must bypass preservation");
}
1230
/// Non-empty body with force-merge: the predicate reports that the body
/// should not be preserved.
#[test]
fn gap08_non_empty_body_force_merge_does_not_preserve() {
    let (force_merge, raw_body_is_empty, clear_body) = (true, false, false);
    let preserved = should_preserve_body(force_merge, raw_body_is_empty, clear_body);
    assert!(!preserved, "non-empty body must overwrite, not preserve");
}
1239
/// Empty body without force-merge: the predicate reports that the body
/// should not be preserved.
#[test]
fn gap08_empty_body_no_force_merge_does_not_preserve() {
    let (force_merge, raw_body_is_empty, clear_body) = (false, true, false);
    let preserved = should_preserve_body(force_merge, raw_body_is_empty, clear_body);
    assert!(
        !preserved,
        "no --force-merge means no preservation logic applies"
    );
}
1248}