1use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Maps the number of chunks produced for a body to the number of chunk rows
/// reported as persisted.
///
/// A body that yields zero or one chunk reports `0`; any larger count is
/// returned unchanged.
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        many => many,
    }
}
32
// Command-line arguments of the `remember` subcommand, consumed by `run`.
//
// NOTE: plain `//` comments are used on purpose throughout this struct. clap's
// derive macros turn `///` doc comments into help text, so doc comments here
// would change the `--help` output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
 # Create a memory with inline body\n \
 sqlite-graphrag remember --name design-auth --type decision \\\n \
 --description \"auth design\" --body \"JWT for stateless auth\"\n\n \
 # Create with curated graph via --graph-stdin\n \
 echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n \
 sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n \
 # Enable GLiNER NER extraction with --graph-stdin\n \
 echo '{\"body\":\"Alice from Microsoft...\",\"entities\":[],\"relationships\":[]}' | \\\n \
 sqlite-graphrag remember --name ner-test --type note --description \"test\" \\\n \
 --graph-stdin --enable-ner --gliner-variant int8\n\n \
 # Idempotent upsert with --force-merge\n \
 sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n \
 --body \"new content\" --force-merge")]
pub struct RememberArgs {
    // Memory name. `run` lowercases it, replaces `_` and spaces with `-`, and
    // trims leading/trailing `-` before validating it.
    #[arg(long)]
    pub name: String,
    // Memory kind. `run` requires it when the memory does not exist yet and
    // falls back to the stored type when updating an existing memory.
    #[arg(
        long,
        value_enum,
        long_help = "Memory kind stored in `memories.type`. Required when creating a new memory. Optional with --force-merge: if omitted the existing memory type is inherited. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
    )]
    pub r#type: Option<MemoryType>,
    // Description. Required by `run` when creating; inherited from the stored
    // row when omitted on an update.
    #[arg(long)]
    pub description: Option<String>,
    // Inline body text; mutually exclusive with the other body sources.
    #[arg(
        long,
        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
    )]
    pub body: Option<String>,
    // Path of a file whose contents become the body.
    #[arg(
        long,
        help = "Read body from a file instead of --body",
        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
    )]
    pub body_file: Option<std::path::PathBuf>,
    // When set, `run` reads the body from stdin.
    #[arg(
        long,
        conflicts_with_all = ["body", "body_file", "graph_stdin"]
    )]
    pub body_stdin: bool,
    // JSON file that `run` deserializes into the entity list.
    #[arg(
        long,
        help = "JSON file containing entities to associate with this memory"
    )]
    pub entities_file: Option<std::path::PathBuf>,
    // JSON file that `run` deserializes into the relationship list.
    #[arg(
        long,
        help = "JSON file containing relationships to associate with this memory"
    )]
    pub relationships_file: Option<std::path::PathBuf>,
    // When set, stdin carries one JSON document with body, entities and
    // relationships (see `GraphInput`).
    #[arg(
        long,
        help = "Read graph JSON (body + entities + relationships) from stdin",
        conflicts_with_all = [
            "body",
            "body_file",
            "body_stdin",
            "entities_file",
            "relationships_file"
        ]
    )]
    pub graph_stdin: bool,
    // Namespace override, resolved by `crate::namespace::resolve_namespace`.
    #[arg(
        long,
        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
    )]
    pub namespace: Option<String>,
    // Inline JSON metadata. `run` uses it in preference to `metadata_file`.
    #[arg(long)]
    pub metadata: Option<String>,
    // JSON metadata file, consulted only when `metadata` is absent.
    #[arg(long, help = "JSON file containing metadata key-value pairs")]
    pub metadata_file: Option<std::path::PathBuf>,
    // Allows `run` to update an existing (or soft-deleted) memory of the same
    // name instead of returning a duplicate error.
    #[arg(long)]
    pub force_merge: bool,
    // Expected `updated_at` value, forwarded to `memories::update`.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    // Turns on automatic extraction in `run`; it only runs when no entities
    // were supplied and the body is non-blank.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_ENABLE_NER",
        value_parser = crate::parsers::parse_bool_flexible,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        default_missing_value = "true",
        default_value = "false",
        help = "Enable automatic GLiNER NER entity/relationship extraction from body"
    )]
    pub enable_ner: bool,
    // Model variant name. `run` parses it and falls back to fp32 with a
    // warning when the value is not recognized.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
        default_value = "fp32",
        help = "GLiNER model variant: fp32 (1.1GB, best quality), fp16 (580MB), int8 (349MB, fastest but may miss entities on short texts), q4, q4f16"
    )]
    pub gliner_variant: String,
    // Hidden flag. `run` only logs a warning for it: a deprecation notice, or
    // a precedence notice when combined with `--enable-ner`.
    #[arg(long, hide = true)]
    pub skip_extraction: bool,
    // Opts out of body preservation on `--force-merge` with an empty body.
    #[arg(
        long,
        default_value_t = false,
        help = "Explicitly clear body content during --force-merge (without this flag, an empty body is ignored and the existing body is kept)"
    )]
    pub clear_body: bool,
    // Makes `run` report the planned action and return before writing.
    #[arg(
        long,
        default_value_t = false,
        help = "Validate input and report planned actions without persisting"
    )]
    pub dry_run: bool,
    // Session identifier stored on the memory record.
    #[arg(long)]
    pub session_id: Option<String>,
    // Output format selector. `run` reads and discards it.
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    // Hidden no-op flag (see its help text).
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location override, passed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    // RSS ceiling checked by `run` before embedding each chunk on the
    // multi-chunk path.
    #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
        help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
    pub max_rss_mb: u64,
}
176
/// Graph payload for a memory: optional body text plus entity and
/// relationship lists.
///
/// `run` deserializes this from stdin when `--graph-stdin` is set. Every field
/// may be omitted (`#[serde(default)]`), while unknown keys are rejected
/// (`deny_unknown_fields`).
#[derive(Deserialize, Default)]
#[serde(deny_unknown_fields)]
struct GraphInput {
    /// Body text of the memory; `None` when the key is absent.
    #[serde(default)]
    body: Option<String>,
    /// Entities to attach to the memory; empty when the key is absent.
    #[serde(default)]
    entities: Vec<NewEntity>,
    /// Relationships to attach to the memory; empty when the key is absent.
    #[serde(default)]
    relationships: Vec<NewRelationship>,
}
187
188fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
189 for rel in &mut graph.relationships {
190 rel.relation = crate::parsers::normalize_relation(&rel.relation);
191 if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
192 return Err(AppError::Validation(format!(
193 "{e} for relationship '{}' -> '{}'",
194 rel.source, rel.target
195 )));
196 }
197 crate::parsers::warn_if_non_canonical(&rel.relation);
198 if !(0.0..=1.0).contains(&rel.strength) {
199 return Err(AppError::Validation(format!(
200 "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
201 rel.strength, rel.source, rel.target
202 )));
203 }
204 }
205
206 Ok(())
207}
208
209pub fn run(args: RememberArgs) -> Result<(), AppError> {
210 use crate::constants::*;
211
212 let inicio = std::time::Instant::now();
213 let _ = args.format;
214 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
215
216 let original_name = args.name.clone();
220
221 let normalized_name = {
225 let lower = args.name.to_lowercase().replace(['_', ' '], "-");
226 let trimmed = lower.trim_matches('-').to_string();
227 if trimmed != args.name {
228 tracing::warn!(
229 original = %args.name,
230 normalized = %trimmed,
231 "name auto-normalized to kebab-case"
232 );
233 }
234 trimmed
235 };
236 let name_was_normalized = normalized_name != original_name;
237
238 if normalized_name.is_empty() {
239 return Err(AppError::Validation(
240 "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
241 ));
242 }
243 if normalized_name.len() > MAX_MEMORY_NAME_LEN {
244 return Err(AppError::LimitExceeded(
245 crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
246 ));
247 }
248
249 if normalized_name.starts_with("__") {
250 return Err(AppError::Validation(
251 crate::i18n::validation::reserved_name(),
252 ));
253 }
254
255 {
256 let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
257 .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
258 if !slug_re.is_match(&normalized_name) {
259 return Err(AppError::Validation(crate::i18n::validation::name_kebab(
260 &normalized_name,
261 )));
262 }
263 }
264
265 if let Some(ref desc) = args.description {
266 if desc.len() > MAX_MEMORY_DESCRIPTION_LEN {
267 return Err(AppError::Validation(
268 crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
269 ));
270 }
271 }
272
273 let mut raw_body = if let Some(b) = args.body {
274 b
275 } else if let Some(path) = args.body_file {
276 std::fs::read_to_string(&path).map_err(AppError::Io)?
277 } else if args.body_stdin || args.graph_stdin {
278 crate::stdin_helper::read_stdin_with_timeout(60)?
279 } else {
280 String::new()
281 };
282
283 let mut entities_provided_externally =
284 args.entities_file.is_some() || args.relationships_file.is_some();
285
286 let mut graph = GraphInput::default();
287 if let Some(path) = args.entities_file {
288 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
289 graph.entities = serde_json::from_str(&content)?;
290 }
291 if let Some(path) = args.relationships_file {
292 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
293 graph.relationships = serde_json::from_str(&content)?;
294 }
295 if args.graph_stdin {
296 graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
297 AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
298 })?;
299 raw_body = graph.body.take().unwrap_or_default();
300 }
301 if args.graph_stdin && !graph.entities.is_empty() {
302 entities_provided_externally = true;
303 }
304
305 if graph.entities.len() > max_entities_per_memory() {
306 return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
307 max_entities_per_memory(),
308 )));
309 }
310 if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
311 return Err(AppError::LimitExceeded(
312 errors_msg::relationship_limit_exceeded(MAX_RELATIONSHIPS_PER_MEMORY),
313 ));
314 }
315 normalize_and_validate_graph_input(&mut graph)?;
316
317 if raw_body.len() > MAX_MEMORY_BODY_LEN {
318 return Err(AppError::LimitExceeded(
319 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
320 ));
321 }
322
323 let body_will_be_preserved = args.force_merge && raw_body.trim().is_empty() && !args.clear_body;
328 if !entities_provided_externally
329 && graph.entities.is_empty()
330 && raw_body.trim().is_empty()
331 && !body_will_be_preserved
332 && !args.clear_body
333 {
334 return Err(AppError::Validation(crate::i18n::validation::empty_body()));
335 }
336
337 let metadata: serde_json::Value = if let Some(m) = args.metadata {
338 serde_json::from_str(&m)?
339 } else if let Some(path) = args.metadata_file {
340 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
341 serde_json::from_str(&content)?
342 } else {
343 serde_json::json!({})
344 };
345
346 let mut body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
347 let mut snippet: String = raw_body.chars().take(200).collect();
348
349 let paths = AppPaths::resolve(args.db.as_deref())?;
350 paths.ensure_dirs()?;
351
352 let mut extraction_method: Option<String> = None;
354 let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
355 let mut relationships_truncated = false;
356 if args.enable_ner && args.skip_extraction {
357 tracing::warn!(
358 "--enable-ner and --skip-extraction are contradictory; --enable-ner takes precedence"
359 );
360 }
361 if args.skip_extraction && !args.enable_ner {
362 tracing::warn!("--skip-extraction is deprecated and has no effect (NER is disabled by default since v1.0.45); remove this flag");
363 }
364 let gliner_variant: crate::extraction::GlinerVariant =
365 args.gliner_variant.parse().unwrap_or_else(|e| {
366 tracing::warn!("invalid --gliner-variant: {e}; using fp32");
367 crate::extraction::GlinerVariant::Fp32
368 });
369 if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
370 match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
371 Ok(extracted) => {
372 extraction_method = Some(extracted.extraction_method.clone());
373 extracted_urls = extracted.urls;
374 graph.entities = extracted.entities;
375 graph.relationships = extracted.relationships;
376 relationships_truncated = extracted.relationships_truncated;
377
378 if graph.entities.len() > max_entities_per_memory() {
379 graph.entities.truncate(max_entities_per_memory());
380 }
381 if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
382 relationships_truncated = true;
383 graph.relationships.truncate(MAX_RELATIONSHIPS_PER_MEMORY);
384 }
385 normalize_and_validate_graph_input(&mut graph)?;
386 }
387 Err(e) => {
388 tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
389 extraction_method = Some("none:extraction-failed".to_string());
390 }
391 }
392 }
393
394 let mut conn = open_rw(&paths.db)?;
395 ensure_schema(&mut conn)?;
396
397 if args.dry_run {
399 let existing = memories::find_by_name(&conn, &namespace, &normalized_name)?;
400 let planned_action = if existing.is_some() && args.force_merge {
401 "would_update"
402 } else {
403 "would_create"
404 };
405 output::emit_json(&serde_json::json!({
406 "dry_run": true,
407 "name": normalized_name,
408 "namespace": namespace,
409 "planned_action": planned_action,
410 }))?;
411 return Ok(());
412 }
413
414 {
415 use crate::constants::MAX_NAMESPACES_ACTIVE;
416 let active_count: u32 = conn.query_row(
417 "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
418 [],
419 |r| r.get::<_, i64>(0).map(|v| v as u32),
420 )?;
421 let ns_exists: bool = conn.query_row(
422 "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
423 rusqlite::params![namespace],
424 |r| r.get::<_, i64>(0).map(|v| v > 0),
425 )?;
426 if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
427 return Err(AppError::NamespaceError(format!(
428 "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
429 )));
430 }
431 }
432
433 if let Some((sd_id, true)) =
435 memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
436 {
437 if args.force_merge {
438 memories::clear_deleted_at(&conn, sd_id)?;
439 } else {
440 return Err(AppError::Duplicate(
441 errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
442 ));
443 }
444 }
445
446 let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
447 if existing_memory.is_some() && !args.force_merge {
448 return Err(AppError::Duplicate(errors_msg::duplicate_memory(
449 &normalized_name,
450 &namespace,
451 )));
452 }
453
454 let (resolved_type, resolved_description) = if existing_memory.is_none() {
458 let t = args.r#type.ok_or_else(|| {
460 AppError::Validation(
461 "--type and --description are required when creating a new memory".to_string(),
462 )
463 })?;
464 let d = args.description.clone().ok_or_else(|| {
465 AppError::Validation(
466 "--type and --description are required when creating a new memory".to_string(),
467 )
468 })?;
469 (t.as_str().to_string(), d)
470 } else {
471 let existing_row = memories::read_by_name(&conn, &namespace, &normalized_name)?
473 .ok_or_else(|| {
474 AppError::NotFound(format!(
475 "memory '{normalized_name}' not found in namespace '{namespace}'"
476 ))
477 })?;
478 let t = args
479 .r#type
480 .map(|v| v.as_str().to_string())
481 .unwrap_or_else(|| existing_row.memory_type.clone());
482 let d = args
483 .description
484 .clone()
485 .unwrap_or_else(|| existing_row.description.clone());
486 (t, d)
487 };
488
489 if body_will_be_preserved {
494 if let Some(existing_row) = memories::read_by_name(&conn, &namespace, &normalized_name)? {
495 if !existing_row.body.is_empty() {
496 tracing::debug!(
497 name = %normalized_name,
498 "GAP-08: empty body with --force-merge and no --clear-body; preserving existing body"
499 );
500 raw_body = existing_row.body;
501 body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
502 snippet = raw_body.chars().take(200).collect();
503 }
504 }
505 }
506
507 let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
508
509 output::emit_progress_i18n(
510 &format!(
511 "Remember stage: validated input; available memory {} MB",
512 crate::memory_guard::available_memory_mb()
513 ),
514 &format!(
515 "Stage remember: input validated; available memory {} MB",
516 crate::memory_guard::available_memory_mb()
517 ),
518 );
519
520 let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
521 let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
522 let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
523 let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
524 let chunks_created = chunks_info.len();
525 let chunks_persisted = compute_chunks_persisted(chunks_info.len());
529
530 output::emit_progress_i18n(
531 &format!(
532 "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
533 chunks_created,
534 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
535 ),
536 &format!(
537 "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
538 chunks_created,
539 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
540 ),
541 );
542
543 if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
544 return Err(AppError::LimitExceeded(format!(
545 "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
546 crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
547 )));
548 }
549
550 output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
551 let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
552
553 let embedding = if chunks_info.len() == 1 {
554 crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
555 } else {
556 let chunk_texts: Vec<&str> = chunks_info
557 .iter()
558 .map(|c| chunking::chunk_text(&raw_body, c))
559 .collect();
560 output::emit_progress_i18n(
561 &format!(
562 "Embedding {} chunks serially to keep memory bounded...",
563 chunks_info.len()
564 ),
565 &format!(
566 "Embedding {} chunks serially to keep memory bounded...",
567 chunks_info.len()
568 ),
569 );
570 let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
571 for chunk_text in &chunk_texts {
572 if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
573 if rss > args.max_rss_mb {
574 tracing::error!(
575 rss_mb = rss,
576 max_rss_mb = args.max_rss_mb,
577 "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
578 );
579 return Err(AppError::LowMemory {
580 available_mb: crate::memory_guard::available_memory_mb(),
581 required_mb: args.max_rss_mb,
582 });
583 }
584 }
585 chunk_embeddings.push(crate::daemon::embed_passage_or_local(
586 &paths.models,
587 chunk_text,
588 )?);
589 }
590 output::emit_progress_i18n(
591 &format!(
592 "Remember stage: chunk embeddings complete; process RSS {} MB",
593 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
594 ),
595 &format!(
596 "Stage remember: chunk embeddings completed; process RSS {} MB",
597 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
598 ),
599 );
600 let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
601 chunk_embeddings_cache = Some(chunk_embeddings);
602 aggregated
603 };
604 let body_for_storage = raw_body;
605
606 let memory_type = resolved_type.as_str();
607 let new_memory = NewMemory {
608 namespace: namespace.clone(),
609 name: normalized_name.clone(),
610 memory_type: memory_type.to_string(),
611 description: resolved_description.clone(),
612 body: body_for_storage,
613 body_hash: body_hash.clone(),
614 session_id: args.session_id.clone(),
615 source: "agent".to_string(),
616 metadata,
617 };
618
619 let mut warnings = Vec::with_capacity(4);
620 let mut entities_persisted = 0usize;
621 let mut relationships_persisted = 0usize;
622
623 let graph_entity_embeddings = graph
624 .entities
625 .iter()
626 .map(|entity| {
627 let entity_text = match &entity.description {
628 Some(desc) => format!("{} {}", entity.name, desc),
629 None => entity.name.clone(),
630 };
631 crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
632 })
633 .collect::<Result<Vec<_>, _>>()?;
634
635 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
636
637 let (memory_id, action, version) = match existing_memory {
638 Some((existing_id, _updated_at, _current_version)) => {
639 if let Some(hash_id) = duplicate_hash_id {
640 if hash_id != existing_id {
641 warnings.push(format!(
642 "identical body already exists as memory id {hash_id}"
643 ));
644 }
645 }
646
647 let (old_fts_name, old_fts_desc, old_fts_body): (String, String, String) = tx
649 .query_row(
650 "SELECT name, description, body FROM memories WHERE id = ?1",
651 rusqlite::params![existing_id],
652 |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
653 )?;
654
655 storage_chunks::delete_chunks(&tx, existing_id)?;
656
657 let next_v = versions::next_version(&tx, existing_id)?;
658 memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
659
660 memories::sync_fts_after_update(
663 &tx,
664 existing_id,
665 &old_fts_name,
666 &old_fts_desc,
667 &old_fts_body,
668 &normalized_name,
669 &resolved_description,
670 &new_memory.body,
671 )?;
672
673 versions::insert_version(
674 &tx,
675 existing_id,
676 next_v,
677 &normalized_name,
678 memory_type,
679 &resolved_description,
680 &new_memory.body,
681 &serde_json::to_string(&new_memory.metadata)?,
682 None,
683 "edit",
684 )?;
685 memories::upsert_vec(
686 &tx,
687 existing_id,
688 &namespace,
689 memory_type,
690 &embedding,
691 &normalized_name,
692 &snippet,
693 )?;
694 (existing_id, "updated".to_string(), next_v)
695 }
696 None => {
697 if let Some(hash_id) = duplicate_hash_id {
698 warnings.push(format!(
699 "identical body already exists as memory id {hash_id}"
700 ));
701 }
702 let id = memories::insert(&tx, &new_memory)?;
703 versions::insert_version(
704 &tx,
705 id,
706 1,
707 &normalized_name,
708 memory_type,
709 &resolved_description,
710 &new_memory.body,
711 &serde_json::to_string(&new_memory.metadata)?,
712 None,
713 "create",
714 )?;
715 memories::upsert_vec(
716 &tx,
717 id,
718 &namespace,
719 memory_type,
720 &embedding,
721 &normalized_name,
722 &snippet,
723 )?;
724 (id, "created".to_string(), 1)
725 }
726 };
727
728 if chunks_info.len() > 1 {
729 storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
730
731 let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
732 AppError::Internal(anyhow::anyhow!(
733 "chunk embeddings cache missing in multi-chunk remember path"
734 ))
735 })?;
736
737 for (i, emb) in chunk_embeddings.iter().enumerate() {
738 storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
739 }
740 output::emit_progress_i18n(
741 &format!(
742 "Remember stage: persisted chunk vectors; process RSS {} MB",
743 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
744 ),
745 &format!(
746 "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
747 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
748 ),
749 );
750 }
751
752 if !graph.entities.is_empty() || !graph.relationships.is_empty() {
753 for entity in &graph.entities {
754 let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
755 let entity_embedding = &graph_entity_embeddings[entities_persisted];
756 entities::upsert_entity_vec(
757 &tx,
758 entity_id,
759 &namespace,
760 entity.entity_type,
761 entity_embedding,
762 &entity.name,
763 )?;
764 entities::link_memory_entity(&tx, memory_id, entity_id)?;
765 entities::increment_degree(&tx, entity_id)?;
766 entities_persisted += 1;
767 }
768 let entity_types: std::collections::HashMap<&str, EntityType> = graph
769 .entities
770 .iter()
771 .map(|entity| (entity.name.as_str(), entity.entity_type))
772 .collect();
773
774 for rel in &graph.relationships {
775 let source_entity = NewEntity {
776 name: rel.source.clone(),
777 entity_type: entity_types
778 .get(rel.source.as_str())
779 .copied()
780 .unwrap_or(EntityType::Concept),
781 description: None,
782 };
783 let target_entity = NewEntity {
784 name: rel.target.clone(),
785 entity_type: entity_types
786 .get(rel.target.as_str())
787 .copied()
788 .unwrap_or(EntityType::Concept),
789 description: None,
790 };
791 let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
792 let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
793 let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
794 entities::link_memory_relationship(&tx, memory_id, rel_id)?;
795 relationships_persisted += 1;
796 }
797 }
798 tx.commit()?;
799
800 let urls_persisted = if !extracted_urls.is_empty() {
803 let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
804 .into_iter()
805 .map(|u| storage_urls::MemoryUrl {
806 url: u.url,
807 offset: Some(u.offset as i64),
808 })
809 .collect();
810 storage_urls::insert_urls(&conn, memory_id, &url_entries)
811 } else {
812 0
813 };
814
815 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
816
817 let created_at_epoch = chrono::Utc::now().timestamp();
818 let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
819
820 output::emit_json(&RememberResponse {
821 memory_id,
822 name: normalized_name.clone(),
826 namespace,
827 action: action.clone(),
828 operation: action,
829 version,
830 entities_persisted,
831 relationships_persisted,
832 relationships_truncated,
833 chunks_created,
834 chunks_persisted,
835 urls_persisted,
836 extraction_method,
837 merged_into_memory_id: None,
838 warnings,
839 created_at: created_at_epoch,
840 created_at_iso,
841 elapsed_ms: inicio.elapsed().as_millis() as u64,
842 name_was_normalized,
843 original_name: name_was_normalized.then_some(original_name),
844 })?;
845
846 Ok(())
847}
848
849#[cfg(test)]
850mod tests {
851 use super::compute_chunks_persisted;
852 use crate::output::RememberResponse;
853
/// Zero chunks created must be reported as zero chunks persisted.
#[test]
fn chunks_persisted_zero_for_zero_chunks() {
    let persisted = compute_chunks_persisted(0);
    assert_eq!(persisted, 0);
}
859
/// A single-chunk body must be reported as zero chunks persisted.
#[test]
fn chunks_persisted_zero_for_single_chunk_body() {
    let persisted = compute_chunks_persisted(1);
    assert_eq!(persisted, 0);
}
866
/// For two or more chunks the persisted count equals the created count.
#[test]
fn chunks_persisted_equals_count_for_multi_chunk_body() {
    for created in [2usize, 7, 64] {
        assert_eq!(compute_chunks_persisted(created), created);
    }
}
874
/// A serialized `RememberResponse` exposes its core fields under the expected
/// JSON keys, with `warnings` as an array and an absent merge target as null.
#[test]
fn remember_response_serializes_required_fields() {
    let response = RememberResponse {
        memory_id: 42,
        name: String::from("minha-mem"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: None,
        warnings: Vec::new(),
        created_at: 1_705_320_000,
        created_at_iso: String::from("2024-01-15T12:00:00Z"),
        elapsed_ms: 55,
        name_was_normalized: false,
        original_name: None,
    };

    let value = serde_json::to_value(&response).expect("serialization failed");

    assert_eq!(value["memory_id"], 42);
    assert_eq!(value["action"], "created");
    assert_eq!(value["operation"], "created");
    assert_eq!(value["version"], 1);
    assert_eq!(value["elapsed_ms"], 55u64);
    assert!(value["warnings"].is_array());
    assert!(value["merged_into_memory_id"].is_null());
}
909
/// `action` and `operation` serialize to the same value, and the persisted
/// counters are emitted unchanged.
#[test]
fn remember_response_action_e_operation_sao_aliases() {
    let response = RememberResponse {
        memory_id: 1,
        name: String::from("mem"),
        namespace: String::from("global"),
        action: String::from("updated"),
        operation: String::from("updated"),
        version: 2,
        entities_persisted: 3,
        relationships_persisted: 1,
        relationships_truncated: false,
        chunks_created: 2,
        chunks_persisted: 2,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: None,
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let value = serde_json::to_value(&response).expect("serialization failed");

    assert_eq!(
        value["action"], value["operation"],
        "action e operation devem ser iguais"
    );
    assert_eq!(value["entities_persisted"], 3);
    assert_eq!(value["relationships_persisted"], 1);
    assert_eq!(value["chunks_created"], 2);
}
944
/// Warning messages are serialized as a JSON array of strings.
#[test]
fn remember_response_warnings_lista_mensagens() {
    let response = RememberResponse {
        memory_id: 5,
        name: String::from("dup-mem"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: None,
        warnings: vec![String::from("identical body already exists as memory id 3")],
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 10,
        name_was_normalized: false,
        original_name: None,
    };

    let value = serde_json::to_value(&response).expect("serialization failed");
    let emitted = value["warnings"]
        .as_array()
        .expect("warnings deve ser array");

    assert_eq!(emitted.len(), 1);
    let first = emitted[0].as_str().unwrap();
    assert!(first.contains("identical body"));
}
977
/// Mirrors the reserved-prefix guard: a name starting with `__` yields a
/// validation error carrying a non-empty message.
#[test]
fn invalid_name_reserved_prefix_returns_validation_error() {
    use crate::errors::AppError;

    let candidate = "__reservado";
    let outcome: Result<(), AppError> = match candidate.starts_with("__") {
        true => Err(AppError::Validation(
            crate::i18n::validation::reserved_name(),
        )),
        false => Ok(()),
    };

    assert!(outcome.is_err());
    match outcome {
        Err(AppError::Validation(msg)) => assert!(!msg.is_empty()),
        _ => {}
    }
}
995
/// Mirrors the name-length guard in `run`: a name longer than
/// `MAX_MEMORY_NAME_LEN` is rejected.
///
/// `run` reports this case as `AppError::LimitExceeded`, so the test builds
/// and asserts that variant (it previously built `AppError::Validation` and
/// only checked `is_err()`).
#[test]
fn name_too_long_returns_validation_error() {
    use crate::errors::AppError;

    let long_name = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
    let result: Result<(), AppError> =
        if long_name.len() > crate::constants::MAX_MEMORY_NAME_LEN {
            Err(AppError::LimitExceeded(
                crate::i18n::validation::name_length(crate::constants::MAX_MEMORY_NAME_LEN),
            ))
        } else {
            Ok(())
        };

    assert!(matches!(result, Err(AppError::LimitExceeded(_))));
}
1010
/// A present `merged_into_memory_id` serializes as a JSON integer.
#[test]
fn remember_response_merged_into_memory_id_some_serializes_integer() {
    let response = RememberResponse {
        memory_id: 10,
        name: String::from("mem-mergeada"),
        namespace: String::from("global"),
        action: String::from("updated"),
        operation: String::from("updated"),
        version: 3,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: Some(7),
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let value = serde_json::to_value(&response).expect("serialization failed");
    assert_eq!(value["merged_into_memory_id"], 7);
}
1039
/// `urls_persisted` is emitted in the serialized response with its value.
#[test]
fn remember_response_urls_persisted_serializes_field() {
    let response = RememberResponse {
        memory_id: 3,
        name: String::from("mem-com-urls"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 3,
        extraction_method: Some(String::from("regex-only")),
        merged_into_memory_id: None,
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let value = serde_json::to_value(&response).expect("serialization failed");
    assert_eq!(value["urls_persisted"], 3);
}
1068
1069 #[test]
1070 fn empty_name_after_normalization_returns_specific_message() {
1071 use crate::errors::AppError;
1074 let normalized = "---".to_lowercase().replace(['_', ' '], "-");
1075 let normalized = normalized.trim_matches('-').to_string();
1076 let resultado: Result<(), AppError> = if normalized.is_empty() {
1077 Err(AppError::Validation(
1078 "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
1079 ))
1080 } else {
1081 Ok(())
1082 };
1083 assert!(resultado.is_err());
1084 if let Err(AppError::Validation(msg)) = resultado {
1085 assert!(
1086 msg.contains("empty after normalization"),
1087 "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
1088 );
1089 }
1090 }
1091
1092 #[test]
1093 fn name_only_underscores_after_normalization_returns_specific_message() {
1094 use crate::errors::AppError;
1096 let normalized = "___".to_lowercase().replace(['_', ' '], "-");
1097 let normalized = normalized.trim_matches('-').to_string();
1098 assert!(
1099 normalized.is_empty(),
1100 "underscores devem normalizar para string vazia"
1101 );
1102 let resultado: Result<(), AppError> = if normalized.is_empty() {
1103 Err(AppError::Validation(
1104 "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
1105 ))
1106 } else {
1107 Ok(())
1108 };
1109 assert!(resultado.is_err());
1110 if let Err(AppError::Validation(msg)) = resultado {
1111 assert!(
1112 msg.contains("empty after normalization"),
1113 "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
1114 );
1115 }
1116 }
1117
1118 #[test]
1119 fn remember_response_relationships_truncated_serializes_field() {
1120 let resp_false = RememberResponse {
1122 memory_id: 1,
1123 name: "test".to_string(),
1124 namespace: "global".to_string(),
1125 action: "created".to_string(),
1126 operation: "created".to_string(),
1127 version: 1,
1128 entities_persisted: 2,
1129 relationships_persisted: 1,
1130 relationships_truncated: false,
1131 chunks_created: 1,
1132 chunks_persisted: 0,
1133 urls_persisted: 0,
1134 extraction_method: None,
1135 merged_into_memory_id: None,
1136 warnings: vec![],
1137 created_at: 0,
1138 created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1139 elapsed_ms: 0,
1140 name_was_normalized: false,
1141 original_name: None,
1142 };
1143 let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
1144 assert_eq!(json_false["relationships_truncated"], false);
1145
1146 let resp_true = RememberResponse {
1147 relationships_truncated: true,
1148 ..resp_false
1149 };
1150 let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
1151 assert_eq!(json_true["relationships_truncated"], true);
1152 }
1153
1154 fn should_preserve_body(force_merge: bool, raw_body_is_empty: bool, clear_body: bool) -> bool {
1163 force_merge && raw_body_is_empty && !clear_body
1164 }
1165
1166 #[test]
1167 fn gap08_empty_body_force_merge_no_clear_body_preserves() {
1168 assert!(
1171 should_preserve_body(true, true, false),
1172 "empty body + force-merge + no clear-body should trigger preservation"
1173 );
1174 }
1175
1176 #[test]
1177 fn gap08_empty_body_force_merge_with_clear_body_does_not_preserve() {
1178 assert!(
1180 !should_preserve_body(true, true, true),
1181 "--clear-body must bypass preservation"
1182 );
1183 }
1184
1185 #[test]
1186 fn gap08_non_empty_body_force_merge_does_not_preserve() {
1187 assert!(
1189 !should_preserve_body(true, false, false),
1190 "non-empty body must overwrite, not preserve"
1191 );
1192 }
1193
1194 #[test]
1195 fn gap08_empty_body_no_force_merge_does_not_preserve() {
1196 assert!(
1198 !should_preserve_body(false, true, false),
1199 "no --force-merge means no preservation logic applies"
1200 );
1201 }
1202}