1use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Maps the number of chunks produced by chunking to the number reported as
/// persisted.
///
/// Zero or one chunk reports `0`; any larger count is reported unchanged.
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        many => many,
    }
}
32
33#[derive(clap::Args)]
34#[command(after_long_help = "EXAMPLES:\n \
35 # Create a memory with inline body\n \
36 sqlite-graphrag remember --name design-auth --type decision \\\n \
37 --description \"auth design\" --body \"JWT for stateless auth\"\n\n \
38 # Create with curated graph via --graph-stdin\n \
39 echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n \
40 sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n \
41 # Enable GLiNER NER extraction with --graph-stdin\n \
42 echo '{\"body\":\"Alice from Microsoft...\",\"entities\":[],\"relationships\":[]}' | \\\n \
43 sqlite-graphrag remember --name ner-test --type note --description \"test\" \\\n \
44 --graph-stdin --enable-ner --gliner-variant int8\n\n \
45 # Idempotent upsert with --force-merge\n \
46 sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n \
47 --body \"new content\" --force-merge")]
48pub struct RememberArgs {
49 #[arg(long)]
52 pub name: String,
53 #[arg(
54 long,
55 value_enum,
56 long_help = "Memory kind stored in `memories.type`. Required when creating a new memory. Optional with --force-merge: if omitted the existing memory type is inherited. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
57 )]
58 pub r#type: Option<MemoryType>,
59 #[arg(long)]
62 pub description: Option<String>,
63 #[arg(
66 long,
67 help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
68 conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
69 )]
70 pub body: Option<String>,
71 #[arg(
72 long,
73 help = "Read body from a file instead of --body",
74 conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
75 )]
76 pub body_file: Option<std::path::PathBuf>,
77 #[arg(
80 long,
81 conflicts_with_all = ["body", "body_file", "graph_stdin"]
82 )]
83 pub body_stdin: bool,
84 #[arg(
85 long,
86 help = "JSON file containing entities to associate with this memory"
87 )]
88 pub entities_file: Option<std::path::PathBuf>,
89 #[arg(
90 long,
91 help = "JSON file containing relationships to associate with this memory"
92 )]
93 pub relationships_file: Option<std::path::PathBuf>,
94 #[arg(
95 long,
96 help = "Read graph JSON (body + entities + relationships) from stdin",
97 conflicts_with_all = [
98 "body",
99 "body_file",
100 "body_stdin",
101 "entities_file",
102 "relationships_file"
103 ]
104 )]
105 pub graph_stdin: bool,
106 #[arg(
107 long,
108 help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
109 )]
110 pub namespace: Option<String>,
111 #[arg(long)]
113 pub metadata: Option<String>,
114 #[arg(long, help = "JSON file containing metadata key-value pairs")]
115 pub metadata_file: Option<std::path::PathBuf>,
116 #[arg(long)]
117 pub force_merge: bool,
118 #[arg(
119 long,
120 value_name = "EPOCH_OR_RFC3339",
121 value_parser = crate::parsers::parse_expected_updated_at,
122 long_help = "Optimistic lock: reject if updated_at does not match. \
123Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
124 )]
125 pub expected_updated_at: Option<i64>,
126 #[arg(
127 long,
128 env = "SQLITE_GRAPHRAG_ENABLE_NER",
129 value_parser = crate::parsers::parse_bool_flexible,
130 action = clap::ArgAction::Set,
131 num_args = 0..=1,
132 default_missing_value = "true",
133 default_value = "false",
134 help = "Enable automatic GLiNER NER entity/relationship extraction from body"
135 )]
136 pub enable_ner: bool,
137 #[arg(
138 long,
139 env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
140 default_value = "fp32",
141 help = "GLiNER model variant: fp32 (1.1GB, best quality), fp16 (580MB), int8 (349MB, fastest but may miss entities on short texts), q4, q4f16"
142 )]
143 pub gliner_variant: String,
144 #[arg(long, hide = true)]
145 pub skip_extraction: bool,
146 #[arg(
150 long,
151 default_value_t = false,
152 help = "Explicitly clear body content during --force-merge (without this flag, an empty body is ignored and the existing body is kept)"
153 )]
154 pub clear_body: bool,
155 #[arg(
157 long,
158 default_value_t = false,
159 help = "Validate input and report planned actions without persisting"
160 )]
161 pub dry_run: bool,
162 #[arg(long)]
164 pub session_id: Option<String>,
165 #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
166 pub format: JsonOutputFormat,
167 #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
168 pub json: bool,
169 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
170 pub db: Option<String>,
171 #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
173 help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
174 pub max_rss_mb: u64,
175 #[arg(long, default_value_t = 50, value_name = "N")]
178 pub max_entity_degree: u32,
179}
180
181#[derive(Deserialize, Default)]
182#[serde(deny_unknown_fields)]
183struct GraphInput {
184 #[serde(default)]
185 body: Option<String>,
186 #[serde(default)]
187 entities: Vec<NewEntity>,
188 #[serde(default)]
189 relationships: Vec<NewRelationship>,
190}
191
192fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
193 for rel in &mut graph.relationships {
194 rel.relation = crate::parsers::normalize_relation(&rel.relation);
195 if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
196 return Err(AppError::Validation(format!(
197 "{e} for relationship '{}' -> '{}'",
198 rel.source, rel.target
199 )));
200 }
201 crate::parsers::warn_if_non_canonical(&rel.relation);
202 if !(0.0..=1.0).contains(&rel.strength) {
203 return Err(AppError::Validation(format!(
204 "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
205 rel.strength, rel.source, rel.target
206 )));
207 }
208 }
209
210 Ok(())
211}
212
213pub fn run(args: RememberArgs) -> Result<(), AppError> {
214 use crate::constants::*;
215
216 let inicio = std::time::Instant::now();
217 let _ = args.format;
218 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
219
220 let original_name = args.name.clone();
224
225 let normalized_name = {
229 let lower = args.name.to_lowercase().replace(['_', ' '], "-");
230 let trimmed = lower.trim_matches('-').to_string();
231 if trimmed != args.name {
232 tracing::warn!(
233 original = %args.name,
234 normalized = %trimmed,
235 "name auto-normalized to kebab-case"
236 );
237 }
238 trimmed
239 };
240 let name_was_normalized = normalized_name != original_name;
241
242 if normalized_name.is_empty() {
243 return Err(AppError::Validation(
244 "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
245 ));
246 }
247 if normalized_name.len() > MAX_MEMORY_NAME_LEN {
248 return Err(AppError::LimitExceeded(
249 crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
250 ));
251 }
252
253 if normalized_name.starts_with("__") {
254 return Err(AppError::Validation(
255 crate::i18n::validation::reserved_name(),
256 ));
257 }
258
259 {
260 let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
261 .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
262 if !slug_re.is_match(&normalized_name) {
263 return Err(AppError::Validation(crate::i18n::validation::name_kebab(
264 &normalized_name,
265 )));
266 }
267 }
268
269 if let Some(ref desc) = args.description {
270 if desc.len() > MAX_MEMORY_DESCRIPTION_LEN {
271 return Err(AppError::Validation(
272 crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
273 ));
274 }
275 }
276
277 let mut raw_body = if let Some(b) = args.body {
278 b
279 } else if let Some(path) = args.body_file {
280 std::fs::read_to_string(&path).map_err(AppError::Io)?
281 } else if args.body_stdin || args.graph_stdin {
282 crate::stdin_helper::read_stdin_with_timeout(60)?
283 } else {
284 String::new()
285 };
286
287 let mut entities_provided_externally =
288 args.entities_file.is_some() || args.relationships_file.is_some();
289
290 let mut graph = GraphInput::default();
291 if let Some(path) = args.entities_file {
292 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
293 graph.entities = serde_json::from_str(&content)?;
294 }
295 if let Some(path) = args.relationships_file {
296 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
297 graph.relationships = serde_json::from_str(&content)?;
298 }
299 if args.graph_stdin {
300 graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
301 AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
302 })?;
303 raw_body = graph.body.take().unwrap_or_default();
304 }
305 if args.graph_stdin && !graph.entities.is_empty() {
306 entities_provided_externally = true;
307 }
308
309 if graph.entities.len() > max_entities_per_memory() {
310 return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
311 max_entities_per_memory(),
312 )));
313 }
314 if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
315 return Err(AppError::LimitExceeded(
316 errors_msg::relationship_limit_exceeded(MAX_RELATIONSHIPS_PER_MEMORY),
317 ));
318 }
319 normalize_and_validate_graph_input(&mut graph)?;
320
321 if raw_body.len() > MAX_MEMORY_BODY_LEN {
322 return Err(AppError::LimitExceeded(
323 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
324 ));
325 }
326
327 let body_will_be_preserved = args.force_merge && raw_body.trim().is_empty() && !args.clear_body;
332 if !entities_provided_externally
333 && graph.entities.is_empty()
334 && raw_body.trim().is_empty()
335 && !body_will_be_preserved
336 && !args.clear_body
337 {
338 return Err(AppError::Validation(crate::i18n::validation::empty_body()));
339 }
340
341 let metadata: serde_json::Value = if let Some(m) = args.metadata {
342 serde_json::from_str(&m)?
343 } else if let Some(path) = args.metadata_file {
344 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
345 serde_json::from_str(&content)?
346 } else {
347 serde_json::json!({})
348 };
349
350 let mut body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
351 let mut snippet: String = raw_body.chars().take(200).collect();
352
353 let paths = AppPaths::resolve(args.db.as_deref())?;
354 paths.ensure_dirs()?;
355
356 let mut extraction_method: Option<String> = None;
358 let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
359 let mut relationships_truncated = false;
360 if args.enable_ner && args.skip_extraction {
361 tracing::warn!(
362 "--enable-ner and --skip-extraction are contradictory; --enable-ner takes precedence"
363 );
364 }
365 if args.skip_extraction && !args.enable_ner {
366 tracing::warn!("--skip-extraction is deprecated and has no effect (NER is disabled by default since v1.0.45); remove this flag");
367 }
368 let gliner_variant: crate::extraction::GlinerVariant =
369 args.gliner_variant.parse().unwrap_or_else(|e| {
370 tracing::warn!("invalid --gliner-variant: {e}; using fp32");
371 crate::extraction::GlinerVariant::Fp32
372 });
373 if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
374 match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
375 Ok(extracted) => {
376 extraction_method = Some(extracted.extraction_method.clone());
377 extracted_urls = extracted.urls;
378 graph.entities = extracted.entities;
379 graph.relationships = extracted.relationships;
380 relationships_truncated = extracted.relationships_truncated;
381
382 if graph.entities.len() > max_entities_per_memory() {
383 graph.entities.truncate(max_entities_per_memory());
384 }
385 if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
386 relationships_truncated = true;
387 graph.relationships.truncate(MAX_RELATIONSHIPS_PER_MEMORY);
388 }
389 normalize_and_validate_graph_input(&mut graph)?;
390 }
391 Err(e) => {
392 tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
393 extraction_method = Some("none:extraction-failed".to_string());
394 }
395 }
396 }
397
398 let mut conn = open_rw(&paths.db)?;
399 ensure_schema(&mut conn)?;
400
401 if args.dry_run {
403 let existing = memories::find_by_name(&conn, &namespace, &normalized_name)?;
404 let planned_action = if existing.is_some() && args.force_merge {
405 "would_update"
406 } else {
407 "would_create"
408 };
409 output::emit_json(&serde_json::json!({
410 "dry_run": true,
411 "name": normalized_name,
412 "namespace": namespace,
413 "planned_action": planned_action,
414 }))?;
415 return Ok(());
416 }
417
418 {
419 use crate::constants::MAX_NAMESPACES_ACTIVE;
420 let active_count: u32 = conn.query_row(
421 "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
422 [],
423 |r| r.get::<_, i64>(0).map(|v| v as u32),
424 )?;
425 let ns_exists: bool = conn.query_row(
426 "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
427 rusqlite::params![namespace],
428 |r| r.get::<_, i64>(0).map(|v| v > 0),
429 )?;
430 if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
431 return Err(AppError::NamespaceError(format!(
432 "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
433 )));
434 }
435 }
436
437 if let Some((sd_id, true)) =
439 memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
440 {
441 if args.force_merge {
442 memories::clear_deleted_at(&conn, sd_id)?;
443 } else {
444 return Err(AppError::Duplicate(
445 errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
446 ));
447 }
448 }
449
450 let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
451 if existing_memory.is_some() && !args.force_merge {
452 return Err(AppError::Duplicate(errors_msg::duplicate_memory(
453 &normalized_name,
454 &namespace,
455 )));
456 }
457
458 let (resolved_type, resolved_description) = if existing_memory.is_none() {
462 let t = args.r#type.ok_or_else(|| {
464 AppError::Validation(
465 "--type and --description are required when creating a new memory".to_string(),
466 )
467 })?;
468 let d = args.description.clone().ok_or_else(|| {
469 AppError::Validation(
470 "--type and --description are required when creating a new memory".to_string(),
471 )
472 })?;
473 (t.as_str().to_string(), d)
474 } else {
475 let existing_row = memories::read_by_name(&conn, &namespace, &normalized_name)?
477 .ok_or_else(|| {
478 AppError::NotFound(format!(
479 "memory '{normalized_name}' not found in namespace '{namespace}'"
480 ))
481 })?;
482 let t = args
483 .r#type
484 .map(|v| v.as_str().to_string())
485 .unwrap_or_else(|| existing_row.memory_type.clone());
486 let d = args
487 .description
488 .clone()
489 .unwrap_or_else(|| existing_row.description.clone());
490 (t, d)
491 };
492
493 if body_will_be_preserved {
498 if let Some(existing_row) = memories::read_by_name(&conn, &namespace, &normalized_name)? {
499 if !existing_row.body.is_empty() {
500 tracing::debug!(
501 name = %normalized_name,
502 "GAP-08: empty body with --force-merge and no --clear-body; preserving existing body"
503 );
504 raw_body = existing_row.body;
505 body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
506 snippet = raw_body.chars().take(200).collect();
507 }
508 }
509 }
510
511 let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
512
513 output::emit_progress_i18n(
514 &format!(
515 "Remember stage: validated input; available memory {} MB",
516 crate::memory_guard::available_memory_mb()
517 ),
518 &format!(
519 "Stage remember: input validated; available memory {} MB",
520 crate::memory_guard::available_memory_mb()
521 ),
522 );
523
524 let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
525 let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
526 let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
527 let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
528 let chunks_created = chunks_info.len();
529 let chunks_persisted = compute_chunks_persisted(chunks_info.len());
533
534 output::emit_progress_i18n(
535 &format!(
536 "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
537 chunks_created,
538 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
539 ),
540 &format!(
541 "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
542 chunks_created,
543 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
544 ),
545 );
546
547 if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
548 return Err(AppError::LimitExceeded(format!(
549 "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
550 crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
551 )));
552 }
553
554 output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
555 let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
556
557 let embedding = if chunks_info.len() == 1 {
558 crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
559 } else {
560 let chunk_texts: Vec<&str> = chunks_info
561 .iter()
562 .map(|c| chunking::chunk_text(&raw_body, c))
563 .collect();
564 output::emit_progress_i18n(
565 &format!(
566 "Embedding {} chunks serially to keep memory bounded...",
567 chunks_info.len()
568 ),
569 &format!(
570 "Embedding {} chunks serially to keep memory bounded...",
571 chunks_info.len()
572 ),
573 );
574 let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
575 for chunk_text in &chunk_texts {
576 if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
577 if rss > args.max_rss_mb {
578 tracing::error!(
579 rss_mb = rss,
580 max_rss_mb = args.max_rss_mb,
581 "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
582 );
583 return Err(AppError::LowMemory {
584 available_mb: crate::memory_guard::available_memory_mb(),
585 required_mb: args.max_rss_mb,
586 });
587 }
588 }
589 chunk_embeddings.push(crate::daemon::embed_passage_or_local(
590 &paths.models,
591 chunk_text,
592 )?);
593 }
594 output::emit_progress_i18n(
595 &format!(
596 "Remember stage: chunk embeddings complete; process RSS {} MB",
597 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
598 ),
599 &format!(
600 "Stage remember: chunk embeddings completed; process RSS {} MB",
601 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
602 ),
603 );
604 let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
605 chunk_embeddings_cache = Some(chunk_embeddings);
606 aggregated
607 };
608 let body_for_storage = raw_body;
609
610 let memory_type = resolved_type.as_str();
611 let new_memory = NewMemory {
612 namespace: namespace.clone(),
613 name: normalized_name.clone(),
614 memory_type: memory_type.to_string(),
615 description: resolved_description.clone(),
616 body: body_for_storage,
617 body_hash: body_hash.clone(),
618 session_id: args.session_id.clone(),
619 source: "agent".to_string(),
620 metadata,
621 };
622
623 let mut warnings = Vec::with_capacity(4);
624 let mut entities_persisted = 0usize;
625 let mut relationships_persisted = 0usize;
626
627 let graph_entity_embeddings = graph
628 .entities
629 .iter()
630 .map(|entity| {
631 let entity_text = match &entity.description {
632 Some(desc) => format!("{} {}", entity.name, desc),
633 None => entity.name.clone(),
634 };
635 crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
636 })
637 .collect::<Result<Vec<_>, _>>()?;
638
639 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
640
641 let (memory_id, action, version) = match existing_memory {
642 Some((existing_id, _updated_at, _current_version)) => {
643 if let Some(hash_id) = duplicate_hash_id {
644 if hash_id != existing_id {
645 warnings.push(format!(
646 "identical body already exists as memory id {hash_id}"
647 ));
648 }
649 }
650
651 let (old_fts_name, old_fts_desc, old_fts_body): (String, String, String) = tx
653 .query_row(
654 "SELECT name, description, body FROM memories WHERE id = ?1",
655 rusqlite::params![existing_id],
656 |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
657 )?;
658
659 storage_chunks::delete_chunks(&tx, existing_id)?;
660
661 let next_v = versions::next_version(&tx, existing_id)?;
662 memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
663
664 memories::sync_fts_after_update(
667 &tx,
668 existing_id,
669 &old_fts_name,
670 &old_fts_desc,
671 &old_fts_body,
672 &normalized_name,
673 &resolved_description,
674 &new_memory.body,
675 )?;
676
677 versions::insert_version(
678 &tx,
679 existing_id,
680 next_v,
681 &normalized_name,
682 memory_type,
683 &resolved_description,
684 &new_memory.body,
685 &serde_json::to_string(&new_memory.metadata)?,
686 None,
687 "edit",
688 )?;
689 memories::upsert_vec(
690 &tx,
691 existing_id,
692 &namespace,
693 memory_type,
694 &embedding,
695 &normalized_name,
696 &snippet,
697 )?;
698 (existing_id, "updated".to_string(), next_v)
699 }
700 None => {
701 if let Some(hash_id) = duplicate_hash_id {
702 warnings.push(format!(
703 "identical body already exists as memory id {hash_id}"
704 ));
705 }
706 let id = memories::insert(&tx, &new_memory)?;
707 versions::insert_version(
708 &tx,
709 id,
710 1,
711 &normalized_name,
712 memory_type,
713 &resolved_description,
714 &new_memory.body,
715 &serde_json::to_string(&new_memory.metadata)?,
716 None,
717 "create",
718 )?;
719 memories::upsert_vec(
720 &tx,
721 id,
722 &namespace,
723 memory_type,
724 &embedding,
725 &normalized_name,
726 &snippet,
727 )?;
728 (id, "created".to_string(), 1)
729 }
730 };
731
732 if chunks_info.len() > 1 {
733 storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
734
735 let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
736 AppError::Internal(anyhow::anyhow!(
737 "chunk embeddings cache missing in multi-chunk remember path"
738 ))
739 })?;
740
741 for (i, emb) in chunk_embeddings.iter().enumerate() {
742 storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
743 }
744 output::emit_progress_i18n(
745 &format!(
746 "Remember stage: persisted chunk vectors; process RSS {} MB",
747 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
748 ),
749 &format!(
750 "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
751 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
752 ),
753 );
754 }
755
756 if !graph.entities.is_empty() || !graph.relationships.is_empty() {
757 for entity in &graph.entities {
758 let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
759 let entity_embedding = &graph_entity_embeddings[entities_persisted];
760 entities::upsert_entity_vec(
761 &tx,
762 entity_id,
763 &namespace,
764 entity.entity_type,
765 entity_embedding,
766 &entity.name,
767 )?;
768 entities::link_memory_entity(&tx, memory_id, entity_id)?;
769 entities::increment_degree(&tx, entity_id)?;
770 if args.max_entity_degree > 0 {
772 let cap = args.max_entity_degree as i64;
773 let degree: i64 = tx.query_row(
774 "SELECT degree FROM entities WHERE id = ?1",
775 rusqlite::params![entity_id],
776 |r| r.get(0),
777 )?;
778 if degree > cap {
779 tracing::warn!(
780 entity = %entity.name,
781 degree = degree,
782 cap = cap,
783 "entity degree cap exceeded"
784 );
785 }
786 }
787 entities_persisted += 1;
788 }
789 let entity_types: std::collections::HashMap<&str, EntityType> = graph
790 .entities
791 .iter()
792 .map(|entity| (entity.name.as_str(), entity.entity_type))
793 .collect();
794
795 for rel in &graph.relationships {
796 let source_entity = NewEntity {
797 name: rel.source.clone(),
798 entity_type: entity_types
799 .get(rel.source.as_str())
800 .copied()
801 .unwrap_or(EntityType::Concept),
802 description: None,
803 };
804 let target_entity = NewEntity {
805 name: rel.target.clone(),
806 entity_type: entity_types
807 .get(rel.target.as_str())
808 .copied()
809 .unwrap_or(EntityType::Concept),
810 description: None,
811 };
812 let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
813 let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
814 let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
815 entities::link_memory_relationship(&tx, memory_id, rel_id)?;
816 relationships_persisted += 1;
817 }
818 }
819 tx.commit()?;
820
821 let urls_persisted = if !extracted_urls.is_empty() {
824 let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
825 .into_iter()
826 .map(|u| storage_urls::MemoryUrl {
827 url: u.url,
828 offset: Some(u.offset as i64),
829 })
830 .collect();
831 storage_urls::insert_urls(&conn, memory_id, &url_entries)
832 } else {
833 0
834 };
835
836 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
837
838 let created_at_epoch = chrono::Utc::now().timestamp();
839 let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
840
841 output::emit_json(&RememberResponse {
842 memory_id,
843 name: normalized_name.clone(),
847 namespace,
848 action: action.clone(),
849 operation: action,
850 version,
851 entities_persisted,
852 relationships_persisted,
853 relationships_truncated,
854 chunks_created,
855 chunks_persisted,
856 urls_persisted,
857 extraction_method,
858 merged_into_memory_id: None,
859 warnings,
860 created_at: created_at_epoch,
861 created_at_iso,
862 elapsed_ms: inicio.elapsed().as_millis() as u64,
863 name_was_normalized,
864 original_name: name_was_normalized.then_some(original_name),
865 })?;
866
867 Ok(())
868}
869
870#[cfg(test)]
871mod tests {
872 use super::compute_chunks_persisted;
873 use crate::output::RememberResponse;
874
875 #[test]
877 fn chunks_persisted_zero_for_zero_chunks() {
878 assert_eq!(compute_chunks_persisted(0), 0);
879 }
880
881 #[test]
882 fn chunks_persisted_zero_for_single_chunk_body() {
883 assert_eq!(compute_chunks_persisted(1), 0);
886 }
887
888 #[test]
889 fn chunks_persisted_equals_count_for_multi_chunk_body() {
890 assert_eq!(compute_chunks_persisted(2), 2);
892 assert_eq!(compute_chunks_persisted(7), 7);
893 assert_eq!(compute_chunks_persisted(64), 64);
894 }
895
896 #[test]
897 fn remember_response_serializes_required_fields() {
898 let resp = RememberResponse {
899 memory_id: 42,
900 name: "minha-mem".to_string(),
901 namespace: "global".to_string(),
902 action: "created".to_string(),
903 operation: "created".to_string(),
904 version: 1,
905 entities_persisted: 0,
906 relationships_persisted: 0,
907 relationships_truncated: false,
908 chunks_created: 1,
909 chunks_persisted: 0,
910 urls_persisted: 0,
911 extraction_method: None,
912 merged_into_memory_id: None,
913 warnings: vec![],
914 created_at: 1_705_320_000,
915 created_at_iso: "2024-01-15T12:00:00Z".to_string(),
916 elapsed_ms: 55,
917 name_was_normalized: false,
918 original_name: None,
919 };
920
921 let json = serde_json::to_value(&resp).expect("serialization failed");
922 assert_eq!(json["memory_id"], 42);
923 assert_eq!(json["action"], "created");
924 assert_eq!(json["operation"], "created");
925 assert_eq!(json["version"], 1);
926 assert_eq!(json["elapsed_ms"], 55u64);
927 assert!(json["warnings"].is_array());
928 assert!(json["merged_into_memory_id"].is_null());
929 }
930
931 #[test]
932 fn remember_response_action_e_operation_sao_aliases() {
933 let resp = RememberResponse {
934 memory_id: 1,
935 name: "mem".to_string(),
936 namespace: "global".to_string(),
937 action: "updated".to_string(),
938 operation: "updated".to_string(),
939 version: 2,
940 entities_persisted: 3,
941 relationships_persisted: 1,
942 relationships_truncated: false,
943 extraction_method: None,
944 chunks_created: 2,
945 chunks_persisted: 2,
946 urls_persisted: 0,
947 merged_into_memory_id: None,
948 warnings: vec![],
949 created_at: 0,
950 created_at_iso: "1970-01-01T00:00:00Z".to_string(),
951 elapsed_ms: 0,
952 name_was_normalized: false,
953 original_name: None,
954 };
955
956 let json = serde_json::to_value(&resp).expect("serialization failed");
957 assert_eq!(
958 json["action"], json["operation"],
959 "action e operation devem ser iguais"
960 );
961 assert_eq!(json["entities_persisted"], 3);
962 assert_eq!(json["relationships_persisted"], 1);
963 assert_eq!(json["chunks_created"], 2);
964 }
965
966 #[test]
967 fn remember_response_warnings_lista_mensagens() {
968 let resp = RememberResponse {
969 memory_id: 5,
970 name: "dup-mem".to_string(),
971 namespace: "global".to_string(),
972 action: "created".to_string(),
973 operation: "created".to_string(),
974 version: 1,
975 entities_persisted: 0,
976 extraction_method: None,
977 relationships_persisted: 0,
978 relationships_truncated: false,
979 chunks_created: 1,
980 chunks_persisted: 0,
981 urls_persisted: 0,
982 merged_into_memory_id: None,
983 warnings: vec!["identical body already exists as memory id 3".to_string()],
984 created_at: 0,
985 created_at_iso: "1970-01-01T00:00:00Z".to_string(),
986 elapsed_ms: 10,
987 name_was_normalized: false,
988 original_name: None,
989 };
990
991 let json = serde_json::to_value(&resp).expect("serialization failed");
992 let warnings = json["warnings"]
993 .as_array()
994 .expect("warnings deve ser array");
995 assert_eq!(warnings.len(), 1);
996 assert!(warnings[0].as_str().unwrap().contains("identical body"));
997 }
998
999 #[test]
1000 fn invalid_name_reserved_prefix_returns_validation_error() {
1001 use crate::errors::AppError;
1002 let nome = "__reservado";
1004 let resultado: Result<(), AppError> = if nome.starts_with("__") {
1005 Err(AppError::Validation(
1006 crate::i18n::validation::reserved_name(),
1007 ))
1008 } else {
1009 Ok(())
1010 };
1011 assert!(resultado.is_err());
1012 if let Err(AppError::Validation(msg)) = resultado {
1013 assert!(!msg.is_empty());
1014 }
1015 }
1016
1017 #[test]
1018 fn name_too_long_returns_validation_error() {
1019 use crate::errors::AppError;
1020 let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
1021 let resultado: Result<(), AppError> =
1022 if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
1023 Err(AppError::Validation(crate::i18n::validation::name_length(
1024 crate::constants::MAX_MEMORY_NAME_LEN,
1025 )))
1026 } else {
1027 Ok(())
1028 };
1029 assert!(resultado.is_err());
1030 }
1031
/// Serializes a `RememberResponse` whose `merged_into_memory_id` is `Some(7)`
/// and checks that the JSON field compares equal to the integer 7.
#[test]
fn remember_response_merged_into_memory_id_some_serializes_integer() {
    let response = RememberResponse {
        // Field under test.
        merged_into_memory_id: Some(7),
        // Identity.
        memory_id: 10,
        name: String::from("mem-mergeada"),
        namespace: String::from("global"),
        action: String::from("updated"),
        operation: String::from("updated"),
        version: 3,
        // Extraction and persistence counters.
        extraction_method: None,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        warnings: Vec::new(),
        // Timing and name normalization.
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let serialized = serde_json::to_value(&response).expect("serialization failed");
    let merged_field = &serialized["merged_into_memory_id"];
    assert_eq!(*merged_field, 7);
}
1060
/// Serializes a `RememberResponse` with `urls_persisted` set to 3 and checks
/// that the JSON output exposes the same count under `urls_persisted`.
#[test]
fn remember_response_urls_persisted_serializes_field() {
    let response = RememberResponse {
        // Field under test.
        urls_persisted: 3,
        // Identity.
        memory_id: 3,
        name: String::from("mem-com-urls"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        // Extraction and persistence counters.
        extraction_method: Some(String::from("regex-only")),
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        merged_into_memory_id: None,
        warnings: Vec::new(),
        // Timing and name normalization.
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let serialized = serde_json::to_value(&response).expect("serialization failed");
    let urls_field = &serialized["urls_persisted"];
    assert_eq!(*urls_field, 3);
}
1089
/// Checks that a hyphen-only name (`"---"`) normalizes to an empty string and
/// that the resulting validation error mentions "empty after normalization".
///
/// The normalization steps (lowercase, map `_` and space to `-`, trim `-`)
/// are reproduced inline in this test rather than invoked through the
/// command handler.
#[test]
fn empty_name_after_normalization_returns_specific_message() {
    use crate::errors::AppError;

    let normalized = "---".to_lowercase().replace(['_', ' '], "-");
    // Borrowing the trimmed slice is enough; no owned copy is needed.
    let normalized = normalized.trim_matches('-');

    let result: Result<(), AppError> = if normalized.is_empty() {
        Err(AppError::Validation(
            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
        ))
    } else {
        Ok(())
    };

    // Match on every outcome: a bare `if let` would skip the message check
    // (and let the test pass) if a different error variant were returned.
    match result {
        Err(AppError::Validation(msg)) => assert!(
            msg.contains("empty after normalization"),
            "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
        ),
        _ => panic!("expected AppError::Validation for a name that normalizes to empty"),
    }
}
1112
/// Checks that an underscore-only name (`"___"`) normalizes to an empty
/// string and that the resulting validation error mentions
/// "empty after normalization".
///
/// The normalization steps (lowercase, map `_` and space to `-`, trim `-`)
/// are reproduced inline in this test rather than invoked through the
/// command handler.
#[test]
fn name_only_underscores_after_normalization_returns_specific_message() {
    use crate::errors::AppError;

    let normalized = "___".to_lowercase().replace(['_', ' '], "-");
    // Borrowing the trimmed slice is enough; no owned copy is needed.
    let normalized = normalized.trim_matches('-');
    assert!(
        normalized.is_empty(),
        "underscores devem normalizar para string vazia"
    );

    let result: Result<(), AppError> = if normalized.is_empty() {
        Err(AppError::Validation(
            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
        ))
    } else {
        Ok(())
    };

    // Match on every outcome: a bare `if let` would skip the message check
    // (and let the test pass) if a different error variant were returned.
    match result {
        Err(AppError::Validation(msg)) => assert!(
            msg.contains("empty after normalization"),
            "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
        ),
        _ => panic!("expected AppError::Validation for a name that normalizes to empty"),
    }
}
1138
/// Serializes a `RememberResponse` once with `relationships_truncated` set to
/// `false` and once with `true`, checking that the JSON field carries the
/// same boolean each time.
#[test]
fn remember_response_relationships_truncated_serializes_field() {
    // Builds the shared fixture; only the flag under test varies.
    fn fixture(truncated: bool) -> RememberResponse {
        RememberResponse {
            relationships_truncated: truncated,
            memory_id: 1,
            name: String::from("test"),
            namespace: String::from("global"),
            action: String::from("created"),
            operation: String::from("created"),
            version: 1,
            entities_persisted: 2,
            relationships_persisted: 1,
            chunks_created: 1,
            chunks_persisted: 0,
            urls_persisted: 0,
            extraction_method: None,
            merged_into_memory_id: None,
            warnings: Vec::new(),
            created_at: 0,
            created_at_iso: String::from("1970-01-01T00:00:00Z"),
            elapsed_ms: 0,
            name_was_normalized: false,
            original_name: None,
        }
    }

    // Same order as before: the `false` case is checked first, then `true`.
    for expected in [false, true] {
        let serialized =
            serde_json::to_value(&fixture(expected)).expect("serialization failed");
        assert_eq!(serialized["relationships_truncated"], expected);
    }
}
1174
/// Decides whether an existing body should be kept instead of overwritten.
///
/// Returns `true` only when all three conditions hold: `force_merge` is set,
/// `raw_body_is_empty` is set, and `clear_body` is not set. Every other
/// combination returns `false`.
///
/// NOTE(review): presumably mirrors the body-preservation predicate used by
/// the command handler; confirm the two stay in sync.
fn should_preserve_body(force_merge: bool, raw_body_is_empty: bool, clear_body: bool) -> bool {
    matches!(
        (force_merge, raw_body_is_empty, clear_body),
        (true, true, false)
    )
}
1186
/// Empty body with force-merge and without clear-body: the predicate must
/// report that the body is preserved.
#[test]
fn gap08_empty_body_force_merge_no_clear_body_preserves() {
    let preserved = should_preserve_body(true, true, false);
    assert!(
        preserved,
        "empty body + force-merge + no clear-body should trigger preservation"
    );
}
1196
/// Empty body with force-merge but with clear-body set: the predicate must
/// report that the body is not preserved.
#[test]
fn gap08_empty_body_force_merge_with_clear_body_does_not_preserve() {
    let preserved = should_preserve_body(true, true, true);
    assert!(!preserved, "--clear-body must bypass preservation");
}
1205
/// Non-empty body with force-merge: the predicate must report that the body
/// is not preserved.
#[test]
fn gap08_non_empty_body_force_merge_does_not_preserve() {
    let preserved = should_preserve_body(true, false, false);
    assert!(!preserved, "non-empty body must overwrite, not preserve");
}
1214
/// Empty body without force-merge: the predicate must report that the body
/// is not preserved.
#[test]
fn gap08_empty_body_no_force_merge_does_not_preserve() {
    let preserved = should_preserve_body(false, true, false);
    assert!(
        !preserved,
        "no --force-merge means no preservation logic applies"
    );
}
1223}