1use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Maps the number of chunks produced for a body to the value reported as
/// `chunks_persisted`.
///
/// Zero or one chunk reports `0`; any larger count is returned unchanged.
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        multi => multi,
    }
}
32
// Command-line arguments for the `remember` subcommand.
//
// NOTE: plain `//` comments are used throughout this struct on purpose. clap's
// derive macros turn `///` doc comments into user-visible help text, so adding
// doc comments here would change the CLI output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
    # Create a memory with inline body\n \
    sqlite-graphrag remember --name design-auth --type decision \\\n \
    --description \"auth design\" --body \"JWT for stateless auth\"\n\n \
    # Create with curated graph via --graph-stdin\n \
    echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n \
    sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n \
    # Enable automatic URL extraction with --graph-stdin (URL-regex only since v1.0.79)\n \
    echo '{\"body\":\"See https://docs.rs ...\",\"entities\":[],\"relationships\":[]}' | \\\n \
    sqlite-graphrag remember --name url-test --type note --description \"test\" \\\n \
    --graph-stdin --enable-ner\n\n \
    # Idempotent upsert with --force-merge\n \
    sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n \
    --body \"new content\" --force-merge\n\n\
NOTE:\n \
    remember does NOT accept positional arguments.\n \
    Use --body \"text\" for inline content\n \
    Use --body-file path for file content\n \
    Use --body-stdin for piped content\n \
    Use --graph-stdin for JSON with entities and relationships\n\n\
ENTITY TYPES (for --graph-stdin entities, NOT memory --type):\n \
    concept, tool, person, file, project, decision, incident,\n \
    organization, location, date, dashboard, issue_tracker, memory\n \
    WARNING: reference, skill, document, note, user, feedback are\n \
    MEMORY types only — NOT valid for entities.\n \
    Mapping: reference→concept, document→file, user→person")]
pub struct RememberArgs {
    // Memory name. `run` lowercases it, replaces `_` and spaces with `-`, and
    // trims leading/trailing `-` before validating it.
    #[arg(long)]
    pub name: String,
    // Memory kind. `run` requires it when creating a new memory and inherits
    // the stored type when omitted on an update.
    #[arg(
        long,
        value_enum,
        long_help = "Memory kind stored in `memories.type`. Required when creating a new memory. Optional with --force-merge: if omitted the existing memory type is inherited. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
    )]
    pub r#type: Option<MemoryType>,
    // Description. Required by `run` when creating; inherited from the existing
    // row when omitted on an update.
    #[arg(long)]
    pub description: Option<String>,
    // Inline body. Mutually exclusive with the other body sources.
    #[arg(
        long,
        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
    )]
    pub body: Option<String>,
    // Body read from a file. `run` falls back to lossy UTF-8 decoding when the
    // file is not valid UTF-8.
    #[arg(
        long,
        help = "Read body from a file instead of --body",
        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
    )]
    pub body_file: Option<std::path::PathBuf>,
    // Body read from stdin.
    #[arg(
        long,
        conflicts_with_all = ["body", "body_file", "graph_stdin"]
    )]
    pub body_stdin: bool,
    // JSON file deserialized by `run` into the list of entities.
    #[arg(
        long,
        help = "JSON file containing entities to associate with this memory"
    )]
    pub entities_file: Option<std::path::PathBuf>,
    // JSON file deserialized by `run` into the list of relationships.
    #[arg(
        long,
        help = "JSON file containing relationships to associate with this memory"
    )]
    pub relationships_file: Option<std::path::PathBuf>,
    // Single JSON document on stdin carrying body, entities and relationships;
    // excludes every other body/graph source.
    #[arg(
        long,
        help = "Read graph JSON (body + entities + relationships) from stdin",
        conflicts_with_all = [
            "body",
            "body_file",
            "body_stdin",
            "entities_file",
            "relationships_file"
        ]
    )]
    pub graph_stdin: bool,
    // Target namespace; resolved by `crate::namespace::resolve_namespace`.
    #[arg(
        long,
        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
    )]
    pub namespace: Option<String>,
    // Inline JSON metadata. When both this and `metadata_file` are given, `run`
    // uses this one and never reads the file.
    #[arg(long)]
    pub metadata: Option<String>,
    #[arg(long, help = "JSON file containing metadata key-value pairs")]
    pub metadata_file: Option<std::path::PathBuf>,
    // Allows `run` to update an existing (or soft-deleted) memory instead of
    // failing with a duplicate error.
    #[arg(long)]
    pub force_merge: bool,
    // Optimistic-lock token forwarded to `memories::update`.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    // Boolean flag that may be given bare (`--enable-ner`) or with an explicit
    // value; also read from the environment.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_ENABLE_NER",
        value_parser = crate::parsers::parse_bool_flexible,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        default_missing_value = "true",
        default_value = "false",
        help = "Enable automatic URL-regex extraction from body (the GLiNER NER pipeline was removed in v1.0.79)"
    )]
    pub enable_ner: bool,
    // Deprecated. `run` logs a warning when the value differs from "fp32".
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
        default_value = "fp32",
        help = "DEPRECATED: no effect since v1.0.79 (the GLiNER pipeline was removed); accepted for compatibility only"
    )]
    pub gliner_variant: String,
    // Hidden deprecated flag. `run` rejects it together with `--enable-ner` and
    // otherwise only logs a deprecation warning.
    #[arg(long, hide = true)]
    pub skip_extraction: bool,
    #[arg(
        long,
        default_value_t = false,
        help = "Explicitly clear body content during --force-merge (without this flag, an empty body is ignored and the existing body is kept)"
    )]
    pub clear_body: bool,
    #[arg(
        long,
        default_value_t = false,
        help = "Validate input and report planned actions without persisting"
    )]
    pub dry_run: bool,
    // Stored verbatim in `NewMemory::session_id`.
    #[arg(long)]
    pub session_id: Option<String>,
    // Parsed but not consulted by `run` (it is explicitly discarded there).
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override; passed to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    // RSS ceiling checked by `run` right before multi-chunk embedding starts.
    #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
        help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
    pub max_rss_mb: u64,
    // Degree threshold per entity. `run` only logs a warning when it is
    // exceeded; a value of 0 skips the check.
    #[arg(long, default_value_t = 50, value_name = "N")]
    pub max_entity_degree: u32,
    // Parallelism passed to the chunk and entity embedding helpers. The value
    // parser restricts it to 1..=32.
    #[arg(long, default_value_t = 4, value_name = "N",
        value_parser = clap::value_parser!(u64).range(1..=32),
        help = "Maximum simultaneous LLM embedding subprocesses (default: 4, clamp [1,32])")]
    pub llm_parallelism: u64,
}
199
/// Graph payload attached to a memory.
///
/// `run` deserializes it as a whole from stdin when `--graph-stdin` is set, and
/// also uses a default instance as the container for the lists loaded from
/// `--entities-file` / `--relationships-file`. Unknown JSON keys are rejected.
#[derive(Deserialize, Default)]
#[serde(deny_unknown_fields)]
struct GraphInput {
    /// Optional memory body; absent in the JSON means `None`.
    #[serde(default)]
    body: Option<String>,
    /// Entities to link to the memory; defaults to an empty list.
    #[serde(default)]
    entities: Vec<NewEntity>,
    /// Relationships to link to the memory; defaults to an empty list.
    #[serde(default)]
    relationships: Vec<NewRelationship>,
}
210
211fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
212 for rel in &mut graph.relationships {
213 rel.relation = crate::parsers::normalize_relation(&rel.relation);
214 if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
215 return Err(AppError::Validation(format!(
216 "{e} for relationship '{}' -> '{}'",
217 rel.source, rel.target
218 )));
219 }
220 crate::parsers::warn_if_non_canonical(&rel.relation);
221 if !(0.0..=1.0).contains(&rel.strength) {
222 return Err(AppError::Validation(format!(
223 "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
224 rel.strength, rel.source, rel.target
225 )));
226 }
227 }
228
229 Ok(())
230}
231
232#[tracing::instrument(skip_all, level = "debug", name = "remember")]
233pub fn run(args: RememberArgs, llm_backend: crate::cli::LlmBackendChoice) -> Result<(), AppError> {
234 use crate::constants::*;
235
236 let inicio = std::time::Instant::now();
237 let _ = args.format;
238 tracing::debug!(target: "remember", name = %args.name, "persisting memory");
239 let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
240
241 let original_name = args.name.clone();
245
246 let normalized_name = {
250 let lower = args.name.to_lowercase().replace(['_', ' '], "-");
251 let trimmed = lower.trim_matches('-').to_string();
252 if trimmed != args.name {
253 tracing::warn!(target: "remember",
254 original = %args.name,
255 normalized = %trimmed,
256 "name auto-normalized to kebab-case"
257 );
258 }
259 trimmed
260 };
261 let name_was_normalized = normalized_name != original_name;
262
263 if normalized_name.is_empty() {
264 return Err(AppError::Validation(
265 "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
266 ));
267 }
268 if normalized_name.len() > MAX_MEMORY_NAME_LEN {
269 return Err(AppError::LimitExceeded(
270 crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
271 ));
272 }
273
274 if normalized_name.starts_with("__") {
275 return Err(AppError::Validation(
276 crate::i18n::validation::reserved_name(),
277 ));
278 }
279
280 {
281 let slug_re = crate::constants::name_slug_regex();
282 if !slug_re.is_match(&normalized_name) {
283 return Err(AppError::Validation(crate::i18n::validation::name_kebab(
284 &normalized_name,
285 )));
286 }
287 }
288
289 if let Some(ref desc) = args.description {
290 if desc.len() > MAX_MEMORY_DESCRIPTION_LEN {
291 return Err(AppError::Validation(
292 crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
293 ));
294 }
295 }
296
297 let mut raw_body = if let Some(b) = args.body {
298 b
299 } else if let Some(ref path) = args.body_file {
300 let file_size = std::fs::metadata(path).map_err(AppError::Io)?.len();
301 if file_size > MAX_MEMORY_BODY_LEN as u64 {
302 return Err(AppError::LimitExceeded(
303 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
304 ));
305 }
306 match std::fs::read_to_string(path) {
307 Ok(s) => s,
308 Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
309 let bytes = std::fs::read(path).map_err(AppError::Io)?;
310 tracing::warn!(target: "remember", "body file contains invalid UTF-8; replacing invalid sequences");
311 String::from_utf8_lossy(&bytes).into_owned()
312 }
313 Err(e) => return Err(AppError::Io(e)),
314 }
315 } else if args.body_stdin || args.graph_stdin {
316 crate::stdin_helper::read_stdin_with_timeout(60)?
317 } else {
318 String::new()
319 };
320
321 let mut entities_provided_externally =
322 args.entities_file.is_some() || args.relationships_file.is_some();
323
324 let mut graph = GraphInput::default();
325 if let Some(path) = args.entities_file {
326 let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
327 if file_size > MAX_MEMORY_BODY_LEN as u64 {
328 return Err(AppError::LimitExceeded(
329 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
330 ));
331 }
332 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
333 graph.entities = serde_json::from_str(&content)?;
334 }
335 if let Some(path) = args.relationships_file {
336 let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
337 if file_size > MAX_MEMORY_BODY_LEN as u64 {
338 return Err(AppError::LimitExceeded(
339 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
340 ));
341 }
342 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
343 graph.relationships = serde_json::from_str(&content)?;
344 }
345 if args.graph_stdin {
346 graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
347 AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
348 })?;
349 raw_body = graph.body.take().unwrap_or_default();
350 }
351 if args.graph_stdin && !graph.entities.is_empty() {
352 entities_provided_externally = true;
353 }
354
355 if graph.entities.len() > max_entities_per_memory() {
356 return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
357 max_entities_per_memory(),
358 )));
359 }
360 let mut relationships_truncated = false;
361 let rel_cap = max_relationships_per_memory();
362 if graph.relationships.len() > rel_cap {
363 tracing::warn!(target: "remember",
364 count = graph.relationships.len(),
365 cap = rel_cap,
366 "truncating relationships to cap"
367 );
368 graph.relationships.truncate(rel_cap);
369 relationships_truncated = true;
370 }
371 normalize_and_validate_graph_input(&mut graph)?;
372
373 if raw_body.len() > MAX_MEMORY_BODY_LEN {
374 return Err(AppError::LimitExceeded(
375 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
376 ));
377 }
378
379 let body_will_be_preserved = args.force_merge && raw_body.trim().is_empty() && !args.clear_body;
384 if !entities_provided_externally
385 && graph.entities.is_empty()
386 && raw_body.trim().is_empty()
387 && !body_will_be_preserved
388 && !args.clear_body
389 {
390 return Err(AppError::Validation(crate::i18n::validation::empty_body()));
391 }
392
393 let metadata: serde_json::Value = if let Some(m) = args.metadata {
394 serde_json::from_str(&m)?
395 } else if let Some(path) = args.metadata_file {
396 let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
397 if file_size > MAX_MEMORY_BODY_LEN as u64 {
398 return Err(AppError::LimitExceeded(
399 crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
400 ));
401 }
402 let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
403 serde_json::from_str(&content)?
404 } else {
405 serde_json::json!({})
406 };
407
408 let mut body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
409 let mut snippet: String = raw_body.chars().take(200).collect();
410
411 let paths = AppPaths::resolve(args.db.as_deref())?;
412 paths.ensure_dirs()?;
413
414 let mut extraction_method: Option<String> = None;
416 let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
417 if args.enable_ner && args.skip_extraction {
418 return Err(AppError::Validation(
419 "--enable-ner and --skip-extraction are mutually exclusive; remove one".to_string(),
420 ));
421 }
422 if args.skip_extraction && !args.enable_ner {
423 tracing::warn!(
430 "--skip-extraction is deprecated since v1.0.45 and has no effect (NER is disabled by default); remove this flag to silence the warning"
431 );
432 }
433 if args.gliner_variant != "fp32" {
437 tracing::warn!(
438 "--gliner-variant is deprecated and has no effect since v1.0.79 (the GLiNER pipeline was removed); --enable-ner performs URL-regex extraction only"
439 );
440 }
441 let gliner_variant: crate::extraction::GlinerVariant = match args.gliner_variant.as_str() {
442 "int8" => crate::extraction::GlinerVariant::Int8,
443 _ => crate::extraction::GlinerVariant::Fp32,
444 };
445 if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
446 match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
447 Ok(extracted) => {
448 extraction_method = Some("url-regex".to_string());
452 extracted_urls = extracted.urls;
453 graph.entities = extracted
456 .entities
457 .into_iter()
458 .map(|e| NewEntity {
459 name: e.name,
460 entity_type: crate::entity_type::EntityType::Concept,
461 description: None,
462 })
463 .collect();
464 graph.relationships.clear();
465 relationships_truncated = false;
466
467 if graph.entities.len() > max_entities_per_memory() {
468 graph.entities.truncate(max_entities_per_memory());
469 }
470 if graph.relationships.len() > max_relationships_per_memory() {
471 relationships_truncated = true;
472 graph.relationships.truncate(max_relationships_per_memory());
473 }
474 normalize_and_validate_graph_input(&mut graph)?;
475 }
476 Err(e) => {
477 tracing::warn!(target: "remember", error = %e, "auto-extraction failed, graceful degradation");
478 extraction_method = Some("none:extraction-failed".to_string());
479 }
480 }
481 }
482
483 let mut conn = open_rw(&paths.db)?;
484 ensure_schema(&mut conn)?;
485
486 if args.dry_run {
488 let existing = memories::find_by_name(&conn, &namespace, &normalized_name)?;
489 let planned_action = if existing.is_some() && args.force_merge {
490 "would_update"
491 } else {
492 "would_create"
493 };
494 output::emit_json(&serde_json::json!({
495 "dry_run": true,
496 "name": normalized_name,
497 "namespace": namespace,
498 "planned_action": planned_action,
499 }))?;
500 return Ok(());
501 }
502
503 {
504 use crate::constants::MAX_NAMESPACES_ACTIVE;
505 let active_count: u32 = conn.query_row(
506 "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
507 [],
508 |r| r.get::<_, i64>(0).map(|v| v as u32),
509 )?;
510 let ns_exists: bool = conn.query_row(
511 "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
512 rusqlite::params![namespace],
513 |r| r.get::<_, i64>(0).map(|v| v > 0),
514 )?;
515 if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
516 return Err(AppError::NamespaceError(format!(
517 "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
518 )));
519 }
520 }
521
522 if let Some((sd_id, true)) =
524 memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
525 {
526 if args.force_merge {
527 memories::clear_deleted_at(&conn, sd_id)?;
528 } else {
529 return Err(AppError::Duplicate(
530 errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
531 ));
532 }
533 }
534
535 let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
536 if existing_memory.is_some() && !args.force_merge {
537 return Err(AppError::Duplicate(errors_msg::duplicate_memory(
538 &normalized_name,
539 &namespace,
540 )));
541 }
542
543 let (resolved_type, resolved_description) = if existing_memory.is_none() {
547 let t = args.r#type.ok_or_else(|| {
549 AppError::Validation(
550 "--type and --description are required when creating a new memory".to_string(),
551 )
552 })?;
553 let d = args.description.clone().ok_or_else(|| {
554 AppError::Validation(
555 "--type and --description are required when creating a new memory".to_string(),
556 )
557 })?;
558 (t.as_str().to_string(), d)
559 } else {
560 let existing_row = memories::read_by_name(&conn, &namespace, &normalized_name)?
562 .ok_or_else(|| {
563 AppError::NotFound(format!(
564 "memory '{normalized_name}' not found in namespace '{namespace}'"
565 ))
566 })?;
567 let t = args
568 .r#type
569 .map(|v| v.as_str().to_string())
570 .unwrap_or_else(|| existing_row.memory_type.clone());
571 let d = args
572 .description
573 .clone()
574 .unwrap_or_else(|| existing_row.description.clone());
575 (t, d)
576 };
577
578 if body_will_be_preserved {
583 if let Some(existing_row) = memories::read_by_name(&conn, &namespace, &normalized_name)? {
584 if !existing_row.body.is_empty() {
585 tracing::debug!(target: "remember",
586 name = %normalized_name,
587 "GAP-08: empty body with --force-merge and no --clear-body; preserving existing body"
588 );
589 raw_body = existing_row.body;
590 body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
591 snippet = raw_body.chars().take(200).collect();
592 }
593 }
594 }
595
596 let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
597
598 output::emit_progress_i18n(
599 &format!(
600 "Remember stage: validated input; available memory {} MB",
601 crate::memory_guard::available_memory_mb()
602 ),
603 &format!(
604 "Stage remember: input validated; available memory {} MB",
605 crate::memory_guard::available_memory_mb()
606 ),
607 );
608
609 let model_max_length = crate::tokenizer::get_model_max_length();
610 let total_passage_tokens = crate::tokenizer::count_passage_tokens(&raw_body)?;
611 let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body);
612 let chunks_created = chunks_info.len();
613 let chunks_persisted = compute_chunks_persisted(chunks_info.len());
617
618 output::emit_progress_i18n(
619 &format!(
620 "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
621 chunks_created,
622 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
623 ),
624 &format!(
625 "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
626 chunks_created,
627 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
628 ),
629 );
630
631 if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
632 return Err(AppError::LimitExceeded(format!(
633 "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
634 crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
635 )));
636 }
637
638 output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
639 let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
640
641 let skip_embed = crate::embedder::should_skip_embedding_on_failure();
645 let (embedding, backend_invoked_passage): (Option<Vec<f32>>, Option<&str>) = if chunks_info.len() == 1 {
646 match crate::embedder::embed_passage_with_choice(&paths.models, &raw_body, Some(llm_backend)) {
647 Ok((v, k)) => (Some(v), Some(k.as_str())),
648 Err(AppError::Validation(msg)) => return Err(AppError::Validation(msg)),
649 Err(e) if skip_embed => {
650 tracing::warn!(error = %e, "embedding failed; --skip-embedding-on-failure active, persisting without embedding");
651 (None, None)
652 }
653 Err(e) => return Err(e),
654 }
655 } else {
656 let chunk_texts: Vec<String> = chunks_info
657 .iter()
658 .map(|c| chunking::chunk_text(&raw_body, c).to_string())
659 .collect();
660 output::emit_progress_i18n(
666 &format!(
667 "Embedding {} chunks in parallel batches (parallelism {})...",
668 chunks_info.len(),
669 args.llm_parallelism
670 ),
671 &format!(
672 "Embedding {} chunks em lotes paralelos (paralelismo {})...",
673 chunks_info.len(),
674 args.llm_parallelism
675 ),
676 );
677 if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
678 if rss > args.max_rss_mb {
679 tracing::error!(target: "remember",
680 rss_mb = rss,
681 max_rss_mb = args.max_rss_mb,
682 "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
683 );
684 return Err(AppError::LowMemory {
685 available_mb: crate::memory_guard::available_memory_mb(),
686 required_mb: args.max_rss_mb,
687 });
688 }
689 }
690 match crate::embedder::embed_passages_parallel_local(
691 &paths.models,
692 &chunk_texts,
693 args.llm_parallelism as usize,
694 crate::embedder::chunk_embed_batch_size(),
695 ) {
696 Ok(chunk_embeddings) => {
697 output::emit_progress_i18n(
698 &format!(
699 "Remember stage: chunk embeddings complete; process RSS {} MB",
700 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
701 ),
702 &format!(
703 "Stage remember: chunk embeddings completed; process RSS {} MB",
704 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
705 ),
706 );
707 let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
708 chunk_embeddings_cache = Some(chunk_embeddings);
709 (Some(aggregated), None)
710 }
711 Err(e) if skip_embed => {
712 tracing::warn!(error = %e, "chunk embedding failed; --skip-embedding-on-failure active, persisting without embedding");
713 (None, None)
714 }
715 Err(e) => return Err(e),
716 }
717 };
718 let body_for_storage = raw_body;
719
720 let memory_type = resolved_type.as_str();
721 let new_memory = NewMemory {
722 namespace: namespace.clone(),
723 name: normalized_name.clone(),
724 memory_type: memory_type.to_string(),
725 description: resolved_description.clone(),
726 body: body_for_storage,
727 body_hash: body_hash.clone(),
728 session_id: args.session_id.clone(),
729 source: "agent".to_string(),
730 metadata,
731 };
732
733 let mut warnings = Vec::with_capacity(4);
734 let mut entities_persisted = 0usize;
735 let mut relationships_persisted = 0usize;
736
737 let entity_texts: Vec<String> = graph
742 .entities
743 .iter()
744 .map(|entity| match &entity.description {
745 Some(desc) => format!("{} {}", entity.name, desc),
746 None => entity.name.clone(),
747 })
748 .collect();
749 let (graph_entity_embeddings, embed_cache_stats) = match crate::embedder::embed_entity_texts_cached(
757 &paths.models,
758 &entity_texts,
759 args.llm_parallelism as usize,
760 ) {
761 Ok(r) => r,
762 Err(e) if skip_embed => {
763 tracing::warn!(error = %e, "entity embedding failed; --skip-embedding-on-failure active");
764 let empty: Vec<Vec<f32>> = entity_texts.iter().map(|_| vec![]).collect();
765 (empty, crate::embedder::EmbedCacheStats::default())
766 }
767 Err(e) => return Err(e),
768 };
769 if embed_cache_stats.hits > 0 {
770 tracing::debug!(
771 hits = embed_cache_stats.hits,
772 misses = embed_cache_stats.misses,
773 requested = embed_cache_stats.requested,
774 "G56: entity embed cache hit (remember)"
775 );
776 }
777
778 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
779
780 let mut skip_reindex = false;
781 let (memory_id, action, version) = match existing_memory {
782 Some((existing_id, _updated_at, _current_version)) => {
783 if let Some(hash_id) = duplicate_hash_id {
784 if hash_id != existing_id {
785 warnings.push(format!(
786 "identical body already exists as memory id {hash_id}"
787 ));
788 }
789 }
790
791 let (old_fts_name, old_fts_desc, old_fts_body): (String, String, String) = tx
793 .query_row(
794 "SELECT name, description, body FROM memories WHERE id = ?1",
795 rusqlite::params![existing_id],
796 |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
797 )?;
798
799 let existing_body_hash: Option<String> = tx
801 .query_row(
802 "SELECT body_hash FROM memories WHERE id = ?1",
803 rusqlite::params![existing_id],
804 |r| r.get(0),
805 )
806 .ok();
807 let body_unchanged = existing_body_hash.as_deref() == Some(&body_hash);
808 skip_reindex = body_unchanged;
809 if !body_unchanged {
810 storage_chunks::delete_chunks(&tx, existing_id)?;
811 }
812
813 let next_v = versions::next_version(&tx, existing_id)?;
814 memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
815
816 memories::sync_fts_after_update(
819 &tx,
820 existing_id,
821 &old_fts_name,
822 &old_fts_desc,
823 &old_fts_body,
824 &normalized_name,
825 &resolved_description,
826 &new_memory.body,
827 )?;
828
829 versions::insert_version(
830 &tx,
831 existing_id,
832 next_v,
833 &normalized_name,
834 memory_type,
835 &resolved_description,
836 &new_memory.body,
837 &serde_json::to_string(&new_memory.metadata)?,
838 None,
839 "edit",
840 )?;
841 if !body_unchanged {
842 if let Some(ref emb) = embedding {
843 memories::upsert_vec(
844 &tx,
845 existing_id,
846 &namespace,
847 memory_type,
848 emb,
849 &normalized_name,
850 &snippet,
851 )?;
852 }
853 }
854 (existing_id, "updated".to_string(), next_v)
855 }
856 None => {
857 if let Some(hash_id) = duplicate_hash_id {
858 warnings.push(format!(
859 "identical body already exists as memory id {hash_id}"
860 ));
861 }
862 let id = memories::insert(&tx, &new_memory)?;
863 versions::insert_version(
864 &tx,
865 id,
866 1,
867 &normalized_name,
868 memory_type,
869 &resolved_description,
870 &new_memory.body,
871 &serde_json::to_string(&new_memory.metadata)?,
872 None,
873 "create",
874 )?;
875 if let Some(ref emb) = embedding {
876 memories::upsert_vec(
877 &tx,
878 id,
879 &namespace,
880 memory_type,
881 emb,
882 &normalized_name,
883 &snippet,
884 )?;
885 }
886 (id, "created".to_string(), 1)
887 }
888 };
889
890 if chunks_info.len() > 1 && !skip_reindex {
891 storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
892
893 if let Some(chunk_embeddings) = chunk_embeddings_cache.take() {
894 for (i, emb) in chunk_embeddings.iter().enumerate() {
895 storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
896 }
897 }
898 output::emit_progress_i18n(
899 &format!(
900 "Remember stage: persisted chunk vectors; process RSS {} MB",
901 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
902 ),
903 &format!(
904 "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
905 crate::memory_guard::current_process_memory_mb().unwrap_or(0)
906 ),
907 );
908 }
909
910 if !graph.entities.is_empty() || !graph.relationships.is_empty() {
911 for entity in &graph.entities {
912 let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
913 let entity_embedding = &graph_entity_embeddings[entities_persisted];
914 entities::upsert_entity_vec(
915 &tx,
916 entity_id,
917 &namespace,
918 entity.entity_type,
919 entity_embedding,
920 &entity.name,
921 )?;
922 entities::link_memory_entity(&tx, memory_id, entity_id)?;
923 entities::increment_degree(&tx, entity_id)?;
924 if args.max_entity_degree > 0 {
926 let cap = args.max_entity_degree as i64;
927 let degree: i64 = tx.query_row(
928 "SELECT degree FROM entities WHERE id = ?1",
929 rusqlite::params![entity_id],
930 |r| r.get(0),
931 )?;
932 if degree > cap {
933 tracing::warn!(target: "remember",
934 entity = %entity.name,
935 degree = degree,
936 cap = cap,
937 "entity degree cap exceeded"
938 );
939 }
940 }
941 entities_persisted += 1;
942 }
943 let entity_types: std::collections::HashMap<&str, EntityType> = graph
944 .entities
945 .iter()
946 .map(|entity| (entity.name.as_str(), entity.entity_type))
947 .collect();
948
949 for rel in &graph.relationships {
950 let source_entity = NewEntity {
951 name: rel.source.clone(),
952 entity_type: entity_types
953 .get(rel.source.as_str())
954 .copied()
955 .unwrap_or(EntityType::Concept),
956 description: None,
957 };
958 let target_entity = NewEntity {
959 name: rel.target.clone(),
960 entity_type: entity_types
961 .get(rel.target.as_str())
962 .copied()
963 .unwrap_or(EntityType::Concept),
964 description: None,
965 };
966 let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
967 let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
968 let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
969 entities::link_memory_relationship(&tx, memory_id, rel_id)?;
970 relationships_persisted += 1;
971 }
972 }
973 tx.commit()?;
974
975 let urls_persisted = if !extracted_urls.is_empty() {
978 let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
979 .into_iter()
980 .map(|u| storage_urls::MemoryUrl {
981 url: u.url,
982 offset: Some(u.start as i64),
983 })
984 .collect();
985 storage_urls::insert_urls(&conn, memory_id, &url_entries)
986 } else {
987 0
988 };
989
990 conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
991
992 let created_at_epoch = chrono::Utc::now().timestamp();
993 let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
994
995 output::emit_json(&RememberResponse {
996 memory_id,
997 name: normalized_name.clone(),
1001 namespace,
1002 action: action.clone(),
1003 operation: action,
1004 version,
1005 entities_persisted,
1006 relationships_persisted,
1007 relationships_truncated,
1008 chunks_created,
1009 chunks_persisted,
1010 urls_persisted,
1011 extraction_method,
1012 merged_into_memory_id: None,
1013 warnings,
1014 created_at: created_at_epoch,
1015 created_at_iso,
1016 elapsed_ms: inicio.elapsed().as_millis() as u64,
1017 name_was_normalized,
1018 original_name: name_was_normalized.then_some(original_name),
1019 backend_invoked: backend_invoked_passage,
1020 })?;
1021
1022 Ok(())
1023}
1024
#[cfg(test)]
mod tests {
    use super::compute_chunks_persisted;
    use crate::errors::AppError;
    use crate::output::RememberResponse;

    /// Builds a baseline [`RememberResponse`] shared by the serialization tests.
    ///
    /// Each test overrides only the fields it cares about with struct-update
    /// syntax, so adding a field to `RememberResponse` means touching this
    /// fixture alone instead of every test literal.
    fn base_response() -> RememberResponse {
        RememberResponse {
            memory_id: 1,
            name: "mem".to_string(),
            namespace: "global".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 0,
            relationships_persisted: 0,
            relationships_truncated: false,
            chunks_created: 1,
            chunks_persisted: 0,
            urls_persisted: 0,
            extraction_method: None,
            merged_into_memory_id: None,
            warnings: vec![],
            created_at: 0,
            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
            elapsed_ms: 0,
            name_was_normalized: false,
            original_name: None,
            backend_invoked: None,
        }
    }

    /// Serializes a response into a `serde_json::Value`, panicking on failure.
    fn to_json(resp: &RememberResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialization failed")
    }

    /// Lowercases `raw`, maps underscores and spaces to hyphens, then strips
    /// leading and trailing hyphens.
    ///
    /// NOTE(review): presumably mirrors the normalization done by the command
    /// itself; the production code path is not exercised here — confirm.
    fn normalize_name(raw: &str) -> String {
        raw.to_lowercase()
            .replace(['_', ' '], "-")
            .trim_matches('-')
            .to_string()
    }

    /// Returns a validation error when the already-normalized name is empty.
    fn reject_empty_name(normalized: &str) -> Result<(), AppError> {
        if normalized.is_empty() {
            Err(AppError::Validation(
                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
            ))
        } else {
            Ok(())
        }
    }

    /// Asserts that `result` is a validation error mentioning the
    /// "empty after normalization" condition.
    ///
    /// Any other outcome (success or a different error variant) fails the test
    /// instead of being silently ignored.
    fn assert_empty_after_normalization(result: Result<(), AppError>) {
        match result {
            Err(AppError::Validation(msg)) => assert!(
                msg.contains("empty after normalization"),
                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
            ),
            _ => panic!("expected AppError::Validation"),
        }
    }

    /// Test-local predicate: preservation applies only when `force_merge` is
    /// set, the raw body is empty, and `clear_body` is not set.
    ///
    /// NOTE(review): presumably mirrors the condition used by the command; the
    /// production check is not visible from this module — confirm.
    fn should_preserve_body(force_merge: bool, raw_body_is_empty: bool, clear_body: bool) -> bool {
        force_merge && raw_body_is_empty && !clear_body
    }

    /// Zero created chunks reports zero persisted chunks.
    #[test]
    fn chunks_persisted_zero_for_zero_chunks() {
        assert_eq!(compute_chunks_persisted(0), 0);
    }

    /// A single created chunk reports zero persisted chunks.
    #[test]
    fn chunks_persisted_zero_for_single_chunk_body() {
        assert_eq!(compute_chunks_persisted(1), 0);
    }

    /// More than one created chunk reports the same count as persisted.
    #[test]
    fn chunks_persisted_equals_count_for_multi_chunk_body() {
        assert_eq!(compute_chunks_persisted(2), 2);
        assert_eq!(compute_chunks_persisted(7), 7);
        assert_eq!(compute_chunks_persisted(64), 64);
    }

    /// The core scalar fields, the warnings array and the null merge target all
    /// appear in the serialized JSON.
    #[test]
    fn remember_response_serializes_required_fields() {
        let resp = RememberResponse {
            memory_id: 42,
            name: "minha-mem".to_string(),
            created_at: 1_705_320_000,
            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
            elapsed_ms: 55,
            ..base_response()
        };

        let json = to_json(&resp);
        assert_eq!(json["memory_id"], 42);
        assert_eq!(json["action"], "created");
        assert_eq!(json["operation"], "created");
        assert_eq!(json["version"], 1);
        assert_eq!(json["elapsed_ms"], 55u64);
        assert!(json["warnings"].is_array());
        assert!(json["merged_into_memory_id"].is_null());
    }

    /// `action` and `operation` serialize to the same value, alongside the
    /// persisted-entity, relationship and chunk counters.
    #[test]
    fn remember_response_action_e_operation_sao_aliases() {
        let resp = RememberResponse {
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 2,
            entities_persisted: 3,
            relationships_persisted: 1,
            chunks_created: 2,
            chunks_persisted: 2,
            ..base_response()
        };

        let json = to_json(&resp);
        assert_eq!(
            json["action"], json["operation"],
            "action e operation devem ser iguais"
        );
        assert_eq!(json["entities_persisted"], 3);
        assert_eq!(json["relationships_persisted"], 1);
        assert_eq!(json["chunks_created"], 2);
    }

    /// Warning messages are serialized as a JSON array of strings.
    #[test]
    fn remember_response_warnings_lista_mensagens() {
        let resp = RememberResponse {
            memory_id: 5,
            name: "dup-mem".to_string(),
            warnings: vec!["identical body already exists as memory id 3".to_string()],
            elapsed_ms: 10,
            ..base_response()
        };

        let json = to_json(&resp);
        let warnings = json["warnings"]
            .as_array()
            .expect("warnings deve ser array");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].as_str().unwrap().contains("identical body"));
    }

    /// A name starting with `__` yields a validation error with a non-empty
    /// message.
    #[test]
    fn invalid_name_reserved_prefix_returns_validation_error() {
        let name = "__reservado";
        let result: Result<(), AppError> = if name.starts_with("__") {
            Err(AppError::Validation(crate::i18n::validation::reserved_name()))
        } else {
            Ok(())
        };
        // Match exhaustively so a non-validation error cannot pass unnoticed.
        match result {
            Err(AppError::Validation(msg)) => assert!(!msg.is_empty()),
            _ => panic!("expected AppError::Validation"),
        }
    }

    /// A name one byte longer than `MAX_MEMORY_NAME_LEN` is rejected.
    #[test]
    fn name_too_long_returns_validation_error() {
        let max_len = crate::constants::MAX_MEMORY_NAME_LEN;
        let long_name = "a".repeat(max_len + 1);
        let result: Result<(), AppError> = if long_name.is_empty() || long_name.len() > max_len {
            Err(AppError::Validation(crate::i18n::validation::name_length(max_len)))
        } else {
            Ok(())
        };
        assert!(result.is_err());
    }

    /// A populated merge target serializes as a JSON integer.
    #[test]
    fn remember_response_merged_into_memory_id_some_serializes_integer() {
        let resp = RememberResponse {
            memory_id: 10,
            name: "mem-mergeada".to_string(),
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 3,
            merged_into_memory_id: Some(7),
            ..base_response()
        };

        let json = to_json(&resp);
        assert_eq!(json["merged_into_memory_id"], 7);
    }

    /// The persisted-URL counter is present in the serialized JSON.
    #[test]
    fn remember_response_urls_persisted_serializes_field() {
        let resp = RememberResponse {
            memory_id: 3,
            name: "mem-com-urls".to_string(),
            urls_persisted: 3,
            extraction_method: Some("regex-only".to_string()),
            ..base_response()
        };

        let json = to_json(&resp);
        assert_eq!(json["urls_persisted"], 3);
    }

    /// A hyphen-only name normalizes to empty and produces the dedicated
    /// "empty after normalization" message.
    #[test]
    fn empty_name_after_normalization_returns_specific_message() {
        let normalized = normalize_name("---");
        assert_empty_after_normalization(reject_empty_name(&normalized));
    }

    /// An underscore-only name normalizes to empty and produces the dedicated
    /// "empty after normalization" message.
    #[test]
    fn name_only_underscores_after_normalization_returns_specific_message() {
        let normalized = normalize_name("___");
        assert!(
            normalized.is_empty(),
            "underscores devem normalizar para string vazia"
        );
        assert_empty_after_normalization(reject_empty_name(&normalized));
    }

    /// `relationships_truncated` serializes as a JSON boolean for both values.
    #[test]
    fn remember_response_relationships_truncated_serializes_field() {
        let resp_false = RememberResponse {
            name: "test".to_string(),
            entities_persisted: 2,
            relationships_persisted: 1,
            ..base_response()
        };
        let json_false = to_json(&resp_false);
        assert_eq!(json_false["relationships_truncated"], false);

        let resp_true = RememberResponse {
            relationships_truncated: true,
            ..resp_false
        };
        let json_true = to_json(&resp_true);
        assert_eq!(json_true["relationships_truncated"], true);
    }

    /// Empty body with force-merge and without clear-body triggers preservation.
    #[test]
    fn gap08_empty_body_force_merge_no_clear_body_preserves() {
        assert!(
            should_preserve_body(true, true, false),
            "empty body + force-merge + no clear-body should trigger preservation"
        );
    }

    /// Clear-body disables preservation even with an empty body and force-merge.
    #[test]
    fn gap08_empty_body_force_merge_with_clear_body_does_not_preserve() {
        assert!(
            !should_preserve_body(true, true, true),
            "--clear-body must bypass preservation"
        );
    }

    /// A non-empty body is never preserved under force-merge.
    #[test]
    fn gap08_non_empty_body_force_merge_does_not_preserve() {
        assert!(
            !should_preserve_body(true, false, false),
            "non-empty body must overwrite, not preserve"
        );
    }

    /// Without force-merge, preservation never applies.
    #[test]
    fn gap08_empty_body_no_force_merge_does_not_preserve() {
        assert!(
            !should_preserve_body(false, true, false),
            "no --force-merge means no preservation logic applies"
        );
    }
}