Skip to main content

sqlite_graphrag/commands/
remember.rs

1//! Handler for the `remember` CLI subcommand.
2
3use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Maps the number of chunks produced by chunking to the number of rows that
/// end up in `memory_chunks`.
///
/// * `0` or `1` chunk: the body lives in the `memories` row itself and no
///   chunk row is written, so the result is `0`.
/// * `2` or more chunks: every chunk is persisted, so the result equals
///   `chunks_created`.
///
/// Kept as a standalone function so the H-M8 invariant can be unit-tested
/// without running the full handler. The `chunks_persisted` field of the
/// response schema documents this contract
/// (see `docs/schemas/remember.schema.json`).
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        // Empty and single-chunk bodies never touch `memory_chunks`.
        0 | 1 => 0,
        multi => multi,
    }
}
32
33#[derive(clap::Args)]
34#[command(after_long_help = "EXAMPLES:\n  \
35    # Create a memory with inline body\n  \
36    sqlite-graphrag remember --name design-auth --type decision \\\n    \
37    --description \"auth design\" --body \"JWT for stateless auth\"\n\n  \
38    # Create with curated graph via --graph-stdin\n  \
39    echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
40    sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n  \
41    # Enable GLiNER NER extraction with --graph-stdin\n  \
42    echo '{\"body\":\"Alice from Microsoft...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
43    sqlite-graphrag remember --name ner-test --type note --description \"test\" \\\n    \
44    --graph-stdin --enable-ner --gliner-variant int8\n\n  \
45    # Idempotent upsert with --force-merge\n  \
46    sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n    \
47    --body \"new content\" --force-merge")]
48pub struct RememberArgs {
49    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
50    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
51    #[arg(long)]
52    pub name: String,
53    #[arg(
54        long,
55        value_enum,
56        long_help = "Memory kind stored in `memories.type`. Required when creating a new memory. Optional with --force-merge: if omitted the existing memory type is inherited. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
57    )]
58    pub r#type: Option<MemoryType>,
59    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
60    /// Required when creating a new memory. Optional with --force-merge: if omitted the existing description is inherited.
61    #[arg(long)]
62    pub description: Option<String>,
63    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
64    /// Maximum 512000 bytes; rejected if empty without an external graph.
65    #[arg(
66        long,
67        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
68        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
69    )]
70    pub body: Option<String>,
71    #[arg(
72        long,
73        help = "Read body from a file instead of --body",
74        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
75    )]
76    pub body_file: Option<std::path::PathBuf>,
77    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
78    /// Mutually exclusive with --body, --body-file, --graph-stdin.
79    #[arg(
80        long,
81        conflicts_with_all = ["body", "body_file", "graph_stdin"]
82    )]
83    pub body_stdin: bool,
84    #[arg(
85        long,
86        help = "JSON file containing entities to associate with this memory"
87    )]
88    pub entities_file: Option<std::path::PathBuf>,
89    #[arg(
90        long,
91        help = "JSON file containing relationships to associate with this memory"
92    )]
93    pub relationships_file: Option<std::path::PathBuf>,
94    #[arg(
95        long,
96        help = "Read graph JSON (body + entities + relationships) from stdin",
97        conflicts_with_all = [
98            "body",
99            "body_file",
100            "body_stdin",
101            "entities_file",
102            "relationships_file"
103        ]
104    )]
105    pub graph_stdin: bool,
106    #[arg(
107        long,
108        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
109    )]
110    pub namespace: Option<String>,
111    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
112    #[arg(long)]
113    pub metadata: Option<String>,
114    #[arg(long, help = "JSON file containing metadata key-value pairs")]
115    pub metadata_file: Option<std::path::PathBuf>,
116    #[arg(long)]
117    pub force_merge: bool,
118    #[arg(
119        long,
120        value_name = "EPOCH_OR_RFC3339",
121        value_parser = crate::parsers::parse_expected_updated_at,
122        long_help = "Optimistic lock: reject if updated_at does not match. \
123Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
124    )]
125    pub expected_updated_at: Option<i64>,
126    #[arg(
127        long,
128        env = "SQLITE_GRAPHRAG_ENABLE_NER",
129        value_parser = crate::parsers::parse_bool_flexible,
130        action = clap::ArgAction::Set,
131        num_args = 0..=1,
132        default_missing_value = "true",
133        default_value = "false",
134        help = "Enable automatic GLiNER NER entity/relationship extraction from body"
135    )]
136    pub enable_ner: bool,
137    #[arg(
138        long,
139        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
140        default_value = "fp32",
141        help = "GLiNER model variant: fp32 (1.1GB, best quality), fp16 (580MB), int8 (349MB, fastest but may miss entities on short texts), q4, q4f16"
142    )]
143    pub gliner_variant: String,
144    #[arg(long, hide = true)]
145    pub skip_extraction: bool,
146    /// Explicitly clear the body content (set to empty string). Required to distinguish
147    /// intentional body clearing from accidental omission during --force-merge.
148    /// Without this flag, an empty body passed to --force-merge preserves the existing body.
149    #[arg(
150        long,
151        default_value_t = false,
152        help = "Explicitly clear body content during --force-merge (without this flag, an empty body is ignored and the existing body is kept)"
153    )]
154    pub clear_body: bool,
155    /// Validate input and report planned actions without persisting.
156    #[arg(
157        long,
158        default_value_t = false,
159        help = "Validate input and report planned actions without persisting"
160    )]
161    pub dry_run: bool,
162    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
163    #[arg(long)]
164    pub session_id: Option<String>,
165    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
166    pub format: JsonOutputFormat,
167    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
168    pub json: bool,
169    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
170    pub db: Option<String>,
171    /// Maximum process RSS in MiB; abort if exceeded during embedding.
172    #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
173          help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
174    pub max_rss_mb: u64,
175    /// Emit a warning (but do not reject) when persisting an entity whose degree would
176    /// exceed this value after the upsert. Default 50. Set 0 to disable the check.
177    #[arg(long, default_value_t = 50, value_name = "N")]
178    pub max_entity_degree: u32,
179}
180
181#[derive(Deserialize, Default)]
182#[serde(deny_unknown_fields)]
183struct GraphInput {
184    #[serde(default)]
185    body: Option<String>,
186    #[serde(default)]
187    entities: Vec<NewEntity>,
188    #[serde(default)]
189    relationships: Vec<NewRelationship>,
190}
191
192fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
193    for rel in &mut graph.relationships {
194        rel.relation = crate::parsers::normalize_relation(&rel.relation);
195        if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
196            return Err(AppError::Validation(format!(
197                "{e} for relationship '{}' -> '{}'",
198                rel.source, rel.target
199            )));
200        }
201        crate::parsers::warn_if_non_canonical(&rel.relation);
202        if !(0.0..=1.0).contains(&rel.strength) {
203            return Err(AppError::Validation(format!(
204                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
205                rel.strength, rel.source, rel.target
206            )));
207        }
208    }
209
210    Ok(())
211}
212
213pub fn run(args: RememberArgs) -> Result<(), AppError> {
214    use crate::constants::*;
215
216    let inicio = std::time::Instant::now();
217    let _ = args.format;
218    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
219
220    // Capture the original `--name` before normalization so the JSON response can
221    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
222    // an owned String because `args.name` is moved into the response below.
223    let original_name = args.name.clone();
224
225    // Auto-normalize to kebab-case before validation (P2-H).
226    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
227    // after truncation by a long filename ending in a hyphen.
228    let normalized_name = {
229        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
230        let trimmed = lower.trim_matches('-').to_string();
231        if trimmed != args.name {
232            tracing::warn!(
233                original = %args.name,
234                normalized = %trimmed,
235                "name auto-normalized to kebab-case"
236            );
237        }
238        trimmed
239    };
240    let name_was_normalized = normalized_name != original_name;
241
242    if normalized_name.is_empty() {
243        return Err(AppError::Validation(
244            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
245        ));
246    }
247    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
248        return Err(AppError::LimitExceeded(
249            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
250        ));
251    }
252
253    if normalized_name.starts_with("__") {
254        return Err(AppError::Validation(
255            crate::i18n::validation::reserved_name(),
256        ));
257    }
258
259    {
260        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
261            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
262        if !slug_re.is_match(&normalized_name) {
263            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
264                &normalized_name,
265            )));
266        }
267    }
268
269    if let Some(ref desc) = args.description {
270        if desc.len() > MAX_MEMORY_DESCRIPTION_LEN {
271            return Err(AppError::Validation(
272                crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
273            ));
274        }
275    }
276
277    let mut raw_body = if let Some(b) = args.body {
278        b
279    } else if let Some(path) = args.body_file {
280        std::fs::read_to_string(&path).map_err(AppError::Io)?
281    } else if args.body_stdin || args.graph_stdin {
282        crate::stdin_helper::read_stdin_with_timeout(60)?
283    } else {
284        String::new()
285    };
286
287    let mut entities_provided_externally =
288        args.entities_file.is_some() || args.relationships_file.is_some();
289
290    let mut graph = GraphInput::default();
291    if let Some(path) = args.entities_file {
292        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
293        graph.entities = serde_json::from_str(&content)?;
294    }
295    if let Some(path) = args.relationships_file {
296        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
297        graph.relationships = serde_json::from_str(&content)?;
298    }
299    if args.graph_stdin {
300        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
301            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
302        })?;
303        raw_body = graph.body.take().unwrap_or_default();
304    }
305    if args.graph_stdin && !graph.entities.is_empty() {
306        entities_provided_externally = true;
307    }
308
309    if graph.entities.len() > max_entities_per_memory() {
310        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
311            max_entities_per_memory(),
312        )));
313    }
314    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
315        return Err(AppError::LimitExceeded(
316            errors_msg::relationship_limit_exceeded(MAX_RELATIONSHIPS_PER_MEMORY),
317        ));
318    }
319    normalize_and_validate_graph_input(&mut graph)?;
320
321    if raw_body.len() > MAX_MEMORY_BODY_LEN {
322        return Err(AppError::LimitExceeded(
323            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
324        ));
325    }
326
327    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
328    // Without this check, empty embeddings would be persisted, breaking recall semantics.
329    // GAP-08: skip this guard when --force-merge without --clear-body; the existing body
330    // will be preserved from the database, so the effective body will not be empty.
331    let body_will_be_preserved = args.force_merge && raw_body.trim().is_empty() && !args.clear_body;
332    if !entities_provided_externally
333        && graph.entities.is_empty()
334        && raw_body.trim().is_empty()
335        && !body_will_be_preserved
336        && !args.clear_body
337    {
338        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
339    }
340
341    let metadata: serde_json::Value = if let Some(m) = args.metadata {
342        serde_json::from_str(&m)?
343    } else if let Some(path) = args.metadata_file {
344        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
345        serde_json::from_str(&content)?
346    } else {
347        serde_json::json!({})
348    };
349
350    let mut body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
351    let mut snippet: String = raw_body.chars().take(200).collect();
352
353    let paths = AppPaths::resolve(args.db.as_deref())?;
354    paths.ensure_dirs()?;
355
356    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
357    let mut extraction_method: Option<String> = None;
358    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
359    let mut relationships_truncated = false;
360    if args.enable_ner && args.skip_extraction {
361        tracing::warn!(
362            "--enable-ner and --skip-extraction are contradictory; --enable-ner takes precedence"
363        );
364    }
365    if args.skip_extraction && !args.enable_ner {
366        tracing::warn!("--skip-extraction is deprecated and has no effect (NER is disabled by default since v1.0.45); remove this flag");
367    }
368    let gliner_variant: crate::extraction::GlinerVariant =
369        args.gliner_variant.parse().unwrap_or_else(|e| {
370            tracing::warn!("invalid --gliner-variant: {e}; using fp32");
371            crate::extraction::GlinerVariant::Fp32
372        });
373    if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
374        match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
375            Ok(extracted) => {
376                extraction_method = Some(extracted.extraction_method.clone());
377                extracted_urls = extracted.urls;
378                graph.entities = extracted.entities;
379                graph.relationships = extracted.relationships;
380                relationships_truncated = extracted.relationships_truncated;
381
382                if graph.entities.len() > max_entities_per_memory() {
383                    graph.entities.truncate(max_entities_per_memory());
384                }
385                if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
386                    relationships_truncated = true;
387                    graph.relationships.truncate(MAX_RELATIONSHIPS_PER_MEMORY);
388                }
389                normalize_and_validate_graph_input(&mut graph)?;
390            }
391            Err(e) => {
392                tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
393                extraction_method = Some("none:extraction-failed".to_string());
394            }
395        }
396    }
397
398    let mut conn = open_rw(&paths.db)?;
399    ensure_schema(&mut conn)?;
400
401    // --dry-run: emit planned action without any DB writes and return.
402    if args.dry_run {
403        let existing = memories::find_by_name(&conn, &namespace, &normalized_name)?;
404        let planned_action = if existing.is_some() && args.force_merge {
405            "would_update"
406        } else {
407            "would_create"
408        };
409        output::emit_json(&serde_json::json!({
410            "dry_run": true,
411            "name": normalized_name,
412            "namespace": namespace,
413            "planned_action": planned_action,
414        }))?;
415        return Ok(());
416    }
417
418    {
419        use crate::constants::MAX_NAMESPACES_ACTIVE;
420        let active_count: u32 = conn.query_row(
421            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
422            [],
423            |r| r.get::<_, i64>(0).map(|v| v as u32),
424        )?;
425        let ns_exists: bool = conn.query_row(
426            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
427            rusqlite::params![namespace],
428            |r| r.get::<_, i64>(0).map(|v| v > 0),
429        )?;
430        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
431            return Err(AppError::NamespaceError(format!(
432                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
433            )));
434        }
435    }
436
437    // M7: detect soft-deleted memory before the standard duplicate check.
438    if let Some((sd_id, true)) =
439        memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
440    {
441        if args.force_merge {
442            memories::clear_deleted_at(&conn, sd_id)?;
443        } else {
444            return Err(AppError::Duplicate(
445                errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
446            ));
447        }
448    }
449
450    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
451    if existing_memory.is_some() && !args.force_merge {
452        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
453            &normalized_name,
454            &namespace,
455        )));
456    }
457
458    // GAP-10: resolve type and description.
459    // For CREATE path (new memory): both are required.
460    // For UPDATE path (--force-merge on existing memory): inherit from existing row when omitted.
461    let (resolved_type, resolved_description) = if existing_memory.is_none() {
462        // CREATE path — both fields are mandatory.
463        let t = args.r#type.ok_or_else(|| {
464            AppError::Validation(
465                "--type and --description are required when creating a new memory".to_string(),
466            )
467        })?;
468        let d = args.description.clone().ok_or_else(|| {
469            AppError::Validation(
470                "--type and --description are required when creating a new memory".to_string(),
471            )
472        })?;
473        (t.as_str().to_string(), d)
474    } else {
475        // UPDATE path (--force-merge) — inherit missing fields from stored row.
476        let existing_row = memories::read_by_name(&conn, &namespace, &normalized_name)?
477            .ok_or_else(|| {
478                AppError::NotFound(format!(
479                    "memory '{normalized_name}' not found in namespace '{namespace}'"
480                ))
481            })?;
482        let t = args
483            .r#type
484            .map(|v| v.as_str().to_string())
485            .unwrap_or_else(|| existing_row.memory_type.clone());
486        let d = args
487            .description
488            .clone()
489            .unwrap_or_else(|| existing_row.description.clone());
490        (t, d)
491    };
492
493    // GAP-08/GAP-09: protect existing body from accidental destruction during --force-merge.
494    // When the caller omits a body (or passes an empty one) without --clear-body, silently
495    // preserve the existing body from the database.  This prevents a common scripting mistake
496    // where a cron job updates metadata fields and inadvertently wipes the stored content.
497    if body_will_be_preserved {
498        if let Some(existing_row) = memories::read_by_name(&conn, &namespace, &normalized_name)? {
499            if !existing_row.body.is_empty() {
500                tracing::debug!(
501                    name = %normalized_name,
502                    "GAP-08: empty body with --force-merge and no --clear-body; preserving existing body"
503                );
504                raw_body = existing_row.body;
505                body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
506                snippet = raw_body.chars().take(200).collect();
507            }
508        }
509    }
510
511    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
512
513    output::emit_progress_i18n(
514        &format!(
515            "Remember stage: validated input; available memory {} MB",
516            crate::memory_guard::available_memory_mb()
517        ),
518        &format!(
519            "Stage remember: input validated; available memory {} MB",
520            crate::memory_guard::available_memory_mb()
521        ),
522    );
523
524    let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
525    let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
526    let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
527    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
528    let chunks_created = chunks_info.len();
529    // For single-chunk bodies the memory row itself stores the content and no
530    // entry is appended to `memory_chunks` (see line ~545). For multi-chunk
531    // bodies every chunk is persisted via `insert_chunk_slices`.
532    let chunks_persisted = compute_chunks_persisted(chunks_info.len());
533
534    output::emit_progress_i18n(
535        &format!(
536            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
537            chunks_created,
538            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
539        ),
540        &format!(
541            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
542            chunks_created,
543            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
544        ),
545    );
546
547    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
548        return Err(AppError::LimitExceeded(format!(
549            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
550            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
551        )));
552    }
553
554    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
555    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
556
557    let embedding = if chunks_info.len() == 1 {
558        crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
559    } else {
560        let chunk_texts: Vec<&str> = chunks_info
561            .iter()
562            .map(|c| chunking::chunk_text(&raw_body, c))
563            .collect();
564        output::emit_progress_i18n(
565            &format!(
566                "Embedding {} chunks serially to keep memory bounded...",
567                chunks_info.len()
568            ),
569            &format!(
570                "Embedding {} chunks serially to keep memory bounded...",
571                chunks_info.len()
572            ),
573        );
574        let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
575        for chunk_text in &chunk_texts {
576            if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
577                if rss > args.max_rss_mb {
578                    tracing::error!(
579                        rss_mb = rss,
580                        max_rss_mb = args.max_rss_mb,
581                        "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
582                    );
583                    return Err(AppError::LowMemory {
584                        available_mb: crate::memory_guard::available_memory_mb(),
585                        required_mb: args.max_rss_mb,
586                    });
587                }
588            }
589            chunk_embeddings.push(crate::daemon::embed_passage_or_local(
590                &paths.models,
591                chunk_text,
592            )?);
593        }
594        output::emit_progress_i18n(
595            &format!(
596                "Remember stage: chunk embeddings complete; process RSS {} MB",
597                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
598            ),
599            &format!(
600                "Stage remember: chunk embeddings completed; process RSS {} MB",
601                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
602            ),
603        );
604        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
605        chunk_embeddings_cache = Some(chunk_embeddings);
606        aggregated
607    };
608    let body_for_storage = raw_body;
609
610    let memory_type = resolved_type.as_str();
611    let new_memory = NewMemory {
612        namespace: namespace.clone(),
613        name: normalized_name.clone(),
614        memory_type: memory_type.to_string(),
615        description: resolved_description.clone(),
616        body: body_for_storage,
617        body_hash: body_hash.clone(),
618        session_id: args.session_id.clone(),
619        source: "agent".to_string(),
620        metadata,
621    };
622
623    let mut warnings = Vec::with_capacity(4);
624    let mut entities_persisted = 0usize;
625    let mut relationships_persisted = 0usize;
626
627    let graph_entity_embeddings = graph
628        .entities
629        .iter()
630        .map(|entity| {
631            let entity_text = match &entity.description {
632                Some(desc) => format!("{} {}", entity.name, desc),
633                None => entity.name.clone(),
634            };
635            crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
636        })
637        .collect::<Result<Vec<_>, _>>()?;
638
639    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
640
641    let (memory_id, action, version) = match existing_memory {
642        Some((existing_id, _updated_at, _current_version)) => {
643            if let Some(hash_id) = duplicate_hash_id {
644                if hash_id != existing_id {
645                    warnings.push(format!(
646                        "identical body already exists as memory id {hash_id}"
647                    ));
648                }
649            }
650
651            // C1 fix: capture old values for FTS5 sync before update
652            let (old_fts_name, old_fts_desc, old_fts_body): (String, String, String) = tx
653                .query_row(
654                    "SELECT name, description, body FROM memories WHERE id = ?1",
655                    rusqlite::params![existing_id],
656                    |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
657                )?;
658
659            storage_chunks::delete_chunks(&tx, existing_id)?;
660
661            let next_v = versions::next_version(&tx, existing_id)?;
662            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
663
664            // C1 fix: sync FTS5 external-content index after update
665            // (trg_fts_au trigger is absent by design due to sqlite-vec conflict)
666            memories::sync_fts_after_update(
667                &tx,
668                existing_id,
669                &old_fts_name,
670                &old_fts_desc,
671                &old_fts_body,
672                &normalized_name,
673                &resolved_description,
674                &new_memory.body,
675            )?;
676
677            versions::insert_version(
678                &tx,
679                existing_id,
680                next_v,
681                &normalized_name,
682                memory_type,
683                &resolved_description,
684                &new_memory.body,
685                &serde_json::to_string(&new_memory.metadata)?,
686                None,
687                "edit",
688            )?;
689            memories::upsert_vec(
690                &tx,
691                existing_id,
692                &namespace,
693                memory_type,
694                &embedding,
695                &normalized_name,
696                &snippet,
697            )?;
698            (existing_id, "updated".to_string(), next_v)
699        }
700        None => {
701            if let Some(hash_id) = duplicate_hash_id {
702                warnings.push(format!(
703                    "identical body already exists as memory id {hash_id}"
704                ));
705            }
706            let id = memories::insert(&tx, &new_memory)?;
707            versions::insert_version(
708                &tx,
709                id,
710                1,
711                &normalized_name,
712                memory_type,
713                &resolved_description,
714                &new_memory.body,
715                &serde_json::to_string(&new_memory.metadata)?,
716                None,
717                "create",
718            )?;
719            memories::upsert_vec(
720                &tx,
721                id,
722                &namespace,
723                memory_type,
724                &embedding,
725                &normalized_name,
726                &snippet,
727            )?;
728            (id, "created".to_string(), 1)
729        }
730    };
731
732    if chunks_info.len() > 1 {
733        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
734
735        let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
736            AppError::Internal(anyhow::anyhow!(
737                "chunk embeddings cache missing in multi-chunk remember path"
738            ))
739        })?;
740
741        for (i, emb) in chunk_embeddings.iter().enumerate() {
742            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
743        }
744        output::emit_progress_i18n(
745            &format!(
746                "Remember stage: persisted chunk vectors; process RSS {} MB",
747                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
748            ),
749            &format!(
750                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
751                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
752            ),
753        );
754    }
755
756    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
757        for entity in &graph.entities {
758            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
759            let entity_embedding = &graph_entity_embeddings[entities_persisted];
760            entities::upsert_entity_vec(
761                &tx,
762                entity_id,
763                &namespace,
764                entity.entity_type,
765                entity_embedding,
766                &entity.name,
767            )?;
768            entities::link_memory_entity(&tx, memory_id, entity_id)?;
769            entities::increment_degree(&tx, entity_id)?;
770            // GAP-17: warn when entity degree exceeds the configured cap.
771            if args.max_entity_degree > 0 {
772                let cap = args.max_entity_degree as i64;
773                let degree: i64 = tx.query_row(
774                    "SELECT degree FROM entities WHERE id = ?1",
775                    rusqlite::params![entity_id],
776                    |r| r.get(0),
777                )?;
778                if degree > cap {
779                    tracing::warn!(
780                        entity = %entity.name,
781                        degree = degree,
782                        cap = cap,
783                        "entity degree cap exceeded"
784                    );
785                }
786            }
787            entities_persisted += 1;
788        }
789        let entity_types: std::collections::HashMap<&str, EntityType> = graph
790            .entities
791            .iter()
792            .map(|entity| (entity.name.as_str(), entity.entity_type))
793            .collect();
794
795        for rel in &graph.relationships {
796            let source_entity = NewEntity {
797                name: rel.source.clone(),
798                entity_type: entity_types
799                    .get(rel.source.as_str())
800                    .copied()
801                    .unwrap_or(EntityType::Concept),
802                description: None,
803            };
804            let target_entity = NewEntity {
805                name: rel.target.clone(),
806                entity_type: entity_types
807                    .get(rel.target.as_str())
808                    .copied()
809                    .unwrap_or(EntityType::Concept),
810                description: None,
811            };
812            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
813            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
814            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
815            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
816            relationships_persisted += 1;
817        }
818    }
819    tx.commit()?;
820
821    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
822    // Failures do not propagate — non-critical path with graceful degradation.
823    let urls_persisted = if !extracted_urls.is_empty() {
824        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
825            .into_iter()
826            .map(|u| storage_urls::MemoryUrl {
827                url: u.url,
828                offset: Some(u.offset as i64),
829            })
830            .collect();
831        storage_urls::insert_urls(&conn, memory_id, &url_entries)
832    } else {
833        0
834    };
835
836    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
837
838    let created_at_epoch = chrono::Utc::now().timestamp();
839    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
840
841    output::emit_json(&RememberResponse {
842        memory_id,
843        // Persist the normalized (kebab-case) slug as `name` since that is the
844        // storage key. The original input is exposed via `original_name` only
845        // when normalization actually changed something (B_4 in v1.0.32).
846        name: normalized_name.clone(),
847        namespace,
848        action: action.clone(),
849        operation: action,
850        version,
851        entities_persisted,
852        relationships_persisted,
853        relationships_truncated,
854        chunks_created,
855        chunks_persisted,
856        urls_persisted,
857        extraction_method,
858        merged_into_memory_id: None,
859        warnings,
860        created_at: created_at_epoch,
861        created_at_iso,
862        elapsed_ms: inicio.elapsed().as_millis() as u64,
863        name_was_normalized,
864        original_name: name_was_normalized.then_some(original_name),
865    })?;
866
867    Ok(())
868}
869
870#[cfg(test)]
871mod tests {
872    use super::compute_chunks_persisted;
873    use crate::output::RememberResponse;
874
875    // Bug H-M8: chunks_persisted contract is unit-testable and matches schema.
876    #[test]
877    fn chunks_persisted_zero_for_zero_chunks() {
878        assert_eq!(compute_chunks_persisted(0), 0);
879    }
880
881    #[test]
882    fn chunks_persisted_zero_for_single_chunk_body() {
883        // Single-chunk bodies live in the memories row itself; no row is
884        // appended to memory_chunks. This is the documented contract.
885        assert_eq!(compute_chunks_persisted(1), 0);
886    }
887
888    #[test]
889    fn chunks_persisted_equals_count_for_multi_chunk_body() {
890        // Every chunk above the first triggers a row in memory_chunks.
891        assert_eq!(compute_chunks_persisted(2), 2);
892        assert_eq!(compute_chunks_persisted(7), 7);
893        assert_eq!(compute_chunks_persisted(64), 64);
894    }
895
896    #[test]
897    fn remember_response_serializes_required_fields() {
898        let resp = RememberResponse {
899            memory_id: 42,
900            name: "minha-mem".to_string(),
901            namespace: "global".to_string(),
902            action: "created".to_string(),
903            operation: "created".to_string(),
904            version: 1,
905            entities_persisted: 0,
906            relationships_persisted: 0,
907            relationships_truncated: false,
908            chunks_created: 1,
909            chunks_persisted: 0,
910            urls_persisted: 0,
911            extraction_method: None,
912            merged_into_memory_id: None,
913            warnings: vec![],
914            created_at: 1_705_320_000,
915            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
916            elapsed_ms: 55,
917            name_was_normalized: false,
918            original_name: None,
919        };
920
921        let json = serde_json::to_value(&resp).expect("serialization failed");
922        assert_eq!(json["memory_id"], 42);
923        assert_eq!(json["action"], "created");
924        assert_eq!(json["operation"], "created");
925        assert_eq!(json["version"], 1);
926        assert_eq!(json["elapsed_ms"], 55u64);
927        assert!(json["warnings"].is_array());
928        assert!(json["merged_into_memory_id"].is_null());
929    }
930
931    #[test]
932    fn remember_response_action_e_operation_sao_aliases() {
933        let resp = RememberResponse {
934            memory_id: 1,
935            name: "mem".to_string(),
936            namespace: "global".to_string(),
937            action: "updated".to_string(),
938            operation: "updated".to_string(),
939            version: 2,
940            entities_persisted: 3,
941            relationships_persisted: 1,
942            relationships_truncated: false,
943            extraction_method: None,
944            chunks_created: 2,
945            chunks_persisted: 2,
946            urls_persisted: 0,
947            merged_into_memory_id: None,
948            warnings: vec![],
949            created_at: 0,
950            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
951            elapsed_ms: 0,
952            name_was_normalized: false,
953            original_name: None,
954        };
955
956        let json = serde_json::to_value(&resp).expect("serialization failed");
957        assert_eq!(
958            json["action"], json["operation"],
959            "action e operation devem ser iguais"
960        );
961        assert_eq!(json["entities_persisted"], 3);
962        assert_eq!(json["relationships_persisted"], 1);
963        assert_eq!(json["chunks_created"], 2);
964    }
965
966    #[test]
967    fn remember_response_warnings_lista_mensagens() {
968        let resp = RememberResponse {
969            memory_id: 5,
970            name: "dup-mem".to_string(),
971            namespace: "global".to_string(),
972            action: "created".to_string(),
973            operation: "created".to_string(),
974            version: 1,
975            entities_persisted: 0,
976            extraction_method: None,
977            relationships_persisted: 0,
978            relationships_truncated: false,
979            chunks_created: 1,
980            chunks_persisted: 0,
981            urls_persisted: 0,
982            merged_into_memory_id: None,
983            warnings: vec!["identical body already exists as memory id 3".to_string()],
984            created_at: 0,
985            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
986            elapsed_ms: 10,
987            name_was_normalized: false,
988            original_name: None,
989        };
990
991        let json = serde_json::to_value(&resp).expect("serialization failed");
992        let warnings = json["warnings"]
993            .as_array()
994            .expect("warnings deve ser array");
995        assert_eq!(warnings.len(), 1);
996        assert!(warnings[0].as_str().unwrap().contains("identical body"));
997    }
998
999    #[test]
1000    fn invalid_name_reserved_prefix_returns_validation_error() {
1001        use crate::errors::AppError;
1002        // Validates the rejection logic for names with the "__" prefix directly
1003        let nome = "__reservado";
1004        let resultado: Result<(), AppError> = if nome.starts_with("__") {
1005            Err(AppError::Validation(
1006                crate::i18n::validation::reserved_name(),
1007            ))
1008        } else {
1009            Ok(())
1010        };
1011        assert!(resultado.is_err());
1012        if let Err(AppError::Validation(msg)) = resultado {
1013            assert!(!msg.is_empty());
1014        }
1015    }
1016
1017    #[test]
1018    fn name_too_long_returns_validation_error() {
1019        use crate::errors::AppError;
1020        let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
1021        let resultado: Result<(), AppError> =
1022            if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
1023                Err(AppError::Validation(crate::i18n::validation::name_length(
1024                    crate::constants::MAX_MEMORY_NAME_LEN,
1025                )))
1026            } else {
1027                Ok(())
1028            };
1029        assert!(resultado.is_err());
1030    }
1031
1032    #[test]
1033    fn remember_response_merged_into_memory_id_some_serializes_integer() {
1034        let resp = RememberResponse {
1035            memory_id: 10,
1036            name: "mem-mergeada".to_string(),
1037            namespace: "global".to_string(),
1038            action: "updated".to_string(),
1039            operation: "updated".to_string(),
1040            version: 3,
1041            extraction_method: None,
1042            entities_persisted: 0,
1043            relationships_persisted: 0,
1044            relationships_truncated: false,
1045            chunks_created: 1,
1046            chunks_persisted: 0,
1047            urls_persisted: 0,
1048            merged_into_memory_id: Some(7),
1049            warnings: vec![],
1050            created_at: 0,
1051            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1052            elapsed_ms: 0,
1053            name_was_normalized: false,
1054            original_name: None,
1055        };
1056
1057        let json = serde_json::to_value(&resp).expect("serialization failed");
1058        assert_eq!(json["merged_into_memory_id"], 7);
1059    }
1060
1061    #[test]
1062    fn remember_response_urls_persisted_serializes_field() {
1063        // v1.0.24 P0-2: garante que urls_persisted aparece no JSON e aceita valor > 0.
1064        let resp = RememberResponse {
1065            memory_id: 3,
1066            name: "mem-com-urls".to_string(),
1067            namespace: "global".to_string(),
1068            action: "created".to_string(),
1069            operation: "created".to_string(),
1070            version: 1,
1071            entities_persisted: 0,
1072            relationships_persisted: 0,
1073            relationships_truncated: false,
1074            chunks_created: 1,
1075            chunks_persisted: 0,
1076            urls_persisted: 3,
1077            extraction_method: Some("regex-only".to_string()),
1078            merged_into_memory_id: None,
1079            warnings: vec![],
1080            created_at: 0,
1081            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1082            elapsed_ms: 0,
1083            name_was_normalized: false,
1084            original_name: None,
1085        };
1086        let json = serde_json::to_value(&resp).expect("serialization failed");
1087        assert_eq!(json["urls_persisted"], 3);
1088    }
1089
1090    #[test]
1091    fn empty_name_after_normalization_returns_specific_message() {
1092        // P0-4 regression: name consisting only of hyphens normalizes to empty string;
1093        // must produce a distinct error message, not the "too long" message.
1094        use crate::errors::AppError;
1095        let normalized = "---".to_lowercase().replace(['_', ' '], "-");
1096        let normalized = normalized.trim_matches('-').to_string();
1097        let resultado: Result<(), AppError> = if normalized.is_empty() {
1098            Err(AppError::Validation(
1099                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
1100            ))
1101        } else {
1102            Ok(())
1103        };
1104        assert!(resultado.is_err());
1105        if let Err(AppError::Validation(msg)) = resultado {
1106            assert!(
1107                msg.contains("empty after normalization"),
1108                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
1109            );
1110        }
1111    }
1112
1113    #[test]
1114    fn name_only_underscores_after_normalization_returns_specific_message() {
1115        // P0-4 regression: name consisting only of underscores normalizes to empty string.
1116        use crate::errors::AppError;
1117        let normalized = "___".to_lowercase().replace(['_', ' '], "-");
1118        let normalized = normalized.trim_matches('-').to_string();
1119        assert!(
1120            normalized.is_empty(),
1121            "underscores devem normalizar para string vazia"
1122        );
1123        let resultado: Result<(), AppError> = if normalized.is_empty() {
1124            Err(AppError::Validation(
1125                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
1126            ))
1127        } else {
1128            Ok(())
1129        };
1130        assert!(resultado.is_err());
1131        if let Err(AppError::Validation(msg)) = resultado {
1132            assert!(
1133                msg.contains("empty after normalization"),
1134                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
1135            );
1136        }
1137    }
1138
1139    #[test]
1140    fn remember_response_relationships_truncated_serializes_field() {
1141        // P1-D: garante que relationships_truncated aparece no JSON como bool.
1142        let resp_false = RememberResponse {
1143            memory_id: 1,
1144            name: "test".to_string(),
1145            namespace: "global".to_string(),
1146            action: "created".to_string(),
1147            operation: "created".to_string(),
1148            version: 1,
1149            entities_persisted: 2,
1150            relationships_persisted: 1,
1151            relationships_truncated: false,
1152            chunks_created: 1,
1153            chunks_persisted: 0,
1154            urls_persisted: 0,
1155            extraction_method: None,
1156            merged_into_memory_id: None,
1157            warnings: vec![],
1158            created_at: 0,
1159            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1160            elapsed_ms: 0,
1161            name_was_normalized: false,
1162            original_name: None,
1163        };
1164        let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
1165        assert_eq!(json_false["relationships_truncated"], false);
1166
1167        let resp_true = RememberResponse {
1168            relationships_truncated: true,
1169            ..resp_false
1170        };
1171        let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
1172        assert_eq!(json_true["relationships_truncated"], true);
1173    }
1174
1175    // GAP-08: body-preservation predicate tests.
1176    // Verifies the decision logic that determines whether an existing body should
1177    // be kept instead of overwritten with an empty incoming body during --force-merge.
1178
1179    /// Returns `true` when the existing body should be preserved.
1180    ///
1181    /// Mirrors the `body_will_be_preserved` expression in `run()` so the logic
1182    /// is testable without a real database connection.
1183    fn should_preserve_body(force_merge: bool, raw_body_is_empty: bool, clear_body: bool) -> bool {
1184        force_merge && raw_body_is_empty && !clear_body
1185    }
1186
1187    #[test]
1188    fn gap08_empty_body_force_merge_no_clear_body_preserves() {
1189        // Caller passes no body with --force-merge but without --clear-body.
1190        // The existing body in the DB must be kept.
1191        assert!(
1192            should_preserve_body(true, true, false),
1193            "empty body + force-merge + no clear-body should trigger preservation"
1194        );
1195    }
1196
1197    #[test]
1198    fn gap08_empty_body_force_merge_with_clear_body_does_not_preserve() {
1199        // Caller explicitly passes --clear-body; intentional wipe is honoured.
1200        assert!(
1201            !should_preserve_body(true, true, true),
1202            "--clear-body must bypass preservation"
1203        );
1204    }
1205
1206    #[test]
1207    fn gap08_non_empty_body_force_merge_does_not_preserve() {
1208        // Caller provides a real body; it must overwrite the existing one.
1209        assert!(
1210            !should_preserve_body(true, false, false),
1211            "non-empty body must overwrite, not preserve"
1212        );
1213    }
1214
1215    #[test]
1216    fn gap08_empty_body_no_force_merge_does_not_preserve() {
1217        // Without --force-merge the path is a fresh create; no preservation needed.
1218        assert!(
1219            !should_preserve_body(false, true, false),
1220            "no --force-merge means no preservation logic applies"
1221        );
1222    }
1223}