Skip to main content

sqlite_graphrag/commands/
remember.rs

1//! Handler for the `remember` CLI subcommand.
2
3use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Translates the number of chunks produced for a body into the number of
/// rows that get written to `memory_chunks`.
///
/// * Zero or one chunk: the body lives in the `memories` row itself, no chunk
///   row is appended, and the result is `0`.
/// * Two or more chunks: every chunk is persisted, so the result equals
///   `chunks_created`.
///
/// Kept as a standalone function so the H-M8 invariant can be unit-tested
/// without driving the whole handler. The `chunks_persisted` field of the
/// response schema documents the same contract (see
/// `docs/schemas/remember.schema.json`).
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        many => many,
    }
}
32
// Command-line arguments accepted by the `remember` subcommand.
//
// NOTE: the notes added throughout this struct are plain `//` comments on
// purpose. clap's derive macro feeds `///` doc comments into the generated
// help output, so turning these notes into `///` would change what `--help`
// prints.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Create a memory with inline body\n  \
    sqlite-graphrag remember --name design-auth --type decision \\\n    \
    --description \"auth design\" --body \"JWT for stateless auth\"\n\n  \
    # Create with curated graph via --graph-stdin\n  \
    echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
    sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n  \
    # Enable automatic URL extraction with --graph-stdin (URL-regex only since v1.0.79)\n  \
    echo '{\"body\":\"See https://docs.rs ...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
    sqlite-graphrag remember --name url-test --type note --description \"test\" \\\n    \
    --graph-stdin --enable-ner\n\n  \
    # Idempotent upsert with --force-merge\n  \
    sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n    \
    --body \"new content\" --force-merge\n\n\
NOTE:\n  \
    remember does NOT accept positional arguments.\n  \
    Use --body \"text\" for inline content\n  \
    Use --body-file path for file content\n  \
    Use --body-stdin for piped content\n  \
    Use --graph-stdin for JSON with entities and relationships\n\n\
ENTITY TYPES (for --graph-stdin entities, NOT memory --type):\n  \
    concept, tool, person, file, project, decision, incident,\n  \
    organization, location, date, dashboard, issue_tracker, memory\n  \
    WARNING: reference, skill, document, note, user, feedback are\n  \
    MEMORY types only — NOT valid for entities.\n  \
    Mapping: reference→concept, document→file, user→person")]
pub struct RememberArgs {
    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
    #[arg(long)]
    pub name: String,
    // Optional at the CLI level: `run` requires it when creating a new memory
    // and falls back to the stored type when updating via --force-merge.
    #[arg(
        long,
        value_enum,
        long_help = "Memory kind stored in `memories.type`. Required when creating a new memory. Optional with --force-merge: if omitted the existing memory type is inherited. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
    )]
    pub r#type: Option<MemoryType>,
    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
    /// Required when creating a new memory. Optional with --force-merge: if omitted the existing description is inherited.
    #[arg(long)]
    pub description: Option<String>,
    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
    /// Maximum 512000 bytes; rejected if empty without an external graph.
    #[arg(
        long,
        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
    )]
    pub body: Option<String>,
    // File whose contents become the body. `run` compares the file size with
    // MAX_MEMORY_BODY_LEN before reading and falls back to a lossy UTF-8
    // decode when the file contains invalid UTF-8.
    #[arg(
        long,
        help = "Read body from a file instead of --body",
        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
    )]
    pub body_file: Option<std::path::PathBuf>,
    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
    /// Mutually exclusive with --body, --body-file, --graph-stdin.
    #[arg(
        long,
        conflicts_with_all = ["body", "body_file", "graph_stdin"]
    )]
    pub body_stdin: bool,
    // `run` deserializes this file straight into the entity list, so the
    // top-level JSON value has to be the entity array itself.
    #[arg(
        long,
        help = "JSON file containing entities to associate with this memory"
    )]
    pub entities_file: Option<std::path::PathBuf>,
    // Same handling as --entities-file, but for the relationship list.
    #[arg(
        long,
        help = "JSON file containing relationships to associate with this memory"
    )]
    pub relationships_file: Option<std::path::PathBuf>,
    // The stdin payload is parsed as one JSON object (`GraphInput`); its
    // optional `body` key becomes the memory body. Because it replaces every
    // other body/graph source, it conflicts with all of them.
    #[arg(
        long,
        help = "Read graph JSON (body + entities + relationships) from stdin",
        conflicts_with_all = [
            "body",
            "body_file",
            "body_stdin",
            "entities_file",
            "relationships_file"
        ]
    )]
    pub graph_stdin: bool,
    // Forwarded to `crate::namespace::resolve_namespace`, which picks the
    // effective namespace when this is `None`.
    #[arg(
        long,
        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
    )]
    pub namespace: Option<String>,
    // NOTE(review): the help text says this is mutually exclusive with
    // --metadata-file, but no `conflicts_with` is declared; `run` simply
    // prefers --metadata when both are supplied.
    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
    #[arg(long)]
    pub metadata: Option<String>,
    // Only consulted by `run` when --metadata is absent; the file size is
    // checked against MAX_MEMORY_BODY_LEN before it is read.
    #[arg(long, help = "JSON file containing metadata key-value pairs")]
    pub metadata_file: Option<std::path::PathBuf>,
    // When set, `run` updates an existing memory of the same name instead of
    // returning a Duplicate error, and clears `deleted_at` on a soft-deleted
    // match before proceeding.
    #[arg(long)]
    pub force_merge: bool,
    // Converted to an i64 by `parse_expected_updated_at` — presumably Unix
    // seconds; confirm against the parser.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    // `num_args = 0..=1` together with `default_missing_value` lets the flag
    // be written bare (`--enable-ner`, meaning true) or with an explicit
    // value; the environment variable supplies the value when the flag is
    // not on the command line.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_ENABLE_NER",
        value_parser = crate::parsers::parse_bool_flexible,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        default_missing_value = "true",
        default_value = "false",
        help = "Enable automatic URL-regex extraction from body (the GLiNER NER pipeline was removed in v1.0.79)"
    )]
    pub enable_ner: bool,
    // Kept as a free-form String: `run` logs a deprecation warning for any
    // value other than "fp32" and maps everything except "int8" to Fp32.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
        default_value = "fp32",
        help = "DEPRECATED: no effect since v1.0.79 (the GLiNER pipeline was removed); accepted for compatibility only"
    )]
    pub gliner_variant: String,
    // Hidden legacy flag: `run` rejects it in combination with --enable-ner
    // and otherwise only logs a deprecation warning.
    #[arg(long, hide = true)]
    pub skip_extraction: bool,
    /// Explicitly clear the body content (set to empty string). Required to distinguish
    /// intentional body clearing from accidental omission during --force-merge.
    /// Without this flag, an empty body passed to --force-merge preserves the existing body.
    #[arg(
        long,
        default_value_t = false,
        help = "Explicitly clear body content during --force-merge (without this flag, an empty body is ignored and the existing body is kept)"
    )]
    pub clear_body: bool,
    /// Validate input and report planned actions without persisting.
    #[arg(
        long,
        default_value_t = false,
        help = "Validate input and report planned actions without persisting"
    )]
    pub dry_run: bool,
    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
    #[arg(long)]
    pub session_id: Option<String>,
    // NOTE(review): the visible part of `run` only touches this via
    // `let _ = args.format;` — confirm whether it is consumed anywhere else.
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location override; handed to `AppPaths::resolve` by `run`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    /// Maximum process RSS in MiB; abort if exceeded during embedding.
    #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
          help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
    pub max_rss_mb: u64,
    /// Emit a warning (but do not reject) when persisting an entity whose degree would
    /// exceed this value after the upsert. Default 50. Set 0 to disable the check.
    #[arg(long, default_value_t = 50, value_name = "N")]
    pub max_entity_degree: u32,
    /// G42/S3 (v1.0.79): maximum simultaneous LLM embedding subprocesses.
    /// The effective value is further bounded by CPU count and available
    /// RAM (permits = min(N, cpus, ram_livre*0.5/350MB), clamp [1, 32]).
    #[arg(long, default_value_t = 4, value_name = "N",
          value_parser = clap::value_parser!(u64).range(1..=32),
          help = "Maximum simultaneous LLM embedding subprocesses (default: 4, clamp [1,32])")]
    pub llm_parallelism: u64,
}
199
200#[derive(Deserialize, Default)]
201#[serde(deny_unknown_fields)]
202struct GraphInput {
203    #[serde(default)]
204    body: Option<String>,
205    #[serde(default)]
206    entities: Vec<NewEntity>,
207    #[serde(default)]
208    relationships: Vec<NewRelationship>,
209}
210
211fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
212    for rel in &mut graph.relationships {
213        rel.relation = crate::parsers::normalize_relation(&rel.relation);
214        if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
215            return Err(AppError::Validation(format!(
216                "{e} for relationship '{}' -> '{}'",
217                rel.source, rel.target
218            )));
219        }
220        crate::parsers::warn_if_non_canonical(&rel.relation);
221        if !(0.0..=1.0).contains(&rel.strength) {
222            return Err(AppError::Validation(format!(
223                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
224                rel.strength, rel.source, rel.target
225            )));
226        }
227    }
228
229    Ok(())
230}
231
232#[tracing::instrument(skip_all, level = "debug", name = "remember")]
233pub fn run(
234    args: RememberArgs,
235    llm_backend: crate::cli::LlmBackendChoice,
236    embedding_backend: crate::cli::EmbeddingBackendChoice,
237) -> Result<(), AppError> {
238    use crate::constants::*;
239
240    let inicio = std::time::Instant::now();
241    let _ = args.format;
242    tracing::debug!(target: "remember", name = %args.name, "persisting memory");
243    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
244
245    // Capture the original `--name` before normalization so the JSON response can
246    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
247    // an owned String because `args.name` is moved into the response below.
248    let original_name = args.name.clone();
249
250    // Auto-normalize to kebab-case before validation (P2-H).
251    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
252    // after truncation by a long filename ending in a hyphen.
253    let normalized_name = {
254        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
255        let trimmed = lower.trim_matches('-').to_string();
256        if trimmed != args.name {
257            tracing::warn!(target: "remember",
258                original = %args.name,
259                normalized = %trimmed,
260                "name auto-normalized to kebab-case"
261            );
262        }
263        trimmed
264    };
265    let name_was_normalized = normalized_name != original_name;
266
267    if normalized_name.is_empty() {
268        return Err(AppError::Validation(
269            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
270        ));
271    }
272    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
273        return Err(AppError::LimitExceeded(
274            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
275        ));
276    }
277
278    if normalized_name.starts_with("__") {
279        return Err(AppError::Validation(
280            crate::i18n::validation::reserved_name(),
281        ));
282    }
283
284    {
285        let slug_re = crate::constants::name_slug_regex();
286        if !slug_re.is_match(&normalized_name) {
287            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
288                &normalized_name,
289            )));
290        }
291    }
292
293    if let Some(ref desc) = args.description {
294        if desc.len() > MAX_MEMORY_DESCRIPTION_LEN {
295            return Err(AppError::Validation(
296                crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
297            ));
298        }
299    }
300
301    let mut raw_body = if let Some(b) = args.body {
302        b
303    } else if let Some(ref path) = args.body_file {
304        let file_size = std::fs::metadata(path).map_err(AppError::Io)?.len();
305        if file_size > MAX_MEMORY_BODY_LEN as u64 {
306            return Err(AppError::LimitExceeded(
307                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
308            ));
309        }
310        match std::fs::read_to_string(path) {
311            Ok(s) => s,
312            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
313                let bytes = std::fs::read(path).map_err(AppError::Io)?;
314                tracing::warn!(target: "remember", "body file contains invalid UTF-8; replacing invalid sequences");
315                String::from_utf8_lossy(&bytes).into_owned()
316            }
317            Err(e) => return Err(AppError::Io(e)),
318        }
319    } else if args.body_stdin || args.graph_stdin {
320        crate::stdin_helper::read_stdin_with_timeout(60)?
321    } else {
322        String::new()
323    };
324
325    let mut entities_provided_externally =
326        args.entities_file.is_some() || args.relationships_file.is_some();
327
328    let mut graph = GraphInput::default();
329    if let Some(path) = args.entities_file {
330        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
331        if file_size > MAX_MEMORY_BODY_LEN as u64 {
332            return Err(AppError::LimitExceeded(
333                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
334            ));
335        }
336        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
337        graph.entities = serde_json::from_str(&content)?;
338    }
339    if let Some(path) = args.relationships_file {
340        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
341        if file_size > MAX_MEMORY_BODY_LEN as u64 {
342            return Err(AppError::LimitExceeded(
343                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
344            ));
345        }
346        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
347        graph.relationships = serde_json::from_str(&content)?;
348    }
349    if args.graph_stdin {
350        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
351            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
352        })?;
353        raw_body = graph.body.take().unwrap_or_default();
354    }
355    if args.graph_stdin && !graph.entities.is_empty() {
356        entities_provided_externally = true;
357    }
358
359    if graph.entities.len() > max_entities_per_memory() {
360        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
361            max_entities_per_memory(),
362        )));
363    }
364    let mut relationships_truncated = false;
365    let rel_cap = max_relationships_per_memory();
366    if graph.relationships.len() > rel_cap {
367        tracing::warn!(target: "remember",
368            count = graph.relationships.len(),
369            cap = rel_cap,
370            "truncating relationships to cap"
371        );
372        graph.relationships.truncate(rel_cap);
373        relationships_truncated = true;
374    }
375    normalize_and_validate_graph_input(&mut graph)?;
376
377    if raw_body.len() > MAX_MEMORY_BODY_LEN {
378        return Err(AppError::LimitExceeded(
379            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
380        ));
381    }
382
383    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
384    // Without this check, empty embeddings would be persisted, breaking recall semantics.
385    // GAP-08: skip this guard when --force-merge without --clear-body; the existing body
386    // will be preserved from the database, so the effective body will not be empty.
387    let body_will_be_preserved = args.force_merge && raw_body.trim().is_empty() && !args.clear_body;
388    if !entities_provided_externally
389        && graph.entities.is_empty()
390        && raw_body.trim().is_empty()
391        && !body_will_be_preserved
392        && !args.clear_body
393    {
394        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
395    }
396
397    let metadata: serde_json::Value = if let Some(m) = args.metadata {
398        serde_json::from_str(&m)?
399    } else if let Some(path) = args.metadata_file {
400        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
401        if file_size > MAX_MEMORY_BODY_LEN as u64 {
402            return Err(AppError::LimitExceeded(
403                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
404            ));
405        }
406        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
407        serde_json::from_str(&content)?
408    } else {
409        serde_json::json!({})
410    };
411
412    let mut body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
413    let mut snippet: String = raw_body.chars().take(200).collect();
414
415    let paths = AppPaths::resolve(args.db.as_deref())?;
416    paths.ensure_dirs()?;
417
418    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
419    let mut extraction_method: Option<String> = None;
420    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
421    if args.enable_ner && args.skip_extraction {
422        return Err(AppError::Validation(
423            "--enable-ner and --skip-extraction are mutually exclusive; remove one".to_string(),
424        ));
425    }
426    if args.skip_extraction && !args.enable_ner {
427        // v1.0.74: revert to v1.0.45 hidden no-op behavior. The v1.0.67
428        // commit (9ddb17b) promoted this to a hard validation error, which
429        // broke the "kept as a hidden no-op for backwards compatibility"
430        // promise documented in CHANGELOG v1.0.45 and started failing
431        // 5+ CI jobs whose E2E tests use this flag to skip the
432        // GLiNER-ONNX model download in CI environments.
433        tracing::warn!(
434            "--skip-extraction is deprecated since v1.0.45 and has no effect (NER is disabled by default); remove this flag to silence the warning"
435        );
436    }
437    // v1.0.79: --gliner-variant is a no-op kept for compatibility; a
438    // non-default value signals the caller still expects the removed
439    // GLiNER pipeline, so warn explicitly.
440    if args.gliner_variant != "fp32" {
441        tracing::warn!(
442            "--gliner-variant is deprecated and has no effect since v1.0.79 (the GLiNER pipeline was removed); --enable-ner performs URL-regex extraction only"
443        );
444    }
445    let gliner_variant: crate::extraction::GlinerVariant = match args.gliner_variant.as_str() {
446        "int8" => crate::extraction::GlinerVariant::Int8,
447        _ => crate::extraction::GlinerVariant::Fp32,
448    };
449    if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
450        match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
451            Ok(extracted) => {
452                // v1.0.76: ExtractionResult is URL + entity + elapsed_ms;
453                // the LLM ExtractionBackend returns typed relationships
454                // separately. The default build is URL-only extraction.
455                extraction_method = Some("url-regex".to_string());
456                extracted_urls = extracted.urls;
457                // Convert ExtractedEntity → NewEntity (no offsets,
458                // type defaults to Concept).
459                graph.entities = extracted
460                    .entities
461                    .into_iter()
462                    .map(|e| NewEntity {
463                        name: e.name,
464                        entity_type: crate::entity_type::EntityType::Concept,
465                        description: None,
466                    })
467                    .collect();
468                graph.relationships.clear();
469                relationships_truncated = false;
470
471                if graph.entities.len() > max_entities_per_memory() {
472                    graph.entities.truncate(max_entities_per_memory());
473                }
474                if graph.relationships.len() > max_relationships_per_memory() {
475                    relationships_truncated = true;
476                    graph.relationships.truncate(max_relationships_per_memory());
477                }
478                normalize_and_validate_graph_input(&mut graph)?;
479            }
480            Err(e) => {
481                tracing::warn!(target: "remember", error = %e, "auto-extraction failed, graceful degradation");
482                extraction_method = Some("none:extraction-failed".to_string());
483            }
484        }
485    }
486
487    let mut conn = open_rw(&paths.db)?;
488    ensure_schema(&mut conn)?;
489
490    // --dry-run: emit planned action without any DB writes and return.
491    if args.dry_run {
492        let existing = memories::find_by_name(&conn, &namespace, &normalized_name)?;
493        let planned_action = if existing.is_some() && args.force_merge {
494            "would_update"
495        } else {
496            "would_create"
497        };
498        output::emit_json(&serde_json::json!({
499            "dry_run": true,
500            "name": normalized_name,
501            "namespace": namespace,
502            "planned_action": planned_action,
503        }))?;
504        return Ok(());
505    }
506
507    {
508        use crate::constants::MAX_NAMESPACES_ACTIVE;
509        let active_count: u32 = conn.query_row(
510            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
511            [],
512            |r| r.get::<_, i64>(0).map(|v| v as u32),
513        )?;
514        let ns_exists: bool = conn.query_row(
515            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
516            rusqlite::params![namespace],
517            |r| r.get::<_, i64>(0).map(|v| v > 0),
518        )?;
519        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
520            return Err(AppError::NamespaceError(format!(
521                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
522            )));
523        }
524    }
525
526    // M7: detect soft-deleted memory before the standard duplicate check.
527    if let Some((sd_id, true)) =
528        memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
529    {
530        if args.force_merge {
531            memories::clear_deleted_at(&conn, sd_id)?;
532        } else {
533            return Err(AppError::Duplicate(
534                errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
535            ));
536        }
537    }
538
539    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
540    if existing_memory.is_some() && !args.force_merge {
541        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
542            &normalized_name,
543            &namespace,
544        )));
545    }
546
547    // GAP-10: resolve type and description.
548    // For CREATE path (new memory): both are required.
549    // For UPDATE path (--force-merge on existing memory): inherit from existing row when omitted.
550    let (resolved_type, resolved_description) = if existing_memory.is_none() {
551        // CREATE path — both fields are mandatory.
552        let t = args.r#type.ok_or_else(|| {
553            AppError::Validation(
554                "--type and --description are required when creating a new memory".to_string(),
555            )
556        })?;
557        let d = args.description.clone().ok_or_else(|| {
558            AppError::Validation(
559                "--type and --description are required when creating a new memory".to_string(),
560            )
561        })?;
562        (t.as_str().to_string(), d)
563    } else {
564        // UPDATE path (--force-merge) — inherit missing fields from stored row.
565        let existing_row = memories::read_by_name(&conn, &namespace, &normalized_name)?
566            .ok_or_else(|| {
567                AppError::NotFound(format!(
568                    "memory '{normalized_name}' not found in namespace '{namespace}'"
569                ))
570            })?;
571        let t = args
572            .r#type
573            .map(|v| v.as_str().to_string())
574            .unwrap_or_else(|| existing_row.memory_type.clone());
575        let d = args
576            .description
577            .clone()
578            .unwrap_or_else(|| existing_row.description.clone());
579        (t, d)
580    };
581
582    // GAP-08/GAP-09: protect existing body from accidental destruction during --force-merge.
583    // When the caller omits a body (or passes an empty one) without --clear-body, silently
584    // preserve the existing body from the database.  This prevents a common scripting mistake
585    // where a cron job updates metadata fields and inadvertently wipes the stored content.
586    if body_will_be_preserved {
587        if let Some(existing_row) = memories::read_by_name(&conn, &namespace, &normalized_name)? {
588            if !existing_row.body.is_empty() {
589                tracing::debug!(target: "remember",
590                    name = %normalized_name,
591                    "GAP-08: empty body with --force-merge and no --clear-body; preserving existing body"
592                );
593                raw_body = existing_row.body;
594                body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
595                snippet = raw_body.chars().take(200).collect();
596            }
597        }
598    }
599
600    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
601
602    output::emit_progress_i18n(
603        &format!(
604            "Remember stage: validated input; available memory {} MB",
605            crate::memory_guard::available_memory_mb()
606        ),
607        &format!(
608            "Stage remember: input validated; available memory {} MB",
609            crate::memory_guard::available_memory_mb()
610        ),
611    );
612
613    let model_max_length = crate::tokenizer::get_model_max_length();
614    let total_passage_tokens = crate::tokenizer::count_passage_tokens(&raw_body)?;
615    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body);
616    let chunks_created = chunks_info.len();
617    // For single-chunk bodies the memory row itself stores the content and no
618    // entry is appended to `memory_chunks` (see line ~545). For multi-chunk
619    // bodies every chunk is persisted via `insert_chunk_slices`.
620    let chunks_persisted = compute_chunks_persisted(chunks_info.len());
621
622    output::emit_progress_i18n(
623        &format!(
624            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
625            chunks_created,
626            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
627        ),
628        &format!(
629            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
630            chunks_created,
631            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
632        ),
633    );
634
635    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
636        return Err(AppError::LimitExceeded(format!(
637            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
638            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
639        )));
640    }
641
642    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
643    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
644
645    // v1.0.84 (ADR-0042): extrai o backend que efetivamente executou o
646    // embedding da passagem (ou do batch em chunks) para popular
647    // `backend_invoked` no envelope de resposta.
648    let skip_embed = crate::embedder::should_skip_embedding_on_failure();
649    let (embedding, backend_invoked_passage): (Option<Vec<f32>>, Option<&str>) = if chunks_info
650        .len()
651        == 1
652    {
653        match crate::embedder::embed_passage_with_embedding_choice(
654            &paths.models,
655            &raw_body,
656            embedding_backend,
657            llm_backend,
658        ) {
659            Ok((v, k)) => (Some(v), Some(k.as_str())),
660            Err(AppError::Validation(msg)) => return Err(AppError::Validation(msg)),
661            Err(e) if skip_embed => {
662                tracing::warn!(error = %e, "embedding failed; --skip-embedding-on-failure active, persisting without embedding");
663                (None, None)
664            }
665            Err(e) => return Err(e),
666        }
667    } else {
668        let chunk_texts: Vec<String> = chunks_info
669            .iter()
670            .map(|c| chunking::chunk_text(&raw_body, c).to_string())
671            .collect();
672        // G42/S2+S3 (v1.0.79): chunks are embedded in dim-adaptive
673        // batches per LLM call (G44: clamp(base*64/dim, 1, base)), with up to
674        // --llm-parallelism bounded subprocesses in flight. The old
675        // serial loop spent SUM(items) wall time; the fan-out spends
676        // roughly MAX(batch).
677        output::emit_progress_i18n(
678            &format!(
679                "Embedding {} chunks in parallel batches (parallelism {})...",
680                chunks_info.len(),
681                args.llm_parallelism
682            ),
683            &format!(
684                "Embedding {} chunks em lotes paralelos (paralelismo {})...",
685                chunks_info.len(),
686                args.llm_parallelism
687            ),
688        );
689        if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
690            if rss > args.max_rss_mb {
691                tracing::error!(target: "remember",
692                    rss_mb = rss,
693                    max_rss_mb = args.max_rss_mb,
694                    "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
695                );
696                return Err(AppError::LowMemory {
697                    available_mb: crate::memory_guard::available_memory_mb(),
698                    required_mb: args.max_rss_mb,
699                });
700            }
701        }
702        match crate::embedder::embed_passages_parallel_with_embedding_choice(
703            &paths.models,
704            &chunk_texts,
705            args.llm_parallelism as usize,
706            crate::embedder::chunk_embed_batch_size(),
707            embedding_backend,
708            llm_backend,
709        ) {
710            Ok(chunk_embeddings) => {
711                output::emit_progress_i18n(
712                    &format!(
713                        "Remember stage: chunk embeddings complete; process RSS {} MB",
714                        crate::memory_guard::current_process_memory_mb().unwrap_or(0)
715                    ),
716                    &format!(
717                        "Stage remember: chunk embeddings completed; process RSS {} MB",
718                        crate::memory_guard::current_process_memory_mb().unwrap_or(0)
719                    ),
720                );
721                let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
722                chunk_embeddings_cache = Some(chunk_embeddings);
723                (Some(aggregated), None)
724            }
725            Err(e) if skip_embed => {
726                tracing::warn!(error = %e, "chunk embedding failed; --skip-embedding-on-failure active, persisting without embedding");
727                (None, None)
728            }
729            Err(e) => return Err(e),
730        }
731    };
732    let body_for_storage = raw_body;
733
734    let memory_type = resolved_type.as_str();
735    let new_memory = NewMemory {
736        namespace: namespace.clone(),
737        name: normalized_name.clone(),
738        memory_type: memory_type.to_string(),
739        description: resolved_description.clone(),
740        body: body_for_storage,
741        body_hash: body_hash.clone(),
742        session_id: args.session_id.clone(),
743        source: "agent".to_string(),
744        metadata,
745    };
746
747    let mut warnings = Vec::with_capacity(4);
748    let mut entities_persisted = 0usize;
749    let mut relationships_persisted = 0usize;
750
751    // G42/S2+A4 (v1.0.79): entity names are SHORT texts — they get their
752    // own batch profile (25 per LLM call) instead of one subprocess per
753    // 3-15 byte name (21 names used to cost ~12 minutes, 46% of the
754    // measured remember total).
755    let entity_texts: Vec<String> = graph
756        .entities
757        .iter()
758        .map(|entity| match &entity.description {
759            Some(desc) => format!("{} {}", entity.name, desc),
760            None => entity.name.clone(),
761        })
762        .collect();
763    // G56 (v1.0.80): route entity-name embedding through the in-process
764    // cache. Repeated `remember` invocations within one CLI process — and
765    // re-embedded entities inside a single batch — skip the LLM call
766    // entirely when the (model, text) pair was already produced. The
767    // chunk body embedding below still uses `embed_passages_parallel_local`
768    // because chunks are unique per memory and the cache hit rate is
769    // effectively zero.
770    let (graph_entity_embeddings, embed_cache_stats) =
771        match crate::embedder::embed_entity_texts_cached(
772            &paths.models,
773            &entity_texts,
774            args.llm_parallelism as usize,
775            embedding_backend,
776            llm_backend,
777        ) {
778            Ok(r) => r,
779            Err(e) if skip_embed => {
780                tracing::warn!(error = %e, "entity embedding failed; --skip-embedding-on-failure active");
781                let empty: Vec<Vec<f32>> = entity_texts.iter().map(|_| vec![]).collect();
782                (empty, crate::embedder::EmbedCacheStats::default())
783            }
784            Err(e) => return Err(e),
785        };
786    if embed_cache_stats.hits > 0 {
787        tracing::debug!(
788            hits = embed_cache_stats.hits,
789            misses = embed_cache_stats.misses,
790            requested = embed_cache_stats.requested,
791            "G56: entity embed cache hit (remember)"
792        );
793    }
794
795    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
796
797    let mut skip_reindex = false;
798    let (memory_id, action, version) = match existing_memory {
799        Some((existing_id, _updated_at, _current_version)) => {
800            if let Some(hash_id) = duplicate_hash_id {
801                if hash_id != existing_id {
802                    warnings.push(format!(
803                        "identical body already exists as memory id {hash_id}"
804                    ));
805                }
806            }
807
808            // C1 fix: capture old values for FTS5 sync before update
809            let (old_fts_name, old_fts_desc, old_fts_body): (String, String, String) = tx
810                .query_row(
811                    "SELECT name, description, body FROM memories WHERE id = ?1",
812                    rusqlite::params![existing_id],
813                    |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
814                )?;
815
816            // G15: skip re-indexing when body hash matches (common in --force-merge loops)
817            let existing_body_hash: Option<String> = tx
818                .query_row(
819                    "SELECT body_hash FROM memories WHERE id = ?1",
820                    rusqlite::params![existing_id],
821                    |r| r.get(0),
822                )
823                .ok();
824            let body_unchanged = existing_body_hash.as_deref() == Some(&body_hash);
825            skip_reindex = body_unchanged;
826            if !body_unchanged {
827                storage_chunks::delete_chunks(&tx, existing_id)?;
828            }
829
830            let next_v = versions::next_version(&tx, existing_id)?;
831            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
832
833            // C1 fix: sync FTS5 external-content index after update
834            // (trg_fts_au trigger is absent by design due to sqlite-vec conflict)
835            memories::sync_fts_after_update(
836                &tx,
837                existing_id,
838                &old_fts_name,
839                &old_fts_desc,
840                &old_fts_body,
841                &normalized_name,
842                &resolved_description,
843                &new_memory.body,
844            )?;
845
846            versions::insert_version(
847                &tx,
848                existing_id,
849                next_v,
850                &normalized_name,
851                memory_type,
852                &resolved_description,
853                &new_memory.body,
854                &serde_json::to_string(&new_memory.metadata)?,
855                None,
856                "edit",
857            )?;
858            if !body_unchanged {
859                if let Some(ref emb) = embedding {
860                    memories::upsert_vec(
861                        &tx,
862                        existing_id,
863                        &namespace,
864                        memory_type,
865                        emb,
866                        &normalized_name,
867                        &snippet,
868                    )?;
869                }
870            }
871            (existing_id, "updated".to_string(), next_v)
872        }
873        None => {
874            if let Some(hash_id) = duplicate_hash_id {
875                warnings.push(format!(
876                    "identical body already exists as memory id {hash_id}"
877                ));
878            }
879            let id = memories::insert(&tx, &new_memory)?;
880            versions::insert_version(
881                &tx,
882                id,
883                1,
884                &normalized_name,
885                memory_type,
886                &resolved_description,
887                &new_memory.body,
888                &serde_json::to_string(&new_memory.metadata)?,
889                None,
890                "create",
891            )?;
892            if let Some(ref emb) = embedding {
893                memories::upsert_vec(
894                    &tx,
895                    id,
896                    &namespace,
897                    memory_type,
898                    emb,
899                    &normalized_name,
900                    &snippet,
901                )?;
902            }
903            (id, "created".to_string(), 1)
904        }
905    };
906
907    if chunks_info.len() > 1 && !skip_reindex {
908        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
909
910        if let Some(chunk_embeddings) = chunk_embeddings_cache.take() {
911            for (i, emb) in chunk_embeddings.iter().enumerate() {
912                storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
913            }
914        }
915        output::emit_progress_i18n(
916            &format!(
917                "Remember stage: persisted chunk vectors; process RSS {} MB",
918                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
919            ),
920            &format!(
921                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
922                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
923            ),
924        );
925    }
926
927    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
928        for entity in &graph.entities {
929            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
930            let entity_embedding = &graph_entity_embeddings[entities_persisted];
931            entities::upsert_entity_vec(
932                &tx,
933                entity_id,
934                &namespace,
935                entity.entity_type,
936                entity_embedding,
937                &entity.name,
938            )?;
939            entities::link_memory_entity(&tx, memory_id, entity_id)?;
940            entities_persisted += 1;
941        }
942        let entity_types: std::collections::HashMap<&str, EntityType> = graph
943            .entities
944            .iter()
945            .map(|entity| (entity.name.as_str(), entity.entity_type))
946            .collect();
947
948        let mut affected_entity_ids: std::collections::HashSet<i64> =
949            std::collections::HashSet::new();
950        for entity in &graph.entities {
951            if let Some(eid) = entities::find_entity_id(&tx, &namespace, &entity.name)? {
952                affected_entity_ids.insert(eid);
953            }
954        }
955
956        for rel in &graph.relationships {
957            let source_entity = NewEntity {
958                name: rel.source.clone(),
959                entity_type: entity_types
960                    .get(rel.source.as_str())
961                    .copied()
962                    .unwrap_or(EntityType::Concept),
963                description: None,
964            };
965            let target_entity = NewEntity {
966                name: rel.target.clone(),
967                entity_type: entity_types
968                    .get(rel.target.as_str())
969                    .copied()
970                    .unwrap_or(EntityType::Concept),
971                description: None,
972            };
973            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
974            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
975            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
976            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
977            affected_entity_ids.insert(source_id);
978            affected_entity_ids.insert(target_id);
979            relationships_persisted += 1;
980        }
981
982        for &eid in &affected_entity_ids {
983            entities::recalculate_degree(&tx, eid)?;
984        }
985        // GAP-17: warn when entity degree exceeds the configured cap.
986        if args.max_entity_degree > 0 {
987            let cap = args.max_entity_degree as i64;
988            for &eid in &affected_entity_ids {
989                let degree: i64 = tx.query_row(
990                    "SELECT degree FROM entities WHERE id = ?1",
991                    rusqlite::params![eid],
992                    |r| r.get(0),
993                )?;
994                if degree > cap {
995                    let name: String = tx.query_row(
996                        "SELECT name FROM entities WHERE id = ?1",
997                        rusqlite::params![eid],
998                        |r| r.get(0),
999                    )?;
1000                    tracing::warn!(target: "remember",
1001                        entity = %name,
1002                        degree = degree,
1003                        cap = cap,
1004                        "entity degree cap exceeded"
1005                    );
1006                }
1007            }
1008        }
1009    }
1010    tx.commit()?;
1011
1012    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
1013    // Failures do not propagate — non-critical path with graceful degradation.
1014    let urls_persisted = if !extracted_urls.is_empty() {
1015        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
1016            .into_iter()
1017            .map(|u| storage_urls::MemoryUrl {
1018                url: u.url,
1019                offset: Some(u.start as i64),
1020            })
1021            .collect();
1022        storage_urls::insert_urls(&conn, memory_id, &url_entries)
1023    } else {
1024        0
1025    };
1026
1027    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
1028
1029    let created_at_epoch = chrono::Utc::now().timestamp();
1030    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
1031
1032    output::emit_json(&RememberResponse {
1033        memory_id,
1034        // Persist the normalized (kebab-case) slug as `name` since that is the
1035        // storage key. The original input is exposed via `original_name` only
1036        // when normalization actually changed something (B_4 in v1.0.32).
1037        name: normalized_name.clone(),
1038        namespace,
1039        action: action.clone(),
1040        operation: action,
1041        version,
1042        entities_persisted,
1043        relationships_persisted,
1044        relationships_truncated,
1045        chunks_created,
1046        chunks_persisted,
1047        urls_persisted,
1048        extraction_method,
1049        merged_into_memory_id: None,
1050        warnings,
1051        created_at: created_at_epoch,
1052        created_at_iso,
1053        elapsed_ms: inicio.elapsed().as_millis() as u64,
1054        name_was_normalized,
1055        original_name: name_was_normalized.then_some(original_name),
1056        backend_invoked: backend_invoked_passage,
1057    })?;
1058
1059    Ok(())
1060}
1061
1062#[cfg(test)]
1063mod tests {
1064    use super::compute_chunks_persisted;
1065    use crate::output::RememberResponse;
1066
1067    // Bug H-M8: chunks_persisted contract is unit-testable and matches schema.
1068    #[test]
1069    fn chunks_persisted_zero_for_zero_chunks() {
1070        assert_eq!(compute_chunks_persisted(0), 0);
1071    }
1072
1073    #[test]
1074    fn chunks_persisted_zero_for_single_chunk_body() {
1075        // Single-chunk bodies live in the memories row itself; no row is
1076        // appended to memory_chunks. This is the documented contract.
1077        assert_eq!(compute_chunks_persisted(1), 0);
1078    }
1079
1080    #[test]
1081    fn chunks_persisted_equals_count_for_multi_chunk_body() {
1082        // Every chunk above the first triggers a row in memory_chunks.
1083        assert_eq!(compute_chunks_persisted(2), 2);
1084        assert_eq!(compute_chunks_persisted(7), 7);
1085        assert_eq!(compute_chunks_persisted(64), 64);
1086    }
1087
1088    #[test]
1089    fn remember_response_serializes_required_fields() {
1090        let resp = RememberResponse {
1091            memory_id: 42,
1092            name: "minha-mem".to_string(),
1093            namespace: "global".to_string(),
1094            action: "created".to_string(),
1095            operation: "created".to_string(),
1096            version: 1,
1097            entities_persisted: 0,
1098            relationships_persisted: 0,
1099            relationships_truncated: false,
1100            chunks_created: 1,
1101            chunks_persisted: 0,
1102            urls_persisted: 0,
1103            extraction_method: None,
1104            merged_into_memory_id: None,
1105            warnings: vec![],
1106            created_at: 1_705_320_000,
1107            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
1108            elapsed_ms: 55,
1109            name_was_normalized: false,
1110            original_name: None,
1111            backend_invoked: None,
1112        };
1113
1114        let json = serde_json::to_value(&resp).expect("serialization failed");
1115        assert_eq!(json["memory_id"], 42);
1116        assert_eq!(json["action"], "created");
1117        assert_eq!(json["operation"], "created");
1118        assert_eq!(json["version"], 1);
1119        assert_eq!(json["elapsed_ms"], 55u64);
1120        assert!(json["warnings"].is_array());
1121        assert!(json["merged_into_memory_id"].is_null());
1122    }
1123
1124    #[test]
1125    fn remember_response_action_e_operation_sao_aliases() {
1126        let resp = RememberResponse {
1127            memory_id: 1,
1128            name: "mem".to_string(),
1129            namespace: "global".to_string(),
1130            action: "updated".to_string(),
1131            operation: "updated".to_string(),
1132            version: 2,
1133            entities_persisted: 3,
1134            relationships_persisted: 1,
1135            relationships_truncated: false,
1136            extraction_method: None,
1137            chunks_created: 2,
1138            chunks_persisted: 2,
1139            urls_persisted: 0,
1140            merged_into_memory_id: None,
1141            warnings: vec![],
1142            created_at: 0,
1143            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1144            elapsed_ms: 0,
1145            name_was_normalized: false,
1146            original_name: None,
1147            backend_invoked: None,
1148        };
1149
1150        let json = serde_json::to_value(&resp).expect("serialization failed");
1151        assert_eq!(
1152            json["action"], json["operation"],
1153            "action e operation devem ser iguais"
1154        );
1155        assert_eq!(json["entities_persisted"], 3);
1156        assert_eq!(json["relationships_persisted"], 1);
1157        assert_eq!(json["chunks_created"], 2);
1158    }
1159
1160    #[test]
1161    fn remember_response_warnings_lista_mensagens() {
1162        let resp = RememberResponse {
1163            memory_id: 5,
1164            name: "dup-mem".to_string(),
1165            namespace: "global".to_string(),
1166            action: "created".to_string(),
1167            operation: "created".to_string(),
1168            version: 1,
1169            entities_persisted: 0,
1170            extraction_method: None,
1171            relationships_persisted: 0,
1172            relationships_truncated: false,
1173            chunks_created: 1,
1174            chunks_persisted: 0,
1175            urls_persisted: 0,
1176            merged_into_memory_id: None,
1177            warnings: vec!["identical body already exists as memory id 3".to_string()],
1178            created_at: 0,
1179            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1180            elapsed_ms: 10,
1181            name_was_normalized: false,
1182            original_name: None,
1183            backend_invoked: None,
1184        };
1185
1186        let json = serde_json::to_value(&resp).expect("serialization failed");
1187        let warnings = json["warnings"]
1188            .as_array()
1189            .expect("warnings deve ser array");
1190        assert_eq!(warnings.len(), 1);
1191        assert!(warnings[0].as_str().unwrap().contains("identical body"));
1192    }
1193
1194    #[test]
1195    fn invalid_name_reserved_prefix_returns_validation_error() {
1196        use crate::errors::AppError;
1197        // Validates the rejection logic for names with the "__" prefix directly
1198        let nome = "__reservado";
1199        let resultado: Result<(), AppError> = if nome.starts_with("__") {
1200            Err(AppError::Validation(
1201                crate::i18n::validation::reserved_name(),
1202            ))
1203        } else {
1204            Ok(())
1205        };
1206        assert!(resultado.is_err());
1207        if let Err(AppError::Validation(msg)) = resultado {
1208            assert!(!msg.is_empty());
1209        }
1210    }
1211
1212    #[test]
1213    fn name_too_long_returns_validation_error() {
1214        use crate::errors::AppError;
1215        let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
1216        let resultado: Result<(), AppError> =
1217            if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
1218                Err(AppError::Validation(crate::i18n::validation::name_length(
1219                    crate::constants::MAX_MEMORY_NAME_LEN,
1220                )))
1221            } else {
1222                Ok(())
1223            };
1224        assert!(resultado.is_err());
1225    }
1226
1227    #[test]
1228    fn remember_response_merged_into_memory_id_some_serializes_integer() {
1229        let resp = RememberResponse {
1230            memory_id: 10,
1231            name: "mem-mergeada".to_string(),
1232            namespace: "global".to_string(),
1233            action: "updated".to_string(),
1234            operation: "updated".to_string(),
1235            version: 3,
1236            extraction_method: None,
1237            entities_persisted: 0,
1238            relationships_persisted: 0,
1239            relationships_truncated: false,
1240            chunks_created: 1,
1241            chunks_persisted: 0,
1242            urls_persisted: 0,
1243            merged_into_memory_id: Some(7),
1244            warnings: vec![],
1245            created_at: 0,
1246            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1247            elapsed_ms: 0,
1248            name_was_normalized: false,
1249            original_name: None,
1250            backend_invoked: None,
1251        };
1252
1253        let json = serde_json::to_value(&resp).expect("serialization failed");
1254        assert_eq!(json["merged_into_memory_id"], 7);
1255    }
1256
1257    #[test]
1258    fn remember_response_urls_persisted_serializes_field() {
1259        // v1.0.24 P0-2: garante que urls_persisted aparece no JSON e aceita valor > 0.
1260        let resp = RememberResponse {
1261            memory_id: 3,
1262            name: "mem-com-urls".to_string(),
1263            namespace: "global".to_string(),
1264            action: "created".to_string(),
1265            operation: "created".to_string(),
1266            version: 1,
1267            entities_persisted: 0,
1268            relationships_persisted: 0,
1269            relationships_truncated: false,
1270            chunks_created: 1,
1271            chunks_persisted: 0,
1272            urls_persisted: 3,
1273            extraction_method: Some("regex-only".to_string()),
1274            merged_into_memory_id: None,
1275            warnings: vec![],
1276            created_at: 0,
1277            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1278            elapsed_ms: 0,
1279            name_was_normalized: false,
1280            original_name: None,
1281            backend_invoked: None,
1282        };
1283        let json = serde_json::to_value(&resp).expect("serialization failed");
1284        assert_eq!(json["urls_persisted"], 3);
1285    }
1286
1287    #[test]
1288    fn empty_name_after_normalization_returns_specific_message() {
1289        // P0-4 regression: name consisting only of hyphens normalizes to empty string;
1290        // must produce a distinct error message, not the "too long" message.
1291        use crate::errors::AppError;
1292        let normalized = "---".to_lowercase().replace(['_', ' '], "-");
1293        let normalized = normalized.trim_matches('-').to_string();
1294        let resultado: Result<(), AppError> = if normalized.is_empty() {
1295            Err(AppError::Validation(
1296                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
1297            ))
1298        } else {
1299            Ok(())
1300        };
1301        assert!(resultado.is_err());
1302        if let Err(AppError::Validation(msg)) = resultado {
1303            assert!(
1304                msg.contains("empty after normalization"),
1305                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
1306            );
1307        }
1308    }
1309
1310    #[test]
1311    fn name_only_underscores_after_normalization_returns_specific_message() {
1312        // P0-4 regression: name consisting only of underscores normalizes to empty string.
1313        use crate::errors::AppError;
1314        let normalized = "___".to_lowercase().replace(['_', ' '], "-");
1315        let normalized = normalized.trim_matches('-').to_string();
1316        assert!(
1317            normalized.is_empty(),
1318            "underscores devem normalizar para string vazia"
1319        );
1320        let resultado: Result<(), AppError> = if normalized.is_empty() {
1321            Err(AppError::Validation(
1322                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
1323            ))
1324        } else {
1325            Ok(())
1326        };
1327        assert!(resultado.is_err());
1328        if let Err(AppError::Validation(msg)) = resultado {
1329            assert!(
1330                msg.contains("empty after normalization"),
1331                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
1332            );
1333        }
1334    }
1335
1336    #[test]
1337    fn remember_response_relationships_truncated_serializes_field() {
1338        // P1-D: garante que relationships_truncated aparece no JSON como bool.
1339        let resp_false = RememberResponse {
1340            memory_id: 1,
1341            name: "test".to_string(),
1342            namespace: "global".to_string(),
1343            action: "created".to_string(),
1344            operation: "created".to_string(),
1345            version: 1,
1346            entities_persisted: 2,
1347            relationships_persisted: 1,
1348            relationships_truncated: false,
1349            chunks_created: 1,
1350            chunks_persisted: 0,
1351            urls_persisted: 0,
1352            extraction_method: None,
1353            merged_into_memory_id: None,
1354            warnings: vec![],
1355            created_at: 0,
1356            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1357            elapsed_ms: 0,
1358            name_was_normalized: false,
1359            original_name: None,
1360            backend_invoked: None,
1361        };
1362        let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
1363        assert_eq!(json_false["relationships_truncated"], false);
1364
1365        let resp_true = RememberResponse {
1366            relationships_truncated: true,
1367            ..resp_false
1368        };
1369        let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
1370        assert_eq!(json_true["relationships_truncated"], true);
1371    }
1372
1373    // GAP-08: body-preservation predicate tests.
1374    // Verifies the decision logic that determines whether an existing body should
1375    // be kept instead of overwritten with an empty incoming body during --force-merge.
1376
1377    /// Returns `true` when the existing body should be preserved.
1378    ///
1379    /// Mirrors the `body_will_be_preserved` expression in `run()` so the logic
1380    /// is testable without a real database connection.
1381    fn should_preserve_body(force_merge: bool, raw_body_is_empty: bool, clear_body: bool) -> bool {
1382        force_merge && raw_body_is_empty && !clear_body
1383    }
1384
1385    #[test]
1386    fn gap08_empty_body_force_merge_no_clear_body_preserves() {
1387        // Caller passes no body with --force-merge but without --clear-body.
1388        // The existing body in the DB must be kept.
1389        assert!(
1390            should_preserve_body(true, true, false),
1391            "empty body + force-merge + no clear-body should trigger preservation"
1392        );
1393    }
1394
1395    #[test]
1396    fn gap08_empty_body_force_merge_with_clear_body_does_not_preserve() {
1397        // Caller explicitly passes --clear-body; intentional wipe is honoured.
1398        assert!(
1399            !should_preserve_body(true, true, true),
1400            "--clear-body must bypass preservation"
1401        );
1402    }
1403
1404    #[test]
1405    fn gap08_non_empty_body_force_merge_does_not_preserve() {
1406        // Caller provides a real body; it must overwrite the existing one.
1407        assert!(
1408            !should_preserve_body(true, false, false),
1409            "non-empty body must overwrite, not preserve"
1410        );
1411    }
1412
1413    #[test]
1414    fn gap08_empty_body_no_force_merge_does_not_preserve() {
1415        // Without --force-merge the path is a fresh create; no preservation needed.
1416        assert!(
1417            !should_preserve_body(false, true, false),
1418            "no --force-merge means no preservation logic applies"
1419        );
1420    }
1421}