Skip to main content

sqlite_graphrag/commands/
remember.rs

1//! Handler for the `remember` CLI subcommand.
2
3use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Maps the number of chunks produced for a body to the number of rows that
/// end up in `memory_chunks`.
///
/// * Zero or one chunk: the body lives in the `memories` row itself, so no
///   chunk row is written and the result is `0`.
/// * Two or more chunks: every chunk is persisted, so the result equals
///   `chunks_created`.
///
/// Kept as a standalone function so the H-M8 invariant can be unit-tested
/// without driving the whole handler. The `chunks_persisted` field of the
/// response schema (`docs/schemas/remember.schema.json`) documents the same
/// contract.
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        // Single-chunk (or empty) bodies never get a `memory_chunks` row.
        0 | 1 => 0,
        multi => multi,
    }
}
32
33#[derive(clap::Args)]
34#[command(after_long_help = "EXAMPLES:\n  \
35    # Create a memory with inline body\n  \
36    sqlite-graphrag remember --name design-auth --type decision \\\n    \
37    --description \"auth design\" --body \"JWT for stateless auth\"\n\n  \
38    # Create with curated graph via --graph-stdin\n  \
39    echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
40    sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n  \
41    # Enable GLiNER NER extraction with --graph-stdin\n  \
42    echo '{\"body\":\"Alice from Microsoft...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
43    sqlite-graphrag remember --name ner-test --type note --description \"test\" \\\n    \
44    --graph-stdin --enable-ner --gliner-variant int8\n\n  \
45    # Idempotent upsert with --force-merge\n  \
46    sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n    \
47    --body \"new content\" --force-merge\n\n\
48NOTE:\n  \
49    remember does NOT accept positional arguments.\n  \
50    Use --body \"text\" for inline content\n  \
51    Use --body-file path for file content\n  \
52    Use --body-stdin for piped content\n  \
53    Use --graph-stdin for JSON with entities and relationships\n\n\
54ENTITY TYPES (for --graph-stdin entities, NOT memory --type):\n  \
55    concept, tool, person, file, project, decision, incident,\n  \
56    organization, location, date, dashboard, issue_tracker, memory\n  \
57    WARNING: reference, skill, document, note, user, feedback are\n  \
58    MEMORY types only — NOT valid for entities.\n  \
59    Mapping: reference→concept, document→file, user→person")]
60pub struct RememberArgs {
61    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
62    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
63    #[arg(long)]
64    pub name: String,
65    #[arg(
66        long,
67        value_enum,
68        long_help = "Memory kind stored in `memories.type`. Required when creating a new memory. Optional with --force-merge: if omitted the existing memory type is inherited. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
69    )]
70    pub r#type: Option<MemoryType>,
71    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
72    /// Required when creating a new memory. Optional with --force-merge: if omitted the existing description is inherited.
73    #[arg(long)]
74    pub description: Option<String>,
75    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
76    /// Maximum 512000 bytes; rejected if empty without an external graph.
77    #[arg(
78        long,
79        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
80        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
81    )]
82    pub body: Option<String>,
83    #[arg(
84        long,
85        help = "Read body from a file instead of --body",
86        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
87    )]
88    pub body_file: Option<std::path::PathBuf>,
89    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
90    /// Mutually exclusive with --body, --body-file, --graph-stdin.
91    #[arg(
92        long,
93        conflicts_with_all = ["body", "body_file", "graph_stdin"]
94    )]
95    pub body_stdin: bool,
96    #[arg(
97        long,
98        help = "JSON file containing entities to associate with this memory"
99    )]
100    pub entities_file: Option<std::path::PathBuf>,
101    #[arg(
102        long,
103        help = "JSON file containing relationships to associate with this memory"
104    )]
105    pub relationships_file: Option<std::path::PathBuf>,
106    #[arg(
107        long,
108        help = "Read graph JSON (body + entities + relationships) from stdin",
109        conflicts_with_all = [
110            "body",
111            "body_file",
112            "body_stdin",
113            "entities_file",
114            "relationships_file"
115        ]
116    )]
117    pub graph_stdin: bool,
118    #[arg(
119        long,
120        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
121    )]
122    pub namespace: Option<String>,
123    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
124    #[arg(long)]
125    pub metadata: Option<String>,
126    #[arg(long, help = "JSON file containing metadata key-value pairs")]
127    pub metadata_file: Option<std::path::PathBuf>,
128    #[arg(long)]
129    pub force_merge: bool,
130    #[arg(
131        long,
132        value_name = "EPOCH_OR_RFC3339",
133        value_parser = crate::parsers::parse_expected_updated_at,
134        long_help = "Optimistic lock: reject if updated_at does not match. \
135Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
136    )]
137    pub expected_updated_at: Option<i64>,
138    #[arg(
139        long,
140        env = "SQLITE_GRAPHRAG_ENABLE_NER",
141        value_parser = crate::parsers::parse_bool_flexible,
142        action = clap::ArgAction::Set,
143        num_args = 0..=1,
144        default_missing_value = "true",
145        default_value = "false",
146        help = "Enable automatic GLiNER NER entity/relationship extraction from body"
147    )]
148    pub enable_ner: bool,
149    #[arg(
150        long,
151        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
152        default_value = "fp32",
153        help = "GLiNER model variant: fp32 (1.1GB, best quality), fp16 (580MB), int8 (349MB, fastest but may miss entities on short texts), q4, q4f16"
154    )]
155    pub gliner_variant: String,
156    #[arg(long, hide = true)]
157    pub skip_extraction: bool,
158    /// Explicitly clear the body content (set to empty string). Required to distinguish
159    /// intentional body clearing from accidental omission during --force-merge.
160    /// Without this flag, an empty body passed to --force-merge preserves the existing body.
161    #[arg(
162        long,
163        default_value_t = false,
164        help = "Explicitly clear body content during --force-merge (without this flag, an empty body is ignored and the existing body is kept)"
165    )]
166    pub clear_body: bool,
167    /// Validate input and report planned actions without persisting.
168    #[arg(
169        long,
170        default_value_t = false,
171        help = "Validate input and report planned actions without persisting"
172    )]
173    pub dry_run: bool,
174    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
175    #[arg(long)]
176    pub session_id: Option<String>,
177    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
178    pub format: JsonOutputFormat,
179    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
180    pub json: bool,
181    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
182    pub db: Option<String>,
183    /// Maximum process RSS in MiB; abort if exceeded during embedding.
184    #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
185          help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
186    pub max_rss_mb: u64,
187    /// Emit a warning (but do not reject) when persisting an entity whose degree would
188    /// exceed this value after the upsert. Default 50. Set 0 to disable the check.
189    #[arg(long, default_value_t = 50, value_name = "N")]
190    pub max_entity_degree: u32,
191}
192
193#[derive(Deserialize, Default)]
194#[serde(deny_unknown_fields)]
195struct GraphInput {
196    #[serde(default)]
197    body: Option<String>,
198    #[serde(default)]
199    entities: Vec<NewEntity>,
200    #[serde(default)]
201    relationships: Vec<NewRelationship>,
202}
203
204fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
205    for rel in &mut graph.relationships {
206        rel.relation = crate::parsers::normalize_relation(&rel.relation);
207        if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
208            return Err(AppError::Validation(format!(
209                "{e} for relationship '{}' -> '{}'",
210                rel.source, rel.target
211            )));
212        }
213        crate::parsers::warn_if_non_canonical(&rel.relation);
214        if !(0.0..=1.0).contains(&rel.strength) {
215            return Err(AppError::Validation(format!(
216                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
217                rel.strength, rel.source, rel.target
218            )));
219        }
220    }
221
222    Ok(())
223}
224
225#[tracing::instrument(skip_all, level = "debug", name = "remember")]
226pub fn run(args: RememberArgs) -> Result<(), AppError> {
227    use crate::constants::*;
228
229    let inicio = std::time::Instant::now();
230    let _ = args.format;
231    tracing::debug!(target: "remember", name = %args.name, "persisting memory");
232    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
233
234    // Capture the original `--name` before normalization so the JSON response can
235    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
236    // an owned String because `args.name` is moved into the response below.
237    let original_name = args.name.clone();
238
239    // Auto-normalize to kebab-case before validation (P2-H).
240    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
241    // after truncation by a long filename ending in a hyphen.
242    let normalized_name = {
243        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
244        let trimmed = lower.trim_matches('-').to_string();
245        if trimmed != args.name {
246            tracing::warn!(target: "remember",
247                original = %args.name,
248                normalized = %trimmed,
249                "name auto-normalized to kebab-case"
250            );
251        }
252        trimmed
253    };
254    let name_was_normalized = normalized_name != original_name;
255
256    if normalized_name.is_empty() {
257        return Err(AppError::Validation(
258            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
259        ));
260    }
261    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
262        return Err(AppError::LimitExceeded(
263            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
264        ));
265    }
266
267    if normalized_name.starts_with("__") {
268        return Err(AppError::Validation(
269            crate::i18n::validation::reserved_name(),
270        ));
271    }
272
273    {
274        let slug_re = crate::constants::name_slug_regex();
275        if !slug_re.is_match(&normalized_name) {
276            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
277                &normalized_name,
278            )));
279        }
280    }
281
282    if let Some(ref desc) = args.description {
283        if desc.len() > MAX_MEMORY_DESCRIPTION_LEN {
284            return Err(AppError::Validation(
285                crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
286            ));
287        }
288    }
289
290    let mut raw_body = if let Some(b) = args.body {
291        b
292    } else if let Some(ref path) = args.body_file {
293        let file_size = std::fs::metadata(path).map_err(AppError::Io)?.len();
294        if file_size > MAX_MEMORY_BODY_LEN as u64 {
295            return Err(AppError::LimitExceeded(
296                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
297            ));
298        }
299        match std::fs::read_to_string(path) {
300            Ok(s) => s,
301            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
302                let bytes = std::fs::read(path).map_err(AppError::Io)?;
303                tracing::warn!(target: "remember", "body file contains invalid UTF-8; replacing invalid sequences");
304                String::from_utf8_lossy(&bytes).into_owned()
305            }
306            Err(e) => return Err(AppError::Io(e)),
307        }
308    } else if args.body_stdin || args.graph_stdin {
309        crate::stdin_helper::read_stdin_with_timeout(60)?
310    } else {
311        String::new()
312    };
313
314    let mut entities_provided_externally =
315        args.entities_file.is_some() || args.relationships_file.is_some();
316
317    let mut graph = GraphInput::default();
318    if let Some(path) = args.entities_file {
319        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
320        if file_size > MAX_MEMORY_BODY_LEN as u64 {
321            return Err(AppError::LimitExceeded(
322                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
323            ));
324        }
325        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
326        graph.entities = serde_json::from_str(&content)?;
327    }
328    if let Some(path) = args.relationships_file {
329        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
330        if file_size > MAX_MEMORY_BODY_LEN as u64 {
331            return Err(AppError::LimitExceeded(
332                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
333            ));
334        }
335        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
336        graph.relationships = serde_json::from_str(&content)?;
337    }
338    if args.graph_stdin {
339        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
340            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
341        })?;
342        raw_body = graph.body.take().unwrap_or_default();
343    }
344    if args.graph_stdin && !graph.entities.is_empty() {
345        entities_provided_externally = true;
346    }
347
348    if graph.entities.len() > max_entities_per_memory() {
349        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
350            max_entities_per_memory(),
351        )));
352    }
353    let mut relationships_truncated = false;
354    let rel_cap = max_relationships_per_memory();
355    if graph.relationships.len() > rel_cap {
356        tracing::warn!(target: "remember",
357            count = graph.relationships.len(),
358            cap = rel_cap,
359            "truncating relationships to cap"
360        );
361        graph.relationships.truncate(rel_cap);
362        relationships_truncated = true;
363    }
364    normalize_and_validate_graph_input(&mut graph)?;
365
366    if raw_body.len() > MAX_MEMORY_BODY_LEN {
367        return Err(AppError::LimitExceeded(
368            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
369        ));
370    }
371
372    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
373    // Without this check, empty embeddings would be persisted, breaking recall semantics.
374    // GAP-08: skip this guard when --force-merge without --clear-body; the existing body
375    // will be preserved from the database, so the effective body will not be empty.
376    let body_will_be_preserved = args.force_merge && raw_body.trim().is_empty() && !args.clear_body;
377    if !entities_provided_externally
378        && graph.entities.is_empty()
379        && raw_body.trim().is_empty()
380        && !body_will_be_preserved
381        && !args.clear_body
382    {
383        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
384    }
385
386    let metadata: serde_json::Value = if let Some(m) = args.metadata {
387        serde_json::from_str(&m)?
388    } else if let Some(path) = args.metadata_file {
389        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
390        if file_size > MAX_MEMORY_BODY_LEN as u64 {
391            return Err(AppError::LimitExceeded(
392                crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
393            ));
394        }
395        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
396        serde_json::from_str(&content)?
397    } else {
398        serde_json::json!({})
399    };
400
401    let mut body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
402    let mut snippet: String = raw_body.chars().take(200).collect();
403
404    let paths = AppPaths::resolve(args.db.as_deref())?;
405    paths.ensure_dirs()?;
406
407    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
408    let mut extraction_method: Option<String> = None;
409    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
410    if args.enable_ner && args.skip_extraction {
411        return Err(AppError::Validation(
412            "--enable-ner and --skip-extraction are mutually exclusive; remove one".to_string(),
413        ));
414    }
415    if args.skip_extraction && !args.enable_ner {
416        // v1.0.74: revert to v1.0.45 hidden no-op behavior. The v1.0.67
417        // commit (9ddb17b) promoted this to a hard validation error, which
418        // broke the "kept as a hidden no-op for backwards compatibility"
419        // promise documented in CHANGELOG v1.0.45 and started failing
420        // 5+ CI jobs whose E2E tests use this flag to skip the
421        // GLiNER-ONNX model download in CI environments.
422        tracing::warn!(
423            "--skip-extraction is deprecated since v1.0.45 and has no effect (NER is disabled by default); remove this flag to silence the warning"
424        );
425    }
426    let gliner_variant: crate::extraction::GlinerVariant = match args.gliner_variant.as_str() {
427        "int8" => crate::extraction::GlinerVariant::Int8,
428        _ => crate::extraction::GlinerVariant::Fp32,
429    };
430    if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
431        match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
432            Ok(extracted) => {
433                // v1.0.76: ExtractionResult is URL + entity + elapsed_ms;
434                // the LLM ExtractionBackend returns typed relationships
435                // separately. The default build is URL-only extraction.
436                extraction_method = Some("url-regex".to_string());
437                extracted_urls = extracted.urls;
438                // Convert ExtractedEntity → NewEntity (no offsets,
439                // type defaults to Concept).
440                graph.entities = extracted
441                    .entities
442                    .into_iter()
443                    .map(|e| NewEntity {
444                        name: e.name,
445                        entity_type: crate::entity_type::EntityType::Concept,
446                        description: None,
447                    })
448                    .collect();
449                graph.relationships.clear();
450                relationships_truncated = false;
451
452                if graph.entities.len() > max_entities_per_memory() {
453                    graph.entities.truncate(max_entities_per_memory());
454                }
455                if graph.relationships.len() > max_relationships_per_memory() {
456                    relationships_truncated = true;
457                    graph.relationships.truncate(max_relationships_per_memory());
458                }
459                normalize_and_validate_graph_input(&mut graph)?;
460            }
461            Err(e) => {
462                tracing::warn!(target: "remember", error = %e, "auto-extraction failed, graceful degradation");
463                extraction_method = Some("none:extraction-failed".to_string());
464            }
465        }
466    }
467
468    let mut conn = open_rw(&paths.db)?;
469    ensure_schema(&mut conn)?;
470
471    // --dry-run: emit planned action without any DB writes and return.
472    if args.dry_run {
473        let existing = memories::find_by_name(&conn, &namespace, &normalized_name)?;
474        let planned_action = if existing.is_some() && args.force_merge {
475            "would_update"
476        } else {
477            "would_create"
478        };
479        output::emit_json(&serde_json::json!({
480            "dry_run": true,
481            "name": normalized_name,
482            "namespace": namespace,
483            "planned_action": planned_action,
484        }))?;
485        return Ok(());
486    }
487
488    {
489        use crate::constants::MAX_NAMESPACES_ACTIVE;
490        let active_count: u32 = conn.query_row(
491            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
492            [],
493            |r| r.get::<_, i64>(0).map(|v| v as u32),
494        )?;
495        let ns_exists: bool = conn.query_row(
496            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
497            rusqlite::params![namespace],
498            |r| r.get::<_, i64>(0).map(|v| v > 0),
499        )?;
500        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
501            return Err(AppError::NamespaceError(format!(
502                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
503            )));
504        }
505    }
506
507    // M7: detect soft-deleted memory before the standard duplicate check.
508    if let Some((sd_id, true)) =
509        memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
510    {
511        if args.force_merge {
512            memories::clear_deleted_at(&conn, sd_id)?;
513        } else {
514            return Err(AppError::Duplicate(
515                errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
516            ));
517        }
518    }
519
520    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
521    if existing_memory.is_some() && !args.force_merge {
522        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
523            &normalized_name,
524            &namespace,
525        )));
526    }
527
528    // GAP-10: resolve type and description.
529    // For CREATE path (new memory): both are required.
530    // For UPDATE path (--force-merge on existing memory): inherit from existing row when omitted.
531    let (resolved_type, resolved_description) = if existing_memory.is_none() {
532        // CREATE path — both fields are mandatory.
533        let t = args.r#type.ok_or_else(|| {
534            AppError::Validation(
535                "--type and --description are required when creating a new memory".to_string(),
536            )
537        })?;
538        let d = args.description.clone().ok_or_else(|| {
539            AppError::Validation(
540                "--type and --description are required when creating a new memory".to_string(),
541            )
542        })?;
543        (t.as_str().to_string(), d)
544    } else {
545        // UPDATE path (--force-merge) — inherit missing fields from stored row.
546        let existing_row = memories::read_by_name(&conn, &namespace, &normalized_name)?
547            .ok_or_else(|| {
548                AppError::NotFound(format!(
549                    "memory '{normalized_name}' not found in namespace '{namespace}'"
550                ))
551            })?;
552        let t = args
553            .r#type
554            .map(|v| v.as_str().to_string())
555            .unwrap_or_else(|| existing_row.memory_type.clone());
556        let d = args
557            .description
558            .clone()
559            .unwrap_or_else(|| existing_row.description.clone());
560        (t, d)
561    };
562
563    // GAP-08/GAP-09: protect existing body from accidental destruction during --force-merge.
564    // When the caller omits a body (or passes an empty one) without --clear-body, silently
565    // preserve the existing body from the database.  This prevents a common scripting mistake
566    // where a cron job updates metadata fields and inadvertently wipes the stored content.
567    if body_will_be_preserved {
568        if let Some(existing_row) = memories::read_by_name(&conn, &namespace, &normalized_name)? {
569            if !existing_row.body.is_empty() {
570                tracing::debug!(target: "remember",
571                    name = %normalized_name,
572                    "GAP-08: empty body with --force-merge and no --clear-body; preserving existing body"
573                );
574                raw_body = existing_row.body;
575                body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
576                snippet = raw_body.chars().take(200).collect();
577            }
578        }
579    }
580
581    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
582
583    output::emit_progress_i18n(
584        &format!(
585            "Remember stage: validated input; available memory {} MB",
586            crate::memory_guard::available_memory_mb()
587        ),
588        &format!(
589            "Stage remember: input validated; available memory {} MB",
590            crate::memory_guard::available_memory_mb()
591        ),
592    );
593
594    let model_max_length = crate::tokenizer::get_model_max_length();
595    let total_passage_tokens = crate::tokenizer::count_passage_tokens(&raw_body)?;
596    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body);
597    let chunks_created = chunks_info.len();
598    // For single-chunk bodies the memory row itself stores the content and no
599    // entry is appended to `memory_chunks` (see line ~545). For multi-chunk
600    // bodies every chunk is persisted via `insert_chunk_slices`.
601    let chunks_persisted = compute_chunks_persisted(chunks_info.len());
602
603    output::emit_progress_i18n(
604        &format!(
605            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
606            chunks_created,
607            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
608        ),
609        &format!(
610            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
611            chunks_created,
612            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
613        ),
614    );
615
616    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
617        return Err(AppError::LimitExceeded(format!(
618            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
619            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
620        )));
621    }
622
623    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
624    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
625
626    let embedding = if chunks_info.len() == 1 {
627        crate::embedder::embed_passage_local(&paths.models, &raw_body)?
628    } else {
629        let chunk_texts: Vec<&str> = chunks_info
630            .iter()
631            .map(|c| chunking::chunk_text(&raw_body, c))
632            .collect();
633        output::emit_progress_i18n(
634            &format!(
635                "Embedding {} chunks serially to keep memory bounded...",
636                chunks_info.len()
637            ),
638            &format!(
639                "Embedding {} chunks serially to keep memory bounded...",
640                chunks_info.len()
641            ),
642        );
643        let embed_cap = chunk_texts.len();
644        let mut chunk_embeddings = Vec::new();
645        chunk_embeddings.try_reserve(embed_cap).map_err(|_| {
646            AppError::LimitExceeded(format!(
647                "allocation of {embed_cap} chunk embeddings would exceed available memory"
648            ))
649        })?;
650        for chunk_text in &chunk_texts {
651            if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
652                if rss > args.max_rss_mb {
653                    tracing::error!(target: "remember",
654                        rss_mb = rss,
655                        max_rss_mb = args.max_rss_mb,
656                        "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
657                    );
658                    return Err(AppError::LowMemory {
659                        available_mb: crate::memory_guard::available_memory_mb(),
660                        required_mb: args.max_rss_mb,
661                    });
662                }
663            }
664            chunk_embeddings.push(crate::embedder::embed_passage_local(
665                &paths.models,
666                chunk_text,
667            )?);
668        }
669        output::emit_progress_i18n(
670            &format!(
671                "Remember stage: chunk embeddings complete; process RSS {} MB",
672                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
673            ),
674            &format!(
675                "Stage remember: chunk embeddings completed; process RSS {} MB",
676                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
677            ),
678        );
679        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
680        chunk_embeddings_cache = Some(chunk_embeddings);
681        aggregated
682    };
683    let body_for_storage = raw_body;
684
685    let memory_type = resolved_type.as_str();
686    let new_memory = NewMemory {
687        namespace: namespace.clone(),
688        name: normalized_name.clone(),
689        memory_type: memory_type.to_string(),
690        description: resolved_description.clone(),
691        body: body_for_storage,
692        body_hash: body_hash.clone(),
693        session_id: args.session_id.clone(),
694        source: "agent".to_string(),
695        metadata,
696    };
697
698    let mut warnings = Vec::with_capacity(4);
699    let mut entities_persisted = 0usize;
700    let mut relationships_persisted = 0usize;
701
702    let graph_entity_embeddings = graph
703        .entities
704        .iter()
705        .map(|entity| {
706            let entity_text = match &entity.description {
707                Some(desc) => format!("{} {}", entity.name, desc),
708                None => entity.name.clone(),
709            };
710            crate::embedder::embed_passage_local(&paths.models, &entity_text)
711        })
712        .collect::<Result<Vec<_>, _>>()?;
713
714    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
715
716    let mut skip_reindex = false;
717    let (memory_id, action, version) = match existing_memory {
718        Some((existing_id, _updated_at, _current_version)) => {
719            if let Some(hash_id) = duplicate_hash_id {
720                if hash_id != existing_id {
721                    warnings.push(format!(
722                        "identical body already exists as memory id {hash_id}"
723                    ));
724                }
725            }
726
727            // C1 fix: capture old values for FTS5 sync before update
728            let (old_fts_name, old_fts_desc, old_fts_body): (String, String, String) = tx
729                .query_row(
730                    "SELECT name, description, body FROM memories WHERE id = ?1",
731                    rusqlite::params![existing_id],
732                    |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
733                )?;
734
735            // G15: skip re-indexing when body hash matches (common in --force-merge loops)
736            let existing_body_hash: Option<String> = tx
737                .query_row(
738                    "SELECT body_hash FROM memories WHERE id = ?1",
739                    rusqlite::params![existing_id],
740                    |r| r.get(0),
741                )
742                .ok();
743            let body_unchanged = existing_body_hash.as_deref() == Some(&body_hash);
744            skip_reindex = body_unchanged;
745            if !body_unchanged {
746                storage_chunks::delete_chunks(&tx, existing_id)?;
747            }
748
749            let next_v = versions::next_version(&tx, existing_id)?;
750            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
751
752            // C1 fix: sync FTS5 external-content index after update
753            // (trg_fts_au trigger is absent by design due to sqlite-vec conflict)
754            memories::sync_fts_after_update(
755                &tx,
756                existing_id,
757                &old_fts_name,
758                &old_fts_desc,
759                &old_fts_body,
760                &normalized_name,
761                &resolved_description,
762                &new_memory.body,
763            )?;
764
765            versions::insert_version(
766                &tx,
767                existing_id,
768                next_v,
769                &normalized_name,
770                memory_type,
771                &resolved_description,
772                &new_memory.body,
773                &serde_json::to_string(&new_memory.metadata)?,
774                None,
775                "edit",
776            )?;
777            if !body_unchanged {
778                memories::upsert_vec(
779                    &tx,
780                    existing_id,
781                    &namespace,
782                    memory_type,
783                    &embedding,
784                    &normalized_name,
785                    &snippet,
786                )?;
787            }
788            (existing_id, "updated".to_string(), next_v)
789        }
790        None => {
791            if let Some(hash_id) = duplicate_hash_id {
792                warnings.push(format!(
793                    "identical body already exists as memory id {hash_id}"
794                ));
795            }
796            let id = memories::insert(&tx, &new_memory)?;
797            versions::insert_version(
798                &tx,
799                id,
800                1,
801                &normalized_name,
802                memory_type,
803                &resolved_description,
804                &new_memory.body,
805                &serde_json::to_string(&new_memory.metadata)?,
806                None,
807                "create",
808            )?;
809            memories::upsert_vec(
810                &tx,
811                id,
812                &namespace,
813                memory_type,
814                &embedding,
815                &normalized_name,
816                &snippet,
817            )?;
818            (id, "created".to_string(), 1)
819        }
820    };
821
822    if chunks_info.len() > 1 && !skip_reindex {
823        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
824
825        let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
826            AppError::Internal(anyhow::anyhow!(
827                "chunk embeddings cache missing in multi-chunk remember path"
828            ))
829        })?;
830
831        for (i, emb) in chunk_embeddings.iter().enumerate() {
832            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
833        }
834        output::emit_progress_i18n(
835            &format!(
836                "Remember stage: persisted chunk vectors; process RSS {} MB",
837                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
838            ),
839            &format!(
840                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
841                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
842            ),
843        );
844    }
845
846    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
847        for entity in &graph.entities {
848            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
849            let entity_embedding = &graph_entity_embeddings[entities_persisted];
850            entities::upsert_entity_vec(
851                &tx,
852                entity_id,
853                &namespace,
854                entity.entity_type,
855                entity_embedding,
856                &entity.name,
857            )?;
858            entities::link_memory_entity(&tx, memory_id, entity_id)?;
859            entities::increment_degree(&tx, entity_id)?;
860            // GAP-17: warn when entity degree exceeds the configured cap.
861            if args.max_entity_degree > 0 {
862                let cap = args.max_entity_degree as i64;
863                let degree: i64 = tx.query_row(
864                    "SELECT degree FROM entities WHERE id = ?1",
865                    rusqlite::params![entity_id],
866                    |r| r.get(0),
867                )?;
868                if degree > cap {
869                    tracing::warn!(target: "remember",
870                        entity = %entity.name,
871                        degree = degree,
872                        cap = cap,
873                        "entity degree cap exceeded"
874                    );
875                }
876            }
877            entities_persisted += 1;
878        }
879        let entity_types: std::collections::HashMap<&str, EntityType> = graph
880            .entities
881            .iter()
882            .map(|entity| (entity.name.as_str(), entity.entity_type))
883            .collect();
884
885        for rel in &graph.relationships {
886            let source_entity = NewEntity {
887                name: rel.source.clone(),
888                entity_type: entity_types
889                    .get(rel.source.as_str())
890                    .copied()
891                    .unwrap_or(EntityType::Concept),
892                description: None,
893            };
894            let target_entity = NewEntity {
895                name: rel.target.clone(),
896                entity_type: entity_types
897                    .get(rel.target.as_str())
898                    .copied()
899                    .unwrap_or(EntityType::Concept),
900                description: None,
901            };
902            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
903            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
904            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
905            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
906            relationships_persisted += 1;
907        }
908    }
909    tx.commit()?;
910
911    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
912    // Failures do not propagate — non-critical path with graceful degradation.
913    let urls_persisted = if !extracted_urls.is_empty() {
914        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
915            .into_iter()
916            .map(|u| storage_urls::MemoryUrl {
917                url: u.url,
918                offset: Some(u.start as i64),
919            })
920            .collect();
921        storage_urls::insert_urls(&conn, memory_id, &url_entries)
922    } else {
923        0
924    };
925
926    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
927
928    let created_at_epoch = chrono::Utc::now().timestamp();
929    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
930
931    output::emit_json(&RememberResponse {
932        memory_id,
933        // Persist the normalized (kebab-case) slug as `name` since that is the
934        // storage key. The original input is exposed via `original_name` only
935        // when normalization actually changed something (B_4 in v1.0.32).
936        name: normalized_name.clone(),
937        namespace,
938        action: action.clone(),
939        operation: action,
940        version,
941        entities_persisted,
942        relationships_persisted,
943        relationships_truncated,
944        chunks_created,
945        chunks_persisted,
946        urls_persisted,
947        extraction_method,
948        merged_into_memory_id: None,
949        warnings,
950        created_at: created_at_epoch,
951        created_at_iso,
952        elapsed_ms: inicio.elapsed().as_millis() as u64,
953        name_was_normalized,
954        original_name: name_was_normalized.then_some(original_name),
955    })?;
956
957    Ok(())
958}
959
#[cfg(test)]
mod tests {
    //! Unit tests for the `remember` handler: the `chunks_persisted` contract,
    //! JSON serialization of `RememberResponse`, name-validation error shapes
    //! and the GAP-08 body-preservation predicate.

    use super::compute_chunks_persisted;
    use crate::errors::AppError;
    use crate::output::RememberResponse;

    /// Message produced for a name that collapses to nothing once normalized.
    const EMPTY_AFTER_NORMALIZATION_MSG: &str = "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)";

    /// Builds a baseline "created" response with neutral values.
    ///
    /// Tests override only the fields they care about through struct-update
    /// syntax (`RememberResponse { field: value, ..base_response() }`), which
    /// keeps every fixture short and makes the relevant fields stand out.
    fn base_response() -> RememberResponse {
        RememberResponse {
            memory_id: 1,
            name: "mem".to_string(),
            namespace: "global".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 0,
            relationships_persisted: 0,
            relationships_truncated: false,
            chunks_created: 1,
            chunks_persisted: 0,
            urls_persisted: 0,
            extraction_method: None,
            merged_into_memory_id: None,
            warnings: vec![],
            created_at: 0,
            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
            elapsed_ms: 0,
            name_was_normalized: false,
            original_name: None,
        }
    }

    /// Lowercases `raw`, turns underscores and spaces into hyphens and trims
    /// leading/trailing hyphens — the normalization steps exercised by the
    /// P0-4 regression tests below.
    fn normalize_name(raw: &str) -> String {
        raw.to_lowercase()
            .replace(['_', ' '], "-")
            .trim_matches('-')
            .to_string()
    }

    /// Returns a validation error when `normalized` is empty, `Ok(())` otherwise.
    fn check_normalized_not_empty(normalized: &str) -> Result<(), AppError> {
        if normalized.is_empty() {
            Err(AppError::Validation(
                EMPTY_AFTER_NORMALIZATION_MSG.to_string(),
            ))
        } else {
            Ok(())
        }
    }

    /// Asserts that `result` is `AppError::Validation` carrying the
    /// "empty after normalization" message.
    ///
    /// Uses an exhaustive `match` so that an `Ok` or a different error variant
    /// fails the test instead of silently skipping the message assertion.
    fn assert_empty_after_normalization_error(result: Result<(), AppError>) {
        match result {
            Err(AppError::Validation(msg)) => assert!(
                msg.contains("empty after normalization"),
                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
            ),
            _ => panic!(
                "expected AppError::Validation for a name that is empty after normalization"
            ),
        }
    }

    // Bug H-M8: chunks_persisted contract is unit-testable and matches schema.
    #[test]
    fn chunks_persisted_zero_for_zero_chunks() {
        assert_eq!(compute_chunks_persisted(0), 0);
    }

    #[test]
    fn chunks_persisted_zero_for_single_chunk_body() {
        // Single-chunk bodies live in the memories row itself; no row is
        // appended to memory_chunks. This is the documented contract.
        assert_eq!(compute_chunks_persisted(1), 0);
    }

    #[test]
    fn chunks_persisted_equals_count_for_multi_chunk_body() {
        // Multi-chunk bodies persist every chunk, so the persisted count
        // equals the number of chunks created.
        for count in [2usize, 7, 64] {
            assert_eq!(compute_chunks_persisted(count), count);
        }
    }

    #[test]
    fn remember_response_serializes_required_fields() {
        let resp = RememberResponse {
            memory_id: 42,
            name: "minha-mem".to_string(),
            created_at: 1_705_320_000,
            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
            elapsed_ms: 55,
            ..base_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["memory_id"], 42);
        assert_eq!(json["action"], "created");
        assert_eq!(json["operation"], "created");
        assert_eq!(json["version"], 1);
        assert_eq!(json["elapsed_ms"], 55u64);
        assert!(json["warnings"].is_array());
        assert!(json["merged_into_memory_id"].is_null());
    }

    // `action` and `operation` are aliases and must serialize to the same value.
    #[test]
    fn remember_response_action_e_operation_sao_aliases() {
        let resp = RememberResponse {
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 2,
            entities_persisted: 3,
            relationships_persisted: 1,
            chunks_created: 2,
            chunks_persisted: 2,
            ..base_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(
            json["action"], json["operation"],
            "action e operation devem ser iguais"
        );
        assert_eq!(json["entities_persisted"], 3);
        assert_eq!(json["relationships_persisted"], 1);
        assert_eq!(json["chunks_created"], 2);
    }

    // `warnings` serializes as an array containing each warning message.
    #[test]
    fn remember_response_warnings_lista_mensagens() {
        let resp = RememberResponse {
            memory_id: 5,
            name: "dup-mem".to_string(),
            warnings: vec!["identical body already exists as memory id 3".to_string()],
            elapsed_ms: 10,
            ..base_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        let warnings = json["warnings"]
            .as_array()
            .expect("warnings deve ser array");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].as_str().unwrap().contains("identical body"));
    }

    #[test]
    fn invalid_name_reserved_prefix_returns_validation_error() {
        // Validates the rejection logic for names with the "__" prefix directly.
        let name = "__reservado";
        let result: Result<(), AppError> = if name.starts_with("__") {
            Err(AppError::Validation(
                crate::i18n::validation::reserved_name(),
            ))
        } else {
            Ok(())
        };
        // Exhaustive match: any outcome other than a Validation error fails.
        match result {
            Err(AppError::Validation(msg)) => assert!(!msg.is_empty()),
            _ => panic!("expected AppError::Validation for a name with the reserved \"__\" prefix"),
        }
    }

    #[test]
    fn name_too_long_returns_validation_error() {
        let max_len = crate::constants::MAX_MEMORY_NAME_LEN;
        let long_name = "a".repeat(max_len + 1);
        let result: Result<(), AppError> = if long_name.is_empty() || long_name.len() > max_len {
            Err(AppError::Validation(crate::i18n::validation::name_length(
                max_len,
            )))
        } else {
            Ok(())
        };
        // Require the specific variant, not just "some error".
        assert!(matches!(result, Err(AppError::Validation(_))));
    }

    #[test]
    fn remember_response_merged_into_memory_id_some_serializes_integer() {
        let resp = RememberResponse {
            memory_id: 10,
            name: "mem-mergeada".to_string(),
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 3,
            merged_into_memory_id: Some(7),
            ..base_response()
        };

        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["merged_into_memory_id"], 7);
    }

    #[test]
    fn remember_response_urls_persisted_serializes_field() {
        // v1.0.24 P0-2: ensures urls_persisted appears in the JSON and accepts a value > 0.
        let resp = RememberResponse {
            memory_id: 3,
            name: "mem-com-urls".to_string(),
            urls_persisted: 3,
            extraction_method: Some("regex-only".to_string()),
            ..base_response()
        };
        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["urls_persisted"], 3);
    }

    #[test]
    fn empty_name_after_normalization_returns_specific_message() {
        // P0-4 regression: name consisting only of hyphens normalizes to empty string;
        // must produce a distinct error message, not the "too long" message.
        let normalized = normalize_name("---");
        assert_empty_after_normalization_error(check_normalized_not_empty(&normalized));
    }

    #[test]
    fn name_only_underscores_after_normalization_returns_specific_message() {
        // P0-4 regression: name consisting only of underscores normalizes to empty string.
        let normalized = normalize_name("___");
        assert!(
            normalized.is_empty(),
            "underscores devem normalizar para string vazia"
        );
        assert_empty_after_normalization_error(check_normalized_not_empty(&normalized));
    }

    #[test]
    fn remember_response_relationships_truncated_serializes_field() {
        // P1-D: ensures relationships_truncated appears in the JSON as a bool.
        let resp_false = RememberResponse {
            name: "test".to_string(),
            entities_persisted: 2,
            relationships_persisted: 1,
            ..base_response()
        };
        let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
        assert_eq!(json_false["relationships_truncated"], false);

        let resp_true = RememberResponse {
            relationships_truncated: true,
            ..resp_false
        };
        let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
        assert_eq!(json_true["relationships_truncated"], true);
    }

    // GAP-08: body-preservation predicate tests.
    // Verifies the decision logic that determines whether an existing body should
    // be kept instead of overwritten with an empty incoming body during --force-merge.

    /// Returns `true` when the existing body should be preserved.
    ///
    /// Mirrors the `body_will_be_preserved` expression in `run()` so the logic
    /// is testable without a real database connection.
    fn should_preserve_body(force_merge: bool, raw_body_is_empty: bool, clear_body: bool) -> bool {
        force_merge && raw_body_is_empty && !clear_body
    }

    #[test]
    fn gap08_empty_body_force_merge_no_clear_body_preserves() {
        // Caller passes no body with --force-merge but without --clear-body.
        // The existing body in the DB must be kept.
        assert!(
            should_preserve_body(true, true, false),
            "empty body + force-merge + no clear-body should trigger preservation"
        );
    }

    #[test]
    fn gap08_empty_body_force_merge_with_clear_body_does_not_preserve() {
        // Caller explicitly passes --clear-body; intentional wipe is honoured.
        assert!(
            !should_preserve_body(true, true, true),
            "--clear-body must bypass preservation"
        );
    }

    #[test]
    fn gap08_non_empty_body_force_merge_does_not_preserve() {
        // Caller provides a real body; it must overwrite the existing one.
        assert!(
            !should_preserve_body(true, false, false),
            "non-empty body must overwrite, not preserve"
        );
    }

    #[test]
    fn gap08_empty_body_no_force_merge_does_not_preserve() {
        // Without --force-merge the predicate is false regardless of the body.
        assert!(
            !should_preserve_body(false, true, false),
            "no --force-merge means no preservation logic applies"
        );
    }
}