Skip to main content

sqlite_graphrag/commands/
remember.rs

1//! Handler for the `remember` CLI subcommand.
2
3use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
17#[derive(clap::Args)]
18#[command(after_long_help = "EXAMPLES:\n  \
19    # Create a memory with inline body\n  \
20    sqlite-graphrag remember --name design-auth --type decision \\\n    \
21    --description \"auth design\" --body \"JWT for stateless auth\"\n\n  \
22    # Create with curated graph via --graph-stdin\n  \
23    echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
24    sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n  \
25    # Enable automatic URL extraction with --graph-stdin (URL-regex only since v1.0.79)\n  \
26    echo '{\"body\":\"See https://docs.rs ...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
27    sqlite-graphrag remember --name url-test --type note --description \"test\" \\\n    \
28    --graph-stdin --enable-ner\n\n  \
29    # Idempotent upsert with --force-merge\n  \
30    sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n    \
31    --body \"new content\" --force-merge\n\n\
32NOTE:\n  \
33    remember does NOT accept positional arguments.\n  \
34    Use --body \"text\" for inline content\n  \
35    Use --body-file path for file content\n  \
36    Use --body-stdin for piped content\n  \
37    Use --graph-stdin for JSON with entities and relationships\n\n\
38ENTITY TYPES (for --graph-stdin entities, NOT memory --type):\n  \
39    concept, tool, person, file, project, decision, incident,\n  \
40    organization, location, date, dashboard, issue_tracker, memory\n  \
41    WARNING: reference, skill, document, note, user, feedback are\n  \
42    MEMORY types only — NOT valid for entities.\n  \
43    Mapping: reference→concept, document→file, user→person")]
44pub struct RememberArgs {
45    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
46    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
47    #[arg(long)]
48    pub name: String,
49    #[arg(
50        long,
51        value_enum,
52        long_help = "Memory kind stored in `memories.type`. Required when creating a new memory. Optional with --force-merge: if omitted the existing memory type is inherited. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
53    )]
54    pub r#type: Option<MemoryType>,
55    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
56    /// Required when creating a new memory. Optional with --force-merge: if omitted the existing description is inherited.
57    ///
58    /// GAP-SG-33: `allow_hyphen_values` lets a description that begins with a
59    /// hyphen (e.g. `"- bullet"`) be accepted as a value instead of being
60    /// mistaken for a flag.
61    #[arg(long, allow_hyphen_values = true)]
62    pub description: Option<String>,
63    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
64    /// Maximum 512000 bytes; rejected if empty without an external graph.
65    ///
66    /// GAP-SG-33: `allow_hyphen_values` lets a body that begins with a hyphen
67    /// (e.g. a markdown bullet list) be accepted as a value.
68    #[arg(
69        long,
70        allow_hyphen_values = true,
71        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
72        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
73    )]
74    pub body: Option<String>,
75    #[arg(
76        long,
77        help = "Read body from a file instead of --body",
78        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
79    )]
80    pub body_file: Option<std::path::PathBuf>,
81    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
82    /// Mutually exclusive with --body, --body-file, --graph-stdin.
83    #[arg(
84        long,
85        conflicts_with_all = ["body", "body_file", "graph_stdin"]
86    )]
87    pub body_stdin: bool,
88    #[arg(
89        long,
90        help = "JSON file containing entities to associate with this memory"
91    )]
92    pub entities_file: Option<std::path::PathBuf>,
93    #[arg(
94        long,
95        help = "JSON file containing relationships to associate with this memory"
96    )]
97    pub relationships_file: Option<std::path::PathBuf>,
98    #[arg(
99        long,
100        help = "Read graph JSON (body + entities + relationships) from stdin",
101        conflicts_with_all = [
102            "body",
103            "body_file",
104            "body_stdin",
105            "entities_file",
106            "relationships_file",
107            "graph_file"
108        ]
109    )]
110    pub graph_stdin: bool,
111    /// GAP-SG-30: read graph JSON (`{body, entities, relationships}`) from a
112    /// FILE instead of stdin, so a curated graph can combine with a body
113    /// supplied via --body / --body-file / --body-stdin (which previously
114    /// conflicted with --graph-stdin over the single stdin). The file's `body`
115    /// field is used only when no other body source is given; otherwise the
116    /// body source wins and only the file's entities/relationships are applied.
117    #[arg(
118        long,
119        value_name = "PATH",
120        help = "Read graph JSON (body + entities + relationships) from a file (combines with --body/--body-file/--body-stdin)",
121        conflicts_with_all = ["graph_stdin", "entities_file", "relationships_file"]
122    )]
123    pub graph_file: Option<std::path::PathBuf>,
124    #[arg(
125        long,
126        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
127    )]
128    pub namespace: Option<String>,
129    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
130    #[arg(long)]
131    pub metadata: Option<String>,
132    #[arg(long, help = "JSON file containing metadata key-value pairs")]
133    pub metadata_file: Option<std::path::PathBuf>,
134    #[arg(long)]
135    pub force_merge: bool,
136    #[arg(
137        long,
138        value_name = "EPOCH_OR_RFC3339",
139        value_parser = crate::parsers::parse_expected_updated_at,
140        long_help = "Optimistic lock: reject if updated_at does not match. \
141Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
142    )]
143    pub expected_updated_at: Option<i64>,
144    #[arg(
145        long,
146        env = "SQLITE_GRAPHRAG_ENABLE_NER",
147        value_parser = crate::parsers::parse_bool_flexible,
148        action = clap::ArgAction::Set,
149        num_args = 0..=1,
150        default_missing_value = "true",
151        default_value = "false",
152        help = "Enable automatic URL-regex extraction from body (the GLiNER NER pipeline was removed in v1.0.79)"
153    )]
154    pub enable_ner: bool,
155    #[arg(
156        long,
157        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
158        default_value = "fp32",
159        help = "DEPRECATED: no effect since v1.0.79 (the GLiNER pipeline was removed); accepted for compatibility only"
160    )]
161    pub gliner_variant: String,
162    #[arg(long, hide = true)]
163    pub skip_extraction: bool,
164    /// Explicitly clear the body content (set to empty string). Required to distinguish
165    /// intentional body clearing from accidental omission during --force-merge.
166    /// Without this flag, an empty body passed to --force-merge preserves the existing body.
167    #[arg(
168        long,
169        default_value_t = false,
170        help = "Explicitly clear body content during --force-merge (without this flag, an empty body is ignored and the existing body is kept)"
171    )]
172    pub clear_body: bool,
173    /// Validate input and report planned actions without persisting.
174    #[arg(
175        long,
176        default_value_t = false,
177        help = "Validate input and report planned actions without persisting"
178    )]
179    pub dry_run: bool,
180    /// GAP-SG-37: reject (instead of silently normalizing) when the supplied
181    /// --name is not already canonical kebab-case. Use this when the literal
182    /// name matters and a silent transform would surprise downstream lookups.
183    #[arg(
184        long,
185        default_value_t = false,
186        help = "Reject the write if --name would be normalized to kebab-case (preserve-name guard)"
187    )]
188    pub strict_name: bool,
189    /// GAP-SG-51: with --force-merge, REPLACE the memory's entity/relationship
190    /// bindings with the supplied set instead of merging additively. Combined
191    /// with an empty `entities`/`relationships` payload this clears all bindings
192    /// without deleting the memory.
193    #[arg(
194        long,
195        default_value_t = false,
196        help = "With --force-merge, replace (not merge) the memory's graph bindings; empty entities clears them"
197    )]
198    pub replace_graph: bool,
199    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
200    #[arg(long)]
201    pub session_id: Option<String>,
202    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
203    pub format: JsonOutputFormat,
204    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
205    pub json: bool,
206    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
207    pub db: Option<String>,
208    /// Maximum process RSS in MiB; abort if exceeded during embedding.
209    #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
210          help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
211    pub max_rss_mb: u64,
212    /// G42/S3 (v1.0.79): maximum simultaneous LLM embedding subprocesses.
213    /// The effective value is further bounded by CPU count and available
214    /// RAM (permits = min(N, cpus, ram_livre*0.5/350MB), clamp [1, 32]).
215    #[arg(long, default_value_t = 4, value_name = "N",
216          value_parser = clap::value_parser!(u64).range(1..=32),
217          help = "Maximum simultaneous LLM embedding subprocesses (default: 4, clamp [1,32])")]
218    pub llm_parallelism: u64,
219}
220
221#[derive(Deserialize, Default)]
222#[serde(deny_unknown_fields)]
223struct GraphInput {
224    #[serde(default)]
225    body: Option<String>,
226    #[serde(default)]
227    entities: Vec<NewEntity>,
228    #[serde(default)]
229    relationships: Vec<NewRelationship>,
230}
231
232fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
233    for rel in &mut graph.relationships {
234        rel.relation = crate::parsers::normalize_relation(&rel.relation);
235        if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
236            return Err(AppError::Validation(format!(
237                "{e} for relationship '{}' -> '{}'",
238                rel.source, rel.target
239            )));
240        }
241        crate::parsers::warn_if_non_canonical(&rel.relation);
242        if !(0.0..=1.0).contains(&rel.strength) {
243            return Err(AppError::Validation(format!(
244                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
245                rel.strength, rel.source, rel.target
246            )));
247        }
248    }
249
250    Ok(())
251}
252
253#[tracing::instrument(skip_all, level = "debug", name = "remember")]
254pub fn run(
255    args: RememberArgs,
256    llm_backend: crate::cli::LlmBackendChoice,
257    embedding_backend: crate::cli::EmbeddingBackendChoice,
258) -> Result<(), AppError> {
259    use crate::constants::*;
260
261    let inicio = std::time::Instant::now();
262    let _ = args.format;
263    tracing::debug!(target: "remember", name = %args.name, "persisting memory");
264    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
265
266    // Capture the original `--name` before normalization so the JSON response can
267    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
268    // an owned String because `args.name` is moved into the response below.
269    let original_name = args.name.clone();
270
271    // Auto-normalize to kebab-case before validation (P2-H).
272    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
273    // after truncation by a long filename ending in a hyphen.
274    let normalized_name = {
275        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
276        let trimmed = lower.trim_matches('-').to_string();
277        if trimmed != args.name {
278            tracing::warn!(target: "remember",
279                original = %args.name,
280                normalized = %trimmed,
281                "name auto-normalized to kebab-case"
282            );
283        }
284        trimmed
285    };
286    let name_was_normalized = normalized_name != original_name;
287
288    // GAP-SG-37: when --strict-name is set, refuse to silently rewrite the name.
289    // The operator gets the canonical form so they can re-submit it explicitly.
290    if args.strict_name && name_was_normalized {
291        return Err(AppError::Validation(format!(
292            "--strict-name is set but '{original_name}' is not canonical kebab-case; \
293             re-run with --name '{normalized_name}' (or drop --strict-name to allow auto-normalization)"
294        )));
295    }
296
297    if normalized_name.is_empty() {
298        return Err(AppError::Validation(
299            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
300        ));
301    }
302    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
303        return Err(AppError::LimitExceeded(
304            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
305        ));
306    }
307
308    if normalized_name.starts_with("__") {
309        return Err(AppError::Validation(
310            crate::i18n::validation::reserved_name(),
311        ));
312    }
313
314    {
315        let slug_re = crate::constants::name_slug_regex();
316        if !slug_re.is_match(&normalized_name) {
317            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
318                &normalized_name,
319            )));
320        }
321    }
322
323    if let Some(ref desc) = args.description {
324        if desc.len() > MAX_MEMORY_DESCRIPTION_LEN {
325            return Err(AppError::Validation(
326                crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
327            ));
328        }
329    }
330
331    // GAP-SG-30: capture whether the body comes from an explicit source before
332    // `args.body` is moved below; --graph-file only adopts the file's body when
333    // no explicit body source was supplied.
334    let body_explicitly_provided =
335        args.body.is_some() || args.body_file.is_some() || args.body_stdin;
336
337    let mut raw_body = if let Some(b) = args.body {
338        b
339    } else if let Some(ref path) = args.body_file {
340        let file_size = std::fs::metadata(path).map_err(AppError::Io)?.len();
341        if file_size > MAX_MEMORY_BODY_LEN as u64 {
342            return Err(AppError::BodyTooLarge {
343                bytes: file_size,
344                limit: MAX_MEMORY_BODY_LEN as u64,
345            });
346        }
347        match std::fs::read_to_string(path) {
348            Ok(s) => s,
349            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
350                let bytes = std::fs::read(path).map_err(AppError::Io)?;
351                tracing::warn!(target: "remember", "body file contains invalid UTF-8; replacing invalid sequences");
352                String::from_utf8_lossy(&bytes).into_owned()
353            }
354            Err(e) => return Err(AppError::Io(e)),
355        }
356    } else if args.body_stdin || args.graph_stdin {
357        crate::stdin_helper::read_stdin_with_timeout(60)?
358    } else {
359        String::new()
360    };
361
362    let mut entities_provided_externally =
363        args.entities_file.is_some() || args.relationships_file.is_some();
364
365    let mut graph = GraphInput::default();
366    if let Some(path) = args.entities_file {
367        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
368        if file_size > MAX_MEMORY_BODY_LEN as u64 {
369            return Err(AppError::BodyTooLarge {
370                bytes: file_size,
371                limit: MAX_MEMORY_BODY_LEN as u64,
372            });
373        }
374        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
375        // v1.1.1 (P7): boundary validation with context — an invalid
376        // entity_type surfaces the FromStr message (13 valid values + hints)
377        // as a Validation error instead of a bare Json error (exit 20).
378        graph.entities = serde_json::from_str(&content)
379            .map_err(|e| AppError::Validation(format!("invalid JSON in --entities-file: {e}")))?;
380    }
381    if let Some(path) = args.relationships_file {
382        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
383        if file_size > MAX_MEMORY_BODY_LEN as u64 {
384            return Err(AppError::BodyTooLarge {
385                bytes: file_size,
386                limit: MAX_MEMORY_BODY_LEN as u64,
387            });
388        }
389        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
390        graph.relationships = serde_json::from_str(&content).map_err(|e| {
391            AppError::Validation(format!("invalid JSON in --relationships-file: {e}"))
392        })?;
393    }
394    if args.graph_stdin {
395        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
396            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
397        })?;
398        raw_body = graph.body.take().unwrap_or_default();
399    }
400    if args.graph_stdin && !graph.entities.is_empty() {
401        entities_provided_externally = true;
402    }
403    // GAP-SG-30: graph from a file, combinable with any body source. Conflicts
404    // with --graph-stdin/--entities-file/--relationships-file (enforced by clap),
405    // so `graph` is still empty here when --graph-file is set.
406    if let Some(path) = args.graph_file {
407        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
408        if file_size > MAX_MEMORY_BODY_LEN as u64 {
409            return Err(AppError::BodyTooLarge {
410                bytes: file_size,
411                limit: MAX_MEMORY_BODY_LEN as u64,
412            });
413        }
414        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
415        let mut gf = serde_json::from_str::<GraphInput>(&content)
416            .map_err(|e| AppError::Validation(format!("invalid JSON in --graph-file: {e}")))?;
417        graph.entities = gf.entities;
418        graph.relationships = gf.relationships;
419        if !body_explicitly_provided {
420            raw_body = gf.body.take().unwrap_or_default();
421        }
422        if !graph.entities.is_empty() {
423            entities_provided_externally = true;
424        }
425    }
426
427    if graph.entities.len() > max_entities_per_memory() {
428        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
429            max_entities_per_memory(),
430        )));
431    }
432    let mut relationships_truncated = false;
433    let rel_cap = max_relationships_per_memory();
434    if graph.relationships.len() > rel_cap {
435        tracing::warn!(target: "remember",
436            count = graph.relationships.len(),
437            cap = rel_cap,
438            "truncating relationships to cap"
439        );
440        graph.relationships.truncate(rel_cap);
441        relationships_truncated = true;
442    }
443    normalize_and_validate_graph_input(&mut graph)?;
444
445    if raw_body.len() > MAX_MEMORY_BODY_LEN {
446        return Err(AppError::BodyTooLarge {
447            bytes: raw_body.len() as u64,
448            limit: MAX_MEMORY_BODY_LEN as u64,
449        });
450    }
451
452    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
453    // Without this check, empty embeddings would be persisted, breaking recall semantics.
454    // GAP-08: skip this guard when --force-merge without --clear-body; the existing body
455    // will be preserved from the database, so the effective body will not be empty.
456    let body_will_be_preserved = args.force_merge && raw_body.trim().is_empty() && !args.clear_body;
457    if !entities_provided_externally
458        && graph.entities.is_empty()
459        && raw_body.trim().is_empty()
460        && !body_will_be_preserved
461        && !args.clear_body
462    {
463        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
464    }
465
466    let metadata: serde_json::Value = if let Some(m) = args.metadata {
467        serde_json::from_str(&m)?
468    } else if let Some(path) = args.metadata_file {
469        let file_size = std::fs::metadata(&path).map_err(AppError::Io)?.len();
470        if file_size > MAX_MEMORY_BODY_LEN as u64 {
471            return Err(AppError::BodyTooLarge {
472                bytes: file_size,
473                limit: MAX_MEMORY_BODY_LEN as u64,
474            });
475        }
476        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
477        serde_json::from_str(&content)?
478    } else {
479        serde_json::json!({})
480    };
481
482    let mut body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
483    let mut snippet: String = raw_body.chars().take(200).collect();
484
485    let paths = AppPaths::resolve(args.db.as_deref())?;
486    paths.ensure_dirs()?;
487
488    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
489    let mut extraction_method: Option<String> = None;
490    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
491    if args.enable_ner && args.skip_extraction {
492        return Err(AppError::Validation(
493            "--enable-ner and --skip-extraction are mutually exclusive; remove one".to_string(),
494        ));
495    }
496    if args.skip_extraction && !args.enable_ner {
497        // v1.0.74: revert to v1.0.45 hidden no-op behavior. The v1.0.67
498        // commit (9ddb17b) promoted this to a hard validation error, which
499        // broke the "kept as a hidden no-op for backwards compatibility"
500        // promise documented in CHANGELOG v1.0.45 and started failing
501        // 5+ CI jobs whose E2E tests use this flag to skip the
502        // GLiNER-ONNX model download in CI environments.
503        tracing::warn!(
504            "--skip-extraction is deprecated since v1.0.45 and has no effect (NER is disabled by default); remove this flag to silence the warning"
505        );
506    }
507    // v1.0.79: --gliner-variant is a no-op kept for compatibility; a
508    // non-default value signals the caller still expects the removed
509    // GLiNER pipeline, so warn explicitly.
510    if args.gliner_variant != "fp32" {
511        tracing::warn!(
512            "--gliner-variant is deprecated and has no effect since v1.0.79 (the GLiNER pipeline was removed); --enable-ner performs URL-regex extraction only"
513        );
514    }
515    let gliner_variant: crate::extraction::GlinerVariant = match args.gliner_variant.as_str() {
516        "int8" => crate::extraction::GlinerVariant::Int8,
517        _ => crate::extraction::GlinerVariant::Fp32,
518    };
519    if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
520        match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
521            Ok(extracted) => {
522                // v1.0.76: ExtractionResult is URL + entity + elapsed_ms;
523                // the LLM ExtractionBackend returns typed relationships
524                // separately. The default build is URL-only extraction.
525                extraction_method = Some("url-regex".to_string());
526                extracted_urls = extracted.urls;
527                // Convert ExtractedEntity → NewEntity (no offsets,
528                // type defaults to Concept).
529                graph.entities = extracted
530                    .entities
531                    .into_iter()
532                    .map(|e| NewEntity {
533                        name: e.name,
534                        entity_type: crate::entity_type::EntityType::Concept,
535                        description: None,
536                    })
537                    .collect();
538                graph.relationships.clear();
539                relationships_truncated = false;
540
541                if graph.entities.len() > max_entities_per_memory() {
542                    graph.entities.truncate(max_entities_per_memory());
543                }
544                if graph.relationships.len() > max_relationships_per_memory() {
545                    relationships_truncated = true;
546                    graph.relationships.truncate(max_relationships_per_memory());
547                }
548                normalize_and_validate_graph_input(&mut graph)?;
549            }
550            Err(e) => {
551                tracing::warn!(target: "remember", error = %e, "auto-extraction failed, graceful degradation");
552                extraction_method = Some("none:extraction-failed".to_string());
553            }
554        }
555    }
556
557    let mut conn = open_rw(&paths.db)?;
558    ensure_schema(&mut conn)?;
559
560    // --dry-run: emit planned action without any DB writes and return.
561    if args.dry_run {
562        let existing = memories::find_by_name(&conn, &namespace, &normalized_name)?;
563        let planned_action = if existing.is_some() && args.force_merge {
564            "would_update"
565        } else {
566            "would_create"
567        };
568        output::emit_json(&serde_json::json!({
569            "dry_run": true,
570            "name": normalized_name,
571            "namespace": namespace,
572            "planned_action": planned_action,
573        }))?;
574        return Ok(());
575    }
576
577    {
578        use crate::constants::MAX_NAMESPACES_ACTIVE;
579        let active_count: u32 = conn.query_row(
580            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
581            [],
582            |r| r.get::<_, i64>(0).map(|v| v as u32),
583        )?;
584        let ns_exists: bool = conn.query_row(
585            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
586            rusqlite::params![namespace],
587            |r| r.get::<_, i64>(0).map(|v| v > 0),
588        )?;
589        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
590            return Err(AppError::NamespaceError(format!(
591                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
592            )));
593        }
594    }
595
596    // M7: detect soft-deleted memory before the standard duplicate check.
597    if let Some((sd_id, true)) =
598        memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
599    {
600        if args.force_merge {
601            memories::clear_deleted_at(&conn, sd_id)?;
602        } else {
603            return Err(AppError::Duplicate(
604                errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
605            ));
606        }
607    }
608
609    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
610    if existing_memory.is_some() && !args.force_merge {
611        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
612            &normalized_name,
613            &namespace,
614        )));
615    }
616
617    // GAP-10: resolve type and description.
618    // For CREATE path (new memory): both are required.
619    // For UPDATE path (--force-merge on existing memory): inherit from existing row when omitted.
620    let (resolved_type, resolved_description) = if existing_memory.is_none() {
621        // CREATE path — both fields are mandatory.
622        let t = args.r#type.ok_or_else(|| {
623            AppError::Validation(
624                "--type and --description are required when creating a new memory".to_string(),
625            )
626        })?;
627        let d = args.description.clone().ok_or_else(|| {
628            AppError::Validation(
629                "--type and --description are required when creating a new memory".to_string(),
630            )
631        })?;
632        (t.as_str().to_string(), d)
633    } else {
634        // UPDATE path (--force-merge) — inherit missing fields from stored row.
635        let existing_row = memories::read_by_name(&conn, &namespace, &normalized_name)?
636            .ok_or_else(|| {
637                AppError::NotFound(format!(
638                    "memory '{normalized_name}' not found in namespace '{namespace}'"
639                ))
640            })?;
641        let t = args
642            .r#type
643            .map(|v| v.as_str().to_string())
644            .unwrap_or_else(|| existing_row.memory_type.clone());
645        let d = args
646            .description
647            .clone()
648            .unwrap_or_else(|| existing_row.description.clone());
649        (t, d)
650    };
651
652    // GAP-08/GAP-09: protect existing body from accidental destruction during --force-merge.
653    // When the caller omits a body (or passes an empty one) without --clear-body, silently
654    // preserve the existing body from the database.  This prevents a common scripting mistake
655    // where a cron job updates metadata fields and inadvertently wipes the stored content.
656    if body_will_be_preserved {
657        if let Some(existing_row) = memories::read_by_name(&conn, &namespace, &normalized_name)? {
658            if !existing_row.body.is_empty() {
659                tracing::debug!(target: "remember",
660                    name = %normalized_name,
661                    "GAP-08: empty body with --force-merge and no --clear-body; preserving existing body"
662                );
663                raw_body = existing_row.body;
664                body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
665                snippet = raw_body.chars().take(200).collect();
666            }
667        }
668    }
669
670    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
671
672    output::emit_progress_i18n(
673        &format!(
674            "Remember stage: validated input; available memory {} MB",
675            crate::memory_guard::available_memory_mb()
676        ),
677        &format!(
678            "Stage remember: input validated; available memory {} MB",
679            crate::memory_guard::available_memory_mb()
680        ),
681    );
682
683    let model_max_length = crate::tokenizer::get_model_max_length();
684    let total_passage_tokens = crate::tokenizer::count_passage_tokens(&raw_body)?;
685    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body);
686    let chunks_created = chunks_info.len();
687    // GAP-SG-40: `chunks_persisted` is no longer a pre-commit estimate. It is
688    // read back from `memory_chunks` AFTER the transaction commits (see below)
689    // so the reported count matches the observable database state. Single-chunk
690    // bodies store inline in the memories row and append no chunk rows.
691
692    output::emit_progress_i18n(
693        &format!(
694            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
695            chunks_created,
696            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
697        ),
698        &format!(
699            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
700            chunks_created,
701            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
702        ),
703    );
704
705    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
706        return Err(AppError::TooManyChunks {
707            chunks: chunks_created,
708            limit: crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS,
709        });
710    }
711
712    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
713    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
714
    // v1.0.84 (ADR-0042): capture the backend that actually ran the passage
    // embedding (or the chunked batch) so it can populate `backend_invoked`
    // in the response envelope.
718    let skip_embed = crate::embedder::should_skip_embedding_on_failure();
719    let (embedding, backend_invoked_passage): (Option<Vec<f32>>, Option<&str>) = if chunks_info
720        .len()
721        == 1
722    {
723        match crate::embedder::embed_passage_with_embedding_choice(
724            &paths.models,
725            &raw_body,
726            embedding_backend,
727            llm_backend,
728        ) {
729            Ok((v, k)) => (Some(v), Some(k.as_str())),
730            Err(AppError::Validation(msg)) => return Err(AppError::Validation(msg)),
731            Err(e) if skip_embed => {
732                tracing::warn!(error = %e, "embedding failed; --skip-embedding-on-failure active, persisting without embedding");
733                (None, None)
734            }
735            Err(e) => return Err(e),
736        }
737    } else {
738        let chunk_texts: Vec<String> = chunks_info
739            .iter()
740            .map(|c| chunking::chunk_text(&raw_body, c).to_string())
741            .collect();
742        // G42/S2+S3 (v1.0.79): chunks are embedded in dim-adaptive
743        // batches per LLM call (G44: clamp(base*64/dim, 1, base)), with up to
744        // --llm-parallelism bounded subprocesses in flight. The old
745        // serial loop spent SUM(items) wall time; the fan-out spends
746        // roughly MAX(batch).
747        output::emit_progress_i18n(
748            &format!(
749                "Embedding {} chunks in parallel batches (parallelism {})...",
750                chunks_info.len(),
751                args.llm_parallelism
752            ),
753            &format!(
754                "Embedding {} chunks em lotes paralelos (paralelismo {})...",
755                chunks_info.len(),
756                args.llm_parallelism
757            ),
758        );
759        if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
760            if rss > args.max_rss_mb {
761                tracing::error!(target: "remember",
762                    rss_mb = rss,
763                    max_rss_mb = args.max_rss_mb,
764                    "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
765                );
766                return Err(AppError::LowMemory {
767                    available_mb: crate::memory_guard::available_memory_mb(),
768                    required_mb: args.max_rss_mb,
769                });
770            }
771        }
772        match crate::embedder::embed_passages_parallel_with_embedding_choice(
773            &paths.models,
774            &chunk_texts,
775            args.llm_parallelism as usize,
776            crate::embedder::chunk_embed_batch_size(),
777            embedding_backend,
778            llm_backend,
779        ) {
780            Ok(chunk_embeddings) => {
781                output::emit_progress_i18n(
782                    &format!(
783                        "Remember stage: chunk embeddings complete; process RSS {} MB",
784                        crate::memory_guard::current_process_memory_mb().unwrap_or(0)
785                    ),
786                    &format!(
787                        "Stage remember: chunk embeddings completed; process RSS {} MB",
788                        crate::memory_guard::current_process_memory_mb().unwrap_or(0)
789                    ),
790                );
791                let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
792                chunk_embeddings_cache = Some(chunk_embeddings);
793                (Some(aggregated), None)
794            }
795            Err(e) if skip_embed => {
796                tracing::warn!(error = %e, "chunk embedding failed; --skip-embedding-on-failure active, persisting without embedding");
797                (None, None)
798            }
799            Err(e) => return Err(e),
800        }
801    };
802    let body_for_storage = raw_body;
803
804    let memory_type = resolved_type.as_str();
805    let new_memory = NewMemory {
806        namespace: namespace.clone(),
807        name: normalized_name.clone(),
808        memory_type: memory_type.to_string(),
809        description: resolved_description.clone(),
810        body: body_for_storage,
811        body_hash: body_hash.clone(),
812        session_id: args.session_id.clone(),
813        source: "agent".to_string(),
814        metadata,
815    };
816
817    let mut warnings = Vec::with_capacity(4);
818    let mut entities_persisted = 0usize;
819    let mut relationships_persisted = 0usize;
820
821    // G42/S2+A4 (v1.0.79): entity names are SHORT texts — they get their
822    // own batch profile (25 per LLM call) instead of one subprocess per
823    // 3-15 byte name (21 names used to cost ~12 minutes, 46% of the
824    // measured remember total).
825    let entity_texts: Vec<String> = graph
826        .entities
827        .iter()
828        .map(|entity| match &entity.description {
829            Some(desc) => format!("{} {}", entity.name, desc),
830            None => entity.name.clone(),
831        })
832        .collect();
833    // G56 (v1.0.80): route entity-name embedding through the in-process
834    // cache. Repeated `remember` invocations within one CLI process — and
835    // re-embedded entities inside a single batch — skip the LLM call
836    // entirely when the (model, text) pair was already produced. The
837    // chunk body embedding below still uses `embed_passages_parallel_local`
838    // because chunks are unique per memory and the cache hit rate is
839    // effectively zero.
840    let (graph_entity_embeddings, embed_cache_stats) =
841        match crate::embedder::embed_entity_texts_cached(
842            &paths.models,
843            &entity_texts,
844            args.llm_parallelism as usize,
845            embedding_backend,
846            llm_backend,
847        ) {
848            Ok(r) => r,
849            Err(e) if skip_embed => {
850                tracing::warn!(error = %e, "entity embedding failed; --skip-embedding-on-failure active");
851                let empty: Vec<Vec<f32>> = entity_texts.iter().map(|_| vec![]).collect();
852                (empty, crate::embedder::EmbedCacheStats::default())
853            }
854            Err(e) => return Err(e),
855        };
856    if embed_cache_stats.hits > 0 {
857        tracing::debug!(
858            hits = embed_cache_stats.hits,
859            misses = embed_cache_stats.misses,
860            requested = embed_cache_stats.requested,
861            "G56: entity embed cache hit (remember)"
862        );
863    }
864
865    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
866
867    let mut skip_reindex = false;
868    let (memory_id, action, version) = match existing_memory {
869        Some((existing_id, _updated_at, _current_version)) => {
870            if let Some(hash_id) = duplicate_hash_id {
871                if hash_id != existing_id {
872                    warnings.push(format!(
873                        "identical body already exists as memory id {hash_id}"
874                    ));
875                }
876            }
877
878            // C1 fix: capture old values for FTS5 sync before update
879            let (old_fts_name, old_fts_desc, old_fts_body): (String, String, String) = tx
880                .query_row(
881                    "SELECT name, description, body FROM memories WHERE id = ?1",
882                    rusqlite::params![existing_id],
883                    |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
884                )?;
885
886            // G15: skip re-indexing when body hash matches (common in --force-merge loops)
887            let existing_body_hash: Option<String> = tx
888                .query_row(
889                    "SELECT body_hash FROM memories WHERE id = ?1",
890                    rusqlite::params![existing_id],
891                    |r| r.get(0),
892                )
893                .ok();
894            let body_unchanged = existing_body_hash.as_deref() == Some(&body_hash);
895            skip_reindex = body_unchanged;
896            if !body_unchanged {
897                storage_chunks::delete_chunks(&tx, existing_id)?;
898            }
899
900            let next_v = versions::next_version(&tx, existing_id)?;
901            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
902
903            // C1 fix: sync FTS5 external-content index after update
904            // (trg_fts_au trigger is absent by design due to sqlite-vec conflict)
905            memories::sync_fts_after_update(
906                &tx,
907                existing_id,
908                &old_fts_name,
909                &old_fts_desc,
910                &old_fts_body,
911                &normalized_name,
912                &resolved_description,
913                &new_memory.body,
914            )?;
915
916            versions::insert_version(
917                &tx,
918                existing_id,
919                next_v,
920                &normalized_name,
921                memory_type,
922                &resolved_description,
923                &new_memory.body,
924                &serde_json::to_string(&new_memory.metadata)?,
925                None,
926                "edit",
927            )?;
928            if !body_unchanged {
929                if let Some(ref emb) = embedding {
930                    memories::upsert_vec(
931                        &tx,
932                        existing_id,
933                        &namespace,
934                        memory_type,
935                        emb,
936                        &normalized_name,
937                        &snippet,
938                    )?;
939                }
940            }
941            (existing_id, "updated".to_string(), next_v)
942        }
943        None => {
944            if let Some(hash_id) = duplicate_hash_id {
945                warnings.push(format!(
946                    "identical body already exists as memory id {hash_id}"
947                ));
948            }
949            let id = memories::insert(&tx, &new_memory)?;
950            versions::insert_version(
951                &tx,
952                id,
953                1,
954                &normalized_name,
955                memory_type,
956                &resolved_description,
957                &new_memory.body,
958                &serde_json::to_string(&new_memory.metadata)?,
959                None,
960                "create",
961            )?;
962            if let Some(ref emb) = embedding {
963                memories::upsert_vec(
964                    &tx,
965                    id,
966                    &namespace,
967                    memory_type,
968                    emb,
969                    &normalized_name,
970                    &snippet,
971                )?;
972            }
973            (id, "created".to_string(), 1)
974        }
975    };
976
977    // GAP-SG-51: when --force-merge --replace-graph updates an existing memory,
978    // clear its prior entity/relationship bindings BEFORE re-linking the supplied
979    // set. With an empty `entities`/`relationships` payload this zeroes the graph
980    // for that memory without a `forget`. New bindings (if any) are linked by the
981    // block further below.
982    if args.replace_graph && action == "updated" {
983        let (e_removed, r_removed) = entities::clear_memory_graph_bindings(&tx, memory_id)?;
984        if e_removed + r_removed > 0 {
985            warnings.push(format!(
986                "--replace-graph cleared {e_removed} entity binding(s) and {r_removed} relationship binding(s) before re-linking"
987            ));
988        }
989    }
990
991    if chunks_info.len() > 1 && !skip_reindex {
992        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
993
994        if let Some(chunk_embeddings) = chunk_embeddings_cache.take() {
995            for (i, emb) in chunk_embeddings.iter().enumerate() {
996                storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
997            }
998        }
999        output::emit_progress_i18n(
1000            &format!(
1001                "Remember stage: persisted chunk vectors; process RSS {} MB",
1002                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
1003            ),
1004            &format!(
1005                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
1006                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
1007            ),
1008        );
1009    }
1010
1011    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
1012        for entity in &graph.entities {
1013            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
1014            let entity_embedding = &graph_entity_embeddings[entities_persisted];
1015            entities::upsert_entity_vec(
1016                &tx,
1017                entity_id,
1018                &namespace,
1019                entity.entity_type,
1020                entity_embedding,
1021                &entity.name,
1022            )?;
1023            entities::link_memory_entity(&tx, memory_id, entity_id)?;
1024            entities_persisted += 1;
1025        }
1026        let entity_types: std::collections::HashMap<&str, EntityType> = graph
1027            .entities
1028            .iter()
1029            .map(|entity| (entity.name.as_str(), entity.entity_type))
1030            .collect();
1031
1032        let mut affected_entity_ids: std::collections::HashSet<i64> =
1033            std::collections::HashSet::new();
1034        for entity in &graph.entities {
1035            if let Some(eid) = entities::find_entity_id(&tx, &namespace, &entity.name)? {
1036                affected_entity_ids.insert(eid);
1037            }
1038        }
1039
1040        for rel in &graph.relationships {
1041            let source_entity = NewEntity {
1042                name: rel.source.clone(),
1043                entity_type: entity_types
1044                    .get(rel.source.as_str())
1045                    .copied()
1046                    .unwrap_or(EntityType::Concept),
1047                description: None,
1048            };
1049            let target_entity = NewEntity {
1050                name: rel.target.clone(),
1051                entity_type: entity_types
1052                    .get(rel.target.as_str())
1053                    .copied()
1054                    .unwrap_or(EntityType::Concept),
1055                description: None,
1056            };
1057            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
1058            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
1059            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
1060            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
1061            affected_entity_ids.insert(source_id);
1062            affected_entity_ids.insert(target_id);
1063            relationships_persisted += 1;
1064        }
1065
1066        for &eid in &affected_entity_ids {
1067            entities::recalculate_degree(&tx, eid)?;
1068        }
1069    }
1070    tx.commit()?;
1071
1072    // GAP-SG-40: read back the real chunk-row count now that the write is
1073    // durable, so `chunks_persisted` reflects observable state (0 for inline
1074    // single-chunk bodies, the exact row count for multi-chunk bodies).
1075    let chunks_persisted = storage_chunks::count_for_memory(&conn, memory_id)?;
1076
1077    // GAP-SG-44: confirm the memory has a persisted embedding vector. A missing
1078    // vector (embedding step failed/skipped) makes the memory unsearchable
1079    // silently; surface it as a warning recommending re-embed instead of leaving
1080    // `health` to report `vec_memories_missing` later.
1081    if !new_memory.body.trim().is_empty() {
1082        let has_vec: bool = conn
1083            .query_row(
1084                "SELECT EXISTS(SELECT 1 FROM memory_embeddings WHERE memory_id = ?1)",
1085                rusqlite::params![memory_id],
1086                |r| r.get::<_, i64>(0).map(|v| v > 0),
1087            )
1088            .unwrap_or(false);
1089        if !has_vec {
1090            tracing::warn!(target: "remember",
1091                memory_id,
1092                name = %normalized_name,
1093                "memory persisted without an embedding vector; recall will be degraded until re-embedded"
1094            );
1095            warnings.push(
1096                "memory persisted without an embedding vector; run `enrich --operation re-embed` to make it searchable"
1097                    .to_string(),
1098            );
1099        }
1100    }
1101
1102    // GAP-SG-13: when --force-merge UPDATES an existing memory its body/graph may
1103    // have changed, so drop any stale enrich-queue sidecar entry keyed to it. The
1104    // next enrich run re-scans it cleanly. Best-effort; no-op when the queue file
1105    // is absent.
1106    if action == "updated" {
1107        crate::commands::enrich::cleanup_queue_entry(&paths.db, memory_id, &normalized_name);
1108    }
1109
1110    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
1111    // Failures do not propagate — non-critical path with graceful degradation.
1112    let urls_persisted = if !extracted_urls.is_empty() {
1113        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
1114            .into_iter()
1115            .map(|u| storage_urls::MemoryUrl {
1116                url: u.url,
1117                offset: Some(u.start as i64),
1118            })
1119            .collect();
1120        storage_urls::insert_urls(&conn, memory_id, &url_entries)
1121    } else {
1122        0
1123    };
1124
1125    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
1126
1127    let created_at_epoch = chrono::Utc::now().timestamp();
1128    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
1129
1130    output::emit_json(&RememberResponse {
1131        memory_id,
1132        // Persist the normalized (kebab-case) slug as `name` since that is the
1133        // storage key. The original input is exposed via `original_name` only
1134        // when normalization actually changed something (B_4 in v1.0.32).
1135        name: normalized_name.clone(),
1136        namespace,
1137        action: action.clone(),
1138        operation: action,
1139        version,
1140        entities_persisted,
1141        relationships_persisted,
1142        relationships_truncated,
1143        chunks_created,
1144        chunks_persisted,
1145        urls_persisted,
1146        extraction_method,
1147        merged_into_memory_id: None,
1148        warnings,
1149        created_at: created_at_epoch,
1150        created_at_iso,
1151        elapsed_ms: inicio.elapsed().as_millis() as u64,
1152        name_was_normalized,
1153        original_name: name_was_normalized.then_some(original_name),
1154        backend_invoked: backend_invoked_passage,
1155    })?;
1156
1157    Ok(())
1158}
1159
1160#[cfg(test)]
1161mod tests {
1162    use crate::output::RememberResponse;
1163
1164    /// GAP-SG-37: replicates the `--strict-name` guard predicate so the
1165    /// reject-on-normalization decision is unit-testable without a DB.
1166    fn strict_name_rejects(strict: bool, name_was_normalized: bool) -> bool {
1167        strict && name_was_normalized
1168    }
1169
1170    #[test]
1171    fn strict_name_rejects_only_when_name_would_change() {
1172        assert!(
1173            strict_name_rejects(true, true),
1174            "strict + changed must reject"
1175        );
1176        assert!(
1177            !strict_name_rejects(true, false),
1178            "strict + canonical passes"
1179        );
1180        assert!(
1181            !strict_name_rejects(false, true),
1182            "non-strict always passes"
1183        );
1184        assert!(!strict_name_rejects(false, false));
1185    }
1186
1187    // GAP-SG-37/SG-51: --strict-name and --replace-graph must parse on remember.
1188    #[test]
1189    fn remember_parses_strict_name_and_replace_graph_flags() {
1190        use crate::cli::{Cli, Commands};
1191        use clap::Parser;
1192        let cli = Cli::try_parse_from([
1193            "sqlite-graphrag",
1194            "remember",
1195            "--name",
1196            "my-mem",
1197            "--type",
1198            "note",
1199            "--description",
1200            "d",
1201            "--body",
1202            "b",
1203            "--strict-name",
1204            "--replace-graph",
1205            "--force-merge",
1206        ])
1207        .expect("parse");
1208        match cli.command {
1209            Some(Commands::Remember(a)) => {
1210                assert!(a.strict_name);
1211                assert!(a.replace_graph);
1212                assert!(a.force_merge);
1213            }
1214            other => panic!("expected remember, got {other:?}"),
1215        }
1216    }
1217
1218    #[test]
1219    fn remember_response_serializes_required_fields() {
1220        let resp = RememberResponse {
1221            memory_id: 42,
1222            name: "minha-mem".to_string(),
1223            namespace: "global".to_string(),
1224            action: "created".to_string(),
1225            operation: "created".to_string(),
1226            version: 1,
1227            entities_persisted: 0,
1228            relationships_persisted: 0,
1229            relationships_truncated: false,
1230            chunks_created: 1,
1231            chunks_persisted: 0,
1232            urls_persisted: 0,
1233            extraction_method: None,
1234            merged_into_memory_id: None,
1235            warnings: vec![],
1236            created_at: 1_705_320_000,
1237            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
1238            elapsed_ms: 55,
1239            name_was_normalized: false,
1240            original_name: None,
1241            backend_invoked: None,
1242        };
1243
1244        let json = serde_json::to_value(&resp).expect("serialization failed");
1245        assert_eq!(json["memory_id"], 42);
1246        assert_eq!(json["action"], "created");
1247        assert_eq!(json["operation"], "created");
1248        assert_eq!(json["version"], 1);
1249        assert_eq!(json["elapsed_ms"], 55u64);
1250        assert!(json["warnings"].is_array());
1251        assert!(json["merged_into_memory_id"].is_null());
1252    }
1253
1254    #[test]
1255    fn remember_response_action_e_operation_sao_aliases() {
1256        let resp = RememberResponse {
1257            memory_id: 1,
1258            name: "mem".to_string(),
1259            namespace: "global".to_string(),
1260            action: "updated".to_string(),
1261            operation: "updated".to_string(),
1262            version: 2,
1263            entities_persisted: 3,
1264            relationships_persisted: 1,
1265            relationships_truncated: false,
1266            extraction_method: None,
1267            chunks_created: 2,
1268            chunks_persisted: 2,
1269            urls_persisted: 0,
1270            merged_into_memory_id: None,
1271            warnings: vec![],
1272            created_at: 0,
1273            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1274            elapsed_ms: 0,
1275            name_was_normalized: false,
1276            original_name: None,
1277            backend_invoked: None,
1278        };
1279
1280        let json = serde_json::to_value(&resp).expect("serialization failed");
1281        assert_eq!(
1282            json["action"], json["operation"],
1283            "action e operation devem ser iguais"
1284        );
1285        assert_eq!(json["entities_persisted"], 3);
1286        assert_eq!(json["relationships_persisted"], 1);
1287        assert_eq!(json["chunks_created"], 2);
1288    }
1289
1290    #[test]
1291    fn remember_response_warnings_lista_mensagens() {
1292        let resp = RememberResponse {
1293            memory_id: 5,
1294            name: "dup-mem".to_string(),
1295            namespace: "global".to_string(),
1296            action: "created".to_string(),
1297            operation: "created".to_string(),
1298            version: 1,
1299            entities_persisted: 0,
1300            extraction_method: None,
1301            relationships_persisted: 0,
1302            relationships_truncated: false,
1303            chunks_created: 1,
1304            chunks_persisted: 0,
1305            urls_persisted: 0,
1306            merged_into_memory_id: None,
1307            warnings: vec!["identical body already exists as memory id 3".to_string()],
1308            created_at: 0,
1309            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1310            elapsed_ms: 10,
1311            name_was_normalized: false,
1312            original_name: None,
1313            backend_invoked: None,
1314        };
1315
1316        let json = serde_json::to_value(&resp).expect("serialization failed");
1317        let warnings = json["warnings"]
1318            .as_array()
1319            .expect("warnings deve ser array");
1320        assert_eq!(warnings.len(), 1);
1321        assert!(warnings[0].as_str().unwrap().contains("identical body"));
1322    }
1323
1324    #[test]
1325    fn invalid_name_reserved_prefix_returns_validation_error() {
1326        use crate::errors::AppError;
1327        // Validates the rejection logic for names with the "__" prefix directly
1328        let nome = "__reservado";
1329        let resultado: Result<(), AppError> = if nome.starts_with("__") {
1330            Err(AppError::Validation(
1331                crate::i18n::validation::reserved_name(),
1332            ))
1333        } else {
1334            Ok(())
1335        };
1336        assert!(resultado.is_err());
1337        if let Err(AppError::Validation(msg)) = resultado {
1338            assert!(!msg.is_empty());
1339        }
1340    }
1341
1342    #[test]
1343    fn name_too_long_returns_validation_error() {
1344        use crate::errors::AppError;
1345        let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
1346        let resultado: Result<(), AppError> =
1347            if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
1348                Err(AppError::Validation(crate::i18n::validation::name_length(
1349                    crate::constants::MAX_MEMORY_NAME_LEN,
1350                )))
1351            } else {
1352                Ok(())
1353            };
1354        assert!(resultado.is_err());
1355    }
1356
1357    #[test]
1358    fn remember_response_merged_into_memory_id_some_serializes_integer() {
1359        let resp = RememberResponse {
1360            memory_id: 10,
1361            name: "mem-mergeada".to_string(),
1362            namespace: "global".to_string(),
1363            action: "updated".to_string(),
1364            operation: "updated".to_string(),
1365            version: 3,
1366            extraction_method: None,
1367            entities_persisted: 0,
1368            relationships_persisted: 0,
1369            relationships_truncated: false,
1370            chunks_created: 1,
1371            chunks_persisted: 0,
1372            urls_persisted: 0,
1373            merged_into_memory_id: Some(7),
1374            warnings: vec![],
1375            created_at: 0,
1376            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1377            elapsed_ms: 0,
1378            name_was_normalized: false,
1379            original_name: None,
1380            backend_invoked: None,
1381        };
1382
1383        let json = serde_json::to_value(&resp).expect("serialization failed");
1384        assert_eq!(json["merged_into_memory_id"], 7);
1385    }
1386
1387    #[test]
1388    fn remember_response_urls_persisted_serializes_field() {
1389        // v1.0.24 P0-2: garante que urls_persisted aparece no JSON e aceita valor > 0.
1390        let resp = RememberResponse {
1391            memory_id: 3,
1392            name: "mem-com-urls".to_string(),
1393            namespace: "global".to_string(),
1394            action: "created".to_string(),
1395            operation: "created".to_string(),
1396            version: 1,
1397            entities_persisted: 0,
1398            relationships_persisted: 0,
1399            relationships_truncated: false,
1400            chunks_created: 1,
1401            chunks_persisted: 0,
1402            urls_persisted: 3,
1403            extraction_method: Some("regex-only".to_string()),
1404            merged_into_memory_id: None,
1405            warnings: vec![],
1406            created_at: 0,
1407            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1408            elapsed_ms: 0,
1409            name_was_normalized: false,
1410            original_name: None,
1411            backend_invoked: None,
1412        };
1413        let json = serde_json::to_value(&resp).expect("serialization failed");
1414        assert_eq!(json["urls_persisted"], 3);
1415    }
1416
1417    #[test]
1418    fn empty_name_after_normalization_returns_specific_message() {
1419        // P0-4 regression: name consisting only of hyphens normalizes to empty string;
1420        // must produce a distinct error message, not the "too long" message.
1421        use crate::errors::AppError;
1422        let normalized = "---".to_lowercase().replace(['_', ' '], "-");
1423        let normalized = normalized.trim_matches('-').to_string();
1424        let resultado: Result<(), AppError> = if normalized.is_empty() {
1425            Err(AppError::Validation(
1426                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
1427            ))
1428        } else {
1429            Ok(())
1430        };
1431        assert!(resultado.is_err());
1432        if let Err(AppError::Validation(msg)) = resultado {
1433            assert!(
1434                msg.contains("empty after normalization"),
1435                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
1436            );
1437        }
1438    }
1439
1440    #[test]
1441    fn name_only_underscores_after_normalization_returns_specific_message() {
1442        // P0-4 regression: name consisting only of underscores normalizes to empty string.
1443        use crate::errors::AppError;
1444        let normalized = "___".to_lowercase().replace(['_', ' '], "-");
1445        let normalized = normalized.trim_matches('-').to_string();
1446        assert!(
1447            normalized.is_empty(),
1448            "underscores devem normalizar para string vazia"
1449        );
1450        let resultado: Result<(), AppError> = if normalized.is_empty() {
1451            Err(AppError::Validation(
1452                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
1453            ))
1454        } else {
1455            Ok(())
1456        };
1457        assert!(resultado.is_err());
1458        if let Err(AppError::Validation(msg)) = resultado {
1459            assert!(
1460                msg.contains("empty after normalization"),
1461                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
1462            );
1463        }
1464    }
1465
1466    #[test]
1467    fn remember_response_relationships_truncated_serializes_field() {
1468        // P1-D: garante que relationships_truncated aparece no JSON como bool.
1469        let resp_false = RememberResponse {
1470            memory_id: 1,
1471            name: "test".to_string(),
1472            namespace: "global".to_string(),
1473            action: "created".to_string(),
1474            operation: "created".to_string(),
1475            version: 1,
1476            entities_persisted: 2,
1477            relationships_persisted: 1,
1478            relationships_truncated: false,
1479            chunks_created: 1,
1480            chunks_persisted: 0,
1481            urls_persisted: 0,
1482            extraction_method: None,
1483            merged_into_memory_id: None,
1484            warnings: vec![],
1485            created_at: 0,
1486            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
1487            elapsed_ms: 0,
1488            name_was_normalized: false,
1489            original_name: None,
1490            backend_invoked: None,
1491        };
1492        let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
1493        assert_eq!(json_false["relationships_truncated"], false);
1494
1495        let resp_true = RememberResponse {
1496            relationships_truncated: true,
1497            ..resp_false
1498        };
1499        let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
1500        assert_eq!(json_true["relationships_truncated"], true);
1501    }
1502
1503    // GAP-08: body-preservation predicate tests.
1504    // Verifies the decision logic that determines whether an existing body should
1505    // be kept instead of overwritten with an empty incoming body during --force-merge.
1506
1507    /// Returns `true` when the existing body should be preserved.
1508    ///
1509    /// Mirrors the `body_will_be_preserved` expression in `run()` so the logic
1510    /// is testable without a real database connection.
1511    fn should_preserve_body(force_merge: bool, raw_body_is_empty: bool, clear_body: bool) -> bool {
1512        force_merge && raw_body_is_empty && !clear_body
1513    }
1514
1515    #[test]
1516    fn gap08_empty_body_force_merge_no_clear_body_preserves() {
1517        // Caller passes no body with --force-merge but without --clear-body.
1518        // The existing body in the DB must be kept.
1519        assert!(
1520            should_preserve_body(true, true, false),
1521            "empty body + force-merge + no clear-body should trigger preservation"
1522        );
1523    }
1524
1525    #[test]
1526    fn gap08_empty_body_force_merge_with_clear_body_does_not_preserve() {
1527        // Caller explicitly passes --clear-body; intentional wipe is honoured.
1528        assert!(
1529            !should_preserve_body(true, true, true),
1530            "--clear-body must bypass preservation"
1531        );
1532    }
1533
1534    #[test]
1535    fn gap08_non_empty_body_force_merge_does_not_preserve() {
1536        // Caller provides a real body; it must overwrite the existing one.
1537        assert!(
1538            !should_preserve_body(true, false, false),
1539            "non-empty body must overwrite, not preserve"
1540        );
1541    }
1542
1543    #[test]
1544    fn gap08_empty_body_no_force_merge_does_not_preserve() {
1545        // Without --force-merge the path is a fresh create; no preservation needed.
1546        assert!(
1547            !should_preserve_body(false, true, false),
1548            "no --force-merge means no preservation logic applies"
1549        );
1550    }
1551}