Skip to main content

sqlite_graphrag/commands/
remember.rs

1//! Handler for the `remember` CLI subcommand.
2
3use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Maps the number of chunks produced for a body to the number of rows that
/// end up in `memory_chunks`.
///
/// A body that yields at most one chunk is stored in the `memories` row
/// itself, so nothing is appended to `memory_chunks` and the result is `0`.
/// A body that yields two or more chunks has every chunk persisted, so the
/// result equals `chunks_created`.
///
/// Kept as a standalone function so the H-M8 invariant can be unit-tested
/// without running the full handler. The schema entry for `chunks_persisted`
/// documents the same contract (see `docs/schemas/remember.schema.json`).
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        multi => multi,
    }
}
32
33#[derive(clap::Args)]
34#[command(after_long_help = "EXAMPLES:\n  \
35    # Create a memory with inline body\n  \
36    sqlite-graphrag remember --name design-auth --type decision \\\n    \
37    --description \"auth design\" --body \"JWT for stateless auth\"\n\n  \
38    # Create with curated graph via --graph-stdin\n  \
39    echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
40    sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n  \
41    # Enable GLiNER NER extraction with --graph-stdin\n  \
42    echo '{\"body\":\"Alice from Microsoft...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
43    sqlite-graphrag remember --name ner-test --type note --description \"test\" \\\n    \
44    --graph-stdin --enable-ner --gliner-variant int8\n\n  \
45    # Idempotent upsert with --force-merge\n  \
46    sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n    \
47    --body \"new content\" --force-merge")]
48pub struct RememberArgs {
49    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
50    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
51    #[arg(long)]
52    pub name: String,
53    #[arg(
54        long,
55        value_enum,
56        long_help = "Memory kind stored in `memories.type`. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
57    )]
58    pub r#type: MemoryType,
59    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
60    #[arg(long)]
61    pub description: String,
62    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
63    /// Maximum 512000 bytes; rejected if empty without an external graph.
64    #[arg(
65        long,
66        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
67        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
68    )]
69    pub body: Option<String>,
70    #[arg(
71        long,
72        help = "Read body from a file instead of --body",
73        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
74    )]
75    pub body_file: Option<std::path::PathBuf>,
76    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
77    /// Mutually exclusive with --body, --body-file, --graph-stdin.
78    #[arg(
79        long,
80        conflicts_with_all = ["body", "body_file", "graph_stdin"]
81    )]
82    pub body_stdin: bool,
83    #[arg(
84        long,
85        help = "JSON file containing entities to associate with this memory"
86    )]
87    pub entities_file: Option<std::path::PathBuf>,
88    #[arg(
89        long,
90        help = "JSON file containing relationships to associate with this memory"
91    )]
92    pub relationships_file: Option<std::path::PathBuf>,
93    #[arg(
94        long,
95        help = "Read graph JSON (body + entities + relationships) from stdin",
96        conflicts_with_all = [
97            "body",
98            "body_file",
99            "body_stdin",
100            "entities_file",
101            "relationships_file"
102        ]
103    )]
104    pub graph_stdin: bool,
105    #[arg(
106        long,
107        help = "Namespace (env: SQLITE_GRAPHRAG_NAMESPACE, default: global)"
108    )]
109    pub namespace: Option<String>,
110    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
111    #[arg(long)]
112    pub metadata: Option<String>,
113    #[arg(long, help = "JSON file containing metadata key-value pairs")]
114    pub metadata_file: Option<std::path::PathBuf>,
115    #[arg(long)]
116    pub force_merge: bool,
117    #[arg(
118        long,
119        value_name = "EPOCH_OR_RFC3339",
120        value_parser = crate::parsers::parse_expected_updated_at,
121        long_help = "Optimistic lock: reject if updated_at does not match. \
122Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
123    )]
124    pub expected_updated_at: Option<i64>,
125    #[arg(
126        long,
127        env = "SQLITE_GRAPHRAG_ENABLE_NER",
128        value_parser = crate::parsers::parse_bool_flexible,
129        action = clap::ArgAction::Set,
130        num_args = 0..=1,
131        default_missing_value = "true",
132        default_value = "false",
133        help = "Enable automatic GLiNER NER entity/relationship extraction from body"
134    )]
135    pub enable_ner: bool,
136    #[arg(
137        long,
138        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
139        default_value = "fp32",
140        help = "GLiNER model variant: fp32 (1.1GB, best quality), fp16 (580MB), int8 (349MB, fastest but may miss entities on short texts), q4, q4f16"
141    )]
142    pub gliner_variant: String,
143    #[arg(long, hide = true)]
144    pub skip_extraction: bool,
145    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
146    #[arg(long)]
147    pub session_id: Option<String>,
148    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
149    pub format: JsonOutputFormat,
150    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
151    pub json: bool,
152    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
153    pub db: Option<String>,
154    /// Maximum process RSS in MiB; abort if exceeded during embedding.
155    #[arg(long, default_value_t = crate::constants::DEFAULT_MAX_RSS_MB,
156          help = "Maximum process RSS in MiB; abort if exceeded during embedding (default: 8192)")]
157    pub max_rss_mb: u64,
158}
159
160#[derive(Deserialize, Default)]
161#[serde(deny_unknown_fields)]
162struct GraphInput {
163    #[serde(default)]
164    body: Option<String>,
165    #[serde(default)]
166    entities: Vec<NewEntity>,
167    #[serde(default)]
168    relationships: Vec<NewRelationship>,
169}
170
171fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
172    for rel in &mut graph.relationships {
173        rel.relation = crate::parsers::normalize_relation(&rel.relation);
174        if let Err(e) = crate::parsers::validate_relation_format(&rel.relation) {
175            return Err(AppError::Validation(format!(
176                "{e} for relationship '{}' -> '{}'",
177                rel.source, rel.target
178            )));
179        }
180        crate::parsers::warn_if_non_canonical(&rel.relation);
181        if !(0.0..=1.0).contains(&rel.strength) {
182            return Err(AppError::Validation(format!(
183                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
184                rel.strength, rel.source, rel.target
185            )));
186        }
187    }
188
189    Ok(())
190}
191
192pub fn run(args: RememberArgs) -> Result<(), AppError> {
193    use crate::constants::*;
194
195    let inicio = std::time::Instant::now();
196    let _ = args.format;
197    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
198
199    // Capture the original `--name` before normalization so the JSON response can
200    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
201    // an owned String because `args.name` is moved into the response below.
202    let original_name = args.name.clone();
203
204    // Auto-normalize to kebab-case before validation (P2-H).
205    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
206    // after truncation by a long filename ending in a hyphen.
207    let normalized_name = {
208        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
209        let trimmed = lower.trim_matches('-').to_string();
210        if trimmed != args.name {
211            tracing::warn!(
212                original = %args.name,
213                normalized = %trimmed,
214                "name auto-normalized to kebab-case"
215            );
216        }
217        trimmed
218    };
219    let name_was_normalized = normalized_name != original_name;
220
221    if normalized_name.is_empty() {
222        return Err(AppError::Validation(
223            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
224        ));
225    }
226    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
227        return Err(AppError::LimitExceeded(
228            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
229        ));
230    }
231
232    if normalized_name.starts_with("__") {
233        return Err(AppError::Validation(
234            crate::i18n::validation::reserved_name(),
235        ));
236    }
237
238    {
239        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
240            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
241        if !slug_re.is_match(&normalized_name) {
242            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
243                &normalized_name,
244            )));
245        }
246    }
247
248    if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
249        return Err(AppError::Validation(
250            crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
251        ));
252    }
253
254    let mut raw_body = if let Some(b) = args.body {
255        b
256    } else if let Some(path) = args.body_file {
257        std::fs::read_to_string(&path).map_err(AppError::Io)?
258    } else if args.body_stdin || args.graph_stdin {
259        crate::stdin_helper::read_stdin_with_timeout(60)?
260    } else {
261        String::new()
262    };
263
264    let entities_provided_externally =
265        args.entities_file.is_some() || args.relationships_file.is_some() || args.graph_stdin;
266
267    let mut graph = GraphInput::default();
268    if let Some(path) = args.entities_file {
269        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
270        graph.entities = serde_json::from_str(&content)?;
271    }
272    if let Some(path) = args.relationships_file {
273        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
274        graph.relationships = serde_json::from_str(&content)?;
275    }
276    if args.graph_stdin {
277        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
278            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
279        })?;
280        raw_body = graph.body.take().unwrap_or_default();
281    }
282
283    if graph.entities.len() > max_entities_per_memory() {
284        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
285            max_entities_per_memory(),
286        )));
287    }
288    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
289        return Err(AppError::LimitExceeded(
290            errors_msg::relationship_limit_exceeded(MAX_RELATIONSHIPS_PER_MEMORY),
291        ));
292    }
293    normalize_and_validate_graph_input(&mut graph)?;
294
295    if raw_body.len() > MAX_MEMORY_BODY_LEN {
296        return Err(AppError::LimitExceeded(
297            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
298        ));
299    }
300
301    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
302    // Without this check, empty embeddings would be persisted, breaking recall semantics.
303    if !entities_provided_externally && graph.entities.is_empty() && raw_body.trim().is_empty() {
304        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
305    }
306
307    let metadata: serde_json::Value = if let Some(m) = args.metadata {
308        serde_json::from_str(&m)?
309    } else if let Some(path) = args.metadata_file {
310        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
311        serde_json::from_str(&content)?
312    } else {
313        serde_json::json!({})
314    };
315
316    let body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
317    let snippet: String = raw_body.chars().take(200).collect();
318
319    let paths = AppPaths::resolve(args.db.as_deref())?;
320    paths.ensure_dirs()?;
321
322    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
323    let mut extraction_method: Option<String> = None;
324    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::with_capacity(4);
325    let mut relationships_truncated = false;
326    if args.enable_ner && args.skip_extraction {
327        tracing::warn!(
328            "--enable-ner and --skip-extraction are contradictory; --enable-ner takes precedence"
329        );
330    }
331    if args.skip_extraction && !args.enable_ner {
332        tracing::warn!("--skip-extraction is deprecated and has no effect (NER is disabled by default since v1.0.45); remove this flag");
333    }
334    let gliner_variant: crate::extraction::GlinerVariant =
335        args.gliner_variant.parse().unwrap_or_else(|e| {
336            tracing::warn!("invalid --gliner-variant: {e}; using fp32");
337            crate::extraction::GlinerVariant::Fp32
338        });
339    if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
340        match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
341            Ok(extracted) => {
342                extraction_method = Some(extracted.extraction_method.clone());
343                extracted_urls = extracted.urls;
344                graph.entities = extracted.entities;
345                graph.relationships = extracted.relationships;
346                relationships_truncated = extracted.relationships_truncated;
347
348                if graph.entities.len() > max_entities_per_memory() {
349                    graph.entities.truncate(max_entities_per_memory());
350                }
351                if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
352                    relationships_truncated = true;
353                    graph.relationships.truncate(MAX_RELATIONSHIPS_PER_MEMORY);
354                }
355                normalize_and_validate_graph_input(&mut graph)?;
356            }
357            Err(e) => {
358                tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
359                extraction_method = Some("none:extraction-failed".to_string());
360            }
361        }
362    }
363
364    let mut conn = open_rw(&paths.db)?;
365    ensure_schema(&mut conn)?;
366
367    {
368        use crate::constants::MAX_NAMESPACES_ACTIVE;
369        let active_count: u32 = conn.query_row(
370            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
371            [],
372            |r| r.get::<_, i64>(0).map(|v| v as u32),
373        )?;
374        let ns_exists: bool = conn.query_row(
375            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
376            rusqlite::params![namespace],
377            |r| r.get::<_, i64>(0).map(|v| v > 0),
378        )?;
379        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
380            return Err(AppError::NamespaceError(format!(
381                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
382            )));
383        }
384    }
385
386    // M7: detect soft-deleted memory before the standard duplicate check.
387    if let Some((sd_id, true)) =
388        memories::find_by_name_any_state(&conn, &namespace, &normalized_name)?
389    {
390        if args.force_merge {
391            memories::clear_deleted_at(&conn, sd_id)?;
392        } else {
393            return Err(AppError::Duplicate(
394                errors_msg::duplicate_memory_soft_deleted(&normalized_name, &namespace),
395            ));
396        }
397    }
398
399    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
400    if existing_memory.is_some() && !args.force_merge {
401        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
402            &normalized_name,
403            &namespace,
404        )));
405    }
406
407    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
408
409    output::emit_progress_i18n(
410        &format!(
411            "Remember stage: validated input; available memory {} MB",
412            crate::memory_guard::available_memory_mb()
413        ),
414        &format!(
415            "Stage remember: input validated; available memory {} MB",
416            crate::memory_guard::available_memory_mb()
417        ),
418    );
419
420    let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
421    let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
422    let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
423    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
424    let chunks_created = chunks_info.len();
425    // For single-chunk bodies the memory row itself stores the content and no
426    // entry is appended to `memory_chunks` (see line ~545). For multi-chunk
427    // bodies every chunk is persisted via `insert_chunk_slices`.
428    let chunks_persisted = compute_chunks_persisted(chunks_info.len());
429
430    output::emit_progress_i18n(
431        &format!(
432            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
433            chunks_created,
434            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
435        ),
436        &format!(
437            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
438            chunks_created,
439            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
440        ),
441    );
442
443    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
444        return Err(AppError::LimitExceeded(format!(
445            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
446            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
447        )));
448    }
449
450    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
451    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
452
453    let embedding = if chunks_info.len() == 1 {
454        crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
455    } else {
456        let chunk_texts: Vec<&str> = chunks_info
457            .iter()
458            .map(|c| chunking::chunk_text(&raw_body, c))
459            .collect();
460        output::emit_progress_i18n(
461            &format!(
462                "Embedding {} chunks serially to keep memory bounded...",
463                chunks_info.len()
464            ),
465            &format!(
466                "Embedding {} chunks serially to keep memory bounded...",
467                chunks_info.len()
468            ),
469        );
470        let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
471        for chunk_text in &chunk_texts {
472            if let Some(rss) = crate::memory_guard::current_process_memory_mb() {
473                if rss > args.max_rss_mb {
474                    tracing::error!(
475                        rss_mb = rss,
476                        max_rss_mb = args.max_rss_mb,
477                        "RSS exceeded --max-rss-mb threshold; aborting to prevent system instability"
478                    );
479                    return Err(AppError::LowMemory {
480                        available_mb: crate::memory_guard::available_memory_mb(),
481                        required_mb: args.max_rss_mb,
482                    });
483                }
484            }
485            chunk_embeddings.push(crate::daemon::embed_passage_or_local(
486                &paths.models,
487                chunk_text,
488            )?);
489        }
490        output::emit_progress_i18n(
491            &format!(
492                "Remember stage: chunk embeddings complete; process RSS {} MB",
493                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
494            ),
495            &format!(
496                "Stage remember: chunk embeddings completed; process RSS {} MB",
497                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
498            ),
499        );
500        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
501        chunk_embeddings_cache = Some(chunk_embeddings);
502        aggregated
503    };
504    let body_for_storage = raw_body;
505
506    let memory_type = args.r#type.as_str();
507    let new_memory = NewMemory {
508        namespace: namespace.clone(),
509        name: normalized_name.clone(),
510        memory_type: memory_type.to_string(),
511        description: args.description.clone(),
512        body: body_for_storage,
513        body_hash: body_hash.clone(),
514        session_id: args.session_id.clone(),
515        source: "agent".to_string(),
516        metadata,
517    };
518
519    let mut warnings = Vec::new();
520    let mut entities_persisted = 0usize;
521    let mut relationships_persisted = 0usize;
522
523    let graph_entity_embeddings = graph
524        .entities
525        .iter()
526        .map(|entity| {
527            let entity_text = match &entity.description {
528                Some(desc) => format!("{} {}", entity.name, desc),
529                None => entity.name.clone(),
530            };
531            crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
532        })
533        .collect::<Result<Vec<_>, _>>()?;
534
535    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
536
537    let (memory_id, action, version) = match existing_memory {
538        Some((existing_id, _updated_at, _current_version)) => {
539            if let Some(hash_id) = duplicate_hash_id {
540                if hash_id != existing_id {
541                    warnings.push(format!(
542                        "identical body already exists as memory id {hash_id}"
543                    ));
544                }
545            }
546
547            storage_chunks::delete_chunks(&tx, existing_id)?;
548
549            let next_v = versions::next_version(&tx, existing_id)?;
550            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
551            versions::insert_version(
552                &tx,
553                existing_id,
554                next_v,
555                &normalized_name,
556                memory_type,
557                &args.description,
558                &new_memory.body,
559                &serde_json::to_string(&new_memory.metadata)?,
560                None,
561                "edit",
562            )?;
563            memories::upsert_vec(
564                &tx,
565                existing_id,
566                &namespace,
567                memory_type,
568                &embedding,
569                &normalized_name,
570                &snippet,
571            )?;
572            (existing_id, "updated".to_string(), next_v)
573        }
574        None => {
575            if let Some(hash_id) = duplicate_hash_id {
576                warnings.push(format!(
577                    "identical body already exists as memory id {hash_id}"
578                ));
579            }
580            let id = memories::insert(&tx, &new_memory)?;
581            versions::insert_version(
582                &tx,
583                id,
584                1,
585                &normalized_name,
586                memory_type,
587                &args.description,
588                &new_memory.body,
589                &serde_json::to_string(&new_memory.metadata)?,
590                None,
591                "create",
592            )?;
593            memories::upsert_vec(
594                &tx,
595                id,
596                &namespace,
597                memory_type,
598                &embedding,
599                &normalized_name,
600                &snippet,
601            )?;
602            (id, "created".to_string(), 1)
603        }
604    };
605
606    if chunks_info.len() > 1 {
607        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
608
609        let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
610            AppError::Internal(anyhow::anyhow!(
611                "chunk embeddings cache missing in multi-chunk remember path"
612            ))
613        })?;
614
615        for (i, emb) in chunk_embeddings.iter().enumerate() {
616            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
617        }
618        output::emit_progress_i18n(
619            &format!(
620                "Remember stage: persisted chunk vectors; process RSS {} MB",
621                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
622            ),
623            &format!(
624                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
625                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
626            ),
627        );
628    }
629
630    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
631        for entity in &graph.entities {
632            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
633            let entity_embedding = &graph_entity_embeddings[entities_persisted];
634            entities::upsert_entity_vec(
635                &tx,
636                entity_id,
637                &namespace,
638                entity.entity_type,
639                entity_embedding,
640                &entity.name,
641            )?;
642            entities::link_memory_entity(&tx, memory_id, entity_id)?;
643            entities::increment_degree(&tx, entity_id)?;
644            entities_persisted += 1;
645        }
646        let entity_types: std::collections::HashMap<&str, EntityType> = graph
647            .entities
648            .iter()
649            .map(|entity| (entity.name.as_str(), entity.entity_type))
650            .collect();
651
652        for rel in &graph.relationships {
653            let source_entity = NewEntity {
654                name: rel.source.clone(),
655                entity_type: entity_types
656                    .get(rel.source.as_str())
657                    .copied()
658                    .unwrap_or(EntityType::Concept),
659                description: None,
660            };
661            let target_entity = NewEntity {
662                name: rel.target.clone(),
663                entity_type: entity_types
664                    .get(rel.target.as_str())
665                    .copied()
666                    .unwrap_or(EntityType::Concept),
667                description: None,
668            };
669            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
670            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
671            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
672            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
673            relationships_persisted += 1;
674        }
675    }
676    tx.commit()?;
677
678    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
679    // Failures do not propagate — non-critical path with graceful degradation.
680    let urls_persisted = if !extracted_urls.is_empty() {
681        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
682            .into_iter()
683            .map(|u| storage_urls::MemoryUrl {
684                url: u.url,
685                offset: Some(u.offset as i64),
686            })
687            .collect();
688        storage_urls::insert_urls(&conn, memory_id, &url_entries)
689    } else {
690        0
691    };
692
693    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
694
695    let created_at_epoch = chrono::Utc::now().timestamp();
696    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
697
698    output::emit_json(&RememberResponse {
699        memory_id,
700        // Persist the normalized (kebab-case) slug as `name` since that is the
701        // storage key. The original input is exposed via `original_name` only
702        // when normalization actually changed something (B_4 in v1.0.32).
703        name: normalized_name.clone(),
704        namespace,
705        action: action.clone(),
706        operation: action,
707        version,
708        entities_persisted,
709        relationships_persisted,
710        relationships_truncated,
711        chunks_created,
712        chunks_persisted,
713        urls_persisted,
714        extraction_method,
715        merged_into_memory_id: None,
716        warnings,
717        created_at: created_at_epoch,
718        created_at_iso,
719        elapsed_ms: inicio.elapsed().as_millis() as u64,
720        name_was_normalized,
721        original_name: name_was_normalized.then_some(original_name),
722    })?;
723
724    Ok(())
725}
726
#[cfg(test)]
mod tests {
    use super::compute_chunks_persisted;
    use crate::errors::AppError;
    use crate::output::RememberResponse;

    /// Error text used by the "empty after normalization" regression tests.
    const EMPTY_AFTER_NORMALIZATION: &str = "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)";

    /// Baseline response fixture: a freshly "created" memory at version 1 with
    /// one chunk, nothing persisted, no warnings and epoch-zero timestamps.
    /// Tests override only the fields they care about via struct update syntax.
    fn baseline_response() -> RememberResponse {
        RememberResponse {
            memory_id: 1,
            name: "mem".to_string(),
            namespace: "global".to_string(),
            action: "created".to_string(),
            operation: "created".to_string(),
            version: 1,
            entities_persisted: 0,
            relationships_persisted: 0,
            relationships_truncated: false,
            chunks_created: 1,
            chunks_persisted: 0,
            urls_persisted: 0,
            extraction_method: None,
            merged_into_memory_id: None,
            warnings: Vec::new(),
            created_at: 0,
            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
            elapsed_ms: 0,
            name_was_normalized: false,
            original_name: None,
        }
    }

    /// Serializes a response into a `serde_json::Value`, panicking when
    /// serialization fails.
    fn to_json(resp: &RememberResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialization failed")
    }

    /// Lowercases `raw`, maps underscores and spaces to hyphens, then strips
    /// leading and trailing hyphens.
    fn kebab_trim(raw: &str) -> String {
        let hyphenated = raw.to_lowercase().replace(['_', ' '], "-");
        hyphenated.trim_matches('-').to_string()
    }

    /// Returns a validation error carrying the dedicated "empty after
    /// normalization" message when `normalized` is empty, `Ok(())` otherwise.
    fn reject_empty(normalized: &str) -> Result<(), AppError> {
        if !normalized.is_empty() {
            return Ok(());
        }
        Err(AppError::Validation(EMPTY_AFTER_NORMALIZATION.to_string()))
    }

    /// Asserts that `outcome` is an error and, when it is a validation error,
    /// that its message mentions "empty after normalization".
    fn assert_mentions_empty_after_normalization(outcome: Result<(), AppError>) {
        assert!(outcome.is_err());
        if let Err(AppError::Validation(msg)) = outcome {
            assert!(
                msg.contains("empty after normalization"),
                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
            );
        }
    }

    // Bug H-M8: the chunks_persisted contract is unit-testable and matches the schema.
    #[test]
    fn chunks_persisted_zero_for_zero_chunks() {
        let persisted = compute_chunks_persisted(0);
        assert_eq!(persisted, 0);
    }

    #[test]
    fn chunks_persisted_zero_for_single_chunk_body() {
        // A single-chunk body is stored in the memories row itself, so no row
        // is written to memory_chunks. This is the documented contract.
        let persisted = compute_chunks_persisted(1);
        assert_eq!(persisted, 0);
    }

    #[test]
    fn chunks_persisted_equals_count_for_multi_chunk_body() {
        // With more than one chunk, the persisted count equals the chunk count.
        for count in [2_usize, 7, 64] {
            assert_eq!(compute_chunks_persisted(count), count);
        }
    }

    #[test]
    fn remember_response_serializes_required_fields() {
        let resp = RememberResponse {
            memory_id: 42,
            name: "minha-mem".to_string(),
            created_at: 1_705_320_000,
            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
            elapsed_ms: 55,
            ..baseline_response()
        };

        let json = to_json(&resp);
        assert_eq!(json["memory_id"], 42);
        assert_eq!(json["action"], "created");
        assert_eq!(json["operation"], "created");
        assert_eq!(json["version"], 1);
        assert_eq!(json["elapsed_ms"], 55u64);
        assert!(json["warnings"].is_array());
        assert!(json["merged_into_memory_id"].is_null());
    }

    #[test]
    fn remember_response_action_e_operation_sao_aliases() {
        let resp = RememberResponse {
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 2,
            entities_persisted: 3,
            relationships_persisted: 1,
            chunks_created: 2,
            chunks_persisted: 2,
            ..baseline_response()
        };

        let json = to_json(&resp);
        assert_eq!(
            json["action"], json["operation"],
            "action e operation devem ser iguais"
        );
        assert_eq!(json["entities_persisted"], 3);
        assert_eq!(json["relationships_persisted"], 1);
        assert_eq!(json["chunks_created"], 2);
    }

    #[test]
    fn remember_response_warnings_lista_mensagens() {
        let resp = RememberResponse {
            memory_id: 5,
            name: "dup-mem".to_string(),
            warnings: vec!["identical body already exists as memory id 3".to_string()],
            elapsed_ms: 10,
            ..baseline_response()
        };

        let json = to_json(&resp);
        let listed = json["warnings"]
            .as_array()
            .expect("warnings deve ser array");
        assert_eq!(listed.len(), 1);
        assert!(listed[0].as_str().unwrap().contains("identical body"));
    }

    #[test]
    fn invalid_name_reserved_prefix_returns_validation_error() {
        // Exercises the rejection rule for names starting with "__" directly.
        let candidate = "__reservado";
        let outcome: Result<(), AppError> = if !candidate.starts_with("__") {
            Ok(())
        } else {
            Err(AppError::Validation(
                crate::i18n::validation::reserved_name(),
            ))
        };

        match outcome {
            Err(AppError::Validation(msg)) => assert!(!msg.is_empty()),
            other => assert!(other.is_err()),
        }
    }

    #[test]
    fn name_too_long_returns_validation_error() {
        let limit = crate::constants::MAX_MEMORY_NAME_LEN;
        let oversized = "a".repeat(limit + 1);
        // A name is accepted only when it is non-empty and within the limit.
        let outcome: Result<(), AppError> = if !oversized.is_empty() && oversized.len() <= limit {
            Ok(())
        } else {
            Err(AppError::Validation(crate::i18n::validation::name_length(
                limit,
            )))
        };
        assert!(outcome.is_err());
    }

    #[test]
    fn remember_response_merged_into_memory_id_some_serializes_integer() {
        let resp = RememberResponse {
            memory_id: 10,
            name: "mem-mergeada".to_string(),
            action: "updated".to_string(),
            operation: "updated".to_string(),
            version: 3,
            merged_into_memory_id: Some(7),
            ..baseline_response()
        };

        let json = to_json(&resp);
        assert_eq!(json["merged_into_memory_id"], 7);
    }

    #[test]
    fn remember_response_urls_persisted_serializes_field() {
        // v1.0.24 P0-2: ensures urls_persisted shows up in the JSON and accepts a value > 0.
        let resp = RememberResponse {
            memory_id: 3,
            name: "mem-com-urls".to_string(),
            urls_persisted: 3,
            extraction_method: Some("regex-only".to_string()),
            ..baseline_response()
        };

        let json = to_json(&resp);
        assert_eq!(json["urls_persisted"], 3);
    }

    #[test]
    fn empty_name_after_normalization_returns_specific_message() {
        // P0-4 regression: a name made only of hyphens normalizes to the empty
        // string and must yield a distinct error message, not the "too long" one.
        let normalized = kebab_trim("---");
        assert_mentions_empty_after_normalization(reject_empty(&normalized));
    }

    #[test]
    fn name_only_underscores_after_normalization_returns_specific_message() {
        // P0-4 regression: a name made only of underscores normalizes to the empty string.
        let normalized = kebab_trim("___");
        assert!(
            normalized.is_empty(),
            "underscores devem normalizar para string vazia"
        );
        assert_mentions_empty_after_normalization(reject_empty(&normalized));
    }

    #[test]
    fn remember_response_relationships_truncated_serializes_field() {
        // P1-D: ensures relationships_truncated shows up in the JSON as a bool.
        let resp_false = RememberResponse {
            name: "test".to_string(),
            entities_persisted: 2,
            relationships_persisted: 1,
            ..baseline_response()
        };
        let json_false = to_json(&resp_false);
        assert_eq!(json_false["relationships_truncated"], false);

        // Same response with only the truncation flag flipped.
        let resp_true = RememberResponse {
            relationships_truncated: true,
            ..resp_false
        };
        let json_true = to_json(&resp_true);
        assert_eq!(json_true["relationships_truncated"], true);
    }
}