Skip to main content

sqlite_graphrag/commands/
remember.rs

1//! Handler for the `remember` CLI subcommand.
2
3use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Maps the number of chunks produced for a body to the number of rows that
/// end up in `memory_chunks`.
///
/// A body that yields at most one chunk is stored directly in the `memories`
/// row, so nothing is appended to `memory_chunks` and the result is `0`. With
/// two or more chunks every chunk is persisted, so the result equals
/// `chunks_created`.
///
/// Kept as a standalone function so the H-M8 invariant can be unit-tested
/// without running the full handler. The schema for `chunks_persisted`
/// documents the same contract (see `docs/schemas/remember.schema.json`).
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        multi => multi,
    }
}
32
// Command-line arguments accepted by the `remember` subcommand; consumed by `run`.
//
// Review notes in this struct are written as plain `//` comments on purpose:
// clap derives help text from `///` doc comments, so adding doc comments here
// would alter the generated `--help` output.
#[derive(clap::Args)]
pub struct RememberArgs {
    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
    #[arg(long)]
    pub name: String,
    #[arg(
        long,
        value_enum,
        long_help = "Memory kind stored in `memories.type`. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
    )]
    pub r#type: MemoryType,
    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
    #[arg(long)]
    pub description: String,
    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
    /// Maximum 512000 bytes; rejected if empty without an external graph.
    #[arg(
        long,
        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
    )]
    pub body: Option<String>,
    #[arg(
        long,
        help = "Read body from a file instead of --body",
        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
    )]
    pub body_file: Option<std::path::PathBuf>,
    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
    /// Mutually exclusive with --body, --body-file, --graph-stdin.
    #[arg(
        long,
        conflicts_with_all = ["body", "body_file", "graph_stdin"]
    )]
    pub body_stdin: bool,
    // `run` deserializes this file's content into the list of graph entities.
    #[arg(
        long,
        help = "JSON file containing entities to associate with this memory"
    )]
    pub entities_file: Option<std::path::PathBuf>,
    // `run` deserializes this file's content into the list of graph relationships.
    #[arg(
        long,
        help = "JSON file containing relationships to associate with this memory"
    )]
    pub relationships_file: Option<std::path::PathBuf>,
    // When set, `run` reads one JSON document from stdin and takes the body,
    // entities and relationships from it.
    #[arg(
        long,
        help = "Read graph JSON (body + entities + relationships) from stdin",
        conflicts_with_all = [
            "body",
            "body_file",
            "body_stdin",
            "entities_file",
            "relationships_file"
        ]
    )]
    pub graph_stdin: bool,
    // Defaults to "global"; `run` passes the value through
    // `crate::namespace::resolve_namespace`.
    #[arg(long, default_value = "global")]
    pub namespace: Option<String>,
    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
    // NOTE(review): no `conflicts_with` is declared for `metadata_file` here;
    // when both are given, `run` uses `--metadata` and ignores the file.
    #[arg(long)]
    pub metadata: Option<String>,
    #[arg(long, help = "JSON file containing metadata key-value pairs")]
    pub metadata_file: Option<std::path::PathBuf>,
    // When set, `run` updates an existing memory with the same name instead of
    // returning a duplicate error.
    #[arg(long)]
    pub force_merge: bool,
    // Forwarded to `memories::update` when an existing memory is updated.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    // Extraction only runs in `run` when no graph was supplied externally and
    // the body is not blank.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_ENABLE_NER",
        help = "Enable automatic BERT NER entity/relationship extraction from body"
    )]
    pub enable_ner: bool,
    // Hidden flag; not read by the `run` handler in this file.
    #[arg(long, hide = true)]
    pub skip_extraction: bool,
    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
    #[arg(long)]
    pub session_id: Option<String>,
    // `run` explicitly discards this value (`let _ = args.format;`).
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Optional database location; handed to `AppPaths::resolve` by `run`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
126
/// Graph payload associated with a memory.
///
/// `run` fills it from `--entities-file` / `--relationships-file`, from the
/// JSON document read for `--graph-stdin`, or from automatic extraction.
/// Unknown JSON keys are rejected, and every field may be omitted.
#[derive(Deserialize, Default)]
#[serde(deny_unknown_fields)]
struct GraphInput {
    /// Optional body text; `run` takes it as the memory body for `--graph-stdin`.
    #[serde(default)]
    body: Option<String>,
    /// Entities to upsert and link to the memory.
    #[serde(default)]
    entities: Vec<NewEntity>,
    /// Relationships to upsert and link to the memory.
    #[serde(default)]
    relationships: Vec<NewRelationship>,
}
137
138fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
139    for rel in &mut graph.relationships {
140        rel.relation = rel.relation.replace('-', "_");
141        if !is_valid_relation(&rel.relation) {
142            return Err(AppError::Validation(format!(
143                "invalid relation '{}' for relationship '{}' -> '{}'",
144                rel.relation, rel.source, rel.target
145            )));
146        }
147        if !(0.0..=1.0).contains(&rel.strength) {
148            return Err(AppError::Validation(format!(
149                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
150                rel.strength, rel.source, rel.target
151            )));
152        }
153    }
154
155    Ok(())
156}
157
/// Reports whether `relation` is one of the relation names accepted for graph
/// relationships.
///
/// The comparison is exact and case-sensitive; callers are expected to have
/// already rewritten hyphens to underscores.
fn is_valid_relation(relation: &str) -> bool {
    const ALLOWED_RELATIONS: [&str; 12] = [
        "applies_to",
        "uses",
        "depends_on",
        "causes",
        "fixes",
        "contradicts",
        "supports",
        "follows",
        "related",
        "mentions",
        "replaces",
        "tracked_in",
    ];

    ALLOWED_RELATIONS.contains(&relation)
}
175
176pub fn run(args: RememberArgs) -> Result<(), AppError> {
177    use crate::constants::*;
178
179    let inicio = std::time::Instant::now();
180    let _ = args.format;
181    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
182
183    // Capture the original `--name` before normalization so the JSON response can
184    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
185    // an owned String because `args.name` is moved into the response below.
186    let original_name = args.name.clone();
187
188    // Auto-normalize to kebab-case before validation (P2-H).
189    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
190    // after truncation by a long filename ending in a hyphen.
191    let normalized_name = {
192        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
193        let trimmed = lower.trim_matches('-').to_string();
194        if trimmed != args.name {
195            tracing::warn!(
196                original = %args.name,
197                normalized = %trimmed,
198                "name auto-normalized to kebab-case"
199            );
200        }
201        trimmed
202    };
203    let name_was_normalized = normalized_name != original_name;
204
205    if normalized_name.is_empty() {
206        return Err(AppError::Validation(
207            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
208        ));
209    }
210    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
211        return Err(AppError::LimitExceeded(
212            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
213        ));
214    }
215
216    if normalized_name.starts_with("__") {
217        return Err(AppError::Validation(
218            crate::i18n::validation::reserved_name(),
219        ));
220    }
221
222    {
223        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
224            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
225        if !slug_re.is_match(&normalized_name) {
226            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
227                &normalized_name,
228            )));
229        }
230    }
231
232    if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
233        return Err(AppError::Validation(
234            crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
235        ));
236    }
237
238    let mut raw_body = if let Some(b) = args.body {
239        b
240    } else if let Some(path) = args.body_file {
241        std::fs::read_to_string(&path).map_err(AppError::Io)?
242    } else if args.body_stdin || args.graph_stdin {
243        crate::stdin_helper::read_stdin_with_timeout(60)?
244    } else {
245        String::new()
246    };
247
248    let entities_provided_externally =
249        args.entities_file.is_some() || args.relationships_file.is_some() || args.graph_stdin;
250
251    let mut graph = GraphInput::default();
252    if let Some(path) = args.entities_file {
253        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
254        graph.entities = serde_json::from_str(&content)?;
255    }
256    if let Some(path) = args.relationships_file {
257        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
258        graph.relationships = serde_json::from_str(&content)?;
259    }
260    if args.graph_stdin {
261        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
262            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
263        })?;
264        raw_body = graph.body.take().unwrap_or_default();
265    }
266
267    if graph.entities.len() > max_entities_per_memory() {
268        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
269            max_entities_per_memory(),
270        )));
271    }
272    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
273        return Err(AppError::LimitExceeded(
274            errors_msg::relationship_limit_exceeded(MAX_RELATIONSHIPS_PER_MEMORY),
275        ));
276    }
277    normalize_and_validate_graph_input(&mut graph)?;
278
279    if raw_body.len() > MAX_MEMORY_BODY_LEN {
280        return Err(AppError::LimitExceeded(
281            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
282        ));
283    }
284
285    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
286    // Without this check, empty embeddings would be persisted, breaking recall semantics.
287    if !entities_provided_externally && graph.entities.is_empty() && raw_body.trim().is_empty() {
288        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
289    }
290
291    let metadata: serde_json::Value = if let Some(m) = args.metadata {
292        serde_json::from_str(&m)?
293    } else if let Some(path) = args.metadata_file {
294        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
295        serde_json::from_str(&content)?
296    } else {
297        serde_json::json!({})
298    };
299
300    let body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
301    let snippet: String = raw_body.chars().take(200).collect();
302
303    let paths = AppPaths::resolve(args.db.as_deref())?;
304    paths.ensure_dirs()?;
305
306    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
307    let mut extraction_method: Option<String> = None;
308    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::new();
309    let mut relationships_truncated = false;
310    if args.enable_ner
311        && !entities_provided_externally
312        && graph.entities.is_empty()
313        && !raw_body.trim().is_empty()
314    {
315        match crate::extraction::extract_graph_auto(&raw_body, &paths) {
316            Ok(extracted) => {
317                extraction_method = Some(extracted.extraction_method.clone());
318                extracted_urls = extracted.urls;
319                graph.entities = extracted.entities;
320                graph.relationships = extracted.relationships;
321                relationships_truncated = extracted.relationships_truncated;
322
323                if graph.entities.len() > max_entities_per_memory() {
324                    graph.entities.truncate(max_entities_per_memory());
325                }
326                if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
327                    relationships_truncated = true;
328                    graph.relationships.truncate(MAX_RELATIONSHIPS_PER_MEMORY);
329                }
330                normalize_and_validate_graph_input(&mut graph)?;
331            }
332            Err(e) => {
333                tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
334            }
335        }
336    }
337
338    let mut conn = open_rw(&paths.db)?;
339    ensure_schema(&mut conn)?;
340
341    {
342        use crate::constants::MAX_NAMESPACES_ACTIVE;
343        let active_count: u32 = conn.query_row(
344            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
345            [],
346            |r| r.get::<_, i64>(0).map(|v| v as u32),
347        )?;
348        let ns_exists: bool = conn.query_row(
349            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
350            rusqlite::params![namespace],
351            |r| r.get::<_, i64>(0).map(|v| v > 0),
352        )?;
353        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
354            return Err(AppError::NamespaceError(format!(
355                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
356            )));
357        }
358    }
359
360    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
361    if existing_memory.is_some() && !args.force_merge {
362        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
363            &normalized_name,
364            &namespace,
365        )));
366    }
367
368    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
369
370    output::emit_progress_i18n(
371        &format!(
372            "Remember stage: validated input; available memory {} MB",
373            crate::memory_guard::available_memory_mb()
374        ),
375        &format!(
376            "Stage remember: input validated; available memory {} MB",
377            crate::memory_guard::available_memory_mb()
378        ),
379    );
380
381    let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
382    let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
383    let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
384    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
385    let chunks_created = chunks_info.len();
386    // For single-chunk bodies the memory row itself stores the content and no
387    // entry is appended to `memory_chunks` (see line ~545). For multi-chunk
388    // bodies every chunk is persisted via `insert_chunk_slices`.
389    let chunks_persisted = compute_chunks_persisted(chunks_info.len());
390
391    output::emit_progress_i18n(
392        &format!(
393            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
394            chunks_created,
395            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
396        ),
397        &format!(
398            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
399            chunks_created,
400            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
401        ),
402    );
403
404    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
405        return Err(AppError::LimitExceeded(format!(
406            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
407            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
408        )));
409    }
410
411    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
412    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
413
414    let embedding = if chunks_info.len() == 1 {
415        crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
416    } else {
417        let chunk_texts: Vec<&str> = chunks_info
418            .iter()
419            .map(|c| chunking::chunk_text(&raw_body, c))
420            .collect();
421        output::emit_progress_i18n(
422            &format!(
423                "Embedding {} chunks serially to keep memory bounded...",
424                chunks_info.len()
425            ),
426            &format!(
427                "Embedding {} chunks serially to keep memory bounded...",
428                chunks_info.len()
429            ),
430        );
431        let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
432        for chunk_text in &chunk_texts {
433            chunk_embeddings.push(crate::daemon::embed_passage_or_local(
434                &paths.models,
435                chunk_text,
436            )?);
437        }
438        output::emit_progress_i18n(
439            &format!(
440                "Remember stage: chunk embeddings complete; process RSS {} MB",
441                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
442            ),
443            &format!(
444                "Stage remember: chunk embeddings completed; process RSS {} MB",
445                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
446            ),
447        );
448        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
449        chunk_embeddings_cache = Some(chunk_embeddings);
450        aggregated
451    };
452    let body_for_storage = raw_body;
453
454    let memory_type = args.r#type.as_str();
455    let new_memory = NewMemory {
456        namespace: namespace.clone(),
457        name: normalized_name.clone(),
458        memory_type: memory_type.to_string(),
459        description: args.description.clone(),
460        body: body_for_storage,
461        body_hash: body_hash.clone(),
462        session_id: args.session_id.clone(),
463        source: "agent".to_string(),
464        metadata,
465    };
466
467    let mut warnings = Vec::new();
468    let mut entities_persisted = 0usize;
469    let mut relationships_persisted = 0usize;
470
471    let graph_entity_embeddings = graph
472        .entities
473        .iter()
474        .map(|entity| {
475            let entity_text = match &entity.description {
476                Some(desc) => format!("{} {}", entity.name, desc),
477                None => entity.name.clone(),
478            };
479            crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
480        })
481        .collect::<Result<Vec<_>, _>>()?;
482
483    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
484
485    let (memory_id, action, version) = match existing_memory {
486        Some((existing_id, _updated_at, _current_version)) => {
487            if let Some(hash_id) = duplicate_hash_id {
488                if hash_id != existing_id {
489                    warnings.push(format!(
490                        "identical body already exists as memory id {hash_id}"
491                    ));
492                }
493            }
494
495            storage_chunks::delete_chunks(&tx, existing_id)?;
496
497            let next_v = versions::next_version(&tx, existing_id)?;
498            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
499            versions::insert_version(
500                &tx,
501                existing_id,
502                next_v,
503                &normalized_name,
504                memory_type,
505                &args.description,
506                &new_memory.body,
507                &serde_json::to_string(&new_memory.metadata)?,
508                None,
509                "edit",
510            )?;
511            memories::upsert_vec(
512                &tx,
513                existing_id,
514                &namespace,
515                memory_type,
516                &embedding,
517                &normalized_name,
518                &snippet,
519            )?;
520            (existing_id, "updated".to_string(), next_v)
521        }
522        None => {
523            if let Some(hash_id) = duplicate_hash_id {
524                warnings.push(format!(
525                    "identical body already exists as memory id {hash_id}"
526                ));
527            }
528            let id = memories::insert(&tx, &new_memory)?;
529            versions::insert_version(
530                &tx,
531                id,
532                1,
533                &normalized_name,
534                memory_type,
535                &args.description,
536                &new_memory.body,
537                &serde_json::to_string(&new_memory.metadata)?,
538                None,
539                "create",
540            )?;
541            memories::upsert_vec(
542                &tx,
543                id,
544                &namespace,
545                memory_type,
546                &embedding,
547                &normalized_name,
548                &snippet,
549            )?;
550            (id, "created".to_string(), 1)
551        }
552    };
553
554    if chunks_info.len() > 1 {
555        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
556
557        let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
558            AppError::Internal(anyhow::anyhow!(
559                "cache de embeddings de chunks ausente no caminho multi-chunk do remember"
560            ))
561        })?;
562
563        for (i, emb) in chunk_embeddings.iter().enumerate() {
564            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
565        }
566        output::emit_progress_i18n(
567            &format!(
568                "Remember stage: persisted chunk vectors; process RSS {} MB",
569                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
570            ),
571            &format!(
572                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
573                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
574            ),
575        );
576    }
577
578    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
579        for entity in &graph.entities {
580            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
581            let entity_embedding = &graph_entity_embeddings[entities_persisted];
582            entities::upsert_entity_vec(
583                &tx,
584                entity_id,
585                &namespace,
586                entity.entity_type,
587                entity_embedding,
588                &entity.name,
589            )?;
590            entities::link_memory_entity(&tx, memory_id, entity_id)?;
591            entities::increment_degree(&tx, entity_id)?;
592            entities_persisted += 1;
593        }
594        let entity_types: std::collections::HashMap<&str, EntityType> = graph
595            .entities
596            .iter()
597            .map(|entity| (entity.name.as_str(), entity.entity_type))
598            .collect();
599
600        for rel in &graph.relationships {
601            let source_entity = NewEntity {
602                name: rel.source.clone(),
603                entity_type: entity_types
604                    .get(rel.source.as_str())
605                    .copied()
606                    .unwrap_or(EntityType::Concept),
607                description: None,
608            };
609            let target_entity = NewEntity {
610                name: rel.target.clone(),
611                entity_type: entity_types
612                    .get(rel.target.as_str())
613                    .copied()
614                    .unwrap_or(EntityType::Concept),
615                description: None,
616            };
617            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
618            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
619            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
620            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
621            relationships_persisted += 1;
622        }
623    }
624    tx.commit()?;
625
626    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
627    // Failures do not propagate — non-critical path with graceful degradation.
628    let urls_persisted = if !extracted_urls.is_empty() {
629        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
630            .into_iter()
631            .map(|u| storage_urls::MemoryUrl {
632                url: u.url,
633                offset: Some(u.offset as i64),
634            })
635            .collect();
636        storage_urls::insert_urls(&conn, memory_id, &url_entries)
637    } else {
638        0
639    };
640
641    let created_at_epoch = chrono::Utc::now().timestamp();
642    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
643
644    output::emit_json(&RememberResponse {
645        memory_id,
646        // Persist the normalized (kebab-case) slug as `name` since that is the
647        // storage key. The original input is exposed via `original_name` only
648        // when normalization actually changed something (B_4 in v1.0.32).
649        name: normalized_name.clone(),
650        namespace,
651        action: action.clone(),
652        operation: action,
653        version,
654        entities_persisted,
655        relationships_persisted,
656        relationships_truncated,
657        chunks_created,
658        chunks_persisted,
659        urls_persisted,
660        extraction_method,
661        merged_into_memory_id: None,
662        warnings,
663        created_at: created_at_epoch,
664        created_at_iso,
665        elapsed_ms: inicio.elapsed().as_millis() as u64,
666        name_was_normalized,
667        original_name: name_was_normalized.then_some(original_name),
668    })?;
669
670    Ok(())
671}
672
673#[cfg(test)]
674mod tests {
675    use super::compute_chunks_persisted;
676    use crate::output::RememberResponse;
677
678    // Bug H-M8: chunks_persisted contract is unit-testable and matches schema.
679    #[test]
680    fn chunks_persisted_zero_for_zero_chunks() {
681        assert_eq!(compute_chunks_persisted(0), 0);
682    }
683
684    #[test]
685    fn chunks_persisted_zero_for_single_chunk_body() {
686        // Single-chunk bodies live in the memories row itself; no row is
687        // appended to memory_chunks. This is the documented contract.
688        assert_eq!(compute_chunks_persisted(1), 0);
689    }
690
691    #[test]
692    fn chunks_persisted_equals_count_for_multi_chunk_body() {
693        // Every chunk above the first triggers a row in memory_chunks.
694        assert_eq!(compute_chunks_persisted(2), 2);
695        assert_eq!(compute_chunks_persisted(7), 7);
696        assert_eq!(compute_chunks_persisted(64), 64);
697    }
698
699    #[test]
700    fn remember_response_serializes_required_fields() {
701        let resp = RememberResponse {
702            memory_id: 42,
703            name: "minha-mem".to_string(),
704            namespace: "global".to_string(),
705            action: "created".to_string(),
706            operation: "created".to_string(),
707            version: 1,
708            entities_persisted: 0,
709            relationships_persisted: 0,
710            relationships_truncated: false,
711            chunks_created: 1,
712            chunks_persisted: 0,
713            urls_persisted: 0,
714            extraction_method: None,
715            merged_into_memory_id: None,
716            warnings: vec![],
717            created_at: 1_705_320_000,
718            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
719            elapsed_ms: 55,
720            name_was_normalized: false,
721            original_name: None,
722        };
723
724        let json = serde_json::to_value(&resp).expect("serialization failed");
725        assert_eq!(json["memory_id"], 42);
726        assert_eq!(json["action"], "created");
727        assert_eq!(json["operation"], "created");
728        assert_eq!(json["version"], 1);
729        assert_eq!(json["elapsed_ms"], 55u64);
730        assert!(json["warnings"].is_array());
731        assert!(json["merged_into_memory_id"].is_null());
732    }
733
734    #[test]
735    fn remember_response_action_e_operation_sao_aliases() {
736        let resp = RememberResponse {
737            memory_id: 1,
738            name: "mem".to_string(),
739            namespace: "global".to_string(),
740            action: "updated".to_string(),
741            operation: "updated".to_string(),
742            version: 2,
743            entities_persisted: 3,
744            relationships_persisted: 1,
745            relationships_truncated: false,
746            extraction_method: None,
747            chunks_created: 2,
748            chunks_persisted: 2,
749            urls_persisted: 0,
750            merged_into_memory_id: None,
751            warnings: vec![],
752            created_at: 0,
753            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
754            elapsed_ms: 0,
755            name_was_normalized: false,
756            original_name: None,
757        };
758
759        let json = serde_json::to_value(&resp).expect("serialization failed");
760        assert_eq!(
761            json["action"], json["operation"],
762            "action e operation devem ser iguais"
763        );
764        assert_eq!(json["entities_persisted"], 3);
765        assert_eq!(json["relationships_persisted"], 1);
766        assert_eq!(json["chunks_created"], 2);
767    }
768
769    #[test]
770    fn remember_response_warnings_lista_mensagens() {
771        let resp = RememberResponse {
772            memory_id: 5,
773            name: "dup-mem".to_string(),
774            namespace: "global".to_string(),
775            action: "created".to_string(),
776            operation: "created".to_string(),
777            version: 1,
778            entities_persisted: 0,
779            extraction_method: None,
780            relationships_persisted: 0,
781            relationships_truncated: false,
782            chunks_created: 1,
783            chunks_persisted: 0,
784            urls_persisted: 0,
785            merged_into_memory_id: None,
786            warnings: vec!["identical body already exists as memory id 3".to_string()],
787            created_at: 0,
788            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
789            elapsed_ms: 10,
790            name_was_normalized: false,
791            original_name: None,
792        };
793
794        let json = serde_json::to_value(&resp).expect("serialization failed");
795        let warnings = json["warnings"]
796            .as_array()
797            .expect("warnings deve ser array");
798        assert_eq!(warnings.len(), 1);
799        assert!(warnings[0].as_str().unwrap().contains("identical body"));
800    }
801
802    #[test]
803    fn invalid_name_reserved_prefix_returns_validation_error() {
804        use crate::errors::AppError;
805        // Validates the rejection logic for names with the "__" prefix directly
806        let nome = "__reservado";
807        let resultado: Result<(), AppError> = if nome.starts_with("__") {
808            Err(AppError::Validation(
809                crate::i18n::validation::reserved_name(),
810            ))
811        } else {
812            Ok(())
813        };
814        assert!(resultado.is_err());
815        if let Err(AppError::Validation(msg)) = resultado {
816            assert!(!msg.is_empty());
817        }
818    }
819
820    #[test]
821    fn name_too_long_returns_validation_error() {
822        use crate::errors::AppError;
823        let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
824        let resultado: Result<(), AppError> =
825            if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
826                Err(AppError::Validation(crate::i18n::validation::name_length(
827                    crate::constants::MAX_MEMORY_NAME_LEN,
828                )))
829            } else {
830                Ok(())
831            };
832        assert!(resultado.is_err());
833    }
834
835    #[test]
836    fn remember_response_merged_into_memory_id_some_serializes_integer() {
837        let resp = RememberResponse {
838            memory_id: 10,
839            name: "mem-mergeada".to_string(),
840            namespace: "global".to_string(),
841            action: "updated".to_string(),
842            operation: "updated".to_string(),
843            version: 3,
844            extraction_method: None,
845            entities_persisted: 0,
846            relationships_persisted: 0,
847            relationships_truncated: false,
848            chunks_created: 1,
849            chunks_persisted: 0,
850            urls_persisted: 0,
851            merged_into_memory_id: Some(7),
852            warnings: vec![],
853            created_at: 0,
854            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
855            elapsed_ms: 0,
856            name_was_normalized: false,
857            original_name: None,
858        };
859
860        let json = serde_json::to_value(&resp).expect("serialization failed");
861        assert_eq!(json["merged_into_memory_id"], 7);
862    }
863
864    #[test]
865    fn remember_response_urls_persisted_serializes_field() {
866        // v1.0.24 P0-2: garante que urls_persisted aparece no JSON e aceita valor > 0.
867        let resp = RememberResponse {
868            memory_id: 3,
869            name: "mem-com-urls".to_string(),
870            namespace: "global".to_string(),
871            action: "created".to_string(),
872            operation: "created".to_string(),
873            version: 1,
874            entities_persisted: 0,
875            relationships_persisted: 0,
876            relationships_truncated: false,
877            chunks_created: 1,
878            chunks_persisted: 0,
879            urls_persisted: 3,
880            extraction_method: Some("regex-only".to_string()),
881            merged_into_memory_id: None,
882            warnings: vec![],
883            created_at: 0,
884            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
885            elapsed_ms: 0,
886            name_was_normalized: false,
887            original_name: None,
888        };
889        let json = serde_json::to_value(&resp).expect("serialization failed");
890        assert_eq!(json["urls_persisted"], 3);
891    }
892
893    #[test]
894    fn empty_name_after_normalization_returns_specific_message() {
895        // P0-4 regression: name consisting only of hyphens normalizes to empty string;
896        // must produce a distinct error message, not the "too long" message.
897        use crate::errors::AppError;
898        let normalized = "---".to_lowercase().replace(['_', ' '], "-");
899        let normalized = normalized.trim_matches('-').to_string();
900        let resultado: Result<(), AppError> = if normalized.is_empty() {
901            Err(AppError::Validation(
902                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
903            ))
904        } else {
905            Ok(())
906        };
907        assert!(resultado.is_err());
908        if let Err(AppError::Validation(msg)) = resultado {
909            assert!(
910                msg.contains("empty after normalization"),
911                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
912            );
913        }
914    }
915
916    #[test]
917    fn name_only_underscores_after_normalization_returns_specific_message() {
918        // P0-4 regression: name consisting only of underscores normalizes to empty string.
919        use crate::errors::AppError;
920        let normalized = "___".to_lowercase().replace(['_', ' '], "-");
921        let normalized = normalized.trim_matches('-').to_string();
922        assert!(
923            normalized.is_empty(),
924            "underscores devem normalizar para string vazia"
925        );
926        let resultado: Result<(), AppError> = if normalized.is_empty() {
927            Err(AppError::Validation(
928                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
929            ))
930        } else {
931            Ok(())
932        };
933        assert!(resultado.is_err());
934        if let Err(AppError::Validation(msg)) = resultado {
935            assert!(
936                msg.contains("empty after normalization"),
937                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
938            );
939        }
940    }
941
942    #[test]
943    fn remember_response_relationships_truncated_serializes_field() {
944        // P1-D: garante que relationships_truncated aparece no JSON como bool.
945        let resp_false = RememberResponse {
946            memory_id: 1,
947            name: "test".to_string(),
948            namespace: "global".to_string(),
949            action: "created".to_string(),
950            operation: "created".to_string(),
951            version: 1,
952            entities_persisted: 2,
953            relationships_persisted: 1,
954            relationships_truncated: false,
955            chunks_created: 1,
956            chunks_persisted: 0,
957            urls_persisted: 0,
958            extraction_method: None,
959            merged_into_memory_id: None,
960            warnings: vec![],
961            created_at: 0,
962            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
963            elapsed_ms: 0,
964            name_was_normalized: false,
965            original_name: None,
966        };
967        let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
968        assert_eq!(json_false["relationships_truncated"], false);
969
970        let resp_true = RememberResponse {
971            relationships_truncated: true,
972            ..resp_false
973        };
974        let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
975        assert_eq!(json_true["relationships_truncated"], true);
976    }
977}