Skip to main content

sqlite_graphrag/commands/
remember.rs

//! Handler for the `remember` CLI subcommand.

use crate::chunking;
use crate::cli::MemoryType;
use crate::entity_type::EntityType;
use crate::errors::AppError;
use crate::i18n::errors_msg;
use crate::output::{self, JsonOutputFormat, RememberResponse};
use crate::paths::AppPaths;
use crate::storage::chunks as storage_chunks;
use crate::storage::connection::{ensure_schema, open_rw};
use crate::storage::entities::{NewEntity, NewRelationship};
use crate::storage::memories::NewMemory;
use crate::storage::{entities, memories, urls as storage_urls, versions};
use serde::Deserialize;

/// Returns the number of rows that will be written to `memory_chunks` for the
/// given chunk count.
///
/// Single-chunk (and zero-chunk) bodies are stored directly in the `memories`
/// row, so no chunk row is appended and the result is `0`. Multi-chunk bodies
/// persist every chunk, so the result equals `chunks_created`.
///
/// Centralized as a function so the H-M8 invariant is unit-testable without
/// running the full handler. The schema for `chunks_persisted` documents this
/// contract explicitly (see `docs/schemas/remember.schema.json`).
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    if chunks_created > 1 {
        chunks_created
    } else {
        0
    }
}

33#[derive(clap::Args)]
34pub struct RememberArgs {
35    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
36    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
37    #[arg(long)]
38    pub name: String,
39    #[arg(
40        long,
41        value_enum,
42        long_help = "Memory kind stored in `memories.type`. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
43    )]
44    pub r#type: MemoryType,
45    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
46    #[arg(long)]
47    pub description: String,
48    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
49    /// Maximum 512000 bytes; rejected if empty without an external graph.
50    #[arg(
51        long,
52        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
53        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
54    )]
55    pub body: Option<String>,
56    #[arg(
57        long,
58        help = "Read body from a file instead of --body",
59        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
60    )]
61    pub body_file: Option<std::path::PathBuf>,
62    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
63    /// Mutually exclusive with --body, --body-file, --graph-stdin.
64    #[arg(
65        long,
66        conflicts_with_all = ["body", "body_file", "graph_stdin"]
67    )]
68    pub body_stdin: bool,
69    #[arg(
70        long,
71        help = "JSON file containing entities to associate with this memory"
72    )]
73    pub entities_file: Option<std::path::PathBuf>,
74    #[arg(
75        long,
76        help = "JSON file containing relationships to associate with this memory"
77    )]
78    pub relationships_file: Option<std::path::PathBuf>,
79    #[arg(
80        long,
81        help = "Read graph JSON (body + entities + relationships) from stdin",
82        conflicts_with_all = [
83            "body",
84            "body_file",
85            "body_stdin",
86            "entities_file",
87            "relationships_file"
88        ]
89    )]
90    pub graph_stdin: bool,
91    #[arg(long, default_value = "global")]
92    pub namespace: Option<String>,
93    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
94    #[arg(long)]
95    pub metadata: Option<String>,
96    #[arg(long, help = "JSON file containing metadata key-value pairs")]
97    pub metadata_file: Option<std::path::PathBuf>,
98    #[arg(long)]
99    pub force_merge: bool,
100    #[arg(
101        long,
102        value_name = "EPOCH_OR_RFC3339",
103        value_parser = crate::parsers::parse_expected_updated_at,
104        long_help = "Optimistic lock: reject if updated_at does not match. \
105Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
106    )]
107    pub expected_updated_at: Option<i64>,
108    #[arg(
109        long,
110        help = "Disable automatic entity/relationship extraction from body"
111    )]
112    pub skip_extraction: bool,
113    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
114    #[arg(long)]
115    pub session_id: Option<String>,
116    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
117    pub format: JsonOutputFormat,
118    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
119    pub json: bool,
120    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
121    pub db: Option<String>,
122}
123
124#[derive(Deserialize, Default)]
125#[serde(deny_unknown_fields)]
126struct GraphInput {
127    #[serde(default)]
128    body: Option<String>,
129    #[serde(default)]
130    entities: Vec<NewEntity>,
131    #[serde(default)]
132    relationships: Vec<NewRelationship>,
133}
134
135fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
136    for rel in &mut graph.relationships {
137        rel.relation = rel.relation.replace('-', "_");
138        if !is_valid_relation(&rel.relation) {
139            return Err(AppError::Validation(format!(
140                "invalid relation '{}' for relationship '{}' -> '{}'",
141                rel.relation, rel.source, rel.target
142            )));
143        }
144        if !(0.0..=1.0).contains(&rel.strength) {
145            return Err(AppError::Validation(format!(
146                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
147                rel.strength, rel.source, rel.target
148            )));
149        }
150    }
151
152    Ok(())
153}
154
/// Returns `true` when `relation` is one of the canonical relation names.
///
/// The comparison is exact: names are lowercase and use underscores, so the
/// caller must normalize hyphenated input before calling this.
fn is_valid_relation(relation: &str) -> bool {
    matches!(
        relation,
        "applies_to"
            | "uses"
            | "depends_on"
            | "causes"
            | "fixes"
            | "contradicts"
            | "supports"
            | "follows"
            | "related"
            | "mentions"
            | "replaces"
            | "tracked_in"
    )
}

173pub fn run(args: RememberArgs) -> Result<(), AppError> {
174    use crate::constants::*;
175
176    let inicio = std::time::Instant::now();
177    let _ = args.format;
178    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
179
180    // Capture the original `--name` before normalization so the JSON response can
181    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
182    // an owned String because `args.name` is moved into the response below.
183    let original_name = args.name.clone();
184
185    // Auto-normalize to kebab-case before validation (P2-H).
186    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
187    // after truncation by a long filename ending in a hyphen.
188    let normalized_name = {
189        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
190        let trimmed = lower.trim_matches('-').to_string();
191        if trimmed != args.name {
192            tracing::warn!(
193                original = %args.name,
194                normalized = %trimmed,
195                "name auto-normalized to kebab-case"
196            );
197        }
198        trimmed
199    };
200    let name_was_normalized = normalized_name != original_name;
201
202    if normalized_name.is_empty() {
203        return Err(AppError::Validation(
204            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
205        ));
206    }
207    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
208        return Err(AppError::LimitExceeded(
209            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
210        ));
211    }
212
213    if normalized_name.starts_with("__") {
214        return Err(AppError::Validation(
215            crate::i18n::validation::reserved_name(),
216        ));
217    }
218
219    {
220        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
221            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
222        if !slug_re.is_match(&normalized_name) {
223            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
224                &normalized_name,
225            )));
226        }
227    }
228
229    if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
230        return Err(AppError::Validation(
231            crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
232        ));
233    }
234
235    let mut raw_body = if let Some(b) = args.body {
236        b
237    } else if let Some(path) = args.body_file {
238        std::fs::read_to_string(&path).map_err(AppError::Io)?
239    } else if args.body_stdin || args.graph_stdin {
240        crate::stdin_helper::read_stdin_with_timeout(60)?
241    } else {
242        String::new()
243    };
244
245    let entities_provided_externally =
246        args.entities_file.is_some() || args.relationships_file.is_some() || args.graph_stdin;
247
248    let mut graph = GraphInput::default();
249    if let Some(path) = args.entities_file {
250        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
251        graph.entities = serde_json::from_str(&content)?;
252    }
253    if let Some(path) = args.relationships_file {
254        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
255        graph.relationships = serde_json::from_str(&content)?;
256    }
257    if args.graph_stdin {
258        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
259            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
260        })?;
261        raw_body = graph.body.take().unwrap_or_default();
262    }
263
264    if graph.entities.len() > max_entities_per_memory() {
265        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
266            max_entities_per_memory(),
267        )));
268    }
269    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
270        return Err(AppError::LimitExceeded(
271            errors_msg::relationship_limit_exceeded(MAX_RELATIONSHIPS_PER_MEMORY),
272        ));
273    }
274    normalize_and_validate_graph_input(&mut graph)?;
275
276    if raw_body.len() > MAX_MEMORY_BODY_LEN {
277        return Err(AppError::LimitExceeded(
278            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
279        ));
280    }
281
282    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
283    // Without this check, empty embeddings would be persisted, breaking recall semantics.
284    if !entities_provided_externally && graph.entities.is_empty() && raw_body.trim().is_empty() {
285        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
286    }
287
288    let metadata: serde_json::Value = if let Some(m) = args.metadata {
289        serde_json::from_str(&m)?
290    } else if let Some(path) = args.metadata_file {
291        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
292        serde_json::from_str(&content)?
293    } else {
294        serde_json::json!({})
295    };
296
297    let body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
298    let snippet: String = raw_body.chars().take(200).collect();
299
300    let paths = AppPaths::resolve(args.db.as_deref())?;
301    paths.ensure_dirs()?;
302
303    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
304    let mut extraction_method: Option<String> = None;
305    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::new();
306    let mut relationships_truncated = false;
307    if !args.skip_extraction
308        && !entities_provided_externally
309        && graph.entities.is_empty()
310        && !raw_body.trim().is_empty()
311    {
312        match crate::extraction::extract_graph_auto(&raw_body, &paths) {
313            Ok(extracted) => {
314                extraction_method = Some(extracted.extraction_method.clone());
315                extracted_urls = extracted.urls;
316                graph.entities = extracted.entities;
317                graph.relationships = extracted.relationships;
318                relationships_truncated = extracted.relationships_truncated;
319
320                if graph.entities.len() > max_entities_per_memory() {
321                    graph.entities.truncate(max_entities_per_memory());
322                }
323                if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
324                    relationships_truncated = true;
325                    graph.relationships.truncate(MAX_RELATIONSHIPS_PER_MEMORY);
326                }
327                normalize_and_validate_graph_input(&mut graph)?;
328            }
329            Err(e) => {
330                tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
331            }
332        }
333    }
334
335    let mut conn = open_rw(&paths.db)?;
336    ensure_schema(&mut conn)?;
337
338    {
339        use crate::constants::MAX_NAMESPACES_ACTIVE;
340        let active_count: u32 = conn.query_row(
341            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
342            [],
343            |r| r.get::<_, i64>(0).map(|v| v as u32),
344        )?;
345        let ns_exists: bool = conn.query_row(
346            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
347            rusqlite::params![namespace],
348            |r| r.get::<_, i64>(0).map(|v| v > 0),
349        )?;
350        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
351            return Err(AppError::NamespaceError(format!(
352                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
353            )));
354        }
355    }
356
357    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
358    if existing_memory.is_some() && !args.force_merge {
359        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
360            &normalized_name,
361            &namespace,
362        )));
363    }
364
365    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
366
367    output::emit_progress_i18n(
368        &format!(
369            "Remember stage: validated input; available memory {} MB",
370            crate::memory_guard::available_memory_mb()
371        ),
372        &format!(
373            "Stage remember: input validated; available memory {} MB",
374            crate::memory_guard::available_memory_mb()
375        ),
376    );
377
378    let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
379    let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
380    let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
381    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
382    let chunks_created = chunks_info.len();
383    // For single-chunk bodies the memory row itself stores the content and no
384    // entry is appended to `memory_chunks` (see line ~545). For multi-chunk
385    // bodies every chunk is persisted via `insert_chunk_slices`.
386    let chunks_persisted = compute_chunks_persisted(chunks_info.len());
387
388    output::emit_progress_i18n(
389        &format!(
390            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
391            chunks_created,
392            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
393        ),
394        &format!(
395            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
396            chunks_created,
397            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
398        ),
399    );
400
401    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
402        return Err(AppError::LimitExceeded(format!(
403            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
404            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
405        )));
406    }
407
408    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
409    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
410
411    let embedding = if chunks_info.len() == 1 {
412        crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
413    } else {
414        let chunk_texts: Vec<&str> = chunks_info
415            .iter()
416            .map(|c| chunking::chunk_text(&raw_body, c))
417            .collect();
418        output::emit_progress_i18n(
419            &format!(
420                "Embedding {} chunks serially to keep memory bounded...",
421                chunks_info.len()
422            ),
423            &format!(
424                "Embedding {} chunks serially to keep memory bounded...",
425                chunks_info.len()
426            ),
427        );
428        let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
429        for chunk_text in &chunk_texts {
430            chunk_embeddings.push(crate::daemon::embed_passage_or_local(
431                &paths.models,
432                chunk_text,
433            )?);
434        }
435        output::emit_progress_i18n(
436            &format!(
437                "Remember stage: chunk embeddings complete; process RSS {} MB",
438                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
439            ),
440            &format!(
441                "Stage remember: chunk embeddings completed; process RSS {} MB",
442                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
443            ),
444        );
445        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
446        chunk_embeddings_cache = Some(chunk_embeddings);
447        aggregated
448    };
449    let body_for_storage = raw_body;
450
451    let memory_type = args.r#type.as_str();
452    let new_memory = NewMemory {
453        namespace: namespace.clone(),
454        name: normalized_name.clone(),
455        memory_type: memory_type.to_string(),
456        description: args.description.clone(),
457        body: body_for_storage,
458        body_hash: body_hash.clone(),
459        session_id: args.session_id.clone(),
460        source: "agent".to_string(),
461        metadata,
462    };
463
464    let mut warnings = Vec::new();
465    let mut entities_persisted = 0usize;
466    let mut relationships_persisted = 0usize;
467
468    let graph_entity_embeddings = graph
469        .entities
470        .iter()
471        .map(|entity| {
472            let entity_text = match &entity.description {
473                Some(desc) => format!("{} {}", entity.name, desc),
474                None => entity.name.clone(),
475            };
476            crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
477        })
478        .collect::<Result<Vec<_>, _>>()?;
479
480    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
481
482    let (memory_id, action, version) = match existing_memory {
483        Some((existing_id, _updated_at, _current_version)) => {
484            if let Some(hash_id) = duplicate_hash_id {
485                if hash_id != existing_id {
486                    warnings.push(format!(
487                        "identical body already exists as memory id {hash_id}"
488                    ));
489                }
490            }
491
492            storage_chunks::delete_chunks(&tx, existing_id)?;
493
494            let next_v = versions::next_version(&tx, existing_id)?;
495            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
496            versions::insert_version(
497                &tx,
498                existing_id,
499                next_v,
500                &normalized_name,
501                memory_type,
502                &args.description,
503                &new_memory.body,
504                &serde_json::to_string(&new_memory.metadata)?,
505                None,
506                "edit",
507            )?;
508            memories::upsert_vec(
509                &tx,
510                existing_id,
511                &namespace,
512                memory_type,
513                &embedding,
514                &normalized_name,
515                &snippet,
516            )?;
517            (existing_id, "updated".to_string(), next_v)
518        }
519        None => {
520            if let Some(hash_id) = duplicate_hash_id {
521                warnings.push(format!(
522                    "identical body already exists as memory id {hash_id}"
523                ));
524            }
525            let id = memories::insert(&tx, &new_memory)?;
526            versions::insert_version(
527                &tx,
528                id,
529                1,
530                &normalized_name,
531                memory_type,
532                &args.description,
533                &new_memory.body,
534                &serde_json::to_string(&new_memory.metadata)?,
535                None,
536                "create",
537            )?;
538            memories::upsert_vec(
539                &tx,
540                id,
541                &namespace,
542                memory_type,
543                &embedding,
544                &normalized_name,
545                &snippet,
546            )?;
547            (id, "created".to_string(), 1)
548        }
549    };
550
551    if chunks_info.len() > 1 {
552        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
553
554        let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
555            AppError::Internal(anyhow::anyhow!(
556                "cache de embeddings de chunks ausente no caminho multi-chunk do remember"
557            ))
558        })?;
559
560        for (i, emb) in chunk_embeddings.iter().enumerate() {
561            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
562        }
563        output::emit_progress_i18n(
564            &format!(
565                "Remember stage: persisted chunk vectors; process RSS {} MB",
566                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
567            ),
568            &format!(
569                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
570                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
571            ),
572        );
573    }
574
575    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
576        for entity in &graph.entities {
577            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
578            let entity_embedding = &graph_entity_embeddings[entities_persisted];
579            entities::upsert_entity_vec(
580                &tx,
581                entity_id,
582                &namespace,
583                entity.entity_type,
584                entity_embedding,
585                &entity.name,
586            )?;
587            entities::link_memory_entity(&tx, memory_id, entity_id)?;
588            entities::increment_degree(&tx, entity_id)?;
589            entities_persisted += 1;
590        }
591        let entity_types: std::collections::HashMap<&str, EntityType> = graph
592            .entities
593            .iter()
594            .map(|entity| (entity.name.as_str(), entity.entity_type))
595            .collect();
596
597        for rel in &graph.relationships {
598            let source_entity = NewEntity {
599                name: rel.source.clone(),
600                entity_type: entity_types
601                    .get(rel.source.as_str())
602                    .copied()
603                    .unwrap_or(EntityType::Concept),
604                description: None,
605            };
606            let target_entity = NewEntity {
607                name: rel.target.clone(),
608                entity_type: entity_types
609                    .get(rel.target.as_str())
610                    .copied()
611                    .unwrap_or(EntityType::Concept),
612                description: None,
613            };
614            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
615            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
616            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
617            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
618            relationships_persisted += 1;
619        }
620    }
621    tx.commit()?;
622
623    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
624    // Failures do not propagate — non-critical path with graceful degradation.
625    let urls_persisted = if !extracted_urls.is_empty() {
626        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
627            .into_iter()
628            .map(|u| storage_urls::MemoryUrl {
629                url: u.url,
630                offset: Some(u.offset as i64),
631            })
632            .collect();
633        storage_urls::insert_urls(&conn, memory_id, &url_entries)
634    } else {
635        0
636    };
637
638    let created_at_epoch = chrono::Utc::now().timestamp();
639    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
640
641    output::emit_json(&RememberResponse {
642        memory_id,
643        // Persist the normalized (kebab-case) slug as `name` since that is the
644        // storage key. The original input is exposed via `original_name` only
645        // when normalization actually changed something (B_4 in v1.0.32).
646        name: normalized_name.clone(),
647        namespace,
648        action: action.clone(),
649        operation: action,
650        version,
651        entities_persisted,
652        relationships_persisted,
653        relationships_truncated,
654        chunks_created,
655        chunks_persisted,
656        urls_persisted,
657        extraction_method,
658        merged_into_memory_id: None,
659        warnings,
660        created_at: created_at_epoch,
661        created_at_iso,
662        elapsed_ms: inicio.elapsed().as_millis() as u64,
663        name_was_normalized,
664        original_name: name_was_normalized.then_some(original_name),
665    })?;
666
667    Ok(())
668}
669
670#[cfg(test)]
671mod tests {
672    use super::compute_chunks_persisted;
673    use crate::output::RememberResponse;
674
675    // Bug H-M8: chunks_persisted contract is unit-testable and matches schema.
676    #[test]
677    fn chunks_persisted_zero_for_zero_chunks() {
678        assert_eq!(compute_chunks_persisted(0), 0);
679    }
680
681    #[test]
682    fn chunks_persisted_zero_for_single_chunk_body() {
683        // Single-chunk bodies live in the memories row itself; no row is
684        // appended to memory_chunks. This is the documented contract.
685        assert_eq!(compute_chunks_persisted(1), 0);
686    }
687
688    #[test]
689    fn chunks_persisted_equals_count_for_multi_chunk_body() {
690        // Every chunk above the first triggers a row in memory_chunks.
691        assert_eq!(compute_chunks_persisted(2), 2);
692        assert_eq!(compute_chunks_persisted(7), 7);
693        assert_eq!(compute_chunks_persisted(64), 64);
694    }
695
696    #[test]
697    fn remember_response_serializes_required_fields() {
698        let resp = RememberResponse {
699            memory_id: 42,
700            name: "minha-mem".to_string(),
701            namespace: "global".to_string(),
702            action: "created".to_string(),
703            operation: "created".to_string(),
704            version: 1,
705            entities_persisted: 0,
706            relationships_persisted: 0,
707            relationships_truncated: false,
708            chunks_created: 1,
709            chunks_persisted: 0,
710            urls_persisted: 0,
711            extraction_method: None,
712            merged_into_memory_id: None,
713            warnings: vec![],
714            created_at: 1_705_320_000,
715            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
716            elapsed_ms: 55,
717            name_was_normalized: false,
718            original_name: None,
719        };
720
721        let json = serde_json::to_value(&resp).expect("serialization failed");
722        assert_eq!(json["memory_id"], 42);
723        assert_eq!(json["action"], "created");
724        assert_eq!(json["operation"], "created");
725        assert_eq!(json["version"], 1);
726        assert_eq!(json["elapsed_ms"], 55u64);
727        assert!(json["warnings"].is_array());
728        assert!(json["merged_into_memory_id"].is_null());
729    }
730
731    #[test]
732    fn remember_response_action_e_operation_sao_aliases() {
733        let resp = RememberResponse {
734            memory_id: 1,
735            name: "mem".to_string(),
736            namespace: "global".to_string(),
737            action: "updated".to_string(),
738            operation: "updated".to_string(),
739            version: 2,
740            entities_persisted: 3,
741            relationships_persisted: 1,
742            relationships_truncated: false,
743            extraction_method: None,
744            chunks_created: 2,
745            chunks_persisted: 2,
746            urls_persisted: 0,
747            merged_into_memory_id: None,
748            warnings: vec![],
749            created_at: 0,
750            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
751            elapsed_ms: 0,
752            name_was_normalized: false,
753            original_name: None,
754        };
755
756        let json = serde_json::to_value(&resp).expect("serialization failed");
757        assert_eq!(
758            json["action"], json["operation"],
759            "action e operation devem ser iguais"
760        );
761        assert_eq!(json["entities_persisted"], 3);
762        assert_eq!(json["relationships_persisted"], 1);
763        assert_eq!(json["chunks_created"], 2);
764    }
765
766    #[test]
767    fn remember_response_warnings_lista_mensagens() {
768        let resp = RememberResponse {
769            memory_id: 5,
770            name: "dup-mem".to_string(),
771            namespace: "global".to_string(),
772            action: "created".to_string(),
773            operation: "created".to_string(),
774            version: 1,
775            entities_persisted: 0,
776            extraction_method: None,
777            relationships_persisted: 0,
778            relationships_truncated: false,
779            chunks_created: 1,
780            chunks_persisted: 0,
781            urls_persisted: 0,
782            merged_into_memory_id: None,
783            warnings: vec!["identical body already exists as memory id 3".to_string()],
784            created_at: 0,
785            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
786            elapsed_ms: 10,
787            name_was_normalized: false,
788            original_name: None,
789        };
790
791        let json = serde_json::to_value(&resp).expect("serialization failed");
792        let warnings = json["warnings"]
793            .as_array()
794            .expect("warnings deve ser array");
795        assert_eq!(warnings.len(), 1);
796        assert!(warnings[0].as_str().unwrap().contains("identical body"));
797    }
798
799    #[test]
800    fn invalid_name_reserved_prefix_returns_validation_error() {
801        use crate::errors::AppError;
802        // Validates the rejection logic for names with the "__" prefix directly
803        let nome = "__reservado";
804        let resultado: Result<(), AppError> = if nome.starts_with("__") {
805            Err(AppError::Validation(
806                crate::i18n::validation::reserved_name(),
807            ))
808        } else {
809            Ok(())
810        };
811        assert!(resultado.is_err());
812        if let Err(AppError::Validation(msg)) = resultado {
813            assert!(!msg.is_empty());
814        }
815    }
816
817    #[test]
818    fn name_too_long_returns_validation_error() {
819        use crate::errors::AppError;
820        let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
821        let resultado: Result<(), AppError> =
822            if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
823                Err(AppError::Validation(crate::i18n::validation::name_length(
824                    crate::constants::MAX_MEMORY_NAME_LEN,
825                )))
826            } else {
827                Ok(())
828            };
829        assert!(resultado.is_err());
830    }
831
832    #[test]
833    fn remember_response_merged_into_memory_id_some_serializes_integer() {
834        let resp = RememberResponse {
835            memory_id: 10,
836            name: "mem-mergeada".to_string(),
837            namespace: "global".to_string(),
838            action: "updated".to_string(),
839            operation: "updated".to_string(),
840            version: 3,
841            extraction_method: None,
842            entities_persisted: 0,
843            relationships_persisted: 0,
844            relationships_truncated: false,
845            chunks_created: 1,
846            chunks_persisted: 0,
847            urls_persisted: 0,
848            merged_into_memory_id: Some(7),
849            warnings: vec![],
850            created_at: 0,
851            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
852            elapsed_ms: 0,
853            name_was_normalized: false,
854            original_name: None,
855        };
856
857        let json = serde_json::to_value(&resp).expect("serialization failed");
858        assert_eq!(json["merged_into_memory_id"], 7);
859    }
860
861    #[test]
862    fn remember_response_urls_persisted_serializes_field() {
863        // v1.0.24 P0-2: garante que urls_persisted aparece no JSON e aceita valor > 0.
864        let resp = RememberResponse {
865            memory_id: 3,
866            name: "mem-com-urls".to_string(),
867            namespace: "global".to_string(),
868            action: "created".to_string(),
869            operation: "created".to_string(),
870            version: 1,
871            entities_persisted: 0,
872            relationships_persisted: 0,
873            relationships_truncated: false,
874            chunks_created: 1,
875            chunks_persisted: 0,
876            urls_persisted: 3,
877            extraction_method: Some("regex-only".to_string()),
878            merged_into_memory_id: None,
879            warnings: vec![],
880            created_at: 0,
881            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
882            elapsed_ms: 0,
883            name_was_normalized: false,
884            original_name: None,
885        };
886        let json = serde_json::to_value(&resp).expect("serialization failed");
887        assert_eq!(json["urls_persisted"], 3);
888    }
889
890    #[test]
891    fn empty_name_after_normalization_returns_specific_message() {
892        // P0-4 regression: name consisting only of hyphens normalizes to empty string;
893        // must produce a distinct error message, not the "too long" message.
894        use crate::errors::AppError;
895        let normalized = "---".to_lowercase().replace(['_', ' '], "-");
896        let normalized = normalized.trim_matches('-').to_string();
897        let resultado: Result<(), AppError> = if normalized.is_empty() {
898            Err(AppError::Validation(
899                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
900            ))
901        } else {
902            Ok(())
903        };
904        assert!(resultado.is_err());
905        if let Err(AppError::Validation(msg)) = resultado {
906            assert!(
907                msg.contains("empty after normalization"),
908                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
909            );
910        }
911    }
912
913    #[test]
914    fn name_only_underscores_after_normalization_returns_specific_message() {
915        // P0-4 regression: name consisting only of underscores normalizes to empty string.
916        use crate::errors::AppError;
917        let normalized = "___".to_lowercase().replace(['_', ' '], "-");
918        let normalized = normalized.trim_matches('-').to_string();
919        assert!(
920            normalized.is_empty(),
921            "underscores devem normalizar para string vazia"
922        );
923        let resultado: Result<(), AppError> = if normalized.is_empty() {
924            Err(AppError::Validation(
925                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
926            ))
927        } else {
928            Ok(())
929        };
930        assert!(resultado.is_err());
931        if let Err(AppError::Validation(msg)) = resultado {
932            assert!(
933                msg.contains("empty after normalization"),
934                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
935            );
936        }
937    }
938
939    #[test]
940    fn remember_response_relationships_truncated_serializes_field() {
941        // P1-D: garante que relationships_truncated aparece no JSON como bool.
942        let resp_false = RememberResponse {
943            memory_id: 1,
944            name: "test".to_string(),
945            namespace: "global".to_string(),
946            action: "created".to_string(),
947            operation: "created".to_string(),
948            version: 1,
949            entities_persisted: 2,
950            relationships_persisted: 1,
951            relationships_truncated: false,
952            chunks_created: 1,
953            chunks_persisted: 0,
954            urls_persisted: 0,
955            extraction_method: None,
956            merged_into_memory_id: None,
957            warnings: vec![],
958            created_at: 0,
959            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
960            elapsed_ms: 0,
961            name_was_normalized: false,
962            original_name: None,
963        };
964        let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
965        assert_eq!(json_false["relationships_truncated"], false);
966
967        let resp_true = RememberResponse {
968            relationships_truncated: true,
969            ..resp_false
970        };
971        let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
972        assert_eq!(json_true["relationships_truncated"], true);
973    }
974}