Skip to main content

sqlite_graphrag/commands/
remember.rs

1//! Handler for the `remember` CLI subcommand.
2
3use crate::chunking;
4use crate::cli::MemoryType;
5use crate::errors::AppError;
6use crate::i18n::errors_msg;
7use crate::output::{self, JsonOutputFormat, RememberResponse};
8use crate::paths::AppPaths;
9use crate::storage::chunks as storage_chunks;
10use crate::storage::connection::{ensure_schema, open_rw};
11use crate::storage::entities::{NewEntity, NewRelationship};
12use crate::storage::memories::NewMemory;
13use crate::storage::{entities, memories, urls as storage_urls, versions};
14use serde::Deserialize;
15
/// Maps the number of chunks produced for a body to the number of rows that
/// end up in `memory_chunks`.
///
/// * `0` or `1` chunk: the body is stored directly in the `memories` row, no
///   chunk row is appended, and the result is `0`.
/// * `2` or more chunks: every chunk is persisted, so the result equals
///   `chunks_created`.
///
/// Kept as a standalone function so the H-M8 invariant can be unit-tested
/// without running the whole handler. The `chunks_persisted` field of the
/// response schema documents the same contract
/// (see `docs/schemas/remember.schema.json`).
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        multi_chunk_count => multi_chunk_count,
    }
}
31
// Command-line arguments of the `remember` subcommand, consumed by `run`.
//
// Maintainer notes in this struct deliberately use `//` rather than `///`:
// clap's derive turns `///` doc comments into user-visible help text, so adding
// one would change the CLI's `--help` output.
#[derive(clap::Args)]
pub struct RememberArgs {
    // Required. `run` lowercases the value, maps `_` and spaces to `-` and trims
    // boundary hyphens before validating it.
    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
    #[arg(long)]
    pub name: String,
    // Required. Parsed by clap as a `MemoryType` enum value.
    #[arg(
        long,
        value_enum,
        long_help = "Memory kind stored in `memories.type`. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
    )]
    pub r#type: MemoryType,
    // Required.
    // NOTE(review): `run` compares `description.len()` (a byte count) against the
    // limit, while this help text speaks of chars — confirm the intended unit.
    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
    #[arg(long)]
    pub description: String,
    // The four body sources (--body, --body-file, --body-stdin, --graph-stdin)
    // are declared mutually exclusive through `conflicts_with_all`.
    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
    /// Maximum 512000 bytes; rejected if empty without an external graph.
    #[arg(
        long,
        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
    )]
    pub body: Option<String>,
    #[arg(
        long,
        help = "Read body from a file instead of --body",
        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
    )]
    pub body_file: Option<std::path::PathBuf>,
    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
    /// Mutually exclusive with --body, --body-file, --graph-stdin.
    #[arg(
        long,
        conflicts_with_all = ["body", "body_file", "graph_stdin"]
    )]
    pub body_stdin: bool,
    // `run` deserializes this file's content as a JSON array of entities.
    #[arg(
        long,
        help = "JSON file containing entities to associate with this memory"
    )]
    pub entities_file: Option<std::path::PathBuf>,
    // `run` deserializes this file's content as a JSON array of relationships.
    #[arg(
        long,
        help = "JSON file containing relationships to associate with this memory"
    )]
    pub relationships_file: Option<std::path::PathBuf>,
    // Besides the body sources, --graph-stdin also conflicts with both graph
    // files: the stdin payload carries body, entities and relationships together.
    #[arg(
        long,
        help = "Read graph JSON (body + entities + relationships) from stdin",
        conflicts_with_all = [
            "body",
            "body_file",
            "body_stdin",
            "entities_file",
            "relationships_file"
        ]
    )]
    pub graph_stdin: bool,
    // NOTE(review): with `default_value = "global"` clap presumably always yields
    // `Some(..)`, so the `Option` wrapper and any fallback inside
    // `resolve_namespace` may never see `None` — confirm this is intended.
    #[arg(long, default_value = "global")]
    pub namespace: Option<String>,
    // NOTE(review): the help text below promises mutual exclusion with
    // --metadata-file, but no `conflicts_with` is declared on either flag; when
    // both are given, `run` uses --metadata and ignores the file.
    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
    #[arg(long)]
    pub metadata: Option<String>,
    #[arg(long, help = "JSON file containing metadata key-value pairs")]
    pub metadata_file: Option<std::path::PathBuf>,
    // When the name already exists in the namespace, `run` returns a Duplicate
    // error unless this flag is set, in which case the existing memory is updated.
    #[arg(long)]
    pub force_merge: bool,
    // Parsed into an `i64` by `crate::parsers::parse_expected_updated_at` and
    // forwarded to `memories::update` on the update path only.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    #[arg(
        long,
        help = "Disable automatic entity/relationship extraction from body"
    )]
    pub skip_extraction: bool,
    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
    #[arg(long)]
    pub session_id: Option<String>,
    // `run` reads this value and discards it (`let _ = args.format;`).
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    // Hidden from help; `run` never reads it.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database location; falls back to the SQLITE_GRAPHRAG_DB_PATH environment
    // variable when the flag is absent.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
122
/// Graph payload attached to a memory: an optional body plus the entities and
/// relationships to persist alongside it.
///
/// `run` deserializes the whole struct from the `--graph-stdin` JSON payload,
/// or fills `entities` / `relationships` individually from `--entities-file`
/// and `--relationships-file`. Every field may be omitted from the JSON and
/// then takes its default; unknown keys are rejected by `deny_unknown_fields`.
#[derive(Deserialize, Default)]
#[serde(deny_unknown_fields)]
struct GraphInput {
    /// Memory body carried inside the graph payload; `run` takes it out and
    /// uses it as the body when `--graph-stdin` is set.
    #[serde(default)]
    body: Option<String>,
    /// Entities to upsert and link to the memory.
    #[serde(default)]
    entities: Vec<NewEntity>,
    /// Relationships to upsert and link to the memory.
    #[serde(default)]
    relationships: Vec<NewRelationship>,
}
133
134fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
135    for entity in &graph.entities {
136        if !is_valid_entity_type(&entity.entity_type) {
137            return Err(AppError::Validation(format!(
138                "invalid entity_type '{}' for entity '{}'",
139                entity.entity_type, entity.name
140            )));
141        }
142    }
143
144    for rel in &mut graph.relationships {
145        rel.relation = rel.relation.replace('-', "_");
146        if !is_valid_relation(&rel.relation) {
147            return Err(AppError::Validation(format!(
148                "invalid relation '{}' for relationship '{}' -> '{}'",
149                rel.relation, rel.source, rel.target
150            )));
151        }
152        if !(0.0..=1.0).contains(&rel.strength) {
153            return Err(AppError::Validation(format!(
154                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
155                rel.strength, rel.source, rel.target
156            )));
157        }
158    }
159
160    Ok(())
161}
162
/// Reports whether `entity_type` is one of the graph entity kinds accepted for
/// entities attached to a memory.
///
/// The comparison is exact: it is case-sensitive and performs no trimming.
fn is_valid_entity_type(entity_type: &str) -> bool {
    const ACCEPTED_ENTITY_TYPES: &[&str] = &[
        "project",
        "tool",
        "person",
        "file",
        "concept",
        "incident",
        "decision",
        "memory",
        "dashboard",
        "issue_tracker",
        "organization",
        "location",
        "date",
    ];

    ACCEPTED_ENTITY_TYPES
        .iter()
        .any(|accepted| *accepted == entity_type)
}
181
/// Reports whether `relation` is one of the relationship labels accepted for
/// graph edges.
///
/// The comparison is exact and expects the snake_case spelling; callers that
/// receive hyphenated input must rewrite `-` to `_` beforehand.
fn is_valid_relation(relation: &str) -> bool {
    const ACCEPTED_RELATIONS: &[&str] = &[
        "applies_to",
        "uses",
        "depends_on",
        "causes",
        "fixes",
        "contradicts",
        "supports",
        "follows",
        "related",
        "mentions",
        "replaces",
        "tracked_in",
    ];

    ACCEPTED_RELATIONS
        .iter()
        .any(|accepted| *accepted == relation)
}
199
200pub fn run(args: RememberArgs) -> Result<(), AppError> {
201    use crate::constants::*;
202
203    let inicio = std::time::Instant::now();
204    let _ = args.format;
205    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
206
207    // Capture the original `--name` before normalization so the JSON response can
208    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
209    // an owned String because `args.name` is moved into the response below.
210    let original_name = args.name.clone();
211
212    // Auto-normalize to kebab-case before validation (P2-H).
213    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
214    // after truncation by a long filename ending in a hyphen.
215    let normalized_name = {
216        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
217        let trimmed = lower.trim_matches('-').to_string();
218        if trimmed != args.name {
219            tracing::warn!(
220                original = %args.name,
221                normalized = %trimmed,
222                "name auto-normalized to kebab-case"
223            );
224        }
225        trimmed
226    };
227    let name_was_normalized = normalized_name != original_name;
228
229    if normalized_name.is_empty() {
230        return Err(AppError::Validation(
231            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
232        ));
233    }
234    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
235        return Err(AppError::LimitExceeded(
236            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
237        ));
238    }
239
240    if normalized_name.starts_with("__") {
241        return Err(AppError::Validation(
242            crate::i18n::validation::reserved_name(),
243        ));
244    }
245
246    {
247        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
248            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
249        if !slug_re.is_match(&normalized_name) {
250            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
251                &normalized_name,
252            )));
253        }
254    }
255
256    if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
257        return Err(AppError::Validation(
258            crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
259        ));
260    }
261
262    let mut raw_body = if let Some(b) = args.body {
263        b
264    } else if let Some(path) = args.body_file {
265        std::fs::read_to_string(&path).map_err(AppError::Io)?
266    } else if args.body_stdin || args.graph_stdin {
267        crate::stdin_helper::read_stdin_with_timeout(60)?
268    } else {
269        String::new()
270    };
271
272    let entities_provided_externally =
273        args.entities_file.is_some() || args.relationships_file.is_some() || args.graph_stdin;
274
275    let mut graph = GraphInput::default();
276    if let Some(path) = args.entities_file {
277        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
278        graph.entities = serde_json::from_str(&content)?;
279    }
280    if let Some(path) = args.relationships_file {
281        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
282        graph.relationships = serde_json::from_str(&content)?;
283    }
284    if args.graph_stdin {
285        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
286            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
287        })?;
288        raw_body = graph.body.take().unwrap_or_default();
289    }
290
291    if graph.entities.len() > MAX_ENTITIES_PER_MEMORY {
292        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
293            MAX_ENTITIES_PER_MEMORY,
294        )));
295    }
296    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
297        return Err(AppError::LimitExceeded(
298            errors_msg::relationship_limit_exceeded(MAX_RELATIONSHIPS_PER_MEMORY),
299        ));
300    }
301    normalize_and_validate_graph_input(&mut graph)?;
302
303    if raw_body.len() > MAX_MEMORY_BODY_LEN {
304        return Err(AppError::LimitExceeded(
305            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
306        ));
307    }
308
309    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
310    // Without this check, empty embeddings would be persisted, breaking recall semantics.
311    if !entities_provided_externally && graph.entities.is_empty() && raw_body.trim().is_empty() {
312        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
313    }
314
315    let metadata: serde_json::Value = if let Some(m) = args.metadata {
316        serde_json::from_str(&m)?
317    } else if let Some(path) = args.metadata_file {
318        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
319        serde_json::from_str(&content)?
320    } else {
321        serde_json::json!({})
322    };
323
324    let body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
325    let snippet: String = raw_body.chars().take(200).collect();
326
327    let paths = AppPaths::resolve(args.db.as_deref())?;
328    paths.ensure_dirs()?;
329
330    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
331    let mut extraction_method: Option<String> = None;
332    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::new();
333    let mut relationships_truncated = false;
334    if !args.skip_extraction
335        && !entities_provided_externally
336        && graph.entities.is_empty()
337        && !raw_body.trim().is_empty()
338    {
339        match crate::extraction::extract_graph_auto(&raw_body, &paths) {
340            Ok(extracted) => {
341                extraction_method = Some(extracted.extraction_method.clone());
342                extracted_urls = extracted.urls;
343                graph.entities = extracted.entities;
344                graph.relationships = extracted.relationships;
345                relationships_truncated = extracted.relationships_truncated;
346
347                if graph.entities.len() > MAX_ENTITIES_PER_MEMORY {
348                    graph.entities.truncate(MAX_ENTITIES_PER_MEMORY);
349                }
350                if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
351                    relationships_truncated = true;
352                    graph.relationships.truncate(MAX_RELATIONSHIPS_PER_MEMORY);
353                }
354                normalize_and_validate_graph_input(&mut graph)?;
355            }
356            Err(e) => {
357                tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
358            }
359        }
360    }
361
362    let mut conn = open_rw(&paths.db)?;
363    ensure_schema(&mut conn)?;
364
365    {
366        use crate::constants::MAX_NAMESPACES_ACTIVE;
367        let active_count: u32 = conn.query_row(
368            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
369            [],
370            |r| r.get::<_, i64>(0).map(|v| v as u32),
371        )?;
372        let ns_exists: bool = conn.query_row(
373            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
374            rusqlite::params![namespace],
375            |r| r.get::<_, i64>(0).map(|v| v > 0),
376        )?;
377        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
378            return Err(AppError::NamespaceError(format!(
379                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
380            )));
381        }
382    }
383
384    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
385    if existing_memory.is_some() && !args.force_merge {
386        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
387            &normalized_name,
388            &namespace,
389        )));
390    }
391
392    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
393
394    output::emit_progress_i18n(
395        &format!(
396            "Remember stage: validated input; available memory {} MB",
397            crate::memory_guard::available_memory_mb()
398        ),
399        &format!(
400            "Stage remember: input validated; available memory {} MB",
401            crate::memory_guard::available_memory_mb()
402        ),
403    );
404
405    let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
406    let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
407    let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
408    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
409    let chunks_created = chunks_info.len();
410    // For single-chunk bodies the memory row itself stores the content and no
411    // entry is appended to `memory_chunks` (see line ~545). For multi-chunk
412    // bodies every chunk is persisted via `insert_chunk_slices`.
413    let chunks_persisted = compute_chunks_persisted(chunks_info.len());
414
415    output::emit_progress_i18n(
416        &format!(
417            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
418            chunks_created,
419            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
420        ),
421        &format!(
422            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
423            chunks_created,
424            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
425        ),
426    );
427
428    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
429        return Err(AppError::LimitExceeded(format!(
430            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
431            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
432        )));
433    }
434
435    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
436    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
437
438    let embedding = if chunks_info.len() == 1 {
439        crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
440    } else {
441        let chunk_texts: Vec<&str> = chunks_info
442            .iter()
443            .map(|c| chunking::chunk_text(&raw_body, c))
444            .collect();
445        output::emit_progress_i18n(
446            &format!(
447                "Embedding {} chunks serially to keep memory bounded...",
448                chunks_info.len()
449            ),
450            &format!(
451                "Embedding {} chunks serially to keep memory bounded...",
452                chunks_info.len()
453            ),
454        );
455        let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
456        for chunk_text in &chunk_texts {
457            chunk_embeddings.push(crate::daemon::embed_passage_or_local(
458                &paths.models,
459                chunk_text,
460            )?);
461        }
462        output::emit_progress_i18n(
463            &format!(
464                "Remember stage: chunk embeddings complete; process RSS {} MB",
465                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
466            ),
467            &format!(
468                "Stage remember: chunk embeddings completed; process RSS {} MB",
469                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
470            ),
471        );
472        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
473        chunk_embeddings_cache = Some(chunk_embeddings);
474        aggregated
475    };
476    let body_for_storage = raw_body;
477
478    let memory_type = args.r#type.as_str();
479    let new_memory = NewMemory {
480        namespace: namespace.clone(),
481        name: normalized_name.clone(),
482        memory_type: memory_type.to_string(),
483        description: args.description.clone(),
484        body: body_for_storage,
485        body_hash: body_hash.clone(),
486        session_id: args.session_id.clone(),
487        source: "agent".to_string(),
488        metadata,
489    };
490
491    let mut warnings = Vec::new();
492    let mut entities_persisted = 0usize;
493    let mut relationships_persisted = 0usize;
494
495    let graph_entity_embeddings = graph
496        .entities
497        .iter()
498        .map(|entity| {
499            let entity_text = match &entity.description {
500                Some(desc) => format!("{} {}", entity.name, desc),
501                None => entity.name.clone(),
502            };
503            crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
504        })
505        .collect::<Result<Vec<_>, _>>()?;
506
507    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
508
509    let (memory_id, action, version) = match existing_memory {
510        Some((existing_id, _updated_at, _current_version)) => {
511            if let Some(hash_id) = duplicate_hash_id {
512                if hash_id != existing_id {
513                    warnings.push(format!(
514                        "identical body already exists as memory id {hash_id}"
515                    ));
516                }
517            }
518
519            storage_chunks::delete_chunks(&tx, existing_id)?;
520
521            let next_v = versions::next_version(&tx, existing_id)?;
522            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
523            versions::insert_version(
524                &tx,
525                existing_id,
526                next_v,
527                &normalized_name,
528                memory_type,
529                &args.description,
530                &new_memory.body,
531                &serde_json::to_string(&new_memory.metadata)?,
532                None,
533                "edit",
534            )?;
535            memories::upsert_vec(
536                &tx,
537                existing_id,
538                &namespace,
539                memory_type,
540                &embedding,
541                &normalized_name,
542                &snippet,
543            )?;
544            (existing_id, "updated".to_string(), next_v)
545        }
546        None => {
547            if let Some(hash_id) = duplicate_hash_id {
548                warnings.push(format!(
549                    "identical body already exists as memory id {hash_id}"
550                ));
551            }
552            let id = memories::insert(&tx, &new_memory)?;
553            versions::insert_version(
554                &tx,
555                id,
556                1,
557                &normalized_name,
558                memory_type,
559                &args.description,
560                &new_memory.body,
561                &serde_json::to_string(&new_memory.metadata)?,
562                None,
563                "create",
564            )?;
565            memories::upsert_vec(
566                &tx,
567                id,
568                &namespace,
569                memory_type,
570                &embedding,
571                &normalized_name,
572                &snippet,
573            )?;
574            (id, "created".to_string(), 1)
575        }
576    };
577
578    if chunks_info.len() > 1 {
579        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
580
581        let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
582            AppError::Internal(anyhow::anyhow!(
583                "cache de embeddings de chunks ausente no caminho multi-chunk do remember"
584            ))
585        })?;
586
587        for (i, emb) in chunk_embeddings.iter().enumerate() {
588            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
589        }
590        output::emit_progress_i18n(
591            &format!(
592                "Remember stage: persisted chunk vectors; process RSS {} MB",
593                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
594            ),
595            &format!(
596                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
597                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
598            ),
599        );
600    }
601
602    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
603        for entity in &graph.entities {
604            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
605            let entity_embedding = &graph_entity_embeddings[entities_persisted];
606            entities::upsert_entity_vec(
607                &tx,
608                entity_id,
609                &namespace,
610                &entity.entity_type,
611                entity_embedding,
612                &entity.name,
613            )?;
614            entities::link_memory_entity(&tx, memory_id, entity_id)?;
615            entities::increment_degree(&tx, entity_id)?;
616            entities_persisted += 1;
617        }
618        let entity_types: std::collections::HashMap<&str, &str> = graph
619            .entities
620            .iter()
621            .map(|entity| (entity.name.as_str(), entity.entity_type.as_str()))
622            .collect();
623
624        for rel in &graph.relationships {
625            let source_entity = NewEntity {
626                name: rel.source.clone(),
627                entity_type: entity_types
628                    .get(rel.source.as_str())
629                    .copied()
630                    .unwrap_or("concept")
631                    .to_string(),
632                description: None,
633            };
634            let target_entity = NewEntity {
635                name: rel.target.clone(),
636                entity_type: entity_types
637                    .get(rel.target.as_str())
638                    .copied()
639                    .unwrap_or("concept")
640                    .to_string(),
641                description: None,
642            };
643            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
644            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
645            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
646            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
647            relationships_persisted += 1;
648        }
649    }
650    tx.commit()?;
651
652    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
653    // Failures do not propagate — non-critical path with graceful degradation.
654    let urls_persisted = if !extracted_urls.is_empty() {
655        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
656            .into_iter()
657            .map(|u| storage_urls::MemoryUrl {
658                url: u.url,
659                offset: Some(u.offset as i64),
660            })
661            .collect();
662        storage_urls::insert_urls(&conn, memory_id, &url_entries)
663    } else {
664        0
665    };
666
667    let created_at_epoch = chrono::Utc::now().timestamp();
668    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
669
670    output::emit_json(&RememberResponse {
671        memory_id,
672        // Persist the normalized (kebab-case) slug as `name` since that is the
673        // storage key. The original input is exposed via `original_name` only
674        // when normalization actually changed something (B_4 in v1.0.32).
675        name: normalized_name.clone(),
676        namespace,
677        action: action.clone(),
678        operation: action,
679        version,
680        entities_persisted,
681        relationships_persisted,
682        relationships_truncated,
683        chunks_created,
684        chunks_persisted,
685        urls_persisted,
686        extraction_method,
687        merged_into_memory_id: None,
688        warnings,
689        created_at: created_at_epoch,
690        created_at_iso,
691        elapsed_ms: inicio.elapsed().as_millis() as u64,
692        name_was_normalized,
693        original_name: name_was_normalized.then_some(original_name),
694    })?;
695
696    Ok(())
697}
698
699#[cfg(test)]
700mod tests {
701    use super::compute_chunks_persisted;
702    use crate::output::RememberResponse;
703
704    // Bug H-M8: chunks_persisted contract is unit-testable and matches schema.
705    #[test]
706    fn chunks_persisted_zero_for_zero_chunks() {
707        assert_eq!(compute_chunks_persisted(0), 0);
708    }
709
710    #[test]
711    fn chunks_persisted_zero_for_single_chunk_body() {
712        // Single-chunk bodies live in the memories row itself; no row is
713        // appended to memory_chunks. This is the documented contract.
714        assert_eq!(compute_chunks_persisted(1), 0);
715    }
716
717    #[test]
718    fn chunks_persisted_equals_count_for_multi_chunk_body() {
719        // Every chunk above the first triggers a row in memory_chunks.
720        assert_eq!(compute_chunks_persisted(2), 2);
721        assert_eq!(compute_chunks_persisted(7), 7);
722        assert_eq!(compute_chunks_persisted(64), 64);
723    }
724
725    #[test]
726    fn remember_response_serializes_required_fields() {
727        let resp = RememberResponse {
728            memory_id: 42,
729            name: "minha-mem".to_string(),
730            namespace: "global".to_string(),
731            action: "created".to_string(),
732            operation: "created".to_string(),
733            version: 1,
734            entities_persisted: 0,
735            relationships_persisted: 0,
736            relationships_truncated: false,
737            chunks_created: 1,
738            chunks_persisted: 0,
739            urls_persisted: 0,
740            extraction_method: None,
741            merged_into_memory_id: None,
742            warnings: vec![],
743            created_at: 1_705_320_000,
744            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
745            elapsed_ms: 55,
746            name_was_normalized: false,
747            original_name: None,
748        };
749
750        let json = serde_json::to_value(&resp).expect("serialization failed");
751        assert_eq!(json["memory_id"], 42);
752        assert_eq!(json["action"], "created");
753        assert_eq!(json["operation"], "created");
754        assert_eq!(json["version"], 1);
755        assert_eq!(json["elapsed_ms"], 55u64);
756        assert!(json["warnings"].is_array());
757        assert!(json["merged_into_memory_id"].is_null());
758    }
759
760    #[test]
761    fn remember_response_action_e_operation_sao_aliases() {
762        let resp = RememberResponse {
763            memory_id: 1,
764            name: "mem".to_string(),
765            namespace: "global".to_string(),
766            action: "updated".to_string(),
767            operation: "updated".to_string(),
768            version: 2,
769            entities_persisted: 3,
770            relationships_persisted: 1,
771            relationships_truncated: false,
772            extraction_method: None,
773            chunks_created: 2,
774            chunks_persisted: 2,
775            urls_persisted: 0,
776            merged_into_memory_id: None,
777            warnings: vec![],
778            created_at: 0,
779            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
780            elapsed_ms: 0,
781            name_was_normalized: false,
782            original_name: None,
783        };
784
785        let json = serde_json::to_value(&resp).expect("serialization failed");
786        assert_eq!(
787            json["action"], json["operation"],
788            "action e operation devem ser iguais"
789        );
790        assert_eq!(json["entities_persisted"], 3);
791        assert_eq!(json["relationships_persisted"], 1);
792        assert_eq!(json["chunks_created"], 2);
793    }
794
795    #[test]
796    fn remember_response_warnings_lista_mensagens() {
797        let resp = RememberResponse {
798            memory_id: 5,
799            name: "dup-mem".to_string(),
800            namespace: "global".to_string(),
801            action: "created".to_string(),
802            operation: "created".to_string(),
803            version: 1,
804            entities_persisted: 0,
805            extraction_method: None,
806            relationships_persisted: 0,
807            relationships_truncated: false,
808            chunks_created: 1,
809            chunks_persisted: 0,
810            urls_persisted: 0,
811            merged_into_memory_id: None,
812            warnings: vec!["identical body already exists as memory id 3".to_string()],
813            created_at: 0,
814            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
815            elapsed_ms: 10,
816            name_was_normalized: false,
817            original_name: None,
818        };
819
820        let json = serde_json::to_value(&resp).expect("serialization failed");
821        let warnings = json["warnings"]
822            .as_array()
823            .expect("warnings deve ser array");
824        assert_eq!(warnings.len(), 1);
825        assert!(warnings[0].as_str().unwrap().contains("identical body"));
826    }
827
828    #[test]
829    fn invalid_name_reserved_prefix_returns_validation_error() {
830        use crate::errors::AppError;
831        // Validates the rejection logic for names with the "__" prefix directly
832        let nome = "__reservado";
833        let resultado: Result<(), AppError> = if nome.starts_with("__") {
834            Err(AppError::Validation(
835                crate::i18n::validation::reserved_name(),
836            ))
837        } else {
838            Ok(())
839        };
840        assert!(resultado.is_err());
841        if let Err(AppError::Validation(msg)) = resultado {
842            assert!(!msg.is_empty());
843        }
844    }
845
846    #[test]
847    fn name_too_long_returns_validation_error() {
848        use crate::errors::AppError;
849        let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
850        let resultado: Result<(), AppError> =
851            if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
852                Err(AppError::Validation(crate::i18n::validation::name_length(
853                    crate::constants::MAX_MEMORY_NAME_LEN,
854                )))
855            } else {
856                Ok(())
857            };
858        assert!(resultado.is_err());
859    }
860
861    #[test]
862    fn remember_response_merged_into_memory_id_some_serializes_integer() {
863        let resp = RememberResponse {
864            memory_id: 10,
865            name: "mem-mergeada".to_string(),
866            namespace: "global".to_string(),
867            action: "updated".to_string(),
868            operation: "updated".to_string(),
869            version: 3,
870            extraction_method: None,
871            entities_persisted: 0,
872            relationships_persisted: 0,
873            relationships_truncated: false,
874            chunks_created: 1,
875            chunks_persisted: 0,
876            urls_persisted: 0,
877            merged_into_memory_id: Some(7),
878            warnings: vec![],
879            created_at: 0,
880            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
881            elapsed_ms: 0,
882            name_was_normalized: false,
883            original_name: None,
884        };
885
886        let json = serde_json::to_value(&resp).expect("serialization failed");
887        assert_eq!(json["merged_into_memory_id"], 7);
888    }
889
890    #[test]
891    fn remember_response_urls_persisted_serializes_field() {
892        // v1.0.24 P0-2: garante que urls_persisted aparece no JSON e aceita valor > 0.
893        let resp = RememberResponse {
894            memory_id: 3,
895            name: "mem-com-urls".to_string(),
896            namespace: "global".to_string(),
897            action: "created".to_string(),
898            operation: "created".to_string(),
899            version: 1,
900            entities_persisted: 0,
901            relationships_persisted: 0,
902            relationships_truncated: false,
903            chunks_created: 1,
904            chunks_persisted: 0,
905            urls_persisted: 3,
906            extraction_method: Some("regex-only".to_string()),
907            merged_into_memory_id: None,
908            warnings: vec![],
909            created_at: 0,
910            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
911            elapsed_ms: 0,
912            name_was_normalized: false,
913            original_name: None,
914        };
915        let json = serde_json::to_value(&resp).expect("serialization failed");
916        assert_eq!(json["urls_persisted"], 3);
917    }
918
919    #[test]
920    fn empty_name_after_normalization_returns_specific_message() {
921        // P0-4 regression: name consisting only of hyphens normalizes to empty string;
922        // must produce a distinct error message, not the "too long" message.
923        use crate::errors::AppError;
924        let normalized = "---".to_lowercase().replace(['_', ' '], "-");
925        let normalized = normalized.trim_matches('-').to_string();
926        let resultado: Result<(), AppError> = if normalized.is_empty() {
927            Err(AppError::Validation(
928                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
929            ))
930        } else {
931            Ok(())
932        };
933        assert!(resultado.is_err());
934        if let Err(AppError::Validation(msg)) = resultado {
935            assert!(
936                msg.contains("empty after normalization"),
937                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
938            );
939        }
940    }
941
942    #[test]
943    fn name_only_underscores_after_normalization_returns_specific_message() {
944        // P0-4 regression: name consisting only of underscores normalizes to empty string.
945        use crate::errors::AppError;
946        let normalized = "___".to_lowercase().replace(['_', ' '], "-");
947        let normalized = normalized.trim_matches('-').to_string();
948        assert!(
949            normalized.is_empty(),
950            "underscores devem normalizar para string vazia"
951        );
952        let resultado: Result<(), AppError> = if normalized.is_empty() {
953            Err(AppError::Validation(
954                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
955            ))
956        } else {
957            Ok(())
958        };
959        assert!(resultado.is_err());
960        if let Err(AppError::Validation(msg)) = resultado {
961            assert!(
962                msg.contains("empty after normalization"),
963                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
964            );
965        }
966    }
967
968    #[test]
969    fn remember_response_relationships_truncated_serializes_field() {
970        // P1-D: garante que relationships_truncated aparece no JSON como bool.
971        let resp_false = RememberResponse {
972            memory_id: 1,
973            name: "test".to_string(),
974            namespace: "global".to_string(),
975            action: "created".to_string(),
976            operation: "created".to_string(),
977            version: 1,
978            entities_persisted: 2,
979            relationships_persisted: 1,
980            relationships_truncated: false,
981            chunks_created: 1,
982            chunks_persisted: 0,
983            urls_persisted: 0,
984            extraction_method: None,
985            merged_into_memory_id: None,
986            warnings: vec![],
987            created_at: 0,
988            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
989            elapsed_ms: 0,
990            name_was_normalized: false,
991            original_name: None,
992        };
993        let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
994        assert_eq!(json_false["relationships_truncated"], false);
995
996        let resp_true = RememberResponse {
997            relationships_truncated: true,
998            ..resp_false
999        };
1000        let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
1001        assert_eq!(json_true["relationships_truncated"], true);
1002    }
1003
1004    #[test]
1005    fn is_valid_entity_type_accepts_v008_types() {
1006        // V008 added organization, location, date — ensure the validator accepts them.
1007        assert!(super::is_valid_entity_type("organization"));
1008        assert!(super::is_valid_entity_type("location"));
1009        assert!(super::is_valid_entity_type("date"));
1010        assert!(!super::is_valid_entity_type("unknown_type_xyz"));
1011    }
1012}