Skip to main content

sqlite_graphrag/commands/
remember.rs

1//! Handler for the `remember` CLI subcommand.
2
3use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Number of `memory_chunks` rows written for a body that was split into
/// `chunks_created` chunks.
///
/// A body that yields at most one chunk is kept in the `memories` row alone,
/// so nothing is appended to `memory_chunks` and the result is `0`. From two
/// chunks upward every chunk is persisted and the result equals
/// `chunks_created`.
///
/// Kept as a standalone function so the H-M8 invariant can be unit-tested
/// without driving the whole handler; the `chunks_persisted` field in
/// `docs/schemas/remember.schema.json` documents the same contract.
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        many => many,
    }
}
32
33#[derive(clap::Args)]
34pub struct RememberArgs {
35    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
36    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
37    #[arg(long)]
38    pub name: String,
39    #[arg(
40        long,
41        value_enum,
42        long_help = "Memory kind stored in `memories.type`. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
43    )]
44    pub r#type: MemoryType,
45    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
46    #[arg(long)]
47    pub description: String,
48    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
49    /// Maximum 512000 bytes; rejected if empty without an external graph.
50    #[arg(
51        long,
52        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
53        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
54    )]
55    pub body: Option<String>,
56    #[arg(
57        long,
58        help = "Read body from a file instead of --body",
59        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
60    )]
61    pub body_file: Option<std::path::PathBuf>,
62    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
63    /// Mutually exclusive with --body, --body-file, --graph-stdin.
64    #[arg(
65        long,
66        conflicts_with_all = ["body", "body_file", "graph_stdin"]
67    )]
68    pub body_stdin: bool,
69    #[arg(
70        long,
71        help = "JSON file containing entities to associate with this memory"
72    )]
73    pub entities_file: Option<std::path::PathBuf>,
74    #[arg(
75        long,
76        help = "JSON file containing relationships to associate with this memory"
77    )]
78    pub relationships_file: Option<std::path::PathBuf>,
79    #[arg(
80        long,
81        help = "Read graph JSON (body + entities + relationships) from stdin",
82        conflicts_with_all = [
83            "body",
84            "body_file",
85            "body_stdin",
86            "entities_file",
87            "relationships_file"
88        ]
89    )]
90    pub graph_stdin: bool,
91    #[arg(long, default_value = "global")]
92    pub namespace: Option<String>,
93    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
94    #[arg(long)]
95    pub metadata: Option<String>,
96    #[arg(long, help = "JSON file containing metadata key-value pairs")]
97    pub metadata_file: Option<std::path::PathBuf>,
98    #[arg(long)]
99    pub force_merge: bool,
100    #[arg(
101        long,
102        value_name = "EPOCH_OR_RFC3339",
103        value_parser = crate::parsers::parse_expected_updated_at,
104        long_help = "Optimistic lock: reject if updated_at does not match. \
105Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
106    )]
107    pub expected_updated_at: Option<i64>,
108    #[arg(
109        long,
110        env = "SQLITE_GRAPHRAG_ENABLE_NER",
111        value_parser = crate::parsers::parse_bool_flexible,
112        action = clap::ArgAction::Set,
113        num_args = 0..=1,
114        default_missing_value = "true",
115        default_value = "false",
116        help = "Enable automatic BERT NER entity/relationship extraction from body"
117    )]
118    pub enable_ner: bool,
119    #[arg(long, hide = true)]
120    pub skip_extraction: bool,
121    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
122    #[arg(long)]
123    pub session_id: Option<String>,
124    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
125    pub format: JsonOutputFormat,
126    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
127    pub json: bool,
128    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
129    pub db: Option<String>,
130}
131
132#[derive(Deserialize, Default)]
133#[serde(deny_unknown_fields)]
134struct GraphInput {
135    #[serde(default)]
136    body: Option<String>,
137    #[serde(default)]
138    entities: Vec<NewEntity>,
139    #[serde(default)]
140    relationships: Vec<NewRelationship>,
141}
142
143fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
144    for rel in &mut graph.relationships {
145        rel.relation = rel.relation.replace('-', "_");
146        if !is_valid_relation(&rel.relation) {
147            return Err(AppError::Validation(format!(
148                "invalid relation '{}' for relationship '{}' -> '{}'",
149                rel.relation, rel.source, rel.target
150            )));
151        }
152        if !(0.0..=1.0).contains(&rel.strength) {
153            return Err(AppError::Validation(format!(
154                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
155                rel.strength, rel.source, rel.target
156            )));
157        }
158    }
159
160    Ok(())
161}
162
/// Reports whether `relation` is one of the relationship labels accepted by
/// `remember`.
///
/// The comparison is exact and case-sensitive and expects the underscore
/// spelling (`depends_on`, not `depends-on`).
fn is_valid_relation(relation: &str) -> bool {
    const ACCEPTED_RELATIONS: [&str; 12] = [
        "applies_to",
        "uses",
        "depends_on",
        "causes",
        "fixes",
        "contradicts",
        "supports",
        "follows",
        "related",
        "mentions",
        "replaces",
        "tracked_in",
    ];

    ACCEPTED_RELATIONS
        .iter()
        .any(|candidate| *candidate == relation)
}
180
181pub fn run(args: RememberArgs) -> Result<(), AppError> {
182    use crate::constants::*;
183
184    let inicio = std::time::Instant::now();
185    let _ = args.format;
186    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
187
188    // Capture the original `--name` before normalization so the JSON response can
189    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
190    // an owned String because `args.name` is moved into the response below.
191    let original_name = args.name.clone();
192
193    // Auto-normalize to kebab-case before validation (P2-H).
194    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
195    // after truncation by a long filename ending in a hyphen.
196    let normalized_name = {
197        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
198        let trimmed = lower.trim_matches('-').to_string();
199        if trimmed != args.name {
200            tracing::warn!(
201                original = %args.name,
202                normalized = %trimmed,
203                "name auto-normalized to kebab-case"
204            );
205        }
206        trimmed
207    };
208    let name_was_normalized = normalized_name != original_name;
209
210    if normalized_name.is_empty() {
211        return Err(AppError::Validation(
212            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
213        ));
214    }
215    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
216        return Err(AppError::LimitExceeded(
217            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
218        ));
219    }
220
221    if normalized_name.starts_with("__") {
222        return Err(AppError::Validation(
223            crate::i18n::validation::reserved_name(),
224        ));
225    }
226
227    {
228        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
229            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
230        if !slug_re.is_match(&normalized_name) {
231            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
232                &normalized_name,
233            )));
234        }
235    }
236
237    if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
238        return Err(AppError::Validation(
239            crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
240        ));
241    }
242
243    let mut raw_body = if let Some(b) = args.body {
244        b
245    } else if let Some(path) = args.body_file {
246        std::fs::read_to_string(&path).map_err(AppError::Io)?
247    } else if args.body_stdin || args.graph_stdin {
248        crate::stdin_helper::read_stdin_with_timeout(60)?
249    } else {
250        String::new()
251    };
252
253    let entities_provided_externally =
254        args.entities_file.is_some() || args.relationships_file.is_some() || args.graph_stdin;
255
256    let mut graph = GraphInput::default();
257    if let Some(path) = args.entities_file {
258        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
259        graph.entities = serde_json::from_str(&content)?;
260    }
261    if let Some(path) = args.relationships_file {
262        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
263        graph.relationships = serde_json::from_str(&content)?;
264    }
265    if args.graph_stdin {
266        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
267            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
268        })?;
269        raw_body = graph.body.take().unwrap_or_default();
270    }
271
272    if graph.entities.len() > max_entities_per_memory() {
273        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
274            max_entities_per_memory(),
275        )));
276    }
277    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
278        return Err(AppError::LimitExceeded(
279            errors_msg::relationship_limit_exceeded(MAX_RELATIONSHIPS_PER_MEMORY),
280        ));
281    }
282    normalize_and_validate_graph_input(&mut graph)?;
283
284    if raw_body.len() > MAX_MEMORY_BODY_LEN {
285        return Err(AppError::LimitExceeded(
286            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
287        ));
288    }
289
290    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
291    // Without this check, empty embeddings would be persisted, breaking recall semantics.
292    if !entities_provided_externally && graph.entities.is_empty() && raw_body.trim().is_empty() {
293        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
294    }
295
296    let metadata: serde_json::Value = if let Some(m) = args.metadata {
297        serde_json::from_str(&m)?
298    } else if let Some(path) = args.metadata_file {
299        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
300        serde_json::from_str(&content)?
301    } else {
302        serde_json::json!({})
303    };
304
305    let body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
306    let snippet: String = raw_body.chars().take(200).collect();
307
308    let paths = AppPaths::resolve(args.db.as_deref())?;
309    paths.ensure_dirs()?;
310
311    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
312    let mut extraction_method: Option<String> = None;
313    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::new();
314    let mut relationships_truncated = false;
315    if args.enable_ner && args.skip_extraction {
316        tracing::warn!(
317            "--enable-ner and --skip-extraction are contradictory; --enable-ner takes precedence"
318        );
319    }
320    if args.enable_ner
321        && !entities_provided_externally
322        && graph.entities.is_empty()
323        && !raw_body.trim().is_empty()
324    {
325        match crate::extraction::extract_graph_auto(&raw_body, &paths) {
326            Ok(extracted) => {
327                extraction_method = Some(extracted.extraction_method.clone());
328                extracted_urls = extracted.urls;
329                graph.entities = extracted.entities;
330                graph.relationships = extracted.relationships;
331                relationships_truncated = extracted.relationships_truncated;
332
333                if graph.entities.len() > max_entities_per_memory() {
334                    graph.entities.truncate(max_entities_per_memory());
335                }
336                if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
337                    relationships_truncated = true;
338                    graph.relationships.truncate(MAX_RELATIONSHIPS_PER_MEMORY);
339                }
340                normalize_and_validate_graph_input(&mut graph)?;
341            }
342            Err(e) => {
343                tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
344            }
345        }
346    }
347
348    let mut conn = open_rw(&paths.db)?;
349    ensure_schema(&mut conn)?;
350
351    {
352        use crate::constants::MAX_NAMESPACES_ACTIVE;
353        let active_count: u32 = conn.query_row(
354            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
355            [],
356            |r| r.get::<_, i64>(0).map(|v| v as u32),
357        )?;
358        let ns_exists: bool = conn.query_row(
359            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
360            rusqlite::params![namespace],
361            |r| r.get::<_, i64>(0).map(|v| v > 0),
362        )?;
363        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
364            return Err(AppError::NamespaceError(format!(
365                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
366            )));
367        }
368    }
369
370    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
371    if existing_memory.is_some() && !args.force_merge {
372        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
373            &normalized_name,
374            &namespace,
375        )));
376    }
377
378    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
379
380    output::emit_progress_i18n(
381        &format!(
382            "Remember stage: validated input; available memory {} MB",
383            crate::memory_guard::available_memory_mb()
384        ),
385        &format!(
386            "Stage remember: input validated; available memory {} MB",
387            crate::memory_guard::available_memory_mb()
388        ),
389    );
390
391    let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
392    let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
393    let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
394    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
395    let chunks_created = chunks_info.len();
396    // For single-chunk bodies the memory row itself stores the content and no
397    // entry is appended to `memory_chunks` (see line ~545). For multi-chunk
398    // bodies every chunk is persisted via `insert_chunk_slices`.
399    let chunks_persisted = compute_chunks_persisted(chunks_info.len());
400
401    output::emit_progress_i18n(
402        &format!(
403            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
404            chunks_created,
405            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
406        ),
407        &format!(
408            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
409            chunks_created,
410            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
411        ),
412    );
413
414    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
415        return Err(AppError::LimitExceeded(format!(
416            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
417            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
418        )));
419    }
420
421    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
422    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
423
424    let embedding = if chunks_info.len() == 1 {
425        crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
426    } else {
427        let chunk_texts: Vec<&str> = chunks_info
428            .iter()
429            .map(|c| chunking::chunk_text(&raw_body, c))
430            .collect();
431        output::emit_progress_i18n(
432            &format!(
433                "Embedding {} chunks serially to keep memory bounded...",
434                chunks_info.len()
435            ),
436            &format!(
437                "Embedding {} chunks serially to keep memory bounded...",
438                chunks_info.len()
439            ),
440        );
441        let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
442        for chunk_text in &chunk_texts {
443            chunk_embeddings.push(crate::daemon::embed_passage_or_local(
444                &paths.models,
445                chunk_text,
446            )?);
447        }
448        output::emit_progress_i18n(
449            &format!(
450                "Remember stage: chunk embeddings complete; process RSS {} MB",
451                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
452            ),
453            &format!(
454                "Stage remember: chunk embeddings completed; process RSS {} MB",
455                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
456            ),
457        );
458        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
459        chunk_embeddings_cache = Some(chunk_embeddings);
460        aggregated
461    };
462    let body_for_storage = raw_body;
463
464    let memory_type = args.r#type.as_str();
465    let new_memory = NewMemory {
466        namespace: namespace.clone(),
467        name: normalized_name.clone(),
468        memory_type: memory_type.to_string(),
469        description: args.description.clone(),
470        body: body_for_storage,
471        body_hash: body_hash.clone(),
472        session_id: args.session_id.clone(),
473        source: "agent".to_string(),
474        metadata,
475    };
476
477    let mut warnings = Vec::new();
478    let mut entities_persisted = 0usize;
479    let mut relationships_persisted = 0usize;
480
481    let graph_entity_embeddings = graph
482        .entities
483        .iter()
484        .map(|entity| {
485            let entity_text = match &entity.description {
486                Some(desc) => format!("{} {}", entity.name, desc),
487                None => entity.name.clone(),
488            };
489            crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
490        })
491        .collect::<Result<Vec<_>, _>>()?;
492
493    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
494
495    let (memory_id, action, version) = match existing_memory {
496        Some((existing_id, _updated_at, _current_version)) => {
497            if let Some(hash_id) = duplicate_hash_id {
498                if hash_id != existing_id {
499                    warnings.push(format!(
500                        "identical body already exists as memory id {hash_id}"
501                    ));
502                }
503            }
504
505            storage_chunks::delete_chunks(&tx, existing_id)?;
506
507            let next_v = versions::next_version(&tx, existing_id)?;
508            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
509            versions::insert_version(
510                &tx,
511                existing_id,
512                next_v,
513                &normalized_name,
514                memory_type,
515                &args.description,
516                &new_memory.body,
517                &serde_json::to_string(&new_memory.metadata)?,
518                None,
519                "edit",
520            )?;
521            memories::upsert_vec(
522                &tx,
523                existing_id,
524                &namespace,
525                memory_type,
526                &embedding,
527                &normalized_name,
528                &snippet,
529            )?;
530            (existing_id, "updated".to_string(), next_v)
531        }
532        None => {
533            if let Some(hash_id) = duplicate_hash_id {
534                warnings.push(format!(
535                    "identical body already exists as memory id {hash_id}"
536                ));
537            }
538            let id = memories::insert(&tx, &new_memory)?;
539            versions::insert_version(
540                &tx,
541                id,
542                1,
543                &normalized_name,
544                memory_type,
545                &args.description,
546                &new_memory.body,
547                &serde_json::to_string(&new_memory.metadata)?,
548                None,
549                "create",
550            )?;
551            memories::upsert_vec(
552                &tx,
553                id,
554                &namespace,
555                memory_type,
556                &embedding,
557                &normalized_name,
558                &snippet,
559            )?;
560            (id, "created".to_string(), 1)
561        }
562    };
563
564    if chunks_info.len() > 1 {
565        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
566
567        let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
568            AppError::Internal(anyhow::anyhow!(
569                "cache de embeddings de chunks ausente no caminho multi-chunk do remember"
570            ))
571        })?;
572
573        for (i, emb) in chunk_embeddings.iter().enumerate() {
574            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
575        }
576        output::emit_progress_i18n(
577            &format!(
578                "Remember stage: persisted chunk vectors; process RSS {} MB",
579                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
580            ),
581            &format!(
582                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
583                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
584            ),
585        );
586    }
587
588    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
589        for entity in &graph.entities {
590            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
591            let entity_embedding = &graph_entity_embeddings[entities_persisted];
592            entities::upsert_entity_vec(
593                &tx,
594                entity_id,
595                &namespace,
596                entity.entity_type,
597                entity_embedding,
598                &entity.name,
599            )?;
600            entities::link_memory_entity(&tx, memory_id, entity_id)?;
601            entities::increment_degree(&tx, entity_id)?;
602            entities_persisted += 1;
603        }
604        let entity_types: std::collections::HashMap<&str, EntityType> = graph
605            .entities
606            .iter()
607            .map(|entity| (entity.name.as_str(), entity.entity_type))
608            .collect();
609
610        for rel in &graph.relationships {
611            let source_entity = NewEntity {
612                name: rel.source.clone(),
613                entity_type: entity_types
614                    .get(rel.source.as_str())
615                    .copied()
616                    .unwrap_or(EntityType::Concept),
617                description: None,
618            };
619            let target_entity = NewEntity {
620                name: rel.target.clone(),
621                entity_type: entity_types
622                    .get(rel.target.as_str())
623                    .copied()
624                    .unwrap_or(EntityType::Concept),
625                description: None,
626            };
627            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
628            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
629            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
630            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
631            relationships_persisted += 1;
632        }
633    }
634    tx.commit()?;
635
636    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
637    // Failures do not propagate — non-critical path with graceful degradation.
638    let urls_persisted = if !extracted_urls.is_empty() {
639        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
640            .into_iter()
641            .map(|u| storage_urls::MemoryUrl {
642                url: u.url,
643                offset: Some(u.offset as i64),
644            })
645            .collect();
646        storage_urls::insert_urls(&conn, memory_id, &url_entries)
647    } else {
648        0
649    };
650
651    let created_at_epoch = chrono::Utc::now().timestamp();
652    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
653
654    output::emit_json(&RememberResponse {
655        memory_id,
656        // Persist the normalized (kebab-case) slug as `name` since that is the
657        // storage key. The original input is exposed via `original_name` only
658        // when normalization actually changed something (B_4 in v1.0.32).
659        name: normalized_name.clone(),
660        namespace,
661        action: action.clone(),
662        operation: action,
663        version,
664        entities_persisted,
665        relationships_persisted,
666        relationships_truncated,
667        chunks_created,
668        chunks_persisted,
669        urls_persisted,
670        extraction_method,
671        merged_into_memory_id: None,
672        warnings,
673        created_at: created_at_epoch,
674        created_at_iso,
675        elapsed_ms: inicio.elapsed().as_millis() as u64,
676        name_was_normalized,
677        original_name: name_was_normalized.then_some(original_name),
678    })?;
679
680    Ok(())
681}
682
683#[cfg(test)]
684mod tests {
685    use super::compute_chunks_persisted;
686    use crate::output::RememberResponse;
687
688    // Bug H-M8: chunks_persisted contract is unit-testable and matches schema.
#[test]
fn chunks_persisted_zero_for_zero_chunks() {
    // No chunks at all means nothing is written to memory_chunks.
    let persisted = compute_chunks_persisted(0);
    assert_eq!(persisted, 0);
}
693
#[test]
fn chunks_persisted_zero_for_single_chunk_body() {
    // Documented contract: a single-chunk body is stored in the memories row
    // itself, so memory_chunks receives no row for it.
    let persisted = compute_chunks_persisted(1);
    assert_eq!(persisted, 0);
}
700
#[test]
fn chunks_persisted_equals_count_for_multi_chunk_body() {
    // From two chunks upward every chunk gets its own memory_chunks row, so
    // the persisted count mirrors the chunk count.
    for chunk_count in [2usize, 7, 64] {
        assert_eq!(compute_chunks_persisted(chunk_count), chunk_count);
    }
}
708
#[test]
fn remember_response_serializes_required_fields() {
    // A freshly created, single-chunk memory without any graph data.
    let response = RememberResponse {
        memory_id: 42,
        name: String::from("minha-mem"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: None,
        warnings: Vec::new(),
        created_at: 1_705_320_000,
        created_at_iso: String::from("2024-01-15T12:00:00Z"),
        elapsed_ms: 55,
        name_was_normalized: false,
        original_name: None,
    };

    let value = serde_json::to_value(&response).expect("serialization failed");

    // Scalar fields appear under their own names.
    assert_eq!(value["memory_id"], 42);
    assert_eq!(value["action"], "created");
    assert_eq!(value["operation"], "created");
    assert_eq!(value["version"], 1);
    assert_eq!(value["elapsed_ms"], 55u64);

    // `warnings` is always an array; an absent merge target serializes as null.
    assert!(value["warnings"].is_array());
    assert!(value["merged_into_memory_id"].is_null());
}
743
#[test]
fn remember_response_action_e_operation_sao_aliases() {
    // An updated, two-chunk memory with some graph data attached.
    let response = RememberResponse {
        memory_id: 1,
        name: String::from("mem"),
        namespace: String::from("global"),
        action: String::from("updated"),
        operation: String::from("updated"),
        version: 2,
        entities_persisted: 3,
        relationships_persisted: 1,
        relationships_truncated: false,
        chunks_created: 2,
        chunks_persisted: 2,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: None,
        warnings: Vec::new(),
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 0,
        name_was_normalized: false,
        original_name: None,
    };

    let value = serde_json::to_value(&response).expect("serialization failed");

    // `action` and `operation` are two names for the same value.
    assert_eq!(
        value["action"], value["operation"],
        "action e operation devem ser iguais"
    );

    // Counters are passed through unchanged.
    assert_eq!(value["entities_persisted"], 3);
    assert_eq!(value["relationships_persisted"], 1);
    assert_eq!(value["chunks_created"], 2);
}
778
#[test]
fn remember_response_warnings_lista_mensagens() {
    // A created memory whose body duplicates an existing one: the response
    // carries exactly one warning describing the duplicate.
    let duplicate_warning = String::from("identical body already exists as memory id 3");
    let response = RememberResponse {
        memory_id: 5,
        name: String::from("dup-mem"),
        namespace: String::from("global"),
        action: String::from("created"),
        operation: String::from("created"),
        version: 1,
        entities_persisted: 0,
        relationships_persisted: 0,
        relationships_truncated: false,
        chunks_created: 1,
        chunks_persisted: 0,
        urls_persisted: 0,
        extraction_method: None,
        merged_into_memory_id: None,
        warnings: vec![duplicate_warning],
        created_at: 0,
        created_at_iso: String::from("1970-01-01T00:00:00Z"),
        elapsed_ms: 10,
        name_was_normalized: false,
        original_name: None,
    };

    let value = serde_json::to_value(&response).expect("serialization failed");
    let serialized_warnings = value["warnings"]
        .as_array()
        .expect("warnings deve ser array");

    assert_eq!(serialized_warnings.len(), 1);
    let first_warning = serialized_warnings[0].as_str().unwrap();
    assert!(first_warning.contains("identical body"));
}
811
812    #[test]
813    fn invalid_name_reserved_prefix_returns_validation_error() {
814        use crate::errors::AppError;
815        // Validates the rejection logic for names with the "__" prefix directly
816        let nome = "__reservado";
817        let resultado: Result<(), AppError> = if nome.starts_with("__") {
818            Err(AppError::Validation(
819                crate::i18n::validation::reserved_name(),
820            ))
821        } else {
822            Ok(())
823        };
824        assert!(resultado.is_err());
825        if let Err(AppError::Validation(msg)) = resultado {
826            assert!(!msg.is_empty());
827        }
828    }
829
830    #[test]
831    fn name_too_long_returns_validation_error() {
832        use crate::errors::AppError;
833        let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
834        let resultado: Result<(), AppError> =
835            if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
836                Err(AppError::Validation(crate::i18n::validation::name_length(
837                    crate::constants::MAX_MEMORY_NAME_LEN,
838                )))
839            } else {
840                Ok(())
841            };
842        assert!(resultado.is_err());
843    }
844
845    #[test]
846    fn remember_response_merged_into_memory_id_some_serializes_integer() {
847        let resp = RememberResponse {
848            memory_id: 10,
849            name: "mem-mergeada".to_string(),
850            namespace: "global".to_string(),
851            action: "updated".to_string(),
852            operation: "updated".to_string(),
853            version: 3,
854            extraction_method: None,
855            entities_persisted: 0,
856            relationships_persisted: 0,
857            relationships_truncated: false,
858            chunks_created: 1,
859            chunks_persisted: 0,
860            urls_persisted: 0,
861            merged_into_memory_id: Some(7),
862            warnings: vec![],
863            created_at: 0,
864            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
865            elapsed_ms: 0,
866            name_was_normalized: false,
867            original_name: None,
868        };
869
870        let json = serde_json::to_value(&resp).expect("serialization failed");
871        assert_eq!(json["merged_into_memory_id"], 7);
872    }
873
874    #[test]
875    fn remember_response_urls_persisted_serializes_field() {
876        // v1.0.24 P0-2: garante que urls_persisted aparece no JSON e aceita valor > 0.
877        let resp = RememberResponse {
878            memory_id: 3,
879            name: "mem-com-urls".to_string(),
880            namespace: "global".to_string(),
881            action: "created".to_string(),
882            operation: "created".to_string(),
883            version: 1,
884            entities_persisted: 0,
885            relationships_persisted: 0,
886            relationships_truncated: false,
887            chunks_created: 1,
888            chunks_persisted: 0,
889            urls_persisted: 3,
890            extraction_method: Some("regex-only".to_string()),
891            merged_into_memory_id: None,
892            warnings: vec![],
893            created_at: 0,
894            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
895            elapsed_ms: 0,
896            name_was_normalized: false,
897            original_name: None,
898        };
899        let json = serde_json::to_value(&resp).expect("serialization failed");
900        assert_eq!(json["urls_persisted"], 3);
901    }
902
903    #[test]
904    fn empty_name_after_normalization_returns_specific_message() {
905        // P0-4 regression: name consisting only of hyphens normalizes to empty string;
906        // must produce a distinct error message, not the "too long" message.
907        use crate::errors::AppError;
908        let normalized = "---".to_lowercase().replace(['_', ' '], "-");
909        let normalized = normalized.trim_matches('-').to_string();
910        let resultado: Result<(), AppError> = if normalized.is_empty() {
911            Err(AppError::Validation(
912                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
913            ))
914        } else {
915            Ok(())
916        };
917        assert!(resultado.is_err());
918        if let Err(AppError::Validation(msg)) = resultado {
919            assert!(
920                msg.contains("empty after normalization"),
921                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
922            );
923        }
924    }
925
926    #[test]
927    fn name_only_underscores_after_normalization_returns_specific_message() {
928        // P0-4 regression: name consisting only of underscores normalizes to empty string.
929        use crate::errors::AppError;
930        let normalized = "___".to_lowercase().replace(['_', ' '], "-");
931        let normalized = normalized.trim_matches('-').to_string();
932        assert!(
933            normalized.is_empty(),
934            "underscores devem normalizar para string vazia"
935        );
936        let resultado: Result<(), AppError> = if normalized.is_empty() {
937            Err(AppError::Validation(
938                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
939            ))
940        } else {
941            Ok(())
942        };
943        assert!(resultado.is_err());
944        if let Err(AppError::Validation(msg)) = resultado {
945            assert!(
946                msg.contains("empty after normalization"),
947                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
948            );
949        }
950    }
951
952    #[test]
953    fn remember_response_relationships_truncated_serializes_field() {
954        // P1-D: garante que relationships_truncated aparece no JSON como bool.
955        let resp_false = RememberResponse {
956            memory_id: 1,
957            name: "test".to_string(),
958            namespace: "global".to_string(),
959            action: "created".to_string(),
960            operation: "created".to_string(),
961            version: 1,
962            entities_persisted: 2,
963            relationships_persisted: 1,
964            relationships_truncated: false,
965            chunks_created: 1,
966            chunks_persisted: 0,
967            urls_persisted: 0,
968            extraction_method: None,
969            merged_into_memory_id: None,
970            warnings: vec![],
971            created_at: 0,
972            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
973            elapsed_ms: 0,
974            name_was_normalized: false,
975            original_name: None,
976        };
977        let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
978        assert_eq!(json_false["relationships_truncated"], false);
979
980        let resp_true = RememberResponse {
981            relationships_truncated: true,
982            ..resp_false
983        };
984        let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
985        assert_eq!(json_true["relationships_truncated"], true);
986    }
987}