Skip to main content

sqlite_graphrag/commands/
remember.rs

1//! Handler for the `remember` CLI subcommand.
2
3use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Returns the number of rows written to `memory_chunks` for a body that was
/// split into `chunks_created` chunks.
///
/// * `0` or `1` chunk: the body lives directly in the `memories` row, so no
///   chunk row is appended and the result is `0`.
/// * `2` or more chunks: every chunk is persisted, so the result equals
///   `chunks_created`.
///
/// Kept as a standalone function so the H-M8 invariant is unit-testable
/// without running the full handler. The schema for `chunks_persisted`
/// documents this contract (see `docs/schemas/remember.schema.json`).
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        multi => multi,
    }
}
32
33#[derive(clap::Args)]
34pub struct RememberArgs {
35    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
36    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
37    #[arg(long)]
38    pub name: String,
39    #[arg(
40        long,
41        value_enum,
42        long_help = "Memory kind stored in `memories.type`. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
43    )]
44    pub r#type: MemoryType,
45    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
46    #[arg(long)]
47    pub description: String,
48    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
49    /// Maximum 512000 bytes; rejected if empty without an external graph.
50    #[arg(
51        long,
52        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
53        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
54    )]
55    pub body: Option<String>,
56    #[arg(
57        long,
58        help = "Read body from a file instead of --body",
59        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
60    )]
61    pub body_file: Option<std::path::PathBuf>,
62    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
63    /// Mutually exclusive with --body, --body-file, --graph-stdin.
64    #[arg(
65        long,
66        conflicts_with_all = ["body", "body_file", "graph_stdin"]
67    )]
68    pub body_stdin: bool,
69    #[arg(
70        long,
71        help = "JSON file containing entities to associate with this memory"
72    )]
73    pub entities_file: Option<std::path::PathBuf>,
74    #[arg(
75        long,
76        help = "JSON file containing relationships to associate with this memory"
77    )]
78    pub relationships_file: Option<std::path::PathBuf>,
79    #[arg(
80        long,
81        help = "Read graph JSON (body + entities + relationships) from stdin",
82        conflicts_with_all = [
83            "body",
84            "body_file",
85            "body_stdin",
86            "entities_file",
87            "relationships_file"
88        ]
89    )]
90    pub graph_stdin: bool,
91    #[arg(long, default_value = "global")]
92    pub namespace: Option<String>,
93    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
94    #[arg(long)]
95    pub metadata: Option<String>,
96    #[arg(long, help = "JSON file containing metadata key-value pairs")]
97    pub metadata_file: Option<std::path::PathBuf>,
98    #[arg(long)]
99    pub force_merge: bool,
100    #[arg(
101        long,
102        value_name = "EPOCH_OR_RFC3339",
103        value_parser = crate::parsers::parse_expected_updated_at,
104        long_help = "Optimistic lock: reject if updated_at does not match. \
105Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
106    )]
107    pub expected_updated_at: Option<i64>,
108    #[arg(
109        long,
110        env = "SQLITE_GRAPHRAG_ENABLE_NER",
111        value_parser = crate::parsers::parse_bool_flexible,
112        action = clap::ArgAction::Set,
113        num_args = 0..=1,
114        default_missing_value = "true",
115        default_value = "false",
116        help = "Enable automatic GLiNER NER entity/relationship extraction from body"
117    )]
118    pub enable_ner: bool,
119    #[arg(
120        long,
121        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
122        default_value = "fp32",
123        help = "GLiNER model variant: fp32 (best quality, 1.1GB), fp16 (580MB), int8 (349MB, fastest)"
124    )]
125    pub gliner_variant: String,
126    #[arg(long, hide = true)]
127    pub skip_extraction: bool,
128    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
129    #[arg(long)]
130    pub session_id: Option<String>,
131    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
132    pub format: JsonOutputFormat,
133    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
134    pub json: bool,
135    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
136    pub db: Option<String>,
137}
138
139#[derive(Deserialize, Default)]
140#[serde(deny_unknown_fields)]
141struct GraphInput {
142    #[serde(default)]
143    body: Option<String>,
144    #[serde(default)]
145    entities: Vec<NewEntity>,
146    #[serde(default)]
147    relationships: Vec<NewRelationship>,
148}
149
150fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
151    for rel in &mut graph.relationships {
152        rel.relation = rel.relation.replace('-', "_");
153        if !is_valid_relation(&rel.relation) {
154            return Err(AppError::Validation(format!(
155                "invalid relation '{}' for relationship '{}' -> '{}'",
156                rel.relation, rel.source, rel.target
157            )));
158        }
159        if !(0.0..=1.0).contains(&rel.strength) {
160            return Err(AppError::Validation(format!(
161                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
162                rel.strength, rel.source, rel.target
163            )));
164        }
165    }
166
167    Ok(())
168}
169
/// Returns `true` when `relation` is one of the relation names accepted for
/// relationships attached to a memory.
///
/// The comparison is exact and case-sensitive. Callers are expected to have
/// already normalized hyphens to underscores; `depends-on` is not accepted
/// here, `depends_on` is.
fn is_valid_relation(relation: &str) -> bool {
    matches!(
        relation,
        "applies_to"
            | "uses"
            | "depends_on"
            | "causes"
            | "fixes"
            | "contradicts"
            | "supports"
            | "follows"
            | "related"
            | "mentions"
            | "replaces"
            | "tracked_in"
    )
}
187
188pub fn run(args: RememberArgs) -> Result<(), AppError> {
189    use crate::constants::*;
190
191    let inicio = std::time::Instant::now();
192    let _ = args.format;
193    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
194
195    // Capture the original `--name` before normalization so the JSON response can
196    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
197    // an owned String because `args.name` is moved into the response below.
198    let original_name = args.name.clone();
199
200    // Auto-normalize to kebab-case before validation (P2-H).
201    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
202    // after truncation by a long filename ending in a hyphen.
203    let normalized_name = {
204        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
205        let trimmed = lower.trim_matches('-').to_string();
206        if trimmed != args.name {
207            tracing::warn!(
208                original = %args.name,
209                normalized = %trimmed,
210                "name auto-normalized to kebab-case"
211            );
212        }
213        trimmed
214    };
215    let name_was_normalized = normalized_name != original_name;
216
217    if normalized_name.is_empty() {
218        return Err(AppError::Validation(
219            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
220        ));
221    }
222    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
223        return Err(AppError::LimitExceeded(
224            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
225        ));
226    }
227
228    if normalized_name.starts_with("__") {
229        return Err(AppError::Validation(
230            crate::i18n::validation::reserved_name(),
231        ));
232    }
233
234    {
235        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
236            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
237        if !slug_re.is_match(&normalized_name) {
238            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
239                &normalized_name,
240            )));
241        }
242    }
243
244    if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
245        return Err(AppError::Validation(
246            crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
247        ));
248    }
249
250    let mut raw_body = if let Some(b) = args.body {
251        b
252    } else if let Some(path) = args.body_file {
253        std::fs::read_to_string(&path).map_err(AppError::Io)?
254    } else if args.body_stdin || args.graph_stdin {
255        crate::stdin_helper::read_stdin_with_timeout(60)?
256    } else {
257        String::new()
258    };
259
260    let entities_provided_externally =
261        args.entities_file.is_some() || args.relationships_file.is_some() || args.graph_stdin;
262
263    let mut graph = GraphInput::default();
264    if let Some(path) = args.entities_file {
265        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
266        graph.entities = serde_json::from_str(&content)?;
267    }
268    if let Some(path) = args.relationships_file {
269        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
270        graph.relationships = serde_json::from_str(&content)?;
271    }
272    if args.graph_stdin {
273        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
274            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
275        })?;
276        raw_body = graph.body.take().unwrap_or_default();
277    }
278
279    if graph.entities.len() > max_entities_per_memory() {
280        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
281            max_entities_per_memory(),
282        )));
283    }
284    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
285        return Err(AppError::LimitExceeded(
286            errors_msg::relationship_limit_exceeded(MAX_RELATIONSHIPS_PER_MEMORY),
287        ));
288    }
289    normalize_and_validate_graph_input(&mut graph)?;
290
291    if raw_body.len() > MAX_MEMORY_BODY_LEN {
292        return Err(AppError::LimitExceeded(
293            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
294        ));
295    }
296
297    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
298    // Without this check, empty embeddings would be persisted, breaking recall semantics.
299    if !entities_provided_externally && graph.entities.is_empty() && raw_body.trim().is_empty() {
300        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
301    }
302
303    let metadata: serde_json::Value = if let Some(m) = args.metadata {
304        serde_json::from_str(&m)?
305    } else if let Some(path) = args.metadata_file {
306        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
307        serde_json::from_str(&content)?
308    } else {
309        serde_json::json!({})
310    };
311
312    let body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
313    let snippet: String = raw_body.chars().take(200).collect();
314
315    let paths = AppPaths::resolve(args.db.as_deref())?;
316    paths.ensure_dirs()?;
317
318    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
319    let mut extraction_method: Option<String> = None;
320    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::new();
321    let mut relationships_truncated = false;
322    if args.enable_ner && args.skip_extraction {
323        tracing::warn!(
324            "--enable-ner and --skip-extraction are contradictory; --enable-ner takes precedence"
325        );
326    }
327    let gliner_variant: crate::extraction::GlinerVariant =
328        args.gliner_variant.parse().unwrap_or_else(|e| {
329            tracing::warn!("invalid --gliner-variant: {e}; using fp32");
330            crate::extraction::GlinerVariant::Fp32
331        });
332    if args.enable_ner
333        && !entities_provided_externally
334        && graph.entities.is_empty()
335        && !raw_body.trim().is_empty()
336    {
337        match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
338            Ok(extracted) => {
339                extraction_method = Some(extracted.extraction_method.clone());
340                extracted_urls = extracted.urls;
341                graph.entities = extracted.entities;
342                graph.relationships = extracted.relationships;
343                relationships_truncated = extracted.relationships_truncated;
344
345                if graph.entities.len() > max_entities_per_memory() {
346                    graph.entities.truncate(max_entities_per_memory());
347                }
348                if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
349                    relationships_truncated = true;
350                    graph.relationships.truncate(MAX_RELATIONSHIPS_PER_MEMORY);
351                }
352                normalize_and_validate_graph_input(&mut graph)?;
353            }
354            Err(e) => {
355                tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
356            }
357        }
358    }
359
360    let mut conn = open_rw(&paths.db)?;
361    ensure_schema(&mut conn)?;
362
363    {
364        use crate::constants::MAX_NAMESPACES_ACTIVE;
365        let active_count: u32 = conn.query_row(
366            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
367            [],
368            |r| r.get::<_, i64>(0).map(|v| v as u32),
369        )?;
370        let ns_exists: bool = conn.query_row(
371            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
372            rusqlite::params![namespace],
373            |r| r.get::<_, i64>(0).map(|v| v > 0),
374        )?;
375        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
376            return Err(AppError::NamespaceError(format!(
377                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
378            )));
379        }
380    }
381
382    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
383    if existing_memory.is_some() && !args.force_merge {
384        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
385            &normalized_name,
386            &namespace,
387        )));
388    }
389
390    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
391
392    output::emit_progress_i18n(
393        &format!(
394            "Remember stage: validated input; available memory {} MB",
395            crate::memory_guard::available_memory_mb()
396        ),
397        &format!(
398            "Stage remember: input validated; available memory {} MB",
399            crate::memory_guard::available_memory_mb()
400        ),
401    );
402
403    let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
404    let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
405    let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
406    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
407    let chunks_created = chunks_info.len();
408    // For single-chunk bodies the memory row itself stores the content and no
409    // entry is appended to `memory_chunks` (see line ~545). For multi-chunk
410    // bodies every chunk is persisted via `insert_chunk_slices`.
411    let chunks_persisted = compute_chunks_persisted(chunks_info.len());
412
413    output::emit_progress_i18n(
414        &format!(
415            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
416            chunks_created,
417            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
418        ),
419        &format!(
420            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
421            chunks_created,
422            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
423        ),
424    );
425
426    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
427        return Err(AppError::LimitExceeded(format!(
428            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
429            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
430        )));
431    }
432
433    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
434    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
435
436    let embedding = if chunks_info.len() == 1 {
437        crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
438    } else {
439        let chunk_texts: Vec<&str> = chunks_info
440            .iter()
441            .map(|c| chunking::chunk_text(&raw_body, c))
442            .collect();
443        output::emit_progress_i18n(
444            &format!(
445                "Embedding {} chunks serially to keep memory bounded...",
446                chunks_info.len()
447            ),
448            &format!(
449                "Embedding {} chunks serially to keep memory bounded...",
450                chunks_info.len()
451            ),
452        );
453        let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
454        for chunk_text in &chunk_texts {
455            chunk_embeddings.push(crate::daemon::embed_passage_or_local(
456                &paths.models,
457                chunk_text,
458            )?);
459        }
460        output::emit_progress_i18n(
461            &format!(
462                "Remember stage: chunk embeddings complete; process RSS {} MB",
463                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
464            ),
465            &format!(
466                "Stage remember: chunk embeddings completed; process RSS {} MB",
467                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
468            ),
469        );
470        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
471        chunk_embeddings_cache = Some(chunk_embeddings);
472        aggregated
473    };
474    let body_for_storage = raw_body;
475
476    let memory_type = args.r#type.as_str();
477    let new_memory = NewMemory {
478        namespace: namespace.clone(),
479        name: normalized_name.clone(),
480        memory_type: memory_type.to_string(),
481        description: args.description.clone(),
482        body: body_for_storage,
483        body_hash: body_hash.clone(),
484        session_id: args.session_id.clone(),
485        source: "agent".to_string(),
486        metadata,
487    };
488
489    let mut warnings = Vec::new();
490    let mut entities_persisted = 0usize;
491    let mut relationships_persisted = 0usize;
492
493    let graph_entity_embeddings = graph
494        .entities
495        .iter()
496        .map(|entity| {
497            let entity_text = match &entity.description {
498                Some(desc) => format!("{} {}", entity.name, desc),
499                None => entity.name.clone(),
500            };
501            crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
502        })
503        .collect::<Result<Vec<_>, _>>()?;
504
505    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
506
507    let (memory_id, action, version) = match existing_memory {
508        Some((existing_id, _updated_at, _current_version)) => {
509            if let Some(hash_id) = duplicate_hash_id {
510                if hash_id != existing_id {
511                    warnings.push(format!(
512                        "identical body already exists as memory id {hash_id}"
513                    ));
514                }
515            }
516
517            storage_chunks::delete_chunks(&tx, existing_id)?;
518
519            let next_v = versions::next_version(&tx, existing_id)?;
520            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
521            versions::insert_version(
522                &tx,
523                existing_id,
524                next_v,
525                &normalized_name,
526                memory_type,
527                &args.description,
528                &new_memory.body,
529                &serde_json::to_string(&new_memory.metadata)?,
530                None,
531                "edit",
532            )?;
533            memories::upsert_vec(
534                &tx,
535                existing_id,
536                &namespace,
537                memory_type,
538                &embedding,
539                &normalized_name,
540                &snippet,
541            )?;
542            (existing_id, "updated".to_string(), next_v)
543        }
544        None => {
545            if let Some(hash_id) = duplicate_hash_id {
546                warnings.push(format!(
547                    "identical body already exists as memory id {hash_id}"
548                ));
549            }
550            let id = memories::insert(&tx, &new_memory)?;
551            versions::insert_version(
552                &tx,
553                id,
554                1,
555                &normalized_name,
556                memory_type,
557                &args.description,
558                &new_memory.body,
559                &serde_json::to_string(&new_memory.metadata)?,
560                None,
561                "create",
562            )?;
563            memories::upsert_vec(
564                &tx,
565                id,
566                &namespace,
567                memory_type,
568                &embedding,
569                &normalized_name,
570                &snippet,
571            )?;
572            (id, "created".to_string(), 1)
573        }
574    };
575
576    if chunks_info.len() > 1 {
577        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
578
579        let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
580            AppError::Internal(anyhow::anyhow!(
581                "cache de embeddings de chunks ausente no caminho multi-chunk do remember"
582            ))
583        })?;
584
585        for (i, emb) in chunk_embeddings.iter().enumerate() {
586            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
587        }
588        output::emit_progress_i18n(
589            &format!(
590                "Remember stage: persisted chunk vectors; process RSS {} MB",
591                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
592            ),
593            &format!(
594                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
595                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
596            ),
597        );
598    }
599
600    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
601        for entity in &graph.entities {
602            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
603            let entity_embedding = &graph_entity_embeddings[entities_persisted];
604            entities::upsert_entity_vec(
605                &tx,
606                entity_id,
607                &namespace,
608                entity.entity_type,
609                entity_embedding,
610                &entity.name,
611            )?;
612            entities::link_memory_entity(&tx, memory_id, entity_id)?;
613            entities::increment_degree(&tx, entity_id)?;
614            entities_persisted += 1;
615        }
616        let entity_types: std::collections::HashMap<&str, EntityType> = graph
617            .entities
618            .iter()
619            .map(|entity| (entity.name.as_str(), entity.entity_type))
620            .collect();
621
622        for rel in &graph.relationships {
623            let source_entity = NewEntity {
624                name: rel.source.clone(),
625                entity_type: entity_types
626                    .get(rel.source.as_str())
627                    .copied()
628                    .unwrap_or(EntityType::Concept),
629                description: None,
630            };
631            let target_entity = NewEntity {
632                name: rel.target.clone(),
633                entity_type: entity_types
634                    .get(rel.target.as_str())
635                    .copied()
636                    .unwrap_or(EntityType::Concept),
637                description: None,
638            };
639            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
640            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
641            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
642            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
643            relationships_persisted += 1;
644        }
645    }
646    tx.commit()?;
647
648    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
649    // Failures do not propagate — non-critical path with graceful degradation.
650    let urls_persisted = if !extracted_urls.is_empty() {
651        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
652            .into_iter()
653            .map(|u| storage_urls::MemoryUrl {
654                url: u.url,
655                offset: Some(u.offset as i64),
656            })
657            .collect();
658        storage_urls::insert_urls(&conn, memory_id, &url_entries)
659    } else {
660        0
661    };
662
663    let created_at_epoch = chrono::Utc::now().timestamp();
664    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
665
666    output::emit_json(&RememberResponse {
667        memory_id,
668        // Persist the normalized (kebab-case) slug as `name` since that is the
669        // storage key. The original input is exposed via `original_name` only
670        // when normalization actually changed something (B_4 in v1.0.32).
671        name: normalized_name.clone(),
672        namespace,
673        action: action.clone(),
674        operation: action,
675        version,
676        entities_persisted,
677        relationships_persisted,
678        relationships_truncated,
679        chunks_created,
680        chunks_persisted,
681        urls_persisted,
682        extraction_method,
683        merged_into_memory_id: None,
684        warnings,
685        created_at: created_at_epoch,
686        created_at_iso,
687        elapsed_ms: inicio.elapsed().as_millis() as u64,
688        name_was_normalized,
689        original_name: name_was_normalized.then_some(original_name),
690    })?;
691
692    Ok(())
693}
694
695#[cfg(test)]
696mod tests {
697    use super::compute_chunks_persisted;
698    use crate::output::RememberResponse;
699
700    // Bug H-M8: chunks_persisted contract is unit-testable and matches schema.
701    #[test]
702    fn chunks_persisted_zero_for_zero_chunks() {
703        assert_eq!(compute_chunks_persisted(0), 0);
704    }
705
706    #[test]
707    fn chunks_persisted_zero_for_single_chunk_body() {
708        // Single-chunk bodies live in the memories row itself; no row is
709        // appended to memory_chunks. This is the documented contract.
710        assert_eq!(compute_chunks_persisted(1), 0);
711    }
712
713    #[test]
714    fn chunks_persisted_equals_count_for_multi_chunk_body() {
715        // Every chunk above the first triggers a row in memory_chunks.
716        assert_eq!(compute_chunks_persisted(2), 2);
717        assert_eq!(compute_chunks_persisted(7), 7);
718        assert_eq!(compute_chunks_persisted(64), 64);
719    }
720
721    #[test]
722    fn remember_response_serializes_required_fields() {
723        let resp = RememberResponse {
724            memory_id: 42,
725            name: "minha-mem".to_string(),
726            namespace: "global".to_string(),
727            action: "created".to_string(),
728            operation: "created".to_string(),
729            version: 1,
730            entities_persisted: 0,
731            relationships_persisted: 0,
732            relationships_truncated: false,
733            chunks_created: 1,
734            chunks_persisted: 0,
735            urls_persisted: 0,
736            extraction_method: None,
737            merged_into_memory_id: None,
738            warnings: vec![],
739            created_at: 1_705_320_000,
740            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
741            elapsed_ms: 55,
742            name_was_normalized: false,
743            original_name: None,
744        };
745
746        let json = serde_json::to_value(&resp).expect("serialization failed");
747        assert_eq!(json["memory_id"], 42);
748        assert_eq!(json["action"], "created");
749        assert_eq!(json["operation"], "created");
750        assert_eq!(json["version"], 1);
751        assert_eq!(json["elapsed_ms"], 55u64);
752        assert!(json["warnings"].is_array());
753        assert!(json["merged_into_memory_id"].is_null());
754    }
755
756    #[test]
757    fn remember_response_action_e_operation_sao_aliases() {
758        let resp = RememberResponse {
759            memory_id: 1,
760            name: "mem".to_string(),
761            namespace: "global".to_string(),
762            action: "updated".to_string(),
763            operation: "updated".to_string(),
764            version: 2,
765            entities_persisted: 3,
766            relationships_persisted: 1,
767            relationships_truncated: false,
768            extraction_method: None,
769            chunks_created: 2,
770            chunks_persisted: 2,
771            urls_persisted: 0,
772            merged_into_memory_id: None,
773            warnings: vec![],
774            created_at: 0,
775            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
776            elapsed_ms: 0,
777            name_was_normalized: false,
778            original_name: None,
779        };
780
781        let json = serde_json::to_value(&resp).expect("serialization failed");
782        assert_eq!(
783            json["action"], json["operation"],
784            "action e operation devem ser iguais"
785        );
786        assert_eq!(json["entities_persisted"], 3);
787        assert_eq!(json["relationships_persisted"], 1);
788        assert_eq!(json["chunks_created"], 2);
789    }
790
791    #[test]
792    fn remember_response_warnings_lista_mensagens() {
793        let resp = RememberResponse {
794            memory_id: 5,
795            name: "dup-mem".to_string(),
796            namespace: "global".to_string(),
797            action: "created".to_string(),
798            operation: "created".to_string(),
799            version: 1,
800            entities_persisted: 0,
801            extraction_method: None,
802            relationships_persisted: 0,
803            relationships_truncated: false,
804            chunks_created: 1,
805            chunks_persisted: 0,
806            urls_persisted: 0,
807            merged_into_memory_id: None,
808            warnings: vec!["identical body already exists as memory id 3".to_string()],
809            created_at: 0,
810            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
811            elapsed_ms: 10,
812            name_was_normalized: false,
813            original_name: None,
814        };
815
816        let json = serde_json::to_value(&resp).expect("serialization failed");
817        let warnings = json["warnings"]
818            .as_array()
819            .expect("warnings deve ser array");
820        assert_eq!(warnings.len(), 1);
821        assert!(warnings[0].as_str().unwrap().contains("identical body"));
822    }
823
824    #[test]
825    fn invalid_name_reserved_prefix_returns_validation_error() {
826        use crate::errors::AppError;
827        // Validates the rejection logic for names with the "__" prefix directly
828        let nome = "__reservado";
829        let resultado: Result<(), AppError> = if nome.starts_with("__") {
830            Err(AppError::Validation(
831                crate::i18n::validation::reserved_name(),
832            ))
833        } else {
834            Ok(())
835        };
836        assert!(resultado.is_err());
837        if let Err(AppError::Validation(msg)) = resultado {
838            assert!(!msg.is_empty());
839        }
840    }
841
842    #[test]
843    fn name_too_long_returns_validation_error() {
844        use crate::errors::AppError;
845        let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
846        let resultado: Result<(), AppError> =
847            if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
848                Err(AppError::Validation(crate::i18n::validation::name_length(
849                    crate::constants::MAX_MEMORY_NAME_LEN,
850                )))
851            } else {
852                Ok(())
853            };
854        assert!(resultado.is_err());
855    }
856
857    #[test]
858    fn remember_response_merged_into_memory_id_some_serializes_integer() {
859        let resp = RememberResponse {
860            memory_id: 10,
861            name: "mem-mergeada".to_string(),
862            namespace: "global".to_string(),
863            action: "updated".to_string(),
864            operation: "updated".to_string(),
865            version: 3,
866            extraction_method: None,
867            entities_persisted: 0,
868            relationships_persisted: 0,
869            relationships_truncated: false,
870            chunks_created: 1,
871            chunks_persisted: 0,
872            urls_persisted: 0,
873            merged_into_memory_id: Some(7),
874            warnings: vec![],
875            created_at: 0,
876            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
877            elapsed_ms: 0,
878            name_was_normalized: false,
879            original_name: None,
880        };
881
882        let json = serde_json::to_value(&resp).expect("serialization failed");
883        assert_eq!(json["merged_into_memory_id"], 7);
884    }
885
886    #[test]
887    fn remember_response_urls_persisted_serializes_field() {
888        // v1.0.24 P0-2: garante que urls_persisted aparece no JSON e aceita valor > 0.
889        let resp = RememberResponse {
890            memory_id: 3,
891            name: "mem-com-urls".to_string(),
892            namespace: "global".to_string(),
893            action: "created".to_string(),
894            operation: "created".to_string(),
895            version: 1,
896            entities_persisted: 0,
897            relationships_persisted: 0,
898            relationships_truncated: false,
899            chunks_created: 1,
900            chunks_persisted: 0,
901            urls_persisted: 3,
902            extraction_method: Some("regex-only".to_string()),
903            merged_into_memory_id: None,
904            warnings: vec![],
905            created_at: 0,
906            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
907            elapsed_ms: 0,
908            name_was_normalized: false,
909            original_name: None,
910        };
911        let json = serde_json::to_value(&resp).expect("serialization failed");
912        assert_eq!(json["urls_persisted"], 3);
913    }
914
915    #[test]
916    fn empty_name_after_normalization_returns_specific_message() {
917        // P0-4 regression: name consisting only of hyphens normalizes to empty string;
918        // must produce a distinct error message, not the "too long" message.
919        use crate::errors::AppError;
920        let normalized = "---".to_lowercase().replace(['_', ' '], "-");
921        let normalized = normalized.trim_matches('-').to_string();
922        let resultado: Result<(), AppError> = if normalized.is_empty() {
923            Err(AppError::Validation(
924                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
925            ))
926        } else {
927            Ok(())
928        };
929        assert!(resultado.is_err());
930        if let Err(AppError::Validation(msg)) = resultado {
931            assert!(
932                msg.contains("empty after normalization"),
933                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
934            );
935        }
936    }
937
938    #[test]
939    fn name_only_underscores_after_normalization_returns_specific_message() {
940        // P0-4 regression: name consisting only of underscores normalizes to empty string.
941        use crate::errors::AppError;
942        let normalized = "___".to_lowercase().replace(['_', ' '], "-");
943        let normalized = normalized.trim_matches('-').to_string();
944        assert!(
945            normalized.is_empty(),
946            "underscores devem normalizar para string vazia"
947        );
948        let resultado: Result<(), AppError> = if normalized.is_empty() {
949            Err(AppError::Validation(
950                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
951            ))
952        } else {
953            Ok(())
954        };
955        assert!(resultado.is_err());
956        if let Err(AppError::Validation(msg)) = resultado {
957            assert!(
958                msg.contains("empty after normalization"),
959                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
960            );
961        }
962    }
963
964    #[test]
965    fn remember_response_relationships_truncated_serializes_field() {
966        // P1-D: garante que relationships_truncated aparece no JSON como bool.
967        let resp_false = RememberResponse {
968            memory_id: 1,
969            name: "test".to_string(),
970            namespace: "global".to_string(),
971            action: "created".to_string(),
972            operation: "created".to_string(),
973            version: 1,
974            entities_persisted: 2,
975            relationships_persisted: 1,
976            relationships_truncated: false,
977            chunks_created: 1,
978            chunks_persisted: 0,
979            urls_persisted: 0,
980            extraction_method: None,
981            merged_into_memory_id: None,
982            warnings: vec![],
983            created_at: 0,
984            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
985            elapsed_ms: 0,
986            name_was_normalized: false,
987            original_name: None,
988        };
989        let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
990        assert_eq!(json_false["relationships_truncated"], false);
991
992        let resp_true = RememberResponse {
993            relationships_truncated: true,
994            ..resp_false
995        };
996        let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
997        assert_eq!(json_true["relationships_truncated"], true);
998    }
999}