Skip to main content

sqlite_graphrag/commands/
remember.rs

1//! Handler for the `remember` CLI subcommand.
2
3use crate::chunking;
4use crate::cli::MemoryType;
5use crate::entity_type::EntityType;
6use crate::errors::AppError;
7use crate::i18n::errors_msg;
8use crate::output::{self, JsonOutputFormat, RememberResponse};
9use crate::paths::AppPaths;
10use crate::storage::chunks as storage_chunks;
11use crate::storage::connection::{ensure_schema, open_rw};
12use crate::storage::entities::{NewEntity, NewRelationship};
13use crate::storage::memories::NewMemory;
14use crate::storage::{entities, memories, urls as storage_urls, versions};
15use serde::Deserialize;
16
/// Reports how many rows a body split into `chunks_created` chunks adds to
/// `memory_chunks`.
///
/// A body that yields at most one chunk is kept in the `memories` row only,
/// so nothing is appended to `memory_chunks` and the result is `0`. With two
/// or more chunks every chunk is persisted and the result equals
/// `chunks_created`.
///
/// Kept as a standalone function so the H-M8 invariant can be unit-tested
/// without running the whole handler; the `chunks_persisted` schema entry
/// documents the same contract (see `docs/schemas/remember.schema.json`).
fn compute_chunks_persisted(chunks_created: usize) -> usize {
    match chunks_created {
        0 | 1 => 0,
        many => many,
    }
}
32
// Command-line arguments accepted by the `remember` subcommand.
//
// Explanatory notes added in this struct deliberately use plain `//`
// comments: clap's derive macro turns `///` doc comments into help text, so
// new doc comments would change what `--help` prints.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Create a memory with inline body\n  \
    sqlite-graphrag remember --name design-auth --type decision \\\n    \
    --description \"auth design\" --body \"JWT for stateless auth\"\n\n  \
    # Create with curated graph via --graph-stdin\n  \
    echo '{\"body\":\"...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
    sqlite-graphrag remember --name my-mem --type note --description \"desc\" --graph-stdin\n\n  \
    # Enable GLiNER NER extraction with --graph-stdin\n  \
    echo '{\"body\":\"Alice from Microsoft...\",\"entities\":[],\"relationships\":[]}' | \\\n    \
    sqlite-graphrag remember --name ner-test --type note --description \"test\" \\\n    \
    --graph-stdin --enable-ner --gliner-variant int8\n\n  \
    # Idempotent upsert with --force-merge\n  \
    sqlite-graphrag remember --name my-mem --type note --description \"updated\" \\\n    \
    --body \"new content\" --force-merge")]
pub struct RememberArgs {
    /// Memory name in kebab-case (lowercase letters, digits, hyphens).
    /// Acts as unique key within the namespace; collisions trigger merge or rejection.
    #[arg(long)]
    pub name: String,
    // Memory kind; `run` stores its `as_str()` form as the memory type.
    #[arg(
        long,
        value_enum,
        long_help = "Memory kind stored in `memories.type`. This is NOT the graph `entity_type` used in `--entities-file`. Valid values: user, feedback, project, reference, decision, incident, skill, document, note."
    )]
    pub r#type: MemoryType,
    /// Short description (≤500 chars) summarizing the memory for use in `list` and `recall` snippets.
    #[arg(long)]
    pub description: String,
    /// Inline body content. Mutually exclusive with --body-file, --body-stdin, --graph-stdin.
    /// Maximum 512000 bytes; rejected if empty without an external graph.
    #[arg(
        long,
        help = "Inline body content (max 500 KB / 512000 bytes; for larger inputs split into multiple memories or use --body-file)",
        conflicts_with_all = ["body_file", "body_stdin", "graph_stdin"]
    )]
    pub body: Option<String>,
    // Path whose contents become the body; `run` reads it with
    // `std::fs::read_to_string`.
    #[arg(
        long,
        help = "Read body from a file instead of --body",
        conflicts_with_all = ["body", "body_stdin", "graph_stdin"]
    )]
    pub body_file: Option<std::path::PathBuf>,
    /// Read body from stdin until EOF. Useful in pipes (echo "..." | sqlite-graphrag remember ...).
    /// Mutually exclusive with --body, --body-file, --graph-stdin.
    #[arg(
        long,
        conflicts_with_all = ["body", "body_file", "graph_stdin"]
    )]
    pub body_stdin: bool,
    // JSON file deserialized by `run` into the list of entities.
    #[arg(
        long,
        help = "JSON file containing entities to associate with this memory"
    )]
    pub entities_file: Option<std::path::PathBuf>,
    // JSON file deserialized by `run` into the list of relationships.
    #[arg(
        long,
        help = "JSON file containing relationships to associate with this memory"
    )]
    pub relationships_file: Option<std::path::PathBuf>,
    // When set, `run` reads stdin and parses it as a `GraphInput` payload
    // (body + entities + relationships), which is why it conflicts with every
    // other body/graph source.
    #[arg(
        long,
        help = "Read graph JSON (body + entities + relationships) from stdin",
        conflicts_with_all = [
            "body",
            "body_file",
            "body_stdin",
            "entities_file",
            "relationships_file"
        ]
    )]
    pub graph_stdin: bool,
    // Target namespace (declared default: "global"); `run` passes it through
    // `crate::namespace::resolve_namespace`.
    #[arg(long, default_value = "global")]
    pub namespace: Option<String>,
    // NOTE(review): the help text below says this flag is mutually exclusive
    // with --metadata-file, but no `conflicts_with` is declared here. In `run`
    // `--metadata` is checked first, so it wins when both are supplied.
    /// Inline JSON object with arbitrary metadata key-value pairs. Mutually exclusive with --metadata-file.
    #[arg(long)]
    pub metadata: Option<String>,
    #[arg(long, help = "JSON file containing metadata key-value pairs")]
    pub metadata_file: Option<std::path::PathBuf>,
    // Without this flag `run` returns `AppError::Duplicate` when a memory with
    // the same name already exists in the namespace; with it the existing
    // memory is updated instead.
    #[arg(long)]
    pub force_merge: bool,
    // Parsed into an `i64` by `crate::parsers::parse_expected_updated_at`;
    // `run` forwards it to `memories::update` on the update path only.
    #[arg(
        long,
        value_name = "EPOCH_OR_RFC3339",
        value_parser = crate::parsers::parse_expected_updated_at,
        long_help = "Optimistic lock: reject if updated_at does not match. \
Accepts Unix epoch (e.g. 1700000000) or RFC 3339 (e.g. 2026-04-19T12:00:00Z)."
    )]
    pub expected_updated_at: Option<i64>,
    // Takes zero or one value: a bare `--enable-ner` means "true"
    // (`default_missing_value`), an absent flag means "false"; the value can
    // also come from the SQLITE_GRAPHRAG_ENABLE_NER environment variable.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_ENABLE_NER",
        value_parser = crate::parsers::parse_bool_flexible,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        default_missing_value = "true",
        default_value = "false",
        help = "Enable automatic GLiNER NER entity/relationship extraction from body"
    )]
    pub enable_ner: bool,
    // Kept as a free-form string here; `run` parses it and falls back to fp32
    // (with a warning) when the value is not recognized.
    #[arg(
        long,
        env = "SQLITE_GRAPHRAG_GLINER_VARIANT",
        default_value = "fp32",
        help = "GLiNER model variant: fp32 (best quality, 1.1GB), fp16 (580MB), int8 (349MB, fastest)"
    )]
    pub gliner_variant: String,
    // Hidden flag; `run` only logs warnings about it and never changes
    // behavior based on it.
    #[arg(long, hide = true)]
    pub skip_extraction: bool,
    /// Optional opaque session identifier for tracing memory provenance across multi-agent runs.
    #[arg(long)]
    pub session_id: Option<String>,
    // Output format selector; `run` currently reads and discards it
    // (`let _ = args.format;`).
    #[arg(long, value_enum, default_value_t = JsonOutputFormat::Json)]
    pub format: JsonOutputFormat,
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Database path override (flag or SQLITE_GRAPHRAG_DB_PATH); `run` hands it
    // to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
152
/// Graph payload parsed from `--graph-stdin`; `run` also uses it as the
/// in-memory holder for entities and relationships loaded from
/// `--entities-file` / `--relationships-file`.
///
/// Every field may be omitted in the JSON (`#[serde(default)]`), but unknown
/// keys are rejected (`deny_unknown_fields`).
#[derive(Deserialize, Default)]
#[serde(deny_unknown_fields)]
struct GraphInput {
    /// Memory body text; `run` takes it out of the payload and uses it as the
    /// body (an absent value becomes an empty string).
    #[serde(default)]
    body: Option<String>,
    /// Entities to associate with the memory.
    #[serde(default)]
    entities: Vec<NewEntity>,
    /// Relationships to associate with the memory; checked by
    /// `normalize_and_validate_graph_input` before being persisted.
    #[serde(default)]
    relationships: Vec<NewRelationship>,
}
163
164fn normalize_and_validate_graph_input(graph: &mut GraphInput) -> Result<(), AppError> {
165    for rel in &mut graph.relationships {
166        rel.relation = rel.relation.replace('-', "_");
167        if !is_valid_relation(&rel.relation) {
168            return Err(AppError::Validation(format!(
169                "invalid relation '{}' for relationship '{}' -> '{}'",
170                rel.relation, rel.source, rel.target
171            )));
172        }
173        if !(0.0..=1.0).contains(&rel.strength) {
174            return Err(AppError::Validation(format!(
175                "invalid strength {} for relationship '{}' -> '{}'; expected value in [0.0, 1.0]",
176                rel.strength, rel.source, rel.target
177            )));
178        }
179    }
180
181    Ok(())
182}
183
/// Tells whether `relation` is one of the relationship labels accepted for
/// graph edges.
///
/// The comparison is exact and case-sensitive and expects the underscore
/// spelling (for example `depends_on`, not `depends-on`).
fn is_valid_relation(relation: &str) -> bool {
    const ALLOWED_RELATIONS: [&str; 12] = [
        "applies_to",
        "uses",
        "depends_on",
        "causes",
        "fixes",
        "contradicts",
        "supports",
        "follows",
        "related",
        "mentions",
        "replaces",
        "tracked_in",
    ];

    ALLOWED_RELATIONS
        .iter()
        .any(|&allowed| allowed == relation)
}
201
202pub fn run(args: RememberArgs) -> Result<(), AppError> {
203    use crate::constants::*;
204
205    let inicio = std::time::Instant::now();
206    let _ = args.format;
207    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
208
209    // Capture the original `--name` before normalization so the JSON response can
210    // surface `name_was_normalized` + `original_name` (B_4 in v1.0.32). Stored as
211    // an owned String because `args.name` is moved into the response below.
212    let original_name = args.name.clone();
213
214    // Auto-normalize to kebab-case before validation (P2-H).
215    // v1.0.20: also trims hyphens at the boundary (including trailing) to avoid rejection
216    // after truncation by a long filename ending in a hyphen.
217    let normalized_name = {
218        let lower = args.name.to_lowercase().replace(['_', ' '], "-");
219        let trimmed = lower.trim_matches('-').to_string();
220        if trimmed != args.name {
221            tracing::warn!(
222                original = %args.name,
223                normalized = %trimmed,
224                "name auto-normalized to kebab-case"
225            );
226        }
227        trimmed
228    };
229    let name_was_normalized = normalized_name != original_name;
230
231    if normalized_name.is_empty() {
232        return Err(AppError::Validation(
233            "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
234        ));
235    }
236    if normalized_name.len() > MAX_MEMORY_NAME_LEN {
237        return Err(AppError::LimitExceeded(
238            crate::i18n::validation::name_length(MAX_MEMORY_NAME_LEN),
239        ));
240    }
241
242    if normalized_name.starts_with("__") {
243        return Err(AppError::Validation(
244            crate::i18n::validation::reserved_name(),
245        ));
246    }
247
248    {
249        let slug_re = regex::Regex::new(crate::constants::NAME_SLUG_REGEX)
250            .map_err(|e| AppError::Internal(anyhow::anyhow!("regex: {e}")))?;
251        if !slug_re.is_match(&normalized_name) {
252            return Err(AppError::Validation(crate::i18n::validation::name_kebab(
253                &normalized_name,
254            )));
255        }
256    }
257
258    if args.description.len() > MAX_MEMORY_DESCRIPTION_LEN {
259        return Err(AppError::Validation(
260            crate::i18n::validation::description_exceeds(MAX_MEMORY_DESCRIPTION_LEN),
261        ));
262    }
263
264    let mut raw_body = if let Some(b) = args.body {
265        b
266    } else if let Some(path) = args.body_file {
267        std::fs::read_to_string(&path).map_err(AppError::Io)?
268    } else if args.body_stdin || args.graph_stdin {
269        crate::stdin_helper::read_stdin_with_timeout(60)?
270    } else {
271        String::new()
272    };
273
274    let entities_provided_externally =
275        args.entities_file.is_some() || args.relationships_file.is_some() || args.graph_stdin;
276
277    let mut graph = GraphInput::default();
278    if let Some(path) = args.entities_file {
279        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
280        graph.entities = serde_json::from_str(&content)?;
281    }
282    if let Some(path) = args.relationships_file {
283        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
284        graph.relationships = serde_json::from_str(&content)?;
285    }
286    if args.graph_stdin {
287        graph = serde_json::from_str::<GraphInput>(&raw_body).map_err(|e| {
288            AppError::Validation(format!("invalid JSON payload on --graph-stdin: {e}"))
289        })?;
290        raw_body = graph.body.take().unwrap_or_default();
291    }
292
293    if graph.entities.len() > max_entities_per_memory() {
294        return Err(AppError::LimitExceeded(errors_msg::entity_limit_exceeded(
295            max_entities_per_memory(),
296        )));
297    }
298    if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
299        return Err(AppError::LimitExceeded(
300            errors_msg::relationship_limit_exceeded(MAX_RELATIONSHIPS_PER_MEMORY),
301        ));
302    }
303    normalize_and_validate_graph_input(&mut graph)?;
304
305    if raw_body.len() > MAX_MEMORY_BODY_LEN {
306        return Err(AppError::LimitExceeded(
307            crate::i18n::validation::body_exceeds(MAX_MEMORY_BODY_LEN),
308        ));
309    }
310
311    // v1.0.22 P1: reject empty or whitespace-only body when no external graph is provided.
312    // Without this check, empty embeddings would be persisted, breaking recall semantics.
313    if !entities_provided_externally && graph.entities.is_empty() && raw_body.trim().is_empty() {
314        return Err(AppError::Validation(crate::i18n::validation::empty_body()));
315    }
316
317    let metadata: serde_json::Value = if let Some(m) = args.metadata {
318        serde_json::from_str(&m)?
319    } else if let Some(path) = args.metadata_file {
320        let content = std::fs::read_to_string(&path).map_err(AppError::Io)?;
321        serde_json::from_str(&content)?
322    } else {
323        serde_json::json!({})
324    };
325
326    let body_hash = blake3::hash(raw_body.as_bytes()).to_hex().to_string();
327    let snippet: String = raw_body.chars().take(200).collect();
328
329    let paths = AppPaths::resolve(args.db.as_deref())?;
330    paths.ensure_dirs()?;
331
332    // v1.0.20: use .trim().is_empty() to reject bodies that are only whitespace.
333    let mut extraction_method: Option<String> = None;
334    let mut extracted_urls: Vec<crate::extraction::ExtractedUrl> = Vec::new();
335    let mut relationships_truncated = false;
336    if args.enable_ner && args.skip_extraction {
337        tracing::warn!(
338            "--enable-ner and --skip-extraction are contradictory; --enable-ner takes precedence"
339        );
340    }
341    if args.skip_extraction && !args.enable_ner {
342        tracing::warn!("--skip-extraction is deprecated and has no effect (NER is disabled by default since v1.0.45); remove this flag");
343    }
344    let gliner_variant: crate::extraction::GlinerVariant =
345        args.gliner_variant.parse().unwrap_or_else(|e| {
346            tracing::warn!("invalid --gliner-variant: {e}; using fp32");
347            crate::extraction::GlinerVariant::Fp32
348        });
349    if args.enable_ner && graph.entities.is_empty() && !raw_body.trim().is_empty() {
350        match crate::extraction::extract_graph_auto(&raw_body, &paths, gliner_variant) {
351            Ok(extracted) => {
352                extraction_method = Some(extracted.extraction_method.clone());
353                extracted_urls = extracted.urls;
354                graph.entities = extracted.entities;
355                graph.relationships = extracted.relationships;
356                relationships_truncated = extracted.relationships_truncated;
357
358                if graph.entities.len() > max_entities_per_memory() {
359                    graph.entities.truncate(max_entities_per_memory());
360                }
361                if graph.relationships.len() > MAX_RELATIONSHIPS_PER_MEMORY {
362                    relationships_truncated = true;
363                    graph.relationships.truncate(MAX_RELATIONSHIPS_PER_MEMORY);
364                }
365                normalize_and_validate_graph_input(&mut graph)?;
366            }
367            Err(e) => {
368                tracing::warn!("auto-extraction failed (graceful degradation): {e:#}");
369                extraction_method = Some("none:extraction-failed".to_string());
370            }
371        }
372    }
373
374    let mut conn = open_rw(&paths.db)?;
375    ensure_schema(&mut conn)?;
376
377    {
378        use crate::constants::MAX_NAMESPACES_ACTIVE;
379        let active_count: u32 = conn.query_row(
380            "SELECT COUNT(DISTINCT namespace) FROM memories WHERE deleted_at IS NULL",
381            [],
382            |r| r.get::<_, i64>(0).map(|v| v as u32),
383        )?;
384        let ns_exists: bool = conn.query_row(
385            "SELECT EXISTS(SELECT 1 FROM memories WHERE namespace = ?1 AND deleted_at IS NULL)",
386            rusqlite::params![namespace],
387            |r| r.get::<_, i64>(0).map(|v| v > 0),
388        )?;
389        if !ns_exists && active_count >= MAX_NAMESPACES_ACTIVE {
390            return Err(AppError::NamespaceError(format!(
391                "active namespace limit of {MAX_NAMESPACES_ACTIVE} reached while trying to create '{namespace}'"
392            )));
393        }
394    }
395
396    let existing_memory = memories::find_by_name(&conn, &namespace, &normalized_name)?;
397    if existing_memory.is_some() && !args.force_merge {
398        return Err(AppError::Duplicate(errors_msg::duplicate_memory(
399            &normalized_name,
400            &namespace,
401        )));
402    }
403
404    let duplicate_hash_id = memories::find_by_hash(&conn, &namespace, &body_hash)?;
405
406    output::emit_progress_i18n(
407        &format!(
408            "Remember stage: validated input; available memory {} MB",
409            crate::memory_guard::available_memory_mb()
410        ),
411        &format!(
412            "Stage remember: input validated; available memory {} MB",
413            crate::memory_guard::available_memory_mb()
414        ),
415    );
416
417    let tokenizer = crate::tokenizer::get_tokenizer(&paths.models)?;
418    let model_max_length = crate::tokenizer::get_model_max_length(&paths.models)?;
419    let total_passage_tokens = crate::tokenizer::count_passage_tokens(tokenizer, &raw_body)?;
420    let chunks_info = chunking::split_into_chunks_hierarchical(&raw_body, tokenizer);
421    let chunks_created = chunks_info.len();
422    // For single-chunk bodies the memory row itself stores the content and no
423    // entry is appended to `memory_chunks` (see line ~545). For multi-chunk
424    // bodies every chunk is persisted via `insert_chunk_slices`.
425    let chunks_persisted = compute_chunks_persisted(chunks_info.len());
426
427    output::emit_progress_i18n(
428        &format!(
429            "Remember stage: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
430            chunks_created,
431            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
432        ),
433        &format!(
434            "Stage remember: tokenizer counted {total_passage_tokens} passage tokens (model max {model_max_length}); chunking produced {} chunks; process RSS {} MB",
435            chunks_created,
436            crate::memory_guard::current_process_memory_mb().unwrap_or(0)
437        ),
438    );
439
440    if chunks_created > crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS {
441        return Err(AppError::LimitExceeded(format!(
442            "document produces {chunks_created} chunks; current safe operational limit is {} chunks; split the document before using remember",
443            crate::constants::REMEMBER_MAX_SAFE_MULTI_CHUNKS
444        )));
445    }
446
447    output::emit_progress_i18n("Computing embedding...", "Calculando embedding...");
448    let mut chunk_embeddings_cache: Option<Vec<Vec<f32>>> = None;
449
450    let embedding = if chunks_info.len() == 1 {
451        crate::daemon::embed_passage_or_local(&paths.models, &raw_body)?
452    } else {
453        let chunk_texts: Vec<&str> = chunks_info
454            .iter()
455            .map(|c| chunking::chunk_text(&raw_body, c))
456            .collect();
457        output::emit_progress_i18n(
458            &format!(
459                "Embedding {} chunks serially to keep memory bounded...",
460                chunks_info.len()
461            ),
462            &format!(
463                "Embedding {} chunks serially to keep memory bounded...",
464                chunks_info.len()
465            ),
466        );
467        let mut chunk_embeddings = Vec::with_capacity(chunk_texts.len());
468        for chunk_text in &chunk_texts {
469            chunk_embeddings.push(crate::daemon::embed_passage_or_local(
470                &paths.models,
471                chunk_text,
472            )?);
473        }
474        output::emit_progress_i18n(
475            &format!(
476                "Remember stage: chunk embeddings complete; process RSS {} MB",
477                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
478            ),
479            &format!(
480                "Stage remember: chunk embeddings completed; process RSS {} MB",
481                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
482            ),
483        );
484        let aggregated = chunking::aggregate_embeddings(&chunk_embeddings);
485        chunk_embeddings_cache = Some(chunk_embeddings);
486        aggregated
487    };
488    let body_for_storage = raw_body;
489
490    let memory_type = args.r#type.as_str();
491    let new_memory = NewMemory {
492        namespace: namespace.clone(),
493        name: normalized_name.clone(),
494        memory_type: memory_type.to_string(),
495        description: args.description.clone(),
496        body: body_for_storage,
497        body_hash: body_hash.clone(),
498        session_id: args.session_id.clone(),
499        source: "agent".to_string(),
500        metadata,
501    };
502
503    let mut warnings = Vec::new();
504    let mut entities_persisted = 0usize;
505    let mut relationships_persisted = 0usize;
506
507    let graph_entity_embeddings = graph
508        .entities
509        .iter()
510        .map(|entity| {
511            let entity_text = match &entity.description {
512                Some(desc) => format!("{} {}", entity.name, desc),
513                None => entity.name.clone(),
514            };
515            crate::daemon::embed_passage_or_local(&paths.models, &entity_text)
516        })
517        .collect::<Result<Vec<_>, _>>()?;
518
519    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
520
521    let (memory_id, action, version) = match existing_memory {
522        Some((existing_id, _updated_at, _current_version)) => {
523            if let Some(hash_id) = duplicate_hash_id {
524                if hash_id != existing_id {
525                    warnings.push(format!(
526                        "identical body already exists as memory id {hash_id}"
527                    ));
528                }
529            }
530
531            storage_chunks::delete_chunks(&tx, existing_id)?;
532
533            let next_v = versions::next_version(&tx, existing_id)?;
534            memories::update(&tx, existing_id, &new_memory, args.expected_updated_at)?;
535            versions::insert_version(
536                &tx,
537                existing_id,
538                next_v,
539                &normalized_name,
540                memory_type,
541                &args.description,
542                &new_memory.body,
543                &serde_json::to_string(&new_memory.metadata)?,
544                None,
545                "edit",
546            )?;
547            memories::upsert_vec(
548                &tx,
549                existing_id,
550                &namespace,
551                memory_type,
552                &embedding,
553                &normalized_name,
554                &snippet,
555            )?;
556            (existing_id, "updated".to_string(), next_v)
557        }
558        None => {
559            if let Some(hash_id) = duplicate_hash_id {
560                warnings.push(format!(
561                    "identical body already exists as memory id {hash_id}"
562                ));
563            }
564            let id = memories::insert(&tx, &new_memory)?;
565            versions::insert_version(
566                &tx,
567                id,
568                1,
569                &normalized_name,
570                memory_type,
571                &args.description,
572                &new_memory.body,
573                &serde_json::to_string(&new_memory.metadata)?,
574                None,
575                "create",
576            )?;
577            memories::upsert_vec(
578                &tx,
579                id,
580                &namespace,
581                memory_type,
582                &embedding,
583                &normalized_name,
584                &snippet,
585            )?;
586            (id, "created".to_string(), 1)
587        }
588    };
589
590    if chunks_info.len() > 1 {
591        storage_chunks::insert_chunk_slices(&tx, memory_id, &new_memory.body, &chunks_info)?;
592
593        let chunk_embeddings = chunk_embeddings_cache.take().ok_or_else(|| {
594            AppError::Internal(anyhow::anyhow!(
595                "cache de embeddings de chunks ausente no caminho multi-chunk do remember"
596            ))
597        })?;
598
599        for (i, emb) in chunk_embeddings.iter().enumerate() {
600            storage_chunks::upsert_chunk_vec(&tx, i as i64, memory_id, i as i32, emb)?;
601        }
602        output::emit_progress_i18n(
603            &format!(
604                "Remember stage: persisted chunk vectors; process RSS {} MB",
605                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
606            ),
607            &format!(
608                "Etapa remember: vetores de chunks persistidos; RSS do processo {} MB",
609                crate::memory_guard::current_process_memory_mb().unwrap_or(0)
610            ),
611        );
612    }
613
614    if !graph.entities.is_empty() || !graph.relationships.is_empty() {
615        for entity in &graph.entities {
616            let entity_id = entities::upsert_entity(&tx, &namespace, entity)?;
617            let entity_embedding = &graph_entity_embeddings[entities_persisted];
618            entities::upsert_entity_vec(
619                &tx,
620                entity_id,
621                &namespace,
622                entity.entity_type,
623                entity_embedding,
624                &entity.name,
625            )?;
626            entities::link_memory_entity(&tx, memory_id, entity_id)?;
627            entities::increment_degree(&tx, entity_id)?;
628            entities_persisted += 1;
629        }
630        let entity_types: std::collections::HashMap<&str, EntityType> = graph
631            .entities
632            .iter()
633            .map(|entity| (entity.name.as_str(), entity.entity_type))
634            .collect();
635
636        for rel in &graph.relationships {
637            let source_entity = NewEntity {
638                name: rel.source.clone(),
639                entity_type: entity_types
640                    .get(rel.source.as_str())
641                    .copied()
642                    .unwrap_or(EntityType::Concept),
643                description: None,
644            };
645            let target_entity = NewEntity {
646                name: rel.target.clone(),
647                entity_type: entity_types
648                    .get(rel.target.as_str())
649                    .copied()
650                    .unwrap_or(EntityType::Concept),
651                description: None,
652            };
653            let source_id = entities::upsert_entity(&tx, &namespace, &source_entity)?;
654            let target_id = entities::upsert_entity(&tx, &namespace, &target_entity)?;
655            let rel_id = entities::upsert_relationship(&tx, &namespace, source_id, target_id, rel)?;
656            entities::link_memory_relationship(&tx, memory_id, rel_id)?;
657            relationships_persisted += 1;
658        }
659    }
660    tx.commit()?;
661
662    // v1.0.24 P0-2: persist URLs in a dedicated table, outside the main transaction.
663    // Failures do not propagate — non-critical path with graceful degradation.
664    let urls_persisted = if !extracted_urls.is_empty() {
665        let url_entries: Vec<storage_urls::MemoryUrl> = extracted_urls
666            .into_iter()
667            .map(|u| storage_urls::MemoryUrl {
668                url: u.url,
669                offset: Some(u.offset as i64),
670            })
671            .collect();
672        storage_urls::insert_urls(&conn, memory_id, &url_entries)
673    } else {
674        0
675    };
676
677    let created_at_epoch = chrono::Utc::now().timestamp();
678    let created_at_iso = crate::tz::format_iso(chrono::Utc::now());
679
680    output::emit_json(&RememberResponse {
681        memory_id,
682        // Persist the normalized (kebab-case) slug as `name` since that is the
683        // storage key. The original input is exposed via `original_name` only
684        // when normalization actually changed something (B_4 in v1.0.32).
685        name: normalized_name.clone(),
686        namespace,
687        action: action.clone(),
688        operation: action,
689        version,
690        entities_persisted,
691        relationships_persisted,
692        relationships_truncated,
693        chunks_created,
694        chunks_persisted,
695        urls_persisted,
696        extraction_method,
697        merged_into_memory_id: None,
698        warnings,
699        created_at: created_at_epoch,
700        created_at_iso,
701        elapsed_ms: inicio.elapsed().as_millis() as u64,
702        name_was_normalized,
703        original_name: name_was_normalized.then_some(original_name),
704    })?;
705
706    Ok(())
707}
708
709#[cfg(test)]
710mod tests {
711    use super::compute_chunks_persisted;
712    use crate::output::RememberResponse;
713
714    // Bug H-M8: chunks_persisted contract is unit-testable and matches schema.
715    #[test]
716    fn chunks_persisted_zero_for_zero_chunks() {
717        assert_eq!(compute_chunks_persisted(0), 0);
718    }
719
720    #[test]
721    fn chunks_persisted_zero_for_single_chunk_body() {
722        // Single-chunk bodies live in the memories row itself; no row is
723        // appended to memory_chunks. This is the documented contract.
724        assert_eq!(compute_chunks_persisted(1), 0);
725    }
726
727    #[test]
728    fn chunks_persisted_equals_count_for_multi_chunk_body() {
729        // Every chunk above the first triggers a row in memory_chunks.
730        assert_eq!(compute_chunks_persisted(2), 2);
731        assert_eq!(compute_chunks_persisted(7), 7);
732        assert_eq!(compute_chunks_persisted(64), 64);
733    }
734
735    #[test]
736    fn remember_response_serializes_required_fields() {
737        let resp = RememberResponse {
738            memory_id: 42,
739            name: "minha-mem".to_string(),
740            namespace: "global".to_string(),
741            action: "created".to_string(),
742            operation: "created".to_string(),
743            version: 1,
744            entities_persisted: 0,
745            relationships_persisted: 0,
746            relationships_truncated: false,
747            chunks_created: 1,
748            chunks_persisted: 0,
749            urls_persisted: 0,
750            extraction_method: None,
751            merged_into_memory_id: None,
752            warnings: vec![],
753            created_at: 1_705_320_000,
754            created_at_iso: "2024-01-15T12:00:00Z".to_string(),
755            elapsed_ms: 55,
756            name_was_normalized: false,
757            original_name: None,
758        };
759
760        let json = serde_json::to_value(&resp).expect("serialization failed");
761        assert_eq!(json["memory_id"], 42);
762        assert_eq!(json["action"], "created");
763        assert_eq!(json["operation"], "created");
764        assert_eq!(json["version"], 1);
765        assert_eq!(json["elapsed_ms"], 55u64);
766        assert!(json["warnings"].is_array());
767        assert!(json["merged_into_memory_id"].is_null());
768    }
769
770    #[test]
771    fn remember_response_action_e_operation_sao_aliases() {
772        let resp = RememberResponse {
773            memory_id: 1,
774            name: "mem".to_string(),
775            namespace: "global".to_string(),
776            action: "updated".to_string(),
777            operation: "updated".to_string(),
778            version: 2,
779            entities_persisted: 3,
780            relationships_persisted: 1,
781            relationships_truncated: false,
782            extraction_method: None,
783            chunks_created: 2,
784            chunks_persisted: 2,
785            urls_persisted: 0,
786            merged_into_memory_id: None,
787            warnings: vec![],
788            created_at: 0,
789            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
790            elapsed_ms: 0,
791            name_was_normalized: false,
792            original_name: None,
793        };
794
795        let json = serde_json::to_value(&resp).expect("serialization failed");
796        assert_eq!(
797            json["action"], json["operation"],
798            "action e operation devem ser iguais"
799        );
800        assert_eq!(json["entities_persisted"], 3);
801        assert_eq!(json["relationships_persisted"], 1);
802        assert_eq!(json["chunks_created"], 2);
803    }
804
805    #[test]
806    fn remember_response_warnings_lista_mensagens() {
807        let resp = RememberResponse {
808            memory_id: 5,
809            name: "dup-mem".to_string(),
810            namespace: "global".to_string(),
811            action: "created".to_string(),
812            operation: "created".to_string(),
813            version: 1,
814            entities_persisted: 0,
815            extraction_method: None,
816            relationships_persisted: 0,
817            relationships_truncated: false,
818            chunks_created: 1,
819            chunks_persisted: 0,
820            urls_persisted: 0,
821            merged_into_memory_id: None,
822            warnings: vec!["identical body already exists as memory id 3".to_string()],
823            created_at: 0,
824            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
825            elapsed_ms: 10,
826            name_was_normalized: false,
827            original_name: None,
828        };
829
830        let json = serde_json::to_value(&resp).expect("serialization failed");
831        let warnings = json["warnings"]
832            .as_array()
833            .expect("warnings deve ser array");
834        assert_eq!(warnings.len(), 1);
835        assert!(warnings[0].as_str().unwrap().contains("identical body"));
836    }
837
838    #[test]
839    fn invalid_name_reserved_prefix_returns_validation_error() {
840        use crate::errors::AppError;
841        // Validates the rejection logic for names with the "__" prefix directly
842        let nome = "__reservado";
843        let resultado: Result<(), AppError> = if nome.starts_with("__") {
844            Err(AppError::Validation(
845                crate::i18n::validation::reserved_name(),
846            ))
847        } else {
848            Ok(())
849        };
850        assert!(resultado.is_err());
851        if let Err(AppError::Validation(msg)) = resultado {
852            assert!(!msg.is_empty());
853        }
854    }
855
856    #[test]
857    fn name_too_long_returns_validation_error() {
858        use crate::errors::AppError;
859        let nome_longo = "a".repeat(crate::constants::MAX_MEMORY_NAME_LEN + 1);
860        let resultado: Result<(), AppError> =
861            if nome_longo.is_empty() || nome_longo.len() > crate::constants::MAX_MEMORY_NAME_LEN {
862                Err(AppError::Validation(crate::i18n::validation::name_length(
863                    crate::constants::MAX_MEMORY_NAME_LEN,
864                )))
865            } else {
866                Ok(())
867            };
868        assert!(resultado.is_err());
869    }
870
871    #[test]
872    fn remember_response_merged_into_memory_id_some_serializes_integer() {
873        let resp = RememberResponse {
874            memory_id: 10,
875            name: "mem-mergeada".to_string(),
876            namespace: "global".to_string(),
877            action: "updated".to_string(),
878            operation: "updated".to_string(),
879            version: 3,
880            extraction_method: None,
881            entities_persisted: 0,
882            relationships_persisted: 0,
883            relationships_truncated: false,
884            chunks_created: 1,
885            chunks_persisted: 0,
886            urls_persisted: 0,
887            merged_into_memory_id: Some(7),
888            warnings: vec![],
889            created_at: 0,
890            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
891            elapsed_ms: 0,
892            name_was_normalized: false,
893            original_name: None,
894        };
895
896        let json = serde_json::to_value(&resp).expect("serialization failed");
897        assert_eq!(json["merged_into_memory_id"], 7);
898    }
899
900    #[test]
901    fn remember_response_urls_persisted_serializes_field() {
902        // v1.0.24 P0-2: garante que urls_persisted aparece no JSON e aceita valor > 0.
903        let resp = RememberResponse {
904            memory_id: 3,
905            name: "mem-com-urls".to_string(),
906            namespace: "global".to_string(),
907            action: "created".to_string(),
908            operation: "created".to_string(),
909            version: 1,
910            entities_persisted: 0,
911            relationships_persisted: 0,
912            relationships_truncated: false,
913            chunks_created: 1,
914            chunks_persisted: 0,
915            urls_persisted: 3,
916            extraction_method: Some("regex-only".to_string()),
917            merged_into_memory_id: None,
918            warnings: vec![],
919            created_at: 0,
920            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
921            elapsed_ms: 0,
922            name_was_normalized: false,
923            original_name: None,
924        };
925        let json = serde_json::to_value(&resp).expect("serialization failed");
926        assert_eq!(json["urls_persisted"], 3);
927    }
928
929    #[test]
930    fn empty_name_after_normalization_returns_specific_message() {
931        // P0-4 regression: name consisting only of hyphens normalizes to empty string;
932        // must produce a distinct error message, not the "too long" message.
933        use crate::errors::AppError;
934        let normalized = "---".to_lowercase().replace(['_', ' '], "-");
935        let normalized = normalized.trim_matches('-').to_string();
936        let resultado: Result<(), AppError> = if normalized.is_empty() {
937            Err(AppError::Validation(
938                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
939            ))
940        } else {
941            Ok(())
942        };
943        assert!(resultado.is_err());
944        if let Err(AppError::Validation(msg)) = resultado {
945            assert!(
946                msg.contains("empty after normalization"),
947                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
948            );
949        }
950    }
951
952    #[test]
953    fn name_only_underscores_after_normalization_returns_specific_message() {
954        // P0-4 regression: name consisting only of underscores normalizes to empty string.
955        use crate::errors::AppError;
956        let normalized = "___".to_lowercase().replace(['_', ' '], "-");
957        let normalized = normalized.trim_matches('-').to_string();
958        assert!(
959            normalized.is_empty(),
960            "underscores devem normalizar para string vazia"
961        );
962        let resultado: Result<(), AppError> = if normalized.is_empty() {
963            Err(AppError::Validation(
964                "name cannot be empty after normalization (input was blank or contained only hyphens/underscores/spaces)".to_string(),
965            ))
966        } else {
967            Ok(())
968        };
969        assert!(resultado.is_err());
970        if let Err(AppError::Validation(msg)) = resultado {
971            assert!(
972                msg.contains("empty after normalization"),
973                "mensagem deve mencionar 'empty after normalization', obteve: {msg}"
974            );
975        }
976    }
977
978    #[test]
979    fn remember_response_relationships_truncated_serializes_field() {
980        // P1-D: garante que relationships_truncated aparece no JSON como bool.
981        let resp_false = RememberResponse {
982            memory_id: 1,
983            name: "test".to_string(),
984            namespace: "global".to_string(),
985            action: "created".to_string(),
986            operation: "created".to_string(),
987            version: 1,
988            entities_persisted: 2,
989            relationships_persisted: 1,
990            relationships_truncated: false,
991            chunks_created: 1,
992            chunks_persisted: 0,
993            urls_persisted: 0,
994            extraction_method: None,
995            merged_into_memory_id: None,
996            warnings: vec![],
997            created_at: 0,
998            created_at_iso: "1970-01-01T00:00:00Z".to_string(),
999            elapsed_ms: 0,
1000            name_was_normalized: false,
1001            original_name: None,
1002        };
1003        let json_false = serde_json::to_value(&resp_false).expect("serialization failed");
1004        assert_eq!(json_false["relationships_truncated"], false);
1005
1006        let resp_true = RememberResponse {
1007            relationships_truncated: true,
1008            ..resp_false
1009        };
1010        let json_true = serde_json::to_value(&resp_true).expect("serialization failed");
1011        assert_eq!(json_true["relationships_truncated"], true);
1012    }
1013}