Skip to main content

ai_memory/mcp/tools/store/
validation.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! `memory_store` input validation + `on_conflict` resolution.
5//!
6//! #881 (PR-4 extraction): split out of the monolithic
7//! `src/mcp/tools/store.rs` so the cheapest gate fires in its own
8//! ~120-LOC module. Wire-compat preserved verbatim: every error message
9//! and `OnConflict` variant is byte-identical to the pre-#881 inline
10//! code path.
11
12use crate::mcp::param_names;
13
/// Conflict-resolution modes accepted by `memory_store`
/// (v0.6.3.1 P2, G6).
///
/// * `Error`   — the write is refused with a typed CONFLICT error.
///               Default for v2-aware clients.
/// * `Merge`   — the v0.6.3 silent-merge upsert. Default for v1 /
///               unknown clients so existing callers keep working.
/// * `Version` — the title is auto-suffixed with `(2)`, `(3)`, ... so
///               a distinct row is written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OnConflict {
    Error,
    Merge,
    Version,
}

impl OnConflict {
    /// Maps the wire value of `on_conflict` onto its variant. Matching
    /// is exact: only the lowercase strings `error`, `merge` and
    /// `version` are accepted.
    ///
    /// This is the single parse path for the closed on-conflict set.
    /// It was promoted from `pub(super)` to `pub` in v0.7.0
    /// (multi-agent literal-sweep scanner B finding F-B3.x) so the HTTP
    /// handler (`src/handlers/create.rs`) reuses it rather than keeping
    /// its own duplicated `matches!` + per-mode dispatch.
    ///
    /// # Errors
    ///
    /// Returns the wire-compatible `"invalid on_conflict '...'..."`
    /// string surfaced to MCP callers when `s` is not one of the three
    /// known modes.
    pub fn parse(s: &str) -> Result<Self, String> {
        let mode = match s {
            "error" => Self::Error,
            "merge" => Self::Merge,
            "version" => Self::Version,
            _ => {
                return Err(format!(
                    "invalid on_conflict '{s}' (expected error|merge|version)"
                ));
            }
        };
        Ok(mode)
    }
}
54
55/// Capability profile detection. v2-aware clients default to `Error`;
56/// v1 / unknown clients default to `Merge` to preserve the v0.6.3
57/// contract. The determination keys off the MCP client name (captured
58/// at `initialize` from `clientInfo.name`). Known v2 clients are
59/// listed explicitly so the policy is auditable. The list is
60/// intentionally narrow — adding a name here is a deliberate decision
61/// that "this client knows how to handle a CONFLICT response from
62/// memory_store".
63pub(super) fn default_on_conflict_for_client(mcp_client: Option<&str>) -> OnConflict {
64    let Some(client) = mcp_client else {
65        return OnConflict::Merge;
66    };
67    // Match on the prefix before any '@' — `ai:foo@host:pid-N` style ids.
68    let head = client.split('@').next().unwrap_or(client);
69    let normalized = head.to_ascii_lowercase();
70    // v2-capable clients (explicitly opted-in via known name).
71    const V2_CLIENT_PREFIXES: &[&str] = &["ai:claude-code", "ai:ai-memory-cli/v2"];
72    for prefix in V2_CLIENT_PREFIXES {
73        if normalized.starts_with(prefix) {
74            return OnConflict::Error;
75        }
76    }
77    OnConflict::Merge
78}
79
80/// #881 — input-parse + validation + memory-construction extracted
81/// from the monolithic `handle_store`. Returns the parsed
82/// `(memory, on_conflict, agent_id, explicit_scope)` tuple ready for
83/// the governance gate, or a wire-compatible error string on the
84/// first validation failure.
85///
86/// Wire-compat preserved verbatim: every error message is
87/// byte-identical to the pre-#881 inline path.
88///
89/// # Errors
90///
91/// Returns the typed validation error string surfaced to MCP callers
92/// (`"title is required"` / `"invalid tier: ..."` / etc.) when the
93/// params fail any of the [`crate::validate`] checks, or
94/// `"invalid on_conflict ..."` when an unknown on_conflict mode
95/// appears.
96#[allow(clippy::too_many_lines)]
97pub(super) fn parse_and_build_memory(
98    params: &serde_json::Value,
99    mcp_client: Option<&str>,
100    resolved_ttl: &crate::config::ResolvedTtl,
101    conn: &rusqlite::Connection,
102) -> Result<(crate::models::Memory, OnConflict, String, Option<String>), String> {
103    use crate::models::{ConfidenceSource, Memory, Tier};
104    use crate::{db, validate};
105
106    let title = params["title"]
107        .as_str()
108        .ok_or(crate::errors::msg::TITLE_REQUIRED)?;
109    let content = params["content"]
110        .as_str()
111        .ok_or(crate::errors::msg::CONTENT_REQUIRED)?;
112    let tier_str = params["tier"].as_str().unwrap_or(Tier::Mid.as_str());
113    let tier =
114        Tier::from_str(tier_str).ok_or_else(|| crate::errors::msg::invalid("tier", tier_str))?;
115    // #1590 — namespace default ladder: explicit caller param >
116    // operator-configured `[storage].default_namespace` (seeded
117    // process-wide at boot; `None` for unconfigured deployments) >
118    // compiled `DEFAULT_NAMESPACE`. Pre-#1590 the resolved
119    // `default_namespace` was consumed by NO write path — the MCP
120    // store always hardcoded the compiled "global" fallback.
121    let namespace = params["namespace"].as_str().map_or_else(
122        || {
123            crate::config::configured_default_namespace()
124                .unwrap_or_else(|| crate::DEFAULT_NAMESPACE.to_string())
125        },
126        str::to_string,
127    );
128    // v0.7.x (issue #1175): vendor-neutral substrate default. The
129    // pre-#1175 hardcode of `"claude"` was a heterogeneous-NHI monoculture
130    // defect — `memory_store` from a non-Anthropic NHI silently stamped
131    // `source = "claude"` regardless of which model actually made the call.
132    // Caller-supplied `params["source"]` still wins; the default is now
133    // the role-categorical vendor-neutral value `"nhi"`.
134    let source = params["source"]
135        .as_str()
136        .unwrap_or(validate::DEFAULT_NHI_SOURCE)
137        .to_string();
138    // v0.6.3.1 P2 (G6) — explicit `on_conflict` overrides the per-client default.
139    let on_conflict = if let Some(s) = params["on_conflict"].as_str() {
140        OnConflict::parse(s)?
141    } else {
142        default_on_conflict_for_client(mcp_client)
143    };
144    // B4 (R2-LOW) — clamp to i32 range instead of panicking on out-of-range
145    // JSON. A maliciously-crafted `"priority": 9999999999` would have crashed
146    // the stdio MCP server pre-fix. `validate_priority` below enforces the
147    // semantic 1-10 range, so the clamp is purely a panic guard.
148    let priority = i32::try_from(params["priority"].as_i64().unwrap_or(5)).unwrap_or(i32::MAX);
149    // #1591 — keep "did the caller actually send confidence?" visible
150    // so the row's `confidence_source` is truthful: an omitted value
151    // stamps the compiled DEFAULT_CONFIDENCE with source="default"
152    // instead of falsely claiming "caller_provided".
153    let caller_confidence = params[param_names::CONFIDENCE].as_f64();
154    let confidence = caller_confidence.unwrap_or(crate::models::DEFAULT_CONFIDENCE);
155    let tags: Vec<String> = params["tags"]
156        .as_array()
157        .map(|a| {
158            a.iter()
159                .filter_map(|v| v.as_str().map(String::from))
160                .collect()
161        })
162        .unwrap_or_default();
163
164    validate::validate_title(title).map_err(|e| e.to_string())?;
165    validate::validate_content(content).map_err(|e| e.to_string())?;
166    validate::validate_namespace(&namespace).map_err(|e| e.to_string())?;
167    validate::validate_source(&source).map_err(|e| e.to_string())?;
168    validate::validate_tags(&tags).map_err(|e| e.to_string())?;
169    validate::validate_priority(priority).map_err(|e| e.to_string())?;
170    validate::validate_confidence(confidence).map_err(|e| e.to_string())?;
171
172    let mut metadata = if params["metadata"].is_object() {
173        params["metadata"].clone()
174    } else {
175        serde_json::json!({})
176    };
177    // Resolve agent_id via the NHI-hardened precedence chain and merge into
178    // metadata. Explicit values win in this order:
179    //   1. top-level `agent_id` param
180    //   2. embedded `metadata.agent_id` (backward compatible with callers
181    //      that supply it inline)
182    //   3. env / MCP clientInfo / host / anonymous (handled inside `identity`)
183    let explicit_agent_id = params["agent_id"].as_str().or_else(|| {
184        metadata
185            .get(param_names::AGENT_ID)
186            .and_then(serde_json::Value::as_str)
187    });
188    let agent_id = crate::identity::resolve_agent_id(explicit_agent_id, mcp_client)
189        .map_err(|e| e.to_string())?;
190    if let Some(obj) = metadata.as_object_mut() {
191        obj.insert(
192            "agent_id".to_string(),
193            serde_json::Value::String(agent_id.clone()),
194        );
195    }
196    // #151 scope: top-level `scope` param OR inline metadata.scope
197    let explicit_scope = params["scope"]
198        .as_str()
199        .or_else(|| {
200            metadata
201                .get(param_names::SCOPE)
202                .and_then(serde_json::Value::as_str)
203        })
204        .map(str::to_string);
205    if let Some(ref s) = explicit_scope {
206        validate::validate_scope(s).map_err(|e| e.to_string())?;
207        if let Some(obj) = metadata.as_object_mut() {
208            obj.insert("scope".to_string(), serde_json::Value::String(s.clone()));
209        }
210    }
211    validate::validate_metadata(&metadata).map_err(|e| e.to_string())?;
212
213    let now = chrono::Utc::now();
214    let expires_at = resolved_ttl
215        .ttl_for_tier(&tier)
216        .map(|s| (now + chrono::Duration::seconds(s)).to_rfc3339());
217
218    // v0.6.3.1 P2 (G6) — apply the conflict policy BEFORE building the
219    // canonical Memory. `Version` mode rewrites `title` to a free suffix;
220    // `Error` mode short-circuits with a typed error if the row already
221    // exists; `Merge` defers to the legacy code path below.
222    let resolved_title = match on_conflict {
223        OnConflict::Error => {
224            if let Some(existing_id) =
225                db::find_by_title_namespace(conn, title, &namespace).map_err(|e| e.to_string())?
226            {
227                return Err(format!(
228                    "CONFLICT: memory with title '{title}' already exists in namespace \
229                     '{namespace}' (existing id: {existing_id}). Pass \
230                     on_conflict='merge' to update in place or 'version' to suffix the title."
231                ));
232            }
233            title.to_string()
234        }
235        OnConflict::Version => {
236            db::next_versioned_title(conn, title, &namespace).map_err(|e| e.to_string())?
237        }
238        OnConflict::Merge => title.to_string(),
239    };
240
241    // v0.7.x Form 6 (issue #759) — caller-supplied `kind` parameter.
242    // Recognised values match the [`crate::models::MemoryKind`] enum.
243    // `None` means the auto-classify hook (if enabled) decides.
244    //
245    // v0.7.0 #1467 — an explicit, non-parseable `kind` is now REJECTED
246    // (was silently coerced to `Observation`) so the MCP store path
247    // matches the CLI / HTTP strict gate. The MCP store path validates
248    // inline (it does not route through `validate::validate_create`), so
249    // call the shared `validate_kind` here directly.
250    let kind_param = params["kind"].as_str();
251    crate::validate::validate_kind(kind_param).map_err(|e| e.to_string())?;
252    let caller_kind = kind_param.and_then(crate::models::MemoryKind::from_str);
253
254    let source_uri = match params[param_names::SOURCE_URI].as_str().map(str::trim) {
255        Some(s) if !s.is_empty() => {
256            crate::validate::validate_source_uri(s).map_err(|e| e.to_string())?;
257            Some(s.to_string())
258        }
259        _ => None,
260    };
261
262    // v0.7.0 #1421 — sister fix to #1411 (HTTP Form-4 wire-truthfulness).
263    // Pre-fix the MCP store path declared `citations: Vec::new()` and
264    // `source_span: None` even when the caller supplied them in the
265    // request — silently dropping the validated Form-4 provenance
266    // fields. Parse, validate, then thread through.
267    let citations: Vec<crate::models::Citation> = match params.get(param_names::CITATIONS) {
268        Some(v) if !v.is_null() => serde_json::from_value(v.clone()).map_err(|e| {
269            format!(
270                "invalid `citations` (expected array of {{uri, accessed_at, hash?, span?}}): {e}"
271            )
272        })?,
273        _ => Vec::new(),
274    };
275    if !citations.is_empty() {
276        crate::validate::validate_citations(&citations).map_err(|e| e.to_string())?;
277    }
278    let source_span: Option<crate::models::SourceSpan> = match params.get(param_names::SOURCE_SPAN)
279    {
280        Some(v) if !v.is_null() => Some(
281            serde_json::from_value(v.clone())
282                .map_err(|e| format!("invalid `source_span` (expected {{start, end}}): {e}"))?,
283        ),
284        _ => None,
285    };
286    if let Some(span) = source_span.as_ref() {
287        crate::validate::validate_source_span(span).map_err(|e| e.to_string())?;
288    }
289
290    let mem = Memory {
291        id: uuid::Uuid::new_v4().to_string(),
292        tier,
293        namespace,
294        title: resolved_title,
295        content: content.to_string(),
296        tags,
297        priority: priority.clamp(1, 10),
298        confidence: confidence.clamp(0.0, 1.0),
299        source,
300        access_count: 0,
301        created_at: now.to_rfc3339(),
302        updated_at: now.to_rfc3339(),
303        last_accessed_at: None,
304        expires_at,
305        metadata,
306        reflection_depth: 0,
307        memory_kind: caller_kind.unwrap_or(crate::models::MemoryKind::Observation),
308        entity_id: None,
309        persona_version: None,
310        citations,
311        source_uri,
312        source_span,
313        // #1591 — truthful confidence provenance: only an explicit
314        // caller value is `caller_provided`; the compiled fallback is
315        // `default`.
316        confidence_source: if caller_confidence.is_some() {
317            ConfidenceSource::CallerProvided
318        } else {
319            ConfidenceSource::Default
320        },
321        confidence_signals: None,
322        confidence_decayed_at: None,
323        version: 1,
324    };
325
326    Ok((mem, on_conflict, agent_id, explicit_scope))
327}
328
#[cfg(test)]
mod tests {
    use super::*;

    /// Every member of the closed `on_conflict` set parses to its
    /// variant; anything else is an error.
    #[test]
    fn on_conflict_parse_variants() {
        assert_eq!(OnConflict::parse("error").unwrap(), OnConflict::Error);
        assert_eq!(OnConflict::parse("merge").unwrap(), OnConflict::Merge);
        assert_eq!(OnConflict::parse("version").unwrap(), OnConflict::Version);
        assert!(OnConflict::parse("nope").is_err());
    }

    /// Missing and unknown clients default to `Merge`; the explicitly
    /// listed v2 client prefixes default to `Error`, regardless of
    /// ASCII case and of any `@host` suffix.
    #[test]
    fn default_on_conflict_for_client_matrix() {
        assert_eq!(default_on_conflict_for_client(None), OnConflict::Merge);
        assert_eq!(
            default_on_conflict_for_client(Some("ai:claude-code@host:pid-1")),
            OnConflict::Error
        );
        assert_eq!(
            default_on_conflict_for_client(Some("AI:Claude-Code@whatever")),
            OnConflict::Error,
            "case-insensitive prefix match"
        );
        assert_eq!(
            default_on_conflict_for_client(Some("ai:ai-memory-cli/v2-something")),
            OnConflict::Error
        );
        assert_eq!(
            default_on_conflict_for_client(Some("ai:unknown-client@host:pid-1")),
            OnConflict::Merge
        );
    }

    /// v0.7.x (issue #1175) — `parse_and_build_memory` MUST default the
    /// `source` field to the vendor-neutral [`crate::validate::DEFAULT_NHI_SOURCE`]
    /// when the caller omits it. Pre-#1175 this site hardcoded `"claude"`
    /// — a heterogeneous-NHI monoculture defect that silently broke
    /// forensic queries keyed on `source = 'claude'` for every
    /// non-Anthropic NHI's writes.
    ///
    /// Pinned at the unit-test layer (rather than as an integration
    /// test) because [`crate::mcp::tools::store::handle_store`] is
    /// `pub(crate)`; the substrate has not historically committed to a
    /// public-API surface for the store entry point.
    #[test]
    fn issue_1175_source_default_is_vendor_neutral_nhi() {
        use crate::config::ResolvedTtl;
        use crate::storage as db;
        use serde_json::json;

        let conn = db::open(std::path::Path::new(":memory:")).expect("open in-memory db");
        let ttl = ResolvedTtl::default();
        let params = json!({
            "title": "issue-1175-store-default",
            "content": "memory body",
            "namespace": "issue-1175-store-default",
            // No source field — should default to DEFAULT_NHI_SOURCE.
        });

        let (mem, _on_conflict, _agent_id, _explicit_scope) =
            parse_and_build_memory(&params, None, &ttl, &conn)
                .expect("parse_and_build_memory must succeed for a minimal valid payload");

        assert_eq!(
            mem.source,
            crate::validate::DEFAULT_NHI_SOURCE,
            "memory_store must default to the vendor-neutral substrate \
             source value (\"nhi\"); pre-#1175 this site stamped \"claude\""
        );
    }

    /// v0.7.x (issue #1175) — caller-supplied `source` MUST still
    /// override the default. The vendor-neutral default only fires
    /// when the caller omits the field; pre-#1175 callers that
    /// explicitly passed `source: "claude"` continue to land that
    /// value verbatim.
    #[test]
    fn issue_1175_caller_source_overrides_vendor_neutral_default() {
        use crate::config::ResolvedTtl;
        use crate::storage as db;
        use serde_json::json;

        let conn = db::open(std::path::Path::new(":memory:")).expect("open in-memory db");
        let ttl = ResolvedTtl::default();
        let params = json!({
            "title": "issue-1175-store-override",
            "content": "memory body",
            "namespace": "issue-1175-store-override",
            "source": "system",
        });

        let (mem, _on_conflict, _agent_id, _explicit_scope) =
            parse_and_build_memory(&params, None, &ttl, &conn)
                .expect("parse_and_build_memory must succeed");

        assert_eq!(
            mem.source, "system",
            "caller-supplied source wins over the default"
        );
    }

    /// v0.7.x issue #1591 regression — `memory_store` with NO
    /// `confidence` argument must stamp the compiled default value
    /// with TRUTHFUL provenance `confidence_source = "default"`.
    /// Pre-#1591 the omitted case stamped `confidence = 1.0` +
    /// `confidence_source = "caller_provided"` — a false provenance
    /// claim indistinguishable from a deliberate caller assertion.
    #[test]
    fn issue_1591_omitted_confidence_stamps_source_default() {
        use crate::config::ResolvedTtl;
        use crate::storage as db;
        use serde_json::json;

        let conn = db::open(std::path::Path::new(":memory:")).expect("open in-memory db");
        let ttl = ResolvedTtl::default();
        let params = json!({
            "title": "issue-1591-omitted",
            "content": "memory body",
            "namespace": "issue-1591",
            // No confidence field.
        });
        let (mem, _, _, _) = parse_and_build_memory(&params, None, &ttl, &conn).expect("ok");
        assert!((mem.confidence - crate::models::DEFAULT_CONFIDENCE).abs() < f64::EPSILON);
        assert_eq!(
            mem.confidence_source,
            crate::models::ConfidenceSource::Default,
            "#1591: omitted confidence must stamp source=default"
        );
        assert_eq!(mem.confidence_source.as_str(), "default");
    }

    /// v0.7.x issue #1591 regression — an EXPLICIT `confidence=0.8`
    /// keeps the historical `caller_provided` provenance.
    #[test]
    fn issue_1591_explicit_confidence_stays_caller_provided() {
        use crate::config::ResolvedTtl;
        use crate::storage as db;
        use serde_json::json;

        let conn = db::open(std::path::Path::new(":memory:")).expect("open in-memory db");
        let ttl = ResolvedTtl::default();
        let params = json!({
            "title": "issue-1591-explicit",
            "content": "memory body",
            "namespace": "issue-1591",
            "confidence": 0.8,
        });
        let (mem, _, _, _) = parse_and_build_memory(&params, None, &ttl, &conn).expect("ok");
        assert!((mem.confidence - 0.8).abs() < f64::EPSILON);
        assert_eq!(
            mem.confidence_source,
            crate::models::ConfidenceSource::CallerProvided,
        );
    }

    /// v0.7.x issue #1590 regression — the MCP store namespace default
    /// ladder: explicit param > operator-configured
    /// `[storage].default_namespace` (process-wide seed) > compiled
    /// `"global"`. Pre-#1590 the configured value was resolved but
    /// consumed by NO write path.
    ///
    /// The test mutates the process-wide configured namespace, so the
    /// reset back to "unconfigured" is tied to a drop guard: it runs
    /// whether the test passes or an assertion panics.
    #[test]
    fn issue_1590_store_namespace_default_ladder() {
        use crate::config::ResolvedTtl;
        use crate::storage as db;
        use serde_json::json;

        /// Clears the process-wide configured namespace when dropped,
        /// including during unwinding from a failed assertion, so the
        /// seeded value cannot leak into other tests.
        struct ResetConfiguredNamespace;

        impl Drop for ResetConfiguredNamespace {
            fn drop(&mut self) {
                crate::config::set_configured_default_namespace(None);
            }
        }

        // Locals drop in reverse declaration order: `_reset` fires
        // first, while `_gate` is still held, so the reset stays inside
        // the serialised section.
        let _gate = crate::config::lock_configured_default_namespace_for_test();
        let _reset = ResetConfiguredNamespace;

        let conn = db::open(std::path::Path::new(":memory:")).expect("open in-memory db");
        let ttl = ResolvedTtl::default();
        let omitted_ns = json!({
            "title": "issue-1590-store",
            "content": "memory body",
        });

        // Unconfigured deployment: historical compiled default.
        crate::config::set_configured_default_namespace(None);
        let (mem, _, _, _) = parse_and_build_memory(&omitted_ns, None, &ttl, &conn).expect("ok");
        assert_eq!(mem.namespace, crate::DEFAULT_NAMESPACE);

        // Operator explicitly configured [storage].default_namespace.
        crate::config::set_configured_default_namespace(Some("alphaone".to_string()));
        let (mem, _, _, _) = parse_and_build_memory(&omitted_ns, None, &ttl, &conn).expect("ok");
        assert_eq!(
            mem.namespace, "alphaone",
            "#1590: configured default_namespace must win over compiled global"
        );

        // Explicit caller namespace still beats the configured default.
        let explicit_ns = json!({
            "title": "issue-1590-store-explicit",
            "content": "memory body",
            "namespace": "caller-ns",
        });
        let (mem, _, _, _) = parse_and_build_memory(&explicit_ns, None, &ttl, &conn).expect("ok");
        assert_eq!(mem.namespace, "caller-ns", "explicit param wins");
    }
}