Skip to main content

ai_memory/
validate.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4use anyhow::{Result, bail};
5
6use crate::models::{
7    CreateMemory, MAX_CONTENT_SIZE, MAX_NAMESPACE_DEPTH, Memory, UpdateMemory, VALID_AGENT_TYPES,
8    VALID_SCOPES,
9};
10
/// Max characters (not bytes) in a title, counted after trimming.
const MAX_TITLE_LEN: usize = 512;
/// Max characters in a namespace string (post-Task 1.4).
/// Flat namespaces still fit in the historical 128 budget; 512 is the ceiling
/// for hierarchical paths like `a/b/c/…` up to 8 levels deep.
const MAX_NAMESPACE_LEN: usize = 512;
/// Max bytes in a `source` value.
const MAX_SOURCE_LEN: usize = 64;
/// Max bytes in a single tag, measured after trimming.
const MAX_TAG_LEN: usize = 128;
/// Max number of entries in one tag list.
const MAX_TAGS_COUNT: usize = 50;
/// Max bytes in a link `relation` value.
const MAX_RELATION_LEN: usize = 64;
/// Max bytes in a memory id.
const MAX_ID_LEN: usize = 128;
/// Max bytes in an agent id.
const MAX_AGENT_ID_LEN: usize = 128;
/// Max bytes of the metadata object once serialized to a JSON string.
const MAX_METADATA_SIZE: usize = 65_536;
/// Max container nesting depth of metadata, as measured by `json_depth`.
const MAX_METADATA_DEPTH: usize = 32;
24
/// Closed set of accepted `source` values. `validate_source` requires an
/// exact (case-sensitive) match against one of these entries.
const VALID_SOURCES: &[&str] = &[
    "user",
    "claude",
    "hook",
    "api",
    "cli",
    "import",
    "consolidation",
    "system",
    "chaos",
    // v0.6.2 (S32): `handle_notify` stamps source="notify" on inbox rows.
    // Without this entry, peers reject the notify in `sync_push`'s
    // `validate_memory` — the notify lands on the sender's inbox but
    // never reaches the target's inbox on peer nodes.
    "notify",
];
/// Closed set of accepted link relations. `validate_relation` requires an
/// exact (case-sensitive) match against one of these entries.
const VALID_RELATIONS: &[&str] = &["related_to", "supersedes", "contradicts", "derived_from"];
42
43fn is_valid_rfc3339(s: &str) -> bool {
44    chrono::DateTime::parse_from_rfc3339(s).is_ok()
45}
46
/// Report whether `s` is free of disallowed control characters.
///
/// Newline (`\n`) and tab (`\t`) are the only control characters tolerated;
/// any other control character makes the string "unclean". An empty string
/// is clean.
fn is_clean_string(s: &str) -> bool {
    s.chars()
        .all(|ch| matches!(ch, '\n' | '\t') || !ch.is_control())
}
50
51pub fn validate_title(title: &str) -> Result<()> {
52    let trimmed = title.trim();
53    if trimmed.is_empty() {
54        bail!("title cannot be empty");
55    }
56    if trimmed.chars().count() > MAX_TITLE_LEN {
57        bail!("title exceeds max length of {MAX_TITLE_LEN} characters");
58    }
59    if !is_clean_string(trimmed) {
60        bail!("title contains invalid characters");
61    }
62    Ok(())
63}
64
65pub fn validate_content(content: &str) -> Result<()> {
66    if content.trim().is_empty() {
67        bail!("content cannot be empty");
68    }
69    if content.len() > MAX_CONTENT_SIZE {
70        bail!("content exceeds max size of {MAX_CONTENT_SIZE} bytes");
71    }
72    if !is_clean_string(content) {
73        bail!("content contains invalid characters");
74    }
75    Ok(())
76}
77
78/// Validate a namespace (flat or hierarchical, Task 1.4).
79///
80/// Flat namespaces (`"global"`, `"ai-memory"`) remain fully valid — hierarchy
81/// is opt-in. Hierarchical paths use `/` as the segment delimiter:
82///
83/// ```text
84/// alphaone/engineering/platform
85/// ```
86///
87/// Rules:
88/// - **Not empty**, no leading/trailing whitespace
89/// - Length ≤ [`MAX_NAMESPACE_LEN`] (512 chars)
90/// - Depth (segment count) ≤ [`MAX_NAMESPACE_DEPTH`] (8)
91/// - Backslashes, null bytes, control chars, and spaces are forbidden
92/// - Leading and trailing `/` are forbidden (normalize input via
93///   [`normalize_namespace`] before validating)
94/// - Empty segments (consecutive `//`) are forbidden
95/// - Each segment is non-empty; no further character restriction beyond
96///   the whole-string checks above (preserving historical flexibility
97///   for existing flat namespaces like `ai-memory-mcp-dev`)
98pub fn validate_namespace(ns: &str) -> Result<()> {
99    let trimmed = ns.trim();
100    if trimmed.is_empty() {
101        bail!("namespace cannot be empty");
102    }
103    if trimmed.chars().count() > MAX_NAMESPACE_LEN {
104        bail!("namespace exceeds max length of {MAX_NAMESPACE_LEN} characters");
105    }
106    if trimmed.contains('\\') || trimmed.contains('\0') {
107        bail!("namespace cannot contain backslashes or null bytes");
108    }
109    if trimmed.contains(' ') {
110        bail!("namespace cannot contain spaces (use hyphens or underscores)");
111    }
112    if !is_clean_string(trimmed) {
113        bail!("namespace contains invalid control characters");
114    }
115    // Task 1.4 — hierarchical paths. '/' is permitted as a delimiter, but
116    // leading/trailing/empty segments are rejected to force callers to
117    // normalize input first (ambiguity between "foo" and "foo/" is not
118    // something we want to paper over at match time).
119    if trimmed.starts_with('/') {
120        bail!("namespace cannot start with '/' (normalize input first)");
121    }
122    if trimmed.ends_with('/') {
123        bail!("namespace cannot end with '/' (normalize input first)");
124    }
125    if trimmed.split('/').any(str::is_empty) {
126        bail!("namespace cannot contain empty segments (e.g. '//')");
127    }
128    // Reject `..` and `.` segments — they look like path traversal to
129    // human readers and silently confuse hierarchy semantics. Visibility
130    // prefix matching with LIKE 'foo/%' would let memories at
131    // `foo/../malicious` appear under `foo`'s team-scope queries
132    // (red-team #240).
133    if trimmed.split('/').any(|s| s == ".." || s == ".") {
134        bail!("namespace segments '.' and '..' are not allowed");
135    }
136    let depth = crate::models::namespace_depth(trimmed);
137    if depth > MAX_NAMESPACE_DEPTH {
138        bail!("namespace depth {depth} exceeds max of {MAX_NAMESPACE_DEPTH}");
139    }
140    Ok(())
141}
142
/// Produce the canonical form of a loosely typed namespace, suitable for
/// passing to [`validate_namespace`].
///
/// Steps applied:
///
/// - surrounding whitespace is trimmed
/// - leading and trailing `/` are dropped
/// - runs of `/` collapse into a single separator
/// - the result is lowercased
///
/// Whitespace *inside* the path is left untouched, so the output can still
/// fail validation.
///
/// This is an opt-in helper: write paths do **not** apply it automatically,
/// since lowercasing would change the lookup keys of existing flat
/// namespaces. Use it when accepting user-typed hierarchical paths
/// (Task 1.5+) and a matchable canonical key is needed.
// Exposed for callers that opt in; may have no in-crate caller yet.
#[allow(dead_code)]
#[must_use]
pub fn normalize_namespace(input: &str) -> String {
    let mut canonical = String::with_capacity(input.len());
    // Dropping empty segments handles leading, trailing and repeated
    // slashes in one pass.
    for segment in input.trim().split('/').filter(|seg| !seg.is_empty()) {
        if !canonical.is_empty() {
            canonical.push('/');
        }
        canonical.push_str(segment);
    }
    canonical.to_lowercase()
}
165
166pub fn validate_source(source: &str) -> Result<()> {
167    if source.trim().is_empty() {
168        bail!("source cannot be empty");
169    }
170    if source.len() > MAX_SOURCE_LEN {
171        bail!("source exceeds max length of {MAX_SOURCE_LEN} bytes");
172    }
173    if !VALID_SOURCES.contains(&source) {
174        bail!(
175            "invalid source '{}' — must be one of: {}",
176            source,
177            VALID_SOURCES.join(", ")
178        );
179    }
180    Ok(())
181}
182
183/// Validate an agent identifier (NHI-hardened).
184///
185/// Allowed characters: alphanumeric plus `_`, `-`, `:`, `@`, `.`, `/`.
186/// Length: 1..=128 bytes.
187///
188/// This intentionally permits prefixed/scoped forms such as
189/// `ai:claude-code@host-1:pid-123`, `host:dev-1:pid-9-deadbeef`,
190/// `anonymous:req-abcdef01`, and future SPIFFE-style ids containing `/`.
191/// Rejects whitespace, null bytes, control chars, and shell metacharacters.
192pub fn validate_agent_id(agent_id: &str) -> Result<()> {
193    if agent_id.is_empty() {
194        bail!("agent_id cannot be empty");
195    }
196    if agent_id.len() > MAX_AGENT_ID_LEN {
197        bail!("agent_id exceeds max length of {MAX_AGENT_ID_LEN} bytes");
198    }
199    for c in agent_id.chars() {
200        if !(c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | ':' | '@' | '.' | '/')) {
201            bail!("agent_id contains invalid character '{c}' (allowed: alphanumeric, _-:@./)");
202        }
203    }
204    Ok(())
205}
206
207/// Validate a visibility scope against the closed `VALID_SCOPES` set
208/// (Task 1.5). Enforced on write paths that accept an explicit `scope`
209/// parameter. Memories with no `scope` metadata are treated as `private`
210/// by the query layer without needing explicit validation here.
211pub fn validate_scope(scope: &str) -> Result<()> {
212    if scope.is_empty() {
213        bail!("scope cannot be empty");
214    }
215    if !VALID_SCOPES.contains(&scope) {
216        bail!(
217            "invalid scope '{}' — must be one of: {}",
218            scope,
219            VALID_SCOPES.join(", ")
220        );
221    }
222    Ok(())
223}
224
225/// Validate a [`GovernancePolicy`] (Task 1.8). Closed-set tag checks are
226/// already handled by serde on deserialization; this adds semantic bounds:
227/// consensus quorum must be ≥ 1, Agent references must pass
228/// `validate_agent_id`, and the policy as a whole must not use
229/// `GovernanceLevel::Approve` without a meaningful approver.
230pub fn validate_governance_policy(policy: &crate::models::GovernancePolicy) -> Result<()> {
231    use crate::models::{ApproverType, GovernanceLevel};
232    // Approver-specific constraints
233    match &policy.approver {
234        ApproverType::Human => {}
235        ApproverType::Agent(id) => {
236            validate_agent_id(id)?;
237        }
238        ApproverType::Consensus(n) => {
239            if *n == 0 {
240                bail!("governance.approver.consensus quorum must be >= 1");
241            }
242        }
243    }
244    // `Approve` level is meaningless without a configured approver. The
245    // `Human` default is always valid, but a `Consensus(0)` or bad-id agent
246    // would have been caught above.
247    let uses_approve = matches!(policy.write, GovernanceLevel::Approve)
248        || matches!(policy.promote, GovernanceLevel::Approve)
249        || matches!(policy.delete, GovernanceLevel::Approve);
250    if uses_approve
251        && let ApproverType::Consensus(n) = &policy.approver
252        && *n == 0
253    {
254        bail!("governance uses 'approve' level but approver consensus is 0");
255    }
256    Ok(())
257}
258
259/// Maximum length for an `agent_type` string.
260const MAX_AGENT_TYPE_LEN: usize = 64;
261
262/// Validate an agent type. Accepts any value matching one of these forms
263/// (red-team #235 — the original closed whitelist blocked future agents):
264///
265/// - **Anything in [`VALID_AGENT_TYPES`]** — the curated short-list including
266///   `human`, `system`, and known AI model identifiers
267/// - **Any `ai:<name>` form** — `^ai:[A-Za-z0-9_.-]{1,60}$`. Lets operators
268///   register `ai:claude-opus-4.8`, `ai:gpt-5`, `ai:gemini-2.5`, etc. without
269///   waiting for a code release
270///
271/// Strict format guard: alphanumeric + `_-:.` only, max 64 bytes total.
272/// This keeps the value safe for SQL storage, JSON serialization, and
273/// shell display while removing the closed-list hard stop.
274pub fn validate_agent_type(agent_type: &str) -> Result<()> {
275    if agent_type.is_empty() {
276        bail!("agent_type cannot be empty");
277    }
278    if agent_type.len() > MAX_AGENT_TYPE_LEN {
279        bail!("agent_type exceeds max length of {MAX_AGENT_TYPE_LEN} bytes");
280    }
281    // Curated set always wins.
282    if VALID_AGENT_TYPES.contains(&agent_type) {
283        return Ok(());
284    }
285    // Open `ai:<name>` namespace for forward compatibility with future models.
286    if let Some(name) = agent_type.strip_prefix("ai:") {
287        if name.is_empty() {
288            bail!("agent_type 'ai:' must include a name (e.g. 'ai:claude-opus-4.7')");
289        }
290        if name
291            .chars()
292            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.'))
293        {
294            return Ok(());
295        }
296        bail!(
297            "agent_type '{agent_type}' contains invalid characters in the ai: name \
298             part (allowed: alphanumeric, _-.)"
299        );
300    }
301    let valid = VALID_AGENT_TYPES.join(", ");
302    bail!("invalid agent_type '{agent_type}' — must be one of: {valid} (or any ai:<name> form)");
303}
304
305/// Validate a list of capability strings. Shares `validate_tags` rules
306/// (non-empty, <=128 bytes each, clean chars, <=50 entries).
307pub fn validate_capabilities(caps: &[String]) -> Result<()> {
308    validate_tags(caps)
309}
310
311pub fn validate_tags(tags: &[String]) -> Result<()> {
312    if tags.len() > MAX_TAGS_COUNT {
313        bail!("too many tags (max {MAX_TAGS_COUNT})");
314    }
315    for tag in tags {
316        let trimmed = tag.trim();
317        if trimmed.is_empty() {
318            bail!("tags cannot contain empty strings");
319        }
320        if trimmed.len() > MAX_TAG_LEN {
321            let preview: String = trimmed.chars().take(20).collect();
322            bail!("tag '{preview}...' exceeds max length of {MAX_TAG_LEN} bytes");
323        }
324        if !is_clean_string(trimmed) {
325            bail!("tag contains invalid characters");
326        }
327    }
328    Ok(())
329}
330
331pub fn validate_id(id: &str) -> Result<()> {
332    if id.trim().is_empty() {
333        bail!("id cannot be empty");
334    }
335    if id.len() > MAX_ID_LEN {
336        bail!("id exceeds max length of {MAX_ID_LEN} bytes");
337    }
338    if !is_clean_string(id) {
339        bail!("id contains invalid characters");
340    }
341    Ok(())
342}
343
344pub fn validate_expires_at(expires_at: Option<&str>) -> Result<()> {
345    if let Some(ts) = expires_at {
346        if !is_valid_rfc3339(ts) {
347            bail!("expires_at is not valid RFC3339: '{ts}'");
348        }
349        if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(ts)
350            && dt < chrono::Utc::now()
351        {
352            bail!("expires_at is in the past");
353        }
354    }
355    Ok(())
356}
357
358pub fn validate_ttl_secs(ttl: Option<i64>) -> Result<()> {
359    if let Some(secs) = ttl {
360        if secs <= 0 {
361            bail!("ttl_secs must be positive (got {secs})");
362        }
363        if secs > 365 * 24 * 3600 {
364            bail!("ttl_secs exceeds maximum of 1 year");
365        }
366    }
367    Ok(())
368}
369
370pub fn validate_metadata(metadata: &serde_json::Value) -> Result<()> {
371    if !metadata.is_object() {
372        bail!("metadata must be a JSON object");
373    }
374    let serialized = serde_json::to_string(metadata)
375        .map_err(|e| anyhow::anyhow!("metadata is not valid JSON: {e}"))?;
376    if serialized.len() > MAX_METADATA_SIZE {
377        bail!(
378            "metadata exceeds max size of {MAX_METADATA_SIZE} bytes (got {})",
379            serialized.len()
380        );
381    }
382    let depth = json_depth(metadata);
383    if depth > MAX_METADATA_DEPTH {
384        bail!("metadata nesting depth exceeds limit of {MAX_METADATA_DEPTH} (got {depth})");
385    }
386    Ok(())
387}
388
389fn json_depth(val: &serde_json::Value) -> usize {
390    match val {
391        serde_json::Value::Object(map) => 1 + map.values().map(json_depth).max().unwrap_or(0),
392        serde_json::Value::Array(arr) => 1 + arr.iter().map(json_depth).max().unwrap_or(0),
393        _ => 0,
394    }
395}
396
397pub fn validate_relation(relation: &str) -> Result<()> {
398    if relation.trim().is_empty() {
399        bail!("relation cannot be empty");
400    }
401    if relation.len() > MAX_RELATION_LEN {
402        bail!("relation exceeds max length of {MAX_RELATION_LEN} bytes");
403    }
404    if !VALID_RELATIONS.contains(&relation) {
405        bail!(
406            "invalid relation '{}' — must be one of: {}",
407            relation,
408            VALID_RELATIONS.join(", ")
409        );
410    }
411    Ok(())
412}
413
414pub fn validate_confidence(confidence: f64) -> Result<()> {
415    if confidence.is_nan() || confidence.is_infinite() {
416        bail!("confidence must be a finite number");
417    }
418    if !(0.0..=1.0).contains(&confidence) {
419        bail!("confidence must be between 0.0 and 1.0 (got {confidence})");
420    }
421    Ok(())
422}
423
424pub fn validate_priority(priority: i32) -> Result<()> {
425    if !(1..=10).contains(&priority) {
426        bail!("priority must be between 1 and 10 (got {priority})");
427    }
428    Ok(())
429}
430
431/// Validate a full `CreateMemory` before insert.
432pub fn validate_create(mem: &CreateMemory) -> Result<()> {
433    validate_title(&mem.title)?;
434    validate_content(&mem.content)?;
435    validate_namespace(&mem.namespace)?;
436    validate_source(&mem.source)?;
437    validate_tags(&mem.tags)?;
438    validate_priority(mem.priority)?;
439    validate_confidence(mem.confidence)?;
440    validate_expires_at(mem.expires_at.as_deref())?;
441    validate_ttl_secs(mem.ttl_secs)?;
442    validate_metadata(&mem.metadata)?;
443    Ok(())
444}
445
446/// Validate a full Memory (used for import).
447pub fn validate_memory(mem: &Memory) -> Result<()> {
448    validate_id(&mem.id)?;
449    validate_title(&mem.title)?;
450    validate_content(&mem.content)?;
451    validate_namespace(&mem.namespace)?;
452    validate_source(&mem.source)?;
453    validate_tags(&mem.tags)?;
454    validate_priority(mem.priority)?;
455    validate_confidence(mem.confidence)?;
456    if mem.access_count < 0 {
457        bail!("access_count cannot be negative");
458    }
459    if !is_valid_rfc3339(&mem.created_at) {
460        bail!("created_at is not valid RFC3339");
461    }
462    if !is_valid_rfc3339(&mem.updated_at) {
463        bail!("updated_at is not valid RFC3339");
464    }
465    if let Some(ref ts) = mem.last_accessed_at
466        && !is_valid_rfc3339(ts)
467    {
468        bail!("last_accessed_at is not valid RFC3339");
469    }
470    // Don't reject past expires_at on import — may be importing historical data
471    if let Some(ref ts) = mem.expires_at
472        && !is_valid_rfc3339(ts)
473    {
474        bail!("expires_at is not valid RFC3339");
475    }
476    validate_metadata(&mem.metadata)?;
477    Ok(())
478}
479
480/// Validate update fields (only validates present fields).
481/// Note: `expires_at` allows past dates in updates for programmatic TTL management
482/// and GC testing — only format is validated, not chronological ordering.
483pub fn validate_update(update: &UpdateMemory) -> Result<()> {
484    if let Some(ref t) = update.title {
485        validate_title(t)?;
486    }
487    if let Some(ref c) = update.content {
488        validate_content(c)?;
489    }
490    if let Some(ref ns) = update.namespace {
491        validate_namespace(ns)?;
492    }
493    if let Some(ref tags) = update.tags {
494        validate_tags(tags)?;
495    }
496    if let Some(p) = update.priority {
497        validate_priority(p)?;
498    }
499    if let Some(c) = update.confidence {
500        validate_confidence(c)?;
501    }
502    if let Some(ref ts) = update.expires_at {
503        validate_expires_at_format(ts)?;
504    }
505    if let Some(ref meta) = update.metadata {
506        validate_metadata(meta)?;
507    }
508    Ok(())
509}
510
511/// Validate `expires_at` format only (no past-date check). Used by update path.
512pub fn validate_expires_at_format(ts: &str) -> Result<()> {
513    if !is_valid_rfc3339(ts) {
514        bail!("expires_at is not valid RFC3339: '{ts}'");
515    }
516    Ok(())
517}
518
519/// Validate link creation.
520pub fn validate_link(source_id: &str, target_id: &str, relation: &str) -> Result<()> {
521    validate_id(source_id)?;
522    validate_id(target_id)?;
523    validate_relation(relation)?;
524    if source_id == target_id {
525        bail!("cannot link a memory to itself");
526    }
527    Ok(())
528}
529
530/// Validate consolidation request.
531pub fn validate_consolidate(
532    ids: &[String],
533    title: &str,
534    summary: &str,
535    namespace: &str,
536) -> Result<()> {
537    if ids.len() < 2 {
538        bail!("need at least 2 memory IDs to consolidate");
539    }
540    if ids.len() > 100 {
541        bail!("cannot consolidate more than 100 memories at once");
542    }
543    let mut seen = std::collections::HashSet::new();
544    for id in ids {
545        validate_id(id)?;
546        if !seen.insert(id) {
547            bail!("duplicate memory ID: {id}");
548        }
549    }
550    validate_title(title)?;
551    validate_content(summary)?;
552    validate_namespace(namespace)?;
553    Ok(())
554}
555
556#[cfg(test)]
557mod tests {
558    use super::*;
559
560    #[test]
561    fn test_valid_title() {
562        assert!(validate_title("BIND9 custom build").is_ok());
563        assert!(validate_title("").is_err());
564        assert!(validate_title("   ").is_err());
565        assert!(validate_title(&"x".repeat(513)).is_err());
566        assert!(validate_title("has\0null").is_err());
567    }
568
569    #[test]
570    fn test_valid_namespace_flat_backwards_compat() {
571        // Task 1.4: flat namespaces must still validate exactly as before.
572        assert!(validate_namespace("my-project").is_ok());
573        assert!(validate_namespace("global").is_ok());
574        assert!(validate_namespace("under_score").is_ok());
575        assert!(validate_namespace("ai-memory-mcp-dev").is_ok());
576        assert!(validate_namespace("_agents").is_ok());
577    }
578
579    #[test]
580    fn test_valid_namespace_rejections_preserved() {
581        assert!(validate_namespace("").is_err());
582        assert!(validate_namespace("   ").is_err());
583        assert!(validate_namespace("has space").is_err());
584        assert!(validate_namespace("has\\backslash").is_err());
585        assert!(validate_namespace("has\0null").is_err());
586        assert!(validate_namespace("has\x07bell").is_err());
587    }
588
589    #[test]
590    fn test_namespace_rejects_dot_segments_redteam_240() {
591        // Red-team #240 — `..` and `.` segments must be rejected to
592        // prevent hierarchy confusion / visibility prefix-match games.
593        assert!(validate_namespace("acme/../other").is_err());
594        assert!(validate_namespace("acme/./other").is_err());
595        assert!(validate_namespace("..").is_err());
596        assert!(validate_namespace(".").is_err());
597        assert!(validate_namespace("acme/team/..").is_err());
598        assert!(validate_namespace("../acme").is_err());
599        // But two dots inside a name is fine — only standalone segments are blocked.
600        assert!(validate_namespace("acme/team..special").is_ok());
601        assert!(validate_namespace("acme/.dotfile").is_ok());
602    }
603
604    #[test]
605    fn test_namespace_length_bumped_to_512() {
606        // Historical 128-char budget is a floor; 512 is the new max for paths.
607        assert!(validate_namespace(&"x".repeat(128)).is_ok());
608        assert!(validate_namespace(&"x".repeat(512)).is_ok());
609        assert!(validate_namespace(&"x".repeat(513)).is_err());
610    }
611
612    // Task 1.4 — hierarchical paths ---------------------------------------
613
614    #[test]
615    fn test_hierarchical_paths_accepted() {
616        assert!(validate_namespace("alphaone/engineering").is_ok());
617        assert!(validate_namespace("alphaone/engineering/platform").is_ok());
618        assert!(validate_namespace("a/b/c/d/e/f/g/h").is_ok(), "8 levels OK");
619    }
620
621    #[test]
622    fn test_hierarchical_depth_cap() {
623        // 9 levels exceeds MAX_NAMESPACE_DEPTH (8)
624        assert!(validate_namespace("a/b/c/d/e/f/g/h/i").is_err());
625    }
626
627    #[test]
628    fn test_hierarchical_rejects_leading_slash() {
629        assert!(validate_namespace("/alphaone/engineering").is_err());
630    }
631
632    #[test]
633    fn test_hierarchical_rejects_trailing_slash() {
634        assert!(validate_namespace("alphaone/engineering/").is_err());
635    }
636
637    #[test]
638    fn test_hierarchical_rejects_empty_segments() {
639        assert!(validate_namespace("alphaone//engineering").is_err());
640        assert!(validate_namespace("a///b").is_err());
641    }
642
643    #[test]
644    fn test_hierarchical_rejects_control_chars() {
645        assert!(validate_namespace("a/b\x07c").is_err());
646        assert!(validate_namespace("a/b\0c").is_err());
647    }
648
649    #[test]
650    fn test_normalize_namespace_strips_slashes() {
651        assert_eq!(
652            normalize_namespace("/alphaone/engineering/"),
653            "alphaone/engineering"
654        );
655        assert_eq!(normalize_namespace("///a///b///"), "a/b");
656    }
657
658    #[test]
659    fn test_normalize_namespace_lowercases() {
660        assert_eq!(
661            normalize_namespace("AlphaOne/Engineering"),
662            "alphaone/engineering"
663        );
664        assert_eq!(normalize_namespace("MYAPP"), "myapp");
665    }
666
667    #[test]
668    fn test_normalize_namespace_trims_whitespace() {
669        assert_eq!(normalize_namespace("  alphaone/eng  "), "alphaone/eng");
670    }
671
672    #[test]
673    fn test_normalize_then_validate_roundtrip() {
674        let raw = "/AlphaOne//Engineering/Platform/";
675        let norm = normalize_namespace(raw);
676        assert_eq!(norm, "alphaone/engineering/platform");
677        assert!(validate_namespace(&norm).is_ok());
678    }
679
680    #[test]
681    fn test_valid_source() {
682        assert!(validate_source("user").is_ok());
683        assert!(validate_source("claude").is_ok());
684        assert!(validate_source("hook").is_ok());
685        assert!(validate_source("api").is_ok());
686        assert!(validate_source("cli").is_ok());
687        assert!(validate_source("import").is_ok());
688        assert!(validate_source("").is_err());
689        assert!(validate_source("random").is_err());
690    }
691
692    #[test]
693    fn test_valid_agent_id() {
694        // Accepted NHI-hardened formats
695        assert!(validate_agent_id("alice").is_ok());
696        assert!(validate_agent_id("ai:claude-code@host-1:pid-123").is_ok());
697        assert!(validate_agent_id("host:dev-1:pid-9-deadbeef").is_ok());
698        assert!(validate_agent_id("anonymous:req-abcdef01").is_ok());
699        assert!(validate_agent_id("anonymous:pid-42-0123abcd").is_ok());
700        assert!(validate_agent_id("spiffe://example.org/ns/prod").is_ok());
701        assert!(validate_agent_id("a").is_ok());
702        assert!(validate_agent_id(&"a".repeat(128)).is_ok());
703    }
704
705    #[test]
706    fn test_invalid_agent_id() {
707        // Empty / oversized
708        assert!(validate_agent_id("").is_err());
709        assert!(validate_agent_id(&"a".repeat(129)).is_err());
710
711        // Whitespace
712        assert!(validate_agent_id("alice bob").is_err());
713        assert!(validate_agent_id("alice\tbob").is_err());
714        assert!(validate_agent_id(" alice").is_err());
715        assert!(validate_agent_id("alice ").is_err());
716
717        // Null byte / control chars
718        assert!(validate_agent_id("has\0null").is_err());
719        assert!(validate_agent_id("has\x07bell").is_err());
720        assert!(validate_agent_id("has\nnewline").is_err());
721
722        // Shell metacharacters
723        assert!(validate_agent_id("alice;rm").is_err());
724        assert!(validate_agent_id("alice|cat").is_err());
725        assert!(validate_agent_id("alice&bg").is_err());
726        assert!(validate_agent_id("alice$VAR").is_err());
727        assert!(validate_agent_id("alice`cmd`").is_err());
728        assert!(validate_agent_id("alice\\bs").is_err());
729        assert!(validate_agent_id("alice?q").is_err());
730        assert!(validate_agent_id("alice*glob").is_err());
731    }
732
733    #[test]
734    fn test_validate_governance_policy_default_ok() {
735        let p = crate::models::GovernancePolicy::default();
736        assert!(validate_governance_policy(&p).is_ok());
737    }
738
739    #[test]
740    fn test_validate_governance_consensus_zero_rejected() {
741        use crate::models::{ApproverType, GovernanceLevel, GovernancePolicy};
742        let p = GovernancePolicy {
743            write: GovernanceLevel::Any,
744            promote: GovernanceLevel::Any,
745            delete: GovernanceLevel::Owner,
746            approver: ApproverType::Consensus(0),
747            inherit: true,
748        };
749        assert!(validate_governance_policy(&p).is_err());
750    }
751
752    #[test]
753    fn test_validate_governance_agent_id_checked() {
754        use crate::models::{ApproverType, GovernanceLevel, GovernancePolicy};
755        let bad = GovernancePolicy {
756            write: GovernanceLevel::Any,
757            promote: GovernanceLevel::Any,
758            delete: GovernanceLevel::Owner,
759            approver: ApproverType::Agent("has space".to_string()),
760            inherit: true,
761        };
762        assert!(validate_governance_policy(&bad).is_err());
763
764        let good = GovernancePolicy {
765            write: GovernanceLevel::Any,
766            promote: GovernanceLevel::Any,
767            delete: GovernanceLevel::Owner,
768            approver: ApproverType::Agent("alice".to_string()),
769            inherit: true,
770        };
771        assert!(validate_governance_policy(&good).is_ok());
772    }
773
774    #[test]
775    fn test_valid_scope() {
776        for s in ["private", "team", "unit", "org", "collective"] {
777            assert!(validate_scope(s).is_ok(), "{s} must be valid");
778        }
779    }
780
781    #[test]
782    fn test_invalid_scope() {
783        assert!(validate_scope("").is_err());
784        assert!(validate_scope("public").is_err());
785        assert!(validate_scope("PRIVATE").is_err());
786        assert!(validate_scope("personal").is_err());
787    }
788
789    #[test]
790    fn test_valid_agent_type_curated_values() {
791        assert!(validate_agent_type("ai:claude-opus-4.6").is_ok());
792        assert!(validate_agent_type("ai:codex-5.4").is_ok());
793        assert!(validate_agent_type("ai:grok-4.2").is_ok());
794        assert!(validate_agent_type("human").is_ok());
795        assert!(validate_agent_type("system").is_ok());
796    }
797
798    #[test]
799    fn test_valid_agent_type_open_ai_namespace_redteam_235() {
800        // Red-team #235 — any `ai:<name>` form must be accepted so operators
801        // can register future / custom AI agents without code changes.
802        assert!(validate_agent_type("ai:claude-opus-4.8").is_ok());
803        assert!(validate_agent_type("ai:gpt-5").is_ok());
804        assert!(validate_agent_type("ai:gemini-2.5").is_ok());
805        assert!(validate_agent_type("ai:custom_internal-model.v2").is_ok());
806        assert!(validate_agent_type("ai:claude").is_ok());
807    }
808
809    #[test]
810    fn test_invalid_agent_type() {
811        // Empty.
812        assert!(validate_agent_type("").is_err());
813        // Wrong prefix case (only lowercase `ai:` matches the open form).
814        assert!(validate_agent_type("AI:CLAUDE").is_err());
815        // Plain word without `ai:` and not in curated set.
816        assert!(validate_agent_type("bogus").is_err());
817        // `ai:` with no name part.
818        assert!(validate_agent_type("ai:").is_err());
819        // Invalid char inside the ai: name part.
820        assert!(validate_agent_type("ai:foo bar").is_err());
821        assert!(validate_agent_type("ai:foo;rm").is_err());
822        // Too long.
823        assert!(validate_agent_type(&format!("ai:{}", "x".repeat(80))).is_err());
824    }
825
826    #[test]
827    fn test_agents_namespace_accepted() {
828        assert!(validate_namespace("_agents").is_ok());
829    }
830
831    #[test]
832    fn test_valid_tags() {
833        assert!(validate_tags(&["dns".to_string(), "bind9".to_string()]).is_ok());
834        assert!(validate_tags(&[]).is_ok());
835        assert!(validate_tags(&[String::new()]).is_err());
836        let too_many: Vec<String> = (0..51).map(|i| format!("tag{i}")).collect();
837        assert!(validate_tags(&too_many).is_err());
838    }
839
840    #[test]
841    fn test_valid_relation() {
842        assert!(validate_relation("related_to").is_ok());
843        assert!(validate_relation("supersedes").is_ok());
844        assert!(validate_relation("").is_err());
845        assert!(validate_relation("invented_relation").is_err());
846    }
847
848    #[test]
849    fn test_valid_confidence() {
850        assert!(validate_confidence(0.0).is_ok());
851        assert!(validate_confidence(0.5).is_ok());
852        assert!(validate_confidence(1.0).is_ok());
853        assert!(validate_confidence(-0.1).is_err());
854        assert!(validate_confidence(1.1).is_err());
855        assert!(validate_confidence(f64::NAN).is_err());
856        assert!(validate_confidence(f64::INFINITY).is_err());
857    }
858
859    #[test]
860    fn test_valid_ttl() {
861        assert!(validate_ttl_secs(None).is_ok());
862        assert!(validate_ttl_secs(Some(3600)).is_ok());
863        assert!(validate_ttl_secs(Some(0)).is_err());
864        assert!(validate_ttl_secs(Some(-1)).is_err());
865        assert!(validate_ttl_secs(Some(366 * 24 * 3600)).is_err());
866    }
867
868    #[test]
869    fn test_self_link_rejected() {
870        assert!(validate_link("abc", "abc", "related_to").is_err());
871        assert!(validate_link("abc", "def", "related_to").is_ok());
872    }
873
874    #[test]
875    fn test_valid_metadata() {
876        assert!(validate_metadata(&serde_json::json!({})).is_ok());
877        assert!(validate_metadata(&serde_json::json!({"key": "value"})).is_ok());
878        assert!(validate_metadata(&serde_json::json!({"nested": {"a": 1}})).is_ok());
879        // Non-object types rejected
880        assert!(validate_metadata(&serde_json::json!("string")).is_err());
881        assert!(validate_metadata(&serde_json::json!(42)).is_err());
882        assert!(validate_metadata(&serde_json::json!([1, 2])).is_err());
883        assert!(validate_metadata(&serde_json::json!(null)).is_err());
884    }
885
886    #[test]
887    fn test_clean_string_rejects_control_chars() {
888        assert!(is_clean_string("normal text"));
889        assert!(is_clean_string("with\nnewline"));
890        assert!(is_clean_string("with\ttab"));
891        assert!(!is_clean_string("has\0null"));
892        assert!(!is_clean_string("has\x07bell"));
893        assert!(!is_clean_string("has\x1b[31mANSI\x1b[0m"));
894        assert!(!is_clean_string("has\x08backspace"));
895    }
896
897    #[test]
898    fn test_oversized_metadata_rejected() {
899        let big_value = "x".repeat(MAX_METADATA_SIZE);
900        let meta = serde_json::json!({"big": big_value});
901        assert!(validate_metadata(&meta).is_err());
902    }
903
904    #[test]
905    fn test_deeply_nested_metadata_rejected() {
906        // Build a 33-level deep object (exceeds MAX_METADATA_DEPTH of 32)
907        let mut val = serde_json::json!("leaf");
908        for _ in 0..33 {
909            val = serde_json::json!({"nested": val});
910        }
911        assert!(validate_metadata(&val).is_err());
912
913        // 32 levels should be fine
914        let mut val = serde_json::json!("leaf");
915        for _ in 0..31 {
916            val = serde_json::json!({"nested": val});
917        }
918        assert!(validate_metadata(&val).is_ok());
919    }
920
921    // -----------------------------------------------------------------
922    // W11/S11b: proptest properties — boundary + adversarial fuzz
923    // -----------------------------------------------------------------
924    use proptest::prelude::*;
925
926    proptest! {
927        // Title rejection happens iff trimmed string is empty (whitespace-only or "").
928        #[test]
929        fn prop_validate_title_rejects_empty_strings_only_when_actually_empty(
930            ws in r"[ \t\n]{0,16}",
931            tail in r"[A-Za-z0-9 _\-.,!?]{0,80}",
932        ) {
933            // Whitespace-only must reject; otherwise title is valid (within char bounds).
934            let title = format!("{ws}{tail}{ws}");
935            let trimmed_empty = title.trim().is_empty();
936            let result = validate_title(&title);
937            if trimmed_empty {
938                prop_assert!(result.is_err(), "whitespace-only title must reject: {:?}", title);
939            } else if title.chars().count() <= 512 {
940                prop_assert!(result.is_ok(), "non-empty trimmed title must accept: {:?}", title);
941            }
942        }
943    }
944
945    proptest! {
946        // Namespaces with control chars / spaces / backslashes / null bytes must reject.
947        #[test]
948        fn prop_validate_namespace_rejects_invalid_chars(
949            base in r"[a-z][a-z0-9_-]{0,20}",
950            // Pick one of the always-rejected chars and splice it in.
951            bad in prop::sample::select(&[' ', '\\', '\0', '\x07', '\x1b', '\x08']),
952        ) {
953            let ns = format!("{base}{bad}suffix");
954            prop_assert!(
955                validate_namespace(&ns).is_err(),
956                "namespace with bad char {:?} must reject: {:?}", bad, ns
957            );
958        }
959    }
960
961    proptest! {
962        // a/b/c style paths up to 8 levels with safe chars should validate.
963        #[test]
964        fn prop_validate_namespace_accepts_valid_hierarchy(
965            segs in prop::collection::vec(r"[a-z][a-z0-9_-]{0,20}", 1..=8),
966        ) {
967            // Filter out `.` / `..` segments which the validator rejects.
968            let safe: Vec<String> = segs
969                .into_iter()
970                .filter(|s| s != "." && s != "..")
971                .collect();
972            if safe.is_empty() {
973                return Ok(());
974            }
975            let ns = safe.join("/");
976            prop_assert!(
977                validate_namespace(&ns).is_ok(),
978                "valid hierarchy must accept: {:?}", ns
979            );
980        }
981    }
982
983    proptest! {
984        // Priority must accept 1..=10, reject anything outside that band.
985        #[test]
986        fn prop_validate_priority_rejects_outside_range(p in -1000i32..1000i32) {
987            let result = validate_priority(p);
988            if (1..=10).contains(&p) {
989                prop_assert!(result.is_ok(), "priority {p} (in 1..=10) must accept");
990            } else {
991                prop_assert!(result.is_err(), "priority {p} (outside 1..=10) must reject");
992            }
993        }
994    }
995
996    proptest! {
997        // Confidence rejects NaN / infinity / out-of-band values, accepts [0.0, 1.0].
998        // Documented behavior: rejects (does not clamp).
999        #[test]
1000        fn prop_validate_confidence_clamps_or_rejects(c in -10.0f64..10.0f64) {
1001            let result = validate_confidence(c);
1002            if (0.0..=1.0).contains(&c) {
1003                prop_assert!(result.is_ok(), "confidence {c} in [0,1] must accept");
1004            } else {
1005                prop_assert!(result.is_err(), "confidence {c} outside [0,1] must reject");
1006            }
1007        }
1008
1009        #[test]
1010        fn prop_validate_confidence_nan_inf_always_rejected(_u in Just(())) {
1011            prop_assert!(validate_confidence(f64::NAN).is_err());
1012            prop_assert!(validate_confidence(f64::INFINITY).is_err());
1013            prop_assert!(validate_confidence(f64::NEG_INFINITY).is_err());
1014        }
1015    }
1016
1017    proptest! {
1018        // Self-link must reject for every relation type, regardless of id payload.
1019        #[test]
1020        fn prop_validate_link_rejects_self_link_for_every_relation(
1021            id in r"[a-z][a-zA-Z0-9_-]{0,32}",
1022            rel_idx in 0usize..4,
1023        ) {
1024            let relations = ["related_to", "supersedes", "contradicts", "derived_from"];
1025            let rel = relations[rel_idx];
1026            let result = validate_link(&id, &id, rel);
1027            prop_assert!(result.is_err(), "self-link must reject for relation {rel}, id {:?}", id);
1028        }
1029    }
1030
1031    // -----------------------------------------------------------------
1032    // Unicode-boundary unit tests (W11/S11b — visible-but-tricky chars)
1033    // -----------------------------------------------------------------
1034
1035    #[test]
1036    fn test_title_accepts_zero_width_joiner() {
1037        // ZWJ (U+200D) is not a control char; titles should accept it.
1038        assert!(validate_title("emoji\u{200D}joiner").is_ok());
1039    }
1040
1041    #[test]
1042    fn test_title_accepts_rtl_marks() {
1043        // Right-to-left mark (U+200F) and LRM (U+200E) are allowed (non-control).
1044        assert!(validate_title("hello\u{200F}world").is_ok());
1045        assert!(validate_title("hello\u{200E}world").is_ok());
1046    }
1047
1048    #[test]
1049    fn test_title_accepts_combining_chars() {
1050        // Combining acute accent on `e` (U+0065 U+0301) — distinct chars,
1051        // is_clean_string allows them; char count differs from byte count.
1052        assert!(validate_title("cafe\u{0301}").is_ok());
1053    }
1054
1055    #[test]
1056    fn test_title_rejects_unicode_bom_as_control() {
1057        // U+FEFF (BOM/zero-width no-break space) — Rust's `is_control` on BOM
1058        // returns false (it's a format char, not control). Document actual
1059        // behavior: titles containing BOM are accepted.
1060        assert!(validate_title("foo\u{FEFF}bar").is_ok());
1061    }
1062}