Skip to main content

acdp_validation/
lib.rs

1//! Runtime validation against the ACDP schemas.
2//!
3//! The JSON schemas are the single source of truth for wire-shape constraints,
4//! but JSON Schema cannot express every invariant in the ACDP RFCs. This
5//! module implements the runtime checks the schema delegates to producers
6//! and registries:
7//!
8//! - String length / array uniqueness / array size limits
9//! - `data_period.start <= end`
10//! - `DataRef` oneOf (location XOR embedded), URI credential rejection,
11//!   structured-locator scheme pattern, embedded size cap, embedded
12//!   `content` typing per encoding
13//! - `metadata` runtime depth / JCS-size / property-count caps
14//! - `agent_id` DID pattern: `did:web` (v0.1.0 baseline) or a well-formed `did:key` (ACDP 0.2)
15//! - Signature value length per algorithm
16//! - Embedded `content_hash` computation and verification
17//! - Identifier pattern validation (`ctx_id`, `lineage_id`, `content_hash`)
18//!
19//! Each function is independently usable; [`validate_publish_request`] and
20//! [`validate_body`] aggregate everything for end-to-end validation.
21
22use acdp_crypto::try_canonicalize_value;
23use acdp_primitives::error::AcdpError;
24use acdp_types::body::Body;
25use acdp_types::data_ref::{DataRef, EmbeddedContent, EmbeddedEncoding, Location};
26use acdp_types::primitives::{
27    AgentDid, ContentHash, ContextType, CtxId, LineageId, Status, Visibility,
28};
29use acdp_types::publish::PublishRequest;
30use base64::{engine::general_purpose::STANDARD, Engine};
31use sha2::{Digest, Sha256};
32
33// ── Constants from the schemas ────────────────────────────────────────────────
34
// Upper bounds on free-text fields. NOTE(review): the validators in this
// file compare some of these against `chars().count()` and others against
// `len()` (bytes); check the call site for the unit that applies.
35const MAX_TITLE_LEN: usize = 500;
36const MAX_DESCRIPTION_LEN: usize = 5000;
37const MAX_SUMMARY_LEN: usize = 1000;
38const MAX_DOMAIN_LEN: usize = 200;
39const MAX_DATA_REF_DESCRIPTION_LEN: usize = 1000;
40const MAX_TAG_LEN: usize = 100;
// Upper bounds on the number of entries in array-valued fields.
41const MAX_CONTRIBUTORS: usize = 100;
42const MAX_TAGS: usize = 200;
43const MAX_DERIVED_FROM: usize = 1000;
44const MAX_AUDIENCE: usize = 1000;
// Caps applied by `validate_json_object_limits` (used for both `metadata`
// and the flattened `extensions`): top-level property count, nesting depth,
// and size of the JCS-canonical serialization in bytes.
45const MAX_METADATA_PROPERTIES: usize = 100;
46const MAX_METADATA_DEPTH: usize = 8;
47const MAX_METADATA_JCS_BYTES: usize = 65_536;
// Maximum `DataRef.location` URI length, compared against `uri.len()`.
48const MAX_URI_LEN: usize = 4096;
// Maximum size of embedded content after decoding, in bytes.
49const MAX_EMBEDDED_BYTES: usize = 65_536;
// Expected base64 text length of a signature value per algorithm; 88 is the
// padded base64 length of 64 raw bytes. NOTE(review): presumably consumed by
// `validate_signature_length`, which is defined outside this view — confirm.
50const ED25519_SIG_B64_LEN: usize = 88;
51const ECDSA_P256_SIG_B64_LEN: usize = 88;
52
53// ── Capabilities ─────────────────────────────────────────────────────────────
54
55/// Validate a [`acdp_types::CapabilitiesDocument`] against the
56/// runtime constraints listed in RFC-ACDP-0007 §3.
57///
58/// The JSON schema enforces *types*; this validator enforces the
59/// constraints the schema cannot express:
60///
61/// 1. `acdp_version` matches `^\d+\.\d+\.\d+$`.
62/// 2. `registry_did` is a v0.1.0 `did:web` DID.
63/// 3. `supported_signature_algorithms` MUST contain `"ed25519"`.
64/// 4. `supported_did_methods` MUST contain `"did:web"`.
65/// 5. `profiles` MUST contain `"acdp-registry-core"`.
66/// 6. `limits.max_embedded_bytes` MUST equal exactly 65536.
67/// 7. If `supports_idempotency_key` is `true`,
68///    `limits.idempotency_key_ttl_seconds` MUST be present and in
69///    `86400..=604800`.
70/// 8. `limits.max_payload_bytes` MUST be at least 1024 bytes.
71///
72/// Wired into `acdp::client::RegistryClient::capabilities` and
73/// `acdp::client::CrossRegistryResolver::resolve`.
74pub fn validate_capabilities(caps: &acdp_types::CapabilitiesDocument) -> Result<(), AcdpError> {
75    validate_semver_pattern("acdp_version", &caps.acdp_version)?;
76
77    AgentDid::parse_web(caps.registry_did.as_str()).map_err(|e| {
78        AcdpError::SchemaViolation(format!(
79            "capabilities.registry_did must be did:web for v0.1.0: {e}"
80        ))
81    })?;
82
83    if !caps
84        .supported_signature_algorithms
85        .iter()
86        .any(|a| a == "ed25519")
87    {
88        return Err(AcdpError::SchemaViolation(
89            "capabilities.supported_signature_algorithms MUST contain 'ed25519' \
90             (RFC-ACDP-0001 §5.10)"
91                .into(),
92        ));
93    }
94
95    if !caps.supported_did_methods.iter().any(|m| m == "did:web") {
96        return Err(AcdpError::SchemaViolation(
97            "capabilities.supported_did_methods MUST contain 'did:web' \
98             (RFC-ACDP-0001 §5.4)"
99                .into(),
100        ));
101    }
102
103    if !caps.profiles.iter().any(|p| p == "acdp-registry-core") {
104        return Err(AcdpError::SchemaViolation(
105            "capabilities.profiles MUST contain 'acdp-registry-core' \
106             (RFC-ACDP-0001 §9.1)"
107                .into(),
108        ));
109    }
110
111    if caps.limits.max_embedded_bytes != 65_536 {
112        return Err(AcdpError::SchemaViolation(format!(
113            "capabilities.limits.max_embedded_bytes must be 65536 (fixed by \
114             RFC-ACDP-0007 §3.1), got {}",
115            caps.limits.max_embedded_bytes
116        )));
117    }
118
119    if caps.limits.max_payload_bytes < 1024 {
120        return Err(AcdpError::SchemaViolation(format!(
121            "capabilities.limits.max_payload_bytes must be ≥ 1024, got {}",
122            caps.limits.max_payload_bytes
123        )));
124    }
125
126    if caps.supports_idempotency_key {
127        let ttl = caps.limits.idempotency_key_ttl_seconds.ok_or_else(|| {
128            AcdpError::SchemaViolation(
129                "limits.idempotency_key_ttl_seconds is required when \
130                 supports_idempotency_key is true (RFC-ACDP-0007 §3.2)"
131                    .into(),
132            )
133        })?;
134        if !(86_400..=604_800).contains(&ttl) {
135            return Err(AcdpError::SchemaViolation(format!(
136                "limits.idempotency_key_ttl_seconds must be in 86400..=604800, got {ttl}"
137            )));
138        }
139    }
140
141    Ok(())
142}
143
144// ── Top-level entry points ───────────────────────────────────────────────────
145
146/// Validate a complete [`PublishRequest`] against every schema constraint
147/// and runtime invariant.
148pub fn validate_publish_request(req: &PublishRequest) -> Result<(), AcdpError> {
149    validate_title(&req.title)?;
150    validate_optional_string(
151        req.description.as_deref(),
152        "description",
153        MAX_DESCRIPTION_LEN,
154    )?;
155    validate_optional_string(req.summary.as_deref(), "summary", MAX_SUMMARY_LEN)?;
156    validate_optional_string(req.domain.as_deref(), "domain", MAX_DOMAIN_LEN)?;
157
158    validate_agent_did(&req.agent_id)?;
159    for c in &req.contributors {
160        validate_loose_did(c)?;
161    }
162    validate_unique_array("contributors", &req.contributors, MAX_CONTRIBUTORS)?;
163    validate_unique_array("derived_from", &req.derived_from, MAX_DERIVED_FROM)?;
164
165    if let Some(tags) = &req.tags {
166        validate_tags(tags)?;
167    }
168    if let Some(audience) = &req.audience {
169        validate_unique_array("audience", audience, MAX_AUDIENCE)?;
170        for did in audience {
171            validate_loose_did(did)?;
172        }
173    }
174
175    validate_visibility_audience(&req.visibility, req.audience.as_deref())?;
176
177    if let Some(dp) = &req.data_period {
178        if dp.start > dp.end {
179            return Err(AcdpError::SchemaViolation(
180                "data_period.start must not be after data_period.end".into(),
181            ));
182        }
183    }
184
185    if let Some(ct) = &req.context_type.namespaced_form() {
186        validate_namespaced_context_type(ct)?;
187    }
188
189    if let Some(meta) = &req.metadata {
190        validate_metadata(meta)?;
191    }
192
193    for dr in &req.data_refs {
194        validate_data_ref(dr)?;
195    }
196
197    validate_signature_length(&req.signature.algorithm, &req.signature.value)?;
198    validate_did_key_key_id_form(&req.signature.key_id)?;
199    ContentHash::parse(req.content_hash.as_str())?;
200
201    // Identifier patterns on every supplied ctx_id
202    if let Some(prev) = &req.supersedes {
203        CtxId::parse(prev.as_str())?;
204    }
205    for ancestor in &req.derived_from {
206        CtxId::parse(ancestor.as_str())?;
207    }
208    if let Some(lineage) = &req.lineage_id {
209        acdp_types::primitives::LineageId::parse(lineage.as_str())?;
210    }
211
212    // acdp_version pattern (semver `^\d+\.\d+\.\d+$`)
213    if let Some(v) = &req.acdp_version {
214        validate_semver_pattern("acdp_version", v)?;
215    }
216
217    // Version coherence (also enforced by the builder)
218    match (&req.supersedes, req.version) {
219        (None, 1) => {}
220        (None, v) => {
221            return Err(AcdpError::SchemaViolation(format!(
222                "first-version publish requires version=1, got {v}"
223            )));
224        }
225        (Some(_), v) if v >= 2 => {}
226        (Some(_), v) => {
227            return Err(AcdpError::SchemaViolation(format!(
228                "supersession publish requires version >= 2, got {v}"
229            )));
230        }
231    }
232
233    // RFC-ACDP-0003 §2.2 / `acdp-publish-request.schema.json` allOf:
234    // v1 publications MUST NOT include lineage_id (the value would
235    // necessarily be wrong because the formula depends on the
236    // registry-assigned ctx_id). The builder enforces this too, but
237    // applying it here lets the validator stand alone for callers that
238    // do not go through `RequestBuilder` (e.g. the conformance harness,
239    // server-side validators).
240    if req.version == 1 && req.lineage_id.is_some() {
241        return Err(AcdpError::SchemaViolation(
242            "lineage_id MUST NOT be set on v1 publish requests (RFC-ACDP-0003 §2.2)".into(),
243        ));
244    }
245
246    Ok(())
247}
248
249/// Validate a stored [`Body`] (retrieval-side check).
250pub fn validate_body(body: &Body) -> Result<(), AcdpError> {
251    validate_body_inner(body, /* check_embedded_hashes = */ true)
252}
253
254/// Same as [`validate_body`] but skips the embedded-`content_hash` recomputation.
255///
256/// Used by `acdp::client::VerifiedContext::fetch_report` so per-`DataRef`
257/// embedded-hash outcomes can be recorded individually rather than
258/// short-circuiting the whole verification. Callers that want the
259/// embedded-hash check MUST run [`verify_embedded_hash`] themselves —
260/// `fetch_report`'s recording loop is one such caller.
261///
262/// Production code that doesn't need partial-failure reporting should
263/// prefer [`validate_body`].
264pub fn validate_body_structural(body: &Body) -> Result<(), AcdpError> {
265    validate_body_inner(body, /* check_embedded_hashes = */ false)
266}
267
268fn validate_body_inner(body: &Body, check_embedded_hashes: bool) -> Result<(), AcdpError> {
269    validate_title(&body.title)?;
270    validate_optional_string(
271        body.description.as_deref(),
272        "description",
273        MAX_DESCRIPTION_LEN,
274    )?;
275    validate_optional_string(body.summary.as_deref(), "summary", MAX_SUMMARY_LEN)?;
276    validate_optional_string(body.domain.as_deref(), "domain", MAX_DOMAIN_LEN)?;
277
278    validate_agent_did(&body.agent_id)?;
279    for c in &body.contributors {
280        validate_loose_did(c)?;
281    }
282    validate_unique_array("contributors", &body.contributors, MAX_CONTRIBUTORS)?;
283    validate_unique_array("derived_from", &body.derived_from, MAX_DERIVED_FROM)?;
284
285    if let Some(tags) = &body.tags {
286        validate_tags(tags)?;
287    }
288    if let Some(audience) = &body.audience {
289        validate_unique_array("audience", audience, MAX_AUDIENCE)?;
290        for did in audience {
291            validate_loose_did(did)?;
292        }
293    }
294    validate_visibility_audience(&body.visibility, body.audience.as_deref())?;
295
296    if let Some(dp) = &body.data_period {
297        if dp.start > dp.end {
298            return Err(AcdpError::SchemaViolation(
299                "data_period.start must not be after data_period.end".into(),
300            ));
301        }
302    }
303
304    if let Some(meta) = &body.metadata {
305        validate_metadata(meta)?;
306    }
307
308    // Forward-compat `extensions` (`#[serde(flatten)]`) are producer-
309    // controlled and flow into JCS + content_hash; cap them like metadata
310    // so they cannot bypass the §3.3 size/count/depth limits (P1-3).
311    validate_extensions(&body.extensions)?;
312
313    for dr in &body.data_refs {
314        if check_embedded_hashes {
315            validate_data_ref(dr)?;
316        } else {
317            validate_data_ref_structural(dr)?;
318        }
319    }
320
321    validate_signature_length(&body.signature.algorithm, &body.signature.value)?;
322    validate_did_key_key_id_form(&body.signature.key_id)?;
323    validate_identifiers(&body.ctx_id, &body.lineage_id, &body.content_hash)?;
324
325    // Every entry in supersedes / derived_from MUST be a valid ctx_id.
326    if let Some(prev) = &body.supersedes {
327        CtxId::parse(prev.as_str())?;
328    }
329    for ancestor in &body.derived_from {
330        CtxId::parse(ancestor.as_str())?;
331    }
332
333    if let Some(v) = &body.acdp_version {
334        validate_semver_pattern("acdp_version", v)?;
335    }
336
337    let _ = &body.created_at; // schema-derived; serde already enforces RFC 3339
338    validate_origin_registry(&body.origin_registry)?;
339
340    // Avoid unused-import warnings on Status / Visibility
341    let _ = std::any::type_name::<Status>();
342    let _: &Visibility = &body.visibility;
343
344    Ok(())
345}
346
347/// Validate an identifier triple — convenient for retrieval-side use.
348pub fn validate_identifiers(
349    ctx_id: &CtxId,
350    lineage_id: &LineageId,
351    content_hash: &ContentHash,
352) -> Result<(), AcdpError> {
353    CtxId::parse(ctx_id.as_str())?;
354    LineageId::parse(lineage_id.as_str())?;
355    ContentHash::parse(content_hash.as_str())?;
356    Ok(())
357}
358
359// ── DataRef ──────────────────────────────────────────────────────────────────
360
361/// Validate a single [`DataRef`] against `acdp-data-ref.schema.json` and the
362/// runtime invariants the schema delegates.
363pub fn validate_data_ref(dr: &DataRef) -> Result<(), AcdpError> {
364    validate_data_ref_structural(dr)?;
365    // BUG-02: verify the declared content_hash against the decoded bytes
366    // (RFC-ACDP-0002 §6.6 #8). A producer-supplied wrong hash is a
367    // signed commitment to a misleading integrity claim, so we catch
368    // it at validate time, not just inside `PublishValidator`.
369    if dr.embedded.is_some() {
370        verify_embedded_hash(dr)?;
371    }
372    Ok(())
373}
374
375/// Same as [`validate_data_ref`] but skips the embedded-`content_hash`
376/// recomputation. Callers that want to report per-`DataRef` hash failures
377/// (e.g. `acdp::client::VerifiedContext::fetch_report`) run the
378/// structural checks via this helper, then call [`verify_embedded_hash`]
379/// themselves and record the outcome instead of short-circuiting.
380pub fn validate_data_ref_structural(dr: &DataRef) -> Result<(), AcdpError> {
381    // oneOf: exactly one of location / embedded
382    match (&dr.location, &dr.embedded) {
383        (None, None) => {
384            return Err(AcdpError::SchemaViolation(
385                "DataRef requires exactly one of 'location' or 'embedded' (got neither)".into(),
386            ));
387        }
388        (Some(_), Some(_)) => {
389            return Err(AcdpError::SchemaViolation(
390                "DataRef requires exactly one of 'location' or 'embedded' (got both)".into(),
391            ));
392        }
393        _ => {}
394    }
395
396    if let Some(desc) = &dr.description {
397        if desc.len() > MAX_DATA_REF_DESCRIPTION_LEN {
398            return Err(AcdpError::SchemaViolation(format!(
399                "DataRef.description {} chars exceeds {} limit",
400                desc.len(),
401                MAX_DATA_REF_DESCRIPTION_LEN
402            )));
403        }
404    }
405
406    if let Some(loc) = &dr.location {
407        validate_location(loc)?;
408    }
409    if let Some(emb) = &dr.embedded {
410        validate_embedded(emb)?;
411    }
412
413    Ok(())
414}
415
416fn validate_location(loc: &Location) -> Result<(), AcdpError> {
417    match loc {
418        Location::Uri(uri) => validate_uri_location(uri),
419        Location::Structured(map) => validate_structured_locator(map),
420    }
421}
422
423fn validate_uri_location(uri: &str) -> Result<(), AcdpError> {
424    if uri.len() < 3 || uri.len() > MAX_URI_LEN {
425        return Err(AcdpError::SchemaViolation(format!(
426            "DataRef.location URI length {} not in 3..={}",
427            uri.len(),
428            MAX_URI_LEN
429        )));
430    }
431    // Scheme: ^[a-z][a-z0-9+.-]*:
432    let (scheme, rest) = uri
433        .split_once(':')
434        .ok_or_else(|| AcdpError::SchemaViolation(format!("URI missing scheme: {uri}")))?;
435    if scheme.is_empty()
436        || !scheme
437            .chars()
438            .next()
439            .is_some_and(|c| c.is_ascii_lowercase())
440        || !scheme
441            .chars()
442            .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || matches!(c, '+' | '.' | '-'))
443    {
444        return Err(AcdpError::SchemaViolation(format!(
445            "URI scheme '{scheme}' invalid; must match [a-z][a-z0-9+.-]*"
446        )));
447    }
448    // userinfo rejection: ^[a-z][a-z0-9+.-]*://[^/?#@]+@
449    if let Some(after_slashes) = rest.strip_prefix("//") {
450        if let Some(authority_end) = after_slashes.find(['/', '?', '#']) {
451            let authority = &after_slashes[..authority_end];
452            if authority.contains('@') {
453                return Err(AcdpError::SchemaViolation(format!(
454                    "URI MUST NOT contain credentials in userinfo: {uri}"
455                )));
456            }
457        } else if after_slashes.contains('@') {
458            return Err(AcdpError::SchemaViolation(format!(
459                "URI MUST NOT contain credentials in userinfo: {uri}"
460            )));
461        }
462    }
463    Ok(())
464}
465
466fn validate_structured_locator(
467    map: &serde_json::Map<String, serde_json::Value>,
468) -> Result<(), AcdpError> {
469    let scheme = map.get("scheme").and_then(|v| v.as_str()).ok_or_else(|| {
470        AcdpError::SchemaViolation("structured locator missing required 'scheme'".into())
471    })?;
472    if !is_dotted_namespace_scheme(scheme) {
473        return Err(AcdpError::SchemaViolation(format!(
474            "structured locator scheme '{scheme}' must match ^[a-z][a-z0-9-]*(\\.[a-z][a-z0-9-]*)+$"
475        )));
476    }
477    Ok(())
478}
479
/// Report whether `s` matches `^[a-z][a-z0-9-]*(\.[a-z][a-z0-9-]*)+$`:
/// two or more dot-separated labels, each starting with a lowercase ASCII
/// letter and continuing with lowercase ASCII letters, digits or `-`.
fn is_dotted_namespace_scheme(s: &str) -> bool {
    /// A single label: non-empty, lowercase first letter, restricted tail.
    fn is_label(label: &str) -> bool {
        let mut chars = label.chars();
        chars.next().is_some_and(|c| c.is_ascii_lowercase())
            && chars.all(|c| matches!(c, 'a'..='z' | '0'..='9' | '-'))
    }

    // At least one dot means at least two labels once split.
    s.contains('.') && s.split('.').all(is_label)
}
493
494fn validate_embedded(emb: &EmbeddedContent) -> Result<(), AcdpError> {
495    // utf8 / base64: content MUST be a JSON string
496    match emb.encoding {
497        EmbeddedEncoding::Utf8 | EmbeddedEncoding::Base64 => {
498            if !emb.content.is_string() {
499                return Err(AcdpError::SchemaViolation(format!(
500                    "embedded {:?} content MUST be a JSON string",
501                    emb.encoding
502                )));
503            }
504        }
505        EmbeddedEncoding::Json => {}
506    }
507    // Decoded size cap
508    let decoded = embedded_decoded_bytes(emb)?;
509    if decoded.len() > MAX_EMBEDDED_BYTES {
510        return Err(AcdpError::EmbeddedTooLarge(format!(
511            "embedded decoded size {} bytes exceeds {} limit",
512            decoded.len(),
513            MAX_EMBEDDED_BYTES
514        )));
515    }
516    Ok(())
517}
518
519/// Decode an [`EmbeddedContent`] to its canonical byte form per
520/// `acdp-data-ref.schema.json` `content_hash` semantics:
521/// - `json`   → JCS-canonicalized bytes
522/// - `utf8`   → raw UTF-8 bytes of the string
523/// - `base64` → base64-decoded bytes of the string
524pub fn embedded_decoded_bytes(emb: &EmbeddedContent) -> Result<Vec<u8>, AcdpError> {
525    Ok(match emb.encoding {
526        EmbeddedEncoding::Json => try_canonicalize_value(&emb.content)?,
527        EmbeddedEncoding::Utf8 => {
528            let s = emb.content.as_str().ok_or_else(|| {
529                AcdpError::SchemaViolation("utf8 embedded content must be a JSON string".into())
530            })?;
531            s.as_bytes().to_vec()
532        }
533        EmbeddedEncoding::Base64 => {
534            let s = emb.content.as_str().ok_or_else(|| {
535                AcdpError::SchemaViolation("base64 embedded content must be a JSON string".into())
536            })?;
537            STANDARD
538                .decode(s)
539                .map_err(|e| AcdpError::SchemaViolation(format!("base64 decode failed: {e}")))?
540        }
541    })
542}
543
544/// Compute the SHA-256 [`ContentHash`] of decoded embedded content.
545pub fn compute_embedded_hash(emb: &EmbeddedContent) -> Result<ContentHash, AcdpError> {
546    let bytes = embedded_decoded_bytes(emb)?;
547    let digest = Sha256::digest(&bytes);
548    Ok(ContentHash(format!("sha256:{}", hex::encode(digest))))
549}
550
551/// Verify a [`DataRef`]'s declared `content_hash` against its embedded payload.
552/// Does nothing if the ref has no `content_hash` or no `embedded`.
553///
554/// BUG-02: a mismatch is a *data-reference-level* integrity failure
555/// ([`AcdpError::DataRefHashMismatch`], wire code `data_ref_hash_mismatch`)
556/// — the embedded bytes diverged from the producer-declared hash, but the
557/// body's own `content_hash` / signature are unaffected. It is NOT the
558/// body-level [`AcdpError::HashMismatch`] (RFC-ACDP-0007 §5, data-ref-007).
559pub fn verify_embedded_hash(dr: &DataRef) -> Result<(), AcdpError> {
560    let (Some(emb), Some(stored)) = (&dr.embedded, &dr.content_hash) else {
561        return Ok(());
562    };
563    let recomputed = compute_embedded_hash(emb)?;
564    if &recomputed != stored {
565        return Err(AcdpError::DataRefHashMismatch(format!(
566            "embedded content_hash mismatch: declared {}, computed {}",
567            stored.as_str(),
568            recomputed.as_str()
569        )));
570    }
571    Ok(())
572}
573
574// ── Metadata ─────────────────────────────────────────────────────────────────
575
576/// Validate `metadata`'s runtime invariants per RFC-ACDP-0002 §3.3:
577/// max 100 top-level properties, max 8 nesting levels, max 64 KB JCS size.
578pub fn validate_metadata(value: &serde_json::Value) -> Result<(), AcdpError> {
579    validate_json_object_limits(value, "metadata")
580}
581
582/// Shared object-limit check for any producer-controlled free-form JSON
583/// object (`metadata` and the flattened `extensions`): max 100 top-level
584/// properties, max 8 nesting levels, max 64 KB JCS size. Without this,
585/// `extensions` (P1-3) would carry unbounded keys/values into JCS+SHA-256.
586fn validate_json_object_limits(value: &serde_json::Value, field: &str) -> Result<(), AcdpError> {
587    let obj = value
588        .as_object()
589        .ok_or_else(|| AcdpError::SchemaViolation(format!("{field} must be a JSON object")))?;
590    if obj.len() > MAX_METADATA_PROPERTIES {
591        return Err(AcdpError::SchemaViolation(format!(
592            "{field} has {} top-level properties, exceeds {} limit",
593            obj.len(),
594            MAX_METADATA_PROPERTIES
595        )));
596    }
597    let depth = json_depth(value);
598    if depth > MAX_METADATA_DEPTH {
599        return Err(AcdpError::SchemaViolation(format!(
600            "{field} nesting depth {depth} exceeds {MAX_METADATA_DEPTH}"
601        )));
602    }
603    let canonical_size = try_canonicalize_value(value)?.len();
604    if canonical_size > MAX_METADATA_JCS_BYTES {
605        return Err(AcdpError::SchemaViolation(format!(
606            "{field} JCS-canonical size {canonical_size} bytes exceeds {MAX_METADATA_JCS_BYTES}"
607        )));
608    }
609    Ok(())
610}
611
612/// Validate the flattened forward-compatibility `extensions` object with
613/// the same property-count / depth / JCS-size caps as `metadata`.
614pub fn validate_extensions(
615    extensions: &serde_json::Map<String, serde_json::Value>,
616) -> Result<(), AcdpError> {
617    if extensions.is_empty() {
618        return Ok(());
619    }
620    // Wrap in a `Value::Object` (clones the map) so the shared
621    // object-limit walker can scan it; `extensions` is small and capped,
622    // so the clone is negligible.
623    let value = serde_json::Value::Object(extensions.clone());
624    validate_json_object_limits(&value, "extensions")
625}
626
627/// Depth measured per RFC-ACDP-0002 §3.3: nested-object/array count,
628/// not counting leaf scalars. The cap of 8 is inclusive (`≤ 8`).
629/// `meta-003` pins this boundary.
630///
631/// Recursion is bounded by `MAX_JSON_DEPTH_SCAN` (well above the §3.3 cap
632/// of 8 and serde_json's 128-level parse limit) so a pathologically deep
633/// programmatically-built `Value` cannot blow the stack here. Any value
634/// that reaches the budget already exceeds `MAX_METADATA_DEPTH`, so the
635/// caller rejects it regardless of the exact (clamped) count.
636fn json_depth(v: &serde_json::Value) -> usize {
637    /// Above §3.3's cap of 8 and serde's 128 parse limit; bounds stack use.
638    const MAX_JSON_DEPTH_SCAN: usize = 256;
639    fn go(v: &serde_json::Value, budget: usize) -> usize {
640        if budget == 0 {
641            return 1; // stop descending; already far over MAX_METADATA_DEPTH
642        }
643        match v {
644            serde_json::Value::Object(map) => {
645                1 + map.values().map(|x| go(x, budget - 1)).max().unwrap_or(0)
646            }
647            serde_json::Value::Array(arr) => {
648                1 + arr.iter().map(|x| go(x, budget - 1)).max().unwrap_or(0)
649            }
650            _ => 0,
651        }
652    }
653    go(v, MAX_JSON_DEPTH_SCAN)
654}
655
656// ── Visibility ───────────────────────────────────────────────────────────────
657
658fn validate_visibility_audience(
659    vis: &Visibility,
660    audience: Option<&[AgentDid]>,
661) -> Result<(), AcdpError> {
662    match vis {
663        Visibility::Restricted => {
664            if audience.is_none_or(|a| a.is_empty()) {
665                return Err(AcdpError::SchemaViolation(
666                    "visibility:restricted requires a non-empty audience".into(),
667                ));
668            }
669        }
670        Visibility::Public => {
671            if audience.is_some_and(|a| !a.is_empty()) {
672                return Err(AcdpError::SchemaViolation(
673                    "visibility:public MUST NOT include audience".into(),
674                ));
675            }
676        }
677        Visibility::Private => {}
678    }
679    Ok(())
680}
681
682// ── Strings & arrays ─────────────────────────────────────────────────────────
683
684fn validate_title(title: &str) -> Result<(), AcdpError> {
685    if title.is_empty() || title.chars().count() > MAX_TITLE_LEN {
686        return Err(AcdpError::SchemaViolation(format!(
687            "title length {} not in 1..={}",
688            title.chars().count(),
689            MAX_TITLE_LEN
690        )));
691    }
692    Ok(())
693}
694
695fn validate_optional_string(s: Option<&str>, name: &str, max_len: usize) -> Result<(), AcdpError> {
696    if let Some(value) = s {
697        if value.chars().count() > max_len {
698            return Err(AcdpError::SchemaViolation(format!(
699                "{name} length {} exceeds {max_len}",
700                value.chars().count()
701            )));
702        }
703    }
704    Ok(())
705}
706
707fn validate_unique_array<T: PartialEq + std::fmt::Debug>(
708    name: &str,
709    items: &[T],
710    max: usize,
711) -> Result<(), AcdpError> {
712    if items.len() > max {
713        return Err(AcdpError::SchemaViolation(format!(
714            "{name} has {} items, exceeds {max}",
715            items.len()
716        )));
717    }
718    for (i, item) in items.iter().enumerate() {
719        if items[i + 1..].iter().any(|other| other == item) {
720            return Err(AcdpError::SchemaViolation(format!(
721                "{name} contains duplicate entry: {item:?}"
722            )));
723        }
724    }
725    Ok(())
726}
727
728fn validate_tags(tags: &[String]) -> Result<(), AcdpError> {
729    if tags.len() > MAX_TAGS {
730        return Err(AcdpError::SchemaViolation(format!(
731            "tags has {} entries, exceeds {}",
732            tags.len(),
733            MAX_TAGS
734        )));
735    }
736    for tag in tags {
737        validate_tag(tag)?;
738    }
739    // Uniqueness
740    for (i, tag) in tags.iter().enumerate() {
741        if tags[i + 1..].iter().any(|t| t == tag) {
742            return Err(AcdpError::SchemaViolation(format!(
743                "tags contains duplicate entry: {tag}"
744            )));
745        }
746    }
747    Ok(())
748}
749
750fn validate_tag(tag: &str) -> Result<(), AcdpError> {
751    if tag.is_empty() || tag.len() > MAX_TAG_LEN {
752        return Err(AcdpError::SchemaViolation(format!(
753            "tag '{tag}' length not in 1..={MAX_TAG_LEN}"
754        )));
755    }
756    let mut chars = tag.chars();
757    let first = chars.next().unwrap();
758    if !first.is_ascii_alphanumeric() {
759        return Err(AcdpError::SchemaViolation(format!(
760            "tag '{tag}' first char must be alphanumeric"
761        )));
762    }
763    if !chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '.' | '-')) {
764        return Err(AcdpError::SchemaViolation(format!(
765            "tag '{tag}' must match [A-Za-z0-9][A-Za-z0-9_.-]*"
766        )));
767    }
768    Ok(())
769}
770
771// ── DID / agent_id ───────────────────────────────────────────────────────────
772
773/// Validate a DID used as `agent_id`.
774///
775/// Producers MUST use a resolvable method: `did:web` (RFC-ACDP-0001
776/// §5.4, the v0.1.0 baseline) or `did:key` (ACDP 0.2 — pure offline
777/// resolution; the DID is the key). For did:key the embedded key
778/// material is decoded here so a garbage identifier fails at schema
779/// validation rather than at signature verification. Whether a given
780/// *registry* accepts did:key producers is a capabilities decision
781/// (`supported_did_methods`), enforced by
782/// `registry::PublishValidator` — this function checks protocol-level
783/// well-formedness only.
784/// Validate the `signature.key_id` form when it is a `did:key` URL
785/// (ACDP 0.2). The only verification method a did:key document has is
786/// the key itself, so the fragment MUST equal the method-specific
787/// identifier and the key material MUST decode. No-op for other
788/// methods — their key_id resolves against a DID document at
789/// verification time.
790fn validate_did_key_key_id_form(key_id: &str) -> Result<(), AcdpError> {
791    if !key_id.starts_with("did:key:") {
792        return Ok(());
793    }
794    acdp_did::key::resolve_did_key_url(key_id).map_err(|e| {
795        AcdpError::SchemaViolation(format!(
796            "signature.key_id is not a well-formed did:key URL: {e}"
797        ))
798    })?;
799    Ok(())
800}
801
802fn validate_agent_did(did: &AgentDid) -> Result<(), AcdpError> {
803    if did.as_str().starts_with("did:key:") {
804        AgentDid::parse(did.as_str())?;
805        acdp_did::key::resolve_did_key(did.as_str()).map_err(|e| {
806            AcdpError::SchemaViolation(format!("agent_id is not a well-formed did:key: {e}"))
807        })?;
808        return Ok(());
809    }
810    AgentDid::parse_web(did.as_str())?;
811    Ok(())
812}
813
814/// Validate `body.origin_registry` per `acdp-context-body.schema.json`
815/// (RFC-ACDP-0002 §3.1, fixture body-001/body-002).
816///
817/// MUST be a bare DNS hostname — NOT a `did:web:` URI, NOT a URL.
818/// `capabilities.registry_did` carries the `did:web` encoding; the
819/// stored body carries the hostname encoding. Storing either form in
820/// the other field is a conformance violation.
821fn validate_origin_registry(s: &str) -> Result<(), AcdpError> {
822    if s.is_empty() {
823        return Err(AcdpError::SchemaViolation(
824            "origin_registry must be a non-empty DNS hostname".into(),
825        ));
826    }
827    if s.starts_with("did:") {
828        return Err(AcdpError::SchemaViolation(format!(
829            "origin_registry must be a DNS hostname, not a DID URI (got '{s}'); \
830             use the bare authority — capabilities.registry_did carries the did:web form"
831        )));
832    }
833    if s.contains("://") {
834        return Err(AcdpError::SchemaViolation(format!(
835            "origin_registry must be a DNS hostname, not a URL (got '{s}')"
836        )));
837    }
838    if s.ends_with('.') || s.starts_with('.') {
839        return Err(AcdpError::SchemaViolation(format!(
840            "origin_registry must be a syntactically valid DNS hostname (got '{s}')"
841        )));
842    }
843    // BUG-02: delegate the full hostname grammar to the same validator
844    // `CtxId::parse` uses for its authority. Enforces lowercase-only,
845    // no underscore, no port, and valid label structure — values like
846    // `REGISTRY.EXAMPLE.COM`, `registry_example.com`, or `registry-.com`
847    // pass the coarse checks above but are not schema-valid hostnames.
848    if !acdp_types::primitives::is_valid_dns_authority(s) {
849        return Err(AcdpError::SchemaViolation(format!(
850            "origin_registry '{s}' is not a valid DNS hostname (must be lowercase \
851             labels of [a-z0-9-] separated by dots, e.g. 'registry.example.com')"
852        )));
853    }
854    Ok(())
855}
856
857/// Validate a DID used in `contributors[]` or `audience[]`.
858///
859/// Per the spec plan's RFC-FIX-11 method-scope table:
860/// - contributors[] SHOULD be `did:web` (attribution; no key resolution),
861/// - audience[] MAY be any DID method (authorization list; not resolved
862///   in v0.1.0).
863///
864/// This helper enforces only the loose `did:` syntax (no method
865/// constraint) so other-method contributors are accepted.
866fn validate_loose_did(did: &AgentDid) -> Result<(), AcdpError> {
867    AgentDid::parse(did.as_str())?;
868    Ok(())
869}
870
871// ── Context type ─────────────────────────────────────────────────────────────
872
873fn validate_namespaced_context_type(value: &str) -> Result<(), AcdpError> {
874    // Schema pattern: ^[a-z][a-z0-9_]*:[a-z][a-z0-9_-]*$
875    let (ns, name) = value.split_once(':').ok_or_else(|| {
876        AcdpError::SchemaViolation(format!(
877            "context_type '{value}' missing namespace separator"
878        ))
879    })?;
880    if ns.is_empty()
881        || !ns.chars().next().is_some_and(|c| c.is_ascii_lowercase())
882        || !ns
883            .chars()
884            .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
885    {
886        return Err(AcdpError::SchemaViolation(format!(
887            "context_type namespace '{ns}' must match [a-z][a-z0-9_]*"
888        )));
889    }
890    if name.is_empty()
891        || !name.chars().next().is_some_and(|c| c.is_ascii_lowercase())
892        || !name
893            .chars()
894            .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || matches!(c, '_' | '-'))
895    {
896        return Err(AcdpError::SchemaViolation(format!(
897            "context_type name '{name}' must match [a-z][a-z0-9_-]*"
898        )));
899    }
900    Ok(())
901}
902
903trait ContextTypeExt {
904    fn namespaced_form(&self) -> Option<&str>;
905}
906
907impl ContextTypeExt for ContextType {
908    fn namespaced_form(&self) -> Option<&str> {
909        match self {
910            ContextType::Custom(s) => Some(s.as_str()),
911            _ => None,
912        }
913    }
914}
915
// ── Version pattern / signatures ─────────────────────────────────────────────
917
918fn validate_semver_pattern(name: &str, value: &str) -> Result<(), AcdpError> {
919    let parts: Vec<&str> = value.split('.').collect();
920    let ok = parts.len() == 3
921        && parts
922            .iter()
923            .all(|p| !p.is_empty() && p.chars().all(|c| c.is_ascii_digit()));
924    if !ok {
925        return Err(AcdpError::SchemaViolation(format!(
926            "{name} '{value}' must match the semver pattern ^\\d+\\.\\d+\\.\\d+$"
927        )));
928    }
929    Ok(())
930}
931
932fn validate_signature_length(algorithm: &str, value_b64: &str) -> Result<(), AcdpError> {
933    let expected = match algorithm {
934        "ed25519" => Some(ED25519_SIG_B64_LEN),
935        "ecdsa-p256" => Some(ECDSA_P256_SIG_B64_LEN),
936        _ => None,
937    };
938    if let Some(n) = expected {
939        if value_b64.len() != n {
940            return Err(AcdpError::InvalidSignature(format!(
941                "signature.value for '{algorithm}' must be {n} base64 chars, got {}",
942                value_b64.len()
943            )));
944        }
945    }
946    Ok(())
947}
948
949// ── Tests ─────────────────────────────────────────────────────────────────────
950
#[cfg(test)]
mod tests {
    use super::*;
    use acdp_types::data_ref::DataRefType;
    use serde_json::json;

    /// Wrap `v` as embedded content with the `Json` encoding.
    fn embedded_json(v: serde_json::Value) -> EmbeddedContent {
        EmbeddedContent {
            encoding: EmbeddedEncoding::Json,
            content: v,
        }
    }

    /// A `DataRef` of the given type with every optional field unset and no
    /// extensions. Tests override individual fields with struct-update
    /// syntax so each literal shows only what the case is about.
    fn bare_data_ref(ref_type: DataRefType) -> DataRef {
        DataRef {
            ref_type,
            description: None,
            size_bytes: None,
            format: None,
            schema_version: None,
            content_hash: None,
            location: None,
            embedded: None,
            extensions: serde_json::Map::new(),
        }
    }

    // ── origin_registry (BUG-02) ─────────────────────────────────────────────

    #[test]
    fn origin_registry_accepts_valid_hostname() {
        validate_origin_registry("registry.example.com").unwrap();
        validate_origin_registry("reg.example").unwrap();
        validate_origin_registry("a-b-c.io").unwrap();
    }

    #[test]
    fn origin_registry_rejects_uppercase() {
        assert!(matches!(
            validate_origin_registry("REGISTRY.EXAMPLE.COM"),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn origin_registry_rejects_underscore() {
        assert!(matches!(
            validate_origin_registry("registry_example.com"),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn origin_registry_rejects_hyphen_label_edges() {
        assert!(matches!(
            validate_origin_registry("registry-.com"),
            Err(AcdpError::SchemaViolation(_))
        ));
        assert!(matches!(
            validate_origin_registry("-registry.example.com"),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    // ── DataRef.oneOf ────────────────────────────────────────────────────────

    #[test]
    fn data_ref_neither_location_nor_embedded_rejected() {
        let dr = bare_data_ref(DataRefType::PrimaryResult);
        assert!(matches!(
            validate_data_ref(&dr),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn data_ref_both_location_and_embedded_rejected() {
        let dr = DataRef {
            location: Some(Location::Uri("https://x/y".into())),
            embedded: Some(embedded_json(json!({"a": 1}))),
            ..bare_data_ref(DataRefType::PrimaryResult)
        };
        assert!(matches!(
            validate_data_ref(&dr),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    // ── DataRef.location URI ─────────────────────────────────────────────────

    #[test]
    fn uri_credentials_rejected() {
        let dr = DataRef::uri(DataRefType::RawData, "https://user:pass@example.com/data");
        assert!(matches!(
            validate_data_ref(&dr),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn uri_without_scheme_rejected() {
        let dr = DataRef::uri(DataRefType::RawData, "no-scheme");
        assert!(matches!(
            validate_data_ref(&dr),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn uri_too_long_rejected() {
        let long_uri = format!("https://x.com/{}", "a".repeat(MAX_URI_LEN));
        let dr = DataRef::uri(DataRefType::RawData, long_uri);
        assert!(matches!(
            validate_data_ref(&dr),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    // ── DataRef.location structured ──────────────────────────────────────────

    #[test]
    fn structured_locator_missing_scheme_rejected() {
        let mut map = serde_json::Map::new();
        map.insert("offset".into(), json!(42));
        let dr = DataRef {
            location: Some(Location::Structured(map)),
            ..bare_data_ref(DataRefType::RawData)
        };
        assert!(matches!(
            validate_data_ref(&dr),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn structured_locator_bad_scheme_rejected() {
        // try_structured rejects at construction time; structured() panics
        // in debug builds. The validate_data_ref guard catches anyone who
        // assembles a `DataRef` literal with a bad scheme directly.
        let err =
            DataRef::try_structured(DataRefType::RawData, "not_dotted", serde_json::Map::new())
                .unwrap_err();
        assert!(matches!(err, AcdpError::SchemaViolation(_)));

        // Direct literal construction (skipping the constructor): must
        // also be caught by validate_data_ref.
        let mut bad = serde_json::Map::new();
        bad.insert(
            "scheme".into(),
            serde_json::Value::String("not_dotted".into()),
        );
        let dr = DataRef {
            location: Some(Location::Structured(bad)),
            ..bare_data_ref(DataRefType::RawData)
        };
        assert!(matches!(
            validate_data_ref(&dr),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn structured_locator_valid() {
        let mut extra = serde_json::Map::new();
        extra.insert("topic".into(), json!("events"));
        let dr = DataRef::structured(DataRefType::RawData, "kafka.offset", extra);
        validate_data_ref(&dr).unwrap();
    }

    // ── DataRef.embedded ─────────────────────────────────────────────────────

    #[test]
    fn embedded_utf8_must_be_string() {
        let dr = DataRef {
            embedded: Some(EmbeddedContent {
                encoding: EmbeddedEncoding::Utf8,
                content: json!(42),
            }),
            ..bare_data_ref(DataRefType::PrimaryResult)
        };
        assert!(matches!(
            validate_data_ref(&dr),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn embedded_too_large_rejected() {
        // 70 KB of UTF-8 content
        let big = "a".repeat(70 * 1024);
        let dr = DataRef::embedded_utf8(DataRefType::PrimaryResult, big);
        assert!(matches!(
            validate_data_ref(&dr),
            Err(AcdpError::EmbeddedTooLarge(_))
        ));
    }

    // ── Embedded hash ────────────────────────────────────────────────────────

    #[test]
    fn embedded_hash_json_round_trip() {
        let emb = embedded_json(json!({"b": 2, "a": 1}));
        let h = compute_embedded_hash(&emb).unwrap();
        // JCS sorts keys → {"a":1,"b":2}, hash is deterministic
        let expected = {
            let bytes = b"{\"a\":1,\"b\":2}";
            format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
        };
        assert_eq!(h.as_str(), expected);
    }

    #[test]
    fn embedded_hash_utf8() {
        let emb = EmbeddedContent {
            encoding: EmbeddedEncoding::Utf8,
            content: json!("hello"),
        };
        let h = compute_embedded_hash(&emb).unwrap();
        let expected = format!("sha256:{}", hex::encode(Sha256::digest(b"hello")));
        assert_eq!(h.as_str(), expected);
    }

    #[test]
    fn embedded_hash_base64() {
        let raw = b"binary data";
        let b64 = STANDARD.encode(raw);
        let emb = EmbeddedContent {
            encoding: EmbeddedEncoding::Base64,
            content: json!(b64),
        };
        let h = compute_embedded_hash(&emb).unwrap();
        let expected = format!("sha256:{}", hex::encode(Sha256::digest(raw)));
        assert_eq!(h.as_str(), expected);
    }

    #[test]
    fn verify_embedded_hash_mismatch_detected() {
        let dr = DataRef {
            content_hash: Some(ContentHash("sha256:0000".into())),
            embedded: Some(embedded_json(json!({"x": 1}))),
            ..bare_data_ref(DataRefType::PrimaryResult)
        };
        assert!(matches!(
            verify_embedded_hash(&dr),
            Err(AcdpError::DataRefHashMismatch(_))
        ));
    }

    // ── Metadata ─────────────────────────────────────────────────────────────

    #[test]
    fn metadata_too_many_properties_rejected() {
        // One property past the cap, expressed through the constant so the
        // test follows the limit if it ever changes.
        let mut obj = serde_json::Map::new();
        for i in 0..(MAX_METADATA_PROPERTIES + 1) {
            obj.insert(format!("k{i}"), json!(i));
        }
        assert!(matches!(
            validate_metadata(&serde_json::Value::Object(obj)),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn metadata_too_deep_rejected() {
        // Build an object nested past MAX_METADATA_DEPTH.
        let mut v = json!("leaf");
        for _ in 0..(MAX_METADATA_DEPTH + 2) {
            let mut o = serde_json::Map::new();
            o.insert("a".into(), v);
            v = serde_json::Value::Object(o);
        }
        assert!(matches!(
            validate_metadata(&v),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn metadata_too_large_rejected() {
        // A single string value already longer than the JCS byte cap.
        let big = "a".repeat(MAX_METADATA_JCS_BYTES + 1);
        let v = json!({"big": big});
        assert!(matches!(
            validate_metadata(&v),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn metadata_must_be_object() {
        assert!(matches!(
            validate_metadata(&json!([1, 2, 3])),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    // ── Visibility / audience ────────────────────────────────────────────────

    #[test]
    fn public_with_audience_rejected() {
        let aud = vec![AgentDid::new("did:web:x")];
        assert!(matches!(
            validate_visibility_audience(&Visibility::Public, Some(&aud)),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    #[test]
    fn public_with_empty_audience_ok() {
        validate_visibility_audience(&Visibility::Public, Some(&[])).unwrap();
        validate_visibility_audience(&Visibility::Public, None).unwrap();
    }

    #[test]
    fn restricted_without_audience_rejected() {
        assert!(matches!(
            validate_visibility_audience(&Visibility::Restricted, None),
            Err(AcdpError::SchemaViolation(_))
        ));
    }

    // ── data_period ──────────────────────────────────────────────────────────

    #[test]
    fn data_period_start_after_end_rejected_via_builder() {
        use acdp_crypto::SigningKey;
        use acdp_producer::Producer;
        use acdp_types::body::DataPeriod;
        use chrono::TimeZone;

        let p = Producer::new(
            SigningKey::from_bytes(&[0u8; 32]),
            AgentDid::new("did:web:agents.example.com:test"),
            "did:web:agents.example.com:test#key-1",
        );
        let err = p
            .publish_request()
            .title("t")
            .context_type(ContextType::DataSnapshot)
            .data_period(DataPeriod {
                start: chrono::Utc.with_ymd_and_hms(2026, 6, 1, 0, 0, 0).unwrap(),
                end: chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            })
            .build()
            .unwrap_err();
        assert!(matches!(err, AcdpError::SchemaViolation(_)));
    }

    // ── Tags ─────────────────────────────────────────────────────────────────

    #[test]
    fn tag_pattern_validation() {
        validate_tag("hello").unwrap();
        validate_tag("Q1-2026").unwrap();
        validate_tag("a_b.c").unwrap();
        // Cannot start with non-alphanumeric
        assert!(validate_tag("-bad").is_err());
        // Disallowed chars
        assert!(validate_tag("space here").is_err());
        // Empty
        assert!(validate_tag("").is_err());
    }

    #[test]
    fn duplicate_tags_rejected() {
        let tags = vec!["a".to_string(), "b".to_string(), "a".to_string()];
        assert!(validate_tags(&tags).is_err());
    }

    // ── Signature length ─────────────────────────────────────────────────────

    #[test]
    fn ed25519_sig_must_be_88_chars() {
        assert!(validate_signature_length("ed25519", "AAAA").is_err());
        validate_signature_length("ed25519", &"A".repeat(88)).unwrap();
        // Unknown algorithm: skipped
        validate_signature_length("future-alg", "any").unwrap();
    }

    // ── context_type custom ──────────────────────────────────────────────────

    #[test]
    fn namespaced_context_type_pattern() {
        validate_namespaced_context_type("finance:portfolio_snapshot").unwrap();
        assert!(validate_namespaced_context_type("Finance:portfolio").is_err());
        assert!(validate_namespaced_context_type("finance:Portfolio").is_err());
        assert!(validate_namespaced_context_type("no-colon").is_err());
    }

    // ── R2 audit test-coverage matrix ────────────────────────────────────────

    /// T8 — `acdp_version` semver pattern is enforced.
    #[test]
    fn acdp_version_pattern_rejects_non_semver() {
        validate_semver_pattern("acdp_version", "0.1.0").unwrap();
        validate_semver_pattern("acdp_version", "10.20.30").unwrap();
        assert!(validate_semver_pattern("acdp_version", "0.1.0-rc.1").is_err());
        assert!(validate_semver_pattern("acdp_version", "0.0").is_err());
        assert!(validate_semver_pattern("acdp_version", "vee.zero.zero").is_err());
    }

    /// T7 — `derived_from` containing a malformed ctx_id is rejected by
    /// `validate_publish_request`.
    #[test]
    fn derived_from_malformed_ctx_id_rejected() {
        use acdp_crypto::SigningKey;
        use acdp_producer::Producer;

        let p = Producer::new(
            SigningKey::from_bytes(&[0u8; 32]),
            AgentDid::new("did:web:agents.example.com:test"),
            "did:web:agents.example.com:test#key-1",
        );
        let err = p
            .publish_request()
            .title("t")
            .context_type(ContextType::DataSnapshot)
            .derived_from(vec![CtxId("not-a-ctx-id".into())])
            .build()
            .unwrap_err();
        assert!(matches!(err, AcdpError::SchemaViolation(_)));
    }

    /// T2 — Embedded `content_hash` mismatch caught by
    /// `verify_embedded_hash`.
    #[test]
    fn embedded_content_hash_mismatch_caught() {
        let dr = DataRef {
            content_hash: Some(ContentHash("sha256:0000".into())),
            embedded: Some(EmbeddedContent {
                encoding: EmbeddedEncoding::Json,
                content: json!({"x": 1}),
            }),
            ..bare_data_ref(DataRefType::PrimaryResult)
        };
        assert!(matches!(
            verify_embedded_hash(&dr),
            Err(AcdpError::DataRefHashMismatch(_))
        ));
    }

    /// T14 — duplicate audience entries rejected (uniqueItems: true).
    #[test]
    fn audience_uniqueness_rejected() {
        let dup = vec![
            AgentDid::new("did:web:a.example.com"),
            AgentDid::new("did:web:a.example.com"),
        ];
        let err = validate_unique_array("audience", &dup, MAX_AUDIENCE).unwrap_err();
        assert!(matches!(err, AcdpError::SchemaViolation(_)));
    }

    // ── P1-3: extensions caps + bounded walkers ──────────────────────────────

    #[test]
    fn extensions_empty_ok() {
        validate_extensions(&serde_json::Map::new()).unwrap();
    }

    #[test]
    fn extensions_small_forward_compat_accepted() {
        // The can-008/can-009 shape: a couple of unknown producer fields.
        let mut ext = serde_json::Map::new();
        ext.insert("priority".into(), json!("high"));
        ext.insert("custom".into(), json!({"k": [1, 2, 3]}));
        validate_extensions(&ext).unwrap();
    }

    #[test]
    fn extensions_too_many_properties_rejected() {
        let mut ext = serde_json::Map::new();
        for i in 0..(MAX_METADATA_PROPERTIES + 1) {
            ext.insert(format!("k{i}"), json!(i));
        }
        let err = validate_extensions(&ext).unwrap_err();
        assert!(matches!(err, AcdpError::SchemaViolation(_)));
    }

    #[test]
    fn extensions_oversized_jcs_rejected() {
        let mut ext = serde_json::Map::new();
        ext.insert("blob".into(), json!("x".repeat(MAX_METADATA_JCS_BYTES + 1)));
        let err = validate_extensions(&ext).unwrap_err();
        assert!(matches!(err, AcdpError::SchemaViolation(_)));
    }

    #[test]
    fn extensions_too_deep_rejected() {
        // Build a value nested past MAX_METADATA_DEPTH.
        let mut v = json!(0);
        for _ in 0..(MAX_METADATA_DEPTH + 2) {
            v = json!({ "n": v });
        }
        let mut ext = serde_json::Map::new();
        ext.insert("deep".into(), v);
        let err = validate_extensions(&ext).unwrap_err();
        assert!(matches!(err, AcdpError::SchemaViolation(_)));
    }

    #[test]
    fn json_depth_clamps_past_scan_budget() {
        // Deeper than the 256-frame scan budget but shallow enough that
        // building/dropping the Value is itself safe. `json_depth` must
        // bound its own recursion and still report a value over the §3.3
        // cap, and the canonicalizer must refuse it rather than overflow.
        let mut v = json!(0);
        for _ in 0..400 {
            v = json!([v]);
        }
        assert!(json_depth(&v) > MAX_METADATA_DEPTH);
        assert!(acdp_crypto::try_canonicalize_value(&v).is_err());
    }
}