1use acdp_crypto::try_canonicalize_value;
23use acdp_primitives::error::AcdpError;
24use acdp_types::body::Body;
25use acdp_types::data_ref::{DataRef, EmbeddedContent, EmbeddedEncoding, Location};
26use acdp_types::primitives::{
27 AgentDid, ContentHash, ContextType, CtxId, LineageId, Status, Visibility,
28};
29use acdp_types::publish::PublishRequest;
30use base64::{engine::general_purpose::STANDARD, Engine};
31use sha2::{Digest, Sha256};
32
// Upper bounds for free-text fields (checked by `validate_title`,
// `validate_optional_string`, `validate_data_ref_structural` and `validate_tag`).
const MAX_TITLE_LEN: usize = 500;
const MAX_DESCRIPTION_LEN: usize = 5000;
const MAX_SUMMARY_LEN: usize = 1000;
const MAX_DOMAIN_LEN: usize = 200;
const MAX_DATA_REF_DESCRIPTION_LEN: usize = 1000;
const MAX_TAG_LEN: usize = 100;
// Upper bounds on the number of entries in list-valued fields.
const MAX_CONTRIBUTORS: usize = 100;
const MAX_TAGS: usize = 200;
const MAX_DERIVED_FROM: usize = 1000;
const MAX_AUDIENCE: usize = 1000;
// Limits applied by `validate_json_object_limits` (used for both `metadata`
// and `extensions`): top-level property count, nesting depth, and size of the
// canonicalized serialization in bytes.
const MAX_METADATA_PROPERTIES: usize = 100;
const MAX_METADATA_DEPTH: usize = 8;
const MAX_METADATA_JCS_BYTES: usize = 65_536;
// Upper bound on the byte length of a URI-form `DataRef.location`.
const MAX_URI_LEN: usize = 4096;
// Upper bound on the decoded size of embedded content, in bytes.
const MAX_EMBEDDED_BYTES: usize = 65_536;
// Exact length required of the base64 text in `signature.value` for each
// known algorithm (88 is the padded base64 length of a 64-byte value).
const ED25519_SIG_B64_LEN: usize = 88;
const ECDSA_P256_SIG_B64_LEN: usize = 88;
52
53pub fn validate_capabilities(caps: &acdp_types::CapabilitiesDocument) -> Result<(), AcdpError> {
75 validate_semver_pattern("acdp_version", &caps.acdp_version)?;
76
77 AgentDid::parse_web(caps.registry_did.as_str()).map_err(|e| {
78 AcdpError::SchemaViolation(format!(
79 "capabilities.registry_did must be did:web for v0.1.0: {e}"
80 ))
81 })?;
82
83 if !caps
84 .supported_signature_algorithms
85 .iter()
86 .any(|a| a == "ed25519")
87 {
88 return Err(AcdpError::SchemaViolation(
89 "capabilities.supported_signature_algorithms MUST contain 'ed25519' \
90 (RFC-ACDP-0001 §5.10)"
91 .into(),
92 ));
93 }
94
95 if !caps.supported_did_methods.iter().any(|m| m == "did:web") {
96 return Err(AcdpError::SchemaViolation(
97 "capabilities.supported_did_methods MUST contain 'did:web' \
98 (RFC-ACDP-0001 §5.4)"
99 .into(),
100 ));
101 }
102
103 if !caps.profiles.iter().any(|p| p == "acdp-registry-core") {
104 return Err(AcdpError::SchemaViolation(
105 "capabilities.profiles MUST contain 'acdp-registry-core' \
106 (RFC-ACDP-0001 §9.1)"
107 .into(),
108 ));
109 }
110
111 if caps.limits.max_embedded_bytes != 65_536 {
112 return Err(AcdpError::SchemaViolation(format!(
113 "capabilities.limits.max_embedded_bytes must be 65536 (fixed by \
114 RFC-ACDP-0007 §3.1), got {}",
115 caps.limits.max_embedded_bytes
116 )));
117 }
118
119 if caps.limits.max_payload_bytes < 1024 {
120 return Err(AcdpError::SchemaViolation(format!(
121 "capabilities.limits.max_payload_bytes must be ≥ 1024, got {}",
122 caps.limits.max_payload_bytes
123 )));
124 }
125
126 if caps.supports_idempotency_key {
127 let ttl = caps.limits.idempotency_key_ttl_seconds.ok_or_else(|| {
128 AcdpError::SchemaViolation(
129 "limits.idempotency_key_ttl_seconds is required when \
130 supports_idempotency_key is true (RFC-ACDP-0007 §3.2)"
131 .into(),
132 )
133 })?;
134 if !(86_400..=604_800).contains(&ttl) {
135 return Err(AcdpError::SchemaViolation(format!(
136 "limits.idempotency_key_ttl_seconds must be in 86400..=604800, got {ttl}"
137 )));
138 }
139 }
140
141 Ok(())
142}
143
144pub fn validate_publish_request(req: &PublishRequest) -> Result<(), AcdpError> {
149 validate_title(&req.title)?;
150 validate_optional_string(
151 req.description.as_deref(),
152 "description",
153 MAX_DESCRIPTION_LEN,
154 )?;
155 validate_optional_string(req.summary.as_deref(), "summary", MAX_SUMMARY_LEN)?;
156 validate_optional_string(req.domain.as_deref(), "domain", MAX_DOMAIN_LEN)?;
157
158 validate_agent_did(&req.agent_id)?;
159 for c in &req.contributors {
160 validate_loose_did(c)?;
161 }
162 validate_unique_array("contributors", &req.contributors, MAX_CONTRIBUTORS)?;
163 validate_unique_array("derived_from", &req.derived_from, MAX_DERIVED_FROM)?;
164
165 if let Some(tags) = &req.tags {
166 validate_tags(tags)?;
167 }
168 if let Some(audience) = &req.audience {
169 validate_unique_array("audience", audience, MAX_AUDIENCE)?;
170 for did in audience {
171 validate_loose_did(did)?;
172 }
173 }
174
175 validate_visibility_audience(&req.visibility, req.audience.as_deref())?;
176
177 if let Some(dp) = &req.data_period {
178 if dp.start > dp.end {
179 return Err(AcdpError::SchemaViolation(
180 "data_period.start must not be after data_period.end".into(),
181 ));
182 }
183 }
184
185 if let Some(ct) = &req.context_type.namespaced_form() {
186 validate_namespaced_context_type(ct)?;
187 }
188
189 if let Some(meta) = &req.metadata {
190 validate_metadata(meta)?;
191 }
192
193 for dr in &req.data_refs {
194 validate_data_ref(dr)?;
195 }
196
197 validate_signature_length(&req.signature.algorithm, &req.signature.value)?;
198 validate_did_key_key_id_form(&req.signature.key_id)?;
199 ContentHash::parse(req.content_hash.as_str())?;
200
201 if let Some(prev) = &req.supersedes {
203 CtxId::parse(prev.as_str())?;
204 }
205 for ancestor in &req.derived_from {
206 CtxId::parse(ancestor.as_str())?;
207 }
208 if let Some(lineage) = &req.lineage_id {
209 acdp_types::primitives::LineageId::parse(lineage.as_str())?;
210 }
211
212 if let Some(v) = &req.acdp_version {
214 validate_semver_pattern("acdp_version", v)?;
215 }
216
217 match (&req.supersedes, req.version) {
219 (None, 1) => {}
220 (None, v) => {
221 return Err(AcdpError::SchemaViolation(format!(
222 "first-version publish requires version=1, got {v}"
223 )));
224 }
225 (Some(_), v) if v >= 2 => {}
226 (Some(_), v) => {
227 return Err(AcdpError::SchemaViolation(format!(
228 "supersession publish requires version >= 2, got {v}"
229 )));
230 }
231 }
232
233 if req.version == 1 && req.lineage_id.is_some() {
241 return Err(AcdpError::SchemaViolation(
242 "lineage_id MUST NOT be set on v1 publish requests (RFC-ACDP-0003 §2.2)".into(),
243 ));
244 }
245
246 Ok(())
247}
248
249pub fn validate_body(body: &Body) -> Result<(), AcdpError> {
251 validate_body_inner(body, true)
252}
253
254pub fn validate_body_structural(body: &Body) -> Result<(), AcdpError> {
265 validate_body_inner(body, false)
266}
267
268fn validate_body_inner(body: &Body, check_embedded_hashes: bool) -> Result<(), AcdpError> {
269 validate_title(&body.title)?;
270 validate_optional_string(
271 body.description.as_deref(),
272 "description",
273 MAX_DESCRIPTION_LEN,
274 )?;
275 validate_optional_string(body.summary.as_deref(), "summary", MAX_SUMMARY_LEN)?;
276 validate_optional_string(body.domain.as_deref(), "domain", MAX_DOMAIN_LEN)?;
277
278 validate_agent_did(&body.agent_id)?;
279 for c in &body.contributors {
280 validate_loose_did(c)?;
281 }
282 validate_unique_array("contributors", &body.contributors, MAX_CONTRIBUTORS)?;
283 validate_unique_array("derived_from", &body.derived_from, MAX_DERIVED_FROM)?;
284
285 if let Some(tags) = &body.tags {
286 validate_tags(tags)?;
287 }
288 if let Some(audience) = &body.audience {
289 validate_unique_array("audience", audience, MAX_AUDIENCE)?;
290 for did in audience {
291 validate_loose_did(did)?;
292 }
293 }
294 validate_visibility_audience(&body.visibility, body.audience.as_deref())?;
295
296 if let Some(dp) = &body.data_period {
297 if dp.start > dp.end {
298 return Err(AcdpError::SchemaViolation(
299 "data_period.start must not be after data_period.end".into(),
300 ));
301 }
302 }
303
304 if let Some(meta) = &body.metadata {
305 validate_metadata(meta)?;
306 }
307
308 validate_extensions(&body.extensions)?;
312
313 for dr in &body.data_refs {
314 if check_embedded_hashes {
315 validate_data_ref(dr)?;
316 } else {
317 validate_data_ref_structural(dr)?;
318 }
319 }
320
321 validate_signature_length(&body.signature.algorithm, &body.signature.value)?;
322 validate_did_key_key_id_form(&body.signature.key_id)?;
323 validate_identifiers(&body.ctx_id, &body.lineage_id, &body.content_hash)?;
324
325 if let Some(prev) = &body.supersedes {
327 CtxId::parse(prev.as_str())?;
328 }
329 for ancestor in &body.derived_from {
330 CtxId::parse(ancestor.as_str())?;
331 }
332
333 if let Some(v) = &body.acdp_version {
334 validate_semver_pattern("acdp_version", v)?;
335 }
336
337 let _ = &body.created_at; validate_origin_registry(&body.origin_registry)?;
339
340 let _ = std::any::type_name::<Status>();
342 let _: &Visibility = &body.visibility;
343
344 Ok(())
345}
346
347pub fn validate_identifiers(
349 ctx_id: &CtxId,
350 lineage_id: &LineageId,
351 content_hash: &ContentHash,
352) -> Result<(), AcdpError> {
353 CtxId::parse(ctx_id.as_str())?;
354 LineageId::parse(lineage_id.as_str())?;
355 ContentHash::parse(content_hash.as_str())?;
356 Ok(())
357}
358
359pub fn validate_data_ref(dr: &DataRef) -> Result<(), AcdpError> {
364 validate_data_ref_structural(dr)?;
365 if dr.embedded.is_some() {
370 verify_embedded_hash(dr)?;
371 }
372 Ok(())
373}
374
375pub fn validate_data_ref_structural(dr: &DataRef) -> Result<(), AcdpError> {
381 match (&dr.location, &dr.embedded) {
383 (None, None) => {
384 return Err(AcdpError::SchemaViolation(
385 "DataRef requires exactly one of 'location' or 'embedded' (got neither)".into(),
386 ));
387 }
388 (Some(_), Some(_)) => {
389 return Err(AcdpError::SchemaViolation(
390 "DataRef requires exactly one of 'location' or 'embedded' (got both)".into(),
391 ));
392 }
393 _ => {}
394 }
395
396 if let Some(desc) = &dr.description {
397 if desc.len() > MAX_DATA_REF_DESCRIPTION_LEN {
398 return Err(AcdpError::SchemaViolation(format!(
399 "DataRef.description {} chars exceeds {} limit",
400 desc.len(),
401 MAX_DATA_REF_DESCRIPTION_LEN
402 )));
403 }
404 }
405
406 if let Some(loc) = &dr.location {
407 validate_location(loc)?;
408 }
409 if let Some(emb) = &dr.embedded {
410 validate_embedded(emb)?;
411 }
412
413 Ok(())
414}
415
416fn validate_location(loc: &Location) -> Result<(), AcdpError> {
417 match loc {
418 Location::Uri(uri) => validate_uri_location(uri),
419 Location::Structured(map) => validate_structured_locator(map),
420 }
421}
422
423fn validate_uri_location(uri: &str) -> Result<(), AcdpError> {
424 if uri.len() < 3 || uri.len() > MAX_URI_LEN {
425 return Err(AcdpError::SchemaViolation(format!(
426 "DataRef.location URI length {} not in 3..={}",
427 uri.len(),
428 MAX_URI_LEN
429 )));
430 }
431 let (scheme, rest) = uri
433 .split_once(':')
434 .ok_or_else(|| AcdpError::SchemaViolation(format!("URI missing scheme: {uri}")))?;
435 if scheme.is_empty()
436 || !scheme
437 .chars()
438 .next()
439 .is_some_and(|c| c.is_ascii_lowercase())
440 || !scheme
441 .chars()
442 .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || matches!(c, '+' | '.' | '-'))
443 {
444 return Err(AcdpError::SchemaViolation(format!(
445 "URI scheme '{scheme}' invalid; must match [a-z][a-z0-9+.-]*"
446 )));
447 }
448 if let Some(after_slashes) = rest.strip_prefix("//") {
450 if let Some(authority_end) = after_slashes.find(['/', '?', '#']) {
451 let authority = &after_slashes[..authority_end];
452 if authority.contains('@') {
453 return Err(AcdpError::SchemaViolation(format!(
454 "URI MUST NOT contain credentials in userinfo: {uri}"
455 )));
456 }
457 } else if after_slashes.contains('@') {
458 return Err(AcdpError::SchemaViolation(format!(
459 "URI MUST NOT contain credentials in userinfo: {uri}"
460 )));
461 }
462 }
463 Ok(())
464}
465
466fn validate_structured_locator(
467 map: &serde_json::Map<String, serde_json::Value>,
468) -> Result<(), AcdpError> {
469 let scheme = map.get("scheme").and_then(|v| v.as_str()).ok_or_else(|| {
470 AcdpError::SchemaViolation("structured locator missing required 'scheme'".into())
471 })?;
472 if !is_dotted_namespace_scheme(scheme) {
473 return Err(AcdpError::SchemaViolation(format!(
474 "structured locator scheme '{scheme}' must match ^[a-z][a-z0-9-]*(\\.[a-z][a-z0-9-]*)+$"
475 )));
476 }
477 Ok(())
478}
479
/// Returns `true` when `s` consists of two or more dot-separated labels, each
/// starting with an ASCII lowercase letter and containing only ASCII
/// lowercase letters, ASCII digits and `-`.
fn is_dotted_namespace_scheme(s: &str) -> bool {
    /// One label: `[a-z][a-z0-9-]*`. Any non-ASCII byte fails both tests.
    fn is_label(label: &str) -> bool {
        let mut bytes = label.bytes();
        matches!(bytes.next(), Some(b'a'..=b'z'))
            && bytes.all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-'))
    }

    // At least one '.' means at least two labels.
    s.contains('.') && s.split('.').all(is_label)
}
493
494fn validate_embedded(emb: &EmbeddedContent) -> Result<(), AcdpError> {
495 match emb.encoding {
497 EmbeddedEncoding::Utf8 | EmbeddedEncoding::Base64 => {
498 if !emb.content.is_string() {
499 return Err(AcdpError::SchemaViolation(format!(
500 "embedded {:?} content MUST be a JSON string",
501 emb.encoding
502 )));
503 }
504 }
505 EmbeddedEncoding::Json => {}
506 }
507 let decoded = embedded_decoded_bytes(emb)?;
509 if decoded.len() > MAX_EMBEDDED_BYTES {
510 return Err(AcdpError::EmbeddedTooLarge(format!(
511 "embedded decoded size {} bytes exceeds {} limit",
512 decoded.len(),
513 MAX_EMBEDDED_BYTES
514 )));
515 }
516 Ok(())
517}
518
519pub fn embedded_decoded_bytes(emb: &EmbeddedContent) -> Result<Vec<u8>, AcdpError> {
525 Ok(match emb.encoding {
526 EmbeddedEncoding::Json => try_canonicalize_value(&emb.content)?,
527 EmbeddedEncoding::Utf8 => {
528 let s = emb.content.as_str().ok_or_else(|| {
529 AcdpError::SchemaViolation("utf8 embedded content must be a JSON string".into())
530 })?;
531 s.as_bytes().to_vec()
532 }
533 EmbeddedEncoding::Base64 => {
534 let s = emb.content.as_str().ok_or_else(|| {
535 AcdpError::SchemaViolation("base64 embedded content must be a JSON string".into())
536 })?;
537 STANDARD
538 .decode(s)
539 .map_err(|e| AcdpError::SchemaViolation(format!("base64 decode failed: {e}")))?
540 }
541 })
542}
543
544pub fn compute_embedded_hash(emb: &EmbeddedContent) -> Result<ContentHash, AcdpError> {
546 let bytes = embedded_decoded_bytes(emb)?;
547 let digest = Sha256::digest(&bytes);
548 Ok(ContentHash(format!("sha256:{}", hex::encode(digest))))
549}
550
551pub fn verify_embedded_hash(dr: &DataRef) -> Result<(), AcdpError> {
560 let (Some(emb), Some(stored)) = (&dr.embedded, &dr.content_hash) else {
561 return Ok(());
562 };
563 let recomputed = compute_embedded_hash(emb)?;
564 if &recomputed != stored {
565 return Err(AcdpError::DataRefHashMismatch(format!(
566 "embedded content_hash mismatch: declared {}, computed {}",
567 stored.as_str(),
568 recomputed.as_str()
569 )));
570 }
571 Ok(())
572}
573
574pub fn validate_metadata(value: &serde_json::Value) -> Result<(), AcdpError> {
579 validate_json_object_limits(value, "metadata")
580}
581
582fn validate_json_object_limits(value: &serde_json::Value, field: &str) -> Result<(), AcdpError> {
587 let obj = value
588 .as_object()
589 .ok_or_else(|| AcdpError::SchemaViolation(format!("{field} must be a JSON object")))?;
590 if obj.len() > MAX_METADATA_PROPERTIES {
591 return Err(AcdpError::SchemaViolation(format!(
592 "{field} has {} top-level properties, exceeds {} limit",
593 obj.len(),
594 MAX_METADATA_PROPERTIES
595 )));
596 }
597 let depth = json_depth(value);
598 if depth > MAX_METADATA_DEPTH {
599 return Err(AcdpError::SchemaViolation(format!(
600 "{field} nesting depth {depth} exceeds {MAX_METADATA_DEPTH}"
601 )));
602 }
603 let canonical_size = try_canonicalize_value(value)?.len();
604 if canonical_size > MAX_METADATA_JCS_BYTES {
605 return Err(AcdpError::SchemaViolation(format!(
606 "{field} JCS-canonical size {canonical_size} bytes exceeds {MAX_METADATA_JCS_BYTES}"
607 )));
608 }
609 Ok(())
610}
611
612pub fn validate_extensions(
615 extensions: &serde_json::Map<String, serde_json::Value>,
616) -> Result<(), AcdpError> {
617 if extensions.is_empty() {
618 return Ok(());
619 }
620 let value = serde_json::Value::Object(extensions.clone());
624 validate_json_object_limits(&value, "extensions")
625}
626
627fn json_depth(v: &serde_json::Value) -> usize {
637 const MAX_JSON_DEPTH_SCAN: usize = 256;
639 fn go(v: &serde_json::Value, budget: usize) -> usize {
640 if budget == 0 {
641 return 1; }
643 match v {
644 serde_json::Value::Object(map) => {
645 1 + map.values().map(|x| go(x, budget - 1)).max().unwrap_or(0)
646 }
647 serde_json::Value::Array(arr) => {
648 1 + arr.iter().map(|x| go(x, budget - 1)).max().unwrap_or(0)
649 }
650 _ => 0,
651 }
652 }
653 go(v, MAX_JSON_DEPTH_SCAN)
654}
655
656fn validate_visibility_audience(
659 vis: &Visibility,
660 audience: Option<&[AgentDid]>,
661) -> Result<(), AcdpError> {
662 match vis {
663 Visibility::Restricted => {
664 if audience.is_none_or(|a| a.is_empty()) {
665 return Err(AcdpError::SchemaViolation(
666 "visibility:restricted requires a non-empty audience".into(),
667 ));
668 }
669 }
670 Visibility::Public => {
671 if audience.is_some_and(|a| !a.is_empty()) {
672 return Err(AcdpError::SchemaViolation(
673 "visibility:public MUST NOT include audience".into(),
674 ));
675 }
676 }
677 Visibility::Private => {}
678 }
679 Ok(())
680}
681
682fn validate_title(title: &str) -> Result<(), AcdpError> {
685 if title.is_empty() || title.chars().count() > MAX_TITLE_LEN {
686 return Err(AcdpError::SchemaViolation(format!(
687 "title length {} not in 1..={}",
688 title.chars().count(),
689 MAX_TITLE_LEN
690 )));
691 }
692 Ok(())
693}
694
695fn validate_optional_string(s: Option<&str>, name: &str, max_len: usize) -> Result<(), AcdpError> {
696 if let Some(value) = s {
697 if value.chars().count() > max_len {
698 return Err(AcdpError::SchemaViolation(format!(
699 "{name} length {} exceeds {max_len}",
700 value.chars().count()
701 )));
702 }
703 }
704 Ok(())
705}
706
707fn validate_unique_array<T: PartialEq + std::fmt::Debug>(
708 name: &str,
709 items: &[T],
710 max: usize,
711) -> Result<(), AcdpError> {
712 if items.len() > max {
713 return Err(AcdpError::SchemaViolation(format!(
714 "{name} has {} items, exceeds {max}",
715 items.len()
716 )));
717 }
718 for (i, item) in items.iter().enumerate() {
719 if items[i + 1..].iter().any(|other| other == item) {
720 return Err(AcdpError::SchemaViolation(format!(
721 "{name} contains duplicate entry: {item:?}"
722 )));
723 }
724 }
725 Ok(())
726}
727
728fn validate_tags(tags: &[String]) -> Result<(), AcdpError> {
729 if tags.len() > MAX_TAGS {
730 return Err(AcdpError::SchemaViolation(format!(
731 "tags has {} entries, exceeds {}",
732 tags.len(),
733 MAX_TAGS
734 )));
735 }
736 for tag in tags {
737 validate_tag(tag)?;
738 }
739 for (i, tag) in tags.iter().enumerate() {
741 if tags[i + 1..].iter().any(|t| t == tag) {
742 return Err(AcdpError::SchemaViolation(format!(
743 "tags contains duplicate entry: {tag}"
744 )));
745 }
746 }
747 Ok(())
748}
749
750fn validate_tag(tag: &str) -> Result<(), AcdpError> {
751 if tag.is_empty() || tag.len() > MAX_TAG_LEN {
752 return Err(AcdpError::SchemaViolation(format!(
753 "tag '{tag}' length not in 1..={MAX_TAG_LEN}"
754 )));
755 }
756 let mut chars = tag.chars();
757 let first = chars.next().unwrap();
758 if !first.is_ascii_alphanumeric() {
759 return Err(AcdpError::SchemaViolation(format!(
760 "tag '{tag}' first char must be alphanumeric"
761 )));
762 }
763 if !chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '.' | '-')) {
764 return Err(AcdpError::SchemaViolation(format!(
765 "tag '{tag}' must match [A-Za-z0-9][A-Za-z0-9_.-]*"
766 )));
767 }
768 Ok(())
769}
770
771fn validate_did_key_key_id_form(key_id: &str) -> Result<(), AcdpError> {
791 if !key_id.starts_with("did:key:") {
792 return Ok(());
793 }
794 acdp_did::key::resolve_did_key_url(key_id).map_err(|e| {
795 AcdpError::SchemaViolation(format!(
796 "signature.key_id is not a well-formed did:key URL: {e}"
797 ))
798 })?;
799 Ok(())
800}
801
802fn validate_agent_did(did: &AgentDid) -> Result<(), AcdpError> {
803 if did.as_str().starts_with("did:key:") {
804 AgentDid::parse(did.as_str())?;
805 acdp_did::key::resolve_did_key(did.as_str()).map_err(|e| {
806 AcdpError::SchemaViolation(format!("agent_id is not a well-formed did:key: {e}"))
807 })?;
808 return Ok(());
809 }
810 AgentDid::parse_web(did.as_str())?;
811 Ok(())
812}
813
814fn validate_origin_registry(s: &str) -> Result<(), AcdpError> {
822 if s.is_empty() {
823 return Err(AcdpError::SchemaViolation(
824 "origin_registry must be a non-empty DNS hostname".into(),
825 ));
826 }
827 if s.starts_with("did:") {
828 return Err(AcdpError::SchemaViolation(format!(
829 "origin_registry must be a DNS hostname, not a DID URI (got '{s}'); \
830 use the bare authority — capabilities.registry_did carries the did:web form"
831 )));
832 }
833 if s.contains("://") {
834 return Err(AcdpError::SchemaViolation(format!(
835 "origin_registry must be a DNS hostname, not a URL (got '{s}')"
836 )));
837 }
838 if s.ends_with('.') || s.starts_with('.') {
839 return Err(AcdpError::SchemaViolation(format!(
840 "origin_registry must be a syntactically valid DNS hostname (got '{s}')"
841 )));
842 }
843 if !acdp_types::primitives::is_valid_dns_authority(s) {
849 return Err(AcdpError::SchemaViolation(format!(
850 "origin_registry '{s}' is not a valid DNS hostname (must be lowercase \
851 labels of [a-z0-9-] separated by dots, e.g. 'registry.example.com')"
852 )));
853 }
854 Ok(())
855}
856
857fn validate_loose_did(did: &AgentDid) -> Result<(), AcdpError> {
867 AgentDid::parse(did.as_str())?;
868 Ok(())
869}
870
871fn validate_namespaced_context_type(value: &str) -> Result<(), AcdpError> {
874 let (ns, name) = value.split_once(':').ok_or_else(|| {
876 AcdpError::SchemaViolation(format!(
877 "context_type '{value}' missing namespace separator"
878 ))
879 })?;
880 if ns.is_empty()
881 || !ns.chars().next().is_some_and(|c| c.is_ascii_lowercase())
882 || !ns
883 .chars()
884 .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
885 {
886 return Err(AcdpError::SchemaViolation(format!(
887 "context_type namespace '{ns}' must match [a-z][a-z0-9_]*"
888 )));
889 }
890 if name.is_empty()
891 || !name.chars().next().is_some_and(|c| c.is_ascii_lowercase())
892 || !name
893 .chars()
894 .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || matches!(c, '_' | '-'))
895 {
896 return Err(AcdpError::SchemaViolation(format!(
897 "context_type name '{name}' must match [a-z][a-z0-9_-]*"
898 )));
899 }
900 Ok(())
901}
902
/// Private extension trait that exposes the string carried by a context type
/// when there is one to pattern-check.
trait ContextTypeExt {
    /// Returns the `namespace:name` string to validate, or `None` when the
    /// context type carries no such string.
    fn namespaced_form(&self) -> Option<&str>;
}
906
907impl ContextTypeExt for ContextType {
908 fn namespaced_form(&self) -> Option<&str> {
909 match self {
910 ContextType::Custom(s) => Some(s.as_str()),
911 _ => None,
912 }
913 }
914}
915
916fn validate_semver_pattern(name: &str, value: &str) -> Result<(), AcdpError> {
919 let parts: Vec<&str> = value.split('.').collect();
920 let ok = parts.len() == 3
921 && parts
922 .iter()
923 .all(|p| !p.is_empty() && p.chars().all(|c| c.is_ascii_digit()));
924 if !ok {
925 return Err(AcdpError::SchemaViolation(format!(
926 "{name} '{value}' must match the semver pattern ^\\d+\\.\\d+\\.\\d+$"
927 )));
928 }
929 Ok(())
930}
931
932fn validate_signature_length(algorithm: &str, value_b64: &str) -> Result<(), AcdpError> {
933 let expected = match algorithm {
934 "ed25519" => Some(ED25519_SIG_B64_LEN),
935 "ecdsa-p256" => Some(ECDSA_P256_SIG_B64_LEN),
936 _ => None,
937 };
938 if let Some(n) = expected {
939 if value_b64.len() != n {
940 return Err(AcdpError::InvalidSignature(format!(
941 "signature.value for '{algorithm}' must be {n} base64 chars, got {}",
942 value_b64.len()
943 )));
944 }
945 }
946 Ok(())
947}
948
949#[cfg(test)]
952mod tests {
953 use super::*;
954 use acdp_types::data_ref::DataRefType;
955 use serde_json::json;
956
957 fn embedded_json(v: serde_json::Value) -> EmbeddedContent {
958 EmbeddedContent {
959 encoding: EmbeddedEncoding::Json,
960 content: v,
961 }
962 }
963
964 #[test]
967 fn origin_registry_accepts_valid_hostname() {
968 validate_origin_registry("registry.example.com").unwrap();
969 validate_origin_registry("reg.example").unwrap();
970 validate_origin_registry("a-b-c.io").unwrap();
971 }
972
973 #[test]
974 fn origin_registry_rejects_uppercase() {
975 assert!(matches!(
976 validate_origin_registry("REGISTRY.EXAMPLE.COM"),
977 Err(AcdpError::SchemaViolation(_))
978 ));
979 }
980
981 #[test]
982 fn origin_registry_rejects_underscore() {
983 assert!(matches!(
984 validate_origin_registry("registry_example.com"),
985 Err(AcdpError::SchemaViolation(_))
986 ));
987 }
988
989 #[test]
990 fn origin_registry_rejects_hyphen_label_edges() {
991 assert!(matches!(
992 validate_origin_registry("registry-.com"),
993 Err(AcdpError::SchemaViolation(_))
994 ));
995 assert!(matches!(
996 validate_origin_registry("-registry.example.com"),
997 Err(AcdpError::SchemaViolation(_))
998 ));
999 }
1000
1001 #[test]
1004 fn data_ref_neither_location_nor_embedded_rejected() {
1005 let dr = DataRef {
1006 ref_type: DataRefType::PrimaryResult,
1007 description: None,
1008 size_bytes: None,
1009 format: None,
1010 schema_version: None,
1011 content_hash: None,
1012 location: None,
1013 embedded: None,
1014 extensions: serde_json::Map::new(),
1015 };
1016 assert!(matches!(
1017 validate_data_ref(&dr),
1018 Err(AcdpError::SchemaViolation(_))
1019 ));
1020 }
1021
1022 #[test]
1023 fn data_ref_both_location_and_embedded_rejected() {
1024 let dr = DataRef {
1025 ref_type: DataRefType::PrimaryResult,
1026 description: None,
1027 size_bytes: None,
1028 format: None,
1029 schema_version: None,
1030 content_hash: None,
1031 location: Some(Location::Uri("https://x/y".into())),
1032 embedded: Some(embedded_json(json!({"a": 1}))),
1033 extensions: serde_json::Map::new(),
1034 };
1035 assert!(matches!(
1036 validate_data_ref(&dr),
1037 Err(AcdpError::SchemaViolation(_))
1038 ));
1039 }
1040
1041 #[test]
1044 fn uri_credentials_rejected() {
1045 let dr = DataRef::uri(DataRefType::RawData, "https://user:pass@example.com/data");
1046 assert!(matches!(
1047 validate_data_ref(&dr),
1048 Err(AcdpError::SchemaViolation(_))
1049 ));
1050 }
1051
1052 #[test]
1053 fn uri_without_scheme_rejected() {
1054 let dr = DataRef::uri(DataRefType::RawData, "no-scheme");
1055 assert!(matches!(
1056 validate_data_ref(&dr),
1057 Err(AcdpError::SchemaViolation(_))
1058 ));
1059 }
1060
1061 #[test]
1062 fn uri_too_long_rejected() {
1063 let long_uri = format!("https://x.com/{}", "a".repeat(MAX_URI_LEN));
1064 let dr = DataRef::uri(DataRefType::RawData, long_uri);
1065 assert!(matches!(
1066 validate_data_ref(&dr),
1067 Err(AcdpError::SchemaViolation(_))
1068 ));
1069 }
1070
1071 #[test]
1074 fn structured_locator_missing_scheme_rejected() {
1075 let mut map = serde_json::Map::new();
1076 map.insert("offset".into(), json!(42));
1077 let dr = DataRef {
1078 ref_type: DataRefType::RawData,
1079 description: None,
1080 size_bytes: None,
1081 format: None,
1082 schema_version: None,
1083 content_hash: None,
1084 location: Some(Location::Structured(map)),
1085 embedded: None,
1086 extensions: serde_json::Map::new(),
1087 };
1088 assert!(matches!(
1089 validate_data_ref(&dr),
1090 Err(AcdpError::SchemaViolation(_))
1091 ));
1092 }
1093
1094 #[test]
1095 fn structured_locator_bad_scheme_rejected() {
1096 let err =
1100 DataRef::try_structured(DataRefType::RawData, "not_dotted", serde_json::Map::new())
1101 .unwrap_err();
1102 assert!(matches!(err, AcdpError::SchemaViolation(_)));
1103
1104 let mut bad = serde_json::Map::new();
1107 bad.insert(
1108 "scheme".into(),
1109 serde_json::Value::String("not_dotted".into()),
1110 );
1111 let dr = DataRef {
1112 ref_type: DataRefType::RawData,
1113 description: None,
1114 size_bytes: None,
1115 format: None,
1116 schema_version: None,
1117 content_hash: None,
1118 location: Some(Location::Structured(bad)),
1119 embedded: None,
1120 extensions: serde_json::Map::new(),
1121 };
1122 assert!(matches!(
1123 validate_data_ref(&dr),
1124 Err(AcdpError::SchemaViolation(_))
1125 ));
1126 }
1127
1128 #[test]
1129 fn structured_locator_valid() {
1130 let mut extra = serde_json::Map::new();
1131 extra.insert("topic".into(), json!("events"));
1132 let dr = DataRef::structured(DataRefType::RawData, "kafka.offset", extra);
1133 validate_data_ref(&dr).unwrap();
1134 }
1135
1136 #[test]
1139 fn embedded_utf8_must_be_string() {
1140 let dr = DataRef {
1141 ref_type: DataRefType::PrimaryResult,
1142 description: None,
1143 size_bytes: None,
1144 format: None,
1145 schema_version: None,
1146 content_hash: None,
1147 location: None,
1148 embedded: Some(EmbeddedContent {
1149 encoding: EmbeddedEncoding::Utf8,
1150 content: json!(42),
1151 }),
1152 extensions: serde_json::Map::new(),
1153 };
1154 assert!(matches!(
1155 validate_data_ref(&dr),
1156 Err(AcdpError::SchemaViolation(_))
1157 ));
1158 }
1159
1160 #[test]
1161 fn embedded_too_large_rejected() {
1162 let big = "a".repeat(70 * 1024);
1164 let dr = DataRef::embedded_utf8(DataRefType::PrimaryResult, big);
1165 assert!(matches!(
1166 validate_data_ref(&dr),
1167 Err(AcdpError::EmbeddedTooLarge(_))
1168 ));
1169 }
1170
1171 #[test]
1174 fn embedded_hash_json_round_trip() {
1175 let emb = embedded_json(json!({"b": 2, "a": 1}));
1176 let h = compute_embedded_hash(&emb).unwrap();
1177 let expected = {
1179 let bytes = b"{\"a\":1,\"b\":2}";
1180 format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
1181 };
1182 assert_eq!(h.as_str(), expected);
1183 }
1184
1185 #[test]
1186 fn embedded_hash_utf8() {
1187 let emb = EmbeddedContent {
1188 encoding: EmbeddedEncoding::Utf8,
1189 content: json!("hello"),
1190 };
1191 let h = compute_embedded_hash(&emb).unwrap();
1192 let expected = format!("sha256:{}", hex::encode(Sha256::digest(b"hello")));
1193 assert_eq!(h.as_str(), expected);
1194 }
1195
1196 #[test]
1197 fn embedded_hash_base64() {
1198 let raw = b"binary data";
1199 let b64 = STANDARD.encode(raw);
1200 let emb = EmbeddedContent {
1201 encoding: EmbeddedEncoding::Base64,
1202 content: json!(b64),
1203 };
1204 let h = compute_embedded_hash(&emb).unwrap();
1205 let expected = format!("sha256:{}", hex::encode(Sha256::digest(raw)));
1206 assert_eq!(h.as_str(), expected);
1207 }
1208
1209 #[test]
1210 fn verify_embedded_hash_mismatch_detected() {
1211 let emb = embedded_json(json!({"x": 1}));
1212 let dr = DataRef {
1213 ref_type: DataRefType::PrimaryResult,
1214 description: None,
1215 size_bytes: None,
1216 format: None,
1217 schema_version: None,
1218 content_hash: Some(ContentHash("sha256:0000".into())),
1219 location: None,
1220 embedded: Some(emb),
1221 extensions: serde_json::Map::new(),
1222 };
1223 assert!(matches!(
1224 verify_embedded_hash(&dr),
1225 Err(AcdpError::DataRefHashMismatch(_))
1226 ));
1227 }
1228
1229 #[test]
1232 fn metadata_too_many_properties_rejected() {
1233 let mut obj = serde_json::Map::new();
1234 for i in 0..101 {
1235 obj.insert(format!("k{i}"), json!(i));
1236 }
1237 assert!(matches!(
1238 validate_metadata(&serde_json::Value::Object(obj)),
1239 Err(AcdpError::SchemaViolation(_))
1240 ));
1241 }
1242
1243 #[test]
1244 fn metadata_too_deep_rejected() {
1245 let mut v = json!("leaf");
1247 for _ in 0..10 {
1248 let mut o = serde_json::Map::new();
1249 o.insert("a".into(), v);
1250 v = serde_json::Value::Object(o);
1251 }
1252 assert!(matches!(
1253 validate_metadata(&v),
1254 Err(AcdpError::SchemaViolation(_))
1255 ));
1256 }
1257
1258 #[test]
1259 fn metadata_too_large_rejected() {
1260 let big = "a".repeat(70 * 1024);
1261 let v = json!({"big": big});
1262 assert!(matches!(
1263 validate_metadata(&v),
1264 Err(AcdpError::SchemaViolation(_))
1265 ));
1266 }
1267
1268 #[test]
1269 fn metadata_must_be_object() {
1270 assert!(matches!(
1271 validate_metadata(&json!([1, 2, 3])),
1272 Err(AcdpError::SchemaViolation(_))
1273 ));
1274 }
1275
1276 #[test]
1279 fn public_with_audience_rejected() {
1280 let aud = vec![AgentDid::new("did:web:x")];
1281 assert!(matches!(
1282 validate_visibility_audience(&Visibility::Public, Some(&aud)),
1283 Err(AcdpError::SchemaViolation(_))
1284 ));
1285 }
1286
1287 #[test]
1288 fn public_with_empty_audience_ok() {
1289 validate_visibility_audience(&Visibility::Public, Some(&[])).unwrap();
1290 validate_visibility_audience(&Visibility::Public, None).unwrap();
1291 }
1292
1293 #[test]
1294 fn restricted_without_audience_rejected() {
1295 assert!(matches!(
1296 validate_visibility_audience(&Visibility::Restricted, None),
1297 Err(AcdpError::SchemaViolation(_))
1298 ));
1299 }
1300
#[test]
/// Building a publish request whose data period starts after it ends fails
/// with `AcdpError::SchemaViolation`.
fn data_period_start_after_end_rejected_via_builder() {
    use acdp_crypto::SigningKey;
    use acdp_producer::Producer;
    use acdp_types::body::DataPeriod;
    use chrono::TimeZone;

    // Midnight UTC on the first day of the given month of 2026.
    let first_of_month = |month| {
        chrono::Utc
            .with_ymd_and_hms(2026, month, 1, 0, 0, 0)
            .unwrap()
    };
    // June start, January end: the interval is inverted.
    let inverted = DataPeriod {
        start: first_of_month(6),
        end: first_of_month(1),
    };

    let producer = Producer::new(
        SigningKey::from_bytes(&[0u8; 32]),
        AgentDid::new("did:web:agents.example.com:test"),
        "did:web:agents.example.com:test#key-1",
    );
    let outcome = producer
        .publish_request()
        .title("t")
        .context_type(ContextType::DataSnapshot)
        .data_period(inverted)
        .build();

    assert!(matches!(outcome, Err(AcdpError::SchemaViolation(_))));
}
1327
#[test]
/// Exercises `validate_tag` with three tags it must accept and three it must
/// refuse (leading hyphen, embedded space, empty string).
fn tag_pattern_validation() {
    for accepted in ["hello", "Q1-2026", "a_b.c"] {
        validate_tag(accepted).unwrap();
    }
    for refused in ["-bad", "space here", ""] {
        assert!(validate_tag(refused).is_err());
    }
}
1342
#[test]
/// A tag list containing the same tag twice is refused by `validate_tags`.
fn duplicate_tags_rejected() {
    // "a" appears at both ends of the list.
    let with_repeat: Vec<String> = ["a", "b", "a"].iter().map(|t| t.to_string()).collect();
    let outcome = validate_tags(&with_repeat);
    assert!(outcome.is_err());
}
1348
#[test]
/// For `ed25519`, a 4-character signature is refused and an 88-character one
/// is accepted; an unrecognised algorithm name is accepted with a short
/// signature.
fn ed25519_sig_must_be_88_chars() {
    let too_short = validate_signature_length("ed25519", "AAAA");
    assert!(too_short.is_err());

    let right_length = "A".repeat(88);
    validate_signature_length("ed25519", &right_length).unwrap();

    validate_signature_length("future-alg", "any").unwrap();
}
1358
#[test]
/// `validate_namespaced_context_type` accepts a lowercase `namespace:name`
/// value and refuses uppercase in either part as well as a value with no
/// colon.
fn namespaced_context_type_pattern() {
    validate_namespaced_context_type("finance:portfolio_snapshot").unwrap();

    for refused in ["Finance:portfolio", "finance:Portfolio", "no-colon"] {
        assert!(validate_namespaced_context_type(refused).is_err());
    }
}
1368
#[test]
/// `validate_semver_pattern` accepts plain `MAJOR.MINOR.PATCH` strings and
/// refuses a pre-release suffix, a two-component version, and non-numeric
/// components.
fn acdp_version_pattern_rejects_non_semver() {
    for well_formed in ["0.1.0", "10.20.30"] {
        validate_semver_pattern("acdp_version", well_formed).unwrap();
    }
    for malformed in ["0.1.0-rc.1", "0.0", "vee.zero.zero"] {
        assert!(validate_semver_pattern("acdp_version", malformed).is_err());
    }
}
1380
#[test]
/// Building a publish request whose `derived_from` holds a string that is not
/// a context id fails with `AcdpError::SchemaViolation`.
fn derived_from_malformed_ctx_id_rejected() {
    use acdp_crypto::SigningKey;
    use acdp_producer::Producer;

    let malformed_parent = CtxId("not-a-ctx-id".into());

    let producer = Producer::new(
        SigningKey::from_bytes(&[0u8; 32]),
        AgentDid::new("did:web:agents.example.com:test"),
        "did:web:agents.example.com:test#key-1",
    );
    let outcome = producer
        .publish_request()
        .title("t")
        .context_type(ContextType::DataSnapshot)
        .derived_from(vec![malformed_parent])
        .build();

    assert!(matches!(outcome, Err(AcdpError::SchemaViolation(_))));
}
1402
#[test]
/// `verify_embedded_hash` reports `DataRefHashMismatch` when the declared
/// content hash does not correspond to the embedded JSON payload.
fn embedded_content_hash_mismatch_caught() {
    use acdp_types::data_ref::DataRefType;

    // Placeholder digest paired with a small embedded JSON payload.
    let declared_digest = ContentHash("sha256:0000".into());
    let payload = EmbeddedContent {
        encoding: EmbeddedEncoding::Json,
        content: json!({"x": 1}),
    };

    let data_ref = DataRef {
        ref_type: DataRefType::PrimaryResult,
        description: None,
        size_bytes: None,
        format: None,
        schema_version: None,
        content_hash: Some(declared_digest),
        location: None,
        embedded: Some(payload),
        extensions: serde_json::Map::new(),
    };

    let outcome = verify_embedded_hash(&data_ref);
    assert!(matches!(outcome, Err(AcdpError::DataRefHashMismatch(_))));
}
1427
#[test]
/// `validate_unique_array` refuses an audience list that names the same DID
/// twice, reporting `AcdpError::SchemaViolation`.
fn audience_uniqueness_rejected() {
    // Two identical entries, each constructed independently.
    let repeated: Vec<AgentDid> = (0..2)
        .map(|_| AgentDid::new("did:web:a.example.com"))
        .collect();

    let outcome = validate_unique_array("audience", &repeated, MAX_AUDIENCE);
    assert!(matches!(outcome, Err(AcdpError::SchemaViolation(_))));
}
1438
#[test]
/// An empty extensions map passes `validate_extensions`.
fn extensions_empty_ok() {
    let no_extensions = serde_json::Map::new();
    validate_extensions(&no_extensions).unwrap();
}
1445
#[test]
/// A small extensions map with a string entry and a nested object entry passes
/// `validate_extensions`.
fn extensions_small_forward_compat_accepted() {
    let entries = vec![
        ("priority".to_string(), json!("high")),
        ("custom".to_string(), json!({"k": [1, 2, 3]})),
    ];
    let ext: serde_json::Map<String, serde_json::Value> = entries.into_iter().collect();

    validate_extensions(&ext).unwrap();
}
1454
#[test]
/// An extensions map with `MAX_METADATA_PROPERTIES + 1` keys is refused with
/// `AcdpError::SchemaViolation`.
fn extensions_too_many_properties_rejected() {
    // The inclusive range yields exactly one key more than the limit.
    let over_limit: serde_json::Map<String, serde_json::Value> = (0..=MAX_METADATA_PROPERTIES)
        .map(|i| (format!("k{i}"), json!(i)))
        .collect();

    let outcome = validate_extensions(&over_limit);
    assert!(matches!(outcome, Err(AcdpError::SchemaViolation(_))));
}
1464
#[test]
/// An extensions map holding a string one byte longer than
/// `MAX_METADATA_JCS_BYTES` is refused with `AcdpError::SchemaViolation`.
fn extensions_oversized_jcs_rejected() {
    let blob = "x".repeat(MAX_METADATA_JCS_BYTES + 1);

    let mut ext = serde_json::Map::new();
    ext.insert("blob".into(), json!(blob));

    let outcome = validate_extensions(&ext);
    assert!(matches!(outcome, Err(AcdpError::SchemaViolation(_))));
}
1472
#[test]
/// An extension value wrapped in `MAX_METADATA_DEPTH + 2` nested objects is
/// refused with `AcdpError::SchemaViolation`.
fn extensions_too_deep_rejected() {
    // Wrap the integer leaf in single-key objects: {"n": {"n": ... 0}}.
    let nested = (0..(MAX_METADATA_DEPTH + 2)).fold(json!(0), |inner, _| json!({ "n": inner }));

    let mut ext = serde_json::Map::new();
    ext.insert("deep".into(), nested);

    let outcome = validate_extensions(&ext);
    assert!(matches!(outcome, Err(AcdpError::SchemaViolation(_))));
}
1485
#[test]
/// For an integer wrapped in 400 nested arrays, `json_depth` reports a depth
/// above `MAX_METADATA_DEPTH` and `try_canonicalize_value` returns an error.
fn json_depth_clamps_past_scan_budget() {
    let nested = (0..400).fold(json!(0), |inner, _| json!([inner]));

    let reported_depth = json_depth(&nested);
    assert!(reported_depth > MAX_METADATA_DEPTH);

    let canonical = acdp_crypto::try_canonicalize_value(&nested);
    assert!(canonical.is_err());
}
1499}