1use acdp_primitives::primitives::ContentHash;
9use serde::{Deserialize, Serialize};
10
11#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
14#[serde(rename_all = "snake_case")]
15pub enum DataRefType {
16 PrimaryResult,
18 RawData,
20 SupportingInfo,
22 DerivedData,
24}
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct DataRef {
33 #[serde(rename = "type")]
35 pub ref_type: DataRefType,
36
37 #[serde(
42 default,
43 skip_serializing_if = "Option::is_none",
44 deserialize_with = "crate::serde_helpers::de_present"
45 )]
46 pub description: Option<String>,
47
48 #[serde(
50 default,
51 skip_serializing_if = "Option::is_none",
52 deserialize_with = "crate::serde_helpers::de_present"
53 )]
54 pub size_bytes: Option<u64>,
55
56 #[serde(
58 default,
59 skip_serializing_if = "Option::is_none",
60 deserialize_with = "crate::serde_helpers::de_present"
61 )]
62 pub format: Option<String>,
63
64 #[serde(
66 default,
67 skip_serializing_if = "Option::is_none",
68 deserialize_with = "crate::serde_helpers::de_present"
69 )]
70 pub schema_version: Option<String>,
71
72 #[serde(
76 default,
77 skip_serializing_if = "Option::is_none",
78 deserialize_with = "crate::serde_helpers::de_present"
79 )]
80 pub content_hash: Option<ContentHash>,
81
82 #[serde(
85 default,
86 skip_serializing_if = "Option::is_none",
87 deserialize_with = "crate::serde_helpers::de_present"
88 )]
89 pub location: Option<Location>,
90
91 #[serde(
93 default,
94 skip_serializing_if = "Option::is_none",
95 deserialize_with = "crate::serde_helpers::de_present"
96 )]
97 pub embedded: Option<EmbeddedContent>,
98
99 #[serde(flatten)]
111 pub extensions: serde_json::Map<String, serde_json::Value>,
112}
113
114#[derive(Debug, Clone, Serialize, Deserialize)]
117#[serde(untagged)]
118pub enum Location {
119 Uri(String),
123 Structured(serde_json::Map<String, serde_json::Value>),
127}
128
129impl DataRef {
130 pub fn uri(ref_type: DataRefType, uri: impl Into<String>) -> Self {
132 Self {
133 ref_type,
134 description: None,
135 size_bytes: None,
136 format: None,
137 schema_version: None,
138 content_hash: None,
139 location: Some(Location::Uri(uri.into())),
140 embedded: None,
141 extensions: serde_json::Map::new(),
142 }
143 }
144
145 pub fn uri_verified(ref_type: DataRefType, uri: impl Into<String>, hash: ContentHash) -> Self {
147 Self {
148 ref_type,
149 description: None,
150 size_bytes: None,
151 format: None,
152 schema_version: None,
153 content_hash: Some(hash),
154 location: Some(Location::Uri(uri.into())),
155 embedded: None,
156 extensions: serde_json::Map::new(),
157 }
158 }
159
160 pub fn structured(
170 ref_type: DataRefType,
171 scheme: impl Into<String>,
172 extra: serde_json::Map<String, serde_json::Value>,
173 ) -> Self {
174 let scheme: String = scheme.into();
175 debug_assert!(
176 is_dotted_namespace_scheme(&scheme),
177 "DataRef::structured: scheme '{scheme}' does not match \
178 ^[a-z][a-z0-9-]*(\\.[a-z][a-z0-9-]*)+$ — pass a dotted-namespace identifier \
179 like 'kafka.offset' or use try_structured for runtime checking"
180 );
181 let mut map = extra;
182 map.insert("scheme".into(), serde_json::Value::String(scheme));
183 Self {
184 ref_type,
185 description: None,
186 size_bytes: None,
187 format: None,
188 schema_version: None,
189 content_hash: None,
190 location: Some(Location::Structured(map)),
191 embedded: None,
192 extensions: serde_json::Map::new(),
193 }
194 }
195
196 pub fn try_structured(
200 ref_type: DataRefType,
201 scheme: impl Into<String>,
202 extra: serde_json::Map<String, serde_json::Value>,
203 ) -> Result<Self, acdp_primitives::error::AcdpError> {
204 let scheme: String = scheme.into();
205 if !is_dotted_namespace_scheme(&scheme) {
206 return Err(acdp_primitives::error::AcdpError::SchemaViolation(format!(
207 "structured locator scheme '{scheme}' must match \
208 ^[a-z][a-z0-9-]*(\\.[a-z][a-z0-9-]*)+$"
209 )));
210 }
211 let mut map = extra;
212 map.insert("scheme".into(), serde_json::Value::String(scheme));
213 Ok(Self {
214 ref_type,
215 description: None,
216 size_bytes: None,
217 format: None,
218 schema_version: None,
219 content_hash: None,
220 location: Some(Location::Structured(map)),
221 embedded: None,
222 extensions: serde_json::Map::new(),
223 })
224 }
225
226 pub fn embedded_json(ref_type: DataRefType, content: serde_json::Value) -> Self {
228 Self {
229 ref_type,
230 description: None,
231 size_bytes: None,
232 format: Some("application/json".into()),
233 schema_version: None,
234 content_hash: None,
235 location: None,
236 embedded: Some(EmbeddedContent {
237 encoding: EmbeddedEncoding::Json,
238 content,
239 }),
240 extensions: serde_json::Map::new(),
241 }
242 }
243
244 pub fn embedded_utf8(ref_type: DataRefType, text: impl Into<String>) -> Self {
246 Self {
247 ref_type,
248 description: None,
249 size_bytes: None,
250 format: None,
251 schema_version: None,
252 content_hash: None,
253 location: None,
254 embedded: Some(EmbeddedContent {
255 encoding: EmbeddedEncoding::Utf8,
256 content: serde_json::Value::String(text.into()),
257 }),
258 extensions: serde_json::Map::new(),
259 }
260 }
261
262 pub fn embedded_base64(ref_type: DataRefType, b64: impl Into<String>) -> Self {
264 Self {
265 ref_type,
266 description: None,
267 size_bytes: None,
268 format: None,
269 schema_version: None,
270 content_hash: None,
271 location: None,
272 embedded: Some(EmbeddedContent {
273 encoding: EmbeddedEncoding::Base64,
274 content: serde_json::Value::String(b64.into()),
275 }),
276 extensions: serde_json::Map::new(),
277 }
278 }
279
280 pub fn primary_result_uri(uri: impl Into<String>) -> Self {
287 Self::uri(DataRefType::PrimaryResult, uri)
288 }
289 pub fn raw_data_uri(uri: impl Into<String>) -> Self {
291 Self::uri(DataRefType::RawData, uri)
292 }
293 pub fn supporting_info_uri(uri: impl Into<String>) -> Self {
295 Self::uri(DataRefType::SupportingInfo, uri)
296 }
297 pub fn derived_data_uri(uri: impl Into<String>) -> Self {
299 Self::uri(DataRefType::DerivedData, uri)
300 }
301
302 pub fn primary_result_json(content: serde_json::Value) -> Self {
304 Self::embedded_json(DataRefType::PrimaryResult, content)
305 }
306 pub fn derived_data_json(content: serde_json::Value) -> Self {
308 Self::embedded_json(DataRefType::DerivedData, content)
309 }
310}
311
312#[derive(Debug, Clone, Serialize, Deserialize)]
314#[serde(deny_unknown_fields)]
315pub struct EmbeddedContent {
316 pub encoding: EmbeddedEncoding,
318 pub content: serde_json::Value,
321}
322
/// Reports whether `s` matches `^[a-z][a-z0-9-]*(\.[a-z][a-z0-9-]*)+$`.
///
/// In words: two or more `.`-separated segments, each starting with an ASCII lowercase
/// letter and continuing with ASCII lowercase letters, ASCII digits, or `-`.
fn is_dotted_namespace_scheme(s: &str) -> bool {
    /// Checks a single segment: non-empty, lowercase-letter start, restricted alphabet.
    fn is_valid_segment(segment: &str) -> bool {
        let mut rest = segment.chars();
        match rest.next() {
            Some(first) if first.is_ascii_lowercase() => {
                rest.all(|c| matches!(c, 'a'..='z' | '0'..='9' | '-'))
            }
            // Empty segment (leading, trailing, or doubled dot) or a bad first character.
            _ => false,
        }
    }

    let mut segment_count = 0usize;
    for segment in s.split('.') {
        if !is_valid_segment(segment) {
            return false;
        }
        segment_count += 1;
    }
    // A string without any dot yields exactly one segment and is therefore rejected.
    segment_count >= 2
}
337
338#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
340#[serde(rename_all = "lowercase")]
341pub enum EmbeddedEncoding {
342 Json,
344 Utf8,
346 Base64,
348}
349
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Borrows the structured locator map of `dr`, failing the test if the location is
    /// anything else.
    fn structured_map(dr: &DataRef) -> &serde_json::Map<String, serde_json::Value> {
        let Some(Location::Structured(map)) = &dr.location else {
            panic!("expected structured location, got {:?}", dr.location);
        };
        map
    }

    // --- scheme validation -------------------------------------------------------------

    #[test]
    fn dotted_namespace_scheme_accepts_valid() {
        let valid = [
            "kafka.offset",
            "ipfs.cid",
            "db.row",
            "a.b",
            "a1.b2.c3",
            "with-hyphen.part-two",
        ];
        for s in valid {
            assert!(is_dotted_namespace_scheme(s), "should accept {s:?}");
        }
    }

    #[test]
    fn dotted_namespace_scheme_rejects_invalid() {
        let invalid = [
            "",
            "nodot",
            "Kafka.offset",
            "kafka.Offset",
            "kafka..offset",
            ".leading",
            "trailing.",
            "1kafka.offset",
            "kafka.1offset",
            "kafka.off_set",
        ];
        for s in invalid {
            assert!(!is_dotted_namespace_scheme(s), "should reject {s:?}");
        }
    }

    // --- structured locators -----------------------------------------------------------

    #[test]
    fn try_structured_ok_inserts_scheme_and_extra() {
        let extra: serde_json::Map<String, serde_json::Value> =
            [("offset".to_string(), json!(42))].into_iter().collect();
        let dr = DataRef::try_structured(DataRefType::RawData, "kafka.offset", extra).unwrap();

        let map = structured_map(&dr);
        assert_eq!(map["scheme"], json!("kafka.offset"));
        assert_eq!(map["offset"], json!(42));
        assert!(dr.embedded.is_none(), "structured locator has no embedded");
    }

    #[test]
    fn try_structured_rejects_bad_scheme() {
        let outcome =
            DataRef::try_structured(DataRefType::RawData, "nodot", serde_json::Map::new());
        let err = outcome.unwrap_err();
        assert!(
            matches!(err, acdp_primitives::error::AcdpError::SchemaViolation(_)),
            "bad scheme must be SchemaViolation, got {err:?}"
        );
    }

    #[test]
    fn structured_inserts_scheme_for_valid_input() {
        let dr = DataRef::structured(DataRefType::RawData, "ipfs.cid", serde_json::Map::new());
        assert_eq!(structured_map(&dr)["scheme"], json!("ipfs.cid"));
    }

    // --- URI constructors --------------------------------------------------------------

    #[test]
    fn uri_constructor_sets_location_without_hash() {
        let dr = DataRef::uri(DataRefType::PrimaryResult, "https://x.example/d");
        assert_eq!(dr.ref_type, DataRefType::PrimaryResult);
        assert!(matches!(dr.location, Some(Location::Uri(ref u)) if u == "https://x.example/d"));
        assert!(dr.content_hash.is_none());
        assert!(dr.embedded.is_none());
    }

    #[test]
    fn uri_verified_carries_content_hash() {
        let hash = ContentHash(
            "sha256:f170150ddbf59d99794e7797824591b374d459782084597b644ecc57a41031b5".into(),
        );
        let dr = DataRef::uri_verified(DataRefType::RawData, "https://x/d", hash.clone());
        assert_eq!(dr.content_hash, Some(hash));
        assert!(matches!(dr.location, Some(Location::Uri(_))));
    }

    #[test]
    fn type_bound_uri_shortcuts_pick_the_right_type() {
        let primary = DataRef::primary_result_uri("u");
        let raw = DataRef::raw_data_uri("u");
        let supporting = DataRef::supporting_info_uri("u");
        let derived = DataRef::derived_data_uri("u");

        assert_eq!(primary.ref_type, DataRefType::PrimaryResult);
        assert_eq!(raw.ref_type, DataRefType::RawData);
        assert_eq!(supporting.ref_type, DataRefType::SupportingInfo);
        assert_eq!(derived.ref_type, DataRefType::DerivedData);
    }

    // --- embedded constructors ---------------------------------------------------------

    #[test]
    fn embedded_json_sets_json_encoding_and_format() {
        let dr = DataRef::embedded_json(DataRefType::PrimaryResult, json!({"k": 1}));
        let payload = dr.embedded.as_ref().expect("embedded set");
        assert_eq!(payload.encoding, EmbeddedEncoding::Json);
        assert_eq!(payload.content, json!({"k": 1}));
        assert_eq!(dr.format.as_deref(), Some("application/json"));
        assert!(dr.location.is_none(), "embedded ref has no location");
    }

    #[test]
    fn embedded_utf8_stores_text_as_json_string() {
        let dr = DataRef::embedded_utf8(DataRefType::SupportingInfo, "hello");
        let payload = dr.embedded.expect("embedded set");
        assert_eq!(payload.encoding, EmbeddedEncoding::Utf8);
        assert_eq!(payload.content, json!("hello"));
    }

    #[test]
    fn embedded_base64_stores_payload_as_json_string() {
        let dr = DataRef::embedded_base64(DataRefType::DerivedData, "aGVsbG8=");
        let payload = dr.embedded.expect("embedded set");
        assert_eq!(payload.encoding, EmbeddedEncoding::Base64);
        assert_eq!(payload.content, json!("aGVsbG8="));
    }

    #[test]
    fn type_bound_json_shortcuts_pick_the_right_type() {
        let primary = DataRef::primary_result_json(json!(1));
        let derived = DataRef::derived_data_json(json!(1));

        assert_eq!(primary.ref_type, DataRefType::PrimaryResult);
        assert_eq!(derived.ref_type, DataRefType::DerivedData);
    }

    // --- wire format -------------------------------------------------------------------

    #[test]
    fn data_ref_type_serializes_snake_case() {
        let expectations = [
            (DataRefType::PrimaryResult, "primary_result"),
            (DataRefType::RawData, "raw_data"),
            (DataRefType::SupportingInfo, "supporting_info"),
            (DataRefType::DerivedData, "derived_data"),
        ];
        for (variant, wire_name) in expectations {
            assert_eq!(serde_json::to_value(variant).unwrap(), json!(wire_name));
        }
    }

    #[test]
    fn embedded_content_rejects_unknown_field() {
        let raw = json!({"encoding": "utf8", "content": "x", "surprise": 1});
        let parsed = serde_json::from_value::<EmbeddedContent>(raw);
        assert!(parsed.is_err(), "unknown field must be rejected");
    }

    #[test]
    fn constructed_uri_ref_round_trips_through_json() {
        let original = DataRef::uri(DataRefType::PrimaryResult, "https://x/d");
        let wire = serde_json::to_value(&original).unwrap();

        assert_eq!(wire["type"], json!("primary_result"));
        assert_eq!(wire["location"], json!("https://x/d"));
        // `embedded` is `None`, so the key must be omitted rather than written as null.
        assert!(!wire.as_object().unwrap().contains_key("embedded"));

        let decoded: DataRef = serde_json::from_value(wire).unwrap();
        assert_eq!(decoded.ref_type, DataRefType::PrimaryResult);
    }
}