use crate::types::primitives::ContentHash;
use serde::{Deserialize, Serialize};
/// Classifies the role a [`DataRef`] plays.
///
/// Serialized with `snake_case` variant names, i.e. `"primary_result"`,
/// `"raw_data"`, `"supporting_info"` and `"derived_data"`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DataRefType {
/// Wire name: `primary_result`.
PrimaryResult,
/// Wire name: `raw_data`.
RawData,
/// Wire name: `supporting_info`.
SupportingInfo,
/// Wire name: `derived_data`.
DerivedData,
}
/// A typed reference to a piece of data, carried either as a [`Location`]
/// or as [`EmbeddedContent`], plus optional descriptive metadata.
///
/// Every optional field is omitted from the serialized output when it is
/// `None`, defaults to `None` when the key is absent on input, and is
/// deserialized through `crate::types::serde_helpers::de_present`.
/// NOTE(review): `de_present` is defined elsewhere; presumably it treats a
/// present-but-null value differently from an absent key — confirm there.
///
/// Nothing in this type enforces that exactly one of `location` /
/// `embedded` is set; the constructors on `DataRef` set one or the other.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataRef {
/// Role of the referenced data; serialized under the key `type`.
#[serde(rename = "type")]
pub ref_type: DataRefType,
/// Optional free-form description.
#[serde(
default,
skip_serializing_if = "Option::is_none",
deserialize_with = "crate::types::serde_helpers::de_present"
)]
pub description: Option<String>,
/// Optional size of the referenced data, in bytes (per the field name).
#[serde(
default,
skip_serializing_if = "Option::is_none",
deserialize_with = "crate::types::serde_helpers::de_present"
)]
pub size_bytes: Option<u64>,
/// Optional format string; `DataRef::embedded_json` sets it to
/// `application/json`.
#[serde(
default,
skip_serializing_if = "Option::is_none",
deserialize_with = "crate::types::serde_helpers::de_present"
)]
pub format: Option<String>,
/// Optional schema version string.
#[serde(
default,
skip_serializing_if = "Option::is_none",
deserialize_with = "crate::types::serde_helpers::de_present"
)]
pub schema_version: Option<String>,
/// Optional content hash; set by `DataRef::uri_verified`.
#[serde(
default,
skip_serializing_if = "Option::is_none",
deserialize_with = "crate::types::serde_helpers::de_present"
)]
pub content_hash: Option<ContentHash>,
/// Optional locator for externally stored data.
#[serde(
default,
skip_serializing_if = "Option::is_none",
deserialize_with = "crate::types::serde_helpers::de_present"
)]
pub location: Option<Location>,
/// Optional inline payload.
#[serde(
default,
skip_serializing_if = "Option::is_none",
deserialize_with = "crate::types::serde_helpers::de_present"
)]
pub embedded: Option<EmbeddedContent>,
/// Flattened map: keys not matched by the fields above are collected here
/// on input and written back at the same level on output.
#[serde(flatten)]
pub extensions: serde_json::Map<String, serde_json::Value>,
}
/// Where referenced data lives.
///
/// The enum is `untagged`: no variant name appears on the wire, and on input
/// the variants are tried in declaration order, so a JSON string becomes
/// [`Location::Uri`] and a JSON object becomes [`Location::Structured`].
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Location {
/// A locator given as a single string.
Uri(String),
/// A locator given as a JSON object. The `DataRef::structured` and
/// `DataRef::try_structured` constructors always store a `scheme` key in it.
Structured(serde_json::Map<String, serde_json::Value>),
}
impl DataRef {
pub fn uri(ref_type: DataRefType, uri: impl Into<String>) -> Self {
Self {
ref_type,
description: None,
size_bytes: None,
format: None,
schema_version: None,
content_hash: None,
location: Some(Location::Uri(uri.into())),
embedded: None,
extensions: serde_json::Map::new(),
}
}
pub fn uri_verified(ref_type: DataRefType, uri: impl Into<String>, hash: ContentHash) -> Self {
Self {
ref_type,
description: None,
size_bytes: None,
format: None,
schema_version: None,
content_hash: Some(hash),
location: Some(Location::Uri(uri.into())),
embedded: None,
extensions: serde_json::Map::new(),
}
}
pub fn structured(
ref_type: DataRefType,
scheme: impl Into<String>,
extra: serde_json::Map<String, serde_json::Value>,
) -> Self {
let scheme: String = scheme.into();
debug_assert!(
is_dotted_namespace_scheme(&scheme),
"DataRef::structured: scheme '{scheme}' does not match \
^[a-z][a-z0-9-]*(\\.[a-z][a-z0-9-]*)+$ — pass a dotted-namespace identifier \
like 'kafka.offset' or use try_structured for runtime checking"
);
let mut map = extra;
map.insert("scheme".into(), serde_json::Value::String(scheme));
Self {
ref_type,
description: None,
size_bytes: None,
format: None,
schema_version: None,
content_hash: None,
location: Some(Location::Structured(map)),
embedded: None,
extensions: serde_json::Map::new(),
}
}
pub fn try_structured(
ref_type: DataRefType,
scheme: impl Into<String>,
extra: serde_json::Map<String, serde_json::Value>,
) -> Result<Self, crate::error::AcdpError> {
let scheme: String = scheme.into();
if !is_dotted_namespace_scheme(&scheme) {
return Err(crate::error::AcdpError::SchemaViolation(format!(
"structured locator scheme '{scheme}' must match \
^[a-z][a-z0-9-]*(\\.[a-z][a-z0-9-]*)+$"
)));
}
let mut map = extra;
map.insert("scheme".into(), serde_json::Value::String(scheme));
Ok(Self {
ref_type,
description: None,
size_bytes: None,
format: None,
schema_version: None,
content_hash: None,
location: Some(Location::Structured(map)),
embedded: None,
extensions: serde_json::Map::new(),
})
}
pub fn embedded_json(ref_type: DataRefType, content: serde_json::Value) -> Self {
Self {
ref_type,
description: None,
size_bytes: None,
format: Some("application/json".into()),
schema_version: None,
content_hash: None,
location: None,
embedded: Some(EmbeddedContent {
encoding: EmbeddedEncoding::Json,
content,
}),
extensions: serde_json::Map::new(),
}
}
pub fn embedded_utf8(ref_type: DataRefType, text: impl Into<String>) -> Self {
Self {
ref_type,
description: None,
size_bytes: None,
format: None,
schema_version: None,
content_hash: None,
location: None,
embedded: Some(EmbeddedContent {
encoding: EmbeddedEncoding::Utf8,
content: serde_json::Value::String(text.into()),
}),
extensions: serde_json::Map::new(),
}
}
pub fn embedded_base64(ref_type: DataRefType, b64: impl Into<String>) -> Self {
Self {
ref_type,
description: None,
size_bytes: None,
format: None,
schema_version: None,
content_hash: None,
location: None,
embedded: Some(EmbeddedContent {
encoding: EmbeddedEncoding::Base64,
content: serde_json::Value::String(b64.into()),
}),
extensions: serde_json::Map::new(),
}
}
pub fn primary_result_uri(uri: impl Into<String>) -> Self {
Self::uri(DataRefType::PrimaryResult, uri)
}
pub fn raw_data_uri(uri: impl Into<String>) -> Self {
Self::uri(DataRefType::RawData, uri)
}
pub fn supporting_info_uri(uri: impl Into<String>) -> Self {
Self::uri(DataRefType::SupportingInfo, uri)
}
pub fn derived_data_uri(uri: impl Into<String>) -> Self {
Self::uri(DataRefType::DerivedData, uri)
}
pub fn primary_result_json(content: serde_json::Value) -> Self {
Self::embedded_json(DataRefType::PrimaryResult, content)
}
pub fn derived_data_json(content: serde_json::Value) -> Self {
Self::embedded_json(DataRefType::DerivedData, content)
}
}
/// An inline payload together with the encoding that says how to read it.
///
/// `deny_unknown_fields` makes deserialization fail if any key other than
/// `encoding` and `content` is present.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct EmbeddedContent {
/// How `content` is encoded.
pub encoding: EmbeddedEncoding,
/// The payload itself. The `DataRef::embedded_*` constructors store an
/// arbitrary JSON value for `Json` and a JSON string for `Utf8` / `Base64`;
/// this type does not enforce that pairing.
pub content: serde_json::Value,
}
/// Returns `true` when `s` is a dotted-namespace identifier, i.e. matches
/// `^[a-z][a-z0-9-]*(\.[a-z][a-z0-9-]*)+$`.
///
/// That means at least two `.`-separated segments, each starting with an
/// ASCII lowercase letter and continuing with ASCII lowercase letters,
/// digits or `-`.
fn is_dotted_namespace_scheme(s: &str) -> bool {
    /// Checks one segment: non-empty, leading lowercase letter, then only
    /// lowercase letters, digits or hyphens.
    fn segment_ok(segment: &str) -> bool {
        // Every accepted character is ASCII, so inspecting bytes is
        // equivalent to inspecting chars: any byte of a multi-byte
        // character fails the checks below.
        let mut rest = segment.bytes();
        match rest.next() {
            Some(head) if head.is_ascii_lowercase() => {
                rest.all(|b| b.is_ascii_lowercase() || b.is_ascii_digit() || b == b'-')
            }
            _ => false,
        }
    }

    let mut seen = 0usize;
    for segment in s.split('.') {
        if !segment_ok(segment) {
            return false;
        }
        seen += 1;
    }
    // A lone segment (no dot at all) is not a namespace.
    seen >= 2
}
/// Encoding tag for [`EmbeddedContent`].
///
/// Serialized with lowercase variant names: `"json"`, `"utf8"`, `"base64"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum EmbeddedEncoding {
/// Wire name: `json`.
Json,
/// Wire name: `utf8`.
Utf8,
/// Wire name: `base64`.
Base64,
}