use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use crate::Ref;
/// Kind of identifier a reference is keyed by.
///
/// Serde (de)serializes each variant under its lowercase name
/// (`rename_all = "lowercase"`). The enum is `#[non_exhaustive]`, so code in
/// other crates must include a wildcard arm when matching on it.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum SourceType {
/// DOI-keyed source; serialized as `"doi"`.
Doi,
/// arXiv-keyed source; serialized as `"arxiv"`.
Arxiv,
}
impl SourceType {
pub fn as_wire_str(&self) -> &'static str {
match self {
SourceType::Doi => "doi",
SourceType::Arxiv => "arxiv",
}
}
}
/// A fully-qualified reference: source type, identifier, resolver profile and
/// optional version.
///
/// All four fields feed into [`CanonicalRef::digest`]. The struct is
/// `#[non_exhaustive]`, so other crates cannot build it with a struct literal
/// and must go through [`CanonicalRef::new`] (or `Ref::promote`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub struct CanonicalRef {
/// Which kind of identifier `source_id` is.
pub source_type: SourceType,
/// The identifier string itself (e.g. the DOI or arXiv id text).
pub source_id: String,
/// Name of the resolver profile this reference is bound to; two references
/// that differ only in profile produce different digests.
pub resolver_profile: String,
/// Optional version tag. When `Some`, its bytes are appended to the digest
/// preimage; `None` contributes no bytes.
pub version: Option<String>,
}
impl CanonicalRef {
    /// Builds a [`CanonicalRef`] from its four components.
    ///
    /// `source_id` and `resolver_profile` accept anything convertible into a
    /// `String`; `version` is stored exactly as given.
    pub fn new(
        source_type: SourceType,
        source_id: impl Into<String>,
        resolver_profile: impl Into<String>,
        version: Option<String>,
    ) -> Self {
        let source_id = source_id.into();
        let resolver_profile = resolver_profile.into();
        Self {
            source_type,
            source_id,
            resolver_profile,
            version,
        }
    }

    /// Computes the SHA-256 digest identifying this reference.
    ///
    /// The hashed byte stream is, in order:
    ///
    /// ```text
    /// <source_type wire tag> 0x00 <source_id> 0x00 <resolver_profile> 0x00 [<version>]
    /// ```
    ///
    /// The version bytes are only present when `version` is `Some`. Because
    /// nothing follows the version, `None` and `Some("")` hash identically.
    pub fn digest(&self) -> [u8; 32] {
        // Single NUL byte written after each of the three mandatory fields.
        const SEPARATOR: [u8; 1] = [0x00];

        let terminated_fields = [
            self.source_type.as_wire_str(),
            self.source_id.as_str(),
            self.resolver_profile.as_str(),
        ];

        let mut hasher = Sha256::new();
        for field in terminated_fields {
            hasher.update(field.as_bytes());
            hasher.update(SEPARATOR);
        }
        // The optional version is the unterminated tail of the preimage.
        if let Some(version) = self.version.as_deref() {
            hasher.update(version.as_bytes());
        }
        hasher.finalize().into()
    }

    /// Returns [`CanonicalRef::digest`] encoded via `hex::encode`
    /// (64 lowercase hexadecimal characters).
    pub fn digest_hex(&self) -> String {
        let raw = self.digest();
        hex::encode(raw)
    }
}
impl Ref {
pub fn promote(&self, resolver_profile: &str, version: Option<&str>) -> CanonicalRef {
let (source_type, source_id) = match self {
Ref::Doi(d) => (SourceType::Doi, d.as_str().to_string()),
Ref::Arxiv(a) => (SourceType::Arxiv, a.as_str().to_string()),
};
CanonicalRef {
source_type,
source_id,
resolver_profile: resolver_profile.to_string(),
version: version.map(str::to_string),
}
}
}
#[cfg(test)]
// Test code may panic freely: a failed `expect`/`unwrap` is a test failure.
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use crate::{ArxivId, Doi};

    /// Independent re-statement of the digest format, used as the oracle.
    ///
    /// Joining the fields with NUL yields
    /// `type 0x00 id 0x00 profile 0x00 version`; an absent version becomes an
    /// empty tail, i.e. nothing after the third separator.
    fn reference_digest_hex(
        source_type: &str,
        source_id: &str,
        resolver_profile: &str,
        version: Option<&str>,
    ) -> String {
        let preimage = [
            source_type,
            source_id,
            resolver_profile,
            version.unwrap_or(""),
        ]
        .join("\0");
        hex::encode(Sha256::digest(preimage.as_bytes()))
    }

    /// Hex digest of a `CanonicalRef` built through the public constructor.
    fn digest_of(
        source_type: SourceType,
        source_id: &str,
        resolver_profile: &str,
        version: Option<&str>,
    ) -> String {
        CanonicalRef::new(
            source_type,
            source_id,
            resolver_profile,
            version.map(String::from),
        )
        .digest_hex()
    }

    /// Asserts that the production digest equals the reference digest.
    ///
    /// `wire_tag` is passed as a literal by each test so the oracle does not
    /// depend on `SourceType::as_wire_str`.
    #[track_caller]
    fn assert_matches_reference(
        source_type: SourceType,
        wire_tag: &str,
        source_id: &str,
        resolver_profile: &str,
        version: Option<&str>,
    ) {
        let actual = digest_of(source_type, source_id, resolver_profile, version);
        let expected = reference_digest_hex(wire_tag, source_id, resolver_profile, version);
        assert_eq!(actual, expected);
    }

    #[test]
    fn digest_matches_reference_doi_crossref_no_version() {
        assert_matches_reference(SourceType::Doi, "doi", "10.1234/foo", "crossref", None);
    }

    #[test]
    fn digest_matches_reference_doi_unpaywall_no_version() {
        assert_matches_reference(SourceType::Doi, "doi", "10.1234/foo", "unpaywall", None);
        // Changing only the resolver profile must change the digest.
        assert_ne!(
            digest_of(SourceType::Doi, "10.1234/foo", "unpaywall", None),
            digest_of(SourceType::Doi, "10.1234/foo", "crossref", None),
        );
    }

    #[test]
    fn digest_matches_reference_doi_oa_publisher_no_version() {
        assert_matches_reference(SourceType::Doi, "doi", "10.1234/foo", "oa-publisher", None);
    }

    #[test]
    fn digest_matches_reference_arxiv_no_version() {
        assert_matches_reference(SourceType::Arxiv, "arxiv", "2401.12345", "arxiv", None);
    }

    #[test]
    fn digest_matches_reference_arxiv_with_version_v2() {
        assert_matches_reference(
            SourceType::Arxiv,
            "arxiv",
            "2401.12345",
            "arxiv",
            Some("v2"),
        );
        // A versioned reference must not collide with the unversioned one.
        assert_ne!(
            digest_of(SourceType::Arxiv, "2401.12345", "arxiv", Some("v2")),
            digest_of(SourceType::Arxiv, "2401.12345", "arxiv", None),
        );
    }

    #[test]
    fn digest_matches_reference_arxiv_with_version_v10() {
        assert_matches_reference(
            SourceType::Arxiv,
            "arxiv",
            "2401.12345",
            "arxiv",
            Some("v10"),
        );
    }

    #[test]
    fn digest_matches_reference_doi_crossref_with_snapshot_date() {
        assert_matches_reference(
            SourceType::Doi,
            "doi",
            "10.1234/foo",
            "crossref",
            Some("2026-05-12"),
        );
    }

    #[test]
    fn digest_matches_reference_real_publisher_doi() {
        assert_matches_reference(
            SourceType::Doi,
            "doi",
            "10.1103/PhysRevLett.130.200601",
            "oa-publisher",
            None,
        );
    }

    #[test]
    fn digest_some_empty_string_version_equals_none_version() {
        // The version is the unterminated tail of the preimage, so an empty
        // version and an absent version hash the same bytes.
        let with_empty_version = digest_of(SourceType::Doi, "10.1234/foo", "crossref", Some(""));
        let without_version = digest_of(SourceType::Doi, "10.1234/foo", "crossref", None);
        assert_eq!(with_empty_version, without_version);
    }

    #[test]
    fn digest_matches_reference_old_style_arxiv() {
        assert_matches_reference(
            SourceType::Arxiv,
            "arxiv",
            "cond-mat/9501001",
            "arxiv",
            None,
        );
    }

    #[test]
    fn digest_hex_is_64_lowercase_hex_chars() {
        let s = digest_of(SourceType::Doi, "10.1234/foo", "crossref", None);
        assert_eq!(s.len(), 64);
        let is_lower_hex = s
            .bytes()
            .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'));
        assert!(
            is_lower_hex,
            "digest_hex must be lowercase ASCII hex, got {s}"
        );
    }

    #[test]
    fn ref_promote_doi_round_trip() {
        let promoted = Ref::Doi(Doi("10.1234/foo".into())).promote("crossref", None);
        assert_eq!(promoted.source_type, SourceType::Doi);
        assert_eq!(promoted.source_id, "10.1234/foo");
        assert_eq!(promoted.resolver_profile, "crossref");
        assert_eq!(promoted.version, None);
    }

    #[test]
    fn ref_promote_arxiv_with_version_round_trip() {
        let promoted = Ref::Arxiv(ArxivId("2401.12345".into())).promote("arxiv", Some("v2"));
        assert_eq!(promoted.source_type, SourceType::Arxiv);
        assert_eq!(promoted.source_id, "2401.12345");
        assert_eq!(promoted.resolver_profile, "arxiv");
        assert_eq!(promoted.version.as_deref(), Some("v2"));
    }

    #[test]
    fn ref_promote_then_digest_matches_direct_construction() {
        let via_promote = Ref::Doi(Doi("10.1234/foo".into()))
            .promote("crossref", None)
            .digest_hex();
        let direct = digest_of(SourceType::Doi, "10.1234/foo", "crossref", None);
        assert_eq!(via_promote, direct);
    }

    #[test]
    fn source_type_serializes_lowercase() {
        let cases = [
            (SourceType::Doi, "\"doi\""),
            (SourceType::Arxiv, "\"arxiv\""),
        ];
        for (variant, expected_json) in cases {
            let json = serde_json::to_string(&variant).expect("serialize");
            assert_eq!(json, expected_json);
        }
    }
}