1use serde::{Deserialize, Serialize};
26use sha2::{Digest, Sha256};
27
28use crate::Ref;
29
30#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
38#[serde(rename_all = "lowercase")]
39#[non_exhaustive]
40pub enum SourceType {
41 Doi,
43 Arxiv,
45}
46
47impl SourceType {
48 pub fn as_wire_str(&self) -> &'static str {
51 match self {
52 SourceType::Doi => "doi",
53 SourceType::Arxiv => "arxiv",
54 }
55 }
56}
57
58#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
73#[non_exhaustive]
74pub struct CanonicalRef {
75 pub source_type: SourceType,
77 pub source_id: String,
79 pub resolver_profile: String,
83 pub version: Option<String>,
87}
88
89impl CanonicalRef {
90 pub fn new(
95 source_type: SourceType,
96 source_id: impl Into<String>,
97 resolver_profile: impl Into<String>,
98 version: Option<String>,
99 ) -> Self {
100 Self {
101 source_type,
102 source_id: source_id.into(),
103 resolver_profile: resolver_profile.into(),
104 version,
105 }
106 }
107
108 pub fn digest(&self) -> [u8; 32] {
119 let mut hasher = Sha256::new();
120 hasher.update(self.source_type.as_wire_str().as_bytes());
121 hasher.update([0x00]);
122 hasher.update(self.source_id.as_bytes());
123 hasher.update([0x00]);
124 hasher.update(self.resolver_profile.as_bytes());
125 hasher.update([0x00]);
126 if let Some(v) = &self.version {
127 hasher.update(v.as_bytes());
128 }
129 hasher.finalize().into()
133 }
134
135 pub fn digest_hex(&self) -> String {
137 hex::encode(self.digest())
138 }
139}
140
141impl Ref {
142 pub fn promote(&self, resolver_profile: &str, version: Option<&str>) -> CanonicalRef {
155 let (source_type, source_id) = match self {
156 Ref::Doi(d) => (SourceType::Doi, d.as_str().to_string()),
157 Ref::Arxiv(a) => (SourceType::Arxiv, a.as_str().to_string()),
158 };
159 CanonicalRef {
160 source_type,
161 source_id,
162 resolver_profile: resolver_profile.to_string(),
163 version: version.map(str::to_string),
164 }
165 }
166}
167
#[cfg(test)]
// Panicking helpers are acceptable here: a panic inside a test is simply a failed test.
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use crate::{ArxivId, Doi};

    /// Reference digest used to cross-check `CanonicalRef::digest_hex`.
    ///
    /// Builds the whole preimage in one buffer (each of the first three fields
    /// followed by a `0x00` byte, then the version bytes if any) and hashes it
    /// in a single call, instead of streaming it into the hasher.
    fn reference_digest_hex(
        source_type: &str,
        source_id: &str,
        resolver_profile: &str,
        version: Option<&str>,
    ) -> String {
        const NUL: &[u8] = &[0x00];
        let preimage: Vec<u8> = [
            source_type.as_bytes(),
            NUL,
            source_id.as_bytes(),
            NUL,
            resolver_profile.as_bytes(),
            NUL,
            // A missing version contributes no bytes.
            version.unwrap_or("").as_bytes(),
        ]
        .concat();
        hex::encode(Sha256::digest(&preimage))
    }

    #[test]
    fn digest_matches_reference_doi_crossref_no_version() {
        let canonical = CanonicalRef::new(SourceType::Doi, "10.1234/foo", "crossref", None);
        let want = reference_digest_hex("doi", "10.1234/foo", "crossref", None);
        assert_eq!(canonical.digest_hex(), want);
    }

    #[test]
    fn digest_matches_reference_doi_unpaywall_no_version() {
        let unpaywall = CanonicalRef::new(SourceType::Doi, "10.1234/foo", "unpaywall", None);
        let want = reference_digest_hex("doi", "10.1234/foo", "unpaywall", None);
        assert_eq!(unpaywall.digest_hex(), want);

        // Same identifier under a different resolver profile must hash differently.
        let crossref = CanonicalRef::new(SourceType::Doi, "10.1234/foo", "crossref", None);
        assert_ne!(unpaywall.digest_hex(), crossref.digest_hex());
    }

    #[test]
    fn digest_matches_reference_doi_oa_publisher_no_version() {
        let canonical = CanonicalRef::new(SourceType::Doi, "10.1234/foo", "oa-publisher", None);
        let want = reference_digest_hex("doi", "10.1234/foo", "oa-publisher", None);
        assert_eq!(canonical.digest_hex(), want);
    }

    #[test]
    fn digest_matches_reference_arxiv_no_version() {
        let canonical = CanonicalRef::new(SourceType::Arxiv, "2401.12345", "arxiv", None);
        let want = reference_digest_hex("arxiv", "2401.12345", "arxiv", None);
        assert_eq!(canonical.digest_hex(), want);
    }

    #[test]
    fn digest_matches_reference_arxiv_with_version_v2() {
        let versioned = CanonicalRef::new(
            SourceType::Arxiv,
            "2401.12345",
            "arxiv",
            Some(String::from("v2")),
        );
        let want = reference_digest_hex("arxiv", "2401.12345", "arxiv", Some("v2"));
        assert_eq!(versioned.digest_hex(), want);

        // Adding a version must change the digest.
        let unversioned = CanonicalRef::new(SourceType::Arxiv, "2401.12345", "arxiv", None);
        assert_ne!(versioned.digest_hex(), unversioned.digest_hex());
    }

    #[test]
    fn digest_matches_reference_arxiv_with_version_v10() {
        let canonical = CanonicalRef::new(
            SourceType::Arxiv,
            "2401.12345",
            "arxiv",
            Some(String::from("v10")),
        );
        let want = reference_digest_hex("arxiv", "2401.12345", "arxiv", Some("v10"));
        assert_eq!(canonical.digest_hex(), want);
    }

    #[test]
    fn digest_matches_reference_doi_crossref_with_snapshot_date() {
        let snapshot = String::from("2026-05-12");
        let canonical =
            CanonicalRef::new(SourceType::Doi, "10.1234/foo", "crossref", Some(snapshot));
        let want = reference_digest_hex("doi", "10.1234/foo", "crossref", Some("2026-05-12"));
        assert_eq!(canonical.digest_hex(), want);
    }

    #[test]
    fn digest_matches_reference_real_publisher_doi() {
        let canonical = CanonicalRef::new(
            SourceType::Doi,
            "10.1103/PhysRevLett.130.200601",
            "oa-publisher",
            None,
        );
        let want = reference_digest_hex(
            "doi",
            "10.1103/PhysRevLett.130.200601",
            "oa-publisher",
            None,
        );
        assert_eq!(canonical.digest_hex(), want);
    }

    #[test]
    fn digest_some_empty_string_version_equals_none_version() {
        // An empty version contributes no bytes, exactly like a missing one.
        let empty_version =
            CanonicalRef::new(SourceType::Doi, "10.1234/foo", "crossref", Some(String::new()));
        let no_version = CanonicalRef::new(SourceType::Doi, "10.1234/foo", "crossref", None);
        assert_eq!(empty_version.digest_hex(), no_version.digest_hex());
    }

    #[test]
    fn digest_matches_reference_old_style_arxiv() {
        let canonical = CanonicalRef::new(SourceType::Arxiv, "cond-mat/9501001", "arxiv", None);
        let want = reference_digest_hex("arxiv", "cond-mat/9501001", "arxiv", None);
        assert_eq!(canonical.digest_hex(), want);
    }

    #[test]
    fn digest_hex_is_64_lowercase_hex_chars() {
        let s = CanonicalRef::new(SourceType::Doi, "10.1234/foo", "crossref", None).digest_hex();
        assert_eq!(s.len(), 64);
        let only_lowercase_hex = s.chars().all(|ch| matches!(ch, '0'..='9' | 'a'..='f'));
        assert!(
            only_lowercase_hex,
            "digest_hex must be lowercase ASCII hex, got {s}"
        );
    }

    #[test]
    fn ref_promote_doi_round_trip() {
        let reference = Ref::Doi(Doi("10.1234/foo".into()));
        let promoted = reference.promote("crossref", None);
        assert_eq!(promoted.source_type, SourceType::Doi);
        assert_eq!(promoted.source_id, "10.1234/foo");
        assert_eq!(promoted.resolver_profile, "crossref");
        assert_eq!(promoted.version, None);
    }

    #[test]
    fn ref_promote_arxiv_with_version_round_trip() {
        let reference = Ref::Arxiv(ArxivId("2401.12345".into()));
        let promoted = reference.promote("arxiv", Some("v2"));
        assert_eq!(promoted.source_type, SourceType::Arxiv);
        assert_eq!(promoted.source_id, "2401.12345");
        assert_eq!(promoted.resolver_profile, "arxiv");
        assert_eq!(promoted.version.as_deref(), Some("v2"));
    }

    #[test]
    fn ref_promote_then_digest_matches_direct_construction() {
        let via_promote = Ref::Doi(Doi("10.1234/foo".into())).promote("crossref", None);
        let via_new = CanonicalRef::new(SourceType::Doi, "10.1234/foo", "crossref", None);
        assert_eq!(via_promote.digest_hex(), via_new.digest_hex());
    }

    #[test]
    fn source_type_serializes_lowercase() {
        let doi_json = serde_json::to_string(&SourceType::Doi).expect("serialize");
        assert_eq!(doi_json, "\"doi\"");
        let arxiv_json = serde_json::to_string(&SourceType::Arxiv).expect("serialize");
        assert_eq!(arxiv_json, "\"arxiv\"");
    }
}