// scitadel_core/models/doi.rs

/// Reports whether `c` is allowed inside the suffix portion of a DOI.
///
/// Accepted characters are ASCII letters and digits plus the punctuation
/// `-`, `.`, `_`, `;`, `(`, `)`, `/` and `:`. Everything else (whitespace,
/// non-ASCII characters, other punctuation) is rejected.
fn is_valid_suffix_char(c: char) -> bool {
    match c {
        '-' | '.' | '_' | ';' | '(' | ')' | '/' | ':' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
6
/// Normalizes a DOI to its bare, lowercase form.
///
/// Surrounding whitespace is trimmed, the text is lowercased, and a leading
/// resolver URL (`https://doi.org/`, `http://doi.org/`, `https://dx.doi.org/`
/// or `http://dx.doi.org/`) is removed if present. The prefix match is
/// case-insensitive, so `HTTPS://DOI.ORG/10.1000/X` and
/// `https://doi.org/10.1000/x` both normalize to `10.1000/x`.
///
/// Input that carries none of these prefixes is returned trimmed and
/// lowercased. No validation is performed here.
pub fn normalize_doi(doi: &str) -> String {
    const RESOLVER_PREFIXES: [&str; 4] = [
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
    ];

    // Lowercase *before* stripping: URL schemes and host names are
    // case-insensitive, so an upper- or mixed-case resolver prefix must be
    // recognized as well. The result is lowercased anyway, so inputs that
    // already matched produce exactly the same output as before.
    let lowered = doi.trim().to_lowercase();

    let bare = RESOLVER_PREFIXES
        .iter()
        .find_map(|prefix| lowered.strip_prefix(*prefix))
        .unwrap_or(lowered.as_str());

    bare.to_owned()
}
18
19pub fn validate_doi(doi: &str) -> bool {
27 let normalized = normalize_doi(doi);
28
29 let rest = match normalized.strip_prefix("10.") {
31 Some(r) => r,
32 None => return false,
33 };
34
35 let slash_pos = match rest.find('/') {
37 Some(pos) => pos,
38 None => return false,
39 };
40
41 let registrant = &rest[..slash_pos];
42 if registrant.len() < 4 || registrant.len() > 9 {
43 return false;
44 }
45 if !registrant.chars().all(|c| c.is_ascii_digit()) {
46 return false;
47 }
48
49 let suffix = &rest[slash_pos + 1..];
51 if suffix.is_empty() {
52 return false;
53 }
54
55 suffix.chars().all(is_valid_suffix_char)
56}
57
58pub fn doi_to_filename(doi: &str) -> String {
60 let normalized = normalize_doi(doi);
61 normalized
62 .chars()
63 .map(|c| {
64 if c.is_ascii_alphanumeric() || matches!(c, '-' | '.' | '_') {
65 c
66 } else {
67 '_'
68 }
69 })
70 .collect()
71}
72
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `validate_doi` accepts every entry in `cases`.
    fn assert_all_valid(cases: &[&str]) {
        for case in cases {
            assert!(validate_doi(case), "expected {:?} to be valid", case);
        }
    }

    /// Asserts that `validate_doi` rejects every entry in `cases`.
    fn assert_all_invalid(cases: &[&str]) {
        for case in cases {
            assert!(!validate_doi(case), "expected {:?} to be invalid", case);
        }
    }

    /// Asserts that `normalize_doi` maps each input to its paired output.
    fn assert_normalizes(cases: &[(&str, &str)]) {
        for (input, expected) in cases {
            assert_eq!(normalize_doi(input), *expected);
        }
    }

    #[test]
    fn valid_dois() {
        assert_all_valid(&[
            "10.1038/s41586-020-2649-2",
            "10.1371/journal.pone.0000000",
            "10.1002/anie.200906232",
            "10.1103/PhysRevLett.116.061102",
            "10.48550/arXiv.2301.00001",
        ]);
    }

    #[test]
    fn valid_with_url_prefix() {
        assert_all_valid(&[
            "https://doi.org/10.1038/s41586-020-2649-2",
            "http://dx.doi.org/10.1002/anie.200906232",
        ]);
    }

    #[test]
    fn valid_with_special_suffix_chars() {
        assert_all_valid(&[
            "10.1000/xyz_(abc)",
            "10.1000/a:b;c.d_e-f",
            "10.1234/sub/path/deep",
        ]);
    }

    #[test]
    fn invalid_dois() {
        assert_all_invalid(&[
            // Empty input and text that is not DOI-shaped at all.
            "",
            "not-a-doi",
            // Registrant shorter than four digits.
            "10.123/too-short-registrant",
            // Empty suffix, and no '/' separator at all.
            "10.1234/",
            "10.1234",
            // Wrong directory indicator.
            "11.1234/test",
            // Registrant longer than nine digits.
            "10.12345678901/too-long-registrant",
            // Registrant that is not numeric.
            "10.abcd/test",
        ]);
    }

    #[test]
    fn normalize_strips_prefix() {
        assert_normalizes(&[
            ("https://doi.org/10.1038/TEST", "10.1038/test"),
            ("http://dx.doi.org/10.1038/TEST", "10.1038/test"),
        ]);
    }

    #[test]
    fn normalize_lowercases() {
        assert_normalizes(&[("10.1038/ABC", "10.1038/abc")]);
    }

    #[test]
    fn filename_sanitization() {
        let produced = doi_to_filename("10.1038/s41586-020-2649-2");
        assert_eq!(produced, "10.1038_s41586-020-2649-2");
    }
}