// crossref_xml/journal/metadata.rs

1use crate::{
2    enums::{Archive, ContentVersion, Iso639_1, MediaType},
3    regex::{DOI_REGEX, RESOURCE_URL_REGEX},
4    serializers::*,
5};
6use serde::Serialize;
7use validator::Validate;
8
9#[derive(Debug, Clone, Default, Serialize, Validate)]
10pub struct JournalMetadata {
11    #[serde(rename = "@language")]
12    pub lang: Iso639_1,
13    #[validate(length(min = 1, max = 255))]
14    pub full_title: String,
15    #[serde(skip_serializing_if = "Option::is_none")]
16    #[validate(length(min = 1, max = 150))]
17    pub abbrev_title: Option<String>,
18    #[serde(skip_serializing_if = "Option::is_none")]
19    #[validate(nested)]
20    pub issn: Option<Issn>,
21    // https://en.wikipedia.org/wiki/CODEN
22    #[serde(skip_serializing_if = "Option::is_none")]
23    #[validate(length(min = 1, max = 6))]
24    pub coden: Option<String>,
25    #[serde(skip_serializing_if = "ArchiveLocations::is_empty")]
26    #[validate(nested)]
27    pub archive_locations: ArchiveLocations,
28    #[validate(nested)]
29    pub doi_data: DoiData,
30}
31
32#[derive(Debug, Clone, Default, Serialize, Validate)]
33pub struct ArchiveLocations {
34    #[serde(default, skip_serializing_if = "Vec::is_empty")]
35    pub archive: Vec<Archive>,
36}
37
38impl ArchiveLocations {
39    pub fn is_empty(&self) -> bool {
40        self.archive.is_empty()
41    }
42}
43
44#[derive(Debug, Clone, Default, Serialize, Validate)]
45pub struct Issn {
46    #[serde(default, rename = "@media_type")]
47    pub media_type: MediaType,
48    #[validate(length(min = 8, max = 9))]
49    #[serde(rename = "$text")]
50    pub value: String,
51}
52
53#[derive(Debug, Clone, Default, Serialize, Validate)]
54pub struct DoiData {
55    #[validate(length(min = 6, max = 2048), regex(path = *DOI_REGEX))]
56    pub doi: String,
57    #[serde(serialize_with = "serialize_timestamp")]
58    pub timestamp: (),
59    #[validate(nested)]
60    pub resource: Resource,
61    // TODO: collection: Vec<Collection>
62}
63
64#[derive(Debug, Clone, Default, Serialize, Validate)]
65pub struct Resource {
66    #[validate(length(min = 1, max = 2048), regex(path = *RESOURCE_URL_REGEX))]
67    #[serde(rename = "$text")]
68    pub value: String,
69    #[serde(skip_serializing_if = "Option::is_none")]
70    #[serde(rename = "@mime_type")]
71    pub mime_type: Option<String>,
72    #[serde(default, rename = "@content_version")]
73    pub content_version: ContentVersion,
74}
75
76#[cfg(test)]
77mod unit {
78    use super::*;
79
80    mod resource {
81        use super::*;
82
83        fn valid_resource() -> Resource {
84            Resource {
85                value: "https://example.com/article".to_string(),
86                mime_type: Some("text/html".to_string()),
87                content_version: ContentVersion::Vor,
88            }
89        }
90
91        #[test]
92        fn valid_resource_passes() {
93            let resource = valid_resource();
94            assert!(resource.validate().is_ok());
95        }
96
97        #[test]
98        fn value_too_short_fails() {
99            let mut resource = valid_resource();
100            resource.value = "".to_string();
101            assert!(resource.validate().is_err());
102        }
103
104        #[test]
105        fn value_too_long_fails() {
106            let mut resource = valid_resource();
107            resource.value = format!("https://example.com/{}", "a".repeat(2030));
108            assert!(resource.validate().is_err());
109        }
110
111        #[test]
112        fn value_invalid_url_fails() {
113            let mut resource = valid_resource();
114            resource.value = "not-a-url".to_string();
115            assert!(resource.validate().is_err());
116        }
117
118        #[test]
119        fn value_at_max_length_passes() {
120            let mut resource = valid_resource();
121            // 2048 total chars
122            resource.value = format!("https://example.com/{}", "a".repeat(2028));
123            assert!(resource.validate().is_ok());
124        }
125
126        #[test]
127        fn various_valid_urls_pass() {
128            let urls = [
129                "http://example.com",
130                "https://example.com",
131                "ftp://files.example.com",
132                "https://example.com/path/to/resource?query=1",
133            ];
134            for url in urls {
135                let resource = Resource {
136                    value: url.to_string(),
137                    mime_type: None,
138                    content_version: ContentVersion::Vor,
139                };
140                assert!(resource.validate().is_ok(), "URL should be valid: {}", url);
141            }
142        }
143    }
144
145    mod doi_data {
146        use super::*;
147
148        fn valid_doi_data() -> DoiData {
149            DoiData {
150                doi: "10.1234/example.2024.001".to_string(),
151                timestamp: (),
152                resource: Resource {
153                    value: "https://example.com/article".to_string(),
154                    mime_type: None,
155                    content_version: ContentVersion::Vor,
156                },
157            }
158        }
159
160        #[test]
161        fn valid_doi_data_passes() {
162            let doi_data = valid_doi_data();
163            assert!(doi_data.validate().is_ok());
164        }
165
166        #[test]
167        fn doi_too_short_fails() {
168            let mut doi_data = valid_doi_data();
169            doi_data.doi = "10.12".to_string(); // 5 chars
170            assert!(doi_data.validate().is_err());
171        }
172
173        #[test]
174        fn doi_too_long_fails() {
175            let mut doi_data = valid_doi_data();
176            doi_data.doi = format!("10.1234/{}", "a".repeat(2043)); // > 2048 chars
177            assert!(doi_data.validate().is_err());
178        }
179
180        #[test]
181        fn doi_invalid_format_fails() {
182            let mut doi_data = valid_doi_data();
183            doi_data.doi = "invalid-doi".to_string();
184            assert!(doi_data.validate().is_err());
185        }
186
187        #[test]
188        fn doi_wrong_prefix_fails() {
189            let mut doi_data = valid_doi_data();
190            doi_data.doi = "11.1234/example".to_string();
191            assert!(doi_data.validate().is_err());
192        }
193
194        #[test]
195        fn doi_at_min_length_passes() {
196            let mut doi_data = valid_doi_data();
197            doi_data.doi = "10.1234/a".to_string(); // 10 chars total
198            assert!(doi_data.validate().is_ok());
199        }
200
201        #[test]
202        fn doi_at_max_length_passes() {
203            let mut doi_data = valid_doi_data();
204            doi_data.doi = format!("10.1234/{}", "a".repeat(200)); // exactly 2048 chars
205            assert!(doi_data.validate().is_ok());
206        }
207
208        #[test]
209        fn various_valid_dois_pass() {
210            let dois = [
211                "10.1000/xyz123",
212                "10.1016/j.cell.2020.01.001",
213                "10.1038/nature12373",
214                "10.1002/(SICI)1097-4636(199905)45:2<133::AID-JBM9>3.0.CO;2-T",
215            ];
216            for doi in dois {
217                let mut doi_data = valid_doi_data();
218                doi_data.doi = doi.to_string();
219                assert!(doi_data.validate().is_ok(), "DOI should be valid: {}", doi);
220            }
221        }
222    }
223
224    mod issn {
225        use super::*;
226
227        fn valid_issn() -> Issn {
228            Issn {
229                media_type: MediaType::Print,
230                value: "1234-5678".to_string(), // 9 chars with dash
231            }
232        }
233
234        #[test]
235        fn valid_issn_passes() {
236            let issn = valid_issn();
237            assert!(issn.validate().is_ok());
238        }
239
240        #[test]
241        fn value_too_short_fails() {
242            let mut issn = valid_issn();
243            issn.value = "1234567".to_string(); // 7 chars
244            assert!(issn.validate().is_err());
245        }
246
247        #[test]
248        fn value_too_long_fails() {
249            let mut issn = valid_issn();
250            issn.value = "1234-56789".to_string(); // 10 chars
251            assert!(issn.validate().is_err());
252        }
253
254        #[test]
255        fn value_at_min_length_passes() {
256            let mut issn = valid_issn();
257            issn.value = "12345678".to_string(); // exactly 8 chars
258            assert!(issn.validate().is_ok());
259        }
260
261        #[test]
262        fn value_at_max_length_passes() {
263            let mut issn = valid_issn();
264            issn.value = "123456789".to_string(); // exactly 9 chars
265            assert!(issn.validate().is_ok());
266        }
267
268        #[test]
269        fn electronic_media_type_passes() {
270            let issn = Issn {
271                media_type: MediaType::Electronic,
272                value: "1234-5678".to_string(),
273            };
274            assert!(issn.validate().is_ok());
275        }
276    }
277
278    mod archive_locations {
279        use super::*;
280        use crate::enums::Archive;
281
282        #[test]
283        fn empty_archive_locations_passes() {
284            let locations = ArchiveLocations::default();
285            assert!(locations.validate().is_ok());
286        }
287
288        #[test]
289        fn is_empty_returns_true_for_empty() {
290            let locations = ArchiveLocations::default();
291            assert!(locations.is_empty());
292        }
293
294        #[test]
295        fn is_empty_returns_false_for_non_empty() {
296            let locations = ArchiveLocations {
297                archive: vec![Archive::CLOCKSS],
298            };
299            assert!(!locations.is_empty());
300        }
301
302        #[test]
303        fn archive_with_values_passes() {
304            let locations = ArchiveLocations {
305                archive: vec![Archive::CLOCKSS, Archive::Portico, Archive::LOCKSS],
306            };
307            assert!(locations.validate().is_ok());
308        }
309    }
310
311    mod journal_metadata {
312        use super::*;
313
314        fn valid_journal_metadata() -> JournalMetadata {
315            JournalMetadata {
316                lang: Iso639_1::En,
317                full_title: "Journal of Example Research".to_string(),
318                abbrev_title: Some("J. Ex. Res.".to_string()),
319                issn: Some(Issn {
320                    media_type: MediaType::Electronic,
321                    value: "1234-5678".to_string(),
322                }),
323                coden: Some("JEXR".to_string()),
324                archive_locations: ArchiveLocations::default(),
325                doi_data: DoiData {
326                    doi: "10.1234/journal.example".to_string(),
327                    timestamp: (),
328                    resource: Resource {
329                        value: "https://example.com/journal".to_string(),
330                        mime_type: None,
331                        content_version: ContentVersion::Vor,
332                    },
333                },
334            }
335        }
336
337        #[test]
338        fn valid_journal_metadata_passes() {
339            let metadata = valid_journal_metadata();
340            assert!(metadata.validate().is_ok());
341        }
342
343        #[test]
344        fn full_title_too_short_fails() {
345            let mut metadata = valid_journal_metadata();
346            metadata.full_title = "".to_string();
347            assert!(metadata.validate().is_err());
348        }
349
350        #[test]
351        fn full_title_too_long_fails() {
352            let mut metadata = valid_journal_metadata();
353            metadata.full_title = "a".repeat(256);
354            assert!(metadata.validate().is_err());
355        }
356
357        #[test]
358        fn full_title_at_min_length_passes() {
359            let mut metadata = valid_journal_metadata();
360            metadata.full_title = "A".to_string();
361            assert!(metadata.validate().is_ok());
362        }
363
364        #[test]
365        fn full_title_at_max_length_passes() {
366            let mut metadata = valid_journal_metadata();
367            metadata.full_title = "a".repeat(255);
368            assert!(metadata.validate().is_ok());
369        }
370
371        #[test]
372        fn abbrev_title_too_short_fails() {
373            let mut metadata = valid_journal_metadata();
374            metadata.abbrev_title = Some("".to_string());
375            assert!(metadata.validate().is_err());
376        }
377
378        #[test]
379        fn abbrev_title_too_long_fails() {
380            let mut metadata = valid_journal_metadata();
381            metadata.abbrev_title = Some("a".repeat(151));
382            assert!(metadata.validate().is_err());
383        }
384
385        #[test]
386        fn abbrev_title_at_min_length_passes() {
387            let mut metadata = valid_journal_metadata();
388            metadata.abbrev_title = Some("A".to_string());
389            assert!(metadata.validate().is_ok());
390        }
391
392        #[test]
393        fn abbrev_title_at_max_length_passes() {
394            let mut metadata = valid_journal_metadata();
395            metadata.abbrev_title = Some("a".repeat(150));
396            assert!(metadata.validate().is_ok());
397        }
398
399        #[test]
400        fn abbrev_title_none_passes() {
401            let mut metadata = valid_journal_metadata();
402            metadata.abbrev_title = None;
403            assert!(metadata.validate().is_ok());
404        }
405
406        #[test]
407        fn coden_too_short_fails() {
408            let mut metadata = valid_journal_metadata();
409            metadata.coden = Some("".to_string());
410            assert!(metadata.validate().is_err());
411        }
412
413        #[test]
414        fn coden_too_long_fails() {
415            let mut metadata = valid_journal_metadata();
416            metadata.coden = Some("ABCDEFG".to_string()); // 7 chars
417            assert!(metadata.validate().is_err());
418        }
419
420        #[test]
421        fn coden_at_min_length_passes() {
422            let mut metadata = valid_journal_metadata();
423            metadata.coden = Some("A".to_string());
424            assert!(metadata.validate().is_ok());
425        }
426
427        #[test]
428        fn coden_at_max_length_passes() {
429            let mut metadata = valid_journal_metadata();
430            metadata.coden = Some("ABCDEF".to_string()); // 6 chars
431            assert!(metadata.validate().is_ok());
432        }
433
434        #[test]
435        fn coden_none_passes() {
436            let mut metadata = valid_journal_metadata();
437            metadata.coden = None;
438            assert!(metadata.validate().is_ok());
439        }
440
441        #[test]
442        fn issn_none_passes() {
443            let mut metadata = valid_journal_metadata();
444            metadata.issn = None;
445            assert!(metadata.validate().is_ok());
446        }
447
448        #[test]
449        fn invalid_issn_fails() {
450            let mut metadata = valid_journal_metadata();
451            metadata.issn = Some(Issn {
452                media_type: MediaType::Print,
453                value: "123".to_string(), // too short
454            });
455            assert!(metadata.validate().is_err());
456        }
457
458        #[test]
459        fn invalid_doi_data_fails() {
460            let mut metadata = valid_journal_metadata();
461            metadata.doi_data.doi = "invalid".to_string();
462            assert!(metadata.validate().is_err());
463        }
464
465        #[test]
466        fn minimal_valid_metadata_passes() {
467            let metadata = JournalMetadata {
468                lang: Iso639_1::En,
469                full_title: "J".to_string(),
470                abbrev_title: None,
471                issn: None,
472                coden: None,
473                archive_locations: ArchiveLocations::default(),
474                doi_data: DoiData {
475                    doi: "10.1234/a".to_string(),
476                    timestamp: (),
477                    resource: Resource {
478                        value: "https://example.com".to_string(),
479                        mime_type: None,
480                        content_version: ContentVersion::Vor,
481                    },
482                },
483            };
484            assert!(metadata.validate().is_ok());
485        }
486
487        #[test]
488        fn different_languages_pass() {
489            let languages = [
490                Iso639_1::En,
491                Iso639_1::Es,
492                Iso639_1::Fr,
493                Iso639_1::De,
494                Iso639_1::Zh,
495            ];
496            for lang in languages {
497                let mut metadata = valid_journal_metadata();
498                metadata.lang = lang;
499                assert!(
500                    metadata.validate().is_ok(),
501                    "Language {:?} should be valid",
502                    lang
503                );
504            }
505        }
506    }
507}