1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
8#[serde(rename_all = "lowercase")]
9pub enum SourceType {
10 Arxiv,
11 PubMed,
12 BioRxiv,
13 MedRxiv,
14 SemanticScholar,
15 OpenAlex,
16 CrossRef,
17 IACR,
18 PMC,
19 HAL,
20 DBLP,
21 SSRN,
22 GoogleScholar,
23 SciHub,
24 CORE,
25 EuropePMC,
26 Dimensions,
27 IeeeXplore,
28 Zenodo,
29 Unpaywall,
30 MDPI,
31 Jstor,
32 Scispace,
33 Acm,
34 ConnectedPapers,
35 Doaj,
36 WorldWideScience,
37 Osf,
38 Base,
39 Springer,
40 #[serde(untagged)]
41 Other(String),
42}
43
44impl SourceType {
45 pub fn name(&self) -> &str {
47 match self {
48 SourceType::Arxiv => "arXiv",
49 SourceType::PubMed => "PubMed",
50 SourceType::BioRxiv => "bioRxiv",
51 SourceType::MedRxiv => "medRxiv",
52 SourceType::SemanticScholar => "Semantic Scholar",
53 SourceType::OpenAlex => "OpenAlex",
54 SourceType::CrossRef => "CrossRef",
55 SourceType::IACR => "IACR ePrint",
56 SourceType::PMC => "PubMed Central",
57 SourceType::HAL => "HAL",
58 SourceType::DBLP => "DBLP",
59 SourceType::SSRN => "SSRN",
60 SourceType::GoogleScholar => "Google Scholar",
61 SourceType::SciHub => "Sci-Hub",
62 SourceType::CORE => "CORE",
63 SourceType::EuropePMC => "Europe PMC",
64 SourceType::Dimensions => "Dimensions",
65 SourceType::IeeeXplore => "IEEE Xplore",
66 SourceType::Zenodo => "Zenodo",
67 SourceType::Unpaywall => "Unpaywall",
68 SourceType::MDPI => "MDPI",
69 SourceType::Jstor => "JSTOR",
70 SourceType::Scispace => "SciSpace",
71 SourceType::Acm => "ACM Digital Library",
72 SourceType::ConnectedPapers => "Connected Papers",
73 SourceType::Doaj => "DOAJ",
74 SourceType::WorldWideScience => "WorldWideScience",
75 SourceType::Osf => "OSF Preprints",
76 SourceType::Base => "BASE",
77 SourceType::Springer => "Springer",
78 SourceType::Other(s) => s,
79 }
80 }
81
82 pub fn id(&self) -> &str {
84 match self {
85 SourceType::Arxiv => "arxiv",
86 SourceType::PubMed => "pubmed",
87 SourceType::BioRxiv => "biorxiv",
88 SourceType::MedRxiv => "medrxiv",
89 SourceType::SemanticScholar => "semantic",
90 SourceType::OpenAlex => "openalex",
91 SourceType::CrossRef => "crossref",
92 SourceType::IACR => "iacr",
93 SourceType::PMC => "pmc",
94 SourceType::HAL => "hal",
95 SourceType::DBLP => "dblp",
96 SourceType::SSRN => "ssrn",
97 SourceType::GoogleScholar => "google_scholar",
98 SourceType::SciHub => "sci_hub",
99 SourceType::CORE => "core",
100 SourceType::EuropePMC => "europe_pmc",
101 SourceType::Dimensions => "dimensions",
102 SourceType::IeeeXplore => "ieee_xplore",
103 SourceType::Zenodo => "zenodo",
104 SourceType::Unpaywall => "unpaywall",
105 SourceType::MDPI => "mdpi",
106 SourceType::Jstor => "jstor",
107 SourceType::Scispace => "scispace",
108 SourceType::Acm => "acm",
109 SourceType::ConnectedPapers => "connected_papers",
110 SourceType::Doaj => "doaj",
111 SourceType::WorldWideScience => "worldwidescience",
112 SourceType::Osf => "osf",
113 SourceType::Base => "base",
114 SourceType::Springer => "springer",
115 SourceType::Other(s) => s,
116 }
117 }
118}
119
120impl std::fmt::Display for SourceType {
121 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122 write!(f, "{}", self.name())
123 }
124}
125
126#[derive(Debug, Clone, Serialize, Deserialize)]
131pub struct Paper {
132 pub paper_id: String,
134
135 pub title: String,
137
138 pub authors: String,
140
141 pub r#abstract: String,
143
144 pub doi: Option<String>,
146
147 pub published_date: Option<String>,
149
150 pub updated_date: Option<String>,
152
153 pub pdf_url: Option<String>,
155
156 pub url: String,
158
159 pub source: SourceType,
161
162 pub categories: Option<String>,
164
165 pub keywords: Option<String>,
167
168 pub citations: Option<u32>,
170
171 pub references: Option<String>,
173
174 pub extra: Option<HashMap<String, serde_json::Value>>,
176}
177
178impl Paper {
179 pub fn new(paper_id: String, title: String, url: String, source: SourceType) -> Self {
181 Self {
182 paper_id,
183 title,
184 authors: String::new(),
185 r#abstract: String::new(),
186 doi: None,
187 published_date: None,
188 updated_date: None,
189 pdf_url: None,
190 url,
191 source,
192 categories: None,
193 keywords: None,
194 citations: None,
195 references: None,
196 extra: None,
197 }
198 }
199
200 pub fn primary_id(&self) -> &str {
202 self.doi.as_ref().unwrap_or(&self.paper_id)
203 }
204
205 pub fn author_list(&self) -> Vec<&str> {
207 self.authors
208 .split(';')
209 .map(|s| s.trim())
210 .filter(|s| !s.is_empty())
211 .collect()
212 }
213
214 pub fn category_list(&self) -> Vec<&str> {
216 self.categories
217 .as_ref()
218 .map(|c| {
219 c.split(';')
220 .map(|s| s.trim())
221 .filter(|s| !s.is_empty())
222 .collect()
223 })
224 .unwrap_or_default()
225 }
226
227 pub fn keyword_list(&self) -> Vec<&str> {
229 self.keywords
230 .as_ref()
231 .map(|k| {
232 k.split(';')
233 .map(|s| s.trim())
234 .filter(|s| !s.is_empty())
235 .collect()
236 })
237 .unwrap_or_default()
238 }
239
240 pub fn has_pdf(&self) -> bool {
242 self.pdf_url.is_some()
243 }
244}
245
246#[derive(Debug, Clone)]
248pub struct PaperBuilder {
249 paper: Paper,
250}
251
252impl PaperBuilder {
253 pub fn new(
255 paper_id: impl Into<String>,
256 title: impl Into<String>,
257 url: impl Into<String>,
258 source: SourceType,
259 ) -> Self {
260 Self {
261 paper: Paper::new(paper_id.into(), title.into(), url.into(), source),
262 }
263 }
264
265 pub fn authors(mut self, authors: impl Into<String>) -> Self {
267 self.paper.authors = authors.into();
268 self
269 }
270
271 pub fn abstract_text(mut self, abstract_text: impl Into<String>) -> Self {
273 self.paper.r#abstract = abstract_text.into();
274 self
275 }
276
277 pub fn doi(mut self, doi: impl Into<String>) -> Self {
279 self.paper.doi = Some(doi.into());
280 self
281 }
282
283 pub fn published_date(mut self, date: impl Into<String>) -> Self {
285 self.paper.published_date = Some(date.into());
286 self
287 }
288
289 pub fn updated_date(mut self, date: impl Into<String>) -> Self {
291 self.paper.updated_date = Some(date.into());
292 self
293 }
294
295 pub fn pdf_url(mut self, url: impl Into<String>) -> Self {
297 self.paper.pdf_url = Some(url.into());
298 self
299 }
300
301 pub fn categories(mut self, categories: impl Into<String>) -> Self {
303 self.paper.categories = Some(categories.into());
304 self
305 }
306
307 pub fn keywords(mut self, keywords: impl Into<String>) -> Self {
309 self.paper.keywords = Some(keywords.into());
310 self
311 }
312
313 pub fn citations(mut self, count: u32) -> Self {
315 self.paper.citations = Some(count);
316 self
317 }
318
319 pub fn references(mut self, references: impl Into<String>) -> Self {
321 self.paper.references = Some(references.into());
322 self
323 }
324
325 pub fn extra(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
327 self.paper
328 .extra
329 .get_or_insert_with(HashMap::new)
330 .insert(key.into(), value);
331 self
332 }
333
334 pub fn build(self) -> Paper {
336 self.paper
337 }
338}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder stores every explicitly provided value.
    #[test]
    fn test_paper_builder() {
        let paper = PaperBuilder::new(
            "1234.5678",
            "Test Paper",
            "https://example.com",
            SourceType::Arxiv,
        )
        .authors("John Doe; Jane Smith")
        .abstract_text("This is a test abstract.")
        .doi("10.1234/test.1234")
        .pdf_url("https://example.com/paper.pdf")
        .citations(42)
        .build();

        assert_eq!(paper.paper_id, "1234.5678");
        assert_eq!(paper.title, "Test Paper");
        assert_eq!(paper.url, "https://example.com");
        assert_eq!(paper.source, SourceType::Arxiv);
        assert_eq!(paper.authors, "John Doe; Jane Smith");
        assert_eq!(paper.r#abstract, "This is a test abstract.");
        assert_eq!(paper.doi, Some("10.1234/test.1234".to_string()));
        assert_eq!(
            paper.pdf_url,
            Some("https://example.com/paper.pdf".to_string())
        );
        assert_eq!(paper.citations, Some(42));
    }

    /// `author_list` splits on `;` and trims each name.
    #[test]
    fn test_author_list() {
        let paper = PaperBuilder::new(
            "1234".to_string(),
            "Test".to_string(),
            "https://example.com".to_string(),
            SourceType::Arxiv,
        )
        .authors("John Doe; Jane Smith; Bob Jones")
        .build();

        let authors = paper.author_list();
        assert_eq!(authors, vec!["John Doe", "Jane Smith", "Bob Jones"]);
    }

    /// Empty segments and surrounding whitespace are dropped by the list helpers.
    #[test]
    fn test_list_helpers_skip_blank_entries() {
        let paper = PaperBuilder::new("1234", "Test", "https://example.com", SourceType::Arxiv)
            .authors(" John Doe ;; Jane Smith; ")
            .categories("cs.AI; ;cs.LG;")
            .keywords(";neural networks ; deep learning")
            .build();

        assert_eq!(paper.author_list(), vec!["John Doe", "Jane Smith"]);
        assert_eq!(paper.category_list(), vec!["cs.AI", "cs.LG"]);
        assert_eq!(
            paper.keyword_list(),
            vec!["neural networks", "deep learning"]
        );
    }

    /// The DOI wins over `paper_id`; without a DOI the `paper_id` is used.
    #[test]
    fn test_primary_id() {
        let with_doi = PaperBuilder::new(
            "1234".to_string(),
            "Test".to_string(),
            "https://example.com".to_string(),
            SourceType::Arxiv,
        )
        .doi("10.1234/test")
        .build();

        assert_eq!(with_doi.primary_id(), "10.1234/test");

        let without_doi = Paper::new(
            "1234".to_string(),
            "Test".to_string(),
            "https://example.com".to_string(),
            SourceType::Arxiv,
        );

        assert_eq!(without_doi.primary_id(), "1234");
    }

    /// Every setter on the builder ends up in the matching `Paper` field.
    #[test]
    fn test_paper_builder_all_fields() {
        let paper = PaperBuilder::new(
            "PMC12345",
            "Medical Research Paper",
            "https://pubmed.ncbi.nlm.nih.gov/12345/",
            SourceType::PubMed,
        )
        .authors("Alice Johnson; Bob Williams")
        .abstract_text("This is a medical abstract.")
        .doi("10.1000/abc123")
        .pdf_url("https://example.com/fulltext.pdf")
        .published_date("2023-05-15")
        .updated_date("2023-06-01")
        .categories("Medicine;Biology")
        .keywords("gene therapy;CRISPR")
        .citations(100)
        .references("ref1;ref2")
        .build();

        assert_eq!(paper.paper_id, "PMC12345");
        assert_eq!(paper.title, "Medical Research Paper");
        assert_eq!(paper.url, "https://pubmed.ncbi.nlm.nih.gov/12345/");
        assert_eq!(paper.source, SourceType::PubMed);
        assert_eq!(paper.authors, "Alice Johnson; Bob Williams");
        assert_eq!(paper.r#abstract, "This is a medical abstract.");
        assert_eq!(paper.doi, Some("10.1000/abc123".to_string()));
        assert_eq!(
            paper.pdf_url,
            Some("https://example.com/fulltext.pdf".to_string())
        );
        assert_eq!(paper.published_date, Some("2023-05-15".to_string()));
        assert_eq!(paper.updated_date, Some("2023-06-01".to_string()));
        assert_eq!(paper.categories, Some("Medicine;Biology".to_string()));
        assert_eq!(paper.keywords, Some("gene therapy;CRISPR".to_string()));
        assert_eq!(paper.citations, Some(100));
        assert_eq!(paper.references, Some("ref1;ref2".to_string()));
    }

    /// An empty authors string yields no authors.
    #[test]
    fn test_paper_builder_empty_authors() {
        let paper = PaperBuilder::new(
            "1234",
            "Anonymous Paper",
            "https://example.com",
            SourceType::Arxiv,
        )
        .authors("")
        .build();

        let authors = paper.author_list();
        assert!(authors.is_empty());
    }

    /// A builder without any setter calls keeps the `Paper::new` defaults.
    #[test]
    fn test_paper_builder_minimal() {
        let paper = PaperBuilder::new(
            "minimal",
            "Minimal Paper",
            "https://example.com",
            SourceType::SemanticScholar,
        )
        .build();

        assert_eq!(paper.paper_id, "minimal");
        assert_eq!(paper.title, "Minimal Paper");
        assert!(paper.authors.is_empty());
        assert!(paper.doi.is_none());
        assert!(paper.r#abstract.is_empty());
        assert!(paper.published_date.is_none());
        assert!(paper.updated_date.is_none());
        assert!(paper.pdf_url.is_none());
        assert!(paper.categories.is_none());
        assert!(paper.keywords.is_none());
        assert!(paper.citations.is_none());
        assert!(paper.references.is_none());
        assert!(paper.extra.is_none());
    }

    /// `has_pdf` is true once a PDF URL is set.
    #[test]
    fn test_paper_with_pdf() {
        let paper = PaperBuilder::new(
            "1234",
            "Paper with PDF",
            "https://example.com",
            SourceType::Arxiv,
        )
        .pdf_url("https://arxiv.org/pdf/1234.pdf")
        .build();

        assert!(paper.has_pdf());
        assert!(paper.pdf_url.is_some());
    }

    /// `has_pdf` is false for a freshly created paper.
    #[test]
    fn test_paper_without_pdf() {
        let paper = Paper::new(
            "1234".to_string(),
            "Paper without PDF".to_string(),
            "https://example.com".to_string(),
            SourceType::Arxiv,
        );

        assert!(!paper.has_pdf());
    }

    /// `category_list` splits the categories string on `;`.
    #[test]
    fn test_category_list() {
        let paper = PaperBuilder::new("1234", "Test", "https://example.com", SourceType::Arxiv)
            .categories("cs.AI;cs.LG")
            .build();

        let categories = paper.category_list();
        assert_eq!(categories, vec!["cs.AI", "cs.LG"]);
    }

    /// `keyword_list` splits the keywords string on `;`.
    #[test]
    fn test_keyword_list() {
        let paper = PaperBuilder::new("1234", "Test", "https://example.com", SourceType::Arxiv)
            .keywords("neural networks;deep learning")
            .build();

        let keywords = paper.keyword_list();
        assert_eq!(keywords, vec!["neural networks", "deep learning"]);
    }

    /// Unset categories and keywords produce empty lists rather than panicking.
    #[test]
    fn test_category_and_keyword_list_unset() {
        let paper = Paper::new(
            "1234".to_string(),
            "Test".to_string(),
            "https://example.com".to_string(),
            SourceType::Arxiv,
        );

        assert!(paper.category_list().is_empty());
        assert!(paper.keyword_list().is_empty());
    }

    /// `extra` creates the map on first use, accumulates keys and overwrites duplicates.
    #[test]
    fn test_builder_extra() {
        let paper = PaperBuilder::new("1234", "Test", "https://example.com", SourceType::Arxiv)
            .extra("open_access", false.into())
            .extra("venue", "NeurIPS".into())
            .extra("open_access", true.into())
            .build();

        let extra = paper.extra.expect("extra map is created on first insert");
        assert_eq!(extra.len(), 2);
        assert_eq!(extra["open_access"].as_bool(), Some(true));
        assert_eq!(extra["venue"].as_str(), Some("NeurIPS"));
    }

    /// `name`, `id` and `Display` agree with the documented tables, including `Other`.
    #[test]
    fn test_source_type_name_id_display() {
        assert_eq!(SourceType::SemanticScholar.name(), "Semantic Scholar");
        assert_eq!(SourceType::SemanticScholar.id(), "semantic");
        assert_eq!(SourceType::Arxiv.name(), "arXiv");
        assert_eq!(SourceType::Arxiv.id(), "arxiv");
        assert_eq!(SourceType::Arxiv.to_string(), "arXiv");

        let custom = SourceType::Other("my-source".to_string());
        assert_eq!(custom.name(), "my-source");
        assert_eq!(custom.id(), "my-source");
        assert_eq!(custom.to_string(), "my-source");
    }
}