// scitadel_core/models/paper.rs

1use std::collections::HashMap;
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5
6use super::PaperId;
7
8/// Canonical, deduplicated paper record.
9///
10/// A paper exists once regardless of how many searches found it.
11#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
12pub struct Paper {
13    pub id: PaperId,
14    pub title: String,
15    pub authors: Vec<String>,
16    #[serde(default)]
17    pub r#abstract: String,
18    pub full_text: Option<String>,
19    pub summary: Option<String>,
20    pub doi: Option<String>,
21    pub arxiv_id: Option<String>,
22    pub pubmed_id: Option<String>,
23    pub inspire_id: Option<String>,
24    pub openalex_id: Option<String>,
25    pub year: Option<i32>,
26    pub journal: Option<String>,
27    pub url: Option<String>,
28    #[serde(default)]
29    pub source_urls: HashMap<String, String>,
30    pub created_at: DateTime<Utc>,
31    pub updated_at: DateTime<Utc>,
32    /// Absolute path to the locally downloaded file (PDF/HTML), if any.
33    /// Populated by the download pipeline; `None` until first successful
34    /// download attempt. See #112.
35    #[serde(default)]
36    pub local_path: Option<String>,
37    /// Outcome of the most recent download attempt. `None` = never tried.
38    #[serde(default)]
39    pub download_status: Option<DownloadStatus>,
40    /// Wall-clock time of the most recent download attempt. Together with
41    /// `download_status` lets the UI distinguish "fresh failure" from
42    /// "tried weeks ago, retry might work".
43    #[serde(default)]
44    pub last_attempt_at: Option<DateTime<Utc>>,
45    /// Stable citation key used in BibTeX / BibLaTeX export (#132).
46    /// Assigned on first encounter via the Better-BibTeX-style
47    /// algorithm in `scitadel-export::bibtex::generate_key` and frozen
48    /// thereafter — the freeze contract is why we persist it rather
49    /// than recompute. `None` means the paper predates migration 009
50    /// and will be backfilled on next `Database::migrate` call.
51    #[serde(default)]
52    pub bibtex_key: Option<String>,
53}
54
55impl Paper {
56    #[must_use]
57    pub fn new(title: impl Into<String>) -> Self {
58        let now = Utc::now();
59        Self {
60            id: PaperId::new(),
61            title: title.into(),
62            authors: Vec::new(),
63            r#abstract: String::new(),
64            full_text: None,
65            summary: None,
66            doi: None,
67            arxiv_id: None,
68            pubmed_id: None,
69            inspire_id: None,
70            openalex_id: None,
71            year: None,
72            journal: None,
73            url: None,
74            source_urls: HashMap::new(),
75            created_at: now,
76            updated_at: now,
77            local_path: None,
78            download_status: None,
79            last_attempt_at: None,
80            bibtex_key: None,
81        }
82    }
83}
84
85/// Outcome of a paper download attempt. Persisted on `papers.download_status`.
86///
87/// `Downloaded` means the adapter classified the fetched bytes as full
88/// content. `Paywall` means we got bytes but they're an HTML stub /
89/// abstract / paywall page — file exists but doesn't contain the paper.
90/// `Failed` means the download itself errored (network, 404, etc.).
91#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
92#[serde(rename_all = "snake_case")]
93pub enum DownloadStatus {
94    Downloaded,
95    Paywall,
96    Failed,
97}
98
99impl DownloadStatus {
100    /// SQL-friendly string used in the `download_status` text column.
101    #[must_use]
102    pub fn as_str(self) -> &'static str {
103        match self {
104            Self::Downloaded => "downloaded",
105            Self::Paywall => "paywall",
106            Self::Failed => "failed",
107        }
108    }
109
110    /// Inverse of `as_str`. Returns `None` for unknown values so a
111    /// stale row from a future schema doesn't crash the loader.
112    #[must_use]
113    pub fn parse(s: &str) -> Option<Self> {
114        match s {
115            "downloaded" => Some(Self::Downloaded),
116            "paywall" => Some(Self::Paywall),
117            "failed" => Some(Self::Failed),
118            _ => None,
119        }
120    }
121}
122
123/// Un-deduplicated paper record from a single source adapter.
124///
125/// Adapters produce candidates; the dedup engine merges them into Papers.
126#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
127pub struct CandidatePaper {
128    pub source: String,
129    pub source_id: String,
130    pub title: String,
131    #[serde(default)]
132    pub authors: Vec<String>,
133    #[serde(default)]
134    pub r#abstract: String,
135    pub doi: Option<String>,
136    pub arxiv_id: Option<String>,
137    pub pubmed_id: Option<String>,
138    pub inspire_id: Option<String>,
139    pub openalex_id: Option<String>,
140    pub year: Option<i32>,
141    pub journal: Option<String>,
142    pub url: Option<String>,
143    pub rank: Option<i32>,
144    pub score: Option<f64>,
145    #[serde(default)]
146    pub raw_data: serde_json::Value,
147}
148
149impl CandidatePaper {
150    #[must_use]
151    pub fn new(
152        source: impl Into<String>,
153        source_id: impl Into<String>,
154        title: impl Into<String>,
155    ) -> Self {
156        Self {
157            source: source.into(),
158            source_id: source_id.into(),
159            title: title.into(),
160            authors: Vec::new(),
161            r#abstract: String::new(),
162            doi: None,
163            arxiv_id: None,
164            pubmed_id: None,
165            inspire_id: None,
166            openalex_id: None,
167            year: None,
168            journal: None,
169            url: None,
170            rank: None,
171            score: None,
172            raw_data: serde_json::Value::Null,
173        }
174    }
175}