Skip to main content

papers_core/
selection.rs

1use std::path::PathBuf;
2
3use papers_openalex::{GetParams, ListParams};
4use papers_zotero::ItemListParams;
5use serde::{Deserialize, Serialize};
6
// ── Error ──────────────────────────────────────────────────────────────────
8
9#[derive(thiserror::Error, Debug)]
10pub enum SelectionError {
11    #[error("no data directory available")]
12    NoDataDir,
13    #[error("selection {0:?} not found")]
14    NotFound(String),
15    #[error("selection {0:?} already exists")]
16    AlreadyExists(String),
17    #[error("no active selection; run: papers selection list")]
18    NoActiveSelection,
19    #[error("invalid selection name {0:?}: use only alphanumeric, - and _")]
20    InvalidName(String),
21    #[error("item not found in selection")]
22    ItemNotFound,
23    #[error("could not resolve paper: {0}")]
24    CannotResolve(String),
25    #[error("io error: {0}")]
26    Io(#[from] std::io::Error),
27    #[error("json error: {0}")]
28    Json(#[from] serde_json::Error),
29}
30
// ── Data model ─────────────────────────────────────────────────────────────
32
33/// A single paper in a selection. Stores as much metadata as resolved.
34/// If `zotero_key` is None, the paper has not been matched to the local Zotero
35/// library; metadata can be used later to prompt the user to download it.
36#[derive(Serialize, Deserialize, Clone, Debug)]
37pub struct SelectionEntry {
38    pub zotero_key: Option<String>,
39    pub openalex_id: Option<String>,
40    pub doi: Option<String>,
41    pub title: Option<String>,
42    pub authors: Option<Vec<String>>,
43    pub year: Option<u32>,
44    pub issn: Option<Vec<String>>,
45    pub isbn: Option<Vec<String>>,
46}
47
48#[derive(Serialize, Deserialize, Debug)]
49pub struct Selection {
50    pub name: String,
51    pub entries: Vec<SelectionEntry>,
52}
53
54#[derive(Serialize, Deserialize, Default)]
55pub struct SelectionState {
56    pub active: Option<String>,
57}
58
// ── Storage paths ──────────────────────────────────────────────────────────
60
61/// Returns the selections directory, allowing `PAPERS_DATA_DIR` env var override
62/// (used by tests). Falls back to `dirs::data_dir()/papers/selections`.
63pub fn selections_dir() -> Option<PathBuf> {
64    if let Ok(override_dir) = std::env::var("PAPERS_DATA_DIR") {
65        let mut p = PathBuf::from(override_dir);
66        p.push("papers");
67        p.push("selections");
68        return Some(p);
69    }
70    dirs::data_dir().map(|mut p| {
71        p.push("papers");
72        p.push("selections");
73        p
74    })
75}
76
77fn state_path() -> Option<PathBuf> {
78    selections_dir().map(|mut p| {
79        p.push("state.json");
80        p
81    })
82}
83
84fn selection_path(name: &str) -> Option<PathBuf> {
85    selections_dir().map(|mut p| {
86        p.push(format!("{name}.json"));
87        p
88    })
89}
90
// ── Validation ─────────────────────────────────────────────────────────────
92
93pub fn validate_name(name: &str) -> Result<(), SelectionError> {
94    if name.is_empty()
95        || !name
96            .chars()
97            .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
98    {
99        Err(SelectionError::InvalidName(name.to_string()))
100    } else {
101        Ok(())
102    }
103}
104
// ── List & resolve ─────────────────────────────────────────────────────────
106
107/// List all selection names (sorted alphabetically).
108pub fn list_selection_names() -> Vec<String> {
109    let dir = match selections_dir() {
110        Some(d) => d,
111        None => return Vec::new(),
112    };
113    if !dir.exists() {
114        return Vec::new();
115    }
116    let mut names: Vec<String> = std::fs::read_dir(&dir)
117        .ok()
118        .into_iter()
119        .flatten()
120        .filter_map(|entry| {
121            let entry = entry.ok()?;
122            let name = entry.file_name();
123            let s = name.to_string_lossy().into_owned();
124            if s == "state.json" {
125                return None;
126            }
127            s.strip_suffix(".json").map(|n| n.to_string())
128        })
129        .collect();
130    names.sort();
131    names
132}
133
134/// Resolve a name-or-index string to a selection name.
135/// Accepts 1-based index or case-insensitive exact name match.
136pub fn resolve_selection(input: &str) -> Result<String, SelectionError> {
137    if let Ok(idx) = input.parse::<usize>() {
138        let names = list_selection_names();
139        if idx == 0 || idx > names.len() {
140            return Err(SelectionError::NotFound(input.to_string()));
141        }
142        return Ok(names[idx - 1].clone());
143    }
144    let input_lower = input.to_lowercase();
145    list_selection_names()
146        .into_iter()
147        .find(|n| n.to_lowercase() == input_lower)
148        .ok_or_else(|| SelectionError::NotFound(input.to_string()))
149}
150
// ── State ──────────────────────────────────────────────────────────────────
152
153pub fn load_state() -> SelectionState {
154    let path = match state_path() {
155        Some(p) => p,
156        None => return SelectionState::default(),
157    };
158    if !path.exists() {
159        return SelectionState::default();
160    }
161    std::fs::read_to_string(&path)
162        .ok()
163        .and_then(|s| serde_json::from_str(&s).ok())
164        .unwrap_or_default()
165}
166
167pub fn save_state(s: &SelectionState) -> Result<(), SelectionError> {
168    let path = state_path().ok_or(SelectionError::NoDataDir)?;
169    if let Some(parent) = path.parent() {
170        std::fs::create_dir_all(parent)?;
171    }
172    let json = serde_json::to_string_pretty(s)?;
173    atomic_write(&path, &json)?;
174    Ok(())
175}
176
177pub fn active_selection_name() -> Option<String> {
178    load_state().active
179}
180
// ── CRUD ───────────────────────────────────────────────────────────────────
182
183pub fn load_selection(name: &str) -> Result<Selection, SelectionError> {
184    let path = selection_path(name).ok_or(SelectionError::NoDataDir)?;
185    if !path.exists() {
186        return Err(SelectionError::NotFound(name.to_string()));
187    }
188    let s = std::fs::read_to_string(&path)?;
189    Ok(serde_json::from_str(&s)?)
190}
191
192pub fn save_selection(sel: &Selection) -> Result<(), SelectionError> {
193    let path = selection_path(&sel.name).ok_or(SelectionError::NoDataDir)?;
194    if let Some(parent) = path.parent() {
195        std::fs::create_dir_all(parent)?;
196    }
197    let json = serde_json::to_string_pretty(sel)?;
198    atomic_write(&path, &json)?;
199    Ok(())
200}
201
202pub fn delete_selection(name: &str) -> Result<(), SelectionError> {
203    let path = selection_path(name).ok_or(SelectionError::NoDataDir)?;
204    if !path.exists() {
205        return Err(SelectionError::NotFound(name.to_string()));
206    }
207    std::fs::remove_file(&path)?;
208    Ok(())
209}
210
/// Writes `content` to `path` by first writing a sibling temporary file (same
/// path with the extension replaced by `tmp`) and then renaming it over the
/// target, so readers never observe a partially written file.
///
/// If either step fails, the temporary file is removed (best effort) so failed
/// writes do not leave stray `.tmp` files behind, and the original error is
/// returned.
fn atomic_write(path: &std::path::Path, content: &str) -> Result<(), std::io::Error> {
    let tmp = path.with_extension("tmp");
    let outcome =
        std::fs::write(&tmp, content.as_bytes()).and_then(|()| std::fs::rename(&tmp, path));
    if outcome.is_err() {
        // Cleanup failure is deliberately ignored: the write error is what matters.
        let _ = std::fs::remove_file(&tmp);
    }
    outcome
}
217
// ── Deduplication helpers ──────────────────────────────────────────────────
219
220pub fn entry_matches_key(entry: &SelectionEntry, zotero_key: &str) -> bool {
221    entry.zotero_key.as_deref() == Some(zotero_key)
222}
223
224pub fn entry_matches_openalex(entry: &SelectionEntry, oa_id: &str) -> bool {
225    entry.openalex_id.as_deref() == Some(oa_id)
226}
227
228pub fn entry_matches_doi(entry: &SelectionEntry, doi: &str) -> bool {
229    let normalized = normalize_doi(doi);
230    entry
231        .doi
232        .as_deref()
233        .map(normalize_doi)
234        .as_deref()
235        .map(|d| d == normalized.as_str())
236        .unwrap_or(false)
237}
238
239/// Match a removal input against an entry (by key, OA ID, DOI, or title substring).
240pub fn entry_matches_remove_input(entry: &SelectionEntry, input: &str) -> bool {
241    if crate::zotero::looks_like_zotero_key(input) && entry_matches_key(entry, input) {
242        return true;
243    }
244    let id = input
245        .strip_prefix("https://openalex.org/")
246        .unwrap_or(input);
247    if looks_like_openalex_work_id(id) && entry_matches_openalex(entry, id) {
248        return true;
249    }
250    if looks_like_doi(input) && entry_matches_doi(entry, input) {
251        return true;
252    }
253    if let Some(title) = &entry.title {
254        if title.to_lowercase().contains(&input.to_lowercase()) {
255            return true;
256        }
257    }
258    false
259}
260
// ── Input type detection ───────────────────────────────────────────────────
262
/// Heuristically decides whether `input` is a DOI.
///
/// An optional `https://doi.org/`, `http://doi.org/` or `doi:` prefix is
/// ignored; the remainder must start with `10.` and contain a `/`.
pub fn looks_like_doi(input: &str) -> bool {
    const PREFIXES: [&str; 3] = ["https://doi.org/", "http://doi.org/", "doi:"];

    let bare = PREFIXES
        .iter()
        .find_map(|prefix| input.strip_prefix(*prefix))
        .unwrap_or(input);

    bare.contains('/') && bare.starts_with("10.")
}
271
/// Decides whether `input` is an OpenAlex work ID: an upper-case `W` followed
/// by one or more ASCII digits, optionally prefixed with
/// `https://openalex.org/`.
pub fn looks_like_openalex_work_id(input: &str) -> bool {
    let id = match input.strip_prefix("https://openalex.org/") {
        Some(rest) => rest,
        None => input,
    };

    match id.strip_prefix('W') {
        Some(digits) => !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()),
        None => false,
    }
}
278
/// Removes a leading `https://doi.org/`, `http://doi.org/` or `doi:` from
/// `doi`, returning the remainder. Input without such a prefix is returned
/// unchanged. At most one prefix is removed.
pub fn strip_doi_prefix(doi: &str) -> &str {
    for prefix in ["https://doi.org/", "http://doi.org/", "doi:"].iter() {
        if let Some(rest) = doi.strip_prefix(*prefix) {
            return rest;
        }
    }
    doi
}
285
286fn normalize_doi(doi: &str) -> String {
287    strip_doi_prefix(doi).to_lowercase()
288}
289
// ── Smart add resolution ───────────────────────────────────────────────────
291
292/// Resolve a paper input string to a SelectionEntry.
293/// Tries Zotero first (if available), then OpenAlex. Merges metadata from both.
294pub async fn resolve_paper(
295    input: &str,
296    client: &papers_openalex::OpenAlexClient,
297    zotero: Option<&papers_zotero::ZoteroClient>,
298) -> Result<SelectionEntry, SelectionError> {
299    let input = input.trim();
300    let mut entry = SelectionEntry {
301        zotero_key: None,
302        openalex_id: None,
303        doi: None,
304        title: None,
305        authors: None,
306        year: None,
307        issn: None,
308        isbn: None,
309    };
310
311    let is_zotero_key = crate::zotero::looks_like_zotero_key(input);
312    let is_doi = looks_like_doi(input);
313    let is_oa_id = looks_like_openalex_work_id(input);
314
315    // Step 2: Attempt Zotero resolution
316    if let Some(z) = zotero {
317        if is_zotero_key {
318            if let Ok(item) = z.get_item(input).await {
319                entry.zotero_key = Some(item.key.clone());
320                fill_from_zotero_item(&mut entry, &item);
321            }
322        } else if is_doi {
323            let bare = strip_doi_prefix(input);
324            let params = ItemListParams {
325                q: Some(bare.to_string()),
326                qmode: Some("everything".into()),
327                limit: Some(1),
328                ..Default::default()
329            };
330            if let Ok(resp) = z.list_top_items(&params).await {
331                if let Some(item) = resp.items.into_iter().next() {
332                    entry.zotero_key = Some(item.key.clone());
333                    fill_from_zotero_item(&mut entry, &item);
334                }
335            }
336        } else if !is_oa_id {
337            // Free-text / title search in Zotero
338            let params = ItemListParams::builder().q(input).limit(1).build();
339            if let Ok(resp) = z.list_top_items(&params).await {
340                // Only auto-pick if there is exactly 1 result
341                if resp.items.len() == 1 {
342                    let item = resp.items.into_iter().next().unwrap();
343                    entry.zotero_key = Some(item.key.clone());
344                    fill_from_zotero_item(&mut entry, &item);
345                }
346            }
347        }
348    }
349
350    // Step 3: Attempt OpenAlex resolution
351    let oa_work = resolve_via_openalex(input, client, is_doi, is_oa_id).await;
352    if let Some(work) = oa_work {
353        fill_from_oa_work(&mut entry, &work);
354
355        // Step 4: Retry Zotero with DOI if step 2 failed but OA found a DOI
356        if entry.zotero_key.is_none() {
357            if let (Some(z), Some(doi)) = (zotero, &entry.doi.clone()) {
358                let bare = strip_doi_prefix(doi);
359                let params = ItemListParams {
360                    q: Some(bare.to_string()),
361                    qmode: Some("everything".into()),
362                    limit: Some(1),
363                    ..Default::default()
364                };
365                if let Ok(resp) = z.list_top_items(&params).await {
366                    if let Some(item) = resp.items.into_iter().next() {
367                        entry.zotero_key = Some(item.key.clone());
368                        // Only fill Zotero fields not already set by OA
369                        if entry.isbn.is_none() {
370                            if let Some(isbn) = &item.data.isbn {
371                                if !isbn.is_empty() {
372                                    entry.isbn = Some(vec![isbn.clone()]);
373                                }
374                            }
375                        }
376                    }
377                }
378            }
379        }
380    }
381
382    // Step 5: Fail if nothing at all was resolved
383    if entry.zotero_key.is_none()
384        && entry.openalex_id.is_none()
385        && entry.doi.is_none()
386        && entry.title.is_none()
387    {
388        return Err(SelectionError::CannotResolve(input.to_string()));
389    }
390
391    Ok(entry)
392}
393
394async fn resolve_via_openalex(
395    input: &str,
396    client: &papers_openalex::OpenAlexClient,
397    is_doi: bool,
398    is_oa_id: bool,
399) -> Option<papers_openalex::Work> {
400    if is_doi {
401        let bare = strip_doi_prefix(input);
402        let oa_id = format!("doi:{bare}");
403        client
404            .get_work(&oa_id, &GetParams::default())
405            .await
406            .ok()
407    } else if is_oa_id {
408        let id = input
409            .strip_prefix("https://openalex.org/")
410            .unwrap_or(input);
411        client.get_work(id, &GetParams::default()).await.ok()
412    } else {
413        let params = ListParams {
414            search: Some(input.to_string()),
415            per_page: Some(1),
416            ..Default::default()
417        };
418        client
419            .list_works(&params)
420            .await
421            .ok()
422            .and_then(|resp| resp.results.into_iter().next())
423    }
424}
425
426fn fill_from_zotero_item(entry: &mut SelectionEntry, item: &papers_zotero::Item) {
427    if entry.title.is_none() {
428        entry.title = item.data.title.clone();
429    }
430    if entry.authors.is_none() {
431        let authors: Vec<String> = item
432            .data
433            .creators
434            .iter()
435            .filter_map(|c| {
436                if let (Some(first), Some(last)) = (&c.first_name, &c.last_name) {
437                    let name = format!("{first} {last}").trim().to_string();
438                    if !name.is_empty() {
439                        return Some(name);
440                    }
441                }
442                c.name.clone().filter(|n| !n.is_empty())
443            })
444            .collect();
445        if !authors.is_empty() {
446            entry.authors = Some(authors);
447        }
448    }
449    if entry.year.is_none() {
450        // Try parsed_date first, then raw date field
451        let date_str = item
452            .meta
453            .parsed_date
454            .as_deref()
455            .or_else(|| item.data.date.as_deref());
456        entry.year = date_str
457            .and_then(|d| d.split('-').next())
458            .and_then(|y| y.parse().ok());
459    }
460    if entry.doi.is_none() {
461        entry.doi = item.data.doi.as_deref().map(|d| {
462            strip_doi_prefix(d).to_string()
463        });
464    }
465    if entry.issn.is_none() {
466        if let Some(issn) = &item.data.issn {
467            if !issn.is_empty() {
468                entry.issn = Some(vec![issn.clone()]);
469            }
470        }
471    }
472    if entry.isbn.is_none() {
473        if let Some(isbn) = &item.data.isbn {
474            if !isbn.is_empty() {
475                entry.isbn = Some(vec![isbn.clone()]);
476            }
477        }
478    }
479}
480
481fn fill_from_oa_work(entry: &mut SelectionEntry, work: &papers_openalex::Work) {
482    if entry.openalex_id.is_none() {
483        let id = work
484            .id
485            .strip_prefix("https://openalex.org/")
486            .unwrap_or(&work.id);
487        entry.openalex_id = Some(id.to_string());
488    }
489    if entry.doi.is_none() {
490        entry.doi = work.doi.as_deref().map(|d| {
491            d.strip_prefix("https://doi.org/")
492                .or_else(|| d.strip_prefix("http://doi.org/"))
493                .unwrap_or(d)
494                .to_string()
495        });
496    }
497    if entry.title.is_none() {
498        entry.title = work.display_name.clone().or_else(|| work.title.clone());
499    }
500    if entry.authors.is_none() {
501        if let Some(authorships) = &work.authorships {
502            let names: Vec<String> = authorships
503                .iter()
504                .filter_map(|a| a.author.as_ref()?.display_name.clone())
505                .collect();
506            if !names.is_empty() {
507                entry.authors = Some(names);
508            }
509        }
510    }
511    if entry.year.is_none() {
512        entry.year = work.publication_year.map(|y| y as u32);
513    }
514    if entry.issn.is_none() {
515        entry.issn = work
516            .primary_location
517            .as_ref()
518            .and_then(|l| l.source.as_ref())
519            .and_then(|s| s.issn.clone());
520    }
521}