Skip to main content

research_master/sources/
mod.rs

1//! Research source plugins with extensible trait-based architecture.
2//!
3//! This module defines the [`Source`] trait that all research sources implement.
4//! New sources can be added by implementing this trait and registering them with
5//! the [`SourceRegistry`].
6//!
7//! # Feature Flags
8//!
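//! Individual sources can be disabled at compile time using Cargo features.
//! The `#[cfg]` gates in this module test the `source-`-prefixed form of each
//! name (for example `source-arxiv`; the CORE source is gated by
//! `source-core-repo`):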
10//!
11//! - `arxiv` - Enable arXiv source (default: enabled)
12//! - `pubmed` - Enable PubMed source (default: enabled)
13//! - `biorxiv` - Enable bioRxiv source (default: enabled)
14//! - `semantic` - Enable Semantic Scholar source (default: enabled)
15//! - `openalex` - Enable OpenAlex source (default: enabled)
16//! - `crossref` - Enable CrossRef source (default: enabled)
17//! - `iacr` - Enable IACR ePrint source (default: enabled)
18//! - `pmc` - Enable PMC source (default: enabled)
19//! - `hal` - Enable HAL source (default: enabled)
20//! - `dblp` - Enable DBLP source (default: enabled)
21//! - `ssrn` - Enable SSRN source (default: enabled)
22//! - `dimensions` - Enable Dimensions source (default: enabled)
23//! - `ieee_xplore` - Enable IEEE Xplore source (default: enabled)
24//! - `core` - Enable CORE source (default: enabled)
25//! - `zenodo` - Enable Zenodo source (default: enabled)
26//! - `unpaywall` - Enable Unpaywall source (default: enabled)
27//! - `mdpi` - Enable MDPI source (default: enabled)
28//! - `jstor` - Enable JSTOR source (default: enabled)
29//! - `scispace` - Enable SciSpace source (default: enabled)
30//! - `acm` - Enable ACM Digital Library source (default: enabled)
31//! - `connected_papers` - Enable Connected Papers source (default: enabled)
32//! - `doaj` - Enable DOAJ source (default: enabled)
33//! - `worldwidescience` - Enable WorldWideScience source (default: enabled)
34//! - `osf` - Enable OSF Preprints source (default: enabled)
35//! - `base` - Enable BASE source (default: enabled)
36//! - `springer` - Enable Springer source (default: enabled)
37//! - `google_scholar` - Enable Google Scholar source (default: disabled, requires GOOGLE_SCHOLAR_ENABLED=true)
38//!
39//! # Feature Groups
40//!
41//! - `core` - arxiv, pubmed, semantic
42//! - `preprints` - arxiv, biorxiv
43//! - `full` - All sources (default)
44//!
45//! # Examples
46//!
47//! ```bash
48//! # Build with only core sources
49//! cargo build --no-default-features --features core
50//!
51//! # Build with specific sources
52//! cargo build --no-default-features --features arxiv,semantic
53//!
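//! # Cargo has no syntax for subtracting a feature (`--features -dblp` is not
//! # supported); to build without dblp, turn off the defaults and list the
//! # sources you want to keep
//! cargo build --no-default-features --features arxiv,pubmed,semantic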
56//! ```
57
58#[cfg(feature = "source-acm")]
59mod acm;
60#[cfg(feature = "source-arxiv")]
61mod arxiv;
62#[cfg(feature = "source-base")]
63mod base;
64#[cfg(feature = "source-biorxiv")]
65mod biorxiv;
66#[cfg(feature = "source-connected_papers")]
67mod connected_papers;
68#[cfg(feature = "source-core-repo")]
69mod core;
70#[cfg(feature = "source-crossref")]
71mod crossref;
72#[cfg(feature = "source-dblp")]
73mod dblp;
74#[cfg(feature = "source-dimensions")]
75mod dimensions;
76#[cfg(feature = "source-doaj")]
77mod doaj;
78#[cfg(feature = "source-europe_pmc")]
79mod europe_pmc;
80#[cfg(feature = "source-google_scholar")]
81mod google_scholar;
82#[cfg(feature = "source-hal")]
83mod hal;
84#[cfg(feature = "source-iacr")]
85mod iacr;
86#[cfg(feature = "source-ieee_xplore")]
87mod ieee_xplore;
88#[cfg(feature = "source-jstor")]
89mod jstor;
90#[cfg(feature = "source-mdpi")]
91mod mdpi;
92#[cfg(feature = "source-openalex")]
93mod openalex;
94#[cfg(feature = "source-osf")]
95mod osf;
96#[cfg(feature = "source-pmc")]
97mod pmc;
98#[cfg(feature = "source-pubmed")]
99mod pubmed;
100mod registry;
101#[cfg(feature = "source-scispace")]
102mod scispace;
103#[cfg(feature = "source-semantic")]
104mod semantic;
105#[cfg(feature = "source-springer")]
106mod springer;
107#[cfg(feature = "source-ssrn")]
108mod ssrn;
109#[cfg(feature = "source-unpaywall")]
110mod unpaywall;
111#[cfg(feature = "source-worldwidescience")]
112mod worldwidescience;
113#[cfg(feature = "source-zenodo")]
114mod zenodo;
115
116pub mod mock;
117
118pub use mock::MockSource;
119
120pub use registry::{SourceCapabilities, SourceRegistry};
121
122use crate::models::{
123    CitationRequest, DownloadRequest, DownloadResult, Paper, ReadRequest, ReadResult, SearchQuery,
124    SearchResponse,
125};
126use async_trait::async_trait;
127
128/// The Source trait defines the interface for all research source plugins.
129///
130/// # Implementing a New Source
131///
132/// To add a new research source:
133///
134/// 1. Create a new struct that implements `Source`
135/// 2. Implement the required methods (at minimum `id`, `name`, and `search`)
136/// 3. Implement optional methods if the source supports them
137/// 4. Add the source to `SourceRegistry::new()` or register it dynamically
138#[async_trait]
139pub trait Source: Send + Sync + std::fmt::Debug {
140    /// Unique identifier for this source (used in tool names, e.g., "arxiv", "pubmed")
141    fn id(&self) -> &str;
142
143    /// Human-readable name of this source
144    fn name(&self) -> &str;
145
146    /// Describe the capabilities of this source
147    fn capabilities(&self) -> SourceCapabilities {
148        SourceCapabilities::SEARCH
149    }
150
151    /// Whether this source supports search
152    fn supports_search(&self) -> bool {
153        self.capabilities().contains(SourceCapabilities::SEARCH)
154    }
155
156    /// Whether this source supports downloading PDFs
157    fn supports_download(&self) -> bool {
158        self.capabilities().contains(SourceCapabilities::DOWNLOAD)
159    }
160
161    /// Whether this source supports reading/parsing PDFs
162    fn supports_read(&self) -> bool {
163        self.capabilities().contains(SourceCapabilities::READ)
164    }
165
166    /// Whether this source supports citation/reference lookup
167    fn supports_citations(&self) -> bool {
168        self.capabilities().contains(SourceCapabilities::CITATIONS)
169    }
170
171    /// Whether this source supports lookup by DOI
172    fn supports_doi_lookup(&self) -> bool {
173        self.capabilities().contains(SourceCapabilities::DOI_LOOKUP)
174    }
175
176    /// Whether this source supports author search
177    fn supports_author_search(&self) -> bool {
178        self.capabilities()
179            .contains(SourceCapabilities::AUTHOR_SEARCH)
180    }
181
182    // ========== SEARCH METHODS ==========
183
184    /// Search for papers matching the query
185    async fn search(&self, _query: &SearchQuery) -> Result<SearchResponse, SourceError> {
186        Err(SourceError::NotImplemented)
187    }
188
189    /// Search for papers by a specific author
190    async fn search_by_author(
191        &self,
192        _author: &str,
193        _max_results: usize,
194        _year: Option<&str>,
195    ) -> Result<SearchResponse, SourceError> {
196        Err(SourceError::NotImplemented)
197    }
198
199    // ========== DOWNLOAD METHODS ==========
200
201    /// Download a paper's PDF to the specified path
202    async fn download(&self, _request: &DownloadRequest) -> Result<DownloadResult, SourceError> {
203        Err(SourceError::NotImplemented)
204    }
205
206    // ========== READ METHODS ==========
207
208    /// Read and extract text from a paper's PDF
209    async fn read(&self, _request: &ReadRequest) -> Result<ReadResult, SourceError> {
210        Err(SourceError::NotImplemented)
211    }
212
213    // ========== CITATION METHODS ==========
214
215    /// Get papers that cite this paper
216    async fn get_citations(
217        &self,
218        _request: &CitationRequest,
219    ) -> Result<SearchResponse, SourceError> {
220        Err(SourceError::NotImplemented)
221    }
222
223    /// Get papers referenced by this paper
224    async fn get_references(
225        &self,
226        _request: &CitationRequest,
227    ) -> Result<SearchResponse, SourceError> {
228        Err(SourceError::NotImplemented)
229    }
230
231    /// Get related papers
232    async fn get_related(&self, _request: &CitationRequest) -> Result<SearchResponse, SourceError> {
233        Err(SourceError::NotImplemented)
234    }
235
236    // ========== LOOKUP METHODS ==========
237
238    /// Get a paper by its DOI
239    async fn get_by_doi(&self, _doi: &str) -> Result<Paper, SourceError> {
240        Err(SourceError::NotImplemented)
241    }
242
243    /// Get a paper by its ID (source-specific)
244    async fn get_by_id(&self, _id: &str) -> Result<Paper, SourceError> {
245        Err(SourceError::NotImplemented)
246    }
247
248    /// Validate that a paper ID is correctly formatted for this source
249    fn validate_id(&self, _id: &str) -> Result<(), SourceError> {
250        Ok(())
251    }
252}
253
254/// Errors that can occur when interacting with a source
255#[derive(Debug, thiserror::Error)]
256pub enum SourceError {
257    /// The requested operation is not implemented for this source
258    #[error("Operation not implemented for this source")]
259    NotImplemented,
260
261    /// Network or HTTP error
262    #[error("Network error: {0}")]
263    Network(String),
264
265    /// Parsing error (XML, JSON, HTML, etc.)
266    #[error("Parse error: {0}")]
267    Parse(String),
268
269    /// Invalid request parameters
270    #[error("Invalid request: {0}")]
271    InvalidRequest(String),
272
273    /// Rate limit exceeded
274    #[error("Rate limit exceeded")]
275    RateLimit,
276
277    /// Paper not found
278    #[error("Paper not found: {0}")]
279    NotFound(String),
280
281    /// API error from the source
282    #[error("API error: {0}")]
283    Api(String),
284
285    /// IO error (file system)
286    #[error("IO error: {0}")]
287    Io(#[from] std::io::Error),
288
289    /// Other error
290    #[error("Error: {0}")]
291    Other(String),
292}
293
294impl From<reqwest::Error> for SourceError {
295    fn from(err: reqwest::Error) -> Self {
296        SourceError::Network(err.to_string())
297    }
298}
299
300impl From<serde_json::Error> for SourceError {
301    fn from(err: serde_json::Error) -> Self {
302        SourceError::Parse(format!("JSON: {}", err))
303    }
304}
305
306impl From<quick_xml::DeError> for SourceError {
307    fn from(err: quick_xml::DeError) -> Self {
308        SourceError::Parse(format!("XML: {}", err))
309    }
310}
311
#[cfg(test)]
mod tests {
    use super::*;

    /// OR-ing two capability flags yields a set that contains both of them
    /// and does not contain a flag that was never added.
    #[test]
    fn test_source_capabilities() {
        let combined = SourceCapabilities::SEARCH | SourceCapabilities::DOWNLOAD;

        assert!(
            combined.contains(SourceCapabilities::SEARCH),
            "SEARCH was OR-ed into the set"
        );
        assert!(
            combined.contains(SourceCapabilities::DOWNLOAD),
            "DOWNLOAD was OR-ed into the set"
        );
        assert!(
            !combined.contains(SourceCapabilities::CITATIONS),
            "CITATIONS was never added to the set"
        );
    }
}
324}