use async_trait::async_trait;
use std::sync::Arc;
use crate::models::{Paper, PaperBuilder, SearchQuery, SearchResponse, SourceType};
use crate::sources::{Source, SourceCapabilities, SourceError};
use crate::utils::{api_retry_config, with_retry, HttpClient};
const GOOGLE_SCHOLAR_URL: &str = "https://scholar.google.com";
const GOOGLE_SCHOLAR_ENABLED_VAR: &str = "GOOGLE_SCHOLAR_ENABLED";
#[derive(Debug, Clone)]
pub struct GoogleScholarSource {
client: Arc<HttpClient>,
enabled: bool,
}
impl GoogleScholarSource {
pub fn new() -> Result<Self, SourceError> {
let enabled = std::env::var(GOOGLE_SCHOLAR_ENABLED_VAR).unwrap_or_default() == "true";
if !enabled {
tracing::info!(
"Google Scholar source is disabled. Set {} to enable.",
GOOGLE_SCHOLAR_ENABLED_VAR
);
}
Ok(Self {
client: Arc::new(HttpClient::new()?),
enabled,
})
}
}
impl Default for GoogleScholarSource {
fn default() -> Self {
Self::new().expect("Failed to create GoogleScholarSource")
}
}
#[async_trait]
impl Source for GoogleScholarSource {
fn id(&self) -> &str {
"google_scholar"
}
fn name(&self) -> &str {
"Google Scholar"
}
fn capabilities(&self) -> SourceCapabilities {
if !self.enabled {
return SourceCapabilities::empty();
}
SourceCapabilities::SEARCH | SourceCapabilities::DOI_LOOKUP
}
async fn search(&self, query: &SearchQuery) -> Result<SearchResponse, SourceError> {
if !self.enabled {
return Err(SourceError::Other(format!(
"Google Scholar is disabled. Set {} to enable.",
GOOGLE_SCHOLAR_ENABLED_VAR
)));
}
let max_results = query.max_results.min(10);
let url = format!(
"{}?hl=en&q={}&start={}&num={}",
GOOGLE_SCHOLAR_URL,
urlencoding::encode(&query.query),
0,
max_results
);
let client = Arc::clone(&self.client);
let url_for_retry = url.clone();
let response = with_retry(api_retry_config(), || {
let client = Arc::clone(&client);
let url = url_for_retry.clone();
async move {
let request = client.get(&url).header(
"User-Agent",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
);
let response = request.send().await.map_err(|e| {
SourceError::Network(format!("Failed to search Google Scholar: {}", e))
})?;
if !response.status().is_success() {
let status = response.status();
return Err(SourceError::Api(format!(
"Google Scholar returned status: {}",
status
)));
}
let text = response
.text()
.await
.map_err(|e| SourceError::Parse(format!("Failed to read response: {}", e)))?;
Ok(text)
}
})
.await?;
let papers = self.parse_results(&response, &query.query)?;
let papers_len = papers.len();
let mut search_response = SearchResponse::new(papers, "Google Scholar", &query.query);
search_response.total_results = Some(papers_len);
Ok(search_response)
}
async fn get_by_doi(&self, doi: &str) -> Result<Paper, SourceError> {
if !self.enabled {
return Err(SourceError::Other(format!(
"Google Scholar is disabled. Set {} to enable.",
GOOGLE_SCHOLAR_ENABLED_VAR
)));
}
let url = format!(
"{}?hl=en&q={}&btnI",
GOOGLE_SCHOLAR_URL,
urlencoding::encode(doi)
);
let client = Arc::clone(&self.client);
let response = client
.get(&url)
.header(
"User-Agent",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
)
.send()
.await
.map_err(|e| {
SourceError::Network(format!("Failed to lookup DOI in Google Scholar: {}", e))
})?;
if !response.status().is_success() {
return Err(SourceError::NotFound(format!(
"Paper not found in Google Scholar: {}",
doi
)));
}
let clean_doi = doi
.replace("https://doi.org/", "")
.replace("doi:", "")
.trim()
.to_string();
Ok(PaperBuilder::new(
clean_doi.clone(),
"Paper from Google Scholar".to_string(),
format!("https://doi.org/{}", clean_doi),
SourceType::GoogleScholar,
)
.doi(doi)
.build())
}
}
impl GoogleScholarSource {
fn parse_results(&self, _html: &str, _query: &str) -> Result<Vec<Paper>, SourceError> {
tracing::warn!("Google Scholar parsing not fully implemented - returning empty results");
Ok(vec![])
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_source_creation() {
let source = GoogleScholarSource::new();
assert!(source.is_ok());
}
#[test]
fn test_source_disabled_by_default() {
let source = GoogleScholarSource::new();
assert!(!source.unwrap().enabled);
}
}