#![allow(clippy::upper_case_acronyms)]
use async_trait::async_trait;
use exn::Exn;
use url::Url;
use reqwest::Client;
use std::{any::Any, str::FromStr};
use crate::{
repo::{Endpoint, FileMeta, RepoError},
DatasetBackend, DirMeta, Entry,
};
/// Handle for a single arXiv paper, addressed by its identifier.
#[derive(Debug)]
pub struct Arxiv {
    /// Identifier used as the last path segment of `https://arxiv.org/pdf/<id>`.
    pub id: String,
}

impl Arxiv {
    /// Creates a handle for the paper with the given identifier.
    ///
    /// Accepts anything convertible into a `String`; the value is stored
    /// as-is, without validation or normalisation.
    #[must_use]
    pub fn new(id: impl Into<String>) -> Self {
        let id = id.into();
        Self { id }
    }
}
#[async_trait]
impl DatasetBackend for Arxiv {
    /// Builds the PDF URL of this paper: `https://arxiv.org/pdf/<id>`.
    ///
    /// The id is appended as a single path segment, so the `url` crate
    /// percent-encodes any `/` or `%` it contains.
    // NOTE(review): an id containing a slash (e.g. `hep-th/9901001`) therefore
    // ends up as the single segment `hep-th%2F9901001` — confirm this is intended.
    fn root_url(&self) -> Url {
        let mut url = Url::from_str("https://arxiv.org")
            .expect("hard-coded arXiv base URL is well-formed");
        url.path_segments_mut()
            .expect("an https URL can always be a base")
            .extend(["pdf", self.id.as_str()]);
        url
    }

    /// Lists the contents of `dir`: always exactly one PDF file entry.
    ///
    /// The entry is derived purely from `dir.root_url()`; the HTTP client is
    /// not used and nothing is awaited. The file is named `<segment>.pdf`,
    /// where `<segment>` is the second path segment of the root URL (still in
    /// its percent-encoded form), and the root URL itself is the download URL.
    ///
    /// # Panics
    ///
    /// Panics if `dir.root_url()` cannot be a base URL or has fewer than two
    /// path segments, i.e. is not of the `/pdf/<id>` shape produced by
    /// [`DatasetBackend::root_url`] above.
    async fn list(
        &self,
        _client: &Client,
        dir: DirMeta,
    ) -> Result<Vec<Entry>, Exn<RepoError>> {
        let root_url = dir.root_url();
        // Only the second segment is needed, so step the iterator directly
        // instead of collecting every segment into a temporary `Vec`.
        let name = root_url
            .path_segments()
            .expect("arXiv root URL can always be a base")
            .nth(1)
            .expect("arXiv root URL has the form /pdf/<id>");
        let download_url = root_url.clone();
        let endpoint = Endpoint {
            parent_url: dir.root_url(),
            key: Some(name.to_string()),
        };
        let file = FileMeta::new(
            None,
            None,
            dir.join(&format!("{name}.pdf")),
            endpoint,
            download_url,
            None,
            vec![],
            Some(mime::APPLICATION_PDF),
            None,
            None,
            None,
            true,
        );
        Ok(vec![Entry::File(file)])
    }

    /// Exposes this backend as `&dyn Any` so callers can downcast to `Arxiv`.
    fn as_any(&self) -> &dyn Any {
        self
    }
}