#![allow(clippy::upper_case_acronyms)]
use async_trait::async_trait;
use exn::{Exn, ResultExt};
use serde_json::Value as JsonValue;
use url::Url;
use reqwest::{Client, StatusCode};
use std::{any::Any, str::FromStr};
use crate::helper::json_extract;
use crate::{
repo::{Endpoint, FileMeta, RepoError},
Checksum, DatasetBackend, DirMeta, Entry,
};
#[derive(Debug)]
pub struct OSF {
pub id: String,
}
impl OSF {
#[must_use]
pub fn new(id: impl Into<String>) -> Self {
OSF { id: id.into() }
}
}
#[async_trait]
impl DatasetBackend for OSF {
fn root_url(&self) -> Url {
let mut url = Url::from_str("https://api.osf.io/v2/nodes/").unwrap();
url.path_segments_mut().unwrap().extend([&self.id, "files"]);
url
}
async fn list(&self, client: &Client, dir: DirMeta) -> Result<Vec<Entry>, Exn<RepoError>> {
let resp = client
.get(dir.api_url())
.send()
.await
.or_raise(|| RepoError {
message: format!("fail at client sent GET {}", dir.api_url()),
})?;
let resp = resp.error_for_status().map_err(|err| match err.status() {
Some(StatusCode::NOT_FOUND) => RepoError {
message: format!("resource not found when GET {}", dir.api_url()),
},
Some(status_code) => RepoError {
message: format!(
"fail GET {}, with state code: {}",
dir.api_url(),
status_code.as_str()
),
},
None => RepoError {
message: format!("fail GET {}, network / protocol error", dir.api_url(),),
},
})?;
let resp: JsonValue = resp.json().await.or_raise(|| RepoError {
message: format!("fail GET {}, unable to convert to json", dir.api_url(),),
})?;
let files = resp
.get("data")
.and_then(JsonValue::as_array)
.ok_or_else(|| RepoError {
message: "field with key 'data' not resolve to an json array".to_string(),
})?;
let mut entries = Vec::with_capacity(files.len());
for (idx, filej) in files.iter().enumerate() {
let endpoint = Endpoint {
parent_url: dir.api_url(),
key: Some(format!("data.{idx}")),
};
let name: String = json_extract(filej, "attributes.name").or_raise(|| RepoError {
message: "fail to extracting 'attributes.name' as String from json".to_string(),
})?;
let kind: String = json_extract(filej, "attributes.kind").or_raise(|| RepoError {
message: "fail to extracting 'attributes.kind' as String from json".to_string(),
})?;
let guess = mime_guess::from_path(&name);
match kind.as_ref() {
"file" => {
let size: u64 =
json_extract(filej, "attributes.size").or_raise(|| RepoError {
message: "fail to extracting 'attributes.size' as u64 from json"
.to_string(),
})?;
let download_url: String =
json_extract(filej, "links.download").or_raise(|| RepoError {
message: "fail to extracting 'links.download' as String from json"
.to_string(),
})?;
let download_url = Url::from_str(&download_url).or_raise(|| RepoError {
message: format!("cannot parse '{download_url}' download url"),
})?;
let hash: String = json_extract(filej, "attributes.extra.hashes.sha256")
.or_raise(|| RepoError {
message: "fail to extracting 'attributes.extra.hashes.sha256' as String from json"
.to_string(),
})?;
let checksum = Checksum::Sha256(hash);
let file = FileMeta::new(
None,
None,
dir.join(&name),
endpoint,
download_url,
Some(size),
vec![checksum],
guess.first(),
None,
None,
None,
true,
);
entries.push(Entry::File(file));
}
"folder" => {
let api_url: String =
json_extract(filej, "relationships.files.links.related.href")
.or_raise(|| RepoError {
message: "fail to extracting 'relationships.files.links.related.href' as String from json"
.to_string(),
})?;
let api_url = Url::from_str(&api_url).or_raise(|| RepoError {
message: format!("cannot parse '{api_url}' api url"),
})?;
let dir = DirMeta::new(dir.join(&name), api_url, dir.root_url());
entries.push(Entry::Dir(dir));
}
typ => {
exn::bail!(RepoError {
message: format!(
"kind can be 'dataset' or 'kind' for an OSF entry, got {typ}"
)
});
}
}
}
Ok(entries)
}
fn as_any(&self) -> &dyn Any {
self
}
}