use askalono::{Store, TextData};
use futures::{future::BoxFuture, Future, FutureExt, TryFutureExt};
use licensebat_core::{Comment, Dependency, RetrievedDependency};
use reqwest::Client;
use scraper::{ElementRef, Html, Selector};
use selectors::Element;
use std::{sync::Arc, vec};
use tracing::instrument;
pub trait Retriever: Send + Sync + std::fmt::Debug {
type Error: std::fmt::Debug + std::fmt::Display;
type Response: Future<Output = Result<RetrievedDependency, Self::Error>> + Send;
fn get_dependency(&self, dependency: Dependency) -> Self::Response;
}
/// Retriever for Dart packages: its `Retriever` implementation requests the
/// package's license page on pub.dev and scrapes the license from the HTML.
pub struct Hosted {
/// HTTP client used to request the license page.
client: Client,
/// Shared, optional askalono store. When it holds a `Store`, the scraped
/// license text is analyzed against it; when it is `None`, only the license
/// name shown on the page is reported.
store: Arc<Option<Store>>,
}
impl Hosted {
#[must_use]
pub fn new(client: Client, store: Arc<Option<Store>>) -> Self {
Self { client, store }
}
}
impl Default for Hosted {
fn default() -> Self {
Self::new(Client::new(), Arc::new(None))
}
}
impl Clone for Hosted {
fn clone(&self) -> Self {
Self {
client: self.client.clone(),
store: self.store.clone(),
}
}
}
impl std::fmt::Debug for Hosted {
    /// Formats the retriever as a struct with its `client` and a placeholder
    /// for `store` that only tells whether a store is present.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The store's contents are never printed, only whether one is present.
        let store_label = match &*self.store {
            Some(_) => "Some(Store)",
            None => "None",
        };

        let mut builder = f.debug_struct("Hosted");
        builder.field("client", &self.client);
        builder.field("store", &store_label);
        builder.finish()
    }
}
impl Retriever for Hosted {
type Error = reqwest::Error;
type Response = BoxFuture<'static, Result<RetrievedDependency, Self::Error>>;
#[instrument(skip(self), level = "debug")]
fn get_dependency(&self, dependency: Dependency) -> Self::Response {
let url = format!(
"https://pub.dev/packages/{}/versions/{}",
dependency.name, dependency.version,
);
let store = self.store.clone();
self.client
.get(format!("{}/license", url))
.send()
.and_then(reqwest::Response::text)
.map(move |html| {
html.map(|html| {
let url = url.clone();
let document = Html::parse_document(&html);
let declared_license = Selector::parse(r#"h3[class="title"]"#).ok()
.and_then(|selector| {
document
.select(&selector)
.filter(|s| s.inner_html() == "License")
.map(|s| s.next_sibling_element().and_then(|sibling| get_imprecise_license(&sibling)))
.next()
.flatten()
});
let mut official_license = Selector::parse(r#".detail-container.detail-body-main .highlight pre"#).ok().and_then( |selector| {
document.select(&selector).map(|s| s.inner_html()).next()
});
if official_license.is_none() {
official_license = Selector::parse(r#".detail-container.detail-body-main .tab-content"#).ok().and_then( |selector| {
document.select(&selector).map(|s| s.inner_html()).next()
});
}
let declared_licenses = declared_license.clone().map(|x| vec![x]);
if let (Some(official_license), Some(store)) = (official_license, store.as_ref()) {
#[allow(clippy::single_match_else)]
match declared_license.as_deref() {
Some("MIT") => retrieved_dependency(&dependency, declared_licenses, None, Some(url), None, None),
_ => {
let result = store.analyze(&TextData::from(official_license.as_str()));
tracing::debug!(
"Detailed scrapping: SCORE {:?}, LICENSE: {}",
result.score,
result.name
);
let (license, comment) = if result.score >= 0.8 {
let comment = if Some(result.name.replace('-', " ")) == declared_license {
None
} else {
let comment = format!(
"Pub Dev license: {}. Our score for **{}** is **{:.2}%**.",
declared_license.unwrap_or_else(|| "NOT DECLARED".to_owned()),
result.name,
result.score * 100.0
);
Some(comment)
};
(Some(result.name.to_string()), comment)
} else {
let comment = format!(
"Using **Pub Dev Generic License**. Our analysis, though, estimated that it could be **{}** with a **{:.2}%** score.",
result.name,
result.score * 100.0
);
(declared_license.clone(), Some(comment))
};
retrieved_dependency(
&dependency,
license.map(|l| vec![l]),
None,
Some(url),
comment.map(Comment::non_removable),
Some(vec![(result.name.to_string(), result.score)])
)
}
}
} else {
retrieved_dependency(&dependency, declared_licenses, None, Some(url), Some(Comment::removable("Using **Pub Dev Generic License**. We couldn't get the original license.")), None)
}
})
}).boxed()
}
}
/// Assembles a `RetrievedDependency` for a Dart package.
///
/// The name, version, `is_dev` and `is_optional` values are taken from
/// `dependency`; `crate::DART` is passed as the third constructor argument
/// (presumably the dependency type — confirm against
/// `RetrievedDependency::new`). Every other argument is forwarded untouched.
fn retrieved_dependency(
    dependency: &Dependency,
    licenses: Option<Vec<String>>,
    error: Option<String>,
    url: Option<String>,
    comment: Option<Comment>,
    suggested_licenses: Option<Vec<(String, f32)>>,
) -> RetrievedDependency {
    let name = dependency.name.clone();
    let version = dependency.version.clone();
    let dart = crate::DART.to_owned();

    RetrievedDependency::new(
        name,
        version,
        dart,
        url,
        licenses,
        error,
        comment,
        suggested_licenses,
        dependency.is_dev,
        dependency.is_optional,
    )
}
/// Extracts the license name from the inner HTML of `sibling`.
///
/// The name is whatever precedes the first `" ("` in the markup; `None` is
/// returned when that marker is absent. When that prefix starts with an
/// `<img` tag and contains `">`, everything up to and including the first
/// `">` is dropped.
fn get_imprecise_license(sibling: &ElementRef) -> Option<String> {
    let markup = sibling.inner_html();
    let cut = markup.find(" (")?;
    let head = &markup[..cut];

    // Skip a leading `<img ...">` tag if there is one.
    let name = match head.find("\">") {
        Some(tag_end) if head.starts_with("<img") => &head[tag_end + 2..],
        _ => head,
    };
    Some(name.to_owned())
}