licensebat_rust/retriever/
docs_rs.rs1use super::utils::crates_io_retrieved_dependency;
11use askalono::{Store, TextData};
12use futures::{future::BoxFuture, Future, FutureExt, TryFutureExt};
13use licensebat_core::{Dependency, RetrievedDependency};
14use reqwest::Client;
15use std::{string::String, sync::Arc};
16use thiserror::Error;
17use tracing::instrument;
18
19pub trait Retriever: Send + Sync + std::fmt::Debug {
21 type Response: Future<Output = RetrievedDependency> + Send;
24 fn get_dependency(&self, dependency: Dependency) -> Self::Response;
26}
27
28pub struct DocsRs {
40 client: Client,
41 store: Arc<Option<Store>>,
42}
43
44impl DocsRs {
45 #[must_use]
48 pub const fn new(client: Client, store: Arc<Option<Store>>) -> Self {
49 Self { client, store }
50 }
51}
52
53impl Default for DocsRs {
54 fn default() -> Self {
57 Self::new(Client::new(), Arc::new(None))
58 }
59}
60
61impl Clone for DocsRs {
62 fn clone(&self) -> Self {
63 Self {
64 client: self.client.clone(),
65 store: self.store.clone(),
66 }
67 }
68}
69
70impl std::fmt::Debug for DocsRs {
71 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72 f.debug_struct("DocsRs")
73 .field("client", &self.client)
74 .field(
75 "store",
76 if self.store.is_some() {
77 &"Some(Store)"
78 } else {
79 &"None"
80 },
81 )
82 .finish()
83 }
84}
85
86impl Retriever for DocsRs {
87 type Response = BoxFuture<'static, RetrievedDependency>;
88
89 #[instrument(skip(self), level = "debug")]
90 fn get_dependency(&self, dependency: Dependency) -> Self::Response {
91 let crate_url = docs_rs_url(&dependency.name, &dependency.version);
92 let cargo_toml_url = format!("{crate_url}Cargo.toml");
93
94 let dep_clone = dependency.clone();
95 let client = self.client.clone();
96 let store = self.store.clone();
97
98 async move {
99 let html = client
100 .get(&cargo_toml_url)
101 .header("User-Agent", "licensebat-cli (licensebat.com)")
102 .send()
103 .await?
104 .text()
105 .await?;
106
107 let license_info = easy_scraper::Pattern::new(
112 r#"<div id="source-code"><pre><code>{{value}}</code></pre></div>"#,
113 )
114 .map(|pattern| pattern.matches(&html))
115 .map(|matches| {
116 matches
117 .into_iter()
118 .map(|m| m.get("value").unwrap().to_string())
119 .collect::<Vec<String>>().join("\n")
120 })
121 .map(|code| {
122 let text= code
123 .replace("\n=\n", "=");
124 text.lines().find(|l| l.starts_with("license")).map(|l| {
127 let items = l.split('=').map(|x| x.trim()).collect::<Vec<_>>();
128 (items[0].to_string(), items[1].replace('\"', ""))
129 })
130 });
131
132 let retrieved_dependency = match license_info {
133 Ok(license_info) => {
134 if let Some((key, value)) = license_info {
135 match key.as_ref() {
136 "license" => {
137 crates_io_retrieved_dependency(&dependency, Some(vec![value]), None, None, None)
139 }
140 "license-file" => {
141 get_retrieved_dependency_from_license_file(store, crate_url, value, client, &dependency).await
142 }
143 _ => {
145 tracing::error!("Unknown license key: {}", key);
146 crates_io_retrieved_dependency(&dependency, None, Some("Unexpected license key while parsing cargo.toml"), None, None)
147 }
148 }
149 } else {
150 let user_error = "No information found in Cargo.toml regarding license or license-file.";
151 tracing::error!(
152 "{} Crate {} : {}",
153 user_error,
154 &dependency.name,
155 &dependency.version,
156 );
157 crates_io_retrieved_dependency(&dependency, None, Some(user_error), None, None)
158 }
159 }
160 Err(e) => {
161 tracing::error!(error = ?e, "Error trying to parse docs.rs for crate {} : {}", &dependency.name, &dependency.version);
162 crates_io_retrieved_dependency(
163 &dependency,
164 None,
165 Some("Error trying to parse docs.rs"), None, None
166 )
167 }
168 };
169
170 Ok::<_, anyhow::Error>(retrieved_dependency)
171 }.unwrap_or_else(move |e| {
172 let error = e.to_string();
173 crates_io_retrieved_dependency(&dep_clone, None, Some(error.as_str()), None, None)
174 })
175 .boxed()
176 }
177}
178
/// Builds the base URL of the docs.rs source view for the given crate name
/// and version. The result always ends with a trailing slash.
fn docs_rs_url(dependency_name: &str, dependency_version: &str) -> String {
    [
        "https://docs.rs/crate/",
        dependency_name,
        "/",
        dependency_version,
        "/source/",
    ]
    .concat()
}
183
184async fn get_retrieved_dependency_from_license_file(
188 store: Arc<Option<Store>>,
189 crate_url: String,
190 license: String,
191 client: Client,
192 dependency: &Dependency,
193) -> RetrievedDependency {
194 if let Some(store) = store.as_ref() {
195 let license_url = format!("{crate_url}{license}");
196 if let Ok((license, score)) = get_license_from_docs_rs(&client, store, &license_url).await {
197 crates_io_retrieved_dependency(
198 dependency,
199 Some(vec![license.clone()]),
200 None,
201 Some(format!(
202 "Our score for this license is {:.2}%.",
203 score * 100.0
204 )),
205 Some(vec![(license, score)]),
206 )
207 } else {
208 crates_io_retrieved_dependency(
209 dependency,
210 None,
211 Some(&format!(
212 "Not declared in Cargo.toml. Check the url: {license_url}"
213 )),
214 None,
215 None,
216 )
217 }
218 } else {
219 tracing::error!("No askalono store present in Rust docs.rs retriever");
220 crates_io_retrieved_dependency(
221 dependency,
222 None,
223 Some("No askalono store present"),
224 None,
225 None,
226 )
227 }
228}
229
230async fn get_license_from_docs_rs(
231 client: &Client,
232 store: &Store,
233 url: &str,
234) -> Result<(String, f32), anyhow::Error> {
235 let html = client
236 .get(url)
237 .header("User-Agent", "licensebat-cli (licensebat.com)")
238 .send()
239 .await?
240 .text()
241 .await?;
242
243 let pattern = easy_scraper::Pattern::new(
244 r#"<div id="source-code"><pre><code>{{value}}</code></pre></div>"#,
245 )
246 .map_err(Error)?;
247
248 let matches = pattern.matches(&html);
249 if matches.is_empty() {
250 tracing::error!(%url, "Couldn't get original license from docs.rs");
251 Err(Error(String::from("Not found")).into())
252 } else {
253 let license_html = matches[0]["value"].clone();
254 let license = html2text::from_read(license_html.as_bytes(), 3000);
255 let result = store.analyze(&TextData::from(license.as_str()));
256 Ok((result.name.to_string(), result.score))
257 }
258}
259
260#[derive(Error, Debug)]
261#[error("DocRs Error: {0}")]
262struct Error(String);