1use chrono::prelude::*;
2use std::convert::From;
3use lazy_static::lazy_static;
4use scraper::{Html, Selector, ElementRef};
5use tokio::sync::mpsc;
6use anyhow::Result;
7use tabled::Tabled;
8
9lazy_static! {
10 static ref NAME_SELECTOR: Selector = Selector::parse("span.package-snippet__name").unwrap();
11 static ref VERSION_SELECTOR: Selector = Selector::parse("span.package-snippet__version").unwrap();
12 static ref RELEASE_SELECTOR: Selector = Selector::parse("span.package-snippet__released").unwrap();
13 static ref DESCRIPTION_SELECTOR: Selector = Selector::parse("p.package-snippet__description").unwrap();
14 static ref DATETIME_SELECTOR: Selector = Selector::parse("time").unwrap();
15}
16
17fn unwrap_selector(input: &ElementRef, selector: &Selector) -> String {
18 input.select(selector).next().map(|e| e.inner_html()).unwrap_or("".into())
19}
20
21fn unwrap_time_selector(input: &ElementRef) -> Option<DateTime<Utc>> {
22 input.select(&RELEASE_SELECTOR).next()
23 .and_then(|release| release.select(&DATETIME_SELECTOR).next())
24 .and_then(|time| time.value().attr("datetime"))
25 .and_then(|dt| dt.parse::<DateTime<Utc>>().ok())
26}
27
28fn format_date(release: &DateTime<Utc>) -> String {
29 release.format("%Y-%m-%d").to_string()
30}
31
32#[derive(Debug,Tabled)]
33pub struct Package {
34 #[header("Name")]
35 pub name: String,
36 #[header("Version")]
37 pub version: String,
38 #[header("Released")]
39 #[field(display_with="format_date")]
40 pub release: DateTime<Utc>,
41 #[header("Description")]
42 pub description: String,
43}
44
45impl From<&ElementRef<'_>> for Package {
46 fn from(input: &ElementRef) -> Self {
47 let release = unwrap_time_selector(input);
48 Package{
49 name: unwrap_selector(input, &NAME_SELECTOR),
50 version: unwrap_selector(input, &VERSION_SELECTOR),
51 release: release.unwrap(),
52 description: unwrap_selector(input, &DESCRIPTION_SELECTOR),
53 }
54 }
55}
56
57
58pub async fn query_pypi(name: String, pages: usize) -> Result<Vec<Package>>{
59 let client = reqwest::Client::new();
60 let (tx, mut rx) = mpsc::channel(32);
61
62 let package_snippet = Selector::parse("a.package-snippet").unwrap();
63
64 tokio::spawn(async move {
65 for page_idx in (1..=pages).map(|i| i.to_string()) {
66 let query_params = vec![("q", &name), ("page", &page_idx)];
67
68 let page_body = client.get("https://pypi.org/search/")
69 .query(&query_params)
70 .send()
71 .await;
72 tx.send(page_body).await.expect("can send on package channel");
73 }
74 });
75
76 let mut packages = vec![];
77
78 while let Some(response) = rx.recv().await {
79 let page_body = response?.text().await?;
80 let page_result = Html::parse_document(&page_body);
81 for element in page_result.select(&package_snippet) {
82 let package = Package::from(&element);
83 packages.push(package);
84 }
85 }
86
87 Ok(packages)
88}
89
90
#[cfg(test)]
mod tests {
    use super::*;

    /// Markup of a single search-result snippet used as the parsing fixture.
    const SNIPPET: &str = r#"
        <a class="package-snippet" href="/project/gitlab3/">
        <h3 class="package-snippet__title">
        <span class="package-snippet__name">gitlab3</span>
        <span class="package-snippet__version">0.5.8</span>
        <span class="package-snippet__released"><time datetime="2017-03-18T19:38:52+0000" data-controller="localized-time" data-localized-time-relative="true" data-localized-time-show-time="false" title="2017-03-18 20:38:52" aria-label="2017-03-18 20:38:52">Mar 18, 2017</time></span>
        </h3>
        <p class="package-snippet__description">GitLab API v3 Python Wrapper.</p>
        </a>"#;

    /// Checks that every `Package` field is read from the fixture snippet.
    #[test]
    fn parse_package_data_test() {
        let fragment = Html::parse_fragment(SNIPPET);
        let root = fragment.root_element();
        let parsed = Package::from(&root);

        let expected_release = "2017-03-18T19:38:52+0000"
            .parse::<DateTime<Utc>>()
            .unwrap();

        assert_eq!(parsed.name, "gitlab3");
        assert_eq!(parsed.version, "0.5.8");
        assert_eq!(parsed.release, expected_release);
        assert_eq!(parsed.description, "GitLab API v3 Python Wrapper.");
    }
}