pypi_search/
lib.rs

1use chrono::prelude::*;
2use std::convert::From;
3use lazy_static::lazy_static;
4use scraper::{Html, Selector, ElementRef};
5use tokio::sync::mpsc;
6use anyhow::Result;
7use tabled::Tabled;
8
9lazy_static! {
10    static ref NAME_SELECTOR: Selector = Selector::parse("span.package-snippet__name").unwrap();
11    static ref VERSION_SELECTOR: Selector = Selector::parse("span.package-snippet__version").unwrap();
12    static ref RELEASE_SELECTOR: Selector = Selector::parse("span.package-snippet__released").unwrap();
13    static ref DESCRIPTION_SELECTOR: Selector = Selector::parse("p.package-snippet__description").unwrap();
14    static ref DATETIME_SELECTOR: Selector = Selector::parse("time").unwrap();
15}
16
17fn unwrap_selector(input: &ElementRef, selector: &Selector) -> String {
18    input.select(selector).next().map(|e| e.inner_html()).unwrap_or("".into())
19}
20
21fn unwrap_time_selector(input: &ElementRef) -> Option<DateTime<Utc>> {
22    input.select(&RELEASE_SELECTOR).next()
23        .and_then(|release| release.select(&DATETIME_SELECTOR).next())
24        .and_then(|time| time.value().attr("datetime"))
25        .and_then(|dt| dt.parse::<DateTime<Utc>>().ok())
26}
27
28fn format_date(release: &DateTime<Utc>) -> String {
29    release.format("%Y-%m-%d").to_string()
30}
31
32#[derive(Debug,Tabled)]
33pub struct Package {
34    #[header("Name")]
35    pub name: String,
36    #[header("Version")]
37    pub version: String,
38    #[header("Released")]
39    #[field(display_with="format_date")]
40    pub release: DateTime<Utc>,
41    #[header("Description")]
42    pub description: String,
43}
44
45impl From<&ElementRef<'_>> for Package {
46    fn from(input: &ElementRef) -> Self {
47        let release = unwrap_time_selector(input);
48        Package{
49            name: unwrap_selector(input, &NAME_SELECTOR),
50            version: unwrap_selector(input, &VERSION_SELECTOR),
51            release: release.unwrap(),
52            description: unwrap_selector(input, &DESCRIPTION_SELECTOR),
53        }
54    }
55}
56
57
58pub async fn query_pypi(name: String, pages: usize) -> Result<Vec<Package>>{
59    let client = reqwest::Client::new();
60    let (tx, mut rx) = mpsc::channel(32);
61
62    let package_snippet = Selector::parse("a.package-snippet").unwrap();
63
64    tokio::spawn(async move {
65        for page_idx in (1..=pages).map(|i| i.to_string()) {
66            let query_params = vec![("q", &name), ("page", &page_idx)];
67
68            let page_body = client.get("https://pypi.org/search/")
69                .query(&query_params)
70                .send()
71                .await;
72            tx.send(page_body).await.expect("can send on package channel");
73        }
74    });
75
76    let mut packages = vec![];
77
78    while let Some(response) = rx.recv().await {
79        let page_body = response?.text().await?;
80        let page_result = Html::parse_document(&page_body);
81        for element in page_result.select(&package_snippet) {
82            let package = Package::from(&element);
83            packages.push(package);
84        }
85    }
86
87    Ok(packages)
88}
89
90
#[cfg(test)]
mod tests {
    use super::*;

    /// Markup of one `a.package-snippet` element used as the parsing fixture.
    const SNIPPET_HTML: &str = r#"
            <a class="package-snippet" href="/project/gitlab3/">
            <h3 class="package-snippet__title">
              <span class="package-snippet__name">gitlab3</span>
              <span class="package-snippet__version">0.5.8</span>
              <span class="package-snippet__released"><time datetime="2017-03-18T19:38:52+0000" data-controller="localized-time" data-localized-time-relative="true" data-localized-time-show-time="false" title="2017-03-18 20:38:52" aria-label="2017-03-18 20:38:52">Mar 18, 2017</time></span>
            </h3>
            <p class="package-snippet__description">GitLab API v3 Python Wrapper.</p>
          </a>"#;

    /// Every field of `Package` is filled from the matching snippet element.
    #[test]
    fn parse_package_data_test() {
        let fragment = Html::parse_fragment(SNIPPET_HTML);
        let root = fragment.root_element();
        let parsed = Package::from(&root);

        let expected_release = "2017-03-18T19:38:52+0000"
            .parse::<DateTime<Utc>>()
            .unwrap();

        assert_eq!(parsed.name, "gitlab3");
        assert_eq!(parsed.version, "0.5.8");
        assert_eq!(parsed.release, expected_release);
        assert_eq!(parsed.description, "GitLab API v3 Python Wrapper.");
    }
}
115}