1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
//! # howto
//!
//! Instant coding answers with Google and StackOverflow.
//! Inspired by [gleitz/howdoi](https://github.com/gleitz/howdoi).
//!
//! ## Usage
//!
//! ```
//! # use futures::prelude::*;
//! # async move {
//! let mut answers = howto::howto("file io rust").await;
//!
//! while let Some(answer) = answers.next().await {
//!     println!("Answer from {}\n{}", answer.link, answer.instruction);
//! }
//! # };
//! ```

#[cfg(test)]
mod tests;

use failure::{ensure, format_err, Fallible};
use futures::prelude::*;
use lazy_static::lazy_static;
use scraper::{Html, Selector};
use std::pin::Pin;

/// One StackOverflow answer found for a query.
#[derive(Clone, Debug)]
pub struct Answer {
    /// Text of the question's title element.
    pub question_title: String,
    /// Link of the StackOverflow page the answer was read from.
    pub link: String,
    /// Concatenated text of the answer's post body.
    pub full_text: String,
    /// Text of the first `pre` block in the answer, or of the first `code`
    /// element when the answer has no `pre` block.
    pub instruction: String,
}

async fn get(url: &str) -> Fallible<String> {
    let resp = reqwest::Client::new()
        .get(url)
        .header(
            "User-Agent",
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100 101 Firefox/22.0",
        )
        .send()
        .await
        .map_err(|e| failure::Error::from_boxed_compat(Box::new(e)))?;

    ensure!(
        resp.status().is_success(),
        format_err!("Request error: {}", resp.status())
    );

    Ok(resp
        .text()
        .await
        .map_err(|e| failure::Error::from_boxed_compat(Box::new(e)))?)
}

async fn get_stackoverflow_links(query: &str) -> Fallible<Vec<String>> {
    lazy_static! {
        static ref LINK_SELECTOR: Selector = Selector::parse(".r>a").unwrap();
    }

    let url = format!(
        "https://www.google.com/search?q=site:stackoverflow.com {}",
        query,
    );

    let content = get(&url).await?;
    let html = Html::parse_document(&content);

    let links: Vec<_> = html
        .select(&LINK_SELECTOR)
        .filter_map(|e| e.value().attr("href"))
        .map(ToString::to_string)
        .filter(|link| link.starts_with("https://stackoverflow.com/"))
        .collect();

    Ok(links)
}

async fn get_answer(link: &str) -> Fallible<Answer> {
    lazy_static! {
        static ref TITLE_SELECTOR: Selector = Selector::parse("#question-header>h1").unwrap();
        static ref ANSWER_SELECTOR: Selector = Selector::parse(".answer").unwrap();
        static ref TEXT_SELECTOR: Selector = Selector::parse(".post-text>*").unwrap();
        static ref PRE_INSTRUCTION_SELECTOR: Selector = Selector::parse("pre").unwrap();
        static ref CODE_INSTRUCTION_SELECTOR: Selector = Selector::parse("code").unwrap();
    }
    macro_rules! unwrap_or_bail {
        ($o:expr) => {
            $o.ok_or_else(|| format_err!("Cannot parse StackOverflow"))?
        };
    };

    let url = format!("{}?answerstab=votes", link);
    let link = link.to_string();

    let content = get(&url).await?;
    let html = Html::parse_document(&content);

    let title_html = unwrap_or_bail!(html.select(&TITLE_SELECTOR).next());
    let question_title = title_html.text().collect::<Vec<_>>().join("");

    let answer = unwrap_or_bail!(html.select(&ANSWER_SELECTOR).next());

    let instruction_html = unwrap_or_bail!(answer
        .select(&PRE_INSTRUCTION_SELECTOR)
        .next()
        .or_else(|| answer.select(&CODE_INSTRUCTION_SELECTOR).next()));
    let instruction = instruction_html.text().collect::<Vec<_>>().join("");
    let full_text = answer
        .select(&TEXT_SELECTOR)
        .flat_map(|e| e.text())
        .collect::<Vec<_>>()
        .join("");

    Ok(Answer {
        question_title,
        link,
        instruction,
        full_text,
    })
}

/// Query function: pass a query and receive a stream of StackOverflow answers.
///
/// Links are looked up first; if that lookup fails the stream is empty. Each
/// link is then fetched as the stream is polled, and links whose answer cannot
/// be fetched or parsed are skipped.
pub async fn howto(query: &str) -> Pin<Box<dyn Stream<Item = Answer> + Send>> {
    // Fetches one answer, turning any failure into `None` so it is skipped.
    async fn fetch(link: String) -> Option<Answer> {
        get_answer(&link).await.ok()
    }

    let found = get_stackoverflow_links(query).await.unwrap_or_default();
    let answers = stream::iter(found).filter_map(fetch);

    answers.boxed()
}

/// Like [`howto`], but fetches the first `n` answers through a `FuturesOrdered`
/// and the remaining ones one by one afterwards.
///
/// Answers are yielded in link order. If the link lookup fails the stream is
/// empty, and links whose answer cannot be fetched or parsed are skipped.
pub async fn prefetch_howto(query: &str, n: usize) -> Pin<Box<dyn Stream<Item = Answer> + Send>> {
    // Fetches one answer, turning any failure into `None` so it is skipped.
    async fn fetch(link: String) -> Option<Answer> {
        get_answer(&link).await.ok()
    }

    let mut head = get_stackoverflow_links(query).await.unwrap_or_default();

    // `split_off` panics when the index is past the end, so clamp it to the length;
    // with fewer than `n` links everything stays in `head` and `tail` is empty.
    let cut = n.min(head.len());
    let tail = head.split_off(cut);

    let prefetched: stream::FuturesOrdered<_> = head.into_iter().map(fetch).collect();
    let remaining = stream::iter(tail).filter_map(fetch);

    prefetched
        .filter_map(future::ready)
        .chain(remaining)
        .boxed()
}