hacker_news/client/
html_client.rs1use std::error::Error;
2use log;
3use std::collections::HashMap;
4use std::cell::RefCell;
5use lazy_static::lazy_static;
6use regex::Regex;
7use reqwest;
8use reqwest::blocking::ClientBuilder;
9use reqwest::header::HeaderValue;
10use reqwest::header::HeaderMap;
11use reqwest::cookie::Cookie;
12use reqwest::redirect::Policy;
13use scraper;
14use scraper::Html;
15use scraper::Selector;
16use scraper::ElementRef;
17use crate::error::HttpError;
18use crate::error::HnError;
19use crate::parser::HtmlParse;
20use crate::parser::ListingsParser;
21use crate::parser::CommentsParser;
22use crate::parser::extract_fnid;
23use crate::parser::comments::create_comment_tree;
24use crate::model::Id;
25use crate::model::Listing;
26use crate::model::Date;
27use crate::model::Thread;
28
29
/// Endpoint that receives the login form POST (`acct`, `pw`, `goto`).
const URL_LOGIN: &str = "https://news.ycombinator.com/login";
/// Page fetched with the session cookie to obtain the `fnid` form token.
const URL_SUBMIT_FORM: &str = "https://news.ycombinator.com/submit";
/// Endpoint that receives the story submission form POST.
const URL_SUBMIT: &str = "https://news.ycombinator.com/r";
33
34lazy_static! {
35 static ref FNID_REGEX: Regex = Regex::new(r#"<input.*value="(.+?)".*>"#).unwrap();
36}
37
38pub struct Client {
39 http_client: reqwest::blocking::Client,
40 cookie: RefCell<Option<(String, String)>>,
41}
42
43impl Client {
44
45 pub fn new() -> Self {
46 Self {
47 http_client: reqwest::blocking::Client::new(),
48 cookie: RefCell::new(None),
49 }
50 }
51
52 fn cookie(&self) -> Result<String, Box<dyn Error>> {
53 let pair = self.cookie.borrow();
55 let pair = pair.as_ref().ok_or(HnError::UnauthenticatedError)?;
56
57 Ok(format!("{}={};", pair.0, pair.1))
58 }
59
60 pub fn submit(
61 &self,
62 title: String,
63 url: Option<String>,
64 text: Option<String>,
65 ) -> Result<(), Box<dyn Error>> {
66
67 let cookie_string = self.cookie()?;
68 let cookie: HeaderValue = cookie_string.parse()
69 .expect("Got a user cookie, but failed to parse it to a header");
70
71 let mut formdata = HashMap::new();
72 formdata.insert("fnid", self.get_fnid()?);
73 formdata.insert("fnop", "submit-page".to_string());
74 formdata.insert("url", url.unwrap_or_else(|| "".to_string()));
75 formdata.insert("text", text.unwrap_or_else(|| "".to_string()));
76 log::debug!("submit post body = {:?}", formdata);
77 formdata.insert("title", title);
78
79 let req = self.http_client.post(URL_SUBMIT)
80 .header("Cookie", cookie)
81 .form(&formdata);
82 log::debug!("submit post request = {:?}", req);
83 let resp = req.send()?;
84 log::debug!("submit post response = {:?}", resp);
85
86 Ok(())
87
88 }
89
90 fn get_fnid(&self) -> Result<String, Box<dyn Error>> {
91 let cookie_string = self.cookie()?;
92 let cookie: HeaderValue = cookie_string.parse()
93 .expect("Got a user cookie, but failed to parse it to a header");
94
95 let req = self.http_client
96 .get(URL_SUBMIT_FORM)
97 .header("Cookie", cookie);
98 log::debug!("submit form request = {:?}", req);
99 let resp = req.send()?;
100 log::debug!("submit form response = {:?}", resp);
101 let body = resp.text()?;
102 let dom = Html::parse_document(&body);
103
104 let selector = match Selector::parse("input[name='fnid']") {
107 Err(_src) => {
108 return Err(Box::new(HnError::HtmlParsingError));
109 },
110 Ok(selector) => selector,
111 };
112
113 let result: Vec<ElementRef> = dom.select(&selector).collect();
114 let el = match result.get(0) {
115 Some(el) => el,
116 None => {
117 return Err(Box::new(HnError::HtmlParsingError));
118 }
119 };
120 let fnid = extract_fnid(el)?;
121
122 Ok(fnid)
123 }
124
125 pub fn login(&self, username: &str, password: &str) -> Result<(), Box<dyn Error>> {
126 let mut formdata = HashMap::new();
127 formdata.insert("acct", username);
128 formdata.insert("pw", password);
129 let goto = "newest".to_string();
130 formdata.insert("goto", &goto);
131
132 let mut headers = HeaderMap::new();
133 headers.insert("User-Agent", "hacker-news client/0.0.1".parse().unwrap());
134
135 let client = ClientBuilder::new()
139 .redirect(Policy::none())
140 .build()?;
141
142 let req = client.post(URL_LOGIN)
144 .headers(headers)
145 .form(&formdata);
146 log::debug!("login request = {:?}", req);
147 let resp = req.send()?;
148 if resp.status().as_u16() != 302 {
149 log::error!("login response = {:?}", resp);
150 return Err(Box::new(HnError::AuthenticationError));
151 }
152 log::debug!("login response = {:?}", resp);
153
154 let cookies: Vec<Cookie> = resp.cookies().collect();
156 let cookie = cookies.get(0)
157 .ok_or_else(|| {
159 log::error!("Unable to parse user cookie from succesful login response, \
160 response = {:?}, cookies = {:?}", resp, cookies);
161 HnError::HtmlParsingError
162 })?;
163 let cookie = Some((cookie.name().to_string(), cookie.value().to_string()));
164
165 *self.cookie.borrow_mut() = cookie;
167 println!("cookie = {:?}", self.cookie);
168
169 Ok(())
170 }
171
172 pub fn item(&self, id: Id) -> Result<Listing, Box<dyn Error>> {
173 let url = format!("https://news.ycombinator.com/item?id={}", id);
174 let req = self.http_client.get(&url);
175 log::debug!("Send GET request to {:?}", url);
176 let resp = req.send()?;
177 let status = resp.status().as_u16();
178 if status != 200 {
179 let err = HttpError {
180 url: resp.url().to_string(),
181 code: status,
182 };
183 log::error!("Received non-200 response: {:?}", err);
184 return Err(Box::new(HnError::HttpError(err)));
185 }
186 log::debug!("Received 200 response from {:?}", url);
187
188 let text = resp.text()?;
189 let html = Html::parse_document(&text);
190
191 let item = ListingsParser::parse(&html)?
196 .pop()
197 .ok_or(format!("Did not find item {}", id))?;
198
199 Ok(item)
200 }
201
202 pub fn thread(&self, id: Id) -> Result<Thread, Box<dyn Error>> {
203 log::debug!("HTML client attempting comments for id = {:?}", id);
204 let url = format!("https://news.ycombinator.com/item?id={}", id);
205 let req = self.http_client.get(&url);
206 let resp = req.send()?;
207 let text = resp.text()?;
208 let html = Html::parse_document(&text);
209 let comments = CommentsParser::parse(&html)?;
210 let comments = create_comment_tree(comments);
211 let listings = ListingsParser::parse(&html)?;
212 if listings.len() > 1 {
213 log::warn!("Parsed multiple listings for a thread, where only 1 is expected");
214 }
215 let listing = listings.into_iter()
216 .next()
217 .ok_or_else(|| {
218 log::error!("Succesfully parsed HTML, but found no listings");
219 HnError::HtmlParsingError
220 })?;
221 let thread = Thread { listing, comments };
222
223 Ok(thread)
224 }
225
226 pub fn news(&self) -> Result<Vec<Listing>, Box<dyn Error>> {
227 self.listings("https://news.ycombinator.com/news")
228 }
229
230 pub fn past(&self, date: Date) -> Result<Vec<Listing>, Box<dyn Error>> {
231 let url = format!("https://news.ycombinator.com/front?day={}-{}-{}",
232 date.0, date.1, date.2);
233
234 self.listings(&url)
235 }
236
237 pub fn listings(&self, url: &str) -> Result<Vec<Listing>, Box<dyn Error>> {
246 let req = self.http_client.get(url);
247 let resp = req.send()?;
248 let text = resp.text()?;
249 let html = Html::parse_document(&text);
250 let listings = ListingsParser::parse(&html)?;
251
252 Ok(listings)
253 }
254}
255
256
#[cfg(test)]
mod tests {

    use super::*;

    use crate::util::setup;

    /// Fetches the news page through a fresh client; passes when the call
    /// returns `Ok`.
    #[test]
    fn test_news() -> Result<(), Box<dyn Error>> {
        setup();
        let listings = Client::new().news()?;
        log::info!("Successfully called Client::news()");
        log::trace!("Listings output from Client::news() = {:?}", listings);

        Ok(())
    }

    /// Fetches a single, fixed item id; passes when the call returns `Ok`.
    #[test]
    fn test_item() -> Result<(), Box<dyn Error>> {
        setup();
        let item = Client::new().item(25925926)?;
        log::debug!("test_item item = {:#?}", item);

        Ok(())
    }

    /// Fetches the thread for a fixed item id; passes when the call returns
    /// `Ok`.
    #[test]
    fn test_comments() -> Result<(), Box<dyn Error>> {
        setup();
        let comments = Client::new().thread(100)?;
        log::debug!("comments = {:?}", comments);

        Ok(())
    }

    /// Logs in with the credentials in `$HN_USER` / `$HN_PASS`.
    ///
    /// When either variable is unset the test logs a warning and passes
    /// without contacting the server.
    #[test]
    fn test_login() -> Result<(), Box<dyn Error>> {
        setup();

        let credentials = (std::env::var("HN_USER"), std::env::var("HN_PASS"));
        let (user, pwd): (String, String) = match credentials {
            (Ok(user), Ok(pwd)) => (user, pwd),
            // A missing username is reported first, whatever the password's state.
            (Err(_), _) => {
                log::warn!("login test unable to retrieve Hacker News username from \
                    environment variable $HN_USER. Omitting test.");
                return Ok(());
            }
            (Ok(_), Err(_)) => {
                log::warn!("login test unable to retrieve Hacker News password from \
                    environment variable $HN_PASS. Omitting test.");
                return Ok(());
            }
        };

        Client::new().login(&user, &pwd)
    }

}