something_awful/
post_list.rs

1use crate::Error;
2
/// Represents a specific post within a thread.
///
/// Instances are produced by [`Post::parse_list`]; the field docs below describe
/// where that parser takes each value from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Post {
    /// The post's `id` attribute with its leading `post` prefix removed.
    pub id: String,
    /// The post's `data-idx` attribute, parsed as an integer.
    pub index: i64,
    /// First text node of the author's `dl.userinfo>dt` element.
    pub author_username: String,
    /// Inner HTML of the author's `dl.userinfo>dd.registered` element (kept as-is, not parsed).
    pub author_registration_date: String,
    /// Last text node of the `td.postdate` cell, with surrounding whitespace trimmed.
    pub post_date: String,
    /// Inner HTML of the `td.postbody` cell (raw markup, not plain text).
    pub post_body: String,
}
13
14impl Post {
15    // Parses all posts on a thread page.
16    pub fn parse_list(document: &str) -> Result<Vec<Post>, Error> {
17        let mut posts = Vec::new();
18        let document = scraper::Html::parse_document(document);
19        let selector = scraper::Selector::parse(r#"table.post"#).expect("BUG: illegal selector");
20
21        for post in document.select(&selector) {
22            let parsing_error = Error::PostParsingError(post.inner_html());
23            let selector =
24                scraper::Selector::parse(r#"table.post>tbody>tr"#).expect("BUG: illegal selector");
25            let mut post_body = post.select(&selector);
26
27            let Some(author_and_body) = post_body.next() else {
28                return Err(parsing_error);
29            };
30
31            let Some(date_and_links) = post_body.next() else {
32                return Err(parsing_error);
33            };
34
35            let Some(id) = post.value().attr("id") else {
36                return Err(parsing_error);
37            };
38
39            if !id.starts_with("post") {
40                return Err(parsing_error);
41            }
42            let id = id["post".len()..].to_owned();
43
44            let Some(index) = post.value().attr("data-idx") else {
45                return Err(parsing_error);
46            };
47            let Ok(index) = index.parse() else {
48                return Err(parsing_error);
49            };
50
51            let selector =
52                scraper::Selector::parse(r#"dl.userinfo>dt"#).expect("BUG: illegal selector");
53            let Some(author_username) = author_and_body.select(&selector).next() else {
54                return Err(parsing_error);
55            };
56
57            let mut author_username = author_username.text();
58            let Some(author_username) = author_username.next() else {
59                return Err(parsing_error);
60            };
61            let author_username = author_username.to_owned();
62
63            let selector = scraper::Selector::parse(r#"dl.userinfo>dd.registered"#)
64                .expect("BUG: illegal selector");
65            let Some(author_registration_date) = author_and_body.select(&selector).next() else {
66                return Err(parsing_error);
67            };
68            let author_registration_date = author_registration_date.inner_html();
69
70            let selector =
71                scraper::Selector::parse(r#"tr>td.postdate"#).expect("BUG: illegal selector");
72            let Some(post_date) = date_and_links.select(&selector).next() else {
73                return Err(parsing_error);
74            };
75            let Some(post_date) = post_date.text().last() else {
76                return Err(parsing_error);
77            };
78            let post_date = post_date.trim().to_owned();
79
80            let selector =
81                scraper::Selector::parse(r#"td.postbody"#).expect("BUG: illegal selector");
82            let Some(post_body) = author_and_body.select(&selector).next() else {
83                return Err(parsing_error);
84            };
85            let post_body = post_body.inner_html();
86
87            posts.push(Post {
88                id,
89                index,
90                author_username,
91                author_registration_date,
92                post_date,
93                post_body,
94            });
95        }
96
97        Ok(posts)
98    }
99}