something_awful/
thread_list.rs

1use crate::Error;
2
/// Represents a single thread within a list of threads.
///
/// All textual fields hold the inner HTML of the element they were scraped
/// from by [`Thread::parse_list`], so they are stored as found in the page
/// and are not unescaped or otherwise normalised.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Thread {
    /// The thread row's `id` attribute with its leading `thread` prefix
    /// removed.
    pub id: String,
    /// Inner HTML of the thread's title link.
    pub title: String,
    /// Inner HTML of the link in the row's author cell.
    pub author_username: String,
    /// Reply count shown in the row's replies cell.
    pub replies: i64,
    /// View count shown in the row's views cell.
    pub views: i64,
    /// Date of the last post, kept as the unparsed text found in the page.
    pub last_post_date: String,
    /// Inner HTML of the author link in the row's last-post cell.
    pub last_post_username: String,

    /// Zero if there are no unread posts in this thread. Otherwise, the
    /// number of unread posts.
    pub unread: i64,
}
18
19impl Thread {
20    /// Parses all threads on a list of threads within a page.
21    pub fn parse_list(document: &str) -> Result<Vec<Thread>, Error> {
22        let mut threads = Vec::new();
23        let document = scraper::Html::parse_document(document);
24        let selector =
25            scraper::Selector::parse(r#"tbody>tr.thread"#).expect("BUG: illegal selector");
26
27        for thread in document.select(&selector) {
28            let parsing_error = Error::ThreadParsingError(thread.inner_html());
29            let Some(id) = thread.value().attr("id") else {
30                    return Err(parsing_error);
31                };
32            if !id.starts_with("thread") {
33                return Err(parsing_error);
34            }
35            let thread_id = id["thread".len()..].to_owned();
36            let selector =
37                scraper::Selector::parse(r#"a.thread_title"#).expect("BUG: illegal selector");
38            let Some(title) = thread.select(&selector).next() else {
39                    return Err(parsing_error);
40                };
41            let title = title.inner_html();
42
43            let selector =
44                scraper::Selector::parse(r#"td.author>a"#).expect("BUG: illegal selector");
45            let Some(author) = thread.select(&selector).next() else {
46                    return Err(parsing_error);
47                };
48            let author_username = author.inner_html();
49
50            let selector =
51                scraper::Selector::parse(r#"td.replies>a"#).expect("BUG: illegal selector");
52            let Some(replies) = thread.select(&selector).next() else {
53                    return Err(parsing_error);
54                };
55            let replies = replies.inner_html();
56            let Ok(replies) = replies.parse() else {
57                    return Err(parsing_error);
58                };
59
60            let selector = scraper::Selector::parse(r#"td.views"#).expect("BUG: illegal selector");
61            let Some(views) = thread.select(&selector).next() else {
62                    return Err(parsing_error);
63                };
64            let views = views.inner_html();
65            let Ok(views) = views.parse() else {
66                    return Err(parsing_error);
67                };
68
69            let selector =
70                scraper::Selector::parse(r#"td.lastpost>div.date"#).expect("BUG: illegal selector");
71            let Some(last_post_date) = thread.select(&selector).next() else {
72                    return Err(parsing_error);
73                };
74            let last_post_date = last_post_date.inner_html();
75
76            let selector =
77                scraper::Selector::parse(r#"td.lastpost>a.author"#).expect("BUG: illegal selector");
78            let Some(last_post_username) = thread.select(&selector).next() else {
79                    return Err(parsing_error);
80                };
81            let last_post_username = last_post_username.inner_html();
82
83            let selector =
84                scraper::Selector::parse(r#"td.title>div.title_inner>div.lastseen>a.count>b"#)
85                    .expect("BUG: illegal selector");
86            let unread = thread
87                .select(&selector)
88                .next()
89                .map(|x| x.inner_html())
90                .unwrap_or(String::from("0"));
91            let Ok(unread) = unread.parse() else {
92                    return Err(parsing_error);
93                };
94
95            threads.push(Thread {
96                id: thread_id,
97                title,
98                author_username,
99                replies,
100                views,
101                last_post_date,
102                last_post_username,
103                unread,
104            });
105        }
106        Ok(threads)
107    }
108}