something_awful/
post_list.rs1use crate::Error;
2
/// A single post scraped from a page by [`Post::parse_list`].
///
/// Every textual field holds the value exactly as it was found in the
/// markup; dates in particular are kept as unparsed strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Post {
    /// The post table's `id` attribute with its leading `post` prefix removed.
    pub id: String,
    /// The post table's `data-idx` attribute, parsed as an integer.
    pub index: i64,
    /// First text node of the `dl.userinfo > dt` element.
    pub author_username: String,
    /// Inner HTML of the `dl.userinfo > dd.registered` element.
    pub author_registration_date: String,
    /// Last text node of the `td.postdate` cell, with surrounding whitespace trimmed.
    pub post_date: String,
    /// Inner HTML of the `td.postbody` cell.
    pub post_body: String,
}
13
14impl Post {
15 pub fn parse_list(document: &str) -> Result<Vec<Post>, Error> {
17 let mut posts = Vec::new();
18 let document = scraper::Html::parse_document(document);
19 let selector = scraper::Selector::parse(r#"table.post"#).expect("BUG: illegal selector");
20
21 for post in document.select(&selector) {
22 let parsing_error = Error::PostParsingError(post.inner_html());
23 let selector =
24 scraper::Selector::parse(r#"table.post>tbody>tr"#).expect("BUG: illegal selector");
25 let mut post_body = post.select(&selector);
26
27 let Some(author_and_body) = post_body.next() else {
28 return Err(parsing_error);
29 };
30
31 let Some(date_and_links) = post_body.next() else {
32 return Err(parsing_error);
33 };
34
35 let Some(id) = post.value().attr("id") else {
36 return Err(parsing_error);
37 };
38
39 if !id.starts_with("post") {
40 return Err(parsing_error);
41 }
42 let id = id["post".len()..].to_owned();
43
44 let Some(index) = post.value().attr("data-idx") else {
45 return Err(parsing_error);
46 };
47 let Ok(index) = index.parse() else {
48 return Err(parsing_error);
49 };
50
51 let selector =
52 scraper::Selector::parse(r#"dl.userinfo>dt"#).expect("BUG: illegal selector");
53 let Some(author_username) = author_and_body.select(&selector).next() else {
54 return Err(parsing_error);
55 };
56
57 let mut author_username = author_username.text();
58 let Some(author_username) = author_username.next() else {
59 return Err(parsing_error);
60 };
61 let author_username = author_username.to_owned();
62
63 let selector = scraper::Selector::parse(r#"dl.userinfo>dd.registered"#)
64 .expect("BUG: illegal selector");
65 let Some(author_registration_date) = author_and_body.select(&selector).next() else {
66 return Err(parsing_error);
67 };
68 let author_registration_date = author_registration_date.inner_html();
69
70 let selector =
71 scraper::Selector::parse(r#"tr>td.postdate"#).expect("BUG: illegal selector");
72 let Some(post_date) = date_and_links.select(&selector).next() else {
73 return Err(parsing_error);
74 };
75 let Some(post_date) = post_date.text().last() else {
76 return Err(parsing_error);
77 };
78 let post_date = post_date.trim().to_owned();
79
80 let selector =
81 scraper::Selector::parse(r#"td.postbody"#).expect("BUG: illegal selector");
82 let Some(post_body) = author_and_body.select(&selector).next() else {
83 return Err(parsing_error);
84 };
85 let post_body = post_body.inner_html();
86
87 posts.push(Post {
88 id,
89 index,
90 author_username,
91 author_registration_date,
92 post_date,
93 post_body,
94 });
95 }
96
97 Ok(posts)
98 }
99}