herta/extractor/
character.rs1use scraper::Selector;
2
3use super::{parse_html, parse_url};
4use crate::url::{canonicalize, get_original_image};
5
/// One row of the character index table, as produced by `index_characters`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Character {
    /// Zero-based position of the row among the scanned table rows. Rows that
    /// `index_characters` skips still consume an index, so ids may have gaps.
    pub id: usize,
    /// Character name, read from the `title` attribute of the row's name link.
    pub name: String,
    /// Canonicalized form of the name link's `href`.
    pub link: String,
    /// Rarity label, read from the `alt` text of the rarity image.
    pub rarity: String,
    /// Original-image URL derived from the rarity image's `data-src`.
    pub rarity_image: String,
    /// Path name, read from the `title` attribute of the path link.
    pub path: String,
    /// `data-src` of the path icon, stored exactly as found in the page.
    pub path_image: String,
    /// Inner HTML of the row's bold type label (presumably the combat type).
    pub ctype: String,
    /// Original-image URL derived from the `data-src` of the row's last image.
    pub ctype_image: String,
}
17
18pub fn index_characters(html: String) -> Vec<Character> {
19 let html = parse_html(&html);
20 let selector = Selector::parse("table.article-table>tbody").unwrap();
21 let row_selector = Selector::parse("tr").unwrap();
22 let name_selector = Selector::parse("td>a").unwrap();
23 let rarity_selector = Selector::parse("td>img").unwrap();
24 let path_selector = Selector::parse("td>span>a").unwrap();
25 let path_image_selector = Selector::parse("img").unwrap();
26 let ctype_selector = Selector::parse("td>span>span>b").unwrap();
27 let ctype_image_selector = Selector::parse("td img").unwrap();
28
29 let mut res = vec![];
30 let table = html.select(&selector).next().unwrap();
31 for (indx, entry) in table.select(&row_selector).skip(1).into_iter().enumerate() {
32 let link = entry.select(&name_selector).next().unwrap().value();
33 let name = link.attr("title").unwrap();
34 let link = canonicalize(link.attr("href").unwrap());
35
36 if name == "Trailblazer" {
41 continue;
42 }
43
44 let rarity = entry.select(&rarity_selector).next().unwrap().value();
45 let rarity_image = rarity.attr("data-src").unwrap();
46 let rarity = rarity.attr("alt").unwrap();
47
48 let path = entry.select(&path_selector).next().unwrap();
49 let path_image = path
50 .select(&path_image_selector)
51 .next()
52 .unwrap()
53 .value()
54 .attr("data-src")
55 .unwrap();
56 let path = path.value().attr("title").unwrap();
57
58 let ctype = entry.select(&ctype_selector).next().unwrap();
59 let ctype_image = entry
60 .select(&ctype_image_selector)
61 .last()
62 .unwrap()
63 .value()
64 .attr("data-src")
65 .unwrap();
66 let ctype = ctype.inner_html();
67
68 res.push(Character {
69 id: indx,
70 link: link.to_string(),
71 name: name.to_string(),
72 rarity: rarity.to_string(),
73 rarity_image: get_original_image(&parse_url(rarity_image))
74 .unwrap()
75 .to_string(),
76 path: path.to_string(),
77 path_image: path_image.to_string(),
78 ctype: ctype.to_string(),
79 ctype_image: get_original_image(&parse_url(ctype_image))
80 .unwrap()
81 .to_string(),
82 })
83 }
84
85 res
86}
87
88pub fn get_character_art(html: String) -> Option<(String, String)> {
89 let html = parse_html(&html);
90 let portrait_selector = Selector::parse("img[alt=Portrait]").ok()?;
91 let splash_selector = Selector::parse("img[alt=\"Splash Art\"]").ok()?;
92
93 let portrait = get_original_image(&parse_url(
94 html.select(&portrait_selector)
95 .next()?
96 .value()
97 .attr("data-src")?,
98 ))
99 .unwrap()
100 .to_string();
101
102 let splash = get_original_image(&parse_url(
103 html.select(&splash_selector)
104 .next()?
105 .value()
106 .attr("data-src")?,
107 ))
108 .unwrap()
109 .to_string();
110
111 Some((portrait, splash))
112}
113
114pub fn get_voice_overs(html: String) -> Vec<(String, String)> {
115 let html = parse_html(&html);
116 let voice_over_entry = Selector::parse("table.wikitable>tbody>tr").unwrap();
117 let vo_type = Selector::parse("th>div").unwrap();
118 let vo_audio = Selector::parse("td>span>a").unwrap();
119
120 let mut res = vec![];
121 for voice_over in html.select(&voice_over_entry) {
122 let audio = voice_over.select(&vo_audio).next();
123 let audio_type = voice_over.select(&vo_type).next();
124
125 if audio.is_none() || audio_type.is_none() {
126 continue;
127 }
128
129 let audio = audio.unwrap();
130
131 if audio
132 .value()
133 .classes()
134 .collect::<Vec<_>>()
135 .contains(&"no-audio")
136 || audio.value().attr("href").unwrap().starts_with("/")
137 {
138 continue;
139 }
140
141 let audio_link = audio.value().attr("href").unwrap().to_string();
142 let audio_type = audio_type.unwrap().value().id().unwrap().to_string();
143 res.push((audio_type, audio_link));
146 }
147
148 res
149}