herta/extractor/
character.rs

1use scraper::Selector;
2
3use super::{parse_html, parse_url};
4use crate::url::{canonicalize, get_original_image};
5
/// One entry of the character index, as collected by `index_characters`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Character {
    /// Position of the character's row in the index table, counted after the
    /// first (skipped) row.
    pub id: usize,
    /// `title` attribute of the row's character link.
    pub name: String,
    /// Result of `canonicalize` applied to the character link's `href`.
    pub link: String,
    /// `alt` text of the row's rarity image.
    pub rarity: String,
    /// Rarity image URL after being passed through `get_original_image`.
    pub rarity_image: String,
    /// `title` attribute of the row's path link.
    pub path: String,
    /// `data-src` of the image nested inside the path link.
    pub path_image: String,
    /// Inner HTML of the row's bold type label (presumably the combat type).
    pub ctype: String,
    /// URL of the last image in the row after being passed through
    /// `get_original_image`.
    pub ctype_image: String,
}
17
18pub fn index_characters(html: String) -> Vec<Character> {
19    let html = parse_html(&html);
20    let selector = Selector::parse("table.article-table>tbody").unwrap();
21    let row_selector = Selector::parse("tr").unwrap();
22    let name_selector = Selector::parse("td>a").unwrap();
23    let rarity_selector = Selector::parse("td>img").unwrap();
24    let path_selector = Selector::parse("td>span>a").unwrap();
25    let path_image_selector = Selector::parse("img").unwrap();
26    let ctype_selector = Selector::parse("td>span>span>b").unwrap();
27    let ctype_image_selector = Selector::parse("td img").unwrap();
28
29    let mut res = vec![];
30    let table = html.select(&selector).next().unwrap();
31    for (indx, entry) in table.select(&row_selector).skip(1).into_iter().enumerate() {
32        let link = entry.select(&name_selector).next().unwrap().value();
33        let name = link.attr("title").unwrap();
34        let link = canonicalize(link.attr("href").unwrap());
35
36        // we skip the trailblazer because
37        // its an 'adaptive' character. there
38        // will be a special implementation
39        // for Steele and Caelus
40        if name == "Trailblazer" {
41            continue;
42        }
43
44        let rarity = entry.select(&rarity_selector).next().unwrap().value();
45        let rarity_image = rarity.attr("data-src").unwrap();
46        let rarity = rarity.attr("alt").unwrap();
47
48        let path = entry.select(&path_selector).next().unwrap();
49        let path_image = path
50            .select(&path_image_selector)
51            .next()
52            .unwrap()
53            .value()
54            .attr("data-src")
55            .unwrap();
56        let path = path.value().attr("title").unwrap();
57
58        let ctype = entry.select(&ctype_selector).next().unwrap();
59        let ctype_image = entry
60            .select(&ctype_image_selector)
61            .last()
62            .unwrap()
63            .value()
64            .attr("data-src")
65            .unwrap();
66        let ctype = ctype.inner_html();
67
68        res.push(Character {
69            id: indx,
70            link: link.to_string(),
71            name: name.to_string(),
72            rarity: rarity.to_string(),
73            rarity_image: get_original_image(&parse_url(rarity_image))
74                .unwrap()
75                .to_string(),
76            path: path.to_string(),
77            path_image: path_image.to_string(),
78            ctype: ctype.to_string(),
79            ctype_image: get_original_image(&parse_url(ctype_image))
80                .unwrap()
81                .to_string(),
82        })
83    }
84
85    res
86}
87
88pub fn get_character_art(html: String) -> Option<(String, String)> {
89    let html = parse_html(&html);
90    let portrait_selector = Selector::parse("img[alt=Portrait]").ok()?;
91    let splash_selector = Selector::parse("img[alt=\"Splash Art\"]").ok()?;
92
93    let portrait = get_original_image(&parse_url(
94        html.select(&portrait_selector)
95            .next()?
96            .value()
97            .attr("data-src")?,
98    ))
99    .unwrap()
100    .to_string();
101
102    let splash = get_original_image(&parse_url(
103        html.select(&splash_selector)
104            .next()?
105            .value()
106            .attr("data-src")?,
107    ))
108    .unwrap()
109    .to_string();
110
111    Some((portrait, splash))
112}
113
114pub fn get_voice_overs(html: String) -> Vec<(String, String)> {
115    let html = parse_html(&html);
116    let voice_over_entry = Selector::parse("table.wikitable>tbody>tr").unwrap();
117    let vo_type = Selector::parse("th>div").unwrap();
118    let vo_audio = Selector::parse("td>span>a").unwrap();
119
120    let mut res = vec![];
121    for voice_over in html.select(&voice_over_entry) {
122        let audio = voice_over.select(&vo_audio).next();
123        let audio_type = voice_over.select(&vo_type).next();
124
125        if audio.is_none() || audio_type.is_none() {
126            continue;
127        }
128
129        let audio = audio.unwrap();
130
131        if audio
132            .value()
133            .classes()
134            .collect::<Vec<_>>()
135            .contains(&"no-audio")
136            || audio.value().attr("href").unwrap().starts_with("/")
137        {
138            continue;
139        }
140
141        let audio_link = audio.value().attr("href").unwrap().to_string();
142        let audio_type = audio_type.unwrap().value().id().unwrap().to_string();
143        // let audio_type = audio_type.value().id().unwrap().to_string();
144
145        res.push((audio_type, audio_link));
146    }
147
148    res
149}