thghosting_data_centers/
html.rs

1use std::{
2    net::{AddrParseError, Ipv4Addr},
3    str,
4};
5
6use http_api_client_endpoint::{
7    http::{Error as HttpError, Method},
8    Body, Endpoint, Request, Response,
9};
10use scraper::{Html, Selector};
11
/// Page address that [`HtmlEndpoint`] sends its `GET` request to in `render_request`.
pub const URL: &str = "https://www.thghosting.com/network/data-centers/";
13
14use crate::{AvailableService, DataCenter};
15
16//
17//
18//
/// Stateless endpoint that fetches the page at [`URL`] and parses it into data-center records.
///
/// The type carries no data, so it also derives `Default`, `PartialEq` and `Eq`:
/// callers can build it with `HtmlEndpoint::default()` and compare instances freely.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct HtmlEndpoint;
21
22impl Endpoint for HtmlEndpoint {
23    type RenderRequestError = HtmlEndpointError;
24
25    type ParseResponseOutput = Vec<DataCenter>;
26    type ParseResponseError = HtmlEndpointError;
27
28    fn render_request(&self) -> Result<Request<Body>, Self::RenderRequestError> {
29        let request = Request::builder()
30            .method(Method::GET)
31            .uri(URL)
32            .body(vec![])
33            .map_err(HtmlEndpointError::MakeRequestFailed)?;
34
35        Ok(request)
36    }
37
38    fn parse_response(
39        &self,
40        response: Response<Body>,
41    ) -> Result<Self::ParseResponseOutput, Self::ParseResponseError> {
42        let html =
43            str::from_utf8(response.body()).map_err(|_| HtmlEndpointError::ResponseBodyInvalid)?;
44
45        parse_html(html).map_err(HtmlEndpointError::ParseHtmlError)
46    }
47}
48
/// Failures produced by the `Endpoint` implementation of [`HtmlEndpoint`].
#[derive(thiserror::Error, Debug)]
pub enum HtmlEndpointError {
    /// The HTTP request could not be built in `render_request`.
    #[error("MakeRequestFailed {0}")]
    MakeRequestFailed(HttpError),
    /// The response body was not valid UTF-8 text.
    #[error("ResponseBodyInvalid")]
    ResponseBodyInvalid,
    /// The body was valid text, but [`parse_html`] returned an error for it.
    #[error("ParseHtmlError {0}")]
    ParseHtmlError(ParseHtmlError),
}
58
59//
60//
61//
62pub fn parse_html(html: impl AsRef<str>) -> Result<Vec<DataCenter>, ParseHtmlError> {
63    let document = Html::parse_document(html.as_ref());
64
65    let location_selector = Selector::parse("div.location").unwrap();
66
67    let mut data_centers = vec![];
68
69    for location_element in document.select(&location_selector) {
70        let id = location_element
71            .value()
72            .attr("id")
73            .ok_or(ParseHtmlError::IdMissing)?
74            .to_owned();
75
76        let city_selector = Selector::parse(".dc-city").unwrap();
77        let city = location_element
78            .select(&city_selector)
79            .next()
80            .ok_or(ParseHtmlError::CityMissing)?
81            .inner_html();
82
83        let mut available_services: Vec<AvailableService> = vec![];
84        let mut standard_bare_metal_bandwidth: Option<String> = None;
85        let mut ping: Option<Ipv4Addr> = None;
86        let mut test_download: Option<String> = None;
87
88        let tr_selector = Selector::parse("table tr").unwrap();
89        for tr_element in location_element.select(&tr_selector) {
90            let td_selector = Selector::parse("td").unwrap();
91            let mut td_element_iter = tr_element.select(&td_selector);
92            let head_element = td_element_iter
93                .next()
94                .ok_or(ParseHtmlError::AttrElementInvalid)?;
95            let _ = td_element_iter
96                .next()
97                .ok_or(ParseHtmlError::AttrElementInvalid)?;
98            let value_element = td_element_iter
99                .next()
100                .ok_or(ParseHtmlError::AttrElementInvalid)?;
101            if td_element_iter.next().is_some() {
102                return Err(ParseHtmlError::AttrElementInvalid);
103            }
104            match head_element.inner_html().as_str() {
105                "Available Services" => {
106                    let a_selector = Selector::parse("a").unwrap();
107                    for ele in value_element.select(&a_selector) {
108                        if let Some(title) = ele.value().attr("title") {
109                            match title {
110                                "Bare Metal Servers" => {
111                                    available_services.push(AvailableService::BareMetalServers)
112                                }
113                                "Virtual Servers" => {
114                                    available_services.push(AvailableService::VirtualServers)
115                                }
116                                "Private Cloud" => {
117                                    available_services.push(AvailableService::PrivateCloud)
118                                }
119                                _ => return Err(ParseHtmlError::AvailableServiceUnknown),
120                            }
121                        }
122                    }
123                }
124                "Available Networks" => {}
125                "Standard Bare Metal Bandwidth" => {
126                    let s = value_element.inner_html();
127                    match s.as_str() {
128                        "" => {}
129                        _ => {
130                            standard_bare_metal_bandwidth = Some(s);
131                        }
132                    }
133                }
134                "Ping/Trace Route" => {
135                    let s = value_element.inner_html();
136                    match s.as_str() {
137                        "-" | "" => {}
138                        _ => {
139                            let v = s
140                                .parse()
141                                .map_err(|err| ParseHtmlError::PingInvalid(s, err))?;
142                            ping = Some(v);
143                        }
144                    }
145                }
146                "Certifications" => {}
147                "Test Download" => {
148                    let s = value_element.inner_html();
149                    match s.as_str() {
150                        "" => {}
151                        _ => {
152                            let a_selector = Selector::parse("a").unwrap();
153                            if let Some(v) = value_element
154                                .select(&a_selector)
155                                .next()
156                                .and_then(|ele| ele.value().attr("href"))
157                                .map(ToOwned::to_owned)
158                            {
159                                test_download = Some(v);
160                            }
161                        }
162                    }
163                }
164                _ => {}
165            }
166        }
167
168        let url_selector = Selector::parse(".popover-container a").unwrap();
169        let url = location_element
170            .select(&url_selector)
171            .next()
172            .and_then(|ele| ele.value().attr("href"))
173            .map(ToOwned::to_owned);
174
175        data_centers.push(DataCenter {
176            id,
177            city,
178            available_services,
179            standard_bare_metal_bandwidth,
180            ping,
181            test_download,
182            url,
183        });
184    }
185
186    Ok(data_centers)
187}
188
/// Reasons [`parse_html`] can reject a document.
#[derive(thiserror::Error, Debug)]
pub enum ParseHtmlError {
    /// A `div.location` element has no `id` attribute.
    #[error("IdMissing")]
    IdMissing,
    /// A `div.location` element has no `.dc-city` descendant.
    #[error("CityMissing")]
    CityMissing,
    /// A `table tr` row does not contain exactly three `td` cells.
    #[error("AttrElementInvalid")]
    AttrElementInvalid,
    /// A link in the "Available Services" row has a `title` that is not one of the known services.
    #[error("AvailableServiceUnknown")]
    AvailableServiceUnknown,
    /// The "Ping/Trace Route" value is neither empty nor `-` and failed to parse as an IPv4
    /// address; carries the raw cell text and the parse error.
    #[error("PingInvalid {0} {1}")]
    PingInvalid(String, AddrParseError),
}
202
#[cfg(test)]
mod tests {
    use super::*;

    use std::{env, fs, path::PathBuf};

    /// Parses the saved `data-centers.html` fixture and checks the London record.
    ///
    /// The fixture is looked up under `thghosting-data-centers/tests/` first and
    /// under `tests/` second, both relative to `CARGO_MANIFEST_DIR` (or to the
    /// current directory when that variable is unset). If the file does not
    /// exist the test returns early without failing.
    #[test]
    fn test_parse_html() {
        let manifest_path = env::var("CARGO_MANIFEST_DIR")
            .map(PathBuf::from)
            .unwrap_or_default();

        let nested_fixture = manifest_path
            .join("thghosting-data-centers")
            .join("tests")
            .join("data-centers.html");
        let html_path = Some(nested_fixture)
            .filter(|candidate| candidate.exists())
            .unwrap_or_else(|| manifest_path.join("tests").join("data-centers.html"));
        println!("html_path:{:?}", html_path);

        let html = match fs::read_to_string(html_path) {
            Ok(text) => text,
            Err(err) => {
                // A missing fixture is tolerated; any other I/O failure is not.
                if err.kind() == std::io::ErrorKind::NotFound {
                    return;
                }
                panic!("{}", err)
            }
        };

        let data_centers = parse_html(html).unwrap();

        println!("{:?}", data_centers);

        let london = data_centers
            .iter()
            .find(|candidate| candidate.id == "london")
            .unwrap();

        assert_eq!(london.city, "London");
        assert_eq!(
            london.available_services,
            [
                AvailableService::BareMetalServers,
                AvailableService::VirtualServers
            ]
        );
        assert_eq!(
            london.standard_bare_metal_bandwidth.as_deref(),
            Some("100TB")
        );
        assert_eq!(london.ping, Some(Ipv4Addr::new(82, 163, 78, 28)));
        assert_eq!(
            london.test_download.as_deref(),
            Some("http://82.163.78.28/speedtest.256mb")
        );
        assert_eq!(
            london.url.as_deref(),
            Some("https://info.ingenuitycloudservices.com/us/data-center/london")
        );
    }
}