thghosting_data_centers/
html.rs1use std::{
2 net::{AddrParseError, Ipv4Addr},
3 str,
4};
5
6use http_api_client_endpoint::{
7 http::{Error as HttpError, Method},
8 Body, Endpoint, Request, Response,
9};
10use scraper::{Html, Selector};
11
/// Address of the THG Hosting data-centers page requested by [`HtmlEndpoint`].
pub const URL: &str = "https://www.thghosting.com/network/data-centers/";
13
14use crate::{AvailableService, DataCenter};
15
/// Stateless endpoint that requests the data-centers page and parses the
/// returned HTML into a list of data centers.
///
/// The type carries no data, so it is free to copy, compare and
/// default-construct.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct HtmlEndpoint;
21
22impl Endpoint for HtmlEndpoint {
23 type RenderRequestError = HtmlEndpointError;
24
25 type ParseResponseOutput = Vec<DataCenter>;
26 type ParseResponseError = HtmlEndpointError;
27
28 fn render_request(&self) -> Result<Request<Body>, Self::RenderRequestError> {
29 let request = Request::builder()
30 .method(Method::GET)
31 .uri(URL)
32 .body(vec![])
33 .map_err(HtmlEndpointError::MakeRequestFailed)?;
34
35 Ok(request)
36 }
37
38 fn parse_response(
39 &self,
40 response: Response<Body>,
41 ) -> Result<Self::ParseResponseOutput, Self::ParseResponseError> {
42 let html =
43 str::from_utf8(response.body()).map_err(|_| HtmlEndpointError::ResponseBodyInvalid)?;
44
45 parse_html(html).map_err(HtmlEndpointError::ParseHtmlError)
46 }
47}
48
49#[derive(thiserror::Error, Debug)]
50pub enum HtmlEndpointError {
51 #[error("MakeRequestFailed {0}")]
52 MakeRequestFailed(HttpError),
53 #[error("ResponseBodyInvalid")]
54 ResponseBodyInvalid,
55 #[error("ParseHtmlError {0}")]
56 ParseHtmlError(ParseHtmlError),
57}
58
59pub fn parse_html(html: impl AsRef<str>) -> Result<Vec<DataCenter>, ParseHtmlError> {
63 let document = Html::parse_document(html.as_ref());
64
65 let location_selector = Selector::parse("div.location").unwrap();
66
67 let mut data_centers = vec![];
68
69 for location_element in document.select(&location_selector) {
70 let id = location_element
71 .value()
72 .attr("id")
73 .ok_or(ParseHtmlError::IdMissing)?
74 .to_owned();
75
76 let city_selector = Selector::parse(".dc-city").unwrap();
77 let city = location_element
78 .select(&city_selector)
79 .next()
80 .ok_or(ParseHtmlError::CityMissing)?
81 .inner_html();
82
83 let mut available_services: Vec<AvailableService> = vec![];
84 let mut standard_bare_metal_bandwidth: Option<String> = None;
85 let mut ping: Option<Ipv4Addr> = None;
86 let mut test_download: Option<String> = None;
87
88 let tr_selector = Selector::parse("table tr").unwrap();
89 for tr_element in location_element.select(&tr_selector) {
90 let td_selector = Selector::parse("td").unwrap();
91 let mut td_element_iter = tr_element.select(&td_selector);
92 let head_element = td_element_iter
93 .next()
94 .ok_or(ParseHtmlError::AttrElementInvalid)?;
95 let _ = td_element_iter
96 .next()
97 .ok_or(ParseHtmlError::AttrElementInvalid)?;
98 let value_element = td_element_iter
99 .next()
100 .ok_or(ParseHtmlError::AttrElementInvalid)?;
101 if td_element_iter.next().is_some() {
102 return Err(ParseHtmlError::AttrElementInvalid);
103 }
104 match head_element.inner_html().as_str() {
105 "Available Services" => {
106 let a_selector = Selector::parse("a").unwrap();
107 for ele in value_element.select(&a_selector) {
108 if let Some(title) = ele.value().attr("title") {
109 match title {
110 "Bare Metal Servers" => {
111 available_services.push(AvailableService::BareMetalServers)
112 }
113 "Virtual Servers" => {
114 available_services.push(AvailableService::VirtualServers)
115 }
116 "Private Cloud" => {
117 available_services.push(AvailableService::PrivateCloud)
118 }
119 _ => return Err(ParseHtmlError::AvailableServiceUnknown),
120 }
121 }
122 }
123 }
124 "Available Networks" => {}
125 "Standard Bare Metal Bandwidth" => {
126 let s = value_element.inner_html();
127 match s.as_str() {
128 "" => {}
129 _ => {
130 standard_bare_metal_bandwidth = Some(s);
131 }
132 }
133 }
134 "Ping/Trace Route" => {
135 let s = value_element.inner_html();
136 match s.as_str() {
137 "-" | "" => {}
138 _ => {
139 let v = s
140 .parse()
141 .map_err(|err| ParseHtmlError::PingInvalid(s, err))?;
142 ping = Some(v);
143 }
144 }
145 }
146 "Certifications" => {}
147 "Test Download" => {
148 let s = value_element.inner_html();
149 match s.as_str() {
150 "" => {}
151 _ => {
152 let a_selector = Selector::parse("a").unwrap();
153 if let Some(v) = value_element
154 .select(&a_selector)
155 .next()
156 .and_then(|ele| ele.value().attr("href"))
157 .map(ToOwned::to_owned)
158 {
159 test_download = Some(v);
160 }
161 }
162 }
163 }
164 _ => {}
165 }
166 }
167
168 let url_selector = Selector::parse(".popover-container a").unwrap();
169 let url = location_element
170 .select(&url_selector)
171 .next()
172 .and_then(|ele| ele.value().attr("href"))
173 .map(ToOwned::to_owned);
174
175 data_centers.push(DataCenter {
176 id,
177 city,
178 available_services,
179 standard_bare_metal_bandwidth,
180 ping,
181 test_download,
182 url,
183 });
184 }
185
186 Ok(data_centers)
187}
188
189#[derive(thiserror::Error, Debug)]
190pub enum ParseHtmlError {
191 #[error("IdMissing")]
192 IdMissing,
193 #[error("CityMissing")]
194 CityMissing,
195 #[error("AttrElementInvalid")]
196 AttrElementInvalid,
197 #[error("AvailableServiceUnknown")]
198 AvailableServiceUnknown,
199 #[error("PingInvalid {0} {1}")]
200 PingInvalid(String, AddrParseError),
201}
202
#[cfg(test)]
mod tests {
    use super::*;

    use std::{env, fs, path::PathBuf};

    /// Parses the saved `data-centers.html` fixture and checks the London
    /// entry field by field.
    ///
    /// The fixture is looked up first under `thghosting-data-centers/tests`
    /// and then under `tests`, both relative to `CARGO_MANIFEST_DIR` (or to
    /// an empty path when that variable is unset). When the file does not
    /// exist the test returns without asserting anything.
    #[test]
    fn test_parse_html() {
        let manifest_path = env::var("CARGO_MANIFEST_DIR")
            .map(PathBuf::from)
            .unwrap_or_default();

        let nested_fixture = manifest_path
            .join("thghosting-data-centers")
            .join("tests")
            .join("data-centers.html");
        let html_path = if nested_fixture.exists() {
            nested_fixture
        } else {
            manifest_path.join("tests").join("data-centers.html")
        };
        println!("html_path:{:?}", html_path);

        let html = match fs::read_to_string(html_path) {
            Ok(contents) => contents,
            Err(err) => {
                // A missing fixture is tolerated; any other I/O error is fatal.
                if err.kind() == std::io::ErrorKind::NotFound {
                    return;
                }
                panic!("{}", err)
            }
        };

        let data_centers = parse_html(html).unwrap();

        println!("{:?}", data_centers);

        let london = data_centers
            .iter()
            .find(|candidate| candidate.id == "london")
            .unwrap();

        assert_eq!(london.city, "London");

        let expected_services = vec![
            AvailableService::BareMetalServers,
            AvailableService::VirtualServers,
        ];
        assert_eq!(london.available_services, expected_services);

        let expected_bandwidth = Some("100TB".to_owned());
        assert_eq!(london.standard_bare_metal_bandwidth, expected_bandwidth);

        let expected_ping = Some(Ipv4Addr::new(82, 163, 78, 28));
        assert_eq!(london.ping, expected_ping);

        let expected_download = Some("http://82.163.78.28/speedtest.256mb".to_owned());
        assert_eq!(london.test_download, expected_download);

        let expected_url =
            Some("https://info.ingenuitycloudservices.com/us/data-center/london".to_owned());
        assert_eq!(london.url, expected_url);
    }
}