1use crate::models::PageLoadResult;
2use scraper::Html;
3use std::time::Instant;
4
/// Fetches remote pages over HTTP and parses HTML documents,
/// producing [`PageLoadResult`] values with basic load metrics.
pub struct HtmlParser {
    // Shared HTTP client, configured once in `new` (user agent + timeout).
    client: reqwest::Client,
}
9
10impl HtmlParser {
11 pub fn new() -> Self {
13 let client = reqwest::Client::builder()
14 .user_agent("AXIS-CORE/1.0 (https://github.com/ABHIRAM-CREATOR06/Acess1)")
15 .timeout(std::time::Duration::from_secs(30))
16 .build()
17 .expect("Failed to create HTTP client");
18
19 Self { client }
20 }
21
22 pub async fn load_from_url(&self, url: &str) -> Result<PageLoadResult, Box<dyn std::error::Error>> {
24 let start_time = Instant::now();
25
26 let response = self.client.get(url).send().await?;
28 let status = response.status();
29
30 if !status.is_success() {
31 return Err(format!("HTTP request failed with status: {}", status).into());
32 }
33
34 let has_caching_headers = response.headers()
36 .get("cache-control")
37 .or_else(|| response.headers().get("expires"))
38 .is_some();
39
40 let content = response.text().await?;
42 let load_time = start_time.elapsed().as_secs_f64();
43
44 let document = Html::parse_document(&content);
46
47 let page_size = content.len() as u64;
49 let request_count = 1; let is_compressed = content.contains("gzip") || content.contains("deflate");
53
54 Ok(PageLoadResult::new(
55 document,
56 load_time,
57 request_count,
58 page_size,
59 is_compressed,
60 has_caching_headers,
61 Some(url),
62 ))
63 }
64
65 pub fn parse_html(&self, html: &str, base_url: Option<&str>) -> PageLoadResult {
67 let document = Html::parse_document(html);
68 let page_size = html.len() as u64;
69
70 PageLoadResult::new(
71 document,
72 0.0, 1, page_size,
75 false, false, base_url,
78 )
79 }
80}
81
82impl Default for HtmlParser {
83 fn default() -> Self {
84 Self::new()
85 }
86}