// axis_core/parser.rs

1use crate::models::PageLoadResult;
2use scraper::Html;
3use std::time::Instant;
4
5/// HTML parser for loading and parsing web content
6pub struct HtmlParser {
7    client: reqwest::Client,
8}
9
10impl HtmlParser {
11    /// Create a new HTML parser
12    pub fn new() -> Self {
13        let client = reqwest::Client::builder()
14            .user_agent("AXIS-CORE/1.0 (https://github.com/ABHIRAM-CREATOR06/Acess1)")
15            .timeout(std::time::Duration::from_secs(30))
16            .build()
17            .expect("Failed to create HTTP client");
18
19        Self { client }
20    }
21
22    /// Load HTML content from a URL
23    pub async fn load_from_url(&self, url: &str) -> Result<PageLoadResult, Box<dyn std::error::Error>> {
24        let start_time = Instant::now();
25
26        // Make the HTTP request
27        let response = self.client.get(url).send().await?;
28        let status = response.status();
29
30        if !status.is_success() {
31            return Err(format!("HTTP request failed with status: {}", status).into());
32        }
33
34        // Check for caching headers before consuming the response
35        let has_caching_headers = response.headers()
36            .get("cache-control")
37            .or_else(|| response.headers().get("expires"))
38            .is_some();
39
40        // Get the content
41        let content = response.text().await?;
42        let load_time = start_time.elapsed().as_secs_f64();
43
44        // Parse HTML
45        let document = Html::parse_document(&content);
46
47        // Basic analysis (simplified compared to full implementation)
48        let page_size = content.len() as u64;
49        let request_count = 1; // Simplified - would need more complex analysis for full count
50
51        // Check for compression (basic check)
52        let is_compressed = content.contains("gzip") || content.contains("deflate");
53
54        Ok(PageLoadResult::new(
55            document,
56            load_time,
57            request_count,
58            page_size,
59            is_compressed,
60            has_caching_headers,
61            Some(url),
62        ))
63    }
64
65    /// Parse HTML content directly
66    pub fn parse_html(&self, html: &str, base_url: Option<&str>) -> PageLoadResult {
67        let document = Html::parse_document(html);
68        let page_size = html.len() as u64;
69
70        PageLoadResult::new(
71            document,
72            0.0, // No load time for direct parsing
73            1,   // Single "request"
74            page_size,
75            false, // Assume not compressed
76            false, // Assume no caching headers
77            base_url,
78        )
79    }
80}
81
82impl Default for HtmlParser {
83    fn default() -> Self {
84        Self::new()
85    }
86}