use crate::models::PageLoadResult;
use scraper::Html;
use std::time::Instant;
/// Loads HTML pages (over HTTP or from an in-memory string) and parses them
/// into a [`PageLoadResult`].
///
/// Holds a single `reqwest::Client` that is reused for every request made
/// through [`HtmlParser::load_from_url`].
#[derive(Debug, Clone)]
pub struct HtmlParser {
    /// HTTP client used for all outgoing page requests.
    client: reqwest::Client,
}
impl HtmlParser {
    /// Creates a parser whose HTTP client sends the AXIS-CORE user agent and
    /// applies a 30-second timeout.
    ///
    /// # Panics
    ///
    /// Panics if the HTTP client cannot be built.
    pub fn new() -> Self {
        let client = reqwest::Client::builder()
            .user_agent("AXIS-CORE/1.0 (https://github.com/ABHIRAM-CREATOR06/Acess1)")
            .timeout(std::time::Duration::from_secs(30))
            .build()
            .expect("Failed to create HTTP client");
        Self { client }
    }

    /// Fetches `url` with a GET request and parses the response body as HTML.
    ///
    /// The reported load time spans from just before the request is sent until
    /// the body text has been fully read; HTML parsing is not included. The
    /// page size is the byte length of the body text, and the request count is
    /// always 1.
    ///
    /// # Errors
    ///
    /// Returns an error if the request cannot be sent, if the response status
    /// is not a success status, or if the body cannot be read as text.
    pub async fn load_from_url(&self, url: &str) -> Result<PageLoadResult, Box<dyn std::error::Error>> {
        let start_time = Instant::now();
        let response = self.client.get(url).send().await?;

        let status = response.status();
        if !status.is_success() {
            return Err(format!("HTTP request failed with status: {}", status).into());
        }

        // Header-derived flags must be computed before `text()` consumes the response.
        let headers = response.headers();
        let has_caching_headers =
            headers.contains_key("cache-control") || headers.contains_key("expires");
        // Compression is a property of the transfer, so it is read from the
        // `Content-Encoding` header rather than guessed from words in the body.
        // NOTE(review): when reqwest's automatic decompression features are enabled
        // it strips `Content-Encoding` from decoded responses, in which case this
        // flag reads false — confirm against the crate's enabled reqwest features.
        let is_compressed = headers
            .get("content-encoding")
            .and_then(|value| value.to_str().ok())
            .map(Self::is_compressed_encoding)
            .unwrap_or(false);

        let content = response.text().await?;
        let load_time = start_time.elapsed().as_secs_f64();

        let document = Html::parse_document(&content);
        let page_size = content.len() as u64;
        let request_count = 1;

        Ok(PageLoadResult::new(
            document,
            load_time,
            request_count,
            page_size,
            is_compressed,
            has_caching_headers,
            Some(url),
        ))
    }

    /// Parses an in-memory HTML string without performing any network request.
    ///
    /// The result carries a load time of `0.0`, a request count of 1, the byte
    /// length of `html` as the page size, and both the compression and caching
    /// flags set to `false`. `base_url` is passed through unchanged.
    pub fn parse_html(&self, html: &str, base_url: Option<&str>) -> PageLoadResult {
        let document = Html::parse_document(html);
        let page_size = html.len() as u64;
        PageLoadResult::new(document, 0.0, 1, page_size, false, false, base_url)
    }

    /// Returns `true` if a `Content-Encoding` header value lists at least one
    /// known compression coding (the value may be a comma-separated list).
    fn is_compressed_encoding(header_value: &str) -> bool {
        const COMPRESSED_CODINGS: [&str; 7] = [
            "gzip",
            "x-gzip",
            "deflate",
            "br",
            "zstd",
            "compress",
            "x-compress",
        ];
        header_value.split(',').map(str::trim).any(|coding| {
            COMPRESSED_CODINGS
                .iter()
                .any(|known| coding.eq_ignore_ascii_case(known))
        })
    }
}
impl Default for HtmlParser {
fn default() -> Self {
Self::new()
}
}