web2llm 0.4.0

Fetch web pages and convert to clean Markdown for LLM pipelines
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
use url::Url;

use crate::error::Result;

/// Downloads the page at `url` and returns its body as a `String`.
///
/// A single GET request is issued through the supplied `client`; the response
/// is taken as-is, so no JavaScript is executed and only the statically served
/// markup is returned. The resulting string is meant to be handed to the
/// downstream parsing step that builds `PageElements`.
///
/// # Errors
///
/// Returns an error if the request cannot be sent, if the server answers with
/// an error status (rejected by `error_for_status`), or if the response body
/// cannot be read as text.
pub(crate) async fn get_html(url: &Url, client: &reqwest::Client) -> Result<String> {
    let body = client
        .get(url.as_str())
        .send()
        .await?
        // Reject error statuses before reading the body.
        .error_for_status()?
        .text()
        .await?;

    Ok(body)
}