//! Fetches a web page and extracts its title, Open Graph properties
//! (`og:description`, `og:type`, `og:image`), author tags and theme colour
//! into a [`types::WebData`] value. See [`fetch`] for the entry point.

// `fetch` creates a `WebData` with `Default::default()` and then assigns its
// fields one at a time; this lint would otherwise flag that pattern.
#![allow(clippy::field_reassign_with_default)]
use scraper::{ElementRef, Html, Selector};
use types::{OgType, WebData};
use url::Url;
// Data types used by this module (`WebData`, `OgType`).
pub mod types;
// Short alias for the `types` module.
pub use types as ty;
// Optional module, compiled only when the `driver` feature is enabled.
#[cfg(feature = "driver")]
pub mod driver;
// Short alias for the `driver` module (same feature gate).
#[cfg(feature = "driver")]
pub use driver as dr;
/// Returns the `content` attribute of `element` as an owned string, or `None`
/// when the attribute is absent.
fn meta_content(element: &ElementRef) -> Option<String> {
    element.value().attr("content").map(str::to_string)
}

/// Downloads the page at `url` and extracts its title and metadata.
///
/// The following are read from the parsed document:
///
/// * `title` — text of the first `<title>` element,
/// * `description` — `content` of the first `meta[property="og:description"]`,
/// * `type` — `content` of the first `meta[property="og:type"]`, converted with
///   [`OgType::from_meta`]; the default `OgType` when missing,
/// * `image` — `content` of the first `meta[property="og:image"]`, passed
///   through [`resolve_url`] with `url` as the base,
/// * `author` — `content` of every `meta` whose `property` ends in `:author`,
/// * `colour` — `content` of the first `meta[name="theme-color"]`.
///
/// A `<meta>` tag that matches a selector but carries no `content` attribute is
/// treated as if it were absent.
///
/// # Errors
///
/// Returns a descriptive `String` when the request fails, the response body
/// cannot be read, or the document contains no `<title>` element.
///
/// # Panics
///
/// Panics only if one of the hard-coded CSS selectors fails to parse, which
/// would be a programming error in this function.
pub async fn fetch(url: &str) -> Result<WebData, String> {
    let body = reqwest::get(url)
        .await
        .map_err(|e| format!("Failed to fetch url: {:?}", e))?
        .text()
        .await
        .map_err(|e| format!("Failed to read response: {:?}", e))?;

    // The parsed document is created only after the last `.await`, so it is
    // never held across a suspension point.
    let document = Html::parse_document(&body);

    let find = |id: &str| {
        document
            .select(
                &Selector::parse(id).unwrap_or_else(|_| panic!("Failed to build selector: {id}")),
            )
            .collect::<Vec<ElementRef>>()
    };

    let mut data = WebData::default();

    // Remote pages are untrusted input: a missing <title> is reported as an
    // error instead of aborting the caller with a panic.
    let title_matches = find("title");
    let title_element = title_matches
        .first()
        .ok_or_else(|| format!("No <title> element found: {url}"))?;
    data.title = title_element.text().collect();

    data.description = find("meta[property=\"og:description\"]")
        .first()
        .and_then(meta_content);

    data.r#type = find("meta[property=\"og:type\"]")
        .first()
        .and_then(|e| e.value().attr("content"))
        .map(|content| OgType::from_meta(content))
        .unwrap_or_default();

    data.image = find("meta[property=\"og:image\"]")
        .first()
        .and_then(|e| e.value().attr("content"))
        .map(|content| resolve_url(content, url));

    // Author tags lacking a `content` attribute are skipped rather than
    // panicking.
    data.author = find("meta[property$=\":author\"]")
        .iter()
        .filter_map(meta_content)
        .collect();

    data.colour = find("meta[name=\"theme-color\"]")
        .first()
        .and_then(meta_content);

    Ok(data)
}
/// Resolves `url` against `base` and returns the result as a string.
///
/// * Empty (or whitespace-only) input, and input that already parses as an
///   absolute URL, is returned unchanged.
/// * Anything else is treated as a relative reference and joined onto `base`.
///   This covers root-relative (`/img.png`), scheme-relative
///   (`//cdn.example/img.png`), and path-relative (`./img.png`, `img.png`,
///   `../img.png`) references; path-relative ones resolve against the
///   directory of `base`, not the site root.
/// * If `base` cannot be parsed or the join fails, `url` is returned unchanged
///   instead of panicking.
pub fn resolve_url(url: &str, base: &str) -> String {
    // Nothing to resolve: blank values stay blank, absolute URLs stay as given.
    if url.trim().is_empty() || Url::parse(url).is_ok() {
        return url.to_string();
    }

    // Join against the full base URL (not just its origin) so that relative
    // paths resolve correctly; fall back to the raw input on any failure.
    Url::parse(base)
        .and_then(|base| base.join(url))
        .map(|resolved| resolved.to_string())
        .unwrap_or_else(|_| url.to_string())
}
#[cfg(test)]
pub mod test {
    use super::fetch;

    /// Fetches `target`, panicking if the fetch fails, and prints the result
    /// using its `Debug` representation.
    async fn fetch_and_print(target: &str) {
        let data = fetch(target).await.unwrap();
        println!("{:?}", data);
    }

    /// Live-network smoke test against a reneweconomy.com.au article.
    #[tokio::test]
    async fn a() {
        fetch_and_print("https://reneweconomy.com.au/market-operator-ticks-off-one-of-major-challenges-of-meeting-100-pct-renewables/").await;
    }

    /// Live-network smoke test against an oilprice.com article.
    #[tokio::test]
    async fn b() {
        fetch_and_print("https://oilprice.com/Latest-Energy-News/World-News/US-Natural-Gas-Prices-Tumble-10-on-Mild-Weather.html").await;
    }
}