use std::sync::Arc;
use async_trait::async_trait;
use scraper::Selector;
use crate::fetcher::PageFetcher;
use crate::{Engine, EngineConfig, Result, SearchError, SearchQuery, SearchResult};
/// Compiles the CSS selector text `css` into a [`Selector`].
///
/// # Errors
///
/// Returns [`SearchError::Parse`] when [`Selector::parse`] rejects `css`.
/// The message quotes the offending selector text and appends the debug
/// rendering of the underlying parse error.
pub fn selector(css: &str) -> Result<Selector> {
    match Selector::parse(css) {
        Ok(compiled) => Ok(compiled),
        Err(cause) => Err(SearchError::Parse(format!(
            "Invalid CSS selector '{}': {:?}",
            css, cause
        ))),
    }
}
/// Site-specific half of an HTML-backed search engine.
///
/// An implementor supplies the request URL and the result extraction;
/// [`HtmlEngine`] performs the fetch and calls `build_url`, `validate` and
/// `parse` in that order. `Send + Sync` are required as supertraits.
pub trait HtmlParser: Send + Sync {
/// Returns the configuration that [`HtmlEngine::with_fetcher`] installs
/// for an engine built around this parser.
fn default_config() -> EngineConfig;
/// Builds the URL that is fetched for `query`.
fn build_url(&self, query: &SearchQuery) -> String;
/// Checks the fetched page before it is handed to `parse`.
///
/// The default implementation ignores the page and accepts it. An error
/// returned here is propagated by [`HtmlEngine`]'s `search`, which then
/// does not call `parse`.
fn validate(&self, _html: &str) -> Result<()> {
Ok(())
}
/// Extracts the search results from the page markup `html`.
fn parse(&self, html: &str) -> Result<Vec<SearchResult>>;
}
/// Search engine that fetches a page through a [`PageFetcher`] and delegates
/// URL construction, validation and result extraction to an [`HtmlParser`].
pub struct HtmlEngine<P: HtmlParser> {
// Configuration exposed through `Engine::config`; starts out as
// `P::default_config()` and can be replaced with `with_config`.
config: EngineConfig,
// Shared fetcher used by `search` to retrieve the page for the built URL.
fetcher: Arc<dyn PageFetcher>,
// The site-specific parser; visible to the rest of the crate.
pub(crate) parser: P,
}
impl<P: HtmlParser> HtmlEngine<P> {
    /// Creates an engine from `parser` and `fetcher`.
    ///
    /// The engine starts with the configuration returned by
    /// `P::default_config()`; use [`HtmlEngine::with_config`] to replace it.
    pub fn with_fetcher(parser: P, fetcher: Arc<dyn PageFetcher>) -> Self {
        let config = P::default_config();
        Self {
            parser,
            fetcher,
            config,
        }
    }

    /// Consumes the engine and returns it with `config` in place of the
    /// previous configuration; the fetcher and parser are carried over.
    pub fn with_config(self, config: EngineConfig) -> Self {
        Self { config, ..self }
    }
}
#[async_trait]
impl<P: HtmlParser> Engine for HtmlEngine<P> {
    /// Returns the configuration currently held by this engine.
    fn config(&self) -> &EngineConfig {
        let Self { config, .. } = self;
        config
    }

    /// Runs `query`: builds the URL with the parser, fetches it, validates
    /// the page, then parses it into results.
    ///
    /// # Errors
    ///
    /// Propagates the first failure from the fetcher, from the parser's
    /// `validate`, or from the parser's `parse`. Parsing is not attempted
    /// when validation fails.
    async fn search(&self, query: &SearchQuery) -> Result<Vec<SearchResult>> {
        let target = self.parser.build_url(query);
        let body = self.fetcher.fetch(&target).await?;
        self.parser
            .validate(&body)
            .and_then(|()| self.parser.parse(&body))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A simple tag-plus-class selector is accepted.
    #[test]
    fn test_selector_valid() {
        assert!(selector("div.g").is_ok());
    }

    /// A selector with an attribute matcher is accepted.
    #[test]
    fn test_selector_complex() {
        assert!(selector("div.snippet[data-type=\"web\"]").is_ok());
    }

    /// Malformed selector text is rejected with the expected message prefix.
    #[test]
    fn test_selector_invalid() {
        let outcome = selector("[[[invalid");
        assert!(outcome.is_err());

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Invalid CSS selector"));
    }
}