use crate::engine::CrawlEngine;
use crate::error::CrawlError;
use crate::types::{CrawlConfig, CrawlResult, MapResult, ScrapeResult};
use serde::{Deserialize, Serialize};
/// Handle that wraps a [`CrawlEngine`] value.
///
/// The free functions in this file take the handle by reference and forward
/// their work to the wrapped engine. The wrapped engine is kept in a private
/// field, so code outside this module cannot reach it directly.
///
/// `Clone` is derived, so cloning the handle clones the wrapped engine through
/// `CrawlEngine`'s own `Clone` implementation.
#[derive(Clone)]
pub struct CrawlEngineHandle {
/// The wrapped engine that every call is forwarded to.
inner: CrawlEngine,
}
/// Outcome of one entry of a batch scrape, in a serializable form.
///
/// When built by `batch_scrape` in this file, exactly one of `result` and
/// `error` is `Some`. The derived `Default` leaves both as `None` with an
/// empty `url`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BatchScrapeResult {
/// URL this entry refers to, as reported by the engine's batch call.
pub url: String,
/// The scrape output when the entry succeeded; `None` on failure.
pub result: Option<ScrapeResult>,
/// The failure rendered with `to_string()` when the entry failed; `None` on success.
pub error: Option<String>,
}
/// Outcome of one entry of a batch crawl, in a serializable form.
///
/// When built by `batch_crawl` in this file, exactly one of `result` and
/// `error` is `Some`. The derived `Default` leaves both as `None` with an
/// empty `url`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BatchCrawlResult {
/// URL this entry refers to, as reported by the engine's batch call.
pub url: String,
/// The crawl output when the entry succeeded; `None` on failure.
pub result: Option<CrawlResult>,
/// The failure rendered with `to_string()` when the entry failed; `None` on success.
pub error: Option<String>,
}
/// Builds a [`CrawlEngineHandle`] from an optional configuration.
///
/// When `config` is `Some`, it is handed to the engine builder before the
/// engine is built; when it is `None`, the builder is used as-is.
///
/// # Errors
///
/// Propagates the error produced by the builder's `build` call.
pub fn create_engine(config: Option<CrawlConfig>) -> Result<CrawlEngineHandle, CrawlError> {
    // Choose the builder in a single expression instead of rebinding a mutable one.
    let builder = match config {
        Some(cfg) => CrawlEngine::builder().config(cfg),
        None => CrawlEngine::builder(),
    };

    let inner = builder.build()?;
    Ok(CrawlEngineHandle { inner })
}
/// Runs the wrapped engine's `scrape` on `url` and awaits the outcome.
///
/// This is a direct forward to the engine held by `engine`; no extra
/// processing is applied to the argument or the result.
///
/// # Errors
///
/// Returns the `CrawlError` produced by the wrapped engine's `scrape` call.
pub async fn scrape(engine: &CrawlEngineHandle, url: &str) -> Result<ScrapeResult, CrawlError> {
engine.inner.scrape(url).await
}
/// Runs the wrapped engine's `crawl` on `url` and awaits the outcome.
///
/// This is a direct forward to the engine held by `engine`; no extra
/// processing is applied to the argument or the result.
///
/// # Errors
///
/// Returns the `CrawlError` produced by the wrapped engine's `crawl` call.
pub async fn crawl(engine: &CrawlEngineHandle, url: &str) -> Result<CrawlResult, CrawlError> {
engine.inner.crawl(url).await
}
/// Runs the wrapped engine's `map` on `url` and awaits the outcome.
///
/// This is a direct forward to the engine held by `engine`; no extra
/// processing is applied to the argument or the result.
///
/// # Errors
///
/// Returns the `CrawlError` produced by the wrapped engine's `map` call.
pub async fn map_urls(engine: &CrawlEngineHandle, url: &str) -> Result<MapResult, CrawlError> {
engine.inner.map(url).await
}
pub async fn batch_scrape(engine: &CrawlEngineHandle, urls: Vec<String>) -> Vec<BatchScrapeResult> {
let url_refs: Vec<&str> = urls.iter().map(String::as_str).collect();
let results = engine.inner.batch_scrape(&url_refs).await;
results
.into_iter()
.map(|(url, result)| match result {
Ok(r) => BatchScrapeResult {
url,
result: Some(r),
error: None,
},
Err(e) => BatchScrapeResult {
url,
result: None,
error: Some(e.to_string()),
},
})
.collect()
}
pub async fn batch_crawl(engine: &CrawlEngineHandle, urls: Vec<String>) -> Vec<BatchCrawlResult> {
let url_refs: Vec<&str> = urls.iter().map(String::as_str).collect();
let results = engine.inner.batch_crawl(&url_refs).await;
results
.into_iter()
.map(|(url, result)| match result {
Ok(r) => BatchCrawlResult {
url,
result: Some(r),
error: None,
},
Err(e) => BatchCrawlResult {
url,
result: None,
error: Some(e.to_string()),
},
})
.collect()
}