use dashmap::DashMap;
use serde_json::Value;
use std::sync::Arc;
use std::time::Duration;
/// Fetch strategy carried by a [`Context`].
///
/// The variants hold no data; what each strategy means in practice is decided
/// by the code that performs the fetch, not by this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FetchStrategy {
    /// Strategy selected by [`Context::auto`]. It is also the value
    /// [`get_fetch_strategy`] reports when no task-local context is set.
    Auto,
    /// Strategy selected by [`Context::fast`].
    Fast,
}
/// Fetch timeout (30 seconds) that [`Context::auto`] starts with and that
/// [`get_fetch_timeout`] returns when no task-local context is available.
pub const DEFAULT_FETCH_TIMEOUT: Duration = Duration::from_secs(30);

/// Value of [`Context::concurrency`] for contexts built with [`Context::auto`].
pub const DEFAULT_CONCURRENCY: usize = 1_000;
/// Configuration bundle for fetching.
///
/// Build one with [`Context::auto`] or [`Context::fast`] and adjust it with the
/// `with_*` methods. The `CTX` task-local holds an `Arc<Context>` that the
/// free accessor functions in this file read from.
#[derive(Debug, Clone)]
pub struct Context {
    /// Which [`FetchStrategy`] to use.
    pub fetch_strategy: FetchStrategy,
    /// Timeout for a fetch; starts at [`DEFAULT_FETCH_TIMEOUT`].
    pub fetch_timeout: Duration,
    /// Concurrency setting; starts at [`DEFAULT_CONCURRENCY`].
    // NOTE(review): presumably an upper bound on in-flight fetches — confirm
    // against the code that consumes this field.
    pub concurrency: usize,
    /// Optional list of allowed domains; `None` when no list was configured.
    // NOTE(review): the matching rules (exact host, suffix, case handling) are
    // defined by the consumer and are not visible here.
    pub allow_domains: Option<Vec<String>>,
    /// Optional list of blocked domains; `None` when no list was configured.
    pub block_domains: Option<Vec<String>>,
}
impl Context {
    /// Creates a context with [`FetchStrategy::Auto`], the default timeout and
    /// concurrency, and no allow/block domain lists.
    pub fn auto() -> Self {
        Self {
            allow_domains: None,
            block_domains: None,
            concurrency: DEFAULT_CONCURRENCY,
            fetch_timeout: DEFAULT_FETCH_TIMEOUT,
            fetch_strategy: FetchStrategy::Auto,
        }
    }

    /// Creates a context identical to [`Context::auto`] except that it uses
    /// [`FetchStrategy::Fast`].
    pub fn fast() -> Self {
        let mut ctx = Self::auto();
        ctx.fetch_strategy = FetchStrategy::Fast;
        ctx
    }

    /// Returns the context with `fetch_timeout` replaced by `timeout`.
    pub fn with_fetch_timeout(self, timeout: Duration) -> Self {
        Self {
            fetch_timeout: timeout,
            ..self
        }
    }

    /// Returns the context with `concurrency` replaced by the given value.
    ///
    /// The value is stored as-is; no range check is applied here.
    pub fn with_concurrency(self, concurrency: usize) -> Self {
        Self { concurrency, ..self }
    }

    /// Returns the context with `allow_domains` set to an owned copy of
    /// `domains`.
    ///
    /// The entries are copied verbatim. An empty slice produces
    /// `Some(vec![])`, not `None`.
    pub fn with_allow_domains(self, domains: &[&str]) -> Self {
        let allowed: Vec<String> = domains.iter().copied().map(String::from).collect();
        Self {
            allow_domains: Some(allowed),
            ..self
        }
    }

    /// Returns the context with `block_domains` set to an owned copy of
    /// `domains`.
    ///
    /// The entries are copied verbatim. An empty slice produces
    /// `Some(vec![])`, not `None`.
    pub fn with_block_domains(self, domains: &[&str]) -> Self {
        let blocked: Vec<String> = domains.iter().copied().map(String::from).collect();
        Self {
            block_domains: Some(blocked),
            ..self
        }
    }
}
// Task-local state declared through tokio's `task_local!` macro. The accessor
// functions in this file read these keys with `try_with`, so they tolerate
// being called where no value has been set.
tokio::task_local! {
/// Task-local handle to the active [`Context`]; read by
/// [`get_fetch_strategy`] and [`get_fetch_timeout`].
pub static CTX: Arc<Context>;
/// Task-local `String` -> `String` cache shared through an `Arc`.
/// [`fetch_cache_get`] and [`fetch_cache_put`] key it by canonicalized URL
/// and store the HTML text handed to `fetch_cache_put`.
pub static FETCH_CACHE: Arc<DashMap<String, String>>;
}
/// Creates an empty cache with the exact type stored in `FETCH_CACHE`.
///
/// The map is wrapped in an `Arc` so the same cache can be handed to more
/// than one owner.
pub fn fetch_cache_new() -> Arc<DashMap<String, String>> {
    let empty: DashMap<String, String> = DashMap::new();
    Arc::new(empty)
}
/// Looks up the cached entry for `url` in the task-local `FETCH_CACHE`.
///
/// The URL is canonicalized with `crate::tools::clean::canonicalize_url`
/// before the lookup, matching how [`fetch_cache_put`] builds its keys.
///
/// Returns a clone of the stored string, or `None` when the key is absent or
/// when `FETCH_CACHE` cannot be accessed (`try_with` returned an error).
pub fn fetch_cache_get(url: &str) -> Option<String> {
    let canonical = crate::tools::clean::canonicalize_url(url);
    let lookup = FETCH_CACHE.try_with(|entries| {
        // Clone inside the closure so the map guard is released before returning.
        entries.get(&canonical).map(|hit| hit.value().clone())
    });
    match lookup {
        Ok(cached) => cached,
        Err(_) => None,
    }
}
/// Stores `html` in the task-local `FETCH_CACHE` under the canonicalized form
/// of `url`, replacing any entry already stored under that key.
///
/// This is best-effort: when `FETCH_CACHE` cannot be accessed (`try_with`
/// returned an error) the call does nothing and reports no error.
pub fn fetch_cache_put(url: &str, html: &str) {
    let canonical = crate::tools::clean::canonicalize_url(url);
    // The owned copy of `html` is only made if the cache is actually reachable.
    let _ = FETCH_CACHE.try_with(|entries| {
        entries.insert(canonical, html.to_owned());
    });
}
/// Returns the fetch strategy of the task-local `CTX`.
///
/// Falls back to [`FetchStrategy::Auto`] when `CTX` cannot be accessed
/// (`try_with` returned an error).
pub fn get_fetch_strategy() -> FetchStrategy {
    match CTX.try_with(|active| active.fetch_strategy) {
        Ok(strategy) => strategy,
        Err(_) => FetchStrategy::Auto,
    }
}
/// Returns the fetch timeout of the task-local `CTX`.
///
/// Falls back to [`DEFAULT_FETCH_TIMEOUT`] when `CTX` cannot be accessed
/// (`try_with` returned an error).
pub fn get_fetch_timeout() -> Duration {
    match CTX.try_with(|active| active.fetch_timeout) {
        Ok(timeout) => timeout,
        Err(_) => DEFAULT_FETCH_TIMEOUT,
    }
}
/// A list of JSON values.
// NOTE(review): presumably the JSON-LD blocks extracted from a page — confirm
// against the code that produces this type.
pub type Jsonld = Vec<Value>;
/// A list of `(String, String)` pairs.
// NOTE(review): presumably `(name, value)` metadata entries — confirm against
// the code that produces this type.
pub type Metadata = Vec<(String, String)>;