crawlex 1.0.4

Stealth crawler with Chrome-perfect TLS/H2 fingerprint, render pool, hooks, persistent queue
Documentation
use bytes::Bytes;
use dashmap::DashMap;
use http::HeaderMap;
use url::Url;

use crate::storage::{
    ArtifactStorage, ChallengeStorage, IntelStorage, PageMetadata, StateStorage, Storage,
    TelemetryStorage,
};
use crate::Result;

/// Storage backend that keeps everything in process memory.
///
/// All three maps are concurrent `DashMap`s, so the `&self` storage methods
/// can write to them without external locking. Nothing is persisted; the
/// contents are lost when the value is dropped.
#[derive(Debug, Default)]
pub struct MemoryStorage {
    /// Raw response bodies, keyed by the URL's string form. A later save for
    /// the same URL replaces the earlier body.
    pub raw: DashMap<String, Bytes>,
    /// Rendered HTML, keyed by the URL's string form. A later save for the
    /// same URL replaces the earlier document.
    pub rendered: DashMap<String, String>,
    /// Link graph: `(from, to)` URL strings mapped to the number of times
    /// that edge has been recorded.
    pub edges: DashMap<(String, String), u32>,
}

impl MemoryStorage {
    /// Creates an empty in-memory store.
    ///
    /// Equivalent to [`Default::default`]: all maps start with no entries.
    pub fn new() -> Self {
        <Self as Default>::default()
    }
}

#[async_trait::async_trait]
impl ArtifactStorage for MemoryStorage {
    async fn save_raw(&self, url: &Url, _headers: &HeaderMap, body: &Bytes) -> Result<()> {
        self.raw.insert(url.to_string(), body.clone());
        Ok(())
    }
    async fn save_rendered(&self, url: &Url, html: &str, _meta: &PageMetadata) -> Result<()> {
        self.rendered.insert(url.to_string(), html.to_string());
        Ok(())
    }
    async fn save_edge(&self, from: &Url, to: &Url) -> Result<()> {
        *self
            .edges
            .entry((from.to_string(), to.to_string()))
            .or_insert(0) += 1;
        Ok(())
    }
}

// The remaining storage facets are implemented with empty bodies:
// `MemoryStorage` overrides nothing here, so whatever default methods these
// traits provide are used as-is.
// NOTE(review): the default behaviors live in `crate::storage` and are not
// visible from this file — confirm they are acceptable for in-memory use.
impl StateStorage for MemoryStorage {}
impl ChallengeStorage for MemoryStorage {}
impl TelemetryStorage for MemoryStorage {}
impl IntelStorage for MemoryStorage {}

impl Storage for MemoryStorage {
    /// Exposes this store as `&dyn Any` so callers holding it behind the
    /// `Storage` trait can downcast back to `MemoryStorage`.
    ///
    /// Always returns `Some`.
    fn as_any_ref(&self) -> Option<&dyn std::any::Any> {
        let erased: &dyn std::any::Any = self;
        Some(erased)
    }
}