Struct CrawlStats

Source

pub struct CrawlStats {
    pub requests_count: u64,
    pub concurrent_requests: u32,
    pub concurrent_requests_per_domain: u32,
    pub failed_requests_count: u64,
    pub offsite_requests_count: u64,
    pub robots_disallowed_count: u64,
    pub cache_hits: u64,
    pub cache_misses: u64,
    pub response_bytes: u64,
    pub items_scraped: u64,
    pub items_dropped: u64,
    pub start_time: f64,
    pub end_time: f64,
    pub download_delay: f64,
    pub blocked_requests_count: u64,
    pub custom_stats: HashMap<String, Value>,
    pub response_status_count: HashMap<String, u64>,
    pub domains_response_bytes: HashMap<String, u64>,
    pub sessions_requests_count: HashMap<String, u64>,
    pub proxies: Vec<String>,
    pub log_levels_counter: HashMap<String, u64>,
}

Expand description

Aggregate statistics collected during a crawl run.

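The crawler engine populates this struct as it processes requests. After the crawl finishes, you can inspect it via `CrawlerEngine::stats` or from the returned `CrawlResult`. All counters start at zero and are incremented during the crawl loop. The fields are plain integers and the `increment_*` methods take `&mut self`, so updates rely on exclusive access to the struct rather than on atomic operations.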

Fields§

§requests_count: u64

Total number of requests dispatched.

§concurrent_requests: u32

Maximum number of concurrent requests allowed.

§concurrent_requests_per_domain: u32

Maximum number of concurrent requests per domain.

§failed_requests_count: u64

Number of requests that failed with an error.

§offsite_requests_count: u64

Number of requests rejected because their domain was not allowed.

§robots_disallowed_count: u64

Number of requests blocked by robots.txt rules.

§cache_hits: u64

Number of responses served from the cache.

§cache_misses: u64

Number of cache lookups for which no stored response was found.

§response_bytes: u64

Total bytes received across all responses.

§items_scraped: u64

Number of items successfully scraped.

§items_dropped: u64

Number of items dropped by the item pipeline.

§start_time: f64

Unix timestamp when the crawl started.

§end_time: f64

Unix timestamp when the crawl ended.

§download_delay: f64

Configured delay in seconds between consecutive requests.

§blocked_requests_count: u64

Number of requests that received a blocked status code.

§custom_stats: HashMap<String, Value>

User-defined custom statistics.

§response_status_count: HashMap<String, u64>

Count of responses grouped by HTTP status code.

§domains_response_bytes: HashMap<String, u64>

Total bytes received grouped by domain.

§sessions_requests_count: HashMap<String, u64>

Number of requests dispatched per session.

§proxies: Vec<String>

List of proxy addresses used during the crawl.

§log_levels_counter: HashMap<String, u64>

Count of log messages grouped by level.

Struct CrawlStats

Fields§

Implementations§

impl CrawlStats

pub fn elapsed_seconds(&self) -> f64

pub fn requests_per_second(&self) -> f64

pub fn increment_status(&mut self, status: u16)

pub fn increment_response_bytes(&mut self, domain: &str, count: u64)

pub fn increment_requests_count(&mut self, sid: &str)

Trait Implementations§

impl Clone for CrawlStats

fn clone(&self) -> CrawlStats

fn clone_from(&mut self, source: &Self)

impl Debug for CrawlStats

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for CrawlStats

fn default() -> CrawlStats

impl<'de> Deserialize<'de> for CrawlStats

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

impl Serialize for CrawlStats

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where __S: Serializer,

Auto Trait Implementations§

impl Freeze for CrawlStats

impl RefUnwindSafe for CrawlStats

impl Send for CrawlStats

impl Sync for CrawlStats

impl Unpin for CrawlStats

impl UnsafeUnpin for CrawlStats

impl UnwindSafe for CrawlStats

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> Same for T

type Output = T

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

Struct CrawlStats

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,