spider_lib/
item.rs

1use crate::request::Request;
2use serde_json::Value;
3use std::any::Any;
4use std::fmt::Debug;
5
6#[derive(Debug, Clone)]
7pub struct CrawlOutput<I> {
8    pub items: Vec<I>,
9    pub requests: Vec<Request>,
10}
11
12impl<I> CrawlOutput<I> {
13    pub fn new() -> Self {
14        Self {
15            items: Vec::new(),
16            requests: Vec::new(),
17        }
18    }
19
20    pub fn add_item(&mut self, item: I) {
21        self.items.push(item);
22    }
23
24    pub fn add_request(&mut self, request: Request) {
25        self.requests.push(request);
26    }
27}
28
29impl<I> Default for CrawlOutput<I> {
30    fn default() -> Self {
31        Self::new()
32    }
33}
34
35// Item Trait for Boxed Items
36pub trait ScrapedItem: Debug + Send + Sync + Any + 'static {
37    fn as_any(&self) -> &dyn Any;
38    fn box_clone(&self) -> Box<dyn ScrapedItem + Send + Sync>;
39    fn to_json_value(&self) -> Value;
40}
41
42impl Clone for Box<dyn ScrapedItem + Send + Sync> {
43    fn clone(&self) -> Self {
44        self.box_clone()
45    }
46}