Skip to main content

spider_lib/
item.rs

1use crate::request::Request;
2use serde_json::Value;
3use std::any::Any;
4use std::fmt::Debug;
5
6/// The output of a spider's `parse` method.
7#[derive(Debug, Clone)]
8pub struct ParseOutput<I> {
9    items: Vec<I>,
10    requests: Vec<Request>,
11}
12
13impl<I> ParseOutput<I> {
14    /// Creates a new, empty `ParseOutput`.
15    pub fn new() -> Self {
16        Self {
17            items: Vec::new(),
18            requests: Vec::new(),
19        }
20    }
21
22    /// Consumes the `ParseOutput` and returns its inner items and requests.
23    pub fn into_parts(self) -> (Vec<I>, Vec<Request>) {
24        (self.items, self.requests)
25    }
26
27    /// Adds a scraped item to the output.
28    pub fn add_item(&mut self, item: I) {
29        self.items.push(item);
30    }
31
32    /// Adds a new request to be crawled.
33    pub fn add_request(&mut self, request: Request) {
34        self.requests.push(request);
35    }
36
37    /// Adds multiple scraped items to the output.
38    pub fn add_items(&mut self, items: impl IntoIterator<Item = I>) {
39        self.items.extend(items);
40    }
41
42    /// Adds multiple new requests to be crawled.
43    pub fn add_requests(&mut self, requests: impl IntoIterator<Item = Request>) {
44        self.requests.extend(requests);
45    }
46}
47
48impl<I> Default for ParseOutput<I> {
49    fn default() -> Self {
50        Self::new()
51    }
52}
53
54/// A trait representing a scraped item.
55pub trait ScrapedItem: Debug + Send + Sync + Any + 'static {
56    /// Returns the item as a `dyn Any` for downcasting.
57    fn as_any(&self) -> &dyn Any;
58    /// Clones the item into a `Box<dyn ScrapedItem>`.
59    fn box_clone(&self) -> Box<dyn ScrapedItem + Send + Sync>;
60    /// Converts the item to a `serde_json::Value`.
61    fn to_json_value(&self) -> Value;
62}
63
64impl Clone for Box<dyn ScrapedItem + Send + Sync> {
65    fn clone(&self) -> Self {
66        self.box_clone()
67    }
68}