// spider_util/item.rs

//! Data structures for scraped items in `spider-lib`.
//!
//! Defines the `ScrapedItem` trait, implemented by every scraped item type,
//! and `ParseOutput`, which collects the items and follow-up requests
//! produced by a spider's `parse` method.
//!
//! ## Example
//!
//! ```rust,ignore
//! use spider_util::item::{ScrapedItem, ParseOutput};
//!
//! #[spider_macro::scraped_item]
//! struct Article {
//!     title: String,
//!     content: String,
//! }
//!
//! // In your spider's parse method:
//! // let mut output = ParseOutput::new();
//! // output.add_item(Article { title: "...".into(), content: "...".into() });
//! // Ok(output)
//! ```
21
22use crate::request::Request;
23use serde_json::Value;
24use std::any::Any;
25use std::fmt::Debug;
26
27/// The output of a spider's `parse` method.
28#[derive(Debug, Clone)]
29pub struct ParseOutput<I> {
30    items: Vec<I>,
31    requests: Vec<Request>,
32}
33
34impl<I> ParseOutput<I> {
35    /// Creates a new, empty `ParseOutput`.
36    pub fn new() -> Self {
37        Self {
38            items: Vec::new(),
39            requests: Vec::new(),
40        }
41    }
42
43    /// Consumes the `ParseOutput` and returns its inner items and requests.
44    pub fn into_parts(self) -> (Vec<I>, Vec<Request>) {
45        (self.items, self.requests)
46    }
47
48    /// Adds a scraped item to the output.
49    pub fn add_item(&mut self, item: I) {
50        self.items.push(item);
51    }
52
53    /// Adds a new request to be crawled.
54    pub fn add_request(&mut self, request: Request) {
55        self.requests.push(request);
56    }
57
58    /// Adds multiple scraped items to the output.
59    pub fn add_items(&mut self, items: impl IntoIterator<Item = I>) {
60        self.items.extend(items);
61    }
62
63    /// Adds multiple new requests to be crawled.
64    pub fn add_requests(&mut self, requests: impl IntoIterator<Item = Request>) {
65        self.requests.extend(requests);
66    }
67}
68
69impl<I> Default for ParseOutput<I> {
70    fn default() -> Self {
71        Self::new()
72    }
73}
74
75/// A trait representing a scraped item.
76pub trait ScrapedItem: Debug + Send + Sync + Any + 'static {
77    /// Returns the item as a `dyn Any` for downcasting.
78    fn as_any(&self) -> &dyn Any;
79    /// Clones the item into a `Box<dyn ScrapedItem>`.
80    fn box_clone(&self) -> Box<dyn ScrapedItem + Send + Sync>;
81    /// Converts the item to a `serde_json::Value`.
82    fn to_json_value(&self) -> Value;
83}
84
85impl Clone for Box<dyn ScrapedItem + Send + Sync> {
86    fn clone(&self) -> Self {
87        self.box_clone()
88    }
89}
90