spider_util/item.rs
1//! Data structures for scraped items in `spider-lib`.
2//!
3//! Defines the `ScrapedItem` trait and `ParseOutput` for spider results.
4//!
5//! ## Example
6//!
7//! ```rust,ignore
8//! use spider_util::item::{ScrapedItem, ParseOutput};
9//!
10//! #[spider_macro::scraped_item]
11//! struct Article {
12//! title: String,
13//! content: String,
14//! }
15//!
16//! // In your spider's parse method:
17//! // let mut output = ParseOutput::new();
//! // output.add_item(Article { title: "...".into(), content: "...".into() });
19//! // Ok(output)
20//! ```
21
22use crate::request::Request;
23use serde_json::Value;
24use std::any::Any;
25use std::fmt::Debug;
26
27/// The output of a spider's `parse` method.
28#[derive(Debug, Clone)]
29pub struct ParseOutput<I> {
30 items: Vec<I>,
31 requests: Vec<Request>,
32}
33
34impl<I> ParseOutput<I> {
35 /// Creates a new, empty `ParseOutput`.
36 pub fn new() -> Self {
37 Self {
38 items: Vec::new(),
39 requests: Vec::new(),
40 }
41 }
42
43 /// Consumes the `ParseOutput` and returns its inner items and requests.
44 pub fn into_parts(self) -> (Vec<I>, Vec<Request>) {
45 (self.items, self.requests)
46 }
47
48 /// Adds a scraped item to the output.
49 pub fn add_item(&mut self, item: I) {
50 self.items.push(item);
51 }
52
53 /// Adds a new request to be crawled.
54 pub fn add_request(&mut self, request: Request) {
55 self.requests.push(request);
56 }
57
58 /// Adds multiple scraped items to the output.
59 pub fn add_items(&mut self, items: impl IntoIterator<Item = I>) {
60 self.items.extend(items);
61 }
62
63 /// Adds multiple new requests to be crawled.
64 pub fn add_requests(&mut self, requests: impl IntoIterator<Item = Request>) {
65 self.requests.extend(requests);
66 }
67}
68
69impl<I> Default for ParseOutput<I> {
70 fn default() -> Self {
71 Self::new()
72 }
73}
74
75/// A trait representing a scraped item.
76pub trait ScrapedItem: Debug + Send + Sync + Any + 'static {
77 /// Returns the item as a `dyn Any` for downcasting.
78 fn as_any(&self) -> &dyn Any;
79 /// Clones the item into a `Box<dyn ScrapedItem>`.
80 fn box_clone(&self) -> Box<dyn ScrapedItem + Send + Sync>;
81 /// Converts the item to a `serde_json::Value`.
82 fn to_json_value(&self) -> Value;
83}
84
85impl Clone for Box<dyn ScrapedItem + Send + Sync> {
86 fn clone(&self) -> Self {
87 self.box_clone()
88 }
89}
90