Skip to main content

spider_util/
request.rs

1//! Request types used by the crawler runtime.
2//!
3//! [`Request`] is the runtime's transport-neutral request model. It stores the
4//! URL, method, headers, optional body, and a lazily allocated metadata map used
5//! by middleware and runtime internals.
6//!
7//! ## Example
8//!
9//! ```rust,ignore
10//! use spider_util::request::{Body, Method, Request};
11//! use url::Url;
12//! use serde_json::json;
13//!
14//! // Create a simple GET request
15//! let url = Url::parse("https://example.com")?;
16//! let request = Request::new(url);
17//!
18//! // Parse the URL as part of request construction
19//! let parsed_request = Request::try_new("https://example.com")?;
20//!
21//! // Create a POST request with JSON body
22//! let post_request = Request::new(Url::parse("https://api.example.com/data")?)
23//!     .with_method(Method::Post)
24//!     .with_json(json!({"key": "value"}));
25//! # Ok::<(), Box<dyn std::error::Error>>(())
26//! ```
27
28use bytes::Bytes;
29use dashmap::DashMap;
30use http::header::HeaderMap;
31use reqwest::{Method as ReqwestMethod, Url};
32use serde::de::DeserializeOwned;
33use serde::{Deserialize, Serialize};
34use serde_json::Value;
35use std::collections::HashMap;
36use std::hash::Hasher;
37use std::str::FromStr;
38use std::sync::Arc;
39use twox_hash::XxHash64;
40
41use crate::error::SpiderError;
42
43/// Request body variants supported by the default downloader.
44///
45/// ## Example
46///
47/// ```rust,ignore
48/// use spider_util::request::Body;
49/// use serde_json::json;
50/// use dashmap::DashMap;
51/// use bytes::Bytes;
52///
53/// // JSON body
54/// let json_body = Body::Json(json!({"name": "test"}));
55///
56/// // Form data
57/// let mut form = DashMap::new();
58/// form.insert("key".to_string(), "value".to_string());
59/// let form_body = Body::Form(form);
60///
61/// // Raw bytes
62/// let bytes_body = Body::Bytes(Bytes::from("raw data"));
63/// ```
64#[derive(Debug, Clone)]
65pub enum Body {
66    /// JSON payload.
67    Json(serde_json::Value),
68    /// Form data (key-value pairs).
69    Form(DashMap<String, String>),
70    /// Raw binary data.
71    Bytes(Bytes),
72}
73
74// Custom serialization for Body enum
75impl Serialize for Body {
76    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
77    where
78        S: serde::Serializer,
79    {
80        use serde::ser::SerializeMap;
81        let mut map = serializer.serialize_map(Some(1))?;
82
83        match self {
84            Body::Json(value) => map.serialize_entry("Json", value)?,
85            Body::Form(dashmap) => {
86                let hmap: HashMap<String, String> = dashmap.clone().into_iter().collect();
87                map.serialize_entry("Form", &hmap)?
88            }
89            Body::Bytes(bytes) => map.serialize_entry("Bytes", bytes)?,
90        }
91
92        map.end()
93    }
94}
95
96impl<'de> Deserialize<'de> for Body {
97    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
98    where
99        D: serde::Deserializer<'de>,
100    {
101        use serde::de::{self, MapAccess, Visitor};
102        use std::fmt;
103
104        struct BodyVisitor;
105
106        impl<'de> Visitor<'de> for BodyVisitor {
107            type Value = Body;
108
109            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
110                formatter.write_str("a body object")
111            }
112
113            fn visit_map<V>(self, mut map: V) -> Result<Body, V::Error>
114            where
115                V: MapAccess<'de>,
116            {
117                let entry = map.next_entry::<String, Value>()?;
118                let (key, value) = match entry {
119                    Some((k, v)) => (k, v),
120                    None => return Err(de::Error::custom("Expected a body variant")),
121                };
122
123                match key.as_str() {
124                    "Json" => Ok(Body::Json(value)),
125                    "Form" => {
126                        let form_data: HashMap<String, String> =
127                            serde_json::from_value(value).map_err(de::Error::custom)?;
128                        let dashmap = DashMap::new();
129                        for (k, v) in form_data {
130                            dashmap.insert(k, v);
131                        }
132                        Ok(Body::Form(dashmap))
133                    }
134                    "Bytes" => {
135                        let bytes: Bytes =
136                            serde_json::from_value(value).map_err(de::Error::custom)?;
137                        Ok(Body::Bytes(bytes))
138                    }
139                    _ => Err(de::Error::custom(format!("Unknown body variant: {}", key))),
140                }
141            }
142        }
143
144        deserializer.deserialize_map(BodyVisitor)
145    }
146}
147
/// Transport-neutral HTTP method used by [`Request`].
///
/// The nine named variants cover the common verbs; anything else is carried
/// verbatim in [`Method::Custom`]. The default is [`Method::Get`].
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
pub enum Method {
    /// The `GET` verb (default).
    #[default]
    Get,
    /// The `POST` verb.
    Post,
    /// The `PUT` verb.
    Put,
    /// The `PATCH` verb.
    Patch,
    /// The `DELETE` verb.
    Delete,
    /// The `HEAD` verb.
    Head,
    /// The `OPTIONS` verb.
    Options,
    /// The `TRACE` verb.
    Trace,
    /// The `CONNECT` verb.
    Connect,
    /// Any other method token, stored as given.
    Custom(String),
}
173
174impl Method {
175    /// Returns the wire-format method string.
176    pub fn as_str(&self) -> &str {
177        match self {
178            Method::Get => "GET",
179            Method::Post => "POST",
180            Method::Put => "PUT",
181            Method::Patch => "PATCH",
182            Method::Delete => "DELETE",
183            Method::Head => "HEAD",
184            Method::Options => "OPTIONS",
185            Method::Trace => "TRACE",
186            Method::Connect => "CONNECT",
187            Method::Custom(method) => method.as_str(),
188        }
189    }
190}
191
192impl std::fmt::Display for Method {
193    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194        f.write_str(self.as_str())
195    }
196}
197
198impl FromStr for Method {
199    type Err = String;
200
201    fn from_str(method: &str) -> Result<Self, Self::Err> {
202        let parsed = ReqwestMethod::from_bytes(method.as_bytes()).map_err(|err| err.to_string())?;
203
204        Ok(match parsed.as_str() {
205            "GET" => Method::Get,
206            "POST" => Method::Post,
207            "PUT" => Method::Put,
208            "PATCH" => Method::Patch,
209            "DELETE" => Method::Delete,
210            "HEAD" => Method::Head,
211            "OPTIONS" => Method::Options,
212            "TRACE" => Method::Trace,
213            "CONNECT" => Method::Connect,
214            other => Method::Custom(other.to_string()),
215        })
216    }
217}
218
219impl From<Method> for ReqwestMethod {
220    fn from(method: Method) -> Self {
221        match method {
222            Method::Get => ReqwestMethod::GET,
223            Method::Post => ReqwestMethod::POST,
224            Method::Put => ReqwestMethod::PUT,
225            Method::Patch => ReqwestMethod::PATCH,
226            Method::Delete => ReqwestMethod::DELETE,
227            Method::Head => ReqwestMethod::HEAD,
228            Method::Options => ReqwestMethod::OPTIONS,
229            Method::Trace => ReqwestMethod::TRACE,
230            Method::Connect => ReqwestMethod::CONNECT,
231            Method::Custom(method) => {
232                ReqwestMethod::from_bytes(method.as_bytes()).expect("custom method validated")
233            }
234        }
235    }
236}
237
238/// Outgoing HTTP request used by the crawler runtime.
239///
240/// [`Request`] is the handoff type between spiders, middleware, the scheduler,
241/// and the downloader. It is transport-neutral enough to be shared across the
242/// workspace, but expressive enough for custom methods, headers, bodies, and
243/// request-scoped metadata.
244///
245/// ## Example
246///
247/// ```rust,ignore
248/// use spider_util::request::{Method, Request};
249/// use url::Url;
250///
251/// // Create a basic GET request
252/// let request = Request::new(Url::parse("https://example.com")?);
253///
254/// // Or parse a string into a request directly
255/// let request = Request::try_new("https://example.com")?;
256///
257/// // Build a request with headers and method
258/// let post_request = Request::new(Url::parse("https://api.example.com")?)
259///     .with_method(Method::Post)
260///     .with_header("Accept", "application/json")
261///     ?;
262/// # Ok::<(), Box<dyn std::error::Error>>(())
263/// ```
264#[derive(Debug, Clone)]
265pub struct Request {
266    /// The target URL for this request.
267    pub url: Url,
268    /// Request scheduling priority.
269    ///
270    /// Higher values are dequeued before lower values.
271    pub priority: i32,
272    /// The HTTP method (GET, POST, etc.).
273    pub method: Method,
274    /// HTTP headers for the request.
275    pub headers: http::header::HeaderMap,
276    /// Optional request body.
277    pub body: Option<Body>,
278    /// Lazy-initialized metadata - only allocated when actually used.
279    /// This reduces memory allocation for simple requests without metadata.
280    meta: Option<Arc<DashMap<String, Value>>>,
281}
282
283// Custom serialization for Request struct
284impl Serialize for Request {
285    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
286    where
287        S: serde::Serializer,
288    {
289        use serde::ser::SerializeStruct;
290        // Convert HeaderMap to a serializable format
291        let headers_vec: Vec<(String, String)> = self
292            .headers
293            .iter()
294            .filter_map(|(name, value)| {
295                value
296                    .to_str()
297                    .ok()
298                    .map(|val_str| (name.as_str().to_string(), val_str.to_string()))
299            })
300            .collect();
301
302        let mut s = serializer.serialize_struct("Request", 6)?;
303        s.serialize_field("url", &self.url.as_str())?;
304        s.serialize_field("priority", &self.priority)?;
305        s.serialize_field("method", &self.method.as_str())?;
306        s.serialize_field("headers", &headers_vec)?;
307        s.serialize_field("body", &self.body)?;
308        // Serialize meta as empty HashMap if None (for backward compatibility)
309        let meta_map: HashMap<String, Value> = self
310            .meta
311            .as_ref()
312            .map(|m| {
313                m.iter()
314                    .map(|e| (e.key().clone(), e.value().clone()))
315                    .collect()
316            })
317            .unwrap_or_default();
318        s.serialize_field("meta", &meta_map)?;
319        s.end()
320    }
321}
322
323impl<'de> Deserialize<'de> for Request {
324    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
325    where
326        D: serde::Deserializer<'de>,
327    {
328        use serde::de::{self, MapAccess, Visitor};
329        use std::fmt;
330
331        #[derive(Deserialize)]
332        #[serde(field_identifier, rename_all = "lowercase")]
333        enum Field {
334            Url,
335            Priority,
336            Method,
337            Headers,
338            Body,
339            Meta,
340        }
341
342        struct RequestVisitor;
343
344        impl<'de> Visitor<'de> for RequestVisitor {
345            type Value = Request;
346
347            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
348                formatter.write_str("struct Request")
349            }
350
351            fn visit_map<V>(self, mut map: V) -> Result<Request, V::Error>
352            where
353                V: MapAccess<'de>,
354            {
355                let mut url = None;
356                let mut priority = None;
357                let mut method = None;
358                let mut headers = None;
359                let mut body = None;
360                let mut meta = None;
361
362                while let Some(key) = map.next_key()? {
363                    match key {
364                        Field::Url => {
365                            if url.is_some() {
366                                return Err(de::Error::duplicate_field("url"));
367                            }
368                            let url_str: String = map.next_value()?;
369                            let parsed_url = Url::parse(&url_str).map_err(de::Error::custom)?;
370                            url = Some(parsed_url);
371                        }
372                        Field::Priority => {
373                            if priority.is_some() {
374                                return Err(de::Error::duplicate_field("priority"));
375                            }
376                            priority = Some(map.next_value()?);
377                        }
378                        Field::Method => {
379                            if method.is_some() {
380                                return Err(de::Error::duplicate_field("method"));
381                            }
382                            let method_str: String = map.next_value()?;
383                            let parsed_method =
384                                Method::from_str(&method_str).map_err(de::Error::custom)?;
385                            method = Some(parsed_method);
386                        }
387                        Field::Headers => {
388                            if headers.is_some() {
389                                return Err(de::Error::duplicate_field("headers"));
390                            }
391                            // Deserialize headers vector and convert back to HeaderMap
392                            let headers_vec: Vec<(String, String)> = map.next_value()?;
393                            let mut header_map = HeaderMap::new();
394                            for (name, value) in headers_vec {
395                                if let Ok(header_name) =
396                                    http::header::HeaderName::from_bytes(name.as_bytes())
397                                    && let Ok(header_value) =
398                                        http::header::HeaderValue::from_str(&value)
399                                {
400                                    header_map.insert(header_name, header_value);
401                                }
402                            }
403                            headers = Some(header_map);
404                        }
405                        Field::Body => {
406                            if body.is_some() {
407                                return Err(de::Error::duplicate_field("body"));
408                            }
409                            body = map.next_value()?;
410                        }
411                        Field::Meta => {
412                            // Deserialize meta HashMap and convert to DashMap
413                            let meta_map: HashMap<String, Value> = map.next_value()?;
414                            if !meta_map.is_empty() {
415                                let dashmap = DashMap::new();
416                                for (k, v) in meta_map {
417                                    dashmap.insert(k, v);
418                                }
419                                meta = Some(Arc::new(dashmap));
420                            }
421                        }
422                    }
423                }
424
425                let url = url.ok_or_else(|| de::Error::missing_field("url"))?;
426                let priority = priority.unwrap_or(0);
427                let method = method.ok_or_else(|| de::Error::missing_field("method"))?;
428                let headers = headers.ok_or_else(|| de::Error::missing_field("headers"))?;
429                let body = body; // Optional field
430
431                Ok(Request {
432                    url,
433                    priority,
434                    method,
435                    headers,
436                    body,
437                    meta, // May be None if no meta was serialized
438                })
439            }
440        }
441
442        const FIELDS: &[&str] = &["url", "priority", "method", "headers", "body", "meta"];
443        deserializer.deserialize_struct("Request", FIELDS, RequestVisitor)
444    }
445}
446
447impl Default for Request {
448    fn default() -> Self {
449        let default_url = match Url::parse("http://default.invalid") {
450            Ok(url) => url,
451            Err(err) => panic!("invalid hardcoded default URL: {}", err),
452        };
453        Self {
454            url: default_url,
455            priority: 0,
456            method: Method::Get,
457            headers: http::header::HeaderMap::new(),
458            body: None,
459            meta: None, // Lazy initialization - no allocation until needed
460        }
461    }
462}
463
464impl Request {
465    /// Creates a new [`Request`] with the given URL.
466    ///
467    /// This is the most common constructor used by spiders when enqueueing
468    /// follow-up pages. It does not allocate metadata storage unless
469    /// [`with_meta`](Request::with_meta) is called.
470    ///
471    /// ## Example
472    ///
473    /// ```rust,ignore
474    /// use spider_util::request::{Method, Request};
475    /// use url::Url;
476    ///
477    /// let request = Request::new(Url::parse("https://example.com")?);
478    /// # Ok::<(), Box<dyn std::error::Error>>(())
479    /// ```
480    pub fn new(url: Url) -> Self {
481        Request {
482            url,
483            priority: 0,
484            method: Method::Get,
485            headers: http::header::HeaderMap::new(),
486            body: None,
487            meta: None,
488        }
489    }
490
491    /// Creates a new [`Request`] from any value that can be converted into a [`Url`].
492    ///
493    /// This is a fallible companion to [`Request::new`] for callers that want to
494    /// pass URL strings directly.
495    ///
496    /// ## Example
497    ///
498    /// ```rust,ignore
499    /// use spider_util::request::Request;
500    ///
501    /// let request = Request::try_new("https://example.com")?;
502    /// # Ok::<(), Box<dyn std::error::Error>>(())
503    /// ```
504    pub fn try_new<U>(url: U) -> Result<Self, SpiderError>
505    where
506        U: TryInto<Url>,
507        U::Error: Into<url::ParseError>,
508    {
509        let url = url
510            .try_into()
511            .map_err(|e| SpiderError::UrlParseError(e.into()))?;
512        Ok(Self::new(url))
513    }
514
515    /// Sets the HTTP method for the request.
516    ///
517    /// Use this together with one of the body helpers for POST, PUT, or PATCH
518    /// workflows.
519    ///
520    /// ## Example
521    ///
522    /// ```rust,ignore
523    /// use spider_util::request::Request;
524    /// use url::Url;
525    ///
526    /// let request = Request::new(Url::parse("https://example.com")?)
527    ///     .with_method(Method::Post);
528    /// # Ok::<(), Box<dyn std::error::Error>>(())
529    /// ```
530    pub fn with_method(mut self, method: Method) -> Self {
531        self.method = method;
532        self
533    }
534
535    /// Sets the scheduling priority for the request.
536    ///
537    /// Higher values are scheduled before lower values. Requests with the same
538    /// priority retain FIFO ordering.
539    pub fn with_priority(mut self, priority: i32) -> Self {
540        self.priority = priority;
541        self
542    }
543
    /// Returns the scheduling priority for the request.
    ///
    /// This is the value of the public `priority` field.
    pub fn priority(&self) -> i32 {
        self.priority
    }
548
549    /// Adds a header to the request.
550    ///
551    /// Accepts any types that can be converted into [`reqwest::header::HeaderName`]
552    /// and [`reqwest::header::HeaderValue`], including `&str`, `String`, and
553    /// standard header constants such as [`http::header::CONTENT_TYPE`].
554    ///
555    /// Returns an error if the header name or value is invalid.
556    ///
557    /// # Errors
558    ///
559    /// Returns a [`SpiderError::HeaderValueError`] if the header name or value is invalid.
560    ///
561    /// ## Example
562    ///
563    /// ```rust,ignore
564    /// use spider_util::request::Request;
565    /// use url::Url;
566    ///
567    /// let request = Request::new(Url::parse("https://example.com")?)
568    ///     .with_header(http::header::ACCEPT, "application/json")
569    ///     ?
570    ///     .with_header("X-Trace-Id".to_string(), "abc-123".to_string())?;
571    /// # Ok::<(), Box<dyn std::error::Error>>(())
572    /// ```
573    pub fn with_header<N, V>(mut self, name: N, value: V) -> Result<Self, SpiderError>
574    where
575        N: TryInto<reqwest::header::HeaderName>,
576        N::Error: std::fmt::Display,
577        V: TryInto<reqwest::header::HeaderValue>,
578        V::Error: std::fmt::Display,
579    {
580        let header_name = name
581            .try_into()
582            .map_err(|e| SpiderError::HeaderValueError(format!("Invalid header name: {}", e)))?;
583        let header_value = value
584            .try_into()
585            .map_err(|e| SpiderError::HeaderValueError(format!("Invalid header value: {}", e)))?;
586
587        self.headers.insert(header_name, header_value);
588        Ok(self)
589    }
590
591    /// Sets the body of the request and defaults the method to POST.
592    ///
593    /// ## Example
594    ///
595    /// ```rust,ignore
596    /// use spider_util::request::{Request, Body};
597    /// use url::Url;
598    /// use serde_json::json;
599    ///
600    /// let request = Request::new(Url::parse("https://api.example.com")?)
601    ///     .with_body(Body::Json(json!({"key": "value"})));
602    /// # Ok::<(), Box<dyn std::error::Error>>(())
603    /// ```
604    pub fn with_body(mut self, body: Body) -> Self {
605        self.body = Some(body);
606        self.with_method(Method::Post)
607    }
608
609    /// Sets the body of the request to a JSON value and defaults the method to POST.
610    ///
611    /// This helper stores the payload body only. Add content-type headers
612    /// explicitly when the target service expects them.
613    ///
614    /// ## Example
615    ///
616    /// ```rust,ignore
617    /// use spider_util::request::Request;
618    /// use url::Url;
619    /// use serde_json::json;
620    ///
621    /// let request = Request::new(Url::parse("https://api.example.com")?)
622    ///     .with_json(json!({"name": "test"}));
623    /// # Ok::<(), Box<dyn std::error::Error>>(())
624    /// ```
625    pub fn with_json(self, json: serde_json::Value) -> Self {
626        self.with_body(Body::Json(json))
627    }
628
629    /// Sets the body of the request to form data and defaults the method to POST.
630    ///
631    /// ## Example
632    ///
633    /// ```rust,ignore
634    /// use spider_util::request::Request;
635    /// use url::Url;
636    /// use dashmap::DashMap;
637    ///
638    /// let mut form = DashMap::new();
639    /// form.insert("key".to_string(), "value".to_string());
640    ///
641    /// let request = Request::new(Url::parse("https://api.example.com")?)
642    ///     .with_form(form);
643    /// # Ok::<(), Box<dyn std::error::Error>>(())
644    /// ```
645    pub fn with_form(self, form: DashMap<String, String>) -> Self {
646        self.with_body(Body::Form(form))
647    }
648
649    /// Sets the body of the request to raw bytes and defaults the method to POST.
650    ///
651    /// ## Example
652    ///
653    /// ```rust,ignore
654    /// use spider_util::request::Request;
655    /// use url::Url;
656    /// use bytes::Bytes;
657    ///
658    /// let data = Bytes::from("binary data");
659    /// let request = Request::new(Url::parse("https://api.example.com")?)
660    ///     .with_bytes(data);
661    /// # Ok::<(), Box<dyn std::error::Error>>(())
662    /// ```
663    pub fn with_bytes(self, bytes: bytes::Bytes) -> Self {
664        self.with_body(Body::Bytes(bytes))
665    }
666
667    /// Adds a value to the request's metadata.
668    ///
669    /// Lazily allocates the metadata map on first use. Metadata is commonly
670    /// used to carry crawl context such as pagination state, source URLs, or
671    /// retry bookkeeping across middleware and parsing stages.
672    ///
673    /// ## Example
674    ///
675    /// ```rust,ignore
676    /// use spider_util::request::Request;
677    /// use url::Url;
678    ///
679    /// let request = Request::new(Url::parse("https://example.com")?)
680    ///     .with_priority(10)
681    ///     .with_meta("source", serde_json::json!("manual"));
682    /// # Ok::<(), Box<dyn std::error::Error>>(())
683    /// ```
684    pub fn with_meta(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
685        self.meta
686            .get_or_insert_with(|| Arc::new(DashMap::new()))
687            .insert(key.into(), value);
688        self
689    }
690
691    /// Serializes and stores a metadata value under the provided key.
692    ///
693    /// This is a convenient typed companion to [`Request::with_meta`] that
694    /// avoids manual `serde_json::json!(...)` calls for structured metadata.
695    pub fn with_meta_value<T>(self, key: impl Into<String>, value: T) -> Result<Self, SpiderError>
696    where
697        T: Serialize,
698    {
699        Ok(self.with_meta(key, serde_json::to_value(value)?))
700    }
701
702    /// Serializes and stores a metadata value only when it is present.
703    pub fn with_optional_meta_value<T>(
704        self,
705        key: impl Into<String>,
706        value: Option<T>,
707    ) -> Result<Self, SpiderError>
708    where
709        T: Serialize,
710    {
711        match value {
712            Some(value) => self.with_meta_value(key, value),
713            None => Ok(self),
714        }
715    }
716
717    /// Gets a reference to a metadata value, if it exists.
718    ///
719    /// Returns a cloned JSON value because metadata is stored in a shared
720    /// concurrent map. Returns `None` if the key doesn't exist or if metadata
721    /// hasn't been set.
722    pub fn get_meta(&self, key: &str) -> Option<serde_json::Value> {
723        self.meta
724            .as_ref()
725            .and_then(|m| m.get(key).map(|e| e.value().clone()))
726    }
727
728    /// Deserializes a metadata value into the requested type.
729    pub fn meta_value<T>(&self, key: &str) -> Result<Option<T>, SpiderError>
730    where
731        T: DeserializeOwned,
732    {
733        self.get_meta(key)
734            .map(serde_json::from_value)
735            .transpose()
736            .map_err(SpiderError::from)
737    }
738
739    /// Returns `true` if the request has metadata.
740    pub fn has_meta(&self) -> bool {
741        self.meta.as_ref().is_some_and(|m| !m.is_empty())
742    }
743
    /// Returns a reference to the internal metadata map, if it exists.
    ///
    /// `None` means no metadata map has been allocated for this request.
    pub fn meta_map(&self) -> Option<&Arc<DashMap<String, serde_json::Value>>> {
        self.meta.as_ref()
    }
748
749    /// Inserts a value into metadata, creating the map if needed.
750    ///
751    /// This is intended for internal framework use.
752    pub fn insert_meta(&mut self, key: impl Into<String>, value: serde_json::Value) {
753        self.meta
754            .get_or_insert_with(|| Arc::new(DashMap::new()))
755            .insert(key.into(), value);
756    }
757
758    /// Serializes and inserts a metadata value for internal or incremental use.
759    pub fn insert_meta_value<T>(
760        &mut self,
761        key: impl Into<String>,
762        value: T,
763    ) -> Result<(), SpiderError>
764    where
765        T: Serialize,
766    {
767        self.insert_meta(key, serde_json::to_value(value)?);
768        Ok(())
769    }
770
771    /// Removes a metadata entry by key, returning the stored JSON value if any.
772    pub fn remove_meta(&mut self, key: &str) -> Option<serde_json::Value> {
773        self.meta
774            .as_ref()
775            .and_then(|meta| meta.remove(key).map(|(_, value)| value))
776    }
777
778    /// Gets a value from metadata using DashMap's API.
779    ///
780    /// This is intended for internal framework use where direct access is needed.
781    pub fn get_meta_ref(
782        &self,
783        key: &str,
784    ) -> Option<dashmap::mapref::one::Ref<'_, String, serde_json::Value>> {
785        self.meta.as_ref().and_then(|m| m.get(key))
786    }
787
    /// Sets the metadata map directly.
    ///
    /// Replaces whatever metadata handle the request currently holds with
    /// `meta`; passing `None` leaves the request without a metadata map.
    ///
    /// Used for internal framework operations.
    pub fn set_meta_from_option(&mut self, meta: Option<Arc<DashMap<String, serde_json::Value>>>) {
        self.meta = meta;
    }
794
795    /// Clones the metadata map.
796    ///
797    /// Used for internal framework operations where metadata needs to be copied.
798    pub fn clone_meta(&self) -> Option<Arc<DashMap<String, serde_json::Value>>> {
799        self.meta.clone()
800    }
801
802    /// Takes the metadata map, leaving `None` in its place.
803    ///
804    /// Used for internal framework operations.
805    pub fn take_meta(&mut self) -> Option<Arc<DashMap<String, serde_json::Value>>> {
806        self.meta.take()
807    }
808
    /// Returns a reference to the metadata Arc for internal framework use.
    ///
    /// The returned option is `None` when no metadata map is allocated.
    pub fn meta_inner(&self) -> &Option<Arc<DashMap<String, serde_json::Value>>> {
        &self.meta
    }
813
    /// Metadata key under which the retry counter is stored.
    const RETRY_ATTEMPTS_KEY: &str = "retry_attempts";
815
816    /// Gets the number of times the request has been retried.
817    ///
818    /// Returns `0` if no retry attempts have been recorded.
819    pub fn get_retry_attempts(&self) -> u32 {
820        self.meta
821            .as_ref()
822            .and_then(|m| m.get(Self::RETRY_ATTEMPTS_KEY))
823            .and_then(|v| v.value().as_u64())
824            .unwrap_or(0) as u32
825    }
826
827    /// Increments the retry count for the request.
828    ///
829    /// Lazily allocates the metadata map if not already present.
830    pub fn increment_retry_attempts(&mut self) {
831        let current_attempts = self.get_retry_attempts();
832        self.meta
833            .get_or_insert_with(|| Arc::new(DashMap::new()))
834            .insert(
835                Self::RETRY_ATTEMPTS_KEY.to_string(),
836                serde_json::Value::from(current_attempts + 1),
837            );
838    }
839
840    /// Generates a unique fingerprint for the request based on its URL, method, and body.
841    ///
842    /// This is the stable identity used by runtime deduplication and related
843    /// components that need to recognize equivalent requests.
844    ///
845    /// The fingerprint is used for duplicate detection and caching. It combines:
846    /// - The request URL
847    /// - The HTTP method
848    /// - The request body (if present)
849    ///
850    /// ## Example
851    ///
852    /// ```rust,ignore
853    /// use spider_util::request::Request;
854    /// use url::Url;
855    ///
856    /// let request = Request::new(Url::parse("https://example.com")?);
857    /// let fingerprint = request.fingerprint();
858    /// # Ok::<(), Box<dyn std::error::Error>>(())
859    /// ```
860    pub fn fingerprint(&self) -> String {
861        let mut hasher = XxHash64::default();
862        hasher.write(self.url.as_str().as_bytes());
863        hasher.write(self.method.as_str().as_bytes());
864
865        if let Some(ref body) = self.body {
866            match body {
867                Body::Json(json_val) => {
868                    if let Ok(serialized) = serde_json::to_string(json_val) {
869                        hasher.write(serialized.as_bytes());
870                    }
871                }
872                Body::Form(form_val) => {
873                    // Optimized: hash components directly without building intermediate String
874                    for r in form_val.iter() {
875                        hasher.write(r.key().as_bytes());
876                        hasher.write(r.value().as_bytes());
877                    }
878                }
879                Body::Bytes(bytes_val) => {
880                    hasher.write(bytes_val);
881                }
882            }
883        }
884        format!("{:x}", hasher.finish())
885    }
886}