pub struct HttpSourceConfig {
pub urls: Vec<String>,
pub sitemap: Option<String>,
pub crawl_depth: u32,
pub allow_external: bool,
pub request_delay_seconds: f64,
pub respect_robots: bool,
pub max_pages: u64,
pub user_agent: String,
}
Fields§
§urls: Vec<String>
§sitemap: Option<String>
§crawl_depth: u32
RM-B Task 4 / Python fcbad65: Depth-bounded link crawl. 0 = current
behavior (fetch only listed URLs + sitemap entries). >=1 follows
that many link-hops from each seed via <a href> extraction.
Capped at 5 to match Python’s ge=0, le=5 Field constraint.
§allow_external: bool
By default the crawler only follows same-host links. Flip to true to
follow off-host links too (still rate-limited, still subject to
max_pages and respect_robots).
§request_delay_seconds: f64
Minimum delay between outbound requests (per source, not per host). Default 0.5s matches Python.
§respect_robots: bool
Enforce robots.txt. One fetch per host, cached. Default true.
§max_pages: u64
Hard runaway cap on number of pages fetched per call. Default 1000.
§user_agent: String
User-Agent header. Default matches Python’s chunkshop/0.6 (+https://…).
Trait Implementations§
Source§impl Clone for HttpSourceConfig
impl Clone for HttpSourceConfig
Source§fn clone(&self) -> HttpSourceConfig
fn clone(&self) -> HttpSourceConfig
Returns a duplicate of the value. Read more
1.0.0 (const: unstable) · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for HttpSourceConfig
impl Debug for HttpSourceConfig
Source§impl Default for HttpSourceConfig
impl Default for HttpSourceConfig
Source§impl<'de> Deserialize<'de> for HttpSourceConfig
impl<'de> Deserialize<'de> for HttpSourceConfig
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Auto Trait Implementations§
impl Freeze for HttpSourceConfig
impl RefUnwindSafe for HttpSourceConfig
impl Send for HttpSourceConfig
impl Sync for HttpSourceConfig
impl Unpin for HttpSourceConfig
impl UnsafeUnpin for HttpSourceConfig
impl UnwindSafe for HttpSourceConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T
where
T: Clone,
impl<T> CloneToUninit for T
where
T: Clone,
impl<T> DeserializeOwned for T
where
T: for<'de> Deserialize<'de>,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<T> PolicyExt for T
where
T: ?Sized,
impl<T> PolicyExt for T
where
T: ?Sized,
Source§impl<R, P> ReadPrimitive<R> for P
impl<R, P> ReadPrimitive<R> for P
Source§fn read_from_little_endian(read: &mut R) -> Result<Self, Error>
fn read_from_little_endian(read: &mut R) -> Result<Self, Error>
Read this value from the supplied reader. Same as
ReadEndian::read_from_little_endian().