pub struct Web2llmConfig {
pub user_agent: String,
pub timeout: Duration,
pub block_private_hosts: bool,
pub sensitivity: f32,
pub robots_check: bool,
pub rate_limit: u32,
pub max_concurrency: usize,
}
User-facing configuration for the web2llm pipeline.
Controls fetch behavior and request identity.
Use Web2llmConfig::default() for sensible defaults.
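As an illustrative sketch, the documented defaults can be combined with Rust's struct-update syntax to override only the fields you need. The `Default` values shown for `user_agent` and `timeout` are assumptions for illustration; the other defaults are the documented ones.

```rust
use std::time::Duration;

// Sketch of the config struct with its documented defaults; the
// user_agent and timeout defaults are assumptions for illustration.
#[derive(Debug, Clone)]
pub struct Web2llmConfig {
    pub user_agent: String,
    pub timeout: Duration,
    pub block_private_hosts: bool,
    pub sensitivity: f32,
    pub robots_check: bool,
    pub rate_limit: u32,
    pub max_concurrency: usize,
}

impl Default for Web2llmConfig {
    fn default() -> Self {
        Self {
            user_agent: "web2llm/0.1".to_string(), // assumed
            timeout: Duration::from_secs(30),      // assumed
            block_private_hosts: true,             // documented default
            sensitivity: 0.1,                      // documented default
            robots_check: true,                    // documented default
            rate_limit: 5,                         // documented default
            max_concurrency: 10,                   // documented default
        }
    }
}

fn main() {
    // Start from defaults and override only what you need.
    let cfg = Web2llmConfig {
        timeout: Duration::from_secs(10),
        ..Web2llmConfig::default()
    };
    assert!(cfg.block_private_hosts);
    assert_eq!(cfg.rate_limit, 5);
    assert_eq!(cfg.timeout, Duration::from_secs(10));
}
```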
Fields
user_agent: String
The user-agent string sent with every HTTP request. Also used for robots.txt compliance checks.
timeout: Duration
Maximum time to wait for a response before giving up.
block_private_hosts: bool
If true, requests to private, loopback, and link-local addresses are rejected during pre-flight validation. This prevents SSRF attacks when web2llm is used in a service that accepts user-supplied URLs.
Set to false if you need to fetch from localhost or internal hosts in a trusted environment, such as local development or testing.
Defaults to true.
sensitivity: f32
Controls how aggressively secondary content is filtered. The value sets a score threshold relative to the best-scoring branch: a value of 0.1 keeps every branch scoring within 10x of the best, while a value of 0.5 keeps only branches scoring at least half as well as the best.
Defaults to 0.1.
robots_check: bool
If true, the pipeline will fetch and respect robots.txt before downloading the target page.
Defaults to true.
rate_limit: u32
The maximum number of requests allowed per second.
Defaults to 5.
max_concurrency: usize
The maximum number of concurrent requests allowed across the whole pipeline.
Defaults to 10.
Implementations
impl Web2llmConfig
pub fn new(
user_agent: String,
timeout: Duration,
block_private_hosts: bool,
sensitivity: f32,
rate_limit: u32,
max_concurrency: usize,
) -> Self
Creates a new Web2llmConfig with the specified values. Note that robots_check is not a parameter here; it starts at its documented default of true and can be changed with with_robots_check.
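As a sketch of the argument order, the following stub mirrors the documented signature (the stub body and the example user-agent string are illustrative, not the crate's real implementation):

```rust
use std::time::Duration;

// Stub mirroring the documented fields and `new` signature.
#[allow(dead_code)]
struct Web2llmConfig {
    user_agent: String,
    timeout: Duration,
    block_private_hosts: bool,
    sensitivity: f32,
    robots_check: bool,
    rate_limit: u32,
    max_concurrency: usize,
}

impl Web2llmConfig {
    fn new(
        user_agent: String,
        timeout: Duration,
        block_private_hosts: bool,
        sensitivity: f32,
        rate_limit: u32,
        max_concurrency: usize,
    ) -> Self {
        Self {
            user_agent,
            timeout,
            block_private_hosts,
            sensitivity,
            robots_check: true, // documented default; set via with_robots_check
            rate_limit,
            max_concurrency,
        }
    }
}

fn main() {
    let cfg = Web2llmConfig::new(
        "my-bot/1.0 (+https://example.com/bot)".to_string(), // example UA
        Duration::from_secs(15),
        true, // block_private_hosts
        0.1,  // sensitivity
        5,    // rate_limit
        10,   // max_concurrency
    );
    assert!(cfg.robots_check);
    assert_eq!(cfg.max_concurrency, 10);
}
```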
pub fn with_robots_check(self, check: bool) -> Self
Builder-style method to set whether to check robots.txt.
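A minimal stub, mirroring only the documented builder signature (not the crate's real implementation), shows how the method is chained:

```rust
// Stub with just the field the builder touches.
#[derive(Debug)]
struct Web2llmConfig {
    robots_check: bool,
}

impl Web2llmConfig {
    // Consumes self and returns it with robots_check updated,
    // matching the documented builder-style signature.
    fn with_robots_check(mut self, check: bool) -> Self {
        self.robots_check = check;
        self
    }
}

fn main() {
    // robots_check starts at its documented default of true;
    // opt out only in a trusted environment.
    let cfg = Web2llmConfig { robots_check: true }.with_robots_check(false);
    assert!(!cfg.robots_check);
}
```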