Skip to main content

LinkExtractOptions

Struct LinkExtractOptions 

Source
pub struct LinkExtractOptions {
Show 13 fields pub same_site_only: bool, pub include_text_links: bool, pub sources: Vec<LinkSource>, pub allowed_link_types: Option<Vec<LinkType>>, pub denied_link_types: Vec<LinkType>, pub allow_patterns: Vec<String>, pub deny_patterns: Vec<String>, pub allow_domains: Vec<String>, pub deny_domains: Vec<String>, pub allow_path_prefixes: Vec<String>, pub deny_path_prefixes: Vec<String>, pub allowed_tags: Option<Vec<String>>, pub allowed_attributes: Option<Vec<String>>,
}
Expand description

Options that control link extraction from a Response.

The defaults are intentionally conservative for crawler use: same-site filtering is enabled, text links are included, and common HTML elements are scanned for navigable URLs.

Fields§

§same_site_only: bool

Restrict discovered links to the same registered domain.

§include_text_links: bool

Include URLs found in text content.

§sources: Vec<LinkSource>

HTML sources used to discover attribute-based links.

§allowed_link_types: Option<Vec<LinkType>>

Optional allow-list of link types to include.

§denied_link_types: Vec<LinkType>

Optional deny-list of link types to exclude.

§allow_patterns: Vec<String>

Optional allow-list of glob-style URL patterns (* and ? supported).

§deny_patterns: Vec<String>

Optional deny-list of glob-style URL patterns (* and ? supported).

§allow_domains: Vec<String>

Optional allow-list of domains or registered-domain suffixes.

§deny_domains: Vec<String>

Optional deny-list of domains or registered-domain suffixes.

§allow_path_prefixes: Vec<String>

Optional allow-list of URL path prefixes.

§deny_path_prefixes: Vec<String>

Optional deny-list of URL path prefixes.

§allowed_tags: Option<Vec<String>>

Optional allow-list of HTML tag names used for attribute extraction.

§allowed_attributes: Option<Vec<String>>

Optional allow-list of attribute names used for attribute extraction.

Implementations§

Source§

impl LinkExtractOptions

Source

pub fn same_site_only(self, same_site_only: bool) -> LinkExtractOptions

Sets whether only same-site URLs should be returned.

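Sets whether URLs found in text content should be returned (this corresponds to the include_text_links field; the method signature is missing from this listing).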

Source

pub fn with_sources( self, sources: impl IntoIterator<Item = LinkSource>, ) -> LinkExtractOptions

Replaces the configured HTML extraction sources.

Source

pub fn add_source(self, source: LinkSource) -> LinkExtractOptions

Adds an HTML extraction source.

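Restricts extraction to the provided link types (this corresponds to the allowed_link_types allow-list; the method signature is missing from this listing).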

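Adds link types that should be excluded even if discovered (this corresponds to the denied_link_types deny-list; the method signature is missing from this listing).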

Source

pub fn allow_pattern(self, pattern: impl Into<String>) -> LinkExtractOptions

Adds a glob-style allow pattern that URLs must match.

Source

pub fn with_allow_patterns( self, patterns: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions

Replaces the glob-style allow patterns.

Source

pub fn deny_pattern(self, pattern: impl Into<String>) -> LinkExtractOptions

Adds a glob-style deny pattern that excludes matching URLs.

Source

pub fn with_deny_patterns( self, patterns: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions

Replaces the glob-style deny patterns.

Source

pub fn allow_domain(self, domain: impl Into<String>) -> LinkExtractOptions

Adds a domain or registered-domain suffix to allow.

Source

pub fn with_allow_domains( self, domains: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions

Replaces the allowed domains.

Source

pub fn deny_domain(self, domain: impl Into<String>) -> LinkExtractOptions

Adds a domain or registered-domain suffix to deny.

Source

pub fn with_deny_domains( self, domains: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions

Replaces the denied domains.

Source

pub fn allow_path_prefix(self, prefix: impl Into<String>) -> LinkExtractOptions

Adds a URL path prefix that links must match.

Source

pub fn with_allow_path_prefixes( self, prefixes: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions

Replaces the allowed URL path prefixes.

Source

pub fn deny_path_prefix(self, prefix: impl Into<String>) -> LinkExtractOptions

Adds a URL path prefix that should be excluded.

Source

pub fn with_deny_path_prefixes( self, prefixes: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions

Replaces the denied URL path prefixes.

Source

pub fn with_allowed_tags( self, tags: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions

Restricts attribute-based extraction to specific HTML tag names.

Source

pub fn with_allowed_attributes( self, attributes: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions

Restricts attribute-based extraction to specific attribute names.

Trait Implementations§

Source§

impl Clone for LinkExtractOptions

Source§

fn clone(&self) -> LinkExtractOptions

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for LinkExtractOptions

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
Source§

impl Default for LinkExtractOptions

Source§

fn default() -> LinkExtractOptions

Returns the “default value” for a type. Read more
Source§

impl PartialEq for LinkExtractOptions

Source§

fn eq(&self, other: &LinkExtractOptions) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl Eq for LinkExtractOptions

Source§

impl StructuralPartialEq for LinkExtractOptions

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<Q, K> Equivalent<K> for Q
where Q: Eq + ?Sized, K: Borrow<Q> + ?Sized,

Source§

fn equivalent(&self, key: &K) -> bool

Checks if this value is equivalent to the given key. Read more
Source§

impl<Q, K> Equivalent<K> for Q
where Q: Eq + ?Sized, K: Borrow<Q> + ?Sized,

Source§

fn equivalent(&self, key: &K) -> bool

Compare self to key and return true if they are equal.
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of the pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more