pub struct LinkExtractOptions {
pub same_site_only: bool,
pub include_text_links: bool,
pub sources: Vec<LinkSource>,
pub allowed_link_types: Option<Vec<LinkType>>,
pub denied_link_types: Vec<LinkType>,
pub allow_patterns: Vec<String>,
pub deny_patterns: Vec<String>,
pub allow_domains: Vec<String>,
pub deny_domains: Vec<String>,
pub allow_path_prefixes: Vec<String>,
pub deny_path_prefixes: Vec<String>,
pub allowed_tags: Option<Vec<String>>,
pub allowed_attributes: Option<Vec<String>>,
}
Options that control link extraction from a Response.
The defaults are intentionally conservative for crawler use: same-site filtering is enabled, text links are included, and common HTML elements are scanned for navigable URLs.
Fields§
same_site_only: bool — Restrict discovered links to the same registered domain.
include_text_links: bool — Include URLs found in text content.
sources: Vec<LinkSource> — HTML sources used to discover attribute-based links.
allowed_link_types: Option<Vec<LinkType>> — Optional allow-list of link types to include.
denied_link_types: Vec<LinkType> — Optional deny-list of link types to exclude.
allow_patterns: Vec<String> — Optional allow-list of glob-style URL patterns (* and ? supported).
deny_patterns: Vec<String> — Optional deny-list of glob-style URL patterns (* and ? supported).
allow_domains: Vec<String> — Optional allow-list of domains or registered-domain suffixes.
deny_domains: Vec<String> — Optional deny-list of domains or registered-domain suffixes.
allow_path_prefixes: Vec<String> — Optional allow-list of URL path prefixes.
deny_path_prefixes: Vec<String> — Optional deny-list of URL path prefixes.
allowed_tags: Option<Vec<String>> — Optional allow-list of HTML tag names used for attribute extraction.
allowed_attributes: Option<Vec<String>> — Optional allow-list of attribute names used for attribute extraction.
Implementations§
Source§impl LinkExtractOptions
impl LinkExtractOptions
Sourcepub fn same_site_only(self, same_site_only: bool) -> LinkExtractOptions
pub fn same_site_only(self, same_site_only: bool) -> LinkExtractOptions
Sets whether only same-site URLs should be returned.
Sourcepub fn include_text_links(self, include_text_links: bool) -> LinkExtractOptions
pub fn include_text_links(self, include_text_links: bool) -> LinkExtractOptions
Sets whether URLs found in text content should be returned.
Sourcepub fn with_sources(
self,
sources: impl IntoIterator<Item = LinkSource>,
) -> LinkExtractOptions
pub fn with_sources( self, sources: impl IntoIterator<Item = LinkSource>, ) -> LinkExtractOptions
Replaces the configured HTML extraction sources.
Sourcepub fn add_source(self, source: LinkSource) -> LinkExtractOptions
pub fn add_source(self, source: LinkSource) -> LinkExtractOptions
Adds an HTML extraction source.
Sourcepub fn with_allowed_link_types(
self,
allowed_link_types: impl IntoIterator<Item = LinkType>,
) -> LinkExtractOptions
pub fn with_allowed_link_types( self, allowed_link_types: impl IntoIterator<Item = LinkType>, ) -> LinkExtractOptions
Restricts extraction to the provided link types.
Sourcepub fn with_denied_link_types(
self,
denied_link_types: impl IntoIterator<Item = LinkType>,
) -> LinkExtractOptions
pub fn with_denied_link_types( self, denied_link_types: impl IntoIterator<Item = LinkType>, ) -> LinkExtractOptions
Adds link types that should be excluded even if discovered.
Sourcepub fn allow_pattern(self, pattern: impl Into<String>) -> LinkExtractOptions
pub fn allow_pattern(self, pattern: impl Into<String>) -> LinkExtractOptions
Adds a glob-style allow pattern that URLs must match.
Sourcepub fn with_allow_patterns(
self,
patterns: impl IntoIterator<Item = impl Into<String>>,
) -> LinkExtractOptions
pub fn with_allow_patterns( self, patterns: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions
Replaces the glob-style allow patterns.
Sourcepub fn deny_pattern(self, pattern: impl Into<String>) -> LinkExtractOptions
pub fn deny_pattern(self, pattern: impl Into<String>) -> LinkExtractOptions
Adds a glob-style deny pattern that excludes matching URLs.
Sourcepub fn with_deny_patterns(
self,
patterns: impl IntoIterator<Item = impl Into<String>>,
) -> LinkExtractOptions
pub fn with_deny_patterns( self, patterns: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions
Replaces the glob-style deny patterns.
Sourcepub fn allow_domain(self, domain: impl Into<String>) -> LinkExtractOptions
pub fn allow_domain(self, domain: impl Into<String>) -> LinkExtractOptions
Adds a domain or registered-domain suffix to allow.
Sourcepub fn with_allow_domains(
self,
domains: impl IntoIterator<Item = impl Into<String>>,
) -> LinkExtractOptions
pub fn with_allow_domains( self, domains: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions
Replaces the allowed domains.
Sourcepub fn deny_domain(self, domain: impl Into<String>) -> LinkExtractOptions
pub fn deny_domain(self, domain: impl Into<String>) -> LinkExtractOptions
Adds a domain or registered-domain suffix to deny.
Sourcepub fn with_deny_domains(
self,
domains: impl IntoIterator<Item = impl Into<String>>,
) -> LinkExtractOptions
pub fn with_deny_domains( self, domains: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions
Replaces the denied domains.
Sourcepub fn allow_path_prefix(self, prefix: impl Into<String>) -> LinkExtractOptions
pub fn allow_path_prefix(self, prefix: impl Into<String>) -> LinkExtractOptions
Adds a URL path prefix that links must match.
Sourcepub fn with_allow_path_prefixes(
self,
prefixes: impl IntoIterator<Item = impl Into<String>>,
) -> LinkExtractOptions
pub fn with_allow_path_prefixes( self, prefixes: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions
Replaces the allowed URL path prefixes.
Sourcepub fn deny_path_prefix(self, prefix: impl Into<String>) -> LinkExtractOptions
pub fn deny_path_prefix(self, prefix: impl Into<String>) -> LinkExtractOptions
Adds a URL path prefix that should be excluded.
Sourcepub fn with_deny_path_prefixes(
self,
prefixes: impl IntoIterator<Item = impl Into<String>>,
) -> LinkExtractOptions
pub fn with_deny_path_prefixes( self, prefixes: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions
Replaces the denied URL path prefixes.
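pub fn with_allowed_tags( self, tags: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions
Restricts attribute-based extraction to specific HTML tag names.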
Sourcepub fn with_allowed_attributes(
self,
attributes: impl IntoIterator<Item = impl Into<String>>,
) -> LinkExtractOptions
pub fn with_allowed_attributes( self, attributes: impl IntoIterator<Item = impl Into<String>>, ) -> LinkExtractOptions
Restricts attribute-based extraction to specific attribute names.
Trait Implementations§
Source§impl Clone for LinkExtractOptions
impl Clone for LinkExtractOptions
Source§fn clone(&self) -> LinkExtractOptions
fn clone(&self) -> LinkExtractOptions
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for LinkExtractOptions
impl Debug for LinkExtractOptions
Source§impl Default for LinkExtractOptions
impl Default for LinkExtractOptions
Source§fn default() -> LinkExtractOptions
fn default() -> LinkExtractOptions
Source§impl PartialEq for LinkExtractOptions
impl PartialEq for LinkExtractOptions
impl Eq for LinkExtractOptions
impl StructuralPartialEq for LinkExtractOptions
Auto Trait Implementations§
impl Freeze for LinkExtractOptions
impl RefUnwindSafe for LinkExtractOptions
impl Send for LinkExtractOptions
impl Sync for LinkExtractOptions
impl Unpin for LinkExtractOptions
impl UnsafeUnpin for LinkExtractOptions
impl UnwindSafe for LinkExtractOptions
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<Q, K> Equivalent<K> for Q
impl<Q, K> Equivalent<K> for Q
Source§impl<Q, K> Equivalent<K> for Q
impl<Q, K> Equivalent<K> for Q
Source§fn equivalent(&self, key: &K) -> bool
fn equivalent(&self, key: &K) -> bool
Compare self to key and return true if they are equal.