pub struct PromptConfiguration {
pub respect_robots_txt: Option<bool>,
pub subdomains: Option<bool>,
pub tld: Option<bool>,
pub depth: Option<usize>,
pub delay: Option<u64>,
pub request_timeout_ms: Option<u64>,
pub crawl_timeout_ms: Option<u64>,
pub blacklist_url: Option<Vec<String>>,
pub whitelist_url: Option<Vec<String>>,
pub external_domains: Option<Vec<String>>,
pub user_agent: Option<String>,
pub headers: Option<HashMap<String, String>>,
pub http2_prior_knowledge: Option<bool>,
pub accept_invalid_certs: Option<bool>,
pub proxies: Option<Vec<String>>,
pub redirect_limit: Option<usize>,
pub budget: Option<HashMap<String, u32>>,
pub max_page_bytes: Option<f64>,
pub full_resources: Option<bool>,
pub only_html: Option<bool>,
pub return_page_links: Option<bool>,
pub use_chrome: Option<bool>,
pub stealth_mode: Option<String>,
pub viewport_width: Option<u32>,
pub viewport_height: Option<u32>,
pub wait_for_idle_network: Option<bool>,
pub wait_for_delay_ms: Option<u64>,
pub wait_for_selector: Option<String>,
pub evaluate_on_new_document: Option<String>,
pub shared_queue: Option<bool>,
pub retry: Option<u8>,
}
Configuration response from the LLM for prompt-based crawler setup.
This type is specific to spider’s Website configuration. Use it with
configure_crawler_from_prompt to generate crawler settings from
natural language descriptions.
§Example
use spider::features::automation::configure_crawler_from_prompt;
let config = configure_crawler_from_prompt(
"http://localhost:11434/v1/chat/completions",
"llama3",
None,
"Crawl only blog posts, max 50 pages, respect robots.txt"
).await?;
// Apply to website
website.apply_prompt_configuration(&config);
Fields§
respect_robots_txt: Option<bool>Respect robots.txt rules.
subdomains: Option<bool>Crawl subdomains.
tld: Option<bool>Crawl top-level domain variants.
depth: Option<usize>Maximum crawl depth.
delay: Option<u64>Delay between requests in milliseconds.
request_timeout_ms: Option<u64>Request timeout in milliseconds.
crawl_timeout_ms: Option<u64>Total crawl timeout in milliseconds.
blacklist_url: Option<Vec<String>>URL patterns to exclude.
whitelist_url: Option<Vec<String>>URL patterns to include exclusively.
external_domains: Option<Vec<String>>External domains to allow crawling.
user_agent: Option<String>User agent string.
headers: Option<HashMap<String, String>>Custom HTTP headers.
http2_prior_knowledge: Option<bool>Use HTTP/2 prior knowledge.
accept_invalid_certs: Option<bool>Accept invalid SSL certificates.
proxies: Option<Vec<String>>Proxy URLs for requests.
redirect_limit: Option<usize>Maximum redirect limit.
budget: Option<HashMap<String, u32>>Budget limits per path or domain.
max_page_bytes: Option<f64>Maximum bytes per page.
full_resources: Option<bool>Crawl all resources including assets.
only_html: Option<bool>Only crawl HTML pages.
return_page_links: Option<bool>Return discovered links with pages.
use_chrome: Option<bool>Use headless Chrome for rendering.
stealth_mode: Option<String>Stealth mode level: “none”, “basic”, “low”, “mid”, “full”.
viewport_width: Option<u32>Browser viewport width.
viewport_height: Option<u32>Browser viewport height.
wait_for_idle_network: Option<bool>Wait for network to be idle.
wait_for_delay_ms: Option<u64>Delay after page load in milliseconds.
wait_for_selector: Option<String>CSS selector to wait for.
evaluate_on_new_document: Option<String>JavaScript to inject on each page.
shared_queue: Option<bool>Use shared queue for even distribution.
retry: Option<u8>Retry attempts for failed requests.
Trait Implementations§
Source§impl Clone for PromptConfiguration
impl Clone for PromptConfiguration
Source§fn clone(&self) -> PromptConfiguration
fn clone(&self) -> PromptConfiguration
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for PromptConfiguration
impl Debug for PromptConfiguration
Source§impl Default for PromptConfiguration
impl Default for PromptConfiguration
Source§fn default() -> PromptConfiguration
fn default() -> PromptConfiguration
Auto Trait Implementations§
impl Freeze for PromptConfiguration
impl RefUnwindSafe for PromptConfiguration
impl Send for PromptConfiguration
impl Sync for PromptConfiguration
impl Unpin for PromptConfiguration
impl UnsafeUnpin for PromptConfiguration
impl UnwindSafe for PromptConfiguration
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T
where
T: Clone,
impl<T> CloneToUninit for T
where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more