<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="Source of the Rust file `src/config/crawler.rs`."><title>crawler.rs - source</title><script>if(window.location.protocol!=="file:")document.head.insertAdjacentHTML("beforeend","SourceSerif4-Regular-6b053e98.ttf.woff2,FiraSans-Italic-81dc35de.woff2,FiraSans-Regular-0fe48ade.woff2,FiraSans-MediumItalic-ccf7e434.woff2,FiraSans-Medium-e1aa3f0a.woff2,SourceCodePro-Regular-8badfe75.ttf.woff2,SourceCodePro-Semibold-aa29a496.ttf.woff2".split(",").map(f=>`<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../static.files/${f}">`).join(""))</script><link rel="stylesheet" href="../../../static.files/normalize-9960930a.css"><link rel="stylesheet" href="../../../static.files/rustdoc-b7b9f40b.css"><meta name="rustdoc-vars" data-root-path="../../../" data-static-root-path="../../../static.files/" data-current-crate="scrapfly_sdk" data-themes="" data-resource-suffix="" data-rustdoc-version="1.95.0 (59807616e 2026-04-14)" data-channel="1.95.0" data-search-js="search-63369b7b.js" data-stringdex-js="stringdex-b897f86f.js" data-settings-js="settings-170eb4bf.js" ><script src="../../../static.files/storage-41dd4d93.js"></script><script defer src="../../../static.files/src-script-813739b1.js"></script><script defer src="../../../src-files.js"></script><script defer src="../../../static.files/main-5013f961.js"></script><noscript><link rel="stylesheet" href="../../../static.files/noscript-f7c3ffd8.css"></noscript><link rel="alternate icon" type="image/png" href="../../../static.files/favicon-32x32-eab170b8.png"><link rel="icon" type="image/svg+xml" href="../../../static.files/favicon-044be391.svg"></head><body class="rustdoc src"><a class="skip-main-content" href="#main-content">Skip to main content</a><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will 
most likely display funky things.</div><![endif]--><nav class="sidebar"><div class="src-sidebar-title"><h2>Files</h2></div></nav><div class="sidebar-resizer" title="Drag to resize sidebar"></div><main><section id="main-content" class="content" tabindex="-1"><div class="main-heading"><h1><div class="sub-heading">scrapfly_sdk/config/</div>crawler.rs</h1><rustdoc-toolbar></rustdoc-toolbar></div><div class="example-wrap digits-3"><pre class="rust"><code><a href=#1 id=1 data-nosnippet>1</a><span class="doccomment">//! Crawler endpoint configuration — ported from `sdk/go/config_crawler.go`.
<a href=#2 id=2 data-nosnippet>2</a>
<a href=#3 id=3 data-nosnippet>3</a></span><span class="kw">use </span>std::collections::BTreeMap;
<a href=#4 id=4 data-nosnippet>4</a>
<a href=#5 id=5 data-nosnippet>5</a><span class="kw">use </span>serde::Serialize;
<a href=#6 id=6 data-nosnippet>6</a>
<a href=#7 id=7 data-nosnippet>7</a><span class="kw">use </span><span class="kw">crate</span>::enums::{CrawlerContentFormat, CrawlerWebhookEvent};
<a href=#8 id=8 data-nosnippet>8</a><span class="kw">use </span><span class="kw">crate</span>::error::ScrapflyError;
<a href=#9 id=9 data-nosnippet>9</a>
<a href=#10 id=10 data-nosnippet>10</a><span class="doccomment">/// Configuration for a `POST /crawl` request.
<a href=#11 id=11 data-nosnippet>11</a>///
<a href=#12 id=12 data-nosnippet>12</a>/// Every field except `url` is optional; unset fields are NOT serialized so
<a href=#13 id=13 data-nosnippet>13</a>/// the server applies its own documented defaults.
<a href=#14 id=14 data-nosnippet>14</a></span><span class="attr">#[derive(Debug, Clone, Default, Serialize)]
<a href=#15 id=15 data-nosnippet>15</a></span><span class="kw">pub struct </span>CrawlerConfig {
<a href=#16 id=16 data-nosnippet>16</a> <span class="doccomment">/// Seed URL (required, must be HTTP/HTTPS).
<a href=#17 id=17 data-nosnippet>17</a> </span><span class="kw">pub </span>url: String,
<a href=#18 id=18 data-nosnippet>18</a>
<a href=#19 id=19 data-nosnippet>19</a> <span class="doccomment">/// Max pages to crawl.
<a href=#20 id=20 data-nosnippet>20</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#21 id=21 data-nosnippet>21</a> </span><span class="kw">pub </span>page_limit: <span class="prelude-ty">Option</span>&lt;u32&gt;,
<a href=#22 id=22 data-nosnippet>22</a> <span class="doccomment">/// Max link-follow depth.
<a href=#23 id=23 data-nosnippet>23</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#24 id=24 data-nosnippet>24</a> </span><span class="kw">pub </span>max_depth: <span class="prelude-ty">Option</span>&lt;u32&gt;,
<a href=#25 id=25 data-nosnippet>25</a> <span class="doccomment">/// Max duration (seconds, 15..=10800).
<a href=#26 id=26 data-nosnippet>26</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#27 id=27 data-nosnippet>27</a> </span><span class="kw">pub </span>max_duration: <span class="prelude-ty">Option</span>&lt;u32&gt;,
<a href=#28 id=28 data-nosnippet>28</a> <span class="doccomment">/// Max API credit to spend (0 = no limit).
<a href=#29 id=29 data-nosnippet>29</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#30 id=30 data-nosnippet>30</a> </span><span class="kw">pub </span>max_api_credit: <span class="prelude-ty">Option</span>&lt;u32&gt;,
<a href=#31 id=31 data-nosnippet>31</a>
<a href=#32 id=32 data-nosnippet>32</a> <span class="doccomment">/// Exclude these URL paths (≤100 entries).
<a href=#33 id=33 data-nosnippet>33</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Vec::is_empty"</span>)]
<a href=#34 id=34 data-nosnippet>34</a> </span><span class="kw">pub </span>exclude_paths: Vec&lt;String&gt;,
<a href=#35 id=35 data-nosnippet>35</a> <span class="doccomment">/// Restrict crawl to these paths (≤100 entries).
<a href=#36 id=36 data-nosnippet>36</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Vec::is_empty"</span>)]
<a href=#37 id=37 data-nosnippet>37</a> </span><span class="kw">pub </span>include_only_paths: Vec&lt;String&gt;,
<a href=#38 id=38 data-nosnippet>38</a>
<a href=#39 id=39 data-nosnippet>39</a> <span class="doccomment">/// Ignore the seed URL's base-path restriction.
<a href=#40 id=40 data-nosnippet>40</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"is_false"</span>)]
<a href=#41 id=41 data-nosnippet>41</a> </span><span class="kw">pub </span>ignore_base_path_restriction: bool,
<a href=#42 id=42 data-nosnippet>42</a> <span class="doccomment">/// Follow links to external domains.
<a href=#43 id=43 data-nosnippet>43</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"is_false"</span>)]
<a href=#44 id=44 data-nosnippet>44</a> </span><span class="kw">pub </span>follow_external_links: bool,
<a href=#45 id=45 data-nosnippet>45</a> <span class="doccomment">/// Whitelist of external domains (≤250 entries).
<a href=#46 id=46 data-nosnippet>46</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Vec::is_empty"</span>)]
<a href=#47 id=47 data-nosnippet>47</a> </span><span class="kw">pub </span>allowed_external_domains: Vec&lt;String&gt;,
<a href=#48 id=48 data-nosnippet>48</a>
<a href=#49 id=49 data-nosnippet>49</a> <span class="doccomment">/// Tri-state: None = unset (server default true), Some(v) = explicit.
<a href=#50 id=50 data-nosnippet>50</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#51 id=51 data-nosnippet>51</a> </span><span class="kw">pub </span>follow_internal_subdomains: <span class="prelude-ty">Option</span>&lt;bool&gt;,
<a href=#52 id=52 data-nosnippet>52</a> <span class="doccomment">/// Whitelist of internal subdomains (≤250 entries).
<a href=#53 id=53 data-nosnippet>53</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Vec::is_empty"</span>)]
<a href=#54 id=54 data-nosnippet>54</a> </span><span class="kw">pub </span>allowed_internal_subdomains: Vec&lt;String&gt;,
<a href=#55 id=55 data-nosnippet>55</a>
<a href=#56 id=56 data-nosnippet>56</a> <span class="doccomment">/// Request headers sent for every crawled page.
<a href=#57 id=57 data-nosnippet>57</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"BTreeMap::is_empty"</span>)]
<a href=#58 id=58 data-nosnippet>58</a> </span><span class="kw">pub </span>headers: BTreeMap&lt;String, String&gt;,
<a href=#59 id=59 data-nosnippet>59</a> <span class="doccomment">/// Delay between requests (ms, 0..=15000).
<a href=#60 id=60 data-nosnippet>60</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#61 id=61 data-nosnippet>61</a> </span><span class="kw">pub </span>delay: <span class="prelude-ty">Option</span>&lt;u32&gt;,
<a href=#62 id=62 data-nosnippet>62</a> <span class="doccomment">/// Override User-Agent.
<a href=#63 id=63 data-nosnippet>63</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#64 id=64 data-nosnippet>64</a> </span><span class="kw">pub </span>user_agent: <span class="prelude-ty">Option</span>&lt;String&gt;,
<a href=#65 id=65 data-nosnippet>65</a> <span class="doccomment">/// Max concurrent workers.
<a href=#66 id=66 data-nosnippet>66</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#67 id=67 data-nosnippet>67</a> </span><span class="kw">pub </span>max_concurrency: <span class="prelude-ty">Option</span>&lt;u32&gt;,
<a href=#68 id=68 data-nosnippet>68</a> <span class="doccomment">/// Rendering delay (ms, 0..=25000).
<a href=#69 id=69 data-nosnippet>69</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#70 id=70 data-nosnippet>70</a> </span><span class="kw">pub </span>rendering_delay: <span class="prelude-ty">Option</span>&lt;u32&gt;,
<a href=#71 id=71 data-nosnippet>71</a>
<a href=#72 id=72 data-nosnippet>72</a> <span class="doccomment">/// Honor sitemaps.
<a href=#73 id=73 data-nosnippet>73</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"is_false"</span>)]
<a href=#74 id=74 data-nosnippet>74</a> </span><span class="kw">pub </span>use_sitemaps: bool,
<a href=#75 id=75 data-nosnippet>75</a> <span class="doccomment">/// Follow `nofollow` links anyway.
<a href=#76 id=76 data-nosnippet>76</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"is_false"</span>)]
<a href=#77 id=77 data-nosnippet>77</a> </span><span class="kw">pub </span>ignore_no_follow: bool,
<a href=#78 id=78 data-nosnippet>78</a>
<a href=#79 id=79 data-nosnippet>79</a> <span class="doccomment">/// Tri-state: None = server default (true).
<a href=#80 id=80 data-nosnippet>80</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#81 id=81 data-nosnippet>81</a> </span><span class="kw">pub </span>respect_robots_txt: <span class="prelude-ty">Option</span>&lt;bool&gt;,
<a href=#82 id=82 data-nosnippet>82</a>
<a href=#83 id=83 data-nosnippet>83</a> <span class="doccomment">/// Enable cache.
<a href=#84 id=84 data-nosnippet>84</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"is_false"</span>)]
<a href=#85 id=85 data-nosnippet>85</a> </span><span class="kw">pub </span>cache: bool,
<a href=#86 id=86 data-nosnippet>86</a> <span class="doccomment">/// Cache TTL seconds (0..=604800).
<a href=#87 id=87 data-nosnippet>87</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#88 id=88 data-nosnippet>88</a> </span><span class="kw">pub </span>cache_ttl: <span class="prelude-ty">Option</span>&lt;u32&gt;,
<a href=#89 id=89 data-nosnippet>89</a> <span class="doccomment">/// Force cache refresh.
<a href=#90 id=90 data-nosnippet>90</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"is_false"</span>)]
<a href=#91 id=91 data-nosnippet>91</a> </span><span class="kw">pub </span>cache_clear: bool,
<a href=#92 id=92 data-nosnippet>92</a>
<a href=#93 id=93 data-nosnippet>93</a> <span class="doccomment">/// Desired content formats.
<a href=#94 id=94 data-nosnippet>94</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Vec::is_empty"</span>)]
<a href=#95 id=95 data-nosnippet>95</a> </span><span class="kw">pub </span>content_formats: Vec&lt;CrawlerContentFormat&gt;,
<a href=#96 id=96 data-nosnippet>96</a> <span class="doccomment">/// Inline extraction rules.
<a href=#97 id=97 data-nosnippet>97</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#98 id=98 data-nosnippet>98</a> </span><span class="kw">pub </span>extraction_rules: <span class="prelude-ty">Option</span>&lt;serde_json::Value&gt;,
<a href=#99 id=99 data-nosnippet>99</a>
<a href=#100 id=100 data-nosnippet>100</a> <span class="doccomment">/// Enable ASP bypass.
<a href=#101 id=101 data-nosnippet>101</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"is_false"</span>)]
<a href=#102 id=102 data-nosnippet>102</a> </span><span class="kw">pub </span>asp: bool,
<a href=#103 id=103 data-nosnippet>103</a> <span class="doccomment">/// Proxy pool name.
<a href=#104 id=104 data-nosnippet>104</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#105 id=105 data-nosnippet>105</a> </span><span class="kw">pub </span>proxy_pool: <span class="prelude-ty">Option</span>&lt;String&gt;,
<a href=#106 id=106 data-nosnippet>106</a> <span class="doccomment">/// Proxy country.
<a href=#107 id=107 data-nosnippet>107</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#108 id=108 data-nosnippet>108</a> </span><span class="kw">pub </span>country: <span class="prelude-ty">Option</span>&lt;String&gt;,
<a href=#109 id=109 data-nosnippet>109</a>
<a href=#110 id=110 data-nosnippet>110</a> <span class="doccomment">/// Webhook name.
<a href=#111 id=111 data-nosnippet>111</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Option::is_none"</span>)]
<a href=#112 id=112 data-nosnippet>112</a> </span><span class="kw">pub </span>webhook_name: <span class="prelude-ty">Option</span>&lt;String&gt;,
<a href=#113 id=113 data-nosnippet>113</a> <span class="doccomment">/// Webhook events.
<a href=#114 id=114 data-nosnippet>114</a> </span><span class="attr">#[serde(skip_serializing_if = <span class="string">"Vec::is_empty"</span>)]
<a href=#115 id=115 data-nosnippet>115</a> </span><span class="kw">pub </span>webhook_events: Vec&lt;CrawlerWebhookEvent&gt;,
<a href=#116 id=116 data-nosnippet>116</a>}
<a href=#117 id=117 data-nosnippet>117</a>
<a href=#118 id=118 data-nosnippet>118</a><span class="kw">fn </span>is_false(v: <span class="kw-2">&</span>bool) -> bool {
<a href=#119 id=119 data-nosnippet>119</a> !<span class="kw-2">*</span>v
<a href=#120 id=120 data-nosnippet>120</a>}
<a href=#121 id=121 data-nosnippet>121</a>
<a href=#122 id=122 data-nosnippet>122</a><span class="kw">impl </span>CrawlerConfig {
<a href=#123 id=123 data-nosnippet>123</a> <span class="doccomment">/// Start a builder for `url`.
<a href=#124 id=124 data-nosnippet>124</a> </span><span class="kw">pub fn </span>builder(url: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; CrawlerConfigBuilder {
<a href=#125 id=125 data-nosnippet>125</a> CrawlerConfigBuilder {
<a href=#126 id=126 data-nosnippet>126</a> cfg: CrawlerConfig {
<a href=#127 id=127 data-nosnippet>127</a> url: url.into(),
<a href=#128 id=128 data-nosnippet>128</a> ..Default::default()
<a href=#129 id=129 data-nosnippet>129</a> },
<a href=#130 id=130 data-nosnippet>130</a> }
<a href=#131 id=131 data-nosnippet>131</a> }
<a href=#132 id=132 data-nosnippet>132</a>
<a href=#133 id=133 data-nosnippet>133</a> <span class="doccomment">/// Validate numeric bounds + list sizes. Ported from
<a href=#134 id=134 data-nosnippet>134</a> /// `sdk/go/config_crawler.go::validateBounds`.
<a href=#135 id=135 data-nosnippet>135</a> </span><span class="kw">pub fn </span>validate(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; <span class="prelude-ty">Result</span>&lt;(), ScrapflyError&gt; {
<a href=#136 id=136 data-nosnippet>136</a> <span class="kw">if </span><span class="self">self</span>.url.is_empty() {
<a href=#137 id=137 data-nosnippet>137</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="string">"url is required"</span>.into()));
<a href=#138 id=138 data-nosnippet>138</a> }
<a href=#139 id=139 data-nosnippet>139</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(d) = <span class="self">self</span>.max_duration {
<a href=#140 id=140 data-nosnippet>140</a> <span class="kw">if </span>!(<span class="number">15</span>..=<span class="number">10800</span>).contains(<span class="kw-2">&</span>d) {
<a href=#141 id=141 data-nosnippet>141</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="macro">format!</span>(
<a href=#142 id=142 data-nosnippet>142</a> <span class="string">"max_duration must be between 15 and 10800 seconds, got {}"</span>,
<a href=#143 id=143 data-nosnippet>143</a> d
<a href=#144 id=144 data-nosnippet>144</a> )));
<a href=#145 id=145 data-nosnippet>145</a> }
<a href=#146 id=146 data-nosnippet>146</a> }
<a href=#147 id=147 data-nosnippet>147</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(rd) = <span class="self">self</span>.rendering_delay {
<a href=#148 id=148 data-nosnippet>148</a> <span class="kw">if </span>rd > <span class="number">25000 </span>{
<a href=#149 id=149 data-nosnippet>149</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="macro">format!</span>(
<a href=#150 id=150 data-nosnippet>150</a> <span class="string">"rendering_delay must be between 0 and 25000 ms, got {}"</span>,
<a href=#151 id=151 data-nosnippet>151</a> rd
<a href=#152 id=152 data-nosnippet>152</a> )));
<a href=#153 id=153 data-nosnippet>153</a> }
<a href=#154 id=154 data-nosnippet>154</a> }
<a href=#155 id=155 data-nosnippet>155</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(delay) = <span class="self">self</span>.delay {
<a href=#156 id=156 data-nosnippet>156</a> <span class="kw">if </span>delay > <span class="number">15000 </span>{
<a href=#157 id=157 data-nosnippet>157</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="macro">format!</span>(
<a href=#158 id=158 data-nosnippet>158</a> <span class="string">"delay must be between 0 and 15000 ms, got {}"</span>,
<a href=#159 id=159 data-nosnippet>159</a> delay
<a href=#160 id=160 data-nosnippet>160</a> )));
<a href=#161 id=161 data-nosnippet>161</a> }
<a href=#162 id=162 data-nosnippet>162</a> }
<a href=#163 id=163 data-nosnippet>163</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(ttl) = <span class="self">self</span>.cache_ttl {
<a href=#164 id=164 data-nosnippet>164</a> <span class="kw">if </span>ttl > <span class="number">604800 </span>{
<a href=#165 id=165 data-nosnippet>165</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="macro">format!</span>(
<a href=#166 id=166 data-nosnippet>166</a> <span class="string">"cache_ttl must be between 0 and 604800 seconds, got {}"</span>,
<a href=#167 id=167 data-nosnippet>167</a> ttl
<a href=#168 id=168 data-nosnippet>168</a> )));
<a href=#169 id=169 data-nosnippet>169</a> }
<a href=#170 id=170 data-nosnippet>170</a> }
<a href=#171 id=171 data-nosnippet>171</a> <span class="kw">if </span><span class="self">self</span>.exclude_paths.len() > <span class="number">100 </span>{
<a href=#172 id=172 data-nosnippet>172</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="macro">format!</span>(
<a href=#173 id=173 data-nosnippet>173</a> <span class="string">"exclude_paths is limited to 100 entries, got {}"</span>,
<a href=#174 id=174 data-nosnippet>174</a> <span class="self">self</span>.exclude_paths.len()
<a href=#175 id=175 data-nosnippet>175</a> )));
<a href=#176 id=176 data-nosnippet>176</a> }
<a href=#177 id=177 data-nosnippet>177</a> <span class="kw">if </span><span class="self">self</span>.include_only_paths.len() > <span class="number">100 </span>{
<a href=#178 id=178 data-nosnippet>178</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="macro">format!</span>(
<a href=#179 id=179 data-nosnippet>179</a> <span class="string">"include_only_paths is limited to 100 entries, got {}"</span>,
<a href=#180 id=180 data-nosnippet>180</a> <span class="self">self</span>.include_only_paths.len()
<a href=#181 id=181 data-nosnippet>181</a> )));
<a href=#182 id=182 data-nosnippet>182</a> }
<a href=#183 id=183 data-nosnippet>183</a> <span class="kw">if </span>!<span class="self">self</span>.exclude_paths.is_empty() && !<span class="self">self</span>.include_only_paths.is_empty() {
<a href=#184 id=184 data-nosnippet>184</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(
<a href=#185 id=185 data-nosnippet>185</a> <span class="string">"exclude_paths and include_only_paths are mutually exclusive"</span>.into(),
<a href=#186 id=186 data-nosnippet>186</a> ));
<a href=#187 id=187 data-nosnippet>187</a> }
<a href=#188 id=188 data-nosnippet>188</a> <span class="kw">if </span><span class="self">self</span>.allowed_external_domains.len() > <span class="number">250 </span>{
<a href=#189 id=189 data-nosnippet>189</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="macro">format!</span>(
<a href=#190 id=190 data-nosnippet>190</a> <span class="string">"allowed_external_domains is limited to 250 entries, got {}"</span>,
<a href=#191 id=191 data-nosnippet>191</a> <span class="self">self</span>.allowed_external_domains.len()
<a href=#192 id=192 data-nosnippet>192</a> )));
<a href=#193 id=193 data-nosnippet>193</a> }
<a href=#194 id=194 data-nosnippet>194</a> <span class="kw">if </span><span class="self">self</span>.allowed_internal_subdomains.len() > <span class="number">250 </span>{
<a href=#195 id=195 data-nosnippet>195</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="macro">format!</span>(
<a href=#196 id=196 data-nosnippet>196</a> <span class="string">"allowed_internal_subdomains is limited to 250 entries, got {}"</span>,
<a href=#197 id=197 data-nosnippet>197</a> <span class="self">self</span>.allowed_internal_subdomains.len()
<a href=#198 id=198 data-nosnippet>198</a> )));
<a href=#199 id=199 data-nosnippet>199</a> }
<a href=#200 id=200 data-nosnippet>200</a> <span class="prelude-val">Ok</span>(())
<a href=#201 id=201 data-nosnippet>201</a> }
<a href=#202 id=202 data-nosnippet>202</a>
<a href=#203 id=203 data-nosnippet>203</a> <span class="doccomment">/// Serialize into the JSON body the crawler endpoint expects.
<a href=#204 id=204 data-nosnippet>204</a> </span><span class="kw">pub fn </span>to_json_body(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; <span class="prelude-ty">Result</span>&lt;Vec&lt;u8&gt;, ScrapflyError&gt; {
<a href=#205 id=205 data-nosnippet>205</a> <span class="self">self</span>.validate()<span class="question-mark">?</span>;
<a href=#206 id=206 data-nosnippet>206</a> <span class="prelude-val">Ok</span>(serde_json::to_vec(<span class="self">self</span>)<span class="question-mark">?</span>)
<a href=#207 id=207 data-nosnippet>207</a> }
<a href=#208 id=208 data-nosnippet>208</a>}
<a href=#209 id=209 data-nosnippet>209</a>
<a href=#210 id=210 data-nosnippet>210</a><span class="doccomment">/// Builder for [`CrawlerConfig`].
<a href=#211 id=211 data-nosnippet>211</a></span><span class="attr">#[derive(Debug, Clone)]
<a href=#212 id=212 data-nosnippet>212</a></span><span class="kw">pub struct </span>CrawlerConfigBuilder {
<a href=#213 id=213 data-nosnippet>213</a> cfg: CrawlerConfig,
<a href=#214 id=214 data-nosnippet>214</a>}
<a href=#215 id=215 data-nosnippet>215</a>
<a href=#216 id=216 data-nosnippet>216</a><span class="kw">impl </span>CrawlerConfigBuilder {
<a href=#217 id=217 data-nosnippet>217</a> <span class="doccomment">/// Set page limit.
<a href=#218 id=218 data-nosnippet>218</a> </span><span class="kw">pub fn </span>page_limit(<span class="kw-2">mut </span><span class="self">self</span>, v: u32) -> <span class="self">Self </span>{
<a href=#219 id=219 data-nosnippet>219</a> <span class="self">self</span>.cfg.page_limit = <span class="prelude-val">Some</span>(v);
<a href=#220 id=220 data-nosnippet>220</a> <span class="self">self
<a href=#221 id=221 data-nosnippet>221</a> </span>}
<a href=#222 id=222 data-nosnippet>222</a> <span class="doccomment">/// Set max depth.
<a href=#223 id=223 data-nosnippet>223</a> </span><span class="kw">pub fn </span>max_depth(<span class="kw-2">mut </span><span class="self">self</span>, v: u32) -> <span class="self">Self </span>{
<a href=#224 id=224 data-nosnippet>224</a> <span class="self">self</span>.cfg.max_depth = <span class="prelude-val">Some</span>(v);
<a href=#225 id=225 data-nosnippet>225</a> <span class="self">self
<a href=#226 id=226 data-nosnippet>226</a> </span>}
<a href=#227 id=227 data-nosnippet>227</a> <span class="doccomment">/// Set max duration (seconds).
<a href=#228 id=228 data-nosnippet>228</a> </span><span class="kw">pub fn </span>max_duration(<span class="kw-2">mut </span><span class="self">self</span>, v: u32) -> <span class="self">Self </span>{
<a href=#229 id=229 data-nosnippet>229</a> <span class="self">self</span>.cfg.max_duration = <span class="prelude-val">Some</span>(v);
<a href=#230 id=230 data-nosnippet>230</a> <span class="self">self
<a href=#231 id=231 data-nosnippet>231</a> </span>}
<a href=#232 id=232 data-nosnippet>232</a> <span class="doccomment">/// Set max API credit.
<a href=#233 id=233 data-nosnippet>233</a> </span><span class="kw">pub fn </span>max_api_credit(<span class="kw-2">mut </span><span class="self">self</span>, v: u32) -> <span class="self">Self </span>{
<a href=#234 id=234 data-nosnippet>234</a> <span class="self">self</span>.cfg.max_api_credit = <span class="prelude-val">Some</span>(v);
<a href=#235 id=235 data-nosnippet>235</a> <span class="self">self
<a href=#236 id=236 data-nosnippet>236</a> </span>}
<a href=#237 id=237 data-nosnippet>237</a> <span class="doccomment">/// Set exclude paths.
<a href=#238 id=238 data-nosnippet>238</a> </span><span class="kw">pub fn </span>exclude_paths(<span class="kw-2">mut </span><span class="self">self</span>, v: Vec&lt;String&gt;) -&gt; <span class="self">Self </span>{
<a href=#239 id=239 data-nosnippet>239</a> <span class="self">self</span>.cfg.exclude_paths = v;
<a href=#240 id=240 data-nosnippet>240</a> <span class="self">self
<a href=#241 id=241 data-nosnippet>241</a> </span>}
<a href=#242 id=242 data-nosnippet>242</a> <span class="doccomment">/// Set include-only paths.
<a href=#243 id=243 data-nosnippet>243</a> </span><span class="kw">pub fn </span>include_only_paths(<span class="kw-2">mut </span><span class="self">self</span>, v: Vec&lt;String&gt;) -&gt; <span class="self">Self </span>{
<a href=#244 id=244 data-nosnippet>244</a> <span class="self">self</span>.cfg.include_only_paths = v;
<a href=#245 id=245 data-nosnippet>245</a> <span class="self">self
<a href=#246 id=246 data-nosnippet>246</a> </span>}
<a href=#247 id=247 data-nosnippet>247</a> <span class="doccomment">/// Ignore base-path restriction.
<a href=#248 id=248 data-nosnippet>248</a> </span><span class="kw">pub fn </span>ignore_base_path_restriction(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -> <span class="self">Self </span>{
<a href=#249 id=249 data-nosnippet>249</a> <span class="self">self</span>.cfg.ignore_base_path_restriction = v;
<a href=#250 id=250 data-nosnippet>250</a> <span class="self">self
<a href=#251 id=251 data-nosnippet>251</a> </span>}
<a href=#252 id=252 data-nosnippet>252</a> <span class="doccomment">/// Follow external links.
<a href=#253 id=253 data-nosnippet>253</a> </span><span class="kw">pub fn </span>follow_external_links(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -> <span class="self">Self </span>{
<a href=#254 id=254 data-nosnippet>254</a> <span class="self">self</span>.cfg.follow_external_links = v;
<a href=#255 id=255 data-nosnippet>255</a> <span class="self">self
<a href=#256 id=256 data-nosnippet>256</a> </span>}
<a href=#257 id=257 data-nosnippet>257</a> <span class="doccomment">/// Set allowed external domains.
<a href=#258 id=258 data-nosnippet>258</a> </span><span class="kw">pub fn </span>allowed_external_domains(<span class="kw-2">mut </span><span class="self">self</span>, v: Vec&lt;String&gt;) -&gt; <span class="self">Self </span>{
<a href=#259 id=259 data-nosnippet>259</a> <span class="self">self</span>.cfg.allowed_external_domains = v;
<a href=#260 id=260 data-nosnippet>260</a> <span class="self">self
<a href=#261 id=261 data-nosnippet>261</a> </span>}
<a href=#262 id=262 data-nosnippet>262</a> <span class="doccomment">/// Tri-state follow-internal-subdomains.
<a href=#263 id=263 data-nosnippet>263</a> </span><span class="kw">pub fn </span>follow_internal_subdomains(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -> <span class="self">Self </span>{
<a href=#264 id=264 data-nosnippet>264</a> <span class="self">self</span>.cfg.follow_internal_subdomains = <span class="prelude-val">Some</span>(v);
<a href=#265 id=265 data-nosnippet>265</a> <span class="self">self
<a href=#266 id=266 data-nosnippet>266</a> </span>}
<a href=#267 id=267 data-nosnippet>267</a> <span class="doccomment">/// Set allowed internal subdomains.
<a href=#268 id=268 data-nosnippet>268</a> </span><span class="kw">pub fn </span>allowed_internal_subdomains(<span class="kw-2">mut </span><span class="self">self</span>, v: Vec&lt;String&gt;) -&gt; <span class="self">Self </span>{
<a href=#269 id=269 data-nosnippet>269</a> <span class="self">self</span>.cfg.allowed_internal_subdomains = v;
<a href=#270 id=270 data-nosnippet>270</a> <span class="self">self
<a href=#271 id=271 data-nosnippet>271</a> </span>}
<a href=#272 id=272 data-nosnippet>272</a> <span class="doccomment">/// Add header.
<a href=#273 id=273 data-nosnippet>273</a> </span><span class="kw">pub fn </span>header(<span class="kw-2">mut </span><span class="self">self</span>, k: <span class="kw">impl </span>Into&lt;String&gt;, v: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; <span class="self">Self </span>{
<a href=#274 id=274 data-nosnippet>274</a> <span class="self">self</span>.cfg.headers.insert(k.into(), v.into());
<a href=#275 id=275 data-nosnippet>275</a> <span class="self">self
<a href=#276 id=276 data-nosnippet>276</a> </span>}
<a href=#277 id=277 data-nosnippet>277</a> <span class="doccomment">/// Set delay (ms).
<a href=#278 id=278 data-nosnippet>278</a> </span><span class="kw">pub fn </span>delay(<span class="kw-2">mut </span><span class="self">self</span>, v: u32) -> <span class="self">Self </span>{
<a href=#279 id=279 data-nosnippet>279</a> <span class="self">self</span>.cfg.delay = <span class="prelude-val">Some</span>(v);
<a href=#280 id=280 data-nosnippet>280</a> <span class="self">self
<a href=#281 id=281 data-nosnippet>281</a> </span>}
<a href=#282 id=282 data-nosnippet>282</a> <span class="doccomment">/// Set User-Agent.
<a href=#283 id=283 data-nosnippet>283</a> </span><span class="kw">pub fn </span>user_agent(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; <span class="self">Self </span>{
<a href=#284 id=284 data-nosnippet>284</a> <span class="self">self</span>.cfg.user_agent = <span class="prelude-val">Some</span>(v.into());
<a href=#285 id=285 data-nosnippet>285</a> <span class="self">self
<a href=#286 id=286 data-nosnippet>286</a> </span>}
<a href="#287" id="287" data-nosnippet>287</a> <span class="doccomment">/// Set max concurrency.
<a href="#288" id="288" data-nosnippet>288</a> </span><span class="kw">pub fn </span>max_concurrency(<span class="kw-2">mut </span><span class="self">self</span>, v: u32) -&gt; <span class="self">Self </span>{
<a href="#289" id="289" data-nosnippet>289</a> <span class="self">self</span>.cfg.max_concurrency = <span class="prelude-val">Some</span>(v);
<a href="#290" id="290" data-nosnippet>290</a> <span class="self">self
<a href="#291" id="291" data-nosnippet>291</a> </span>}
<a href="#292" id="292" data-nosnippet>292</a> <span class="doccomment">/// Set rendering delay (ms).
<a href="#293" id="293" data-nosnippet>293</a> </span><span class="kw">pub fn </span>rendering_delay(<span class="kw-2">mut </span><span class="self">self</span>, v: u32) -&gt; <span class="self">Self </span>{
<a href="#294" id="294" data-nosnippet>294</a> <span class="self">self</span>.cfg.rendering_delay = <span class="prelude-val">Some</span>(v);
<a href="#295" id="295" data-nosnippet>295</a> <span class="self">self
<a href="#296" id="296" data-nosnippet>296</a> </span>}
<a href="#297" id="297" data-nosnippet>297</a> <span class="doccomment">/// Honor sitemaps.
<a href="#298" id="298" data-nosnippet>298</a> </span><span class="kw">pub fn </span>use_sitemaps(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -&gt; <span class="self">Self </span>{
<a href="#299" id="299" data-nosnippet>299</a> <span class="self">self</span>.cfg.use_sitemaps = v;
<a href="#300" id="300" data-nosnippet>300</a> <span class="self">self
<a href="#301" id="301" data-nosnippet>301</a> </span>}
<a href="#302" id="302" data-nosnippet>302</a> <span class="doccomment">/// Ignore nofollow.
<a href="#303" id="303" data-nosnippet>303</a> </span><span class="kw">pub fn </span>ignore_no_follow(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -&gt; <span class="self">Self </span>{
<a href="#304" id="304" data-nosnippet>304</a> <span class="self">self</span>.cfg.ignore_no_follow = v;
<a href="#305" id="305" data-nosnippet>305</a> <span class="self">self
<a href="#306" id="306" data-nosnippet>306</a> </span>}
<a href="#307" id="307" data-nosnippet>307</a> <span class="doccomment">/// Tri-state respect-robots-txt.
<a href="#308" id="308" data-nosnippet>308</a> </span><span class="kw">pub fn </span>respect_robots_txt(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -&gt; <span class="self">Self </span>{
<a href="#309" id="309" data-nosnippet>309</a> <span class="self">self</span>.cfg.respect_robots_txt = <span class="prelude-val">Some</span>(v);
<a href="#310" id="310" data-nosnippet>310</a> <span class="self">self
<a href="#311" id="311" data-nosnippet>311</a> </span>}
<a href="#312" id="312" data-nosnippet>312</a> <span class="doccomment">/// Enable cache.
<a href="#313" id="313" data-nosnippet>313</a> </span><span class="kw">pub fn </span>cache(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -&gt; <span class="self">Self </span>{
<a href="#314" id="314" data-nosnippet>314</a> <span class="self">self</span>.cfg.cache = v;
<a href="#315" id="315" data-nosnippet>315</a> <span class="self">self
<a href="#316" id="316" data-nosnippet>316</a> </span>}
<a href="#317" id="317" data-nosnippet>317</a> <span class="doccomment">/// Cache TTL.
<a href="#318" id="318" data-nosnippet>318</a> </span><span class="kw">pub fn </span>cache_ttl(<span class="kw-2">mut </span><span class="self">self</span>, v: u32) -&gt; <span class="self">Self </span>{
<a href="#319" id="319" data-nosnippet>319</a> <span class="self">self</span>.cfg.cache_ttl = <span class="prelude-val">Some</span>(v);
<a href="#320" id="320" data-nosnippet>320</a> <span class="self">self
<a href="#321" id="321" data-nosnippet>321</a> </span>}
<a href="#322" id="322" data-nosnippet>322</a> <span class="doccomment">/// Force cache refresh.
<a href="#323" id="323" data-nosnippet>323</a> </span><span class="kw">pub fn </span>cache_clear(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -&gt; <span class="self">Self </span>{
<a href="#324" id="324" data-nosnippet>324</a> <span class="self">self</span>.cfg.cache_clear = v;
<a href="#325" id="325" data-nosnippet>325</a> <span class="self">self
<a href="#326" id="326" data-nosnippet>326</a> </span>}
<a href="#327" id="327" data-nosnippet>327</a> <span class="doccomment">/// Add content format.
<a href="#328" id="328" data-nosnippet>328</a> </span><span class="kw">pub fn </span>content_format(<span class="kw-2">mut </span><span class="self">self</span>, v: CrawlerContentFormat) -&gt; <span class="self">Self </span>{
<a href="#329" id="329" data-nosnippet>329</a> <span class="self">self</span>.cfg.content_formats.push(v);
<a href="#330" id="330" data-nosnippet>330</a> <span class="self">self
<a href="#331" id="331" data-nosnippet>331</a> </span>}
<a href="#332" id="332" data-nosnippet>332</a> <span class="doccomment">/// Set extraction rules.
<a href="#333" id="333" data-nosnippet>333</a> </span><span class="kw">pub fn </span>extraction_rules(<span class="kw-2">mut </span><span class="self">self</span>, v: serde_json::Value) -&gt; <span class="self">Self </span>{
<a href="#334" id="334" data-nosnippet>334</a> <span class="self">self</span>.cfg.extraction_rules = <span class="prelude-val">Some</span>(v);
<a href="#335" id="335" data-nosnippet>335</a> <span class="self">self
<a href="#336" id="336" data-nosnippet>336</a> </span>}
<a href="#337" id="337" data-nosnippet>337</a> <span class="doccomment">/// Enable ASP.
<a href="#338" id="338" data-nosnippet>338</a> </span><span class="kw">pub fn </span>asp(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -&gt; <span class="self">Self </span>{
<a href="#339" id="339" data-nosnippet>339</a> <span class="self">self</span>.cfg.asp = v;
<a href="#340" id="340" data-nosnippet>340</a> <span class="self">self
<a href="#341" id="341" data-nosnippet>341</a> </span>}
<!-- Angle brackets of Rust generics are written as character references so the browser shows them as text instead of parsing a tag. --><a href="#342" id="342" data-nosnippet>342</a> <span class="doccomment">/// Set proxy pool name.
<a href="#343" id="343" data-nosnippet>343</a> </span><span class="kw">pub fn </span>proxy_pool(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; <span class="self">Self </span>{
<a href="#344" id="344" data-nosnippet>344</a> <span class="self">self</span>.cfg.proxy_pool = <span class="prelude-val">Some</span>(v.into());
<a href="#345" id="345" data-nosnippet>345</a> <span class="self">self
<a href="#346" id="346" data-nosnippet>346</a> </span>}
<!-- Angle brackets of Rust generics are written as character references so the browser shows them as text instead of parsing a tag. --><a href="#347" id="347" data-nosnippet>347</a> <span class="doccomment">/// Set country.
<a href="#348" id="348" data-nosnippet>348</a> </span><span class="kw">pub fn </span>country(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; <span class="self">Self </span>{
<a href="#349" id="349" data-nosnippet>349</a> <span class="self">self</span>.cfg.country = <span class="prelude-val">Some</span>(v.into());
<a href="#350" id="350" data-nosnippet>350</a> <span class="self">self
<a href="#351" id="351" data-nosnippet>351</a> </span>}
<!-- Angle brackets of Rust generics are written as character references so the browser shows them as text instead of parsing a tag. --><a href="#352" id="352" data-nosnippet>352</a> <span class="doccomment">/// Set webhook name.
<a href="#353" id="353" data-nosnippet>353</a> </span><span class="kw">pub fn </span>webhook_name(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; <span class="self">Self </span>{
<a href="#354" id="354" data-nosnippet>354</a> <span class="self">self</span>.cfg.webhook_name = <span class="prelude-val">Some</span>(v.into());
<a href="#355" id="355" data-nosnippet>355</a> <span class="self">self
<a href="#356" id="356" data-nosnippet>356</a> </span>}
<a href="#357" id="357" data-nosnippet>357</a> <span class="doccomment">/// Add webhook event.
<a href="#358" id="358" data-nosnippet>358</a> </span><span class="kw">pub fn </span>webhook_event(<span class="kw-2">mut </span><span class="self">self</span>, v: CrawlerWebhookEvent) -&gt; <span class="self">Self </span>{
<a href="#359" id="359" data-nosnippet>359</a> <span class="self">self</span>.cfg.webhook_events.push(v);
<a href="#360" id="360" data-nosnippet>360</a> <span class="self">self
<a href="#361" id="361" data-nosnippet>361</a> </span>}
<!-- Angle brackets of the Result type parameters are written as character references so the browser shows them as text instead of parsing a tag. --><a href="#362" id="362" data-nosnippet>362</a> <span class="doccomment">/// Finalize the builder.
<a href="#363" id="363" data-nosnippet>363</a> </span><span class="kw">pub fn </span>build(<span class="self">self</span>) -&gt; <span class="prelude-ty">Result</span>&lt;CrawlerConfig, ScrapflyError&gt; {
<a href="#364" id="364" data-nosnippet>364</a> <span class="self">self</span>.cfg.validate()<span class="question-mark">?</span>;
<a href="#365" id="365" data-nosnippet>365</a> <span class="prelude-val">Ok</span>(<span class="self">self</span>.cfg)
<a href="#366" id="366" data-nosnippet>366</a> }
<a href=#367 id=367 data-nosnippet>367</a>}
</code></pre></div></section></main></body></html>