scrapfly-sdk 0.2.4

Async Rust client for the Scrapfly web scraping, screenshot, extraction and crawler APIs
scrapfly_sdk/crawler.rs

//! High-level [`Crawl`] wrapper — port of `sdk/go/crawl.go`.

use std::time::{Duration, Instant};

use crate::client::Client;
use crate::config::crawler::CrawlerConfig;
use crate::enums::CrawlerContentFormat;
use crate::error::ScrapflyError;
use crate::result::crawler::{
    CrawlContent, CrawlerArtifact, CrawlerArtifactType, CrawlerContents, CrawlerStatus, CrawlerUrls,
};

/// Polling options for [`Crawl::wait`].
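///
/// # Example
///
/// A construction sketch using the fields defined just below (the
/// `scrapfly_sdk::crawler` path is assumed from this file's location):
/// poll every 2 seconds and give up after 10 minutes, keeping the
/// remaining defaults.
///
/// ```no_run
/// use std::time::Duration;
/// use scrapfly_sdk::crawler::WaitOptions;
///
/// let opts = WaitOptions {
///     poll_interval: Duration::from_secs(2),
///     max_wait: Some(Duration::from_secs(600)),
///     ..WaitOptions::default()
/// };
/// ```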
#[derive(Debug, Clone)]
pub struct WaitOptions {
    /// How often to poll (default 5s).
    pub poll_interval: Duration,
    /// Optional deadline; `None` means wait forever.
    pub max_wait: Option<Duration>,
    /// Verbose logging (currently a no-op — reserved for future use).
    pub verbose: bool,
    /// Return `Ok(())` instead of `CrawlerCancelled` when the job
    /// terminates in the CANCELLED state. Useful for the
    /// cancel-then-wait pattern.
    pub allow_cancelled: bool,
}

impl Default for WaitOptions {
    fn default() -> Self {
        Self {
            poll_interval: Duration::from_secs(5),
            max_wait: None,
            verbose: false,
            allow_cancelled: false,
        }
    }
}

/// High-level crawler lifecycle wrapper. Holds a borrow of the [`Client`]
/// and caches the last status + downloaded artifacts.
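///
/// # Example
///
/// An end-to-end lifecycle sketch. The `Client`, `CrawlerConfig`, and
/// `CrawlerContentFormat` values are assumed to be built elsewhere (their
/// constructors and variants live in other modules), and the module paths
/// mirror this file's `use crate::...` imports.
///
/// ```no_run
/// use scrapfly_sdk::client::Client;
/// use scrapfly_sdk::config::crawler::CrawlerConfig;
/// use scrapfly_sdk::crawler::{Crawl, WaitOptions};
/// use scrapfly_sdk::enums::CrawlerContentFormat;
/// use scrapfly_sdk::error::ScrapflyError;
///
/// async fn run_crawl(
///     client: &Client,
///     config: CrawlerConfig,
///     format: CrawlerContentFormat,
/// ) -> Result<(), ScrapflyError> {
///     let mut crawl = Crawl::new(client, config);
///     crawl.start().await?;                      // schedule the job
///     crawl.wait(WaitOptions::default()).await?; // poll until terminal
///     // Read one page's content (empty string if the URL wasn't crawled).
///     let body = crawl.read_string("https://example.com/", format).await?;
///     println!("{} bytes", body.len());
///     Ok(())
/// }
/// ```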
pub struct Crawl<'a> {
    client: &'a Client,
    config: CrawlerConfig,
    uuid: Option<String>,
    cached_status: Option<CrawlerStatus>,
    cached_warc: Option<CrawlerArtifact>,
    cached_har: Option<CrawlerArtifact>,
}

impl<'a> Crawl<'a> {
    /// Wrap a [`CrawlerConfig`] without starting the job.
    pub fn new(client: &'a Client, config: CrawlerConfig) -> Self {
        Self {
            client,
            config,
            uuid: None,
            cached_status: None,
            cached_warc: None,
            cached_har: None,
        }
    }

    /// Job UUID (empty string before [`Crawl::start`]).
    pub fn uuid(&self) -> &str {
        self.uuid.as_deref().unwrap_or("")
    }

    /// Whether `start()` has been called successfully.
    pub fn started(&self) -> bool {
        self.uuid.is_some()
    }

    /// Schedule the crawler job. Returns `CrawlerAlreadyStarted` on re-entry.
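    ///
    /// # Example
    ///
    /// A sketch of starting a job and logging its UUID; `crawl` is a
    /// [`Crawl`] built with [`Crawl::new`].
    ///
    /// ```no_run
    /// # use scrapfly_sdk::crawler::Crawl;
    /// # async fn demo(mut crawl: Crawl<'_>) -> Result<(), scrapfly_sdk::error::ScrapflyError> {
    /// crawl.start().await?;
    /// assert!(crawl.started());
    /// println!("crawler job {}", crawl.uuid());
    /// # Ok(())
    /// # }
    /// ```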
    pub async fn start(&mut self) -> Result<(), ScrapflyError> {
        if self.uuid.is_some() {
            return Err(ScrapflyError::CrawlerAlreadyStarted);
        }
        let resp = self.client.start_crawl(&self.config).await?;
        self.uuid = Some(resp.crawler_uuid);
        Ok(())
    }

    fn uuid_required(&self) -> Result<&str, ScrapflyError> {
        match &self.uuid {
            Some(u) => Ok(u.as_str()),
            None => Err(ScrapflyError::CrawlerNotStarted),
        }
    }

    /// Fetch the status, optionally using the cached copy.
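    ///
    /// # Example
    ///
    /// A sketch: `refresh = true` forces a round-trip, while `refresh =
    /// false` reuses the cached copy when one exists. The `is_finished`
    /// field comes from [`CrawlerStatus`] as used in [`Crawl::wait`].
    ///
    /// ```no_run
    /// # use scrapfly_sdk::crawler::Crawl;
    /// # async fn demo(crawl: &mut Crawl<'_>) -> Result<(), scrapfly_sdk::error::ScrapflyError> {
    /// let status = crawl.status(true).await?; // force a fresh fetch
    /// println!("finished: {}", status.is_finished);
    /// # Ok(())
    /// # }
    /// ```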
    pub async fn status(&mut self, refresh: bool) -> Result<&CrawlerStatus, ScrapflyError> {
        let uuid = self.uuid_required()?.to_string();
        if refresh || self.cached_status.is_none() {
            let s = self.client.crawl_status(&uuid).await?;
            self.cached_status = Some(s);
        }
        match &self.cached_status {
            Some(s) => Ok(s),
            None => Err(ScrapflyError::CrawlerNotStarted),
        }
    }

    /// Poll status until the job reaches a terminal state.
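    ///
    /// # Example
    ///
    /// A sketch: wait at most 10 minutes and handle the timeout separately.
    /// `CrawlerTimeout` is returned when the next poll would pass the
    /// deadline.
    ///
    /// ```no_run
    /// # use std::time::Duration;
    /// # use scrapfly_sdk::crawler::{Crawl, WaitOptions};
    /// # use scrapfly_sdk::error::ScrapflyError;
    /// # async fn demo(crawl: &mut Crawl<'_>) -> Result<(), ScrapflyError> {
    /// let opts = WaitOptions {
    ///     max_wait: Some(Duration::from_secs(600)),
    ///     ..WaitOptions::default()
    /// };
    /// match crawl.wait(opts).await {
    ///     Err(ScrapflyError::CrawlerTimeout) => eprintln!("still running after 10m"),
    ///     other => other?,
    /// }
    /// # Ok(())
    /// # }
    /// ```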
    pub async fn wait(&mut self, opts: WaitOptions) -> Result<(), ScrapflyError> {
        self.uuid_required()?;
        let deadline = opts.max_wait.map(|d| Instant::now() + d);
        loop {
            let status = self.status(true).await?.clone();
            if status.is_finished || status.is_cancelled() {
                if status.is_failed() {
                    let reason = status.state.stop_reason.clone().unwrap_or_default();
                    return Err(ScrapflyError::CrawlerFailed(crate::error::ApiError {
                        message: format!("crawl failed (stop_reason={})", reason),
                        ..Default::default()
                    }));
                }
                if status.is_cancelled() {
                    if opts.allow_cancelled {
                        return Ok(());
                    }
                    return Err(ScrapflyError::CrawlerCancelled);
                }
                return Ok(());
            }
            if let Some(d) = deadline {
                if Instant::now() + opts.poll_interval > d {
                    return Err(ScrapflyError::CrawlerTimeout);
                }
            }
            tokio::time::sleep(opts.poll_interval).await;
        }
    }

    /// Cancel the running crawl. No-op if already finished server-side.
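    ///
    /// # Example
    ///
    /// A sketch of the cancel-then-wait pattern mentioned on
    /// [`WaitOptions::allow_cancelled`]: request cancellation, then wait
    /// for the job to settle without treating CANCELLED as an error.
    ///
    /// ```no_run
    /// # use scrapfly_sdk::crawler::{Crawl, WaitOptions};
    /// # async fn demo(crawl: &mut Crawl<'_>) -> Result<(), scrapfly_sdk::error::ScrapflyError> {
    /// let opts = WaitOptions { allow_cancelled: true, ..WaitOptions::default() };
    /// crawl.cancel().await?;
    /// crawl.wait(opts).await?;
    /// # Ok(())
    /// # }
    /// ```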
    pub async fn cancel(&self) -> Result<(), ScrapflyError> {
        let uuid = self.uuid_required()?;
        self.client.crawl_cancel(uuid).await
    }

    /// Paginated URL listing.
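    ///
    /// # Example
    ///
    /// A sketch: fetch page 1 of up to 50 discovered URLs with no status
    /// filter (1-based pagination is assumed; the [`CrawlerUrls`] payload
    /// shape is defined in the result module).
    ///
    /// ```no_run
    /// # use scrapfly_sdk::crawler::Crawl;
    /// # async fn demo(crawl: &Crawl<'_>) -> Result<(), scrapfly_sdk::error::ScrapflyError> {
    /// let first_page = crawl.urls(None, 1, 50).await?;
    /// # let _ = first_page;
    /// # Ok(())
    /// # }
    /// ```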
    pub async fn urls(
        &self,
        status_filter: Option<&str>,
        page: u32,
        per_page: u32,
    ) -> Result<CrawlerUrls, ScrapflyError> {
        let uuid = self.uuid_required()?;
        self.client
            .crawl_urls(uuid, status_filter, page, per_page)
            .await
    }

    /// Read a single URL's content and wrap it in a [`CrawlContent`]. Returns
    /// `Ok(None)` when the URL isn't part of the crawl.
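    ///
    /// # Example
    ///
    /// A sketch distinguishing "crawled" from "not part of the crawl"; the
    /// content format is passed in rather than hard-coded, since the
    /// available [`CrawlerContentFormat`] variants live in the enums module.
    ///
    /// ```no_run
    /// # use scrapfly_sdk::crawler::Crawl;
    /// # use scrapfly_sdk::enums::CrawlerContentFormat;
    /// # async fn demo(crawl: &Crawl<'_>, format: CrawlerContentFormat) -> Result<(), scrapfly_sdk::error::ScrapflyError> {
    /// match crawl.read("https://example.com/pricing", format).await? {
    ///     Some(page) => println!("{}: {} bytes", page.url, page.content.len()),
    ///     None => println!("URL was not visited by this crawl"),
    /// }
    /// # Ok(())
    /// # }
    /// ```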
    pub async fn read(
        &self,
        target_url: &str,
        format: CrawlerContentFormat,
    ) -> Result<Option<CrawlContent>, ScrapflyError> {
        let uuid = self.uuid_required()?.to_string();
        match self
            .client
            .crawl_contents_plain(&uuid, target_url, format)
            .await
        {
            Ok(content) => Ok(Some(CrawlContent {
                url: target_url.to_string(),
                content,
                crawl_uuid: uuid,
            })),
            Err(ScrapflyError::ApiClient(e)) if e.http_status == 404 => Ok(None),
            Err(ScrapflyError::CrawlerFailed(e)) if e.http_status == 404 => Ok(None),
            Err(e) => Err(e),
        }
    }

    /// Read the raw content string (empty string when URL not in crawl).
    pub async fn read_string(
        &self,
        target_url: &str,
        format: CrawlerContentFormat,
    ) -> Result<String, ScrapflyError> {
        Ok(self
            .read(target_url, format)
            .await?
            .map(|c| c.content)
            .unwrap_or_default())
    }

    /// Batch read up to 100 URLs.
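    ///
    /// # Example
    ///
    /// A sketch: judging from the return type, the result maps each URL to
    /// a per-format content map, so a nested loop covers every returned
    /// variant.
    ///
    /// ```no_run
    /// # use scrapfly_sdk::crawler::Crawl;
    /// # use scrapfly_sdk::enums::CrawlerContentFormat;
    /// # async fn demo(crawl: &Crawl<'_>, urls: Vec<String>, formats: Vec<CrawlerContentFormat>) -> Result<(), scrapfly_sdk::error::ScrapflyError> {
    /// let batch = crawl.read_batch(&urls, &formats).await?;
    /// for (url, by_format) in &batch {
    ///     for (format, content) in by_format {
    ///         println!("{url} [{format}]: {} bytes", content.len());
    ///     }
    /// }
    /// # Ok(())
    /// # }
    /// ```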
    pub async fn read_batch(
        &self,
        urls: &[String],
        formats: &[CrawlerContentFormat],
    ) -> Result<
        std::collections::BTreeMap<String, std::collections::BTreeMap<String, String>>,
        ScrapflyError,
    > {
        let uuid = self.uuid_required()?;
        self.client.crawl_contents_batch(uuid, urls, formats).await
    }

    /// Bulk JSON contents.
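    ///
    /// # Example
    ///
    /// A sketch: fetch the first 100 items (limit/offset semantics assumed
    /// from the parameter names).
    ///
    /// ```no_run
    /// # use scrapfly_sdk::crawler::Crawl;
    /// # use scrapfly_sdk::enums::CrawlerContentFormat;
    /// # async fn demo(crawl: &Crawl<'_>, format: CrawlerContentFormat) -> Result<(), scrapfly_sdk::error::ScrapflyError> {
    /// let first_batch = crawl.contents(format, Some(100), Some(0)).await?;
    /// # let _ = first_batch;
    /// # Ok(())
    /// # }
    /// ```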
    pub async fn contents(
        &self,
        format: CrawlerContentFormat,
        limit: Option<u32>,
        offset: Option<u32>,
    ) -> Result<CrawlerContents, ScrapflyError> {
        let uuid = self.uuid_required()?;
        self.client
            .crawl_contents_json(uuid, format, limit, offset)
            .await
    }

    /// Download + cache the WARC artifact.
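    ///
    /// # Example
    ///
    /// A sketch: repeated calls reuse the cached download; [`Crawl::har`]
    /// behaves the same way for the HAR artifact.
    ///
    /// ```no_run
    /// # use scrapfly_sdk::crawler::Crawl;
    /// # async fn demo(crawl: &mut Crawl<'_>) -> Result<(), scrapfly_sdk::error::ScrapflyError> {
    /// let warc = crawl.warc().await?; // first call downloads
    /// # let _ = warc;
    /// let _again = crawl.warc().await?; // served from the cache
    /// # Ok(())
    /// # }
    /// ```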
    pub async fn warc(&mut self) -> Result<&CrawlerArtifact, ScrapflyError> {
        let uuid = self.uuid_required()?.to_string();
        if self.cached_warc.is_none() {
            let a = self
                .client
                .crawl_artifact(&uuid, CrawlerArtifactType::Warc)
                .await?;
            self.cached_warc = Some(a);
        }
        match &self.cached_warc {
            Some(a) => Ok(a),
            None => Err(ScrapflyError::Config(
                "warc cache unexpectedly empty".into(),
            )),
        }
    }

    /// Download + cache the HAR artifact.
    pub async fn har(&mut self) -> Result<&CrawlerArtifact, ScrapflyError> {
        let uuid = self.uuid_required()?.to_string();
        if self.cached_har.is_none() {
            let a = self
                .client
                .crawl_artifact(&uuid, CrawlerArtifactType::Har)
                .await?;
            self.cached_har = Some(a);
        }
        match &self.cached_har {
            Some(a) => Ok(a),
            None => Err(ScrapflyError::Config("har cache unexpectedly empty".into())),
        }
    }
}