scrapfly-sdk 0.2.3

Async Rust client for the Scrapfly web scraping, screenshot, extraction and crawler APIs
Documentation
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="Source of the Rust file `src/config/extraction.rs`."><title>extraction.rs - source</title><script>if(window.location.protocol!=="file:")document.head.insertAdjacentHTML("beforeend","SourceSerif4-Regular-6b053e98.ttf.woff2,FiraSans-Regular-0fe48ade.woff2,FiraSans-Medium-e1aa3f0a.woff2,SourceCodePro-Regular-8badfe75.ttf.woff2,SourceCodePro-Semibold-aa29a496.ttf.woff2".split(",").map(f=>`<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../static.files/${f}">`).join(""))</script><link rel="stylesheet" href="../../../static.files/normalize-9960930a.css"><link rel="stylesheet" href="../../../static.files/rustdoc-46132b98.css"><meta name="rustdoc-vars" data-root-path="../../../" data-static-root-path="../../../static.files/" data-current-crate="scrapfly_sdk" data-themes="" data-resource-suffix="" data-rustdoc-version="1.85.0 (4d91de4e4 2025-02-17)" data-channel="1.85.0" data-search-js="search-75f5ac3e.js" data-settings-js="settings-0f613d39.js" ><script src="../../../static.files/storage-59e33391.js"></script><script defer src="../../../static.files/src-script-56102188.js"></script><script defer src="../../../src-files.js"></script><script defer src="../../../static.files/main-5f194d8c.js"></script><noscript><link rel="stylesheet" href="../../../static.files/noscript-893ab5e7.css"></noscript><link rel="alternate icon" type="image/png" href="../../../static.files/favicon-32x32-6580c154.png"><link rel="icon" type="image/svg+xml" href="../../../static.files/favicon-044be391.svg"></head><body class="rustdoc src"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="sidebar"><div class="src-sidebar-title"><h2>Files</h2></div></nav><div 
class="sidebar-resizer"></div><main><rustdoc-search></rustdoc-search><section id="main-content" class="content"><div class="main-heading"><h1><div class="sub-heading">scrapfly_sdk/config/</div>extraction.rs</h1><rustdoc-toolbar></rustdoc-toolbar></div><div class="example-wrap"><div data-nosnippet><pre class="src-line-numbers">
<a href="#1" id="1">1</a>
<a href="#2" id="2">2</a>
<a href="#3" id="3">3</a>
<a href="#4" id="4">4</a>
<a href="#5" id="5">5</a>
<a href="#6" id="6">6</a>
<a href="#7" id="7">7</a>
<a href="#8" id="8">8</a>
<a href="#9" id="9">9</a>
<a href="#10" id="10">10</a>
<a href="#11" id="11">11</a>
<a href="#12" id="12">12</a>
<a href="#13" id="13">13</a>
<a href="#14" id="14">14</a>
<a href="#15" id="15">15</a>
<a href="#16" id="16">16</a>
<a href="#17" id="17">17</a>
<a href="#18" id="18">18</a>
<a href="#19" id="19">19</a>
<a href="#20" id="20">20</a>
<a href="#21" id="21">21</a>
<a href="#22" id="22">22</a>
<a href="#23" id="23">23</a>
<a href="#24" id="24">24</a>
<a href="#25" id="25">25</a>
<a href="#26" id="26">26</a>
<a href="#27" id="27">27</a>
<a href="#28" id="28">28</a>
<a href="#29" id="29">29</a>
<a href="#30" id="30">30</a>
<a href="#31" id="31">31</a>
<a href="#32" id="32">32</a>
<a href="#33" id="33">33</a>
<a href="#34" id="34">34</a>
<a href="#35" id="35">35</a>
<a href="#36" id="36">36</a>
<a href="#37" id="37">37</a>
<a href="#38" id="38">38</a>
<a href="#39" id="39">39</a>
<a href="#40" id="40">40</a>
<a href="#41" id="41">41</a>
<a href="#42" id="42">42</a>
<a href="#43" id="43">43</a>
<a href="#44" id="44">44</a>
<a href="#45" id="45">45</a>
<a href="#46" id="46">46</a>
<a href="#47" id="47">47</a>
<a href="#48" id="48">48</a>
<a href="#49" id="49">49</a>
<a href="#50" id="50">50</a>
<a href="#51" id="51">51</a>
<a href="#52" id="52">52</a>
<a href="#53" id="53">53</a>
<a href="#54" id="54">54</a>
<a href="#55" id="55">55</a>
<a href="#56" id="56">56</a>
<a href="#57" id="57">57</a>
<a href="#58" id="58">58</a>
<a href="#59" id="59">59</a>
<a href="#60" id="60">60</a>
<a href="#61" id="61">61</a>
<a href="#62" id="62">62</a>
<a href="#63" id="63">63</a>
<a href="#64" id="64">64</a>
<a href="#65" id="65">65</a>
<a href="#66" id="66">66</a>
<a href="#67" id="67">67</a>
<a href="#68" id="68">68</a>
<a href="#69" id="69">69</a>
<a href="#70" id="70">70</a>
<a href="#71" id="71">71</a>
<a href="#72" id="72">72</a>
<a href="#73" id="73">73</a>
<a href="#74" id="74">74</a>
<a href="#75" id="75">75</a>
<a href="#76" id="76">76</a>
<a href="#77" id="77">77</a>
<a href="#78" id="78">78</a>
<a href="#79" id="79">79</a>
<a href="#80" id="80">80</a>
<a href="#81" id="81">81</a>
<a href="#82" id="82">82</a>
<a href="#83" id="83">83</a>
<a href="#84" id="84">84</a>
<a href="#85" id="85">85</a>
<a href="#86" id="86">86</a>
<a href="#87" id="87">87</a>
<a href="#88" id="88">88</a>
<a href="#89" id="89">89</a>
<a href="#90" id="90">90</a>
<a href="#91" id="91">91</a>
<a href="#92" id="92">92</a>
<a href="#93" id="93">93</a>
<a href="#94" id="94">94</a>
<a href="#95" id="95">95</a>
<a href="#96" id="96">96</a>
<a href="#97" id="97">97</a>
<a href="#98" id="98">98</a>
<a href="#99" id="99">99</a>
<a href="#100" id="100">100</a>
<a href="#101" id="101">101</a>
<a href="#102" id="102">102</a>
<a href="#103" id="103">103</a>
<a href="#104" id="104">104</a>
<a href="#105" id="105">105</a>
<a href="#106" id="106">106</a>
<a href="#107" id="107">107</a>
<a href="#108" id="108">108</a>
<a href="#109" id="109">109</a>
<a href="#110" id="110">110</a>
<a href="#111" id="111">111</a>
<a href="#112" id="112">112</a>
<a href="#113" id="113">113</a>
<a href="#114" id="114">114</a>
<a href="#115" id="115">115</a>
<a href="#116" id="116">116</a>
<a href="#117" id="117">117</a>
<a href="#118" id="118">118</a>
<a href="#119" id="119">119</a>
<a href="#120" id="120">120</a>
<a href="#121" id="121">121</a>
<a href="#122" id="122">122</a>
<a href="#123" id="123">123</a>
<a href="#124" id="124">124</a>
<a href="#125" id="125">125</a>
<a href="#126" id="126">126</a>
<a href="#127" id="127">127</a>
<a href="#128" id="128">128</a>
<a href="#129" id="129">129</a>
<a href="#130" id="130">130</a>
<a href="#131" id="131">131</a>
<a href="#132" id="132">132</a>
<a href="#133" id="133">133</a>
<a href="#134" id="134">134</a>
<a href="#135" id="135">135</a>
<a href="#136" id="136">136</a>
<a href="#137" id="137">137</a>
<a href="#138" id="138">138</a>
<a href="#139" id="139">139</a>
<a href="#140" id="140">140</a>
<a href="#141" id="141">141</a>
<a href="#142" id="142">142</a>
<a href="#143" id="143">143</a>
<a href="#144" id="144">144</a>
<a href="#145" id="145">145</a>
<a href="#146" id="146">146</a>
<a href="#147" id="147">147</a>
<a href="#148" id="148">148</a>
<a href="#149" id="149">149</a>
<a href="#150" id="150">150</a>
<a href="#151" id="151">151</a>
<a href="#152" id="152">152</a>
<a href="#153" id="153">153</a>
<a href="#154" id="154">154</a>
<a href="#155" id="155">155</a>
<a href="#156" id="156">156</a>
<a href="#157" id="157">157</a>
<a href="#158" id="158">158</a>
<a href="#159" id="159">159</a>
<a href="#160" id="160">160</a>
<a href="#161" id="161">161</a>
<a href="#162" id="162">162</a>
<a href="#163" id="163">163</a>
<a href="#164" id="164">164</a>
<a href="#165" id="165">165</a>
<a href="#166" id="166">166</a>
<a href="#167" id="167">167</a>
<a href="#168" id="168">168</a>
<a href="#169" id="169">169</a>
<a href="#170" id="170">170</a>
<a href="#171" id="171">171</a></pre></div><pre class="rust"><code><span class="doccomment">//! Extraction endpoint configuration — ported from `sdk/go/config_extraction.go`.

</span><span class="kw">use </span><span class="kw">crate</span>::enums::{CompressionFormat, ExtractionModel};
<span class="kw">use </span><span class="kw">crate</span>::error::ScrapflyError;

<span class="kw">use </span><span class="kw">super</span>::url_safe_b64_encode;

<span class="doccomment">/// Configuration for a `POST /extraction` request.
</span><span class="attr">#[derive(Debug, Clone, Default)]
</span><span class="kw">pub struct </span>ExtractionConfig {
    <span class="doccomment">/// Document bytes (required).
    </span><span class="kw">pub </span>body: Vec&lt;u8&gt;,
    <span class="doccomment">/// Content type, e.g. `text/html` (required).
    </span><span class="kw">pub </span>content_type: String,
    <span class="doccomment">/// Original URL (helps the AI with context).
    </span><span class="kw">pub </span>url: <span class="prelude-ty">Option</span>&lt;String&gt;,
    <span class="doccomment">/// Character set.
    </span><span class="kw">pub </span>charset: <span class="prelude-ty">Option</span>&lt;String&gt;,
    <span class="doccomment">/// Saved extraction template name.
    </span><span class="kw">pub </span>extraction_template: <span class="prelude-ty">Option</span>&lt;String&gt;,
    <span class="doccomment">/// Inline (ephemeral) template.
    </span><span class="kw">pub </span>extraction_ephemeral_template: <span class="prelude-ty">Option</span>&lt;serde_json::Value&gt;,
    <span class="doccomment">/// AI extraction prompt.
    </span><span class="kw">pub </span>extraction_prompt: <span class="prelude-ty">Option</span>&lt;String&gt;,
    <span class="doccomment">/// Extraction model.
    </span><span class="kw">pub </span>extraction_model: <span class="prelude-ty">Option</span>&lt;ExtractionModel&gt;,
    <span class="doccomment">/// Body is compressed.
    </span><span class="kw">pub </span>is_document_compressed: bool,
    <span class="doccomment">/// Compression format.
    </span><span class="kw">pub </span>document_compression_format: <span class="prelude-ty">Option</span>&lt;CompressionFormat&gt;,
    <span class="doccomment">/// Webhook name.
    </span><span class="kw">pub </span>webhook: <span class="prelude-ty">Option</span>&lt;String&gt;,
    <span class="doccomment">/// Maximum time in seconds for extraction processing.
    </span><span class="kw">pub </span>timeout: <span class="prelude-ty">Option</span>&lt;u32&gt;,
}

<span class="kw">impl </span>ExtractionConfig {
    <span class="doccomment">/// Start a builder.
    </span><span class="kw">pub fn </span>builder(body: Vec&lt;u8&gt;, content_type: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; ExtractionConfigBuilder {
        ExtractionConfigBuilder {
            cfg: ExtractionConfig {
                body,
                content_type: content_type.into(),
                ..Default::default()
            },
        }
    }

    <span class="doccomment">/// Validate the config and build the query params (the API key is added separately by the client).
    </span><span class="kw">pub fn </span>to_query_pairs(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; <span class="prelude-ty">Result</span>&lt;Vec&lt;(String, String)&gt;, ScrapflyError&gt; {
        <span class="kw">if </span><span class="self">self</span>.body.is_empty() {
            <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="string">"body is required"</span>.into()));
        }
        <span class="kw">if </span><span class="self">self</span>.content_type.is_empty() {
            <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="string">"content_type is required"</span>.into()));
        }
        <span class="kw">let </span>tpl_count = [
            <span class="self">self</span>.extraction_template.is_some(),
            <span class="self">self</span>.extraction_ephemeral_template.is_some(),
        ]
        .iter()
        .filter(|x| <span class="kw-2">**</span>x)
        .count();
        <span class="kw">if </span>tpl_count &gt; <span class="number">1 </span>{
            <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(
                <span class="string">"cannot use both extraction_template and extraction_ephemeral_template"</span>.into(),
            ));
        }

        <span class="kw">let </span><span class="kw-2">mut </span>out = Vec::new();
        out.push((<span class="string">"content_type"</span>.into(), <span class="self">self</span>.content_type.clone()));
        <span class="kw">if let </span><span class="prelude-val">Some</span>(u) = <span class="kw-2">&amp;</span><span class="self">self</span>.url {
            out.push((<span class="string">"url"</span>.into(), u.clone()));
        }
        <span class="kw">if let </span><span class="prelude-val">Some</span>(c) = <span class="kw-2">&amp;</span><span class="self">self</span>.charset {
            out.push((<span class="string">"charset"</span>.into(), c.clone()));
        }
        <span class="kw">if let </span><span class="prelude-val">Some</span>(t) = <span class="kw-2">&amp;</span><span class="self">self</span>.extraction_template {
            out.push((<span class="string">"extraction_template"</span>.into(), t.clone()));
        }
        <span class="kw">if let </span><span class="prelude-val">Some</span>(t) = <span class="kw-2">&amp;</span><span class="self">self</span>.extraction_ephemeral_template {
            <span class="kw">let </span>s = serde_json::to_string(t)<span class="question-mark">?</span>;
            out.push((
                <span class="string">"extraction_template"</span>.into(),
                <span class="macro">format!</span>(<span class="string">"ephemeral:{}"</span>, url_safe_b64_encode(<span class="kw-2">&amp;</span>s)),
            ));
        }
        <span class="kw">if let </span><span class="prelude-val">Some</span>(p) = <span class="kw-2">&amp;</span><span class="self">self</span>.extraction_prompt {
            out.push((<span class="string">"extraction_prompt"</span>.into(), p.clone()));
        }
        <span class="kw">if let </span><span class="prelude-val">Some</span>(m) = <span class="self">self</span>.extraction_model {
            out.push((<span class="string">"extraction_model"</span>.into(), m.as_str().into()));
        }
        <span class="kw">if let </span><span class="prelude-val">Some</span>(wh) = <span class="kw-2">&amp;</span><span class="self">self</span>.webhook {
            out.push((<span class="string">"webhook_name"</span>.into(), wh.clone()));
        }
        <span class="kw">if let </span><span class="prelude-val">Some</span>(t) = <span class="self">self</span>.timeout {
            out.push((<span class="string">"timeout"</span>.into(), t.to_string()));
        }
        <span class="prelude-val">Ok</span>(out)
    }
}

<span class="doccomment">/// Builder for [`ExtractionConfig`].
</span><span class="attr">#[derive(Debug, Clone)]
</span><span class="kw">pub struct </span>ExtractionConfigBuilder {
    cfg: ExtractionConfig,
}

<span class="kw">impl </span>ExtractionConfigBuilder {
    <span class="doccomment">/// Original URL.
    </span><span class="kw">pub fn </span>url(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; <span class="self">Self </span>{
        <span class="self">self</span>.cfg.url = <span class="prelude-val">Some</span>(v.into());
        <span class="self">self
    </span>}
    <span class="doccomment">/// Character set.
    </span><span class="kw">pub fn </span>charset(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; <span class="self">Self </span>{
        <span class="self">self</span>.cfg.charset = <span class="prelude-val">Some</span>(v.into());
        <span class="self">self
    </span>}
    <span class="doccomment">/// Saved template name.
    </span><span class="kw">pub fn </span>extraction_template(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; <span class="self">Self </span>{
        <span class="self">self</span>.cfg.extraction_template = <span class="prelude-val">Some</span>(v.into());
        <span class="self">self
    </span>}
    <span class="doccomment">/// Inline template.
    </span><span class="kw">pub fn </span>extraction_ephemeral_template(<span class="kw-2">mut </span><span class="self">self</span>, v: serde_json::Value) -&gt; <span class="self">Self </span>{
        <span class="self">self</span>.cfg.extraction_ephemeral_template = <span class="prelude-val">Some</span>(v);
        <span class="self">self
    </span>}
    <span class="doccomment">/// AI prompt.
    </span><span class="kw">pub fn </span>extraction_prompt(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; <span class="self">Self </span>{
        <span class="self">self</span>.cfg.extraction_prompt = <span class="prelude-val">Some</span>(v.into());
        <span class="self">self
    </span>}
    <span class="doccomment">/// Model.
    </span><span class="kw">pub fn </span>extraction_model(<span class="kw-2">mut </span><span class="self">self</span>, v: ExtractionModel) -&gt; <span class="self">Self </span>{
        <span class="self">self</span>.cfg.extraction_model = <span class="prelude-val">Some</span>(v);
        <span class="self">self
    </span>}
    <span class="doccomment">/// Body is compressed.
    </span><span class="kw">pub fn </span>is_document_compressed(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -&gt; <span class="self">Self </span>{
        <span class="self">self</span>.cfg.is_document_compressed = v;
        <span class="self">self
    </span>}
    <span class="doccomment">/// Compression format.
    </span><span class="kw">pub fn </span>document_compression_format(<span class="kw-2">mut </span><span class="self">self</span>, v: CompressionFormat) -&gt; <span class="self">Self </span>{
        <span class="self">self</span>.cfg.document_compression_format = <span class="prelude-val">Some</span>(v);
        <span class="self">self
    </span>}
    <span class="doccomment">/// Maximum time in seconds for extraction processing.
    </span><span class="kw">pub fn </span>timeout(<span class="kw-2">mut </span><span class="self">self</span>, v: u32) -&gt; <span class="self">Self </span>{
        <span class="self">self</span>.cfg.timeout = <span class="prelude-val">Some</span>(v);
        <span class="self">self
    </span>}
    <span class="doccomment">/// Set webhook name for post-extraction notification.
    </span><span class="kw">pub fn </span>webhook(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into&lt;String&gt;) -&gt; <span class="self">Self </span>{
        <span class="self">self</span>.cfg.webhook = <span class="prelude-val">Some</span>(v.into());
        <span class="self">self
    </span>}
    <span class="doccomment">/// Finalize the builder, returning an error if `body` or `content_type` is empty.
    </span><span class="kw">pub fn </span>build(<span class="self">self</span>) -&gt; <span class="prelude-ty">Result</span>&lt;ExtractionConfig, ScrapflyError&gt; {
        <span class="kw">if </span><span class="self">self</span>.cfg.body.is_empty() {
            <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="string">"body is required"</span>.into()));
        }
        <span class="kw">if </span><span class="self">self</span>.cfg.content_type.is_empty() {
            <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="string">"content_type is required"</span>.into()));
        }
        <span class="prelude-val">Ok</span>(<span class="self">self</span>.cfg)
    }
}
</code></pre></div></section></main></body></html>