<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="Source of the Rust file `src/config/extraction.rs`."><title>extraction.rs - source</title><script>if(window.location.protocol!=="file:")document.head.insertAdjacentHTML("beforeend","SourceSerif4-Regular-6b053e98.ttf.woff2,FiraSans-Italic-81dc35de.woff2,FiraSans-Regular-0fe48ade.woff2,FiraSans-MediumItalic-ccf7e434.woff2,FiraSans-Medium-e1aa3f0a.woff2,SourceCodePro-Regular-8badfe75.ttf.woff2,SourceCodePro-Semibold-aa29a496.ttf.woff2".split(",").map(f=>`<link rel="preload" as="font" type="font/woff2"href="../../../static.files/${f}">`).join(""))</script><link rel="stylesheet" href="../../../static.files/normalize-9960930a.css"><link rel="stylesheet" href="../../../static.files/rustdoc-b7b9f40b.css"><meta name="rustdoc-vars" data-root-path="../../../" data-static-root-path="../../../static.files/" data-current-crate="scrapfly_sdk" data-themes="" data-resource-suffix="" data-rustdoc-version="1.95.0 (59807616e 2026-04-14)" data-channel="1.95.0" data-search-js="search-63369b7b.js" data-stringdex-js="stringdex-b897f86f.js" data-settings-js="settings-170eb4bf.js" ><script src="../../../static.files/storage-41dd4d93.js"></script><script defer src="../../../static.files/src-script-813739b1.js"></script><script defer src="../../../src-files.js"></script><script defer src="../../../static.files/main-5013f961.js"></script><noscript><link rel="stylesheet" href="../../../static.files/noscript-f7c3ffd8.css"></noscript><link rel="alternate icon" type="image/png" href="../../../static.files/favicon-32x32-eab170b8.png"><link rel="icon" type="image/svg+xml" href="../../../static.files/favicon-044be391.svg"></head><body class="rustdoc src"><a class="skip-main-content" href="#main-content">Skip to main content</a><!--[if lte IE 11]><div class="warning">This old browser is unsupported 
and will most likely display funky things.</div><![endif]--><nav class="sidebar"><div class="src-sidebar-title"><h2>Files</h2></div></nav><div class="sidebar-resizer" title="Drag to resize sidebar"></div><main><section id="main-content" class="content" tabindex="-1"><div class="main-heading"><h1><div class="sub-heading">scrapfly_sdk/config/</div>extraction.rs</h1><rustdoc-toolbar></rustdoc-toolbar></div><div class="example-wrap digits-3"><pre class="rust"><code><a href=#1 id=1 data-nosnippet>1</a><span class="doccomment">//! Extraction endpoint configuration — ported from `sdk/go/config_extraction.go`.
<a href=#2 id=2 data-nosnippet>2</a>
<a href=#3 id=3 data-nosnippet>3</a></span><span class="kw">use </span><span class="kw">crate</span>::enums::{CompressionFormat, ExtractionModel};
<a href=#4 id=4 data-nosnippet>4</a><span class="kw">use </span><span class="kw">crate</span>::error::ScrapflyError;
<a href=#5 id=5 data-nosnippet>5</a>
<a href=#6 id=6 data-nosnippet>6</a><span class="kw">use </span><span class="kw">super</span>::url_safe_b64_encode;
<a href=#7 id=7 data-nosnippet>7</a>
<a href=#8 id=8 data-nosnippet>8</a><span class="doccomment">/// Configuration for a `POST /extraction` request.
<a href=#9 id=9 data-nosnippet>9</a></span><span class="attr">#[derive(Debug, Clone, Default)]
<a href=#10 id=10 data-nosnippet>10</a></span><span class="kw">pub struct </span>ExtractionConfig {
<a href=#11 id=11 data-nosnippet>11</a> <span class="doccomment">/// Document bytes (required).
<a href=#12 id=12 data-nosnippet>12</a> </span><span class="kw">pub </span>body: Vec<u8>,
<a href=#13 id=13 data-nosnippet>13</a> <span class="doccomment">/// Content type, e.g. `text/html` (required).
<a href=#14 id=14 data-nosnippet>14</a> </span><span class="kw">pub </span>content_type: String,
<a href=#15 id=15 data-nosnippet>15</a> <span class="doccomment">/// Original URL (helps the AI with context).
<a href=#16 id=16 data-nosnippet>16</a> </span><span class="kw">pub </span>url: <span class="prelude-ty">Option</span><String>,
<a href=#17 id=17 data-nosnippet>17</a> <span class="doccomment">/// Character set.
<a href=#18 id=18 data-nosnippet>18</a> </span><span class="kw">pub </span>charset: <span class="prelude-ty">Option</span><String>,
<a href=#19 id=19 data-nosnippet>19</a> <span class="doccomment">/// Saved extraction template name (mutually exclusive with `extraction_ephemeral_template`).
<a href=#20 id=20 data-nosnippet>20</a> </span><span class="kw">pub </span>extraction_template: <span class="prelude-ty">Option</span><String>,
<a href=#21 id=21 data-nosnippet>21</a> <span class="doccomment">/// Inline (ephemeral) template (mutually exclusive with `extraction_template`).
<a href=#22 id=22 data-nosnippet>22</a> </span><span class="kw">pub </span>extraction_ephemeral_template: <span class="prelude-ty">Option</span><serde_json::Value>,
<a href=#23 id=23 data-nosnippet>23</a> <span class="doccomment">/// AI extraction prompt.
<a href=#24 id=24 data-nosnippet>24</a> </span><span class="kw">pub </span>extraction_prompt: <span class="prelude-ty">Option</span><String>,
<a href=#25 id=25 data-nosnippet>25</a> <span class="doccomment">/// Extraction model.
<a href=#26 id=26 data-nosnippet>26</a> </span><span class="kw">pub </span>extraction_model: <span class="prelude-ty">Option</span><ExtractionModel>,
<a href=#27 id=27 data-nosnippet>27</a> <span class="doccomment">/// Body is compressed.
<a href=#28 id=28 data-nosnippet>28</a> </span><span class="kw">pub </span>is_document_compressed: bool,
<a href=#29 id=29 data-nosnippet>29</a> <span class="doccomment">/// Compression format.
<a href=#30 id=30 data-nosnippet>30</a> </span><span class="kw">pub </span>document_compression_format: <span class="prelude-ty">Option</span><CompressionFormat>,
<a href=#31 id=31 data-nosnippet>31</a> <span class="doccomment">/// Webhook name.
<a href=#32 id=32 data-nosnippet>32</a> </span><span class="kw">pub </span>webhook: <span class="prelude-ty">Option</span><String>,
<a href=#33 id=33 data-nosnippet>33</a> <span class="doccomment">/// Maximum time in seconds for extraction processing.
<a href=#34 id=34 data-nosnippet>34</a> </span><span class="kw">pub </span>timeout: <span class="prelude-ty">Option</span><u32>,
<a href=#35 id=35 data-nosnippet>35</a>}
<a href=#36 id=36 data-nosnippet>36</a>
<a href=#37 id=37 data-nosnippet>37</a><span class="kw">impl </span>ExtractionConfig {
<a href=#38 id=38 data-nosnippet>38</a> <span class="doccomment">/// Start a builder.
<a href=#39 id=39 data-nosnippet>39</a> </span><span class="kw">pub fn </span>builder(body: Vec<u8>, content_type: <span class="kw">impl </span>Into<String>) -> ExtractionConfigBuilder {
<a href=#40 id=40 data-nosnippet>40</a> ExtractionConfigBuilder {
<a href=#41 id=41 data-nosnippet>41</a> cfg: ExtractionConfig {
<a href=#42 id=42 data-nosnippet>42</a> body,
<a href=#43 id=43 data-nosnippet>43</a> content_type: content_type.into(),
<a href=#44 id=44 data-nosnippet>44</a> ..Default::default()
<a href=#45 id=45 data-nosnippet>45</a> },
<a href=#46 id=46 data-nosnippet>46</a> }
<a href=#47 id=47 data-nosnippet>47</a> }
<a href=#48 id=48 data-nosnippet>48</a>
<a href=#49 id=49 data-nosnippet>49</a> <span class="doccomment">/// Validate the config and build its query params (key is added separately by the client).
<a href=#50 id=50 data-nosnippet>50</a> </span><span class="kw">pub fn </span>to_query_pairs(<span class="kw-2">&</span><span class="self">self</span>) -> <span class="prelude-ty">Result</span><Vec<(String, String)>, ScrapflyError> {
<a href=#51 id=51 data-nosnippet>51</a> <span class="kw">if </span><span class="self">self</span>.body.is_empty() {
<a href=#52 id=52 data-nosnippet>52</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="string">"body is required"</span>.into()));
<a href=#53 id=53 data-nosnippet>53</a> }
<a href=#54 id=54 data-nosnippet>54</a> <span class="kw">if </span><span class="self">self</span>.content_type.is_empty() {
<a href=#55 id=55 data-nosnippet>55</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="string">"content_type is required"</span>.into()));
<a href=#56 id=56 data-nosnippet>56</a> }
<a href=#57 id=57 data-nosnippet>57</a> <span class="kw">let </span>tpl_count = [
<a href=#58 id=58 data-nosnippet>58</a> <span class="self">self</span>.extraction_template.is_some(),
<a href=#59 id=59 data-nosnippet>59</a> <span class="self">self</span>.extraction_ephemeral_template.is_some(),
<a href=#60 id=60 data-nosnippet>60</a> ]
<a href=#61 id=61 data-nosnippet>61</a> .iter()
<a href=#62 id=62 data-nosnippet>62</a> .filter(|x| <span class="kw-2">**</span>x)
<a href=#63 id=63 data-nosnippet>63</a> .count();
<a href=#64 id=64 data-nosnippet>64</a> <span class="kw">if </span>tpl_count > <span class="number">1 </span>{
<a href=#65 id=65 data-nosnippet>65</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(
<a href=#66 id=66 data-nosnippet>66</a> <span class="string">"cannot use both extraction_template and extraction_ephemeral_template"</span>.into(),
<a href=#67 id=67 data-nosnippet>67</a> ));
<a href=#68 id=68 data-nosnippet>68</a> }
<a href=#69 id=69 data-nosnippet>69</a>
<a href=#70 id=70 data-nosnippet>70</a> <span class="kw">let </span><span class="kw-2">mut </span>out = Vec::new();
<a href=#71 id=71 data-nosnippet>71</a> out.push((<span class="string">"content_type"</span>.into(), <span class="self">self</span>.content_type.clone()));
<a href=#72 id=72 data-nosnippet>72</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(u) = <span class="kw-2">&</span><span class="self">self</span>.url {
<a href=#73 id=73 data-nosnippet>73</a> out.push((<span class="string">"url"</span>.into(), u.clone()));
<a href=#74 id=74 data-nosnippet>74</a> }
<a href=#75 id=75 data-nosnippet>75</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(c) = <span class="kw-2">&</span><span class="self">self</span>.charset {
<a href=#76 id=76 data-nosnippet>76</a> out.push((<span class="string">"charset"</span>.into(), c.clone()));
<a href=#77 id=77 data-nosnippet>77</a> }
<a href=#78 id=78 data-nosnippet>78</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(t) = <span class="kw-2">&</span><span class="self">self</span>.extraction_template {
<a href=#79 id=79 data-nosnippet>79</a> out.push((<span class="string">"extraction_template"</span>.into(), t.clone()));
<a href=#80 id=80 data-nosnippet>80</a> }
<a href=#81 id=81 data-nosnippet>81</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(t) = <span class="kw-2">&</span><span class="self">self</span>.extraction_ephemeral_template {
<a href=#82 id=82 data-nosnippet>82</a> <span class="kw">let </span>s = serde_json::to_string(t)<span class="question-mark">?</span>;
<a href=#83 id=83 data-nosnippet>83</a> out.push((
<a href=#84 id=84 data-nosnippet>84</a> <span class="string">"extraction_template"</span>.into(),
<a href=#85 id=85 data-nosnippet>85</a> <span class="macro">format!</span>(<span class="string">"ephemeral:{}"</span>, url_safe_b64_encode(<span class="kw-2">&</span>s)),
<a href=#86 id=86 data-nosnippet>86</a> ));
<a href=#87 id=87 data-nosnippet>87</a> }
<a href=#88 id=88 data-nosnippet>88</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(p) = <span class="kw-2">&</span><span class="self">self</span>.extraction_prompt {
<a href=#89 id=89 data-nosnippet>89</a> out.push((<span class="string">"extraction_prompt"</span>.into(), p.clone()));
<a href=#90 id=90 data-nosnippet>90</a> }
<a href=#91 id=91 data-nosnippet>91</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(m) = <span class="self">self</span>.extraction_model {
<a href=#92 id=92 data-nosnippet>92</a> out.push((<span class="string">"extraction_model"</span>.into(), m.as_str().into()));
<a href=#93 id=93 data-nosnippet>93</a> }
<a href=#94 id=94 data-nosnippet>94</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(wh) = <span class="kw-2">&</span><span class="self">self</span>.webhook {
<a href=#95 id=95 data-nosnippet>95</a> out.push((<span class="string">"webhook_name"</span>.into(), wh.clone()));
<a href=#96 id=96 data-nosnippet>96</a> }
<a href=#97 id=97 data-nosnippet>97</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(t) = <span class="self">self</span>.timeout {
<a href=#98 id=98 data-nosnippet>98</a> out.push((<span class="string">"timeout"</span>.into(), t.to_string()));
<a href=#99 id=99 data-nosnippet>99</a> }
<a href=#100 id=100 data-nosnippet>100</a> <span class="prelude-val">Ok</span>(out)
<a href=#101 id=101 data-nosnippet>101</a> }
<a href=#102 id=102 data-nosnippet>102</a>}
<a href=#103 id=103 data-nosnippet>103</a>
<a href=#104 id=104 data-nosnippet>104</a><span class="doccomment">/// Builder for [`ExtractionConfig`].
<a href=#105 id=105 data-nosnippet>105</a></span><span class="attr">#[derive(Debug, Clone)]
<a href=#106 id=106 data-nosnippet>106</a></span><span class="kw">pub struct </span>ExtractionConfigBuilder {
<a href=#107 id=107 data-nosnippet>107</a> cfg: ExtractionConfig,
<a href=#108 id=108 data-nosnippet>108</a>}
<a href=#109 id=109 data-nosnippet>109</a>
<a href=#110 id=110 data-nosnippet>110</a><span class="kw">impl </span>ExtractionConfigBuilder {
<a href=#111 id=111 data-nosnippet>111</a> <span class="doccomment">/// Original URL.
<a href=#112 id=112 data-nosnippet>112</a> </span><span class="kw">pub fn </span>url(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into<String>) -> <span class="self">Self </span>{
<a href=#113 id=113 data-nosnippet>113</a> <span class="self">self</span>.cfg.url = <span class="prelude-val">Some</span>(v.into());
<a href=#114 id=114 data-nosnippet>114</a> <span class="self">self
<a href=#115 id=115 data-nosnippet>115</a> </span>}
<a href=#116 id=116 data-nosnippet>116</a> <span class="doccomment">/// Character set.
<a href=#117 id=117 data-nosnippet>117</a> </span><span class="kw">pub fn </span>charset(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into<String>) -> <span class="self">Self </span>{
<a href=#118 id=118 data-nosnippet>118</a> <span class="self">self</span>.cfg.charset = <span class="prelude-val">Some</span>(v.into());
<a href=#119 id=119 data-nosnippet>119</a> <span class="self">self
<a href=#120 id=120 data-nosnippet>120</a> </span>}
<a href=#121 id=121 data-nosnippet>121</a> <span class="doccomment">/// Saved template name.
<a href=#122 id=122 data-nosnippet>122</a> </span><span class="kw">pub fn </span>extraction_template(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into<String>) -> <span class="self">Self </span>{
<a href=#123 id=123 data-nosnippet>123</a> <span class="self">self</span>.cfg.extraction_template = <span class="prelude-val">Some</span>(v.into());
<a href=#124 id=124 data-nosnippet>124</a> <span class="self">self
<a href=#125 id=125 data-nosnippet>125</a> </span>}
<a href=#126 id=126 data-nosnippet>126</a> <span class="doccomment">/// Inline (ephemeral) template.
<a href=#127 id=127 data-nosnippet>127</a> </span><span class="kw">pub fn </span>extraction_ephemeral_template(<span class="kw-2">mut </span><span class="self">self</span>, v: serde_json::Value) -> <span class="self">Self </span>{
<a href=#128 id=128 data-nosnippet>128</a> <span class="self">self</span>.cfg.extraction_ephemeral_template = <span class="prelude-val">Some</span>(v);
<a href=#129 id=129 data-nosnippet>129</a> <span class="self">self
<a href=#130 id=130 data-nosnippet>130</a> </span>}
<a href=#131 id=131 data-nosnippet>131</a> <span class="doccomment">/// AI extraction prompt.
<a href=#132 id=132 data-nosnippet>132</a> </span><span class="kw">pub fn </span>extraction_prompt(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into<String>) -> <span class="self">Self </span>{
<a href=#133 id=133 data-nosnippet>133</a> <span class="self">self</span>.cfg.extraction_prompt = <span class="prelude-val">Some</span>(v.into());
<a href=#134 id=134 data-nosnippet>134</a> <span class="self">self
<a href=#135 id=135 data-nosnippet>135</a> </span>}
<a href=#136 id=136 data-nosnippet>136</a> <span class="doccomment">/// Extraction model.
<a href=#137 id=137 data-nosnippet>137</a> </span><span class="kw">pub fn </span>extraction_model(<span class="kw-2">mut </span><span class="self">self</span>, v: ExtractionModel) -> <span class="self">Self </span>{
<a href=#138 id=138 data-nosnippet>138</a> <span class="self">self</span>.cfg.extraction_model = <span class="prelude-val">Some</span>(v);
<a href=#139 id=139 data-nosnippet>139</a> <span class="self">self
<a href=#140 id=140 data-nosnippet>140</a> </span>}
<a href=#141 id=141 data-nosnippet>141</a> <span class="doccomment">/// Body is compressed.
<a href=#142 id=142 data-nosnippet>142</a> </span><span class="kw">pub fn </span>is_document_compressed(<span class="kw-2">mut </span><span class="self">self</span>, v: bool) -> <span class="self">Self </span>{
<a href=#143 id=143 data-nosnippet>143</a> <span class="self">self</span>.cfg.is_document_compressed = v;
<a href=#144 id=144 data-nosnippet>144</a> <span class="self">self
<a href=#145 id=145 data-nosnippet>145</a> </span>}
<a href=#146 id=146 data-nosnippet>146</a> <span class="doccomment">/// Compression format.
<a href=#147 id=147 data-nosnippet>147</a> </span><span class="kw">pub fn </span>document_compression_format(<span class="kw-2">mut </span><span class="self">self</span>, v: CompressionFormat) -> <span class="self">Self </span>{
<a href=#148 id=148 data-nosnippet>148</a> <span class="self">self</span>.cfg.document_compression_format = <span class="prelude-val">Some</span>(v);
<a href=#149 id=149 data-nosnippet>149</a> <span class="self">self
<a href=#150 id=150 data-nosnippet>150</a> </span>}
<a href=#151 id=151 data-nosnippet>151</a> <span class="doccomment">/// Maximum time in seconds for extraction processing.
<a href=#152 id=152 data-nosnippet>152</a> </span><span class="kw">pub fn </span>timeout(<span class="kw-2">mut </span><span class="self">self</span>, v: u32) -> <span class="self">Self </span>{
<a href=#153 id=153 data-nosnippet>153</a> <span class="self">self</span>.cfg.timeout = <span class="prelude-val">Some</span>(v);
<a href=#154 id=154 data-nosnippet>154</a> <span class="self">self
<a href=#155 id=155 data-nosnippet>155</a> </span>}
<a href=#156 id=156 data-nosnippet>156</a> <span class="doccomment">/// Set webhook name for post-extraction notification.
<a href=#157 id=157 data-nosnippet>157</a> </span><span class="kw">pub fn </span>webhook(<span class="kw-2">mut </span><span class="self">self</span>, v: <span class="kw">impl </span>Into<String>) -> <span class="self">Self </span>{
<a href=#158 id=158 data-nosnippet>158</a> <span class="self">self</span>.cfg.webhook = <span class="prelude-val">Some</span>(v.into());
<a href=#159 id=159 data-nosnippet>159</a> <span class="self">self
<a href=#160 id=160 data-nosnippet>160</a> </span>}
<a href=#161 id=161 data-nosnippet>161</a> <span class="doccomment">/// Finalize the builder; errors if `body` or `content_type` is empty.
<a href=#162 id=162 data-nosnippet>162</a> </span><span class="kw">pub fn </span>build(<span class="self">self</span>) -> <span class="prelude-ty">Result</span><ExtractionConfig, ScrapflyError> {
<a href=#163 id=163 data-nosnippet>163</a> <span class="kw">if </span><span class="self">self</span>.cfg.body.is_empty() {
<a href=#164 id=164 data-nosnippet>164</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="string">"body is required"</span>.into()));
<a href=#165 id=165 data-nosnippet>165</a> }
<a href=#166 id=166 data-nosnippet>166</a> <span class="kw">if </span><span class="self">self</span>.cfg.content_type.is_empty() {
<a href=#167 id=167 data-nosnippet>167</a> <span class="kw">return </span><span class="prelude-val">Err</span>(ScrapflyError::Config(<span class="string">"content_type is required"</span>.into()));
<a href=#168 id=168 data-nosnippet>168</a> }
<a href=#169 id=169 data-nosnippet>169</a> <span class="prelude-val">Ok</span>(<span class="self">self</span>.cfg)
<a href=#170 id=170 data-nosnippet>170</a> }
<a href=#171 id=171 data-nosnippet>171</a>}
</code></pre></div></section></main></body></html>