scrapfly_sdk/config/
extraction.rs1use crate::enums::{CompressionFormat, ExtractionModel};
4use crate::error::ScrapflyError;
5
6use super::url_safe_b64_encode;
7
8#[derive(Debug, Clone, Default)]
10pub struct ExtractionConfig {
11 pub body: Vec<u8>,
13 pub content_type: String,
15 pub url: Option<String>,
17 pub charset: Option<String>,
19 pub extraction_template: Option<String>,
21 pub extraction_ephemeral_template: Option<serde_json::Value>,
23 pub extraction_prompt: Option<String>,
25 pub extraction_model: Option<ExtractionModel>,
27 pub is_document_compressed: bool,
29 pub document_compression_format: Option<CompressionFormat>,
31 pub webhook: Option<String>,
33 pub timeout: Option<u32>,
35}
36
37impl ExtractionConfig {
38 pub fn builder(body: Vec<u8>, content_type: impl Into<String>) -> ExtractionConfigBuilder {
40 ExtractionConfigBuilder {
41 cfg: ExtractionConfig {
42 body,
43 content_type: content_type.into(),
44 ..Default::default()
45 },
46 }
47 }
48
49 pub fn to_query_pairs(&self) -> Result<Vec<(String, String)>, ScrapflyError> {
51 if self.body.is_empty() {
52 return Err(ScrapflyError::Config("body is required".into()));
53 }
54 if self.content_type.is_empty() {
55 return Err(ScrapflyError::Config("content_type is required".into()));
56 }
57 let tpl_count = [
58 self.extraction_template.is_some(),
59 self.extraction_ephemeral_template.is_some(),
60 ]
61 .iter()
62 .filter(|x| **x)
63 .count();
64 if tpl_count > 1 {
65 return Err(ScrapflyError::Config(
66 "cannot use both extraction_template and extraction_ephemeral_template".into(),
67 ));
68 }
69
70 let mut out = Vec::new();
71 out.push(("content_type".into(), self.content_type.clone()));
72 if let Some(u) = &self.url {
73 out.push(("url".into(), u.clone()));
74 }
75 if let Some(c) = &self.charset {
76 out.push(("charset".into(), c.clone()));
77 }
78 if let Some(t) = &self.extraction_template {
79 out.push(("extraction_template".into(), t.clone()));
80 }
81 if let Some(t) = &self.extraction_ephemeral_template {
82 let s = serde_json::to_string(t)?;
83 out.push((
84 "extraction_template".into(),
85 format!("ephemeral:{}", url_safe_b64_encode(&s)),
86 ));
87 }
88 if let Some(p) = &self.extraction_prompt {
89 out.push(("extraction_prompt".into(), p.clone()));
90 }
91 if let Some(m) = self.extraction_model {
92 out.push(("extraction_model".into(), m.as_str().into()));
93 }
94 if let Some(wh) = &self.webhook {
95 out.push(("webhook_name".into(), wh.clone()));
96 }
97 if let Some(t) = self.timeout {
98 out.push(("timeout".into(), t.to_string()));
99 }
100 Ok(out)
101 }
102}
103
104#[derive(Debug, Clone)]
106pub struct ExtractionConfigBuilder {
107 cfg: ExtractionConfig,
108}
109
110impl ExtractionConfigBuilder {
111 pub fn url(mut self, v: impl Into<String>) -> Self {
113 self.cfg.url = Some(v.into());
114 self
115 }
116 pub fn charset(mut self, v: impl Into<String>) -> Self {
118 self.cfg.charset = Some(v.into());
119 self
120 }
121 pub fn extraction_template(mut self, v: impl Into<String>) -> Self {
123 self.cfg.extraction_template = Some(v.into());
124 self
125 }
126 pub fn extraction_ephemeral_template(mut self, v: serde_json::Value) -> Self {
128 self.cfg.extraction_ephemeral_template = Some(v);
129 self
130 }
131 pub fn extraction_prompt(mut self, v: impl Into<String>) -> Self {
133 self.cfg.extraction_prompt = Some(v.into());
134 self
135 }
136 pub fn extraction_model(mut self, v: ExtractionModel) -> Self {
138 self.cfg.extraction_model = Some(v);
139 self
140 }
141 pub fn is_document_compressed(mut self, v: bool) -> Self {
143 self.cfg.is_document_compressed = v;
144 self
145 }
146 pub fn document_compression_format(mut self, v: CompressionFormat) -> Self {
148 self.cfg.document_compression_format = Some(v);
149 self
150 }
151 pub fn timeout(mut self, v: u32) -> Self {
153 self.cfg.timeout = Some(v);
154 self
155 }
156 pub fn webhook(mut self, v: impl Into<String>) -> Self {
158 self.cfg.webhook = Some(v.into());
159 self
160 }
161 pub fn build(self) -> Result<ExtractionConfig, ScrapflyError> {
163 if self.cfg.body.is_empty() {
164 return Err(ScrapflyError::Config("body is required".into()));
165 }
166 if self.cfg.content_type.is_empty() {
167 return Err(ScrapflyError::Config("content_type is required".into()));
168 }
169 Ok(self.cfg)
170 }
171}