unstructured_client/
client.rs1use reqwest::multipart::Form;
2use reqwest::{multipart, Url};
3use std::fs;
4use std::path::Path;
5
6use crate::error::{ClientError, Result};
7use crate::partition::{PartitionParameters, PartitionResponse};
8
9const VERSION: &str = env!("CARGO_PKG_VERSION");
11
12const API_ROUTE: &str = "/general/v0/general";
14
15#[derive(Debug, Clone)]
16pub struct UnstructuredClient {
17 client: reqwest::Client,
18 base_url: Url,
19 api_key: Option<String>,
20}
21
22impl UnstructuredClient {
23 pub fn new(base_url: &str) -> Result<Self> {
42 let url = Url::parse(base_url).map_err(|e| ClientError::URLParseFailed(e.to_string()))?;
43 Ok(UnstructuredClient {
44 client: reqwest::Client::new(),
45 base_url: url,
46 api_key: None,
47 })
48 }
49
50 pub fn with_api_key(self, api_key: &str) -> Self {
63 Self {
64 api_key: Some(api_key.to_string()),
65 ..self
66 }
67 }
68
69 #[tracing::instrument]
84 pub async fn partition_file(
85 &self,
86 file_path: &Path,
87 params: PartitionParameters,
88 ) -> Result<PartitionResponse> {
89 let url = self
90 .base_url
91 .join(API_ROUTE)
92 .map_err(|e| ClientError::URLParseFailed(e.to_string()))?;
93
94 tracing::trace!("Building partition request for {file_path:?} to {url}.");
95
96 let file = fs::read(file_path)?;
97
98 let file_name = file_path
99 .file_name()
100 .ok_or(ClientError::FileIOError("No filename found.".into()))?
101 .to_str()
102 .ok_or(ClientError::FileIOError("File name not valid UTF-8".into()))?
103 .to_string();
104
105 tracing::debug!("Reading file into memory");
106 let file_part = multipart::Part::bytes(file).file_name(file_name);
107
108 let form: Form = params.into();
110
111 let form = form.part("files", file_part);
113
114 tracing::debug!("Performing request");
116 let request = self
117 .client
118 .post(url)
119 .multipart(form)
120 .header("Content-Type", "multipart/form-data")
121 .header("User-Agent", format!("Unstructured-Rust-Client/{VERSION}"));
122
123 let request = {
125 match &self.api_key {
126 None => request,
127 Some(api_key) => request.header("unstructured-api-key", api_key),
128 }
129 };
130
131 let response = request.send().await?;
133 let element_list = response.json().await?;
134
135 Ok(element_list)
136 }
137}
138
#[cfg(test)]
mod tests {
    use super::*;
    use crate::partition::PartitionResponse::Success;
    use mockito::Matcher;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Canned response body holding three elements: one with `metadata` set to `null`, one
    /// with `metadata` absent, and one with a populated `metadata` object.
    const MOCK_RESPONSE_BODY: &str = r#"
        [
            {
                "type": "NarrativeText",
                "element_id": "1",
                "text": "This is a test paragraph.",
                "metadata": null
            },
            {
                "type": "NarrativeText",
                "element_id": "1",
                "text": "This is a test paragraph."
            },
            {
                "type": "Image",
                "element_id": "2",
                "text": "base64encodedstring",
                "metadata": {
                    "filename": "image.jpg"
                }
            }
        ]
        "#;

    /// Sends a temporary file to a mock server and checks that the three canned elements are
    /// decoded and that the expected multipart POST was received.
    #[tokio::test]
    async fn test_partition_file() -> Result<()> {
        let mut server = mockito::Server::new_async().await;
        let url = server.url();

        // The route is spelled out literally so the test pins the endpoint independently of
        // the constant used by the client.
        let mock = server
            .mock("POST", "/general/v0/general")
            .match_header(
                "content-type",
                Matcher::Regex("multipart/form-data.*".to_string()),
            )
            .with_status(200)
            .with_body(MOCK_RESPONSE_BODY)
            // Async test, so use the `_async` variants of the mock API throughout.
            .create_async()
            .await;

        let mut temp_file = NamedTempFile::new().unwrap();
        writeln!(temp_file, "This is a test file.").unwrap();

        let client = UnstructuredClient::new(&url).unwrap();
        let params = PartitionParameters::default();
        let result = client.partition_file(temp_file.path(), params).await?;

        match result {
            Success(element_list) => assert_eq!(element_list.len(), 3),
            e => panic!("Test failed with error: {:?}", e),
        }

        mock.assert_async().await;

        Ok(())
    }
}