unstructured_client/
client.rs

1use reqwest::multipart::Form;
2use reqwest::{multipart, Url};
3use std::fs;
4use std::path::Path;
5
6use crate::error::{ClientError, Result};
7use crate::partition::{PartitionParameters, PartitionResponse};
8
/// Version of this crate, taken from Cargo's `CARGO_PKG_VERSION` at compile time.
///
/// It is embedded in the `User-Agent` header sent with partition requests.
const VERSION: &str = env!("CARGO_PKG_VERSION");
11
/// Route of the partition endpoint; it is joined onto the client's base URL.
///
/// NOTE(review): because this starts with `/`, `Url::join` presumably treats it as an
/// absolute path and replaces any path component of the base URL instead of extending
/// it — confirm this is intended for servers deployed under a path prefix.
const API_ROUTE: &str = "/general/v0/general";
14
/// Client for the Unstructured partition API.
///
/// Holds the HTTP client, the parsed base URL of the API server and an optional API key.
/// Create one with [`UnstructuredClient::new`] and, if needed, attach a key with
/// [`UnstructuredClient::with_api_key`].
///
/// Note that the derived `Debug` implementation prints every field, including the API key.
#[derive(Debug, Clone)]
pub struct UnstructuredClient {
    /// HTTP client used to send requests.
    client: reqwest::Client,
    /// Base URL that the partition route is joined onto.
    base_url: Url,
    /// API key sent in the `unstructured-api-key` header when present.
    api_key: Option<String>,
}
21
22impl UnstructuredClient {
23    /// Creates a new `UnstructuredClient` with a specified base URL.
24    ///
25    /// # Arguments
26    ///
27    /// * `base_url`: A string slice that holds the base URL for the client.
28    ///
29    /// returns: `Result<UnstructuredClient, ClientError>` - On success, returns an instance of `UnstructuredClient`.
30    /// On failure, returns a `ClientError` explaining what went wrong.
31    ///
32    /// # Examples
33    ///
34    /// ```
35    /// let client = UnstructuredClient::new("https://example.com");
36    /// match client {
37    ///     Ok(client) => println!("Client created successfully."),
38    ///     Err(e) => println!("Failed to create client: {:?}", e),
39    /// }
40    /// ```
41    pub fn new(base_url: &str) -> Result<Self> {
42        let url = Url::parse(base_url).map_err(|e| ClientError::URLParseFailed(e.to_string()))?;
43        Ok(UnstructuredClient {
44            client: reqwest::Client::new(),
45            base_url: url,
46            api_key: None,
47        })
48    }
49
50    /// Sets the API key for the `UnstructuredClient`.
51    ///
52    /// This method allows you to provide an API key that will be included in the
53    /// headers of requests made by the client.
54    ///
55    /// # Arguments
56    ///
57    /// * `api_key`: A string slice that holds the API key.
58    ///
59    /// # Returns
60    ///
61    /// `Self` with the API key set.
62    pub fn with_api_key(self, api_key: &str) -> Self {
63        Self {
64            api_key: Some(api_key.to_string()),
65            ..self
66        }
67    }
68
69    /// Partitions the content of a given file using Unstructured's API.
70    ///
71    /// This asynchronous function reads the content of a specified file, creates a multipart
72    /// form along with given parameters, and sends a POST request to the Unstructured API route.
73    /// The result is a text representation of the file's content, partitioned by the type of the
74    /// text element.
75    ///
76    /// # Arguments
77    ///
78    /// * `file_path`: The path to the file that needs to be partitioned.
79    /// * `params`: Parameters for partitioning which are defined by the `PartitionParameters` type.
80    ///
81    /// Returns: `Result<ElementList, ClientError>` - On success, returns a [ElementList];
82    /// otherwise returns a `ClientError`.
83    #[tracing::instrument]
84    pub async fn partition_file(
85        &self,
86        file_path: &Path,
87        params: PartitionParameters,
88    ) -> Result<PartitionResponse> {
89        let url = self
90            .base_url
91            .join(API_ROUTE)
92            .map_err(|e| ClientError::URLParseFailed(e.to_string()))?;
93
94        tracing::trace!("Building partition request for {file_path:?} to {url}.");
95
96        let file = fs::read(file_path)?;
97
98        let file_name = file_path
99            .file_name()
100            .ok_or(ClientError::FileIOError("No filename found.".into()))?
101            .to_str()
102            .ok_or(ClientError::FileIOError("File name not valid UTF-8".into()))?
103            .to_string();
104
105        tracing::debug!("Reading file into memory");
106        let file_part = multipart::Part::bytes(file).file_name(file_name);
107
108        // Create reqwest multipart Form using the implementation for Into<Form>
109        let form: Form = params.into();
110
111        // Add file part
112        let form = form.part("files", file_part);
113
114        // Post request and await response
115        tracing::debug!("Performing request");
116        let request = self
117            .client
118            .post(url)
119            .multipart(form)
120            .header("Content-Type", "multipart/form-data")
121            .header("User-Agent", format!("Unstructured-Rust-Client/{VERSION}"));
122
123        // Add api key
124        let request = {
125            match &self.api_key {
126                None => request,
127                Some(api_key) => request.header("unstructured-api-key", api_key),
128            }
129        };
130
131        // Process response
132        let response = request.send().await?;
133        let element_list = response.json().await?;
134
135        Ok(element_list)
136    }
137}
138
#[cfg(test)]
mod tests {
    use super::*;
    use crate::partition::PartitionResponse::Success;
    use mockito::Matcher;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Sends a temporary file through `partition_file` against a mock server and checks
    /// that the three elements of the canned response are returned.
    #[tokio::test]
    async fn test_partition_file() -> Result<()> {
        // Start a mock server and remember its address for the client.
        let mut mock_server = mockito::Server::new_async().await;
        let server_url = mock_server.url();

        // The mock answers multipart POSTs on the partition route with three elements:
        // one with `metadata: null`, one without metadata and one with metadata.
        let partition_mock = mock_server
            .mock("POST", "/general/v0/general")
            .match_header(
                "content-type",
                Matcher::Regex("multipart/form-data.*".to_string()),
            )
            .with_status(200)
            .with_body(
                r#"
		        [
		            {
		                "type": "NarrativeText",
		                "element_id": "1",
		                "text": "This is a test paragraph.",
		                "metadata": null
		            },
		            {
		                "type": "NarrativeText",
		                "element_id": "1",
		                "text": "This is a test paragraph."
		            },
		            {
		                "type": "Image",
		                "element_id": "2",
		                "text": "base64encodedstring",
		                "metadata": {
		                    "filename": "image.jpg"
		                }
		            }
		        ]
            "#,
            )
            .create();

        // Write a small input file to a temporary location.
        let mut input_file = NamedTempFile::new().unwrap();
        writeln!(input_file, "This is a test file.").unwrap();

        // Client pointed at the mock server, using default partition parameters.
        let client = UnstructuredClient::new(&server_url).unwrap();
        let params = PartitionParameters::default();

        // Run the request under test.
        let response = client.partition_file(input_file.path(), params).await?;

        // Anything other than a successful element list fails the test.
        let element_count = match response {
            Success(elements) => elements.len(),
            unexpected => panic!("Test failed with error: {:?}", unexpected),
        };
        assert_eq!(element_count, 3);

        // Verify that the mock was called
        partition_mock.assert();

        Ok(())
    }
}
216}