openai/
files.rs

//! Upload, download, list, and delete files in the OpenAI platform. Usually used for fine-tuning files.
2//!
3//! See the [Files API for OpenAI](https://platform.openai.com/docs/api-reference/files) for
4//! more information.
5//!
6//! # Examples
7//!
8//! All examples and tests require the `OPENAI_KEY` environment variable
9//! be set with your personal openai platform API key.
10//!
11//! Upload a new file. [Reference API](https://platform.openai.com/docs/api-reference/files/upload)
12//! ```
13//!use openai::files::File;
14//!use openai::ApiResponseOrError;
15//!use dotenvy::dotenv;
16//!use std::env;
17//!use openai::Credentials;
18//!
19//!#[tokio::main]
20//!async fn main() -> ApiResponseOrError<()> {
21//!     dotenv().ok();
22//!     let credentials = Credentials::from_env();
//!     let uploaded_file = File::builder()
//!         .file_name("test_data/file_upload_test1.jsonl") // local file path to upload.
//!         .purpose("fine-tune")
//!         .credentials(credentials)
//!         .create()
//!         .await?;
28//!     assert_eq!(uploaded_file.filename, "file_upload_test1.jsonl");
29//!     Ok(())
30//!}
31//! ```
32//!
33//! List files. [Reference API](https://platform.openai.com/docs/api-reference/files/list)
34//! ```
35//!use openai::files::Files;
36//!use openai::ApiResponseOrError;
37//!use dotenvy::dotenv;
38//!use std::env;
39//!use openai::Credentials;
40//!
41//!#[tokio::main]
42//!async fn main() -> ApiResponseOrError<()> {
43//!     dotenv().ok();
44//!     let credentials = Credentials::from_env();
45//!     let openai_files = Files::list(credentials).await?;
46//!     let file_count = openai_files.len();
47//!     println!("Listing {} files", file_count);
48//!     for openai_file in openai_files.into_iter() {
49//!         println!("  id: {}, file: {}, size: {}", openai_file.id, openai_file.filename, openai_file.bytes)
50//!     }
51//!     Ok(())
52//!}
53//! ```
54//!
55//! Retrieve a file (json metadata only). [Reference API](https://platform.openai.com/docs/api-reference/files/retrieve)
56//!
57//! ```no_run
58//!use openai::files::File;
59//!use openai::ApiResponseOrError;
60//!use dotenvy::dotenv;
61//!use std::env;
62//!use openai::Credentials;
63//!
64//!#[tokio::main]
65//!async fn main() -> ApiResponseOrError<()> {
66//!     dotenv().ok();
67//!     let credentials = Credentials::from_env();
68//!     let file_id = "file-XjGxS3KTG0uNmNOK362iJua3"; // Use a real file id.
69//!     let file = File::fetch(file_id, credentials).await?;
70//!     println!("id: {}, file: {}, size: {}", file.id, file.filename, file.bytes);
71//!     Ok(())
72//!}
73//! ```
74//!
75//! Download to a local file. [Reference API](https://platform.openai.com/docs/api-reference/files/retrieve-content)
76//!
77//! ```no_run
78//!use openai::files::File;
79//!use openai::ApiResponseOrError;
80//!use dotenvy::dotenv;
81//!use std::env;
82//!use openai::Credentials;
83//!
84//!#[tokio::main]
85//!async fn main() -> ApiResponseOrError<()> {
86//!     dotenv().ok();
87//!     let credentials = Credentials::from_env();
88//!     let test_file = "test_file.jsonl";
89//!     let file_id = "file-XjGxS3KTG0uNmNOK362iJua3"; // Use a real file id.
90//!     File::download_content_to_file(file_id, test_file, credentials).await?;
91//!     Ok(())
92//!}
93//! ```
94//!
95//! Delete a file. [Reference API](https://platform.openai.com/docs/api-reference/files/delete)
96//!
97//! ```no_run
98//!use openai::files::File;
99//!use openai::ApiResponseOrError;
100//!use dotenvy::dotenv;
101//!use std::env;
102//!use openai::Credentials;
103//!
104//!#[tokio::main]
105//!async fn main() -> ApiResponseOrError<()> {
106//!     dotenv().ok();
107//!     let credentials = Credentials::from_env();
108//!     let file_id = "file-XjGxS3KTG0uNmNOK362iJua3"; // Use a real file id.
109//!     File::delete(file_id, credentials).await?;
110//!     Ok(())
111//!}
112//! ```
113//!
114//! For more examples see the files tests.
115//!
116
117use std::io::Write;
118use std::path::Path;
119
120use bytes::{BufMut, BytesMut};
121use derive_builder::Builder;
122use futures_util::StreamExt;
123use reqwest::multipart::{Form, Part};
124use reqwest::Method;
125use serde::{Deserialize, Serialize};
126
127use crate::{openai_delete, openai_get, openai_post_multipart, openai_request, Credentials};
128
129use super::ApiResponseOrError;
130
131/// Upload, download and delete a file from the openai platform.
132#[derive(Deserialize, Serialize, Clone)]
133pub struct File {
134    /// The unique id for this uploaded the in the openai platform.
135    /// This id is generated by openai for each uploaded file.
136    pub id: String,
137    /// The object type uploaded. ie: "file"
138    pub object: String,
139    /// The size in bytes of the uploaded file.
140    pub bytes: usize,
141    /// Unix timestamp, seconds since epoch, of when the file was uploaded.
142    pub created_at: usize,
143    /// The name of the file uploaded.
144    pub filename: String,
145    /// The purpose of the file. ie: "fine-tine"
146    pub purpose: String,
147}
148
149#[derive(Deserialize, Serialize, Clone)]
150pub struct DeletedFile {
151    pub id: String,
152    pub object: String,
153    pub deleted: bool,
154}
155
156/// List files in the openai platform.
157#[derive(Deserialize, Serialize, Clone)]
158pub struct Files {
159    data: Vec<File>,
160    pub object: String,
161}
162
163#[derive(Serialize, Builder, Debug, Clone)]
164#[builder(pattern = "owned")]
165#[builder(name = "FileUploadBuilder")]
166#[builder(setter(strip_option, into))]
167pub struct FileUploadRequest {
168    file_name: String,
169    purpose: String,
170    /// The credentials to use for this request.
171    #[serde(skip_serializing)]
172    #[builder(default)]
173    pub credentials: Option<Credentials>,
174}
175
176impl File {
177    async fn create(request: FileUploadRequest) -> ApiResponseOrError<Self> {
178        let upload_file_path = Path::new(request.file_name.as_str());
179        let upload_file_path = upload_file_path.canonicalize()?;
180        let simple_name = upload_file_path
181            .file_name()
182            .unwrap()
183            .to_str()
184            .unwrap()
185            .to_string()
186            .clone();
187        let async_file = tokio::fs::File::open(upload_file_path).await?;
188        let file_part = Part::stream(async_file)
189            .file_name(simple_name)
190            .mime_str("application/jsonl")?;
191        let form = Form::new()
192            .part("file", file_part)
193            .text("purpose", request.purpose);
194        openai_post_multipart("files", form, request.credentials).await
195    }
196
197    /// New FileUploadBuilder
198    pub fn builder() -> FileUploadBuilder {
199        FileUploadBuilder::create_empty()
200    }
201
202    /// Delete a file from openai platform by id.
203    pub async fn delete(id: &str, credentials: Credentials) -> ApiResponseOrError<DeletedFile> {
204        openai_delete(format!("files/{}", id).as_str(), Some(credentials)).await
205    }
206
207    /// Get a file from openai platform by id.
208    #[deprecated(since = "1.0.0-alpha.16", note = "use `fetch` instead")]
209    pub async fn get(id: &str) -> ApiResponseOrError<File> {
210        openai_get(format!("files/{}", id).as_str(), None).await
211    }
212
213    /// Get a file from openai platform by id.
214    pub async fn fetch(id: &str, credentials: Credentials) -> ApiResponseOrError<File> {
215        openai_get(format!("files/{}", id).as_str(), Some(credentials)).await
216    }
217
218    /// Download a file as bytes into memory by id.
219    #[deprecated(since = "1.0.0-alpha.16", note = "use `fetch_content_bytes` instead")]
220    pub async fn get_content_bytes(id: &str) -> ApiResponseOrError<Vec<u8>> {
221        Self::fetch_content_bytes_with_credentials_opt(id, None).await
222    }
223
224    /// Download a file as bytes into memory by id.
225    pub async fn fetch_content_bytes(
226        id: &str,
227        credentials: Credentials,
228    ) -> ApiResponseOrError<Vec<u8>> {
229        Self::fetch_content_bytes_with_credentials_opt(id, Some(credentials)).await
230    }
231
232    async fn fetch_content_bytes_with_credentials_opt(
233        id: &str,
234        credentials_opt: Option<Credentials>,
235    ) -> ApiResponseOrError<Vec<u8>> {
236        let route = format!("files/{}/content", id);
237        let response = openai_request(
238            Method::GET,
239            route.as_str(),
240            |request| request,
241            credentials_opt,
242        )
243        .await?;
244        let content_len = response.content_length().unwrap_or(1024) as usize;
245        let mut file_bytes = BytesMut::with_capacity(content_len);
246        let mut bytes_stream = response.bytes_stream();
247        while let Some(Ok(bytes)) = bytes_stream.next().await {
248            file_bytes.put(bytes);
249        }
250        Ok(file_bytes.to_vec())
251    }
252
253    /// Download a file to a new local file by id.
254    pub async fn download_content_to_file(
255        id: &str,
256        file_path: &str,
257        credentials: Credentials,
258    ) -> ApiResponseOrError<()> {
259        let mut output_file = std::fs::File::create(file_path)?;
260        let route = format!("files/{}/content", id);
261        let response = openai_request(
262            Method::GET,
263            route.as_str(),
264            |request| request,
265            Some(credentials),
266        )
267        .await?;
268        let mut bytes_stream = response.bytes_stream();
269        while let Some(Ok(bytes)) = bytes_stream.next().await {
270            output_file.write_all(bytes.as_ref())?;
271        }
272        Ok(())
273    }
274}
275
276impl FileUploadBuilder {
277    /// Upload the file to the openai platform.
278    pub async fn create(self) -> ApiResponseOrError<File> {
279        File::create(self.build().unwrap()).await
280    }
281}
282
283impl Files {
284    /// Get a list of all uploaded files in the openai platform.
285    pub async fn list(credentials: Credentials) -> ApiResponseOrError<Files> {
286        openai_get("files", Some(credentials)).await
287    }
288    pub fn len(&self) -> usize {
289        self.data.len()
290    }
291}
292
293impl<'a> IntoIterator for &'a Files {
294    type Item = &'a File;
295    type IntoIter = core::slice::Iter<'a, File>;
296
297    fn into_iter(self) -> Self::IntoIter {
298        self.data.as_slice().iter()
299    }
300}
301
#[cfg(test)]
mod tests {
    //! Integration-style tests for the files module.
    //!
    //! Every test except `file_name_path_test` makes requests using the
    //! credentials loaded by `Credentials::from_env()`.

    use std::env;
    use std::time::Duration;

    use dotenvy::dotenv;

    use super::*;

    /// Builder for the shared JSONL fixture with purpose `fine-tune`; no credentials set.
    fn test_upload_builder() -> FileUploadBuilder {
        File::builder()
            .file_name("test_data/file_upload_test1.jsonl")
            .purpose("fine-tune")
    }

    /// Fully built request for the shared fixture.
    fn test_upload_request() -> FileUploadRequest {
        test_upload_builder().build().unwrap()
    }

    #[tokio::test]
    async fn upload_file() {
        dotenv().ok();
        let credentials = Credentials::from_env();
        let file_upload = test_upload_builder()
            .credentials(credentials)
            .create()
            .await
            .unwrap();
        println!(
            "upload: {}",
            serde_json::to_string_pretty(&file_upload).unwrap()
        );
        // `starts_with` fails cleanly on a short id instead of panicking on a slice index.
        assert!(file_upload.id.starts_with("file-"));
    }

    #[tokio::test]
    async fn missing_file() {
        dotenv().ok();
        let credentials = Credentials::from_env();
        let test_builder = File::builder()
            .file_name("test_data/missing_file.jsonl")
            .credentials(credentials)
            .purpose("fine-tune");
        let response = test_builder.create().await;
        assert!(response.is_err());
        let openai_err = response.err().unwrap();
        assert_eq!(openai_err.error_type, "io");
        // The human-readable part of the message differs between operating
        // systems, so only the OS error code is checked.
        assert!(
            openai_err.message.contains("os error 2"),
            "unexpected message: {}",
            openai_err.message
        );
    }

    #[tokio::test]
    async fn list_files() {
        dotenv().ok();
        let credentials = Credentials::from_env();
        // ensure at least one file exists
        test_upload_builder()
            .credentials(credentials.clone())
            .create()
            .await
            .unwrap();
        let openai_files = Files::list(credentials).await.unwrap();
        let file_count = openai_files.len();
        assert!(file_count > 0);
        for openai_file in &openai_files {
            assert!(openai_file.id.starts_with("file-"));
        }
        println!(
            "files [{}]: {}",
            file_count,
            serde_json::to_string_pretty(&openai_files).unwrap()
        );
    }

    #[tokio::test]
    async fn delete_files() {
        dotenv().ok();
        let credentials = Credentials::from_env();
        // Upload a file and delete only that file: deleting every listed file
        // would wipe unrelated files in the account and could remove files that
        // tests running in parallel are still using.
        let uploaded = test_upload_builder()
            .credentials(credentials.clone())
            .create()
            .await
            .unwrap();
        // wait to avoid recent upload still processing error
        tokio::time::sleep(Duration::from_secs(7)).await;
        let deleted_file = File::delete(uploaded.id.as_str(), credentials)
            .await
            .unwrap();
        assert!(deleted_file.deleted);
        println!("deleted: {} {}", deleted_file.id, deleted_file.deleted);
    }

    #[tokio::test]
    async fn get_file_and_contents() {
        dotenv().ok();
        let credentials = Credentials::from_env();

        let file = test_upload_builder()
            .credentials(credentials.clone())
            .create()
            .await
            .unwrap();
        let file_get = File::fetch(file.id.as_str(), credentials.clone())
            .await
            .unwrap();
        assert_eq!(file.id, file_get.id);

        // get file as bytes
        let body_bytes = File::fetch_content_bytes(file.id.as_str(), credentials.clone())
            .await
            .unwrap();
        assert_eq!(body_bytes.len(), file.bytes);

        // download file to a file
        let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
        let test_dir = Path::new(&manifest_dir).join("target").join("files-test");
        std::fs::create_dir_all(&test_dir).unwrap();
        let test_file_save_path = test_dir.join(&file.filename);
        let test_file_save_path = test_file_save_path.to_str().unwrap();
        File::download_content_to_file(file.id.as_str(), test_file_save_path, credentials)
            .await
            .unwrap();
        let local_bytes = std::fs::read(test_file_save_path).unwrap();
        assert_eq!(body_bytes, local_bytes);
    }

    #[test]
    fn file_name_path_test() {
        let request = test_upload_request();
        let file_upload_path = Path::new(request.file_name.as_str());
        let file_name = file_upload_path.file_name().unwrap().to_str().unwrap();
        assert_eq!(file_name, "file_upload_test1.jsonl");
        let file_upload_path = file_upload_path.canonicalize().unwrap();
        assert!(file_upload_path.exists());
    }
}