openai_fork/
files.rs

//! Upload, download, list, and delete files on the OpenAI platform. Usually used for fine-tuning files.
2//!
3//! See the [Files API for OpenAI](https://platform.openai.com/docs/api-reference/files) for
4//! more information.
5//!
6//! # Examples
7//!
//! All examples and tests require the `OPENAI_KEY` environment variable
//! to be set to your personal OpenAI platform API key.
10//!
11//! Upload a new file. [Reference API](https://platform.openai.com/docs/api-reference/files/upload)
12//! ```
13//!use openai::files::File;
14//!use openai::ApiResponseOrError;
15//!use dotenvy::dotenv;
16//!use std::env;
17//!use openai::set_key;
18//!
19//!#[tokio::main]
20//!async fn main() -> ApiResponseOrError<()> {
21//!     dotenv().ok();
22//!     set_key(env::var("OPENAI_KEY").unwrap());
23//!     let uploaded_file = File::builder()
24//!         .file_name("test_data/file_upload_test1.jsonl") // local file path to upload.
25//!         .purpose("fine-tune")
26//!         .create()
27//!         .await?;
28//!     assert_eq!(uploaded_file.filename, "file_upload_test1.jsonl");
29//!     Ok(())
30//!}
31//! ```
32//!
33//! List files. [Reference API](https://platform.openai.com/docs/api-reference/files/list)
34//! ```
35//!use openai::files::Files;
36//!use openai::ApiResponseOrError;
37//!use dotenvy::dotenv;
38//!use std::env;
39//!use openai::set_key;
40//!
41//!#[tokio::main]
42//!async fn main() -> ApiResponseOrError<()> {
43//!     dotenv().ok();
44//!     set_key(env::var("OPENAI_KEY").unwrap());
45//!     let openai_files = Files::list().await?;
46//!     let file_count = openai_files.len();
47//!     println!("Listing {} files", file_count);
48//!     for openai_file in openai_files.into_iter() {
49//!         println!("  id: {}, file: {}, size: {}", openai_file.id, openai_file.filename, openai_file.bytes)
50//!     }
51//!     Ok(())
52//!}
53//! ```
54//!
55//! Retrieve a file (json metadata only). [Reference API](https://platform.openai.com/docs/api-reference/files/retrieve)
56//!
57//! ```no_run
58//!use openai::files::File;
59//!use openai::ApiResponseOrError;
60//!use dotenvy::dotenv;
61//!use std::env;
62//!use openai::set_key;
63//!
64//!#[tokio::main]
65//!async fn main() -> ApiResponseOrError<()> {
66//!     dotenv().ok();
67//!     set_key(env::var("OPENAI_KEY").unwrap());
68//!     let file_id = "file-XjGxS3KTG0uNmNOK362iJua3"; // Use a real file id.
69//!     let file = File::get(file_id).await?;
70//!     println!("id: {}, file: {}, size: {}", file.id, file.filename, file.bytes);
71//!     Ok(())
72//!}
73//! ```
74//!
75//! Download to a local file. [Reference API](https://platform.openai.com/docs/api-reference/files/retrieve-content)
76//!
77//! ```no_run
78//!use openai::files::File;
79//!use openai::ApiResponseOrError;
80//!use dotenvy::dotenv;
81//!use std::env;
82//!use openai::set_key;
83//!
84//!#[tokio::main]
85//!async fn main() -> ApiResponseOrError<()> {
86//!     dotenv().ok();
87//!     set_key(env::var("OPENAI_KEY").unwrap());
88//!     let test_file = "test_file.jsonl";
89//!     let file_id = "file-XjGxS3KTG0uNmNOK362iJua3"; // Use a real file id.
90//!     File::download_content_to_file(file_id, test_file).await?;
91//!     Ok(())
92//!}
93//! ```
94//!
95//! Delete a file. [Reference API](https://platform.openai.com/docs/api-reference/files/delete)
96//!
97//! ```no_run
98//!use openai::files::File;
99//!use openai::ApiResponseOrError;
100//!use dotenvy::dotenv;
101//!use std::env;
102//!use openai::set_key;
103//!
104//!#[tokio::main]
105//!async fn main() -> ApiResponseOrError<()> {
106//!     dotenv().ok();
107//!     set_key(env::var("OPENAI_KEY").unwrap());
108//!     let file_id = "file-XjGxS3KTG0uNmNOK362iJua3"; // Use a real file id.
109//!     File::delete(file_id).await?;
110//!     Ok(())
111//!}
112//! ```
113//!
114//! For more examples see the files tests.
115//!
116
117use std::io::Write;
118use std::path::Path;
119
120use bytes::{BufMut, BytesMut};
121use derive_builder::Builder;
122use futures_util::StreamExt;
123use reqwest::multipart::{Form, Part};
124use reqwest::Method;
125use serde::{Deserialize, Serialize};
126
127use crate::{openai_delete, openai_get, openai_post_multipart, openai_request};
128
129use super::ApiResponseOrError;
130
131/// Upload, download and delete a file from the openai platform.
132#[derive(Deserialize, Serialize, Clone)]
133pub struct File {
134    /// The unique id for this uploaded the in the openai platform.
135    /// This id is generated by openai for each uploaded file.
136    pub id: String,
137    /// The object type uploaded. ie: "file"
138    pub object: String,
139    /// The size in bytes of the uploaded file.
140    pub bytes: usize,
141    /// Unix timestamp, seconds since epoch, of when the file was uploaded.
142    pub created_at: usize,
143    /// The name of the file uploaded.
144    pub filename: String,
145    /// The purpose of the file. ie: "fine-tine"
146    pub purpose: String,
147}
148
149#[derive(Deserialize, Serialize, Clone)]
150pub struct DeletedFile {
151    pub id: String,
152    pub object: String,
153    pub deleted: bool,
154}
155
/// List files in the openai platform.
///
/// Obtained through [`Files::list`]; iterate over a `&Files` to visit each [`File`].
#[derive(Deserialize, Serialize, Clone)]
pub struct Files {
    /// The file entries of the listing. Private: read them by iterating over `&Files`.
    data: Vec<File>,
    /// The object type reported by the API (presumably "list" -- TODO confirm).
    pub object: String,
}
162
/// Parameters of a file upload.
///
/// Built through the generated `FileUploadBuilder` (see [`File::builder`]). The builder
/// uses the owned pattern and its setters accept any value convertible `into` the field type.
#[derive(Serialize, Builder, Debug, Clone)]
#[builder(pattern = "owned")]
#[builder(name = "FileUploadBuilder")]
#[builder(setter(strip_option, into))]
pub struct FileUploadRequest {
    /// Path of the local file to upload. Only its final component is sent as the file name.
    file_name: String,
    /// Value sent as the `purpose` form field of the upload. ie: "fine-tune"
    purpose: String,
}
171
172impl File {
173    async fn create(request: &FileUploadRequest) -> ApiResponseOrError<Self> {
174        let purpose = request.purpose.clone();
175        let upload_file_path = Path::new(request.file_name.as_str());
176        let upload_file_path = upload_file_path.canonicalize()?;
177        let simple_name = upload_file_path
178            .file_name()
179            .unwrap()
180            .to_str()
181            .unwrap()
182            .to_string()
183            .clone();
184        let async_file = tokio::fs::File::open(upload_file_path).await?;
185        let file_part = Part::stream(async_file)
186            .file_name(simple_name)
187            .mime_str("application/jsonl")?;
188        let form = Form::new().part("file", file_part).text("purpose", purpose);
189        openai_post_multipart("files", form).await
190    }
191
192    /// New FileUploadBuilder
193    pub fn builder() -> FileUploadBuilder {
194        FileUploadBuilder::create_empty()
195    }
196
197    /// Delete a file from openai platform by id.
198    pub async fn delete(id: &str) -> ApiResponseOrError<DeletedFile> {
199        openai_delete(format!("files/{}", id).as_str()).await
200    }
201
202    /// Get a file from openai platform by id.
203    pub async fn get(id: &str) -> ApiResponseOrError<File> {
204        openai_get(format!("files/{}", id).as_str()).await
205    }
206
207    /// Download a file as bytes into memory by id.
208    pub async fn get_content_bytes(id: &str) -> ApiResponseOrError<Vec<u8>> {
209        let route = format!("files/{}/content", id);
210        let response = openai_request(Method::GET, route.as_str(), |request| request).await?;
211        let content_len = response.content_length().unwrap_or(1024) as usize;
212        let mut file_bytes = BytesMut::with_capacity(content_len);
213        let mut bytes_stream = response.bytes_stream();
214        while let Some(Ok(bytes)) = bytes_stream.next().await {
215            file_bytes.put(bytes);
216        }
217        Ok(file_bytes.to_vec())
218    }
219
220    /// Download a file to a new local file by id.
221    pub async fn download_content_to_file(id: &str, file_path: &str) -> ApiResponseOrError<()> {
222        let mut output_file = std::fs::File::create(file_path)?;
223        let route = format!("files/{}/content", id);
224        let response = openai_request(Method::GET, route.as_str(), |request| request).await?;
225        let mut bytes_stream = response.bytes_stream();
226        while let Some(Ok(bytes)) = bytes_stream.next().await {
227            output_file.write_all(bytes.as_ref())?;
228        }
229        Ok(())
230    }
231}
232
233impl FileUploadBuilder {
234    /// Upload the file to the openai platform.
235    pub async fn create(self) -> ApiResponseOrError<File> {
236        File::create(&self.build().unwrap()).await
237    }
238}
239
240impl Files {
241    /// Get a list of all uploaded files in the openai platform.
242    pub async fn list() -> ApiResponseOrError<Files> {
243        openai_get("files").await
244    }
245    pub fn len(&self) -> usize {
246        self.data.len()
247    }
248}
249
250impl<'a> IntoIterator for &'a Files {
251    type Item = &'a File;
252    type IntoIter = core::slice::Iter<'a, File>;
253
254    fn into_iter(self) -> Self::IntoIter {
255        self.data.as_slice().iter()
256    }
257}
258
/// Integration-style tests. Most of them talk to the real openai platform and therefore
/// need the `OPENAI_KEY` environment variable (optionally supplied through a `.env` file).
#[cfg(test)]
mod tests {
    use std::env;
    use std::time::Duration;

    use dotenvy::dotenv;

    use crate::set_key;

    use super::*;

    /// Local fixture uploaded by the tests.
    const TEST_UPLOAD_PATH: &str = "test_data/file_upload_test1.jsonl";
    /// Prefix every platform-generated file id is expected to start with.
    const FILE_ID_PREFIX: &str = "file-";

    /// Load `.env` (when present) and register the API key for the requests of a test.
    fn init_api_key() {
        dotenv().ok();
        set_key(env::var("OPENAI_KEY").expect("OPENAI_KEY must be set to run the files tests"));
    }

    fn test_upload_builder() -> FileUploadBuilder {
        File::builder()
            .file_name(TEST_UPLOAD_PATH)
            .purpose("fine-tune")
    }

    fn test_upload_request() -> FileUploadRequest {
        test_upload_builder().build().unwrap()
    }

    #[tokio::test]
    async fn upload_file() {
        init_api_key();
        let file_upload = test_upload_builder().create().await.unwrap();
        println!(
            "upload: {}",
            serde_json::to_string_pretty(&file_upload).unwrap()
        );
        // `starts_with` fails the assertion on a short id instead of panicking on a slice.
        assert!(file_upload.id.starts_with(FILE_ID_PREFIX));
    }

    #[tokio::test]
    async fn missing_file() {
        init_api_key();
        let missing_path = "test_data/missing_file.jsonl";
        let response = File::builder()
            .file_name(missing_path)
            .purpose("fine-tune")
            .create()
            .await;
        let openapi_err = response
            .err()
            .expect("uploading a missing file must fail");
        assert_eq!(openapi_err.error_type, "io");
        // Derive the expected message from the platform itself; the wording of the
        // "not found" OS error differs between operating systems.
        let expected_message = Path::new(missing_path)
            .canonicalize()
            .unwrap_err()
            .to_string();
        assert_eq!(openapi_err.message, expected_message.as_str());
    }

    #[tokio::test]
    async fn list_files() {
        init_api_key();
        // ensure at least one file exists
        test_upload_builder().create().await.unwrap();
        let openai_files = Files::list().await.unwrap();
        let file_count = openai_files.len();
        assert!(file_count > 0);
        for openai_file in &openai_files {
            assert!(openai_file.id.starts_with(FILE_ID_PREFIX));
        }
        println!(
            "files [{}]: {}",
            file_count,
            serde_json::to_string_pretty(&openai_files).unwrap()
        );
    }

    // NOTE(review): this test deletes EVERY file visible to the configured API key, not only
    // the one it uploads. It doubles as cleanup for the other tests, but it must only be run
    // against a throw-away account and can race with tests running in parallel.
    #[tokio::test]
    async fn delete_files() {
        init_api_key();
        // ensure at least one file exists
        test_upload_builder().create().await.unwrap();
        // wait to avoid recent upload still processing error
        tokio::time::sleep(Duration::from_secs(7)).await;
        let mut files = Files::list().await.unwrap().data;
        assert!(!files.is_empty());
        // delete oldest uploads first
        files.sort_by_key(|file| file.created_at);
        for file in files {
            let deleted_file = File::delete(file.id.as_str()).await.unwrap();
            assert!(deleted_file.deleted);
            println!("deleted: {} {}", deleted_file.id, deleted_file.deleted);
        }
    }

    #[tokio::test]
    async fn get_file_and_contents() {
        init_api_key();

        let file = test_upload_builder().create().await.unwrap();
        let file_get = File::get(file.id.as_str()).await.unwrap();
        assert_eq!(file.id, file_get.id);

        // get file as bytes
        let body_bytes = File::get_content_bytes(file.id.as_str()).await.unwrap();
        assert_eq!(body_bytes.len(), file.bytes);

        // download file to a file
        let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
        let test_dir = format!("{}/{}", manifest_dir, "target/files-test");
        std::fs::create_dir_all(test_dir.as_str()).unwrap();
        let test_file_save_path = format!("{}/{}", test_dir, file.filename);
        File::download_content_to_file(file.id.as_str(), test_file_save_path.as_str())
            .await
            .unwrap();
        let local_bytes = std::fs::read(test_file_save_path.as_str()).unwrap();
        assert_eq!(body_bytes, local_bytes);
    }

    #[test]
    fn file_name_path_test() {
        let request = test_upload_request();
        let file_upload_path = Path::new(request.file_name.as_str());
        let file_name = file_upload_path.file_name().unwrap().to_str().unwrap();
        assert_eq!(file_name, "file_upload_test1.jsonl");
        let file_upload_path = file_upload_path.canonicalize().unwrap();
        assert!(file_upload_path.exists());
    }
}