batch-mode-batch-client 0.2.4

This crate provides a client for interacting with OpenAI's batch processing API, allowing you to manage and download batch files asynchronously. It offers functionality for managing batch statuses, uploading files, and retrieving results after batch processing.
Documentation
// ---------------- [ File: batch-mode-batch-client/src/download_output_file.rs ]
crate::ix!();

#[async_trait]
impl<E> DownloadOutputFile<E> for BatchFileTriple
where
    E: From<BatchDownloadError>
        + From<std::io::Error>
        + From<BatchMetadataError>
        + From<OpenAIClientError>
        + Debug,
{
    /// Fetches the batch output file through `client` and writes it to
    /// `self.effective_output_filename()`, then records that path on `self`.
    ///
    /// The output file id is read from the batch metadata file: the
    /// associated metadata path when one is set, otherwise
    /// `self.effective_metadata_filename()`.
    ///
    /// # Errors
    ///
    /// * `BatchDownloadError::OutputFileAlreadyExists` when the currently
    ///   recorded output path already exists on disk.
    /// * Whatever loading the metadata, resolving its `output_file_id`, or
    ///   `client.file_content` returns on failure.
    /// * `std::io::Error` when the parent directory cannot be created or the
    ///   file cannot be written.
    async fn download_output_file(
        &mut self,
        client: &dyn LanguageModelClientInterface<E>,
    ) -> Result<(), E> {
        info!("downloading batch output file");

        // Having an output path recorded is not an error by itself; we only
        // refuse to continue when that path is actually present on disk.
        // NOTE(review): this guard inspects `self.output()`, while the write
        // below targets `self.effective_output_filename()` — confirm the two
        // always agree, otherwise an existing file could still be overwritten.
        if let Some(out_path) = &self.output() {
            if out_path.exists() {
                warn!(
                    "Output file already present on disk at path={:?}. \
                     Aborting to avoid overwriting.",
                    out_path
                );
                return Err(BatchDownloadError::OutputFileAlreadyExists {
                    triple: self.clone(),
                }
                .into());
            }
        }

        // Prefer the explicitly associated metadata file when there is one.
        let metadata_filename: PathBuf = if let Some(path) = self.associated_metadata() {
            path.clone()
        } else {
            self.effective_metadata_filename()
        };
        debug!("Using metadata file for output: {:?}", metadata_filename);

        let metadata = BatchMetadata::load_from_file(&metadata_filename).await?;
        let output_file_id = metadata.output_file_id()?; // fails if none

        let file_content = client.file_content(output_file_id).await?;

        let output_path = self.effective_output_filename();
        if let Some(parent) = output_path.parent() {
            // Propagate the failure instead of discarding it: otherwise the
            // write below fails with a less specific error.
            tokio::fs::create_dir_all(parent).await?;
        }

        // Use the async filesystem API so the executor thread is not blocked
        // while the (potentially large) output file is written.
        tokio::fs::write(&output_path, file_content).await?;
        self.set_output_path(Some(output_path));

        Ok(())
    }
}

#[cfg(test)]
mod download_output_file_tests {
    use super::*;
    use futures::executor::block_on;
    use std::fs;
    use tempfile::tempdir;
    use tracing::{debug, error, info, trace, warn};

    /// Success case of the `DownloadOutputFile` suite for `BatchFileTriple`
    /// (the suite also covers a missing file_id, an existing file, and client/IO errors).
    ///
    /// Registers content in the mock client under a known file id, saves metadata
    /// pointing at that id, and checks the content ends up at the configured
    /// output path.
    #[traced_test]
    async fn test_download_output_file_ok() {
        info!("Beginning test_download_output_file_ok");
        trace!("Constructing mock client...");
        let client = MockLanguageModelClientBuilder::<MockBatchClientError>::default()
            .build()
            .unwrap();
        debug!("Mock client: {:?}", client);

        // Register the file so that "client.file_content()" can resolve it.
        let file_id = "some_output_file_id";
        client
            .files()
            .write()
            .unwrap()
            .insert(file_id.to_string(), Bytes::from("mock output contents"));

        // All on-disk artifacts live in a single throwaway directory.
        let workdir = tempdir().unwrap();
        let meta_file = workdir.path().join("metadata.json");
        let target_file = workdir.path().join("output.json");

        // Persist metadata that references the registered file id.
        let batch_meta = BatchMetadataBuilder::default()
            .batch_id("batch_for_download_output_ok".to_string())
            .input_file_id("some_input_file_id".to_string())
            .output_file_id(Some(file_id.to_string()))
            .error_file_id(None)
            .build()
            .unwrap();
        batch_meta.save_to_file(&meta_file).await.unwrap();

        trace!("Creating BatchFileTriple with known metadata path...");
        let mut triple = BatchFileTriple::new_for_test_with_metadata_path(meta_file.clone());
        triple.set_metadata_path(Some(meta_file.clone()));
        // Point the output at the temp dir so nothing escapes it.
        triple.set_output_path(Some(target_file.clone()));

        trace!("Calling download_output_file...");
        let outcome = triple.download_output_file(&client).await;
        debug!("Result from download_output_file: {:?}", outcome);
        assert!(outcome.is_ok(), "Should succeed for a valid output file");

        // The downloaded bytes must be on disk, inside our temp dir.
        let written = fs::read_to_string(&target_file).unwrap();
        pretty_assert_eq!(written, "mock output contents");

        info!("test_download_output_file_ok passed");
    }

    /// The download must be refused when the triple's output path already
    /// points at a file that exists on disk.
    #[traced_test]
    async fn test_download_output_file_already_exists() {
        info!("Beginning test_download_output_file_already_exists");
        let client = MockLanguageModelClientBuilder::<MockBatchClientError>::default()
            .build()
            .unwrap();
        debug!("Mock client: {:?}", client);

        let workdir = tempdir().unwrap();
        let meta_file = workdir.path().join("metadata.json");
        let preexisting_output = workdir.path().join("output.json");

        // Metadata carries an output file id.
        let batch_meta = BatchMetadataBuilder::default()
            .batch_id("batch_exists_output")
            .input_file_id("some_input_file_id".to_string())
            .output_file_id(Some("already_exists_output_file_id".to_string()))
            .error_file_id(None)
            .build()
            .unwrap();
        batch_meta.save_to_file(&meta_file).await.unwrap();

        let mut triple = BatchFileTriple::new_for_test_with_metadata_path(meta_file.clone());
        triple.set_metadata_path(Some(meta_file.clone()));

        // Pretend an earlier run already downloaded the output into the temp dir.
        fs::write(&preexisting_output, b"existing content").unwrap();
        triple.set_output_path(Some(preexisting_output.clone()));

        let outcome = triple.download_output_file(&client).await;
        debug!("Result from download_output_file: {:?}", outcome);

        assert!(
            outcome.is_err(),
            "Should fail if output file already exists on disk"
        );
        info!("test_download_output_file_already_exists passed");
    }

    /// The download must fail when the saved metadata has no `output_file_id`.
    #[traced_test]
    async fn test_download_output_file_missing_output_file_id() {
        info!("Beginning test_download_output_file_missing_output_file_id");
        let client = MockLanguageModelClientBuilder::<MockBatchClientError>::default()
            .build()
            .unwrap();
        debug!("Mock client: {:?}", client);

        let workdir = tempdir().unwrap();
        let meta_file = workdir.path().join("metadata.json");
        // Only set so that every path involved stays inside the temp dir.
        let unused_target = workdir.path().join("will_not_be_written.json");

        // Metadata deliberately built with `output_file_id` set to `None`.
        let batch_meta = BatchMetadataBuilder::default()
            .batch_id("batch_no_out_id")
            .input_file_id("input_file_id".to_string())
            .output_file_id(None)
            .error_file_id(None)
            .build()
            .unwrap();
        batch_meta.save_to_file(&meta_file).await.unwrap();

        let mut triple = BatchFileTriple::new_for_test_with_metadata_path(meta_file.clone());
        triple.set_metadata_path(Some(meta_file.clone()));
        triple.set_output_path(Some(unused_target.clone()));

        let outcome = triple.download_output_file(&client).await;
        debug!("Result from download_output_file: {:?}", outcome);

        assert!(
            outcome.is_err(),
            "Should fail if output_file_id is not present in metadata"
        );
        info!("test_download_output_file_missing_output_file_id passed");
    }

    /// The download must fail when the metadata names an output file id that
    /// was never registered with the mock client.
    #[traced_test]
    async fn test_download_output_file_client_file_not_found() {
        info!("Beginning test_download_output_file_client_file_not_found");
        let client = MockLanguageModelClientBuilder::<MockBatchClientError>::default()
            .build()
            .unwrap();

        let workdir = tempdir().unwrap();
        let meta_file = workdir.path().join("metadata.json");
        // Output location inside the temp dir.
        let target_file = workdir.path().join("output_file.json");

        // The referenced output file id is absent from the mock's file store.
        let batch_meta = BatchMetadataBuilder::default()
            .batch_id("batch_out_file_not_found")
            .input_file_id("some_input".to_string())
            .output_file_id(Some("out_file_that_does_not_exist".to_string()))
            .error_file_id(None)
            .build()
            .unwrap();
        batch_meta.save_to_file(&meta_file).await.unwrap();

        let mut triple = BatchFileTriple::new_for_test_with_metadata_path(meta_file.clone());
        triple.set_metadata_path(Some(meta_file.clone()));
        triple.set_output_path(Some(target_file.clone()));

        let outcome = triple.download_output_file(&client).await;
        debug!("Result from download_output_file: {:?}", outcome);

        assert!(
            outcome.is_err(),
            "Should fail if the mock client cannot find the output file_id"
        );
        info!("test_download_output_file_client_file_not_found passed");
    }

    /// The download must surface an I/O error when the output file cannot be
    /// written because its directory is read-only.
    ///
    /// NOTE(review): this presumes that marking a directory read-only blocks
    /// file creation for the user running the tests; that may not hold when
    /// running as root or on platforms that ignore the flag for directories —
    /// confirm for the CI environment.
    #[traced_test]
    async fn test_download_output_file_io_write_error() {
        info!("Beginning test_download_output_file_io_write_error");
        let mock_client = MockLanguageModelClientBuilder::<MockBatchClientError>::default()
            .build()
            .unwrap();

        // Put a real file in the mock, so the initial content retrieval works
        let output_file_id = "some_out_file_id_for_io_error";
        {
            let mut files_guard = mock_client.files().write().unwrap();
            files_guard.insert(output_file_id.to_string(), Bytes::from("output content"));
        }

        // One tempdir for metadata (fully writable), and a separate one that
        // is made read-only for the actual output file attempt.
        let tmpdir_meta = tempdir().unwrap();
        let tmpdir_readonly = tempdir().unwrap();
        let readonly_dir = tmpdir_readonly.path();

        let metadata_path = tmpdir_meta.path().join("metadata.json");
        let metadata = BatchMetadataBuilder::default()
            .batch_id("batch_io_error")
            .input_file_id("some_input".to_string())
            .output_file_id(Some(output_file_id.to_string()))
            .error_file_id(None)
            .build()
            .unwrap();
        metadata.save_to_file(&metadata_path).await.unwrap();
        debug!("Metadata saved at {:?}", metadata_path);

        // Create the triple, referencing that metadata
        let mut triple = BatchFileTriple::new_for_test_with_metadata_path(metadata_path.clone());
        triple.set_metadata_path(Some(metadata_path.clone()));

        // Force the "output file" path into the read-only dir
        let out_path = readonly_dir.join("output.json");
        triple.set_output_path(Some(out_path.clone()));

        // Snapshot the directory's permissions so the exact original mode can
        // be restored afterwards. `set_readonly(false)` is avoided for the
        // cleanup because on Unix it grants write access to everyone rather
        // than restoring the previous mode.
        let original_perms = fs::metadata(readonly_dir).unwrap().permissions();
        let mut readonly_perms = original_perms.clone();
        readonly_perms.set_readonly(true);
        fs::set_permissions(readonly_dir, readonly_perms).unwrap();

        let result = triple.download_output_file(&mock_client).await;
        debug!("Result from download_output_file: {:?}", result);

        // Restore permissions before asserting so the tempdir can be cleaned
        // up even when the assertion below fails.
        fs::set_permissions(readonly_dir, original_perms).unwrap();

        assert!(
            result.is_err(),
            "Should fail with an I/O error when the directory is read-only"
        );
        info!("test_download_output_file_io_write_error passed");
    }
}