use crate::error::{ExcelError, Result};
use crate::streaming_reader::{RowIterator, RowStructIterator, StreamingReader};
#[cfg(feature = "cloud-s3")]
use aws_sdk_s3::error::ProvideErrorMetadata;
#[cfg(feature = "cloud-s3")]
use aws_sdk_s3::Client;
#[cfg(feature = "cloud-s3")]
use std::io::Write;
#[cfg(feature = "cloud-s3")]
use tokio::io::AsyncReadExt;
/// Excel reader backed by an object that was downloaded from S3 into a
/// temporary file and opened with a [`StreamingReader`].
///
/// NOTE(review): fields are dropped in declaration order, so `_temp_file` is
/// released before `streaming_reader` — confirm the reader does not need the
/// file on disk while it is being dropped.
pub struct S3ExcelReader {
/// Name of the S3 bucket the workbook was read from.
bucket: String,
/// Object key of the workbook inside `bucket`.
key: String,
/// Region label recorded at construction; only surfaced through `Debug`.
_region: String,
/// Client used for the download, retained but not read again in this file.
_s3_client: Option<Client>,
/// Temporary file the streaming reader was opened from.
_temp_file: Option<tempfile::NamedTempFile>,
/// Reader over the temporary file; `None` for a default-constructed value.
streaming_reader: Option<StreamingReader>,
}
impl std::fmt::Debug for S3ExcelReader {
    /// Prints the bucket, key and region, and reports the client, temp file
    /// and streaming reader only as presence flags.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let has_s3_client = self._s3_client.is_some();
        let has_temp_file = self._temp_file.is_some();
        let has_streaming_reader = self.streaming_reader.is_some();

        let mut out = f.debug_struct("S3ExcelReader");
        out.field("bucket", &self.bucket);
        out.field("key", &self.key);
        out.field("region", &self._region);
        out.field("has_s3_client", &has_s3_client);
        out.field("has_temp_file", &has_temp_file);
        out.field("has_streaming_reader", &has_streaming_reader);
        out.finish()
    }
}
impl S3ExcelReader {
pub fn builder() -> S3ExcelReaderBuilder {
S3ExcelReaderBuilder::default()
}
pub async fn from_s3_client(
s3_client: aws_sdk_s3::Client,
bucket: impl Into<String>,
key: impl Into<String>,
) -> Result<Self> {
let bucket = bucket.into();
let key = key.into();
let get_object_output = s3_client
.get_object()
.bucket(&bucket)
.key(&key)
.send()
.await
.map_err(|e| {
let error_code = e.code().unwrap_or("");
let error_message = e.message().unwrap_or("Unknown error");
match error_code {
"NoSuchKey" => ExcelError::FileNotFound(format!("s3://{}/{}", bucket, key)),
"NoSuchBucket" => {
ExcelError::ReadError(format!("Bucket '{}' does not exist", bucket))
}
"AccessDenied" => ExcelError::ReadError(format!(
"Access denied to s3://{}/{}. Error: {}",
bucket, key, error_message
)),
_ => ExcelError::ReadError(format!(
"S3 GetObject failed ({}): {}",
error_code, error_message
)),
}
})?;
let mut temp_file = tempfile::NamedTempFile::new().map_err(|e| {
ExcelError::IoError(std::io::Error::other(format!(
"Failed to create temp file: {}",
e
)))
})?;
let mut body = get_object_output.body.into_async_read();
let mut buffer = Vec::new();
use tokio::io::AsyncReadExt;
body.read_to_end(&mut buffer)
.await
.map_err(ExcelError::IoError)?;
use std::io::Write;
temp_file.write_all(&buffer).map_err(ExcelError::IoError)?;
temp_file.flush().map_err(ExcelError::IoError)?;
let streaming_reader = StreamingReader::open(temp_file.path())?;
Ok(Self {
bucket,
key,
_region: "custom".to_string(),
_s3_client: Some(s3_client),
_temp_file: Some(temp_file),
streaming_reader: Some(streaming_reader),
})
}
/// Returns the sheet names reported by the underlying reader, or an empty
/// list when no reader is attached.
pub fn sheet_names(&self) -> Vec<String> {
    match &self.streaming_reader {
        Some(reader) => reader.sheet_names(),
        None => Vec::new(),
    }
}
/// Forwards to the underlying reader's `rows` for the named sheet.
///
/// # Errors
///
/// Returns [`ExcelError::InvalidState`] when no reader is attached, otherwise
/// whatever the underlying reader returns.
pub fn rows(&mut self, sheet_name: &str) -> Result<RowStructIterator<'_>> {
    match self.streaming_reader.as_mut() {
        Some(reader) => reader.rows(sheet_name),
        None => Err(ExcelError::InvalidState(
            "Reader not initialized".to_string(),
        )),
    }
}
/// Forwards to the underlying reader's `rows_by_index` for the given sheet
/// position.
///
/// # Errors
///
/// Returns [`ExcelError::InvalidState`] when no reader is attached, otherwise
/// whatever the underlying reader returns.
pub fn rows_by_index(&mut self, sheet_index: usize) -> Result<RowStructIterator<'_>> {
    let Some(reader) = self.streaming_reader.as_mut() else {
        return Err(ExcelError::InvalidState(
            "Reader not initialized".to_string(),
        ));
    };
    reader.rows_by_index(sheet_index)
}
/// Forwards to the underlying reader's `stream_rows` for the named sheet.
///
/// # Errors
///
/// Returns [`ExcelError::InvalidState`] when no reader is attached, otherwise
/// whatever the underlying reader returns.
pub fn stream_rows(&mut self, sheet_name: &str) -> Result<RowIterator<'_>> {
    if let Some(reader) = self.streaming_reader.as_mut() {
        reader.stream_rows(sheet_name)
    } else {
        Err(ExcelError::InvalidState(
            "Reader not initialized".to_string(),
        ))
    }
}
/// Forwards to the underlying reader's `dimensions` for the named sheet.
///
/// # Errors
///
/// Returns [`ExcelError::InvalidState`] when no reader is attached, otherwise
/// whatever the underlying reader returns.
pub fn dimensions(&mut self, sheet_name: &str) -> Result<(usize, usize)> {
    match &mut self.streaming_reader {
        None => Err(ExcelError::InvalidState(
            "Reader not initialized".to_string(),
        )),
        Some(reader) => reader.dimensions(sheet_name),
    }
}
/// Name of the bucket this reader was created for.
pub fn bucket(&self) -> &str {
    self.bucket.as_str()
}
/// Object key this reader was created for.
pub fn key(&self) -> &str {
    self.key.as_str()
}
}
impl Default for S3ExcelReader {
    /// Builds a detached reader: empty bucket and key, region `us-east-1`,
    /// and no client, temp file or streaming reader.
    fn default() -> Self {
        let region = String::from("us-east-1");
        Self {
            bucket: String::default(),
            key: String::default(),
            _region: region,
            _s3_client: None,
            _temp_file: None,
            streaming_reader: None,
        }
    }
}
/// Collects the settings needed to download a workbook from S3 and open it as
/// an [`S3ExcelReader`].
///
/// Derives `Debug` and `Clone` so a configured builder can be logged and
/// reused for several objects.
#[derive(Debug, Clone)]
pub struct S3ExcelReaderBuilder {
    /// Bucket to read from; must be set before building.
    bucket: Option<String>,
    /// Object key to read; must be set before building.
    key: Option<String>,
    /// Region name; building falls back to `us-east-1` when unset.
    region: Option<String>,
    /// Optional endpoint override passed to the S3 client configuration.
    endpoint_url: Option<String>,
    /// Whether to request path-style addressing from the S3 client.
    force_path_style: bool,
}
impl Default for S3ExcelReaderBuilder {
    /// Starts with no bucket, key or endpoint, region `us-east-1`, and
    /// path-style addressing turned off.
    fn default() -> Self {
        let default_region = String::from("us-east-1");
        Self {
            region: Some(default_region),
            force_path_style: false,
            endpoint_url: None,
            bucket: None,
            key: None,
        }
    }
}
impl S3ExcelReaderBuilder {
pub fn bucket(mut self, bucket: impl Into<String>) -> Self {
self.bucket = Some(bucket.into());
self
}
pub fn key(mut self, key: impl Into<String>) -> Self {
self.key = Some(key.into());
self
}
pub fn region(mut self, region: impl Into<String>) -> Self {
self.region = Some(region.into());
self
}
pub fn endpoint_url(mut self, endpoint: impl Into<String>) -> Self {
self.endpoint_url = Some(endpoint.into());
self
}
pub fn force_path_style(mut self, force: bool) -> Self {
self.force_path_style = force;
self
}
/// Creates an S3 client from the default AWS configuration plus this
/// builder's region, endpoint and path-style settings, then downloads the
/// object and opens it.
///
/// # Errors
///
/// Returns [`ExcelError::InvalidState`] when the bucket or key is missing,
/// otherwise any error from downloading or opening the workbook.
#[cfg(feature = "cloud-s3")]
pub async fn build(self) -> Result<S3ExcelReader> {
    let Self {
        bucket,
        key,
        region,
        endpoint_url,
        force_path_style,
    } = self;

    // Validate in the same order as the setters are usually called: bucket
    // first, then key.
    let Some(bucket) = bucket else {
        return Err(ExcelError::InvalidState(
            "Bucket name required".to_string(),
        ));
    };
    let Some(key) = key else {
        return Err(ExcelError::InvalidState(
            "Object key required".to_string(),
        ));
    };
    let region_str = match region {
        Some(name) => name,
        None => "us-east-1".to_string(),
    };

    let sdk_config = aws_config::defaults(aws_config::BehaviorVersion::latest())
        .region(aws_sdk_s3::config::Region::new(region_str.clone()))
        .load()
        .await;

    let base_builder = aws_sdk_s3::config::Builder::from(&sdk_config);
    let with_endpoint = match endpoint_url {
        Some(endpoint) => base_builder.endpoint_url(endpoint),
        None => base_builder,
    };
    let s3_config_builder = if force_path_style {
        with_endpoint.force_path_style(true)
    } else {
        with_endpoint
    };

    let s3_client = Client::from_conf(s3_config_builder.build());
    Self::build_reader_from_client(s3_client, bucket, key, region_str).await
}
/// Stand-in used when the `cloud-s3` feature is disabled; always fails.
///
/// # Errors
///
/// Always returns [`ExcelError::InvalidState`].
#[cfg(not(feature = "cloud-s3"))]
pub async fn build(self) -> Result<S3ExcelReader> {
    let reason = String::from("cloud-s3 feature not enabled");
    Err(ExcelError::InvalidState(reason))
}
/// Downloads and opens the configured object using a caller-supplied client
/// instead of creating one.
///
/// The builder's region is only recorded on the resulting reader; the
/// endpoint and path-style settings are not applied to `s3_client`.
///
/// # Errors
///
/// Returns [`ExcelError::InvalidState`] when the bucket or key is missing,
/// otherwise any error from downloading or opening the workbook.
#[cfg(feature = "cloud-s3")]
pub async fn build_with_client(self, s3_client: Client) -> Result<S3ExcelReader> {
    let Some(bucket) = self.bucket else {
        return Err(ExcelError::InvalidState(
            "Bucket name required".to_string(),
        ));
    };
    let Some(key) = self.key else {
        return Err(ExcelError::InvalidState(
            "Object key required".to_string(),
        ));
    };
    let region_str = match self.region {
        Some(name) => name,
        None => "us-east-1".to_string(),
    };
    Self::build_reader_from_client(s3_client, bucket, key, region_str).await
}
/// Stand-in used when the `cloud-s3` feature is disabled; always fails.
///
/// NOTE(review): `Client` is imported at the top of this file only under
/// `cloud-s3`, so this signature may not resolve when the feature is off —
/// confirm.
///
/// # Errors
///
/// Always returns [`ExcelError::InvalidState`].
#[cfg(not(feature = "cloud-s3"))]
pub async fn build_with_client(self, _s3_client: Client) -> Result<S3ExcelReader> {
    let reason = String::from("cloud-s3 feature not enabled");
    Err(ExcelError::InvalidState(reason))
}
/// Issues `GetObject` for `bucket`/`key` and maps SDK failures onto
/// [`ExcelError`].
///
/// # Errors
///
/// * [`ExcelError::FileNotFound`] when S3 reports `NoSuchKey`.
/// * [`ExcelError::ReadError`] for `NoSuchBucket`, `AccessDenied`, or any
///   other failure. When the SDK error carries no service message (for
///   example a transport or credentials failure), the message contains the
///   SDK's full error chain instead of a generic placeholder.
#[cfg(feature = "cloud-s3")]
async fn download_from_s3(
    client: &Client,
    bucket: &str,
    key: &str,
) -> Result<aws_sdk_s3::operation::get_object::GetObjectOutput> {
    client
        .get_object()
        .bucket(bucket)
        .key(key)
        .send()
        .await
        .map_err(|e| {
            let error_code = e.code().unwrap_or("");
            // Failures without service metadata have no message; fall back to
            // the full error chain so the cause is not reduced to
            // "Unknown error".
            let error_message = match e.message() {
                Some(message) => message.to_string(),
                None => aws_sdk_s3::error::DisplayErrorContext(&e).to_string(),
            };
            match error_code {
                "NoSuchKey" => ExcelError::FileNotFound(format!("s3://{}/{}", bucket, key)),
                "NoSuchBucket" => {
                    ExcelError::ReadError(format!("Bucket '{}' does not exist", bucket))
                }
                "AccessDenied" => ExcelError::ReadError(format!(
                    "Access denied to s3://{}/{}. Error: {}",
                    bucket, key, error_message
                )),
                _ => ExcelError::ReadError(format!(
                    "S3 GetObject failed ({}): {}",
                    error_code, error_message
                )),
            }
        })
}
/// Copies the `GetObject` response body to a named temporary file and opens a
/// [`StreamingReader`] on it.
///
/// The body is written to disk in fixed-size chunks so peak memory does not
/// grow with the size of the object. The temporary file is returned together
/// with the reader so the caller can keep it alive.
///
/// # Errors
///
/// Returns [`ExcelError::IoError`] when the temporary file cannot be created
/// or written, or when reading the body fails, and any error returned by
/// `StreamingReader::open`.
#[cfg(feature = "cloud-s3")]
async fn create_reader_from_s3_response(
    get_object_output: aws_sdk_s3::operation::get_object::GetObjectOutput,
) -> Result<(tempfile::NamedTempFile, StreamingReader)> {
    // Size of each chunk moved from the response body to the temp file.
    const CHUNK_SIZE: usize = 64 * 1024;

    let mut body = get_object_output.body.into_async_read();
    let mut temp_file = tempfile::NamedTempFile::new().map_err(ExcelError::IoError)?;

    let mut chunk = vec![0u8; CHUNK_SIZE];
    loop {
        let read = body
            .read(&mut chunk[..])
            .await
            .map_err(ExcelError::IoError)?;
        if read == 0 {
            break;
        }
        temp_file
            .write_all(&chunk[..read])
            .map_err(ExcelError::IoError)?;
    }
    temp_file.flush().map_err(ExcelError::IoError)?;

    // Borrow the path directly; no owned copy is needed to open the reader.
    let streaming_reader = StreamingReader::open(temp_file.path())?;
    Ok((temp_file, streaming_reader))
}
/// Downloads the object with `s3_client`, opens it, and assembles the
/// resulting [`S3ExcelReader`].
///
/// # Errors
///
/// Propagates any error from `download_from_s3` or
/// `create_reader_from_s3_response`.
#[cfg(feature = "cloud-s3")]
async fn build_reader_from_client(
    s3_client: Client,
    bucket: String,
    key: String,
    region_str: String,
) -> Result<S3ExcelReader> {
    let response = Self::download_from_s3(&s3_client, &bucket, &key).await?;
    let (temp_file, streaming_reader) = Self::create_reader_from_s3_response(response).await?;

    let reader = S3ExcelReader {
        bucket,
        key,
        _region: region_str,
        _s3_client: Some(s3_client),
        _temp_file: Some(temp_file),
        streaming_reader: Some(streaming_reader),
    };
    Ok(reader)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use aws_config::BehaviorVersion;
    use aws_sdk_s3::config::Region;

    /// Runs `build()` on a fresh runtime, checks that it failed, and returns
    /// the rendered error text.
    fn build_error_text(builder: S3ExcelReaderBuilder) -> String {
        let runtime = tokio::runtime::Runtime::new().unwrap();
        let outcome = runtime.block_on(builder.build());
        assert!(outcome.is_err());
        outcome.unwrap_err().to_string()
    }

    #[test]
    fn test_builder_validation_missing_bucket() {
        let message = build_error_text(S3ExcelReaderBuilder::default().key("test.xlsx"));
        assert!(message.contains("Bucket name required"));
    }

    #[test]
    fn test_builder_validation_missing_key() {
        let message = build_error_text(S3ExcelReaderBuilder::default().bucket("test-bucket"));
        assert!(message.contains("Object key required"));
    }

    #[test]
    fn test_default_region() {
        let S3ExcelReaderBuilder { region, .. } = S3ExcelReaderBuilder::default();
        assert_eq!(region, Some("us-east-1".to_string()));
    }

    #[test]
    fn test_builder_methods() {
        let configured = S3ExcelReaderBuilder::default()
            .bucket("my-bucket")
            .key("path/to/file.xlsx")
            .region("ap-southeast-1");
        assert_eq!(configured.bucket, Some("my-bucket".to_string()));
        assert_eq!(configured.key, Some("path/to/file.xlsx".to_string()));
        assert_eq!(configured.region, Some("ap-southeast-1".to_string()));
    }

    // The request for this bucket/key is expected to fail, so only the error
    // outcome is asserted.
    #[tokio::test]
    async fn test_build_with_client() {
        let sdk_config = aws_config::defaults(BehaviorVersion::latest())
            .region(Region::new("us-west-2"))
            .load()
            .await;
        let client = Client::new(&sdk_config);
        let builder = S3ExcelReaderBuilder::default()
            .bucket("test-bucket")
            .key("test.xlsx");
        let outcome = builder.build_with_client(client).await;
        assert!(outcome.is_err());
    }
}