use anyhow::Result;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use uuid::Uuid;
/// Central coordinator for data exports: tracks in-flight jobs, keeps a
/// history of finished runs, and lists the output formats that have writers.
#[derive(Debug, Clone)]
pub struct DataExportManager {
/// Limits, defaults, and saved templates applied to new jobs.
config: ExportConfig,
/// Jobs keyed by id; entries are retained after completion for status queries.
active_jobs: HashMap<Uuid, ExportJob>,
/// One record appended per finished export run.
export_history: Vec<ExportRecord>,
/// Formats advertised via `get_supported_formats`; populated in `new`.
supported_formats: Vec<ExportFormat>,
}
/// Manager-wide export settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportConfig {
/// Suggested output directory (callers currently pass full output paths).
pub default_directory: String,
/// Upper bound on the combined `size` of all items in a single job, in bytes.
pub max_file_size: u64,
/// Compression toggle; NOTE(review): not consulted by the writers in this file.
pub enable_compression: bool,
/// Default output format; NOTE(review): not read by the code visible here.
pub default_format: ExportFormat,
/// Whether item metadata should be included in exports.
pub include_metadata: bool,
/// Saved, reusable export configurations.
pub templates: Vec<ExportTemplate>,
}
/// A single export run and its lifecycle state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportJob {
/// Unique job identifier, generated at submission time.
pub id: Uuid,
/// Human-readable job name supplied by the caller.
pub name: String,
/// Output format for this run.
pub format: ExportFormat,
/// Destination file path.
pub output_path: String,
/// Current lifecycle state.
pub status: ExportStatus,
/// Completion percentage; set to 100.0 when the job completes.
pub progress: f64,
/// When the job was submitted.
pub started_at: DateTime<Utc>,
/// When the job finished; `None` while still running.
pub completed_at: Option<DateTime<Utc>>,
/// Combined declared size of the input items, in bytes.
pub data_size: u64,
/// Failure description when `status` is `Failed`.
pub error_message: Option<String>,
/// Formatting options used by the writer.
pub options: ExportOptions,
}
/// History entry describing one finished export run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportRecord {
/// Unique record id.
pub id: Uuid,
/// Id of the job that produced this record.
pub job_id: Uuid,
/// When the record was written.
pub timestamp: DateTime<Utc>,
/// Path of the produced file.
pub file_path: String,
/// Declared size of the exported data, in bytes.
pub file_size: u64,
/// Format that was written.
pub format: ExportFormat,
/// Whether the job completed successfully.
pub success: bool,
/// Wall-clock duration of the job, in seconds.
pub duration: f64,
}
/// Reusable bundle of format, options, and filters for repeated exports.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportTemplate {
/// Template identifier (a stringified UUID when made via `create_template`).
pub id: String,
pub name: String,
pub description: String,
/// Output format the template selects.
pub format: ExportFormat,
/// Formatting options the template selects.
pub options: ExportOptions,
/// Data selection rules associated with the template.
pub filters: DataFilters,
/// Free-form tags for organizing templates.
pub tags: Vec<String>,
}
/// Per-job formatting knobs consumed by the writers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportOptions {
/// Emit a header row for tabular output (CSV).
pub include_headers: bool,
/// `chrono` format string used for timestamps in CSV output.
pub date_format: String,
/// Number of decimal places used when formatting floats.
pub float_precision: u32,
/// Field separator for CSV output.
pub separator: String,
/// Compression level; NOTE(review): not consulted by the writers in this file.
pub compression_level: u32,
/// Whether to include item metadata in the output.
pub include_metadata: bool,
/// Free-form, writer-specific extras.
pub custom_options: HashMap<String, serde_json::Value>,
}
/// Rules describing which data to select for export.
/// NOTE(review): stored on templates but not applied by the writers in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataFilters {
/// Optional time-window restriction.
pub date_range: Option<DateRange>,
/// Data categories to include.
pub data_types: Vec<DataType>,
/// Field names to drop from the output.
pub exclude_fields: Vec<String>,
/// When set, only these fields are kept.
pub include_fields: Option<Vec<String>>,
/// Free-form additional filter criteria.
pub custom_filters: HashMap<String, serde_json::Value>,
}
/// A UTC time window with explicit start and end bounds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DateRange {
pub start: DateTime<Utc>,
pub end: DateTime<Utc>,
}
/// Output formats a job can target.
///
/// `Eq`/`Hash` are derived so a format can key the per-format counters in
/// `ExportStatistics`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Hash, Eq)]
pub enum ExportFormat {
Csv,
Excel,
Json,
JsonPretty,
Hdf5,
Parquet,
Xml,
Yaml,
Sqlite,
MessagePack,
Arrow,
/// Escape hatch for formats not modeled above; the payload names the format.
Custom(String),
}
/// Lifecycle states of an export job.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExportStatus {
/// Created but not yet started.
Pending,
/// Currently being written.
InProgress,
/// Finished successfully.
Completed,
/// Finished with an error (see `ExportJob::error_message`).
Failed,
/// Stopped on request before finishing.
Cancelled,
}
/// Categories of data that can be exported.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DataType {
TensorData,
GradientData,
PerformanceMetrics,
MemoryProfiles,
ActivityLogs,
AnnotationData,
CommentData,
ModelDiagnostics,
TrainingDynamics,
ArchitectureAnalysis,
/// Escape hatch for categories not modeled above.
Custom(String),
}
/// One named, timestamped piece of data handed to the exporter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportableData {
pub id: Uuid,
pub name: String,
/// Category of the payload.
pub data_type: DataType,
pub timestamp: DateTime<Utc>,
/// The payload itself, in one of several shapes.
pub content: ExportDataContent,
/// Free-form key/value annotations.
pub metadata: HashMap<String, serde_json::Value>,
/// Declared size in bytes; summed against `ExportConfig::max_file_size`.
pub size: u64,
}
/// The shapes a payload can take. The CSV writer has dedicated handling for
/// `Table` and `TimeSeries`; all other variants are serialized as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExportDataContent {
/// Rows-and-columns data with named headers.
Table(TableData),
/// Timestamp-indexed numeric series.
TimeSeries(TimeSeriesData),
/// Flat key/value map.
KeyValue(HashMap<String, serde_json::Value>),
/// Arbitrary JSON document.
Structured(serde_json::Value),
/// Raw bytes.
Binary(Vec<u8>),
/// Plain text.
Text(String),
}
/// Tabular payload: named columns and JSON-valued rows.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableData {
/// Column names, in output order.
pub headers: Vec<String>,
/// Row-major cell values; each row is expected to match `headers` in
/// length (not enforced here).
pub rows: Vec<Vec<serde_json::Value>>,
/// Optional per-column type hints, keyed by column name.
pub column_types: HashMap<String, ColumnType>,
}
/// Timestamp-indexed numeric series; `series` values align with `timestamps`
/// by index (missing indices render as empty CSV cells).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimeSeriesData {
/// Shared time axis for all series.
pub timestamps: Vec<DateTime<Utc>>,
/// Named series, each parallel to `timestamps`.
pub series: HashMap<String, Vec<f64>>,
/// Free-form annotations.
pub metadata: HashMap<String, String>,
}
/// Type hints for table columns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ColumnType {
Integer,
Float,
String,
Boolean,
DateTime,
Binary,
}
impl DataExportManager {
    /// Creates a manager with the given configuration and the set of formats
    /// that currently have a writer implementation.
    pub fn new(config: ExportConfig) -> Self {
        let supported_formats = vec![
            ExportFormat::Csv,
            ExportFormat::Excel,
            ExportFormat::Json,
            ExportFormat::JsonPretty,
            ExportFormat::Xml,
            ExportFormat::Yaml,
            ExportFormat::Sqlite,
        ];
        Self {
            config,
            active_jobs: HashMap::new(),
            export_history: Vec::new(),
            supported_formats,
        }
    }

    /// Registers a new export job and runs it synchronously.
    ///
    /// Returns the job id. A per-format write failure does NOT surface as an
    /// `Err` here; it is recorded on the job (`status` / `error_message`) and
    /// in the export history. `Err` is returned only when the job cannot be
    /// created at all, e.g. the combined data size exceeds
    /// `config.max_file_size`.
    pub fn start_export(
        &mut self,
        name: String,
        data: Vec<ExportableData>,
        format: ExportFormat,
        output_path: String,
        options: ExportOptions,
    ) -> Result<Uuid> {
        let job_id = Uuid::new_v4();
        let data_size: u64 = data.iter().map(|d| d.size).sum();
        if data_size > self.config.max_file_size {
            return Err(anyhow::anyhow!("Data size exceeds maximum file size limit"));
        }
        let job = ExportJob {
            id: job_id,
            name: name.clone(),
            format: format.clone(),
            output_path: output_path.clone(),
            status: ExportStatus::Pending,
            progress: 0.0,
            started_at: Utc::now(),
            completed_at: None,
            data_size,
            error_message: None,
            options: options.clone(),
        };
        self.active_jobs.insert(job_id, job);
        self.execute_export(job_id, data, options)?;
        Ok(job_id)
    }

    /// Dispatches the job to the writer for its format and records the
    /// outcome.
    ///
    /// Fix: both successful AND failed runs are now appended to the export
    /// history, so `get_export_statistics` can report failures (previously
    /// only successes were recorded and `failed_exports` was always zero).
    fn execute_export(
        &mut self,
        job_id: Uuid,
        data: Vec<ExportableData>,
        options: ExportOptions,
    ) -> Result<()> {
        let (format, output_path) = {
            let job = self
                .active_jobs
                .get_mut(&job_id)
                .ok_or_else(|| anyhow::anyhow!("Export job not found"))?;
            job.status = ExportStatus::InProgress;
            (job.format.clone(), job.output_path.clone())
        };
        let result = match format {
            ExportFormat::Csv => self.export_csv(&data, &output_path, &options),
            ExportFormat::Json => self.export_json(&data, &output_path, &options),
            ExportFormat::JsonPretty => self.export_json_pretty(&data, &output_path, &options),
            ExportFormat::Excel => self.export_excel(&data, &output_path, &options),
            ExportFormat::Xml => self.export_xml(&data, &output_path, &options),
            ExportFormat::Yaml => self.export_yaml(&data, &output_path, &options),
            ExportFormat::Sqlite => self.export_sqlite(&data, &output_path, &options),
            _ => Err(anyhow::anyhow!("Format not yet implemented")),
        };
        if let Some(job) = self.active_jobs.get_mut(&job_id) {
            match result {
                Ok(()) => {
                    job.status = ExportStatus::Completed;
                    job.progress = 100.0;
                    job.completed_at = Some(Utc::now());
                },
                Err(e) => {
                    job.status = ExportStatus::Failed;
                    // Record the end time on failure too, so the history
                    // entry gets a meaningful duration.
                    job.completed_at = Some(Utc::now());
                    job.error_message = Some(e.to_string());
                },
            }
            let job_copy = job.clone();
            self.add_export_record(&job_copy);
        }
        Ok(())
    }

    /// Writes the data set as CSV using the configured separator, date
    /// format, and float precision.
    ///
    /// `Table` and `TimeSeries` payloads get row/column treatment; any other
    /// payload variant is dumped as one JSON line.
    fn export_csv(
        &mut self,
        data: &[ExportableData],
        output_path: &str,
        options: &ExportOptions,
    ) -> Result<()> {
        use std::fs::File;
        use std::io::{BufWriter, Write};
        // Buffer writes: one syscall per line is needlessly slow.
        let mut file = BufWriter::new(File::create(output_path)?);
        for item in data {
            match &item.content {
                ExportDataContent::Table(table_data) => {
                    if options.include_headers {
                        writeln!(file, "{}", table_data.headers.join(&options.separator))?;
                    }
                    for row in &table_data.rows {
                        let cells: Vec<String> = row
                            .iter()
                            .map(|v| self.format_value_for_csv(v, options))
                            .collect();
                        writeln!(file, "{}", cells.join(&options.separator))?;
                    }
                },
                ExportDataContent::TimeSeries(ts_data) => {
                    if options.include_headers {
                        let mut headers = vec!["timestamp".to_string()];
                        headers.extend(ts_data.series.keys().cloned());
                        writeln!(file, "{}", headers.join(&options.separator))?;
                    }
                    for (i, timestamp) in ts_data.timestamps.iter().enumerate() {
                        let mut row = vec![timestamp.format(&options.date_format).to_string()];
                        // Iterating `values()` here visits the same order as
                        // `keys()` above (same unmodified map), so columns
                        // line up with the header row.
                        for series in ts_data.series.values() {
                            row.push(match series.get(i) {
                                Some(value) => format!(
                                    "{:.prec$}",
                                    value,
                                    prec = options.float_precision as usize
                                ),
                                // Series shorter than the time axis render
                                // as empty cells.
                                None => String::new(),
                            });
                        }
                        writeln!(file, "{}", row.join(&options.separator))?;
                    }
                },
                other => {
                    writeln!(file, "{}", serde_json::to_string(other)?)?;
                },
            }
        }
        // Drop would flush but silently swallow errors; flush explicitly.
        file.flush()?;
        Ok(())
    }

    /// Serializes the whole data set as one compact JSON array.
    fn export_json(
        &mut self,
        data: &[ExportableData],
        output_path: &str,
        _options: &ExportOptions,
    ) -> Result<()> {
        use std::fs::File;
        use std::io::{BufWriter, Write};
        let mut writer = BufWriter::new(File::create(output_path)?);
        serde_json::to_writer(&mut writer, data)?;
        writer.flush()?;
        Ok(())
    }

    /// Serializes the whole data set as pretty-printed JSON.
    fn export_json_pretty(
        &mut self,
        data: &[ExportableData],
        output_path: &str,
        _options: &ExportOptions,
    ) -> Result<()> {
        use std::fs::File;
        use std::io::{BufWriter, Write};
        let mut writer = BufWriter::new(File::create(output_path)?);
        serde_json::to_writer_pretty(&mut writer, data)?;
        writer.flush()?;
        Ok(())
    }

    /// Placeholder Excel writer: emits CSV (which spreadsheet applications
    /// can open) until a real XLSX writer is wired in.
    fn export_excel(
        &mut self,
        data: &[ExportableData],
        output_path: &str,
        options: &ExportOptions,
    ) -> Result<()> {
        self.export_csv(data, output_path, options)
    }

    /// Writes a simple XML document with one `<data_item>` per entry.
    ///
    /// Fix: text content is now XML-escaped and the JSON payload is guarded
    /// against an embedded `]]>` terminating the CDATA section early.
    fn export_xml(
        &mut self,
        data: &[ExportableData],
        output_path: &str,
        _options: &ExportOptions,
    ) -> Result<()> {
        use std::fs::File;
        use std::io::{BufWriter, Write};
        let mut file = BufWriter::new(File::create(output_path)?);
        writeln!(file, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
        writeln!(file, "<export_data>")?;
        for item in data {
            writeln!(
                file,
                "  <data_item id=\"{}\" type=\"{}\">",
                item.id,
                Self::escape_xml(&format!("{:?}", item.data_type))
            )?;
            writeln!(file, "    <name>{}</name>", Self::escape_xml(&item.name))?;
            writeln!(
                file,
                "    <timestamp>{}</timestamp>",
                item.timestamp.to_rfc3339()
            )?;
            writeln!(file, "    <size>{}</size>", item.size)?;
            let content_json = serde_json::to_string(&item.content)?;
            // Split any "]]>" so the payload cannot close the CDATA section.
            let safe_cdata = content_json.replace("]]>", "]]]]><![CDATA[>");
            writeln!(file, "    <content><![CDATA[{}]]></content>", safe_cdata)?;
            writeln!(file, "  </data_item>")?;
        }
        writeln!(file, "</export_data>")?;
        file.flush()?;
        Ok(())
    }

    /// Escapes the characters with special meaning in XML text and
    /// double-quoted attribute values.
    fn escape_xml(text: &str) -> String {
        text.replace('&', "&amp;")
            .replace('<', "&lt;")
            .replace('>', "&gt;")
            .replace('"', "&quot;")
    }

    /// Placeholder YAML writer: emits pretty-printed JSON, which is valid
    /// YAML 1.2, until a dedicated YAML serializer is wired in.
    fn export_yaml(
        &mut self,
        data: &[ExportableData],
        output_path: &str,
        options: &ExportOptions,
    ) -> Result<()> {
        self.export_json_pretty(data, output_path, options)
    }

    /// Placeholder SQLite writer: emits JSON until a real database writer is
    /// wired in.
    fn export_sqlite(
        &mut self,
        data: &[ExportableData],
        output_path: &str,
        options: &ExportOptions,
    ) -> Result<()> {
        self.export_json(data, output_path, options)
    }

    /// Renders a single JSON value as a CSV cell.
    ///
    /// Floats are printed with `options.float_precision` decimal places.
    /// Fix: quoting now triggers on the CONFIGURED separator (plus quotes and
    /// newlines) instead of a hard-coded comma, so non-comma separators no
    /// longer produce corrupt rows.
    fn format_value_for_csv(&self, value: &serde_json::Value, options: &ExportOptions) -> String {
        let raw = match value {
            serde_json::Value::Number(n) => {
                if let Some(f) = n.as_f64() {
                    format!("{:.prec$}", f, prec = options.float_precision as usize)
                } else {
                    n.to_string()
                }
            },
            serde_json::Value::String(s) => s.clone(),
            other => other.to_string(),
        };
        let needs_quoting = raw.contains(&options.separator)
            || raw.contains('"')
            || raw.contains('\n')
            || raw.contains('\r');
        if needs_quoting {
            // RFC 4180: wrap in quotes and double any embedded quotes.
            format!("\"{}\"", raw.replace('"', "\"\""))
        } else {
            raw
        }
    }

    /// Appends a history record for a finished (completed or failed) job.
    fn add_export_record(&mut self, job: &ExportJob) {
        let record = ExportRecord {
            id: Uuid::new_v4(),
            job_id: job.id,
            timestamp: Utc::now(),
            file_path: job.output_path.clone(),
            file_size: job.data_size,
            format: job.format.clone(),
            success: matches!(job.status, ExportStatus::Completed),
            // Seconds between start and completion; 0.0 if no end time.
            duration: job
                .completed_at
                .map(|end| (end - job.started_at).num_milliseconds() as f64 / 1000.0)
                .unwrap_or(0.0),
        };
        self.export_history.push(record);
    }

    /// Looks up a tracked job (finished jobs remain queryable).
    pub fn get_job_status(&self, job_id: Uuid) -> Option<&ExportJob> {
        self.active_jobs.get(&job_id)
    }

    /// Returns the full history of finished export runs.
    pub fn get_export_history(&self) -> &[ExportRecord] {
        &self.export_history
    }

    /// Saves a reusable template and returns its generated id.
    pub fn create_template(
        &mut self,
        name: String,
        description: String,
        format: ExportFormat,
        options: ExportOptions,
        filters: DataFilters,
        tags: Vec<String>,
    ) -> String {
        let template_id = Uuid::new_v4().to_string();
        let template = ExportTemplate {
            id: template_id.clone(),
            name,
            description,
            format,
            options,
            filters,
            tags,
        };
        self.config.templates.push(template);
        template_id
    }

    /// Resolves a template id to its (format, options, filters) triple, or
    /// `None` if no template with that id exists.
    pub fn apply_template(
        &self,
        template_id: &str,
    ) -> Option<(&ExportFormat, &ExportOptions, &DataFilters)> {
        self.config
            .templates
            .iter()
            .find(|t| t.id == template_id)
            .map(|t| (&t.format, &t.options, &t.filters))
    }

    /// Lists the formats this manager advertises as supported.
    pub fn get_supported_formats(&self) -> &[ExportFormat] {
        &self.supported_formats
    }

    /// Marks a pending/in-progress job as cancelled.
    ///
    /// Note: exports currently run synchronously inside `start_export`, so by
    /// the time a caller holds a job id the job is usually already finished
    /// and this returns an error.
    pub fn cancel_job(&mut self, job_id: Uuid) -> Result<()> {
        if let Some(job) = self.active_jobs.get_mut(&job_id) {
            if matches!(job.status, ExportStatus::Pending | ExportStatus::InProgress) {
                job.status = ExportStatus::Cancelled;
                Ok(())
            } else {
                Err(anyhow::anyhow!("Job cannot be cancelled in current status"))
            }
        } else {
            Err(anyhow::anyhow!("Job not found"))
        }
    }

    /// Aggregates the export history into summary counters.
    pub fn get_export_statistics(&self) -> ExportStatistics {
        let total_exports = self.export_history.len();
        let successful_exports = self.export_history.iter().filter(|r| r.success).count();
        let total_size: u64 = self.export_history.iter().map(|r| r.file_size).sum();
        // Guard against division by zero on an empty history.
        let avg_duration = if total_exports > 0 {
            self.export_history.iter().map(|r| r.duration).sum::<f64>() / total_exports as f64
        } else {
            0.0
        };
        let format_stats: HashMap<ExportFormat, usize> =
            self.export_history.iter().fold(HashMap::new(), |mut acc, record| {
                *acc.entry(record.format.clone()).or_insert(0) += 1;
                acc
            });
        ExportStatistics {
            total_exports,
            successful_exports,
            failed_exports: total_exports - successful_exports,
            total_size_bytes: total_size,
            average_duration_seconds: avg_duration,
            format_statistics: format_stats,
            active_jobs: self.active_jobs.len(),
        }
    }
}
/// Aggregate counters derived from the export history and the job map.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportStatistics {
/// Number of history records.
pub total_exports: usize,
/// Records flagged successful.
pub successful_exports: usize,
/// `total_exports - successful_exports`.
pub failed_exports: usize,
/// Sum of recorded file sizes, in bytes.
pub total_size_bytes: u64,
/// Mean recorded duration, in seconds (0.0 when there is no history).
pub average_duration_seconds: f64,
/// Count of exports per output format.
pub format_statistics: HashMap<ExportFormat, usize>,
/// Number of jobs currently tracked in the job map.
pub active_jobs: usize,
}
impl Default for ExportConfig {
    /// Sensible defaults: exports go to `./exports`, capped at 1 GiB,
    /// compression enabled, JSON output, metadata included, no templates.
    fn default() -> Self {
        Self {
            default_directory: "./exports".to_string(),
            // 1 GiB cap on the combined size of one job's data.
            // (Fix: this and `enable_compression` were crammed onto a single
            // line, hiding the second field from a casual read.)
            max_file_size: 1024 * 1024 * 1024,
            enable_compression: true,
            default_format: ExportFormat::Json,
            include_metadata: true,
            templates: Vec::new(),
        }
    }
}
impl Default for ExportOptions {
    /// Baseline formatting: comma-separated with headers, six decimal places
    /// for floats, a UTC timestamp format, mid-level compression, metadata
    /// included, and no custom options.
    fn default() -> Self {
        let custom_options = HashMap::new();
        Self {
            separator: ",".to_string(),
            include_headers: true,
            include_metadata: true,
            float_precision: 6,
            compression_level: 6,
            date_format: "%Y-%m-%d %H:%M:%S UTC".to_string(),
            custom_options,
        }
    }
}
impl Default for DataFilters {
    /// Baseline filter set: no date restriction, the three core data
    /// categories (tensor, gradient, performance), and no field-level
    /// include/exclude rules or custom criteria.
    fn default() -> Self {
        let data_types = vec![
            DataType::TensorData,
            DataType::GradientData,
            DataType::PerformanceMetrics,
        ];
        Self {
            data_types,
            date_range: None,
            include_fields: None,
            exclude_fields: Vec::new(),
            custom_filters: HashMap::new(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Builds a one-item data set containing a small two-row table.
    #[allow(clippy::approx_constant)]
    fn create_test_data() -> Vec<ExportableData> {
        let table_data = TableData {
            headers: vec![
                "id".to_string(),
                "value".to_string(),
                "timestamp".to_string(),
            ],
            rows: vec![
                vec![
                    serde_json::Value::Number(serde_json::Number::from(1)),
                    serde_json::Value::Number(
                        serde_json::Number::from_f64(3.14).expect("3.14 is a finite f64"),
                    ),
                    serde_json::Value::String("2023-01-01T12:00:00Z".to_string()),
                ],
                vec![
                    serde_json::Value::Number(serde_json::Number::from(2)),
                    serde_json::Value::Number(
                        serde_json::Number::from_f64(2.71).expect("2.71 is a finite f64"),
                    ),
                    serde_json::Value::String("2023-01-01T12:01:00Z".to_string()),
                ],
            ],
            column_types: HashMap::new(),
        };
        vec![ExportableData {
            id: Uuid::new_v4(),
            name: "Test Data".to_string(),
            data_type: DataType::TensorData,
            timestamp: Utc::now(),
            content: ExportDataContent::Table(table_data),
            metadata: HashMap::new(),
            size: 1024,
        }]
    }

    /// The default format list must include the core formats.
    #[test]
    fn test_export_manager_creation() {
        let config = ExportConfig::default();
        let manager = DataExportManager::new(config);
        assert!(manager.get_supported_formats().contains(&ExportFormat::Json));
        assert!(manager.get_supported_formats().contains(&ExportFormat::Csv));
    }

    /// A CSV export should register a job and produce an output file.
    #[test]
    fn test_csv_export() {
        let config = ExportConfig::default();
        let mut manager = DataExportManager::new(config);
        let test_data = create_test_data();
        let temp_dir = tempdir().expect("temp dir creation failed");
        let output_path = temp_dir.path().join("test.csv").to_string_lossy().to_string();
        let job_id = manager
            .start_export(
                "Test CSV Export".to_string(),
                test_data,
                ExportFormat::Csv,
                output_path.clone(),
                ExportOptions::default(),
            )
            .expect("CSV export should start");
        assert!(manager.active_jobs.contains_key(&job_id));
        assert!(std::path::Path::new(&output_path).exists());
    }

    /// A JSON export should register a job and produce an output file.
    #[test]
    fn test_json_export() {
        let config = ExportConfig::default();
        let mut manager = DataExportManager::new(config);
        let test_data = create_test_data();
        let temp_dir = tempdir().expect("temp dir creation failed");
        let output_path = temp_dir.path().join("test.json").to_string_lossy().to_string();
        let job_id = manager
            .start_export(
                "Test JSON Export".to_string(),
                test_data,
                ExportFormat::Json,
                output_path.clone(),
                ExportOptions::default(),
            )
            .expect("JSON export should start");
        assert!(manager.active_jobs.contains_key(&job_id));
        assert!(std::path::Path::new(&output_path).exists());
    }

    /// A created template should resolve back to its format and options.
    #[test]
    fn test_export_template() {
        let config = ExportConfig::default();
        let mut manager = DataExportManager::new(config);
        let template_id = manager.create_template(
            "CSV Template".to_string(),
            "Standard CSV export".to_string(),
            ExportFormat::Csv,
            ExportOptions::default(),
            DataFilters::default(),
            vec!["csv".to_string(), "standard".to_string()],
        );
        let (format, options, _filters) =
            manager.apply_template(&template_id).expect("just-created template should resolve");
        assert_eq!(*format, ExportFormat::Csv);
        assert!(options.include_headers);
    }

    /// Statistics should aggregate counts and sizes over the history.
    #[test]
    fn test_export_statistics() {
        let config = ExportConfig::default();
        let mut manager = DataExportManager::new(config);
        manager.export_history.push(ExportRecord {
            id: Uuid::new_v4(),
            job_id: Uuid::new_v4(),
            timestamp: Utc::now(),
            file_path: "test1.csv".to_string(),
            file_size: 1024,
            format: ExportFormat::Csv,
            success: true,
            duration: 2.5,
        });
        manager.export_history.push(ExportRecord {
            id: Uuid::new_v4(),
            job_id: Uuid::new_v4(),
            timestamp: Utc::now(),
            file_path: "test2.json".to_string(),
            file_size: 2048,
            format: ExportFormat::Json,
            success: true,
            duration: 1.8,
        });
        let stats = manager.get_export_statistics();
        assert_eq!(stats.total_exports, 2);
        assert_eq!(stats.successful_exports, 2);
        assert_eq!(stats.total_size_bytes, 3072);
    }
}