1use anyhow::Result;
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use uuid::Uuid;
11
/// Coordinates data exports: tracks jobs by id, records a history of
/// export runs, and advertises which output formats are available.
#[derive(Debug, Clone)]
pub struct DataExportManager {
    /// Manager-wide settings (size limit, defaults, saved templates).
    config: ExportConfig,
    /// Jobs keyed by id; entries are kept after completion so callers can
    /// still query status via `get_job_status`.
    active_jobs: HashMap<Uuid, ExportJob>,
    /// Records of past export runs (appended on successful completion).
    export_history: Vec<ExportRecord>,
    /// Formats returned by `get_supported_formats`.
    supported_formats: Vec<ExportFormat>,
}
24
/// Tunable settings for a `DataExportManager`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportConfig {
    /// Suggested output directory. NOTE(review): not enforced by any code
    /// in this file; callers pass explicit output paths.
    pub default_directory: String,
    /// Upper bound, in bytes, on the summed `size` of the items handed to
    /// `start_export`; larger batches are rejected up front.
    pub max_file_size: u64,
    /// Compression preference. NOTE(review): not currently read by any
    /// writer in this file.
    pub enable_compression: bool,
    /// Preferred format when none is specified. NOTE(review): not
    /// currently read in this file.
    pub default_format: ExportFormat,
    /// Whether exports should embed item metadata.
    pub include_metadata: bool,
    /// Saved, reusable export configurations (see `create_template`).
    pub templates: Vec<ExportTemplate>,
}
41
/// A single export run and its lifecycle state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportJob {
    /// Unique job identifier, generated at submission time.
    pub id: Uuid,
    /// Human-readable job name supplied by the caller.
    pub name: String,
    /// Output format this job writes.
    pub format: ExportFormat,
    /// Destination file path.
    pub output_path: String,
    /// Current lifecycle state (Pending -> InProgress -> Completed/Failed,
    /// or Cancelled).
    pub status: ExportStatus,
    /// Completion percentage; set to 0.0 on creation and 100.0 on success.
    pub progress: f64,
    /// When the job was submitted.
    pub started_at: DateTime<Utc>,
    /// When the job finished successfully; `None` until then.
    pub completed_at: Option<DateTime<Utc>>,
    /// Summed `size` of the input items, in bytes.
    pub data_size: u64,
    /// Failure description when `status` is `Failed`.
    pub error_message: Option<String>,
    /// Formatting options the job was started with.
    pub options: ExportOptions,
}
68
/// Immutable history entry describing one finished export.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportRecord {
    /// Unique record identifier (distinct from the job id).
    pub id: Uuid,
    /// Id of the job this record was derived from.
    pub job_id: Uuid,
    /// When the record was created.
    pub timestamp: DateTime<Utc>,
    /// Path the export was written to.
    pub file_path: String,
    /// Size in bytes. NOTE(review): populated from the job's input
    /// `data_size`, not the on-disk file size.
    pub file_size: u64,
    /// Format that was written.
    pub format: ExportFormat,
    /// Whether the job reached `ExportStatus::Completed`.
    pub success: bool,
    /// Wall-clock duration in seconds (0.0 if the job never completed).
    pub duration: f64,
}
89
/// A saved, reusable export configuration (format + options + filters).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportTemplate {
    /// Template identifier (a stringified UUID; see `create_template`).
    pub id: String,
    /// Display name.
    pub name: String,
    /// Free-form description.
    pub description: String,
    /// Output format the template selects.
    pub format: ExportFormat,
    /// Formatting options the template carries.
    pub options: ExportOptions,
    /// Data-selection filters the template carries.
    pub filters: DataFilters,
    /// Free-form tags for categorization/search.
    pub tags: Vec<String>,
}
108
/// Formatting knobs applied while writing an export.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportOptions {
    /// Emit a header row in tabular outputs (CSV and time-series CSV).
    pub include_headers: bool,
    /// chrono `format` string used for time-series timestamps.
    pub date_format: String,
    /// Number of decimal places for floating-point cells.
    pub float_precision: u32,
    /// Field separator for CSV output (usually ",").
    pub separator: String,
    /// Compression level. NOTE(review): not currently read by any writer
    /// in this file.
    pub compression_level: u32,
    /// Whether to embed item metadata in the output.
    pub include_metadata: bool,
    /// Format-specific extras, keyed by option name.
    pub custom_options: HashMap<String, serde_json::Value>,
}
127
/// Criteria describing which data an export (or template) should include.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataFilters {
    /// Restrict to items within this time window; `None` means no limit.
    pub date_range: Option<DateRange>,
    /// Data categories to include.
    pub data_types: Vec<DataType>,
    /// Field names to drop from the output.
    pub exclude_fields: Vec<String>,
    /// Whitelist of fields to keep; `None` means keep all.
    pub include_fields: Option<Vec<String>>,
    /// Additional ad-hoc filters, keyed by filter name.
    pub custom_filters: HashMap<String, serde_json::Value>,
}
142
/// An inclusive UTC time window used by `DataFilters`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DateRange {
    /// Window start.
    pub start: DateTime<Utc>,
    /// Window end.
    pub end: DateTime<Utc>,
}
151
/// Output formats an export job may target.
///
/// Only a subset has working writers (see `DataExportManager::new`); the
/// rest are rejected at dispatch time with "Format not yet implemented".
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Hash, Eq)]
pub enum ExportFormat {
    Csv,
    Excel,
    Json,
    JsonPretty,
    Hdf5,
    Parquet,
    Xml,
    Yaml,
    Sqlite,
    MessagePack,
    Arrow,
    /// Caller-defined format identified by name.
    Custom(String),
}
180
/// Lifecycle state of an `ExportJob`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExportStatus {
    /// Submitted but not yet started.
    Pending,
    /// Currently being written.
    InProgress,
    /// Finished successfully.
    Completed,
    /// Writer returned an error; see `ExportJob::error_message`.
    Failed,
    /// Cancelled before completion via `cancel_job`.
    Cancelled,
}
190
/// Categories of exportable data, used for tagging and filtering.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DataType {
    TensorData,
    GradientData,
    PerformanceMetrics,
    MemoryProfiles,
    ActivityLogs,
    AnnotationData,
    CommentData,
    ModelDiagnostics,
    TrainingDynamics,
    ArchitectureAnalysis,
    /// Caller-defined category identified by name.
    Custom(String),
}
206
/// One unit of data handed to `start_export`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportableData {
    /// Unique item identifier.
    pub id: Uuid,
    /// Human-readable item name (written into XML output).
    pub name: String,
    /// Category tag for filtering.
    pub data_type: DataType,
    /// When the item was produced.
    pub timestamp: DateTime<Utc>,
    /// The payload itself.
    pub content: ExportDataContent,
    /// Arbitrary key/value annotations.
    pub metadata: HashMap<String, serde_json::Value>,
    /// Declared size in bytes; summed against `ExportConfig::max_file_size`.
    pub size: u64,
}
225
/// Payload shapes an `ExportableData` item can carry.
///
/// CSV export renders `Table` and `TimeSeries` natively; every other
/// variant is serialized as a JSON line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExportDataContent {
    /// Row/column tabular data.
    Table(TableData),
    /// Timestamped numeric series.
    TimeSeries(TimeSeriesData),
    /// Flat key/value map.
    KeyValue(HashMap<String, serde_json::Value>),
    /// Arbitrary JSON document.
    Structured(serde_json::Value),
    /// Raw bytes.
    Binary(Vec<u8>),
    /// Plain text.
    Text(String),
}
242
/// Row/column data with named headers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableData {
    /// Column names, in output order.
    pub headers: Vec<String>,
    /// Rows of cells; each inner vec is expected to align with `headers`.
    pub rows: Vec<Vec<serde_json::Value>>,
    /// Optional per-column type hints, keyed by column name.
    pub column_types: HashMap<String, ColumnType>,
}
253
/// One or more numeric series sharing a common timestamp axis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimeSeriesData {
    /// Shared time axis; series values are aligned by index.
    pub timestamps: Vec<DateTime<Utc>>,
    /// Series values keyed by series name; shorter series yield empty
    /// cells in CSV output.
    pub series: HashMap<String, Vec<f64>>,
    /// Free-form annotations about the series.
    pub metadata: HashMap<String, String>,
}
264
/// Type hint for a `TableData` column.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ColumnType {
    Integer,
    Float,
    String,
    Boolean,
    DateTime,
    Binary,
}
275
276impl DataExportManager {
277 pub fn new(config: ExportConfig) -> Self {
279 let supported_formats = vec![
280 ExportFormat::Csv,
281 ExportFormat::Excel,
282 ExportFormat::Json,
283 ExportFormat::JsonPretty,
284 ExportFormat::Xml,
285 ExportFormat::Yaml,
286 ExportFormat::Sqlite,
287 ];
288
289 Self {
290 config,
291 active_jobs: HashMap::new(),
292 export_history: Vec::new(),
293 supported_formats,
294 }
295 }
296
297 pub fn start_export(
299 &mut self,
300 name: String,
301 data: Vec<ExportableData>,
302 format: ExportFormat,
303 output_path: String,
304 options: ExportOptions,
305 ) -> Result<Uuid> {
306 let job_id = Uuid::new_v4();
307
308 let data_size: u64 = data.iter().map(|d| d.size).sum();
310
311 if data_size > self.config.max_file_size {
313 return Err(anyhow::anyhow!("Data size exceeds maximum file size limit"));
314 }
315
316 let job = ExportJob {
317 id: job_id,
318 name: name.clone(),
319 format: format.clone(),
320 output_path: output_path.clone(),
321 status: ExportStatus::Pending,
322 progress: 0.0,
323 started_at: Utc::now(),
324 completed_at: None,
325 data_size,
326 error_message: None,
327 options: options.clone(),
328 };
329
330 self.active_jobs.insert(job_id, job);
331
332 self.execute_export(job_id, data, options)?;
334
335 Ok(job_id)
336 }
337
338 fn execute_export(
340 &mut self,
341 job_id: Uuid,
342 data: Vec<ExportableData>,
343 options: ExportOptions,
344 ) -> Result<()> {
345 let (format, output_path) = {
347 if let Some(job) = self.active_jobs.get_mut(&job_id) {
348 job.status = ExportStatus::InProgress;
349 (job.format.clone(), job.output_path.clone())
350 } else {
351 return Err(anyhow::anyhow!("Export job not found"));
352 }
353 };
354
355 let result = match format {
356 ExportFormat::Csv => self.export_csv(&data, &output_path, &options),
357 ExportFormat::Json => self.export_json(&data, &output_path, &options),
358 ExportFormat::JsonPretty => self.export_json_pretty(&data, &output_path, &options),
359 ExportFormat::Excel => self.export_excel(&data, &output_path, &options),
360 ExportFormat::Xml => self.export_xml(&data, &output_path, &options),
361 ExportFormat::Yaml => self.export_yaml(&data, &output_path, &options),
362 ExportFormat::Sqlite => self.export_sqlite(&data, &output_path, &options),
363 _ => Err(anyhow::anyhow!("Format not yet implemented")),
364 };
365
366 if let Some(job) = self.active_jobs.get_mut(&job_id) {
368 match result {
369 Ok(_) => {
370 job.status = ExportStatus::Completed;
371 job.progress = 100.0;
372 job.completed_at = Some(Utc::now());
373
374 let job_copy = job.clone();
376 self.add_export_record(&job_copy);
377 },
378 Err(e) => {
379 job.status = ExportStatus::Failed;
380 job.error_message = Some(e.to_string());
381 },
382 }
383 }
384
385 Ok(())
386 }
387
388 fn export_csv(
390 &mut self,
391 data: &[ExportableData],
392 output_path: &str,
393 options: &ExportOptions,
394 ) -> Result<()> {
395 use std::fs::File;
396 use std::io::Write;
397
398 let mut file = File::create(output_path)?;
399
400 for item in data {
401 match &item.content {
402 ExportDataContent::Table(table_data) => {
403 if options.include_headers {
405 let header_line = table_data.headers.join(&options.separator);
406 writeln!(file, "{}", header_line)?;
407 }
408
409 for row in &table_data.rows {
411 let row_values: Vec<String> =
412 row.iter().map(|v| self.format_value_for_csv(v, options)).collect();
413 let row_line = row_values.join(&options.separator);
414 writeln!(file, "{}", row_line)?;
415 }
416 },
417 ExportDataContent::TimeSeries(ts_data) => {
418 if options.include_headers {
420 let mut headers = vec!["timestamp".to_string()];
421 headers.extend(ts_data.series.keys().cloned());
422 let header_line = headers.join(&options.separator);
423 writeln!(file, "{}", header_line)?;
424 }
425
426 for (i, timestamp) in ts_data.timestamps.iter().enumerate() {
427 let mut row = vec![timestamp.format(&options.date_format).to_string()];
428 for series_name in ts_data.series.keys() {
429 if let Some(series) = ts_data.series.get(series_name) {
430 if let Some(value) = series.get(i) {
431 row.push(format!(
432 "{:.precision$}",
433 value,
434 precision = options.float_precision as usize
435 ));
436 } else {
437 row.push("".to_string());
438 }
439 }
440 }
441 let row_line = row.join(&options.separator);
442 writeln!(file, "{}", row_line)?;
443 }
444 },
445 _ => {
446 let json_str = serde_json::to_string(&item.content)?;
448 writeln!(file, "{}", json_str)?;
449 },
450 }
451 }
452
453 Ok(())
454 }
455
456 fn export_json(
458 &mut self,
459 data: &[ExportableData],
460 output_path: &str,
461 _options: &ExportOptions,
462 ) -> Result<()> {
463 use std::fs::File;
464
465 let file = File::create(output_path)?;
466 serde_json::to_writer(file, data)?;
467 Ok(())
468 }
469
470 fn export_json_pretty(
472 &mut self,
473 data: &[ExportableData],
474 output_path: &str,
475 _options: &ExportOptions,
476 ) -> Result<()> {
477 use std::fs::File;
478
479 let file = File::create(output_path)?;
480 serde_json::to_writer_pretty(file, data)?;
481 Ok(())
482 }
483
484 fn export_excel(
486 &mut self,
487 data: &[ExportableData],
488 output_path: &str,
489 options: &ExportOptions,
490 ) -> Result<()> {
491 self.export_csv(data, output_path, options)
496 }
497
498 fn export_xml(
500 &mut self,
501 data: &[ExportableData],
502 output_path: &str,
503 _options: &ExportOptions,
504 ) -> Result<()> {
505 use std::fs::File;
506 use std::io::Write;
507
508 let mut file = File::create(output_path)?;
509
510 writeln!(file, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
511 writeln!(file, "<export_data>")?;
512
513 for item in data {
514 writeln!(
515 file,
516 " <data_item id=\"{}\" type=\"{:?}\">",
517 item.id, item.data_type
518 )?;
519 writeln!(file, " <name>{}</name>", item.name)?;
520 writeln!(
521 file,
522 " <timestamp>{}</timestamp>",
523 item.timestamp.to_rfc3339()
524 )?;
525 writeln!(file, " <size>{}</size>", item.size)?;
526
527 let content_json = serde_json::to_string(&item.content)?;
529 writeln!(file, " <content><![CDATA[{}]]></content>", content_json)?;
530
531 writeln!(file, " </data_item>")?;
532 }
533
534 writeln!(file, "</export_data>")?;
535 Ok(())
536 }
537
538 fn export_yaml(
540 &mut self,
541 data: &[ExportableData],
542 output_path: &str,
543 _options: &ExportOptions,
544 ) -> Result<()> {
545 use std::fs::File;
546
547 let file = File::create(output_path)?;
548 serde_json::to_writer_pretty(file, data)?;
549 Ok(())
550 }
551
552 fn export_sqlite(
554 &mut self,
555 data: &[ExportableData],
556 output_path: &str,
557 _options: &ExportOptions,
558 ) -> Result<()> {
559 self.export_json(data, output_path, _options)
562 }
563
564 fn format_value_for_csv(&self, value: &serde_json::Value, options: &ExportOptions) -> String {
566 match value {
567 serde_json::Value::Number(n) => {
568 if let Some(f) = n.as_f64() {
569 format!(
570 "{:.precision$}",
571 f,
572 precision = options.float_precision as usize
573 )
574 } else {
575 n.to_string()
576 }
577 },
578 serde_json::Value::String(s) => {
579 if s.contains(',') || s.contains('"') || s.contains('\n') {
581 format!("\"{}\"", s.replace('"', "\"\""))
582 } else {
583 s.clone()
584 }
585 },
586 _ => value.to_string(),
587 }
588 }
589
590 fn add_export_record(&mut self, job: &ExportJob) {
592 let record = ExportRecord {
593 id: Uuid::new_v4(),
594 job_id: job.id,
595 timestamp: Utc::now(),
596 file_path: job.output_path.clone(),
597 file_size: job.data_size,
598 format: job.format.clone(),
599 success: matches!(job.status, ExportStatus::Completed),
600 duration: job
601 .completed_at
602 .map(|end| (end - job.started_at).num_milliseconds() as f64 / 1000.0)
603 .unwrap_or(0.0),
604 };
605
606 self.export_history.push(record);
607 }
608
609 pub fn get_job_status(&self, job_id: Uuid) -> Option<&ExportJob> {
611 self.active_jobs.get(&job_id)
612 }
613
614 pub fn get_export_history(&self) -> &[ExportRecord] {
616 &self.export_history
617 }
618
619 pub fn create_template(
621 &mut self,
622 name: String,
623 description: String,
624 format: ExportFormat,
625 options: ExportOptions,
626 filters: DataFilters,
627 tags: Vec<String>,
628 ) -> String {
629 let template_id = Uuid::new_v4().to_string();
630
631 let template = ExportTemplate {
632 id: template_id.clone(),
633 name,
634 description,
635 format,
636 options,
637 filters,
638 tags,
639 };
640
641 self.config.templates.push(template);
642 template_id
643 }
644
645 pub fn apply_template(
647 &self,
648 template_id: &str,
649 ) -> Option<(&ExportFormat, &ExportOptions, &DataFilters)> {
650 self.config
651 .templates
652 .iter()
653 .find(|t| t.id == template_id)
654 .map(|t| (&t.format, &t.options, &t.filters))
655 }
656
657 pub fn get_supported_formats(&self) -> &[ExportFormat] {
659 &self.supported_formats
660 }
661
662 pub fn cancel_job(&mut self, job_id: Uuid) -> Result<()> {
664 if let Some(job) = self.active_jobs.get_mut(&job_id) {
665 if matches!(job.status, ExportStatus::Pending | ExportStatus::InProgress) {
666 job.status = ExportStatus::Cancelled;
667 Ok(())
668 } else {
669 Err(anyhow::anyhow!("Job cannot be cancelled in current status"))
670 }
671 } else {
672 Err(anyhow::anyhow!("Job not found"))
673 }
674 }
675
676 pub fn get_export_statistics(&self) -> ExportStatistics {
678 let total_exports = self.export_history.len();
679 let successful_exports = self.export_history.iter().filter(|r| r.success).count();
680 let total_size: u64 = self.export_history.iter().map(|r| r.file_size).sum();
681 let avg_duration = if total_exports > 0 {
682 self.export_history.iter().map(|r| r.duration).sum::<f64>() / total_exports as f64
683 } else {
684 0.0
685 };
686
687 let format_stats: HashMap<ExportFormat, usize> =
688 self.export_history.iter().fold(HashMap::new(), |mut acc, record| {
689 *acc.entry(record.format.clone()).or_insert(0) += 1;
690 acc
691 });
692
693 ExportStatistics {
694 total_exports,
695 successful_exports,
696 failed_exports: total_exports - successful_exports,
697 total_size_bytes: total_size,
698 average_duration_seconds: avg_duration,
699 format_statistics: format_stats,
700 active_jobs: self.active_jobs.len(),
701 }
702 }
703}
704
/// Aggregate figures computed by `get_export_statistics`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportStatistics {
    /// Number of history records.
    pub total_exports: usize,
    /// Records flagged `success == true`.
    pub successful_exports: usize,
    /// `total_exports - successful_exports`.
    pub failed_exports: usize,
    /// Sum of `file_size` over all records, in bytes.
    pub total_size_bytes: u64,
    /// Mean record duration in seconds (0.0 for an empty history).
    pub average_duration_seconds: f64,
    /// Export count per format.
    pub format_statistics: HashMap<ExportFormat, usize>,
    /// Number of jobs currently tracked by the manager.
    pub active_jobs: usize,
}
716
717impl Default for ExportConfig {
718 fn default() -> Self {
719 Self {
720 default_directory: "./exports".to_string(),
721 max_file_size: 1024 * 1024 * 1024, enable_compression: true,
723 default_format: ExportFormat::Json,
724 include_metadata: true,
725 templates: Vec::new(),
726 }
727 }
728}
729
730impl Default for ExportOptions {
731 fn default() -> Self {
732 Self {
733 include_headers: true,
734 date_format: "%Y-%m-%d %H:%M:%S UTC".to_string(),
735 float_precision: 6,
736 separator: ",".to_string(),
737 compression_level: 6,
738 include_metadata: true,
739 custom_options: HashMap::new(),
740 }
741 }
742}
743
744impl Default for DataFilters {
745 fn default() -> Self {
746 Self {
747 date_range: None,
748 data_types: vec![
749 DataType::TensorData,
750 DataType::GradientData,
751 DataType::PerformanceMetrics,
752 ],
753 exclude_fields: Vec::new(),
754 include_fields: None,
755 custom_filters: HashMap::new(),
756 }
757 }
758}
759
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Builds one tabular data item with three columns and two rows.
    // approx_constant: 3.14 / 2.71 are arbitrary sample values, not pi/e.
    #[allow(clippy::approx_constant)]
    fn create_test_data() -> Vec<ExportableData> {
        let table_data = TableData {
            headers: vec![
                "id".to_string(),
                "value".to_string(),
                "timestamp".to_string(),
            ],
            rows: vec![
                vec![
                    serde_json::Value::Number(serde_json::Number::from(1)),
                    serde_json::Value::Number(
                        serde_json::Number::from_f64(3.14).expect("3.14 is a finite f64"),
                    ),
                    serde_json::Value::String("2023-01-01T12:00:00Z".to_string()),
                ],
                vec![
                    serde_json::Value::Number(serde_json::Number::from(2)),
                    serde_json::Value::Number(
                        serde_json::Number::from_f64(2.71).expect("2.71 is a finite f64"),
                    ),
                    serde_json::Value::String("2023-01-01T12:01:00Z".to_string()),
                ],
            ],
            column_types: HashMap::new(),
        };

        vec![ExportableData {
            id: Uuid::new_v4(),
            name: "Test Data".to_string(),
            data_type: DataType::TensorData,
            timestamp: Utc::now(),
            content: ExportDataContent::Table(table_data),
            metadata: HashMap::new(),
            size: 1024,
        }]
    }

    #[test]
    fn test_export_manager_creation() {
        let config = ExportConfig::default();
        let manager = DataExportManager::new(config);

        assert!(manager.get_supported_formats().contains(&ExportFormat::Json));
        assert!(manager.get_supported_formats().contains(&ExportFormat::Csv));
    }

    #[test]
    fn test_csv_export() {
        let config = ExportConfig::default();
        let mut manager = DataExportManager::new(config);
        let test_data = create_test_data();

        let temp_dir = tempdir().expect("temp dir creation failed");
        let output_path = temp_dir.path().join("test.csv").to_string_lossy().to_string();

        let job_id = manager
            .start_export(
                "Test CSV Export".to_string(),
                test_data,
                ExportFormat::Csv,
                output_path.clone(),
                ExportOptions::default(),
            )
            .expect("CSV export should start");

        assert!(manager.active_jobs.contains_key(&job_id));
        assert!(std::path::Path::new(&output_path).exists());
    }

    #[test]
    fn test_json_export() {
        let config = ExportConfig::default();
        let mut manager = DataExportManager::new(config);
        let test_data = create_test_data();

        let temp_dir = tempdir().expect("temp dir creation failed");
        let output_path = temp_dir.path().join("test.json").to_string_lossy().to_string();

        let job_id = manager
            .start_export(
                "Test JSON Export".to_string(),
                test_data,
                ExportFormat::Json,
                output_path.clone(),
                ExportOptions::default(),
            )
            .expect("JSON export should start");

        assert!(manager.active_jobs.contains_key(&job_id));
        assert!(std::path::Path::new(&output_path).exists());
    }

    #[test]
    fn test_export_template() {
        let config = ExportConfig::default();
        let mut manager = DataExportManager::new(config);

        let template_id = manager.create_template(
            "CSV Template".to_string(),
            "Standard CSV export".to_string(),
            ExportFormat::Csv,
            ExportOptions::default(),
            DataFilters::default(),
            vec!["csv".to_string(), "standard".to_string()],
        );

        // (The original expect message here wrongly mentioned temp file
        // creation; this is a template lookup.)
        let (format, options, _filters) =
            manager.apply_template(&template_id).expect("template should resolve");
        assert_eq!(*format, ExportFormat::Csv);
        assert!(options.include_headers);
    }

    #[test]
    fn test_export_statistics() {
        let config = ExportConfig::default();
        let mut manager = DataExportManager::new(config);

        // Seed the history directly; statistics only read `export_history`.
        manager.export_history.push(ExportRecord {
            id: Uuid::new_v4(),
            job_id: Uuid::new_v4(),
            timestamp: Utc::now(),
            file_path: "test1.csv".to_string(),
            file_size: 1024,
            format: ExportFormat::Csv,
            success: true,
            duration: 2.5,
        });

        manager.export_history.push(ExportRecord {
            id: Uuid::new_v4(),
            job_id: Uuid::new_v4(),
            timestamp: Utc::now(),
            file_path: "test2.json".to_string(),
            file_size: 2048,
            format: ExportFormat::Json,
            success: true,
            duration: 1.8,
        });

        let stats = manager.get_export_statistics();
        assert_eq!(stats.total_exports, 2);
        assert_eq!(stats.successful_exports, 2);
        assert_eq!(stats.total_size_bytes, 3072);
    }
}