1use anyhow::Result;
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use uuid::Uuid;
11
12#[derive(Debug, Clone)]
14pub struct DataExportManager {
15 config: ExportConfig,
17 active_jobs: HashMap<Uuid, ExportJob>,
19 export_history: Vec<ExportRecord>,
21 supported_formats: Vec<ExportFormat>,
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct ExportConfig {
28 pub default_directory: String,
30 pub max_file_size: u64,
32 pub enable_compression: bool,
34 pub default_format: ExportFormat,
36 pub include_metadata: bool,
38 pub templates: Vec<ExportTemplate>,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct ExportJob {
45 pub id: Uuid,
47 pub name: String,
49 pub format: ExportFormat,
51 pub output_path: String,
53 pub status: ExportStatus,
55 pub progress: f64,
57 pub started_at: DateTime<Utc>,
59 pub completed_at: Option<DateTime<Utc>>,
61 pub data_size: u64,
63 pub error_message: Option<String>,
65 pub options: ExportOptions,
67}
68
69#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct ExportRecord {
72 pub id: Uuid,
74 pub job_id: Uuid,
76 pub timestamp: DateTime<Utc>,
78 pub file_path: String,
80 pub file_size: u64,
82 pub format: ExportFormat,
84 pub success: bool,
86 pub duration: f64,
88}
89
90#[derive(Debug, Clone, Serialize, Deserialize)]
92pub struct ExportTemplate {
93 pub id: String,
95 pub name: String,
97 pub description: String,
99 pub format: ExportFormat,
101 pub options: ExportOptions,
103 pub filters: DataFilters,
105 pub tags: Vec<String>,
107}
108
109#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct ExportOptions {
112 pub include_headers: bool,
114 pub date_format: String,
116 pub float_precision: u32,
118 pub separator: String,
120 pub compression_level: u32,
122 pub include_metadata: bool,
124 pub custom_options: HashMap<String, serde_json::Value>,
126}
127
128#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct DataFilters {
131 pub date_range: Option<DateRange>,
133 pub data_types: Vec<DataType>,
135 pub exclude_fields: Vec<String>,
137 pub include_fields: Option<Vec<String>>,
139 pub custom_filters: HashMap<String, serde_json::Value>,
141}
142
143#[derive(Debug, Clone, Serialize, Deserialize)]
145pub struct DateRange {
146 pub start: DateTime<Utc>,
148 pub end: DateTime<Utc>,
150}
151
152#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Hash, Eq)]
154pub enum ExportFormat {
155 Csv,
157 Excel,
159 Json,
161 JsonPretty,
163 Hdf5,
165 Parquet,
167 Xml,
169 Yaml,
171 Sqlite,
173 MessagePack,
175 Arrow,
177 Custom(String),
179}
180
181#[derive(Debug, Clone, Serialize, Deserialize)]
183pub enum ExportStatus {
184 Pending,
185 InProgress,
186 Completed,
187 Failed,
188 Cancelled,
189}
190
191#[derive(Debug, Clone, Serialize, Deserialize)]
193pub enum DataType {
194 TensorData,
195 GradientData,
196 PerformanceMetrics,
197 MemoryProfiles,
198 ActivityLogs,
199 AnnotationData,
200 CommentData,
201 ModelDiagnostics,
202 TrainingDynamics,
203 ArchitectureAnalysis,
204 Custom(String),
205}
206
207#[derive(Debug, Clone, Serialize, Deserialize)]
209pub struct ExportableData {
210 pub id: Uuid,
212 pub name: String,
214 pub data_type: DataType,
216 pub timestamp: DateTime<Utc>,
218 pub content: ExportDataContent,
220 pub metadata: HashMap<String, serde_json::Value>,
222 pub size: u64,
224}
225
226#[derive(Debug, Clone, Serialize, Deserialize)]
228pub enum ExportDataContent {
229 Table(TableData),
231 TimeSeries(TimeSeriesData),
233 KeyValue(HashMap<String, serde_json::Value>),
235 Structured(serde_json::Value),
237 Binary(Vec<u8>),
239 Text(String),
241}
242
243#[derive(Debug, Clone, Serialize, Deserialize)]
245pub struct TableData {
246 pub headers: Vec<String>,
248 pub rows: Vec<Vec<serde_json::Value>>,
250 pub column_types: HashMap<String, ColumnType>,
252}
253
254#[derive(Debug, Clone, Serialize, Deserialize)]
256pub struct TimeSeriesData {
257 pub timestamps: Vec<DateTime<Utc>>,
259 pub series: HashMap<String, Vec<f64>>,
261 pub metadata: HashMap<String, String>,
263}
264
265#[derive(Debug, Clone, Serialize, Deserialize)]
267pub enum ColumnType {
268 Integer,
269 Float,
270 String,
271 Boolean,
272 DateTime,
273 Binary,
274}
275
276impl DataExportManager {
277 pub fn new(config: ExportConfig) -> Self {
279 let supported_formats = vec![
280 ExportFormat::Csv,
281 ExportFormat::Excel,
282 ExportFormat::Json,
283 ExportFormat::JsonPretty,
284 ExportFormat::Xml,
285 ExportFormat::Yaml,
286 ExportFormat::Sqlite,
287 ];
288
289 Self {
290 config,
291 active_jobs: HashMap::new(),
292 export_history: Vec::new(),
293 supported_formats,
294 }
295 }
296
297 pub fn start_export(
299 &mut self,
300 name: String,
301 data: Vec<ExportableData>,
302 format: ExportFormat,
303 output_path: String,
304 options: ExportOptions,
305 ) -> Result<Uuid> {
306 let job_id = Uuid::new_v4();
307
308 let data_size: u64 = data.iter().map(|d| d.size).sum();
310
311 if data_size > self.config.max_file_size {
313 return Err(anyhow::anyhow!("Data size exceeds maximum file size limit"));
314 }
315
316 let job = ExportJob {
317 id: job_id,
318 name: name.clone(),
319 format: format.clone(),
320 output_path: output_path.clone(),
321 status: ExportStatus::Pending,
322 progress: 0.0,
323 started_at: Utc::now(),
324 completed_at: None,
325 data_size,
326 error_message: None,
327 options: options.clone(),
328 };
329
330 self.active_jobs.insert(job_id, job);
331
332 self.execute_export(job_id, data, options)?;
334
335 Ok(job_id)
336 }
337
338 fn execute_export(
340 &mut self,
341 job_id: Uuid,
342 data: Vec<ExportableData>,
343 options: ExportOptions,
344 ) -> Result<()> {
345 let (format, output_path) = {
347 if let Some(job) = self.active_jobs.get_mut(&job_id) {
348 job.status = ExportStatus::InProgress;
349 (job.format.clone(), job.output_path.clone())
350 } else {
351 return Err(anyhow::anyhow!("Export job not found"));
352 }
353 };
354
355 let result = match format {
356 ExportFormat::Csv => self.export_csv(&data, &output_path, &options),
357 ExportFormat::Json => self.export_json(&data, &output_path, &options),
358 ExportFormat::JsonPretty => self.export_json_pretty(&data, &output_path, &options),
359 ExportFormat::Excel => self.export_excel(&data, &output_path, &options),
360 ExportFormat::Xml => self.export_xml(&data, &output_path, &options),
361 ExportFormat::Yaml => self.export_yaml(&data, &output_path, &options),
362 ExportFormat::Sqlite => self.export_sqlite(&data, &output_path, &options),
363 ExportFormat::Hdf5 => Err(anyhow::anyhow!(
367 "HDF5 export is not supported in this build. Use JSON or CSV instead."
368 )),
369 ExportFormat::Parquet => Err(anyhow::anyhow!(
370 "Parquet export is not supported in this build. Use JSON or CSV instead."
371 )),
372 ExportFormat::MessagePack => Err(anyhow::anyhow!(
373 "MessagePack export is not supported in this build. Use JSON instead."
374 )),
375 ExportFormat::Arrow => Err(anyhow::anyhow!(
376 "Apache Arrow export is not supported in this build. Use JSON or CSV instead."
377 )),
378 ExportFormat::Custom(ref name) => Err(anyhow::anyhow!(
379 "Custom export format '{}' is not registered. \
380 Register a handler or use one of the built-in formats.",
381 name
382 )),
383 };
384
385 if let Some(job) = self.active_jobs.get_mut(&job_id) {
387 match result {
388 Ok(_) => {
389 job.status = ExportStatus::Completed;
390 job.progress = 100.0;
391 job.completed_at = Some(Utc::now());
392
393 let job_copy = job.clone();
395 self.add_export_record(&job_copy);
396 },
397 Err(e) => {
398 job.status = ExportStatus::Failed;
399 job.error_message = Some(e.to_string());
400 },
401 }
402 }
403
404 Ok(())
405 }
406
407 fn export_csv(
409 &mut self,
410 data: &[ExportableData],
411 output_path: &str,
412 options: &ExportOptions,
413 ) -> Result<()> {
414 use std::fs::File;
415 use std::io::Write;
416
417 let mut file = File::create(output_path)?;
418
419 for item in data {
420 match &item.content {
421 ExportDataContent::Table(table_data) => {
422 if options.include_headers {
424 let header_line = table_data.headers.join(&options.separator);
425 writeln!(file, "{}", header_line)?;
426 }
427
428 for row in &table_data.rows {
430 let row_values: Vec<String> =
431 row.iter().map(|v| self.format_value_for_csv(v, options)).collect();
432 let row_line = row_values.join(&options.separator);
433 writeln!(file, "{}", row_line)?;
434 }
435 },
436 ExportDataContent::TimeSeries(ts_data) => {
437 if options.include_headers {
439 let mut headers = vec!["timestamp".to_string()];
440 headers.extend(ts_data.series.keys().cloned());
441 let header_line = headers.join(&options.separator);
442 writeln!(file, "{}", header_line)?;
443 }
444
445 for (i, timestamp) in ts_data.timestamps.iter().enumerate() {
446 let mut row = vec![timestamp.format(&options.date_format).to_string()];
447 for series_name in ts_data.series.keys() {
448 if let Some(series) = ts_data.series.get(series_name) {
449 if let Some(value) = series.get(i) {
450 row.push(format!(
451 "{:.precision$}",
452 value,
453 precision = options.float_precision as usize
454 ));
455 } else {
456 row.push("".to_string());
457 }
458 }
459 }
460 let row_line = row.join(&options.separator);
461 writeln!(file, "{}", row_line)?;
462 }
463 },
464 _ => {
465 let json_str = serde_json::to_string(&item.content)?;
467 writeln!(file, "{}", json_str)?;
468 },
469 }
470 }
471
472 Ok(())
473 }
474
475 fn export_json(
477 &mut self,
478 data: &[ExportableData],
479 output_path: &str,
480 _options: &ExportOptions,
481 ) -> Result<()> {
482 use std::fs::File;
483
484 let file = File::create(output_path)?;
485 serde_json::to_writer(file, data)?;
486 Ok(())
487 }
488
489 fn export_json_pretty(
491 &mut self,
492 data: &[ExportableData],
493 output_path: &str,
494 _options: &ExportOptions,
495 ) -> Result<()> {
496 use std::fs::File;
497
498 let file = File::create(output_path)?;
499 serde_json::to_writer_pretty(file, data)?;
500 Ok(())
501 }
502
503 fn export_excel(
505 &mut self,
506 data: &[ExportableData],
507 output_path: &str,
508 options: &ExportOptions,
509 ) -> Result<()> {
510 self.export_csv(data, output_path, options)
515 }
516
517 fn export_xml(
519 &mut self,
520 data: &[ExportableData],
521 output_path: &str,
522 _options: &ExportOptions,
523 ) -> Result<()> {
524 use std::fs::File;
525 use std::io::Write;
526
527 let mut file = File::create(output_path)?;
528
529 writeln!(file, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
530 writeln!(file, "<export_data>")?;
531
532 for item in data {
533 writeln!(
534 file,
535 " <data_item id=\"{}\" type=\"{:?}\">",
536 item.id, item.data_type
537 )?;
538 writeln!(file, " <name>{}</name>", item.name)?;
539 writeln!(
540 file,
541 " <timestamp>{}</timestamp>",
542 item.timestamp.to_rfc3339()
543 )?;
544 writeln!(file, " <size>{}</size>", item.size)?;
545
546 let content_json = serde_json::to_string(&item.content)?;
548 writeln!(file, " <content><![CDATA[{}]]></content>", content_json)?;
549
550 writeln!(file, " </data_item>")?;
551 }
552
553 writeln!(file, "</export_data>")?;
554 Ok(())
555 }
556
557 fn export_yaml(
559 &mut self,
560 data: &[ExportableData],
561 output_path: &str,
562 _options: &ExportOptions,
563 ) -> Result<()> {
564 use std::fs::File;
565
566 let file = File::create(output_path)?;
567 serde_json::to_writer_pretty(file, data)?;
568 Ok(())
569 }
570
571 fn export_sqlite(
573 &mut self,
574 data: &[ExportableData],
575 output_path: &str,
576 _options: &ExportOptions,
577 ) -> Result<()> {
578 self.export_json(data, output_path, _options)
581 }
582
583 fn format_value_for_csv(&self, value: &serde_json::Value, options: &ExportOptions) -> String {
585 match value {
586 serde_json::Value::Number(n) => {
587 if let Some(f) = n.as_f64() {
588 format!(
589 "{:.precision$}",
590 f,
591 precision = options.float_precision as usize
592 )
593 } else {
594 n.to_string()
595 }
596 },
597 serde_json::Value::String(s) => {
598 if s.contains(',') || s.contains('"') || s.contains('\n') {
600 format!("\"{}\"", s.replace('"', "\"\""))
601 } else {
602 s.clone()
603 }
604 },
605 _ => value.to_string(),
606 }
607 }
608
609 fn add_export_record(&mut self, job: &ExportJob) {
611 let record = ExportRecord {
612 id: Uuid::new_v4(),
613 job_id: job.id,
614 timestamp: Utc::now(),
615 file_path: job.output_path.clone(),
616 file_size: job.data_size,
617 format: job.format.clone(),
618 success: matches!(job.status, ExportStatus::Completed),
619 duration: job
620 .completed_at
621 .map(|end| (end - job.started_at).num_milliseconds() as f64 / 1000.0)
622 .unwrap_or(0.0),
623 };
624
625 self.export_history.push(record);
626 }
627
628 pub fn get_job_status(&self, job_id: Uuid) -> Option<&ExportJob> {
630 self.active_jobs.get(&job_id)
631 }
632
633 pub fn get_export_history(&self) -> &[ExportRecord] {
635 &self.export_history
636 }
637
638 pub fn create_template(
640 &mut self,
641 name: String,
642 description: String,
643 format: ExportFormat,
644 options: ExportOptions,
645 filters: DataFilters,
646 tags: Vec<String>,
647 ) -> String {
648 let template_id = Uuid::new_v4().to_string();
649
650 let template = ExportTemplate {
651 id: template_id.clone(),
652 name,
653 description,
654 format,
655 options,
656 filters,
657 tags,
658 };
659
660 self.config.templates.push(template);
661 template_id
662 }
663
664 pub fn apply_template(
666 &self,
667 template_id: &str,
668 ) -> Option<(&ExportFormat, &ExportOptions, &DataFilters)> {
669 self.config
670 .templates
671 .iter()
672 .find(|t| t.id == template_id)
673 .map(|t| (&t.format, &t.options, &t.filters))
674 }
675
676 pub fn get_supported_formats(&self) -> &[ExportFormat] {
678 &self.supported_formats
679 }
680
681 pub fn cancel_job(&mut self, job_id: Uuid) -> Result<()> {
683 if let Some(job) = self.active_jobs.get_mut(&job_id) {
684 if matches!(job.status, ExportStatus::Pending | ExportStatus::InProgress) {
685 job.status = ExportStatus::Cancelled;
686 Ok(())
687 } else {
688 Err(anyhow::anyhow!("Job cannot be cancelled in current status"))
689 }
690 } else {
691 Err(anyhow::anyhow!("Job not found"))
692 }
693 }
694
695 pub fn get_export_statistics(&self) -> ExportStatistics {
697 let total_exports = self.export_history.len();
698 let successful_exports = self.export_history.iter().filter(|r| r.success).count();
699 let total_size: u64 = self.export_history.iter().map(|r| r.file_size).sum();
700 let avg_duration = if total_exports > 0 {
701 self.export_history.iter().map(|r| r.duration).sum::<f64>() / total_exports as f64
702 } else {
703 0.0
704 };
705
706 let format_stats: HashMap<ExportFormat, usize> =
707 self.export_history.iter().fold(HashMap::new(), |mut acc, record| {
708 *acc.entry(record.format.clone()).or_insert(0) += 1;
709 acc
710 });
711
712 ExportStatistics {
713 total_exports,
714 successful_exports,
715 failed_exports: total_exports - successful_exports,
716 total_size_bytes: total_size,
717 average_duration_seconds: avg_duration,
718 format_statistics: format_stats,
719 active_jobs: self.active_jobs.len(),
720 }
721 }
722}
723
724#[derive(Debug, Clone, Serialize, Deserialize)]
726pub struct ExportStatistics {
727 pub total_exports: usize,
728 pub successful_exports: usize,
729 pub failed_exports: usize,
730 pub total_size_bytes: u64,
731 pub average_duration_seconds: f64,
732 pub format_statistics: HashMap<ExportFormat, usize>,
733 pub active_jobs: usize,
734}
735
736impl Default for ExportConfig {
737 fn default() -> Self {
738 Self {
739 default_directory: "./exports".to_string(),
740 max_file_size: 1024 * 1024 * 1024, enable_compression: true,
742 default_format: ExportFormat::Json,
743 include_metadata: true,
744 templates: Vec::new(),
745 }
746 }
747}
748
749impl Default for ExportOptions {
750 fn default() -> Self {
751 Self {
752 include_headers: true,
753 date_format: "%Y-%m-%d %H:%M:%S UTC".to_string(),
754 float_precision: 6,
755 separator: ",".to_string(),
756 compression_level: 6,
757 include_metadata: true,
758 custom_options: HashMap::new(),
759 }
760 }
761}
762
763impl Default for DataFilters {
764 fn default() -> Self {
765 Self {
766 date_range: None,
767 data_types: vec![
768 DataType::TensorData,
769 DataType::GradientData,
770 DataType::PerformanceMetrics,
771 ],
772 exclude_fields: Vec::new(),
773 include_fields: None,
774 custom_filters: HashMap::new(),
775 }
776 }
777}
778
779#[cfg(test)]
780mod tests {
781 use super::*;
782 use tempfile::tempdir;
783
784 #[allow(clippy::approx_constant)]
786 fn create_test_data() -> Vec<ExportableData> {
787 let table_data = TableData {
788 headers: vec![
789 "id".to_string(),
790 "value".to_string(),
791 "timestamp".to_string(),
792 ],
793 rows: vec![
794 vec![
795 serde_json::Value::Number(serde_json::Number::from(1)),
796 serde_json::Value::Number(
797 serde_json::Number::from_f64(3.14).expect("operation failed in test"),
798 ),
799 serde_json::Value::String("2023-01-01T12:00:00Z".to_string()),
800 ],
801 vec![
802 serde_json::Value::Number(serde_json::Number::from(2)),
803 serde_json::Value::Number(
804 serde_json::Number::from_f64(2.71).expect("operation failed in test"),
805 ),
806 serde_json::Value::String("2023-01-01T12:01:00Z".to_string()),
807 ],
808 ],
809 column_types: HashMap::new(),
810 };
811
812 vec![ExportableData {
813 id: Uuid::new_v4(),
814 name: "Test Data".to_string(),
815 data_type: DataType::TensorData,
816 timestamp: Utc::now(),
817 content: ExportDataContent::Table(table_data),
818 metadata: HashMap::new(),
819 size: 1024,
820 }]
821 }
822
/// A freshly built manager advertises at least the JSON and CSV formats.
#[test]
fn test_export_manager_creation() {
    let manager = DataExportManager::new(ExportConfig::default());
    let formats = manager.get_supported_formats();

    assert!(formats.contains(&ExportFormat::Json));
    assert!(formats.contains(&ExportFormat::Csv));
}
831
/// A CSV export of the sample table completes and writes the header plus one line per row.
#[test]
fn test_csv_export() {
    let config = ExportConfig::default();
    let mut manager = DataExportManager::new(config);
    let test_data = create_test_data();

    let temp_dir = tempdir().expect("temp file creation failed");
    let output_path = temp_dir.path().join("test.csv").to_string_lossy().to_string();

    let job_id = manager
        .start_export(
            "Test CSV Export".to_string(),
            test_data,
            ExportFormat::Csv,
            output_path.clone(),
            ExportOptions::default(),
        )
        .expect("operation failed in test");

    // `start_export` returns Ok even when the writer fails, so the job status has to be
    // checked explicitly; the mere existence of the file proves nothing.
    let job = manager
        .get_job_status(job_id)
        .expect("job should be registered after start_export");
    assert!(
        matches!(job.status, ExportStatus::Completed),
        "CSV export did not complete: {:?}",
        job.error_message
    );

    let written = std::fs::read_to_string(&output_path).expect("exported CSV should be readable");
    let mut lines = written.lines();
    assert_eq!(lines.next(), Some("id,value,timestamp"));
    // Two data rows follow the header.
    assert_eq!(lines.count(), 2);
}
857
/// A JSON export of the sample data completes and writes a one-element JSON array.
#[test]
fn test_json_export() {
    let config = ExportConfig::default();
    let mut manager = DataExportManager::new(config);
    let test_data = create_test_data();

    let temp_dir = tempdir().expect("temp file creation failed");
    let output_path = temp_dir.path().join("test.json").to_string_lossy().to_string();

    let job_id = manager
        .start_export(
            "Test JSON Export".to_string(),
            test_data,
            ExportFormat::Json,
            output_path.clone(),
            ExportOptions::default(),
        )
        .expect("operation failed in test");

    // `start_export` returns Ok even when the writer fails, so the job status has to be
    // checked explicitly; the mere existence of the file proves nothing.
    let job = manager
        .get_job_status(job_id)
        .expect("job should be registered after start_export");
    assert!(
        matches!(job.status, ExportStatus::Completed),
        "JSON export did not complete: {:?}",
        job.error_message
    );

    let written = std::fs::read_to_string(&output_path).expect("exported JSON should be readable");
    let parsed: serde_json::Value =
        serde_json::from_str(&written).expect("exported file should contain valid JSON");
    let items = parsed.as_array().expect("top-level JSON value should be an array");
    assert_eq!(items.len(), 1);
    assert_eq!(items[0]["name"], "Test Data");
}
880
/// A created template can be looked up by the returned id; unknown ids yield `None`.
#[test]
fn test_export_template() {
    let config = ExportConfig::default();
    let mut manager = DataExportManager::new(config);

    let template_id = manager.create_template(
        "CSV Template".to_string(),
        "Standard CSV export".to_string(),
        ExportFormat::Csv,
        ExportOptions::default(),
        DataFilters::default(),
        vec!["csv".to_string(), "standard".to_string()],
    );

    let (format, options, _filters) = manager
        .apply_template(&template_id)
        .expect("template should be found by the id returned from create_template");
    assert_eq!(*format, ExportFormat::Csv);
    assert!(options.include_headers);

    assert!(manager.apply_template("no-such-template").is_none());
}
900
/// Statistics over two hand-inserted successful records report the right totals.
#[test]
fn test_export_statistics() {
    let mut manager = DataExportManager::new(ExportConfig::default());

    // Successful record with the given path, size, format and duration.
    let successful_record = |path: &str, size: u64, format: ExportFormat, seconds: f64| ExportRecord {
        id: Uuid::new_v4(),
        job_id: Uuid::new_v4(),
        timestamp: Utc::now(),
        file_path: path.to_string(),
        file_size: size,
        format,
        success: true,
        duration: seconds,
    };

    let first = successful_record("test1.csv", 1024, ExportFormat::Csv, 2.5);
    manager.export_history.push(first);
    let second = successful_record("test2.json", 2048, ExportFormat::Json, 1.8);
    manager.export_history.push(second);

    let stats = manager.get_export_statistics();
    assert_eq!(stats.total_exports, 2);
    assert_eq!(stats.successful_exports, 2);
    assert_eq!(stats.total_size_bytes, 3072);
}
934}