1pub mod core;
12
13pub use core::*;
15
16use crate::{DataConfig, GenerationResult, OutputFormat, SchemaDefinition};
18use crate::{Error, Result};
19use serde::{Deserialize, Serialize};
20use std::collections::HashMap;
21use std::path::Path;
22use tokio::fs;
23
/// Outcome of validating a dataset against a schema.
///
/// Produced by `utils::validate_dataset_with_details`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetValidationResult {
    /// `true` when validation produced no error messages.
    pub valid: bool,
    /// Human-readable validation error messages.
    pub errors: Vec<String>,
    /// Non-fatal findings. The validator in this file always leaves this empty.
    pub warnings: Vec<String>,
    /// Number of rows in the dataset that was validated.
    pub total_rows_validated: usize,
}
36
/// Descriptive information attached to a [`Dataset`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetMetadata {
    /// Dataset name; also the key under which a `DatasetCollection` stores it.
    pub name: String,
    /// Optional free-form description.
    pub description: Option<String>,
    /// Name of the schema the rows were generated from.
    pub schema_name: String,
    /// Number of rows recorded for the dataset.
    pub row_count: usize,
    /// Generation configuration associated with the dataset.
    pub config: DataConfig,
    /// Moment this metadata value was created (UTC).
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Generation time in milliseconds, as reported by the generation result.
    pub generation_time_ms: u128,
    /// Serialization format used by `Dataset::save_to_file`.
    pub format: OutputFormat,
    /// Size of the backing file in bytes, when known.
    pub file_size_bytes: Option<u64>,
    /// Arbitrary key/value tags.
    pub tags: HashMap<String, String>,
}
61
62impl Default for DatasetMetadata {
63 fn default() -> Self {
64 Self {
65 name: String::new(),
66 description: None,
67 schema_name: String::new(),
68 row_count: 0,
69 config: DataConfig::default(),
70 created_at: chrono::Utc::now(),
71 generation_time_ms: 0,
72 format: OutputFormat::Json,
73 file_size_bytes: None,
74 tags: HashMap::new(),
75 }
76 }
77}
78
79impl DatasetMetadata {
80 pub fn new(
82 name: String,
83 schema_name: String,
84 result: &GenerationResult,
85 config: DataConfig,
86 ) -> Self {
87 Self {
88 name,
89 description: None,
90 schema_name,
91 row_count: result.count,
92 config,
93 created_at: chrono::Utc::now(),
94 generation_time_ms: result.generation_time_ms,
95 format: OutputFormat::Json,
96 file_size_bytes: None,
97 tags: HashMap::new(),
98 }
99 }
100
101 pub fn with_description(mut self, description: String) -> Self {
103 self.description = Some(description);
104 self
105 }
106
107 pub fn with_tag(mut self, key: String, value: String) -> Self {
109 self.tags.insert(key, value);
110 self
111 }
112
113 pub fn with_file_size(mut self, size: u64) -> Self {
115 self.file_size_bytes = Some(size);
116 self
117 }
118}
119
/// A set of rows held in memory together with their metadata.
#[derive(Debug)]
pub struct Dataset {
    /// Descriptive information about the dataset.
    pub metadata: DatasetMetadata,
    /// The rows, one JSON value per row.
    pub data: Vec<serde_json::Value>,
}
128
129impl Dataset {
130 pub fn new(metadata: DatasetMetadata, data: Vec<serde_json::Value>) -> Self {
132 Self { metadata, data }
133 }
134
135 pub fn from_generation_result(
137 name: String,
138 schema_name: String,
139 result: GenerationResult,
140 config: DataConfig,
141 ) -> Self {
142 let metadata = DatasetMetadata::new(name, schema_name, &result, config);
143 Self::new(metadata, result.data)
144 }
145
146 pub fn to_json_string(&self) -> Result<String> {
148 serde_json::to_string_pretty(&self.data)
149 .map_err(|e| Error::generic(format!("Failed to serialize dataset: {}", e)))
150 }
151
152 pub fn to_jsonl_string(&self) -> Result<String> {
154 let lines: Result<Vec<String>> = self
155 .data
156 .iter()
157 .map(|value| {
158 serde_json::to_string(value)
159 .map_err(|e| Error::generic(format!("JSON serialization error: {}", e)))
160 })
161 .collect();
162
163 lines.map(|lines| lines.join("\n"))
164 }
165
166 pub fn to_csv_string(&self) -> Result<String> {
168 if self.data.is_empty() {
169 return Ok(String::new());
170 }
171
172 let mut csv_output = String::new();
173
174 if let Some(first_row) = self.data.first() {
176 if let Some(obj) = first_row.as_object() {
177 let headers: Vec<String> = obj.keys().cloned().collect();
178 csv_output.push_str(&headers.join(","));
179 csv_output.push('\n');
180
181 for row in &self.data {
183 if let Some(obj) = row.as_object() {
184 let values: Vec<String> = headers
185 .iter()
186 .map(|header| {
187 obj.get(header)
188 .map(|v| v.to_string().trim_matches('"').to_string())
189 .unwrap_or_default()
190 })
191 .collect();
192 csv_output.push_str(&values.join(","));
193 csv_output.push('\n');
194 }
195 }
196 }
197 }
198
199 Ok(csv_output)
200 }
201
202 pub fn to_yaml_string(&self) -> Result<String> {
204 serde_yaml::to_string(&self.data)
205 .map_err(|e| Error::generic(format!("Failed to serialize dataset: {}", e)))
206 }
207
208 pub async fn save_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
210 let content = match self.metadata.format {
211 OutputFormat::Json => self.to_json_string()?,
212 OutputFormat::JsonLines => self.to_jsonl_string()?,
213 OutputFormat::Csv => self.to_csv_string()?,
214 OutputFormat::Yaml => self.to_yaml_string()?,
215 };
216
217 fs::write(path, content)
218 .await
219 .map_err(|e| Error::generic(format!("Failed to write dataset file: {}", e)))
220 }
221
222 pub async fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
224 let content = fs::read_to_string(path)
225 .await
226 .map_err(|e| Error::generic(format!("Failed to read dataset file: {}", e)))?;
227
228 if let Ok(data) = serde_json::from_str::<Vec<serde_json::Value>>(&content) {
230 let metadata = DatasetMetadata {
231 name: "loaded_dataset".to_string(),
232 description: None,
233 schema_name: "unknown".to_string(),
234 row_count: data.len(),
235 config: DataConfig::default(),
236 created_at: chrono::Utc::now(),
237 generation_time_ms: 0,
238 format: OutputFormat::Json,
239 file_size_bytes: Some(content.len() as u64),
240 tags: HashMap::new(),
241 };
242
243 return Ok(Self::new(metadata, data));
244 }
245
246 Err(Error::generic("Unsupported file format or invalid content"))
247 }
248
249 pub fn row_count(&self) -> usize {
251 self.data.len()
252 }
253
254 pub fn sample(&self, count: usize) -> &[serde_json::Value] {
256 let sample_count = count.min(self.data.len());
257 &self.data[..sample_count]
258 }
259
260 pub fn filter<F>(&self, predicate: F) -> Dataset
262 where
263 F: Fn(&serde_json::Value) -> bool,
264 {
265 let filtered_data: Vec<serde_json::Value> =
266 self.data.iter().filter(|row| predicate(row)).cloned().collect();
267
268 let mut metadata = self.metadata.clone();
269 metadata.row_count = filtered_data.len();
270
271 Self::new(metadata, filtered_data)
272 }
273
274 pub fn map<F>(&self, mapper: F) -> Dataset
276 where
277 F: Fn(&serde_json::Value) -> serde_json::Value,
278 {
279 let mapped_data: Vec<serde_json::Value> = self.data.iter().map(mapper).collect();
280
281 let metadata = self.metadata.clone();
282 Self::new(metadata, mapped_data)
283 }
284
285 pub fn validate_against_schema(&self, schema: &SchemaDefinition) -> Result<Vec<String>> {
287 utils::validate_dataset_against_schema(self, schema)
288 }
289
290 pub fn validate_with_details(&self, schema: &SchemaDefinition) -> DatasetValidationResult {
292 utils::validate_dataset_with_details(self, schema)
293 }
294}
295
/// A set of datasets keyed by their metadata name.
#[derive(Debug)]
pub struct DatasetCollection {
    /// Datasets indexed by `metadata.name`.
    datasets: HashMap<String, Dataset>,
}
302
303impl DatasetCollection {
304 pub fn new() -> Self {
306 Self {
307 datasets: HashMap::new(),
308 }
309 }
310
311 pub fn add_dataset(&mut self, dataset: Dataset) -> Result<()> {
313 let name = dataset.metadata.name.clone();
314 self.datasets.insert(name, dataset);
315 Ok(())
316 }
317
318 pub fn get_dataset(&self, name: &str) -> Option<&Dataset> {
320 self.datasets.get(name)
321 }
322
323 pub fn remove_dataset(&mut self, name: &str) -> Option<Dataset> {
325 self.datasets.remove(name)
326 }
327
328 pub fn list_datasets(&self) -> Vec<String> {
330 self.datasets.keys().cloned().collect()
331 }
332
333 pub fn size(&self) -> usize {
335 self.datasets.len()
336 }
337
338 pub async fn save_to_directory<P: AsRef<Path>>(&self, dir_path: P) -> Result<()> {
340 fs::create_dir_all(&dir_path)
341 .await
342 .map_err(|e| Error::generic(format!("Failed to create directory: {}", e)))?;
343
344 for (name, dataset) in &self.datasets {
345 let file_path = dir_path.as_ref().join(format!("{}.json", name));
346 dataset.save_to_file(file_path).await?;
347 }
348
349 Ok(())
350 }
351
352 pub async fn load_from_directory<P: AsRef<Path>>(dir_path: P) -> Result<Self> {
354 let mut collection = Self::new();
355 let mut entries = fs::read_dir(dir_path)
356 .await
357 .map_err(|e| Error::generic(format!("Failed to read directory: {}", e)))?;
358
359 while let Some(entry) = entries
360 .next_entry()
361 .await
362 .map_err(|e| Error::generic(format!("Failed to read directory entry: {}", e)))?
363 {
364 let path = entry.path();
365 if path.extension().and_then(|s| s.to_str()) == Some("json") {
366 if let Some(_file_name) = path.file_stem().and_then(|s| s.to_str()) {
367 let dataset = Dataset::load_from_file(&path).await?;
368 collection.add_dataset(dataset)?;
369 }
370 }
371 }
372
373 Ok(collection)
374 }
375
376 pub fn statistics(&self) -> HashMap<String, serde_json::Value> {
378 let mut stats = HashMap::new();
379
380 stats.insert("total_datasets".to_string(), self.size().into());
381 stats.insert(
382 "total_rows".to_string(),
383 self.datasets.values().map(|d| d.row_count()).sum::<usize>().into(),
384 );
385
386 let dataset_info: Vec<serde_json::Value> = self
387 .datasets
388 .values()
389 .map(|d| {
390 serde_json::json!({
391 "name": d.metadata.name,
392 "schema": d.metadata.schema_name,
393 "rows": d.row_count(),
394 "format": format!("{:?}", d.metadata.format),
395 })
396 })
397 .collect();
398
399 stats.insert("datasets".to_string(), dataset_info.into());
400
401 stats
402 }
403}
404
405impl Default for DatasetCollection {
406 fn default() -> Self {
407 Self::new()
408 }
409}
410
411pub mod utils {
413 use super::*;
414
415 pub async fn create_sample_collection() -> Result<DatasetCollection> {
417 let mut collection = DatasetCollection::new();
418
419 let users_result = crate::generator::utils::generate_users(50).await?;
421 let users_dataset = Dataset::from_generation_result(
422 "users".to_string(),
423 "User".to_string(),
424 users_result,
425 DataConfig {
426 rows: 50,
427 ..Default::default()
428 },
429 );
430 collection.add_dataset(users_dataset)?;
431
432 let products_result = crate::generator::utils::generate_products(25).await?;
434 let products_dataset = Dataset::from_generation_result(
435 "products".to_string(),
436 "Product".to_string(),
437 products_result,
438 DataConfig {
439 rows: 25,
440 ..Default::default()
441 },
442 );
443 collection.add_dataset(products_dataset)?;
444
445 Ok(collection)
446 }
447
448 pub async fn export_dataset(
450 dataset: &Dataset,
451 format: OutputFormat,
452 output_path: &Path,
453 ) -> Result<()> {
454 let content = match format {
455 OutputFormat::Json => dataset.to_json_string()?,
456 OutputFormat::JsonLines => dataset.to_jsonl_string()?,
457 OutputFormat::Csv => dataset.to_csv_string()?,
458 OutputFormat::Yaml => dataset.to_yaml_string()?,
459 };
460
461 fs::write(output_path, content)
462 .await
463 .map_err(|e| Error::generic(format!("Failed to export dataset: {}", e)))
464 }
465
466 pub fn validate_dataset_against_schema(
468 dataset: &Dataset,
469 schema: &SchemaDefinition,
470 ) -> Result<Vec<String>> {
471 let mut errors = Vec::new();
472
473 for (row_index, row) in dataset.data.iter().enumerate() {
475 match row {
476 serde_json::Value::Object(row_obj) => {
477 for field in &schema.fields {
479 let field_name = &field.name;
480
481 if let Some(field_value) = row_obj.get(field_name) {
482 if let Err(validation_error) = field.validate_value(field_value) {
484 errors.push(format!(
485 "Row {}: Field '{}': {}",
486 row_index + 1,
487 field_name,
488 validation_error
489 ));
490 }
491 } else if field.required {
492 errors.push(format!(
493 "Row {}: Required field '{}' is missing",
494 row_index + 1,
495 field_name
496 ));
497 }
498 }
499
500 for (key, _) in row_obj {
502 let field_exists_in_schema = schema.fields.iter().any(|f| f.name == *key);
503 if !field_exists_in_schema {
504 errors.push(format!(
505 "Row {}: Unexpected field '{}' not defined in schema",
506 row_index + 1,
507 key
508 ));
509 }
510 }
511 }
512 _ => {
513 errors.push(format!("Row {}: Expected object, got {}", row_index + 1, row));
514 }
515 }
516 }
517
518 if let Err(count_error) = validate_dataset_size(dataset, schema) {
520 errors.push(count_error.to_string());
521 }
522
523 Ok(errors)
524 }
525
526 fn validate_dataset_size(dataset: &Dataset, schema: &SchemaDefinition) -> Result<()> {
528 if let Some(min_rows) = schema.metadata.get("min_rows") {
530 if let Some(min_count) = min_rows.as_u64() {
531 if dataset.data.len() < min_count as usize {
532 return Err(Error::validation(format!(
533 "Dataset has {} rows, but schema requires at least {} rows",
534 dataset.data.len(),
535 min_count
536 )));
537 }
538 }
539 }
540
541 if let Some(max_rows) = schema.metadata.get("max_rows") {
542 if let Some(max_count) = max_rows.as_u64() {
543 if dataset.data.len() > max_count as usize {
544 return Err(Error::validation(format!(
545 "Dataset has {} rows, but schema allows at most {} rows",
546 dataset.data.len(),
547 max_count
548 )));
549 }
550 }
551 }
552
553 Ok(())
554 }
555
556 pub fn validate_dataset_with_details(
558 dataset: &Dataset,
559 schema: &SchemaDefinition,
560 ) -> DatasetValidationResult {
561 let errors = validate_dataset_against_schema(dataset, schema);
562
563 match errors {
564 Ok(validation_errors) => {
565 let warnings = Vec::new(); DatasetValidationResult {
567 valid: validation_errors.is_empty(),
568 errors: validation_errors,
569 warnings,
570 total_rows_validated: dataset.data.len(),
571 }
572 }
573 Err(e) => DatasetValidationResult {
574 valid: false,
575 errors: vec![format!("Validation failed: {}", e)],
576 warnings: Vec::new(),
577 total_rows_validated: dataset.data.len(),
578 },
579 }
580 }
581}
582
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // ---- shared fixtures -------------------------------------------------

    /// Default metadata carrying only the given name.
    fn named_metadata(name: &str) -> DatasetMetadata {
        DatasetMetadata {
            name: name.to_string(),
            ..Default::default()
        }
    }

    /// Dataset with default metadata and the given rows.
    fn plain_dataset(rows: Vec<serde_json::Value>) -> Dataset {
        Dataset::new(DatasetMetadata::default(), rows)
    }

    /// Dataset with the given name and rows.
    fn named_dataset(name: &str, rows: Vec<serde_json::Value>) -> Dataset {
        Dataset::new(named_metadata(name), rows)
    }

    /// Collection pre-filled with the given datasets.
    fn collection_of(datasets: Vec<Dataset>) -> DatasetCollection {
        let mut collection = DatasetCollection::new();
        for dataset in datasets {
            collection.add_dataset(dataset).unwrap();
        }
        collection
    }

    // ---- DatasetValidationResult ----------------------------------------

    #[test]
    fn test_dataset_validation_result_creation() {
        let outcome = DatasetValidationResult {
            valid: true,
            errors: vec![],
            warnings: vec![],
            total_rows_validated: 100,
        };

        assert!(outcome.valid);
        assert_eq!(outcome.total_rows_validated, 100);
    }

    #[test]
    fn test_dataset_validation_result_with_errors() {
        let outcome = DatasetValidationResult {
            valid: false,
            errors: vec!["Error 1".to_string(), "Error 2".to_string()],
            warnings: vec![],
            total_rows_validated: 50,
        };

        assert!(!outcome.valid);
        assert_eq!(outcome.errors.len(), 2);
    }

    #[test]
    fn test_dataset_validation_result_with_warnings() {
        let outcome = DatasetValidationResult {
            valid: true,
            errors: vec![],
            warnings: vec!["Warning 1".to_string()],
            total_rows_validated: 75,
        };

        assert!(outcome.valid);
        assert_eq!(outcome.warnings.len(), 1);
    }

    #[test]
    fn test_dataset_validation_result_clone() {
        let outcome = DatasetValidationResult {
            valid: true,
            errors: vec!["err".to_string()],
            warnings: vec!["warn".to_string()],
            total_rows_validated: 50,
        };

        let copy = outcome.clone();
        assert_eq!(copy.total_rows_validated, 50);
        assert_eq!(copy.errors.len(), 1);
    }

    #[test]
    fn test_dataset_validation_result_serialize() {
        let outcome = DatasetValidationResult {
            valid: true,
            errors: vec![],
            warnings: vec![],
            total_rows_validated: 25,
        };

        let encoded = serde_json::to_string(&outcome).unwrap();
        assert!(encoded.contains("true"));
        assert!(encoded.contains("25"));
    }

    #[test]
    fn test_dataset_validation_result_deserialize() {
        let encoded =
            r#"{"valid": false, "errors": ["e1"], "warnings": [], "total_rows_validated": 10}"#;

        let outcome: DatasetValidationResult = serde_json::from_str(encoded).unwrap();
        assert!(!outcome.valid);
        assert_eq!(outcome.errors.len(), 1);
    }

    #[test]
    fn test_dataset_validation_result_debug() {
        let outcome = DatasetValidationResult {
            valid: true,
            errors: vec![],
            warnings: vec![],
            total_rows_validated: 0,
        };

        let rendered = format!("{:?}", outcome);
        assert!(rendered.contains("valid"));
    }

    // ---- DatasetMetadata -------------------------------------------------

    #[test]
    fn test_dataset_metadata_creation() {
        let metadata = DatasetMetadata {
            name: "TestDataset".to_string(),
            description: Some("Test description".to_string()),
            schema_name: "TestSchema".to_string(),
            row_count: 100,
            config: DataConfig::default(),
            created_at: chrono::Utc::now(),
            generation_time_ms: 1000,
            format: OutputFormat::Json,
            file_size_bytes: Some(1024),
            tags: HashMap::new(),
        };

        assert_eq!(metadata.name, "TestDataset");
        assert_eq!(metadata.row_count, 100);
        assert!(metadata.description.is_some());
        assert_eq!(metadata.generation_time_ms, 1000);
    }

    #[test]
    fn test_dataset_metadata_default() {
        let metadata = DatasetMetadata::default();

        assert!(metadata.name.is_empty());
        assert!(metadata.description.is_none());
        assert_eq!(metadata.row_count, 0);
        assert!(metadata.tags.is_empty());
    }

    #[test]
    fn test_dataset_metadata_new() {
        let generated = GenerationResult {
            data: vec![json!({"id": 1}), json!({"id": 2})],
            count: 2,
            generation_time_ms: 100,
            warnings: vec![],
        };

        let metadata = DatasetMetadata::new(
            "my_dataset".to_string(),
            "TestSchema".to_string(),
            &generated,
            DataConfig::default(),
        );

        assert_eq!(metadata.name, "my_dataset");
        assert_eq!(metadata.schema_name, "TestSchema");
        assert_eq!(metadata.row_count, 2);
        assert_eq!(metadata.generation_time_ms, 100);
    }

    #[test]
    fn test_dataset_metadata_with_description() {
        let metadata = DatasetMetadata::default().with_description("A test dataset".to_string());

        assert_eq!(metadata.description, Some("A test dataset".to_string()));
    }

    #[test]
    fn test_dataset_metadata_with_tag() {
        let metadata = DatasetMetadata::default()
            .with_tag("env".to_string(), "test".to_string())
            .with_tag("version".to_string(), "1.0".to_string());

        assert_eq!(metadata.tags.get("env"), Some(&"test".to_string()));
        assert_eq!(metadata.tags.get("version"), Some(&"1.0".to_string()));
    }

    #[test]
    fn test_dataset_metadata_with_file_size() {
        let metadata = DatasetMetadata::default().with_file_size(2048);

        assert_eq!(metadata.file_size_bytes, Some(2048));
    }

    #[test]
    fn test_dataset_metadata_clone() {
        let metadata = named_metadata("cloneable");

        let copy = metadata.clone();
        assert_eq!(copy.name, "cloneable");
    }

    #[test]
    fn test_dataset_metadata_serialize() {
        let encoded = serde_json::to_string(&DatasetMetadata::default()).unwrap();

        assert!(encoded.contains("name"));
        assert!(encoded.contains("row_count"));
    }

    // ---- Dataset ---------------------------------------------------------

    #[test]
    fn test_dataset_new() {
        let dataset = plain_dataset(vec![
            json!({"id": 1, "name": "Alice"}),
            json!({"id": 2, "name": "Bob"}),
        ]);

        assert_eq!(dataset.row_count(), 2);
    }

    #[test]
    fn test_dataset_from_generation_result() {
        let generated = GenerationResult {
            data: vec![json!({"id": 1})],
            count: 1,
            generation_time_ms: 50,
            warnings: vec![],
        };

        let dataset = Dataset::from_generation_result(
            "test_dataset".to_string(),
            "TestSchema".to_string(),
            generated,
            DataConfig::default(),
        );

        assert_eq!(dataset.metadata.name, "test_dataset");
        assert_eq!(dataset.row_count(), 1);
    }

    #[test]
    fn test_dataset_to_json_string() {
        let dataset = plain_dataset(vec![json!({"id": 1}), json!({"id": 2})]);

        let encoded = dataset.to_json_string().unwrap();
        assert!(encoded.contains("id"));
        assert!(encoded.contains("1"));
        assert!(encoded.contains("2"));
    }

    #[test]
    fn test_dataset_to_jsonl_string() {
        let dataset = plain_dataset(vec![json!({"id": 1}), json!({"id": 2})]);

        let encoded = dataset.to_jsonl_string().unwrap();
        let lines: Vec<&str> = encoded.split('\n').collect();
        assert_eq!(lines.len(), 2);
    }

    #[test]
    fn test_dataset_to_csv_string() {
        let dataset = plain_dataset(vec![
            json!({"id": 1, "name": "Alice"}),
            json!({"id": 2, "name": "Bob"}),
        ]);

        let csv = dataset.to_csv_string().unwrap();
        assert!(csv.contains("id") || csv.contains("name"));
        assert!(csv.contains("Alice") || csv.contains("Bob"));
    }

    #[test]
    fn test_dataset_to_csv_string_empty() {
        let dataset = plain_dataset(vec![]);

        let csv = dataset.to_csv_string().unwrap();
        assert!(csv.is_empty());
    }

    #[test]
    fn test_dataset_to_yaml_string() {
        let dataset = plain_dataset(vec![json!({"id": 1})]);

        let yaml = dataset.to_yaml_string().unwrap();
        assert!(yaml.contains("id"));
    }

    #[test]
    fn test_dataset_row_count() {
        let dataset = plain_dataset(vec![json!({}), json!({}), json!({})]);

        assert_eq!(dataset.row_count(), 3);
    }

    #[test]
    fn test_dataset_sample() {
        let rows: Vec<serde_json::Value> = (0..10).map(|i| json!({"id": i})).collect();
        let dataset = plain_dataset(rows);

        let sample = dataset.sample(3);
        assert_eq!(sample.len(), 3);

        // Asking for more rows than exist yields every row.
        let big_sample = dataset.sample(100);
        assert_eq!(big_sample.len(), 10);
    }

    #[test]
    fn test_dataset_filter() {
        let dataset = named_dataset(
            "filterable",
            vec![
                json!({"id": 1, "active": true}),
                json!({"id": 2, "active": false}),
                json!({"id": 3, "active": true}),
            ],
        );

        let filtered =
            dataset.filter(|row| row.get("active").and_then(|v| v.as_bool()).unwrap_or(false));

        assert_eq!(filtered.row_count(), 2);
        assert_eq!(filtered.metadata.row_count, 2);
    }

    #[test]
    fn test_dataset_map() {
        let dataset = plain_dataset(vec![json!({"value": 1}), json!({"value": 2})]);

        let mapped = dataset.map(|row| {
            let mut new_row = row.clone();
            if let Some(obj) = new_row.as_object_mut() {
                obj.insert("doubled".to_string(), json!(true));
            }
            new_row
        });

        assert_eq!(mapped.row_count(), 2);
        assert!(mapped.data[0].get("doubled").is_some());
    }

    #[test]
    fn test_dataset_debug() {
        let dataset = named_dataset("debug_test", vec![]);

        let rendered = format!("{:?}", dataset);
        assert!(rendered.contains("metadata"));
    }

    // ---- DatasetCollection ----------------------------------------------

    #[test]
    fn test_dataset_collection_new() {
        let collection = DatasetCollection::new();

        assert_eq!(collection.size(), 0);
    }

    #[test]
    fn test_dataset_collection_default() {
        let collection = DatasetCollection::default();

        assert_eq!(collection.size(), 0);
    }

    #[test]
    fn test_dataset_collection_add_dataset() {
        let mut collection = DatasetCollection::new();

        collection.add_dataset(named_dataset("test1", vec![])).unwrap();
        assert_eq!(collection.size(), 1);
    }

    #[test]
    fn test_dataset_collection_get_dataset() {
        let collection = collection_of(vec![named_dataset("findme", vec![json!({"id": 1})])]);

        let found = collection.get_dataset("findme");
        assert!(found.is_some());
        assert_eq!(found.unwrap().row_count(), 1);
    }

    #[test]
    fn test_dataset_collection_get_dataset_not_found() {
        let collection = DatasetCollection::new();

        assert!(collection.get_dataset("nonexistent").is_none());
    }

    #[test]
    fn test_dataset_collection_remove_dataset() {
        let mut collection = collection_of(vec![named_dataset("removable", vec![])]);

        let removed = collection.remove_dataset("removable");
        assert!(removed.is_some());
        assert_eq!(collection.size(), 0);
    }

    #[test]
    fn test_dataset_collection_list_datasets() {
        let collection = collection_of(vec![named_dataset("a", vec![]), named_dataset("b", vec![])]);

        let names = collection.list_datasets();
        assert_eq!(names.len(), 2);
        assert!(names.contains(&"a".to_string()));
        assert!(names.contains(&"b".to_string()));
    }

    #[test]
    fn test_dataset_collection_size() {
        let mut collection = DatasetCollection::new();
        assert_eq!(collection.size(), 0);

        collection.add_dataset(named_dataset("x", vec![])).unwrap();
        assert_eq!(collection.size(), 1);
    }

    #[test]
    fn test_dataset_collection_statistics() {
        let first = Dataset::new(
            DatasetMetadata {
                name: "ds1".to_string(),
                schema_name: "Schema1".to_string(),
                ..Default::default()
            },
            vec![json!({}), json!({})],
        );
        let second = Dataset::new(
            DatasetMetadata {
                name: "ds2".to_string(),
                schema_name: "Schema2".to_string(),
                ..Default::default()
            },
            vec![json!({})],
        );
        let collection = collection_of(vec![first, second]);

        let stats = collection.statistics();
        assert_eq!(stats.get("total_datasets").and_then(|v| v.as_u64()), Some(2));
        assert_eq!(stats.get("total_rows").and_then(|v| v.as_u64()), Some(3));
    }

    #[test]
    fn test_dataset_collection_debug() {
        let collection = DatasetCollection::new();

        let rendered = format!("{:?}", collection);
        assert!(rendered.contains("datasets"));
    }
}