1use std::{
62 collections::HashMap,
63 fs::File,
64 path::{Path, PathBuf},
65};
66
67use walkdir::WalkDir;
68
69use crate::Error;
70
/// File-name suffixes (without the leading dot) that are tried, in this
/// order, when looking up the file backing a sample.
///
/// `find_matching_file` appends each entry to `"<key>."`, so the compound
/// `camera.*` entries match names such as `sample.camera.jpeg`.
pub const IMAGE_EXTENSIONS: &[&str] = &[
    "jpg",
    "jpeg",
    "png",
    "camera.jpeg",
    "camera.png",
    "camera.jpg",
];
80
/// Maps each sample name found in an Arrow IPC file to the relative path its
/// camera image is expected to have.
///
/// The file must contain a string column called `name`. An optional `frame`
/// column (read as `u64`, falling back to `u32`) selects the layout:
///
/// * with a frame: `<name>/<name>_<frame, zero-padded to 3>.camera.jpeg`
/// * without a frame: `<name>.camera.jpeg`
///
/// Rows whose name is null are skipped, and only the first row seen for a
/// given name contributes an entry.
///
/// # Errors
///
/// Returns `Error::InvalidParameters` when the file cannot be opened or
/// decoded, or when the `name` column is missing or is not a string column.
#[cfg(feature = "polars")]
pub fn resolve_arrow_files(arrow_path: &Path) -> Result<HashMap<String, PathBuf>, Error> {
    use polars::prelude::*;

    let mut reader = File::open(arrow_path).map_err(|e| {
        Error::InvalidParameters(format!("Cannot open Arrow file {:?}: {}", arrow_path, e))
    })?;

    let table = IpcReader::new(&mut reader).finish().map_err(|e| {
        Error::InvalidParameters(format!("Failed to read Arrow file {:?}: {}", arrow_path, e))
    })?;

    let name_column = table
        .column("name")
        .map_err(|e| Error::InvalidParameters(format!("Missing 'name' column: {}", e)))?
        .str()
        .map_err(|e| Error::InvalidParameters(format!("Invalid 'name' column type: {}", e)))?;

    // The frame column is optional; its absence simply means "no sequences".
    let frame_column = table.column("frame").ok();

    let mut paths_by_name: HashMap<String, PathBuf> = HashMap::new();

    for row in 0..table.height() {
        let Some(sample) = name_column.get(row).map(|n| n.to_string()) else {
            continue;
        };

        // First occurrence wins; later rows for the same sample are ignored.
        if paths_by_name.contains_key(&sample) {
            continue;
        }

        let frame: Option<u64> = match frame_column {
            Some(col) => match col.u64().ok().and_then(|values| values.get(row)) {
                Some(value) => Some(value),
                None => col
                    .u32()
                    .ok()
                    .and_then(|values| values.get(row))
                    .map(u64::from),
            },
            None => None,
        };

        let relative_path = match frame {
            Some(frame_num) => {
                PathBuf::from(&sample).join(format!("{}_{:03}.camera.jpeg", sample, frame_num))
            }
            None => PathBuf::from(format!("{}.camera.jpeg", sample)),
        };

        paths_by_name.insert(sample, relative_path);
    }

    Ok(paths_by_name)
}
177
/// Outcome of looking up one Arrow sample inside a sensor container.
#[derive(Debug, Clone)]
pub struct ResolvedFile {
    /// Sample name taken from the Arrow file's `name` column.
    pub name: String,
    /// Frame number from the `frame` column, if the row has one.
    pub frame: Option<u64>,
    /// Path of the file found in the container, or `None` when no file matched.
    pub path: Option<PathBuf>,
    /// Relative path the sample would have under the `*.camera.jpeg` layout.
    pub expected_path: PathBuf,
}
190
/// Resolves every sample listed in an Arrow IPC file against the files that
/// actually exist below `sensor_container`.
///
/// The Arrow file must contain a string `name` column; an optional `frame`
/// column is read as `u64` with a `u32` fallback. One [`ResolvedFile`] is
/// produced per distinct `(name, frame)` pair, in row order. Rows with a null
/// name are skipped. `ResolvedFile::path` is `None` when no file in the
/// container matches the sample.
///
/// # Errors
///
/// Returns `Error::InvalidParameters` when the Arrow file cannot be opened or
/// decoded, or when the `name` column is missing or not a string column.
/// Errors from building the container file index are propagated unchanged.
#[cfg(feature = "polars")]
pub fn resolve_files_with_container(
    arrow_path: &Path,
    sensor_container: &Path,
) -> Result<Vec<ResolvedFile>, Error> {
    use polars::prelude::*;

    let mut file = File::open(arrow_path).map_err(|e| {
        Error::InvalidParameters(format!("Cannot open Arrow file {:?}: {}", arrow_path, e))
    })?;

    let df = IpcReader::new(&mut file).finish().map_err(|e| {
        Error::InvalidParameters(format!("Failed to read Arrow file {:?}: {}", arrow_path, e))
    })?;

    let file_index = build_file_index(sensor_container)?;

    let names = df
        .column("name")
        .map_err(|e| Error::InvalidParameters(format!("Missing 'name' column: {}", e)))?
        .str()
        .map_err(|e| Error::InvalidParameters(format!("Invalid 'name' column type: {}", e)))?;

    // The frame column is optional; without it every sample is frameless.
    let frames = df.column("frame").ok();

    let mut result = Vec::new();
    // Keyed on the (name, frame) tuple rather than a formatted string: a
    // string key such as "a_1" cannot tell the frameless sample "a_1" apart
    // from frame 1 of sample "a", which would silently drop one of them.
    let mut seen_samples: std::collections::HashSet<(String, Option<u64>)> =
        std::collections::HashSet::new();

    for idx in 0..df.height() {
        let name = match names.get(idx) {
            Some(n) => n.to_string(),
            None => continue,
        };

        let frame = frames.and_then(|col| {
            col.u64()
                .ok()
                .and_then(|s| s.get(idx))
                .or_else(|| col.u32().ok().and_then(|s| s.get(idx).map(u64::from)))
        });

        // `insert` returns false when the pair was already recorded.
        if !seen_samples.insert((name.clone(), frame)) {
            continue;
        }

        let expected_path = match frame {
            Some(frame_num) => {
                PathBuf::from(&name).join(format!("{}_{:03}.camera.jpeg", name, frame_num))
            }
            None => PathBuf::from(format!("{}.camera.jpeg", name)),
        };

        let actual_path = find_matching_file(&file_index, &name, frame);

        result.push(ResolvedFile {
            name,
            frame,
            path: actual_path,
            expected_path,
        });
    }

    Ok(result)
}
300
301fn build_file_index(root: &Path) -> Result<HashMap<String, PathBuf>, Error> {
303 let mut index = HashMap::new();
304
305 if !root.exists() {
306 return Ok(index);
307 }
308
309 for entry in WalkDir::new(root)
310 .into_iter()
311 .filter_map(|e| e.ok())
312 .filter(|e| e.file_type().is_file())
313 {
314 let path = entry.path().to_path_buf();
315 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
316 index.insert(filename.to_lowercase(), path.clone());
318
319 if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
321 let clean_stem = stem.strip_suffix(".camera").unwrap_or(stem).to_lowercase();
323 index.entry(clean_stem).or_insert_with(|| path.clone());
324 }
325 }
326 }
327
328 Ok(index)
329}
330
331fn find_matching_file(
333 index: &HashMap<String, PathBuf>,
334 name: &str,
335 frame: Option<u64>,
336) -> Option<PathBuf> {
337 let search_key = match frame {
338 Some(f) => format!("{}_{:03}", name, f).to_lowercase(),
339 None => name.to_lowercase(),
340 };
341
342 for ext in IMAGE_EXTENSIONS {
344 let key = format!("{}.{}", search_key, ext);
345 if let Some(path) = index.get(&key) {
346 return Some(path.clone());
347 }
348 }
349
350 if let Some(path) = index.get(&search_key) {
352 return Some(path.clone());
353 }
354
355 None
356}
357
/// A single problem found while checking a dataset directory's layout.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationIssue {
    /// The Arrow file does not exist; `expected` is the path that was checked.
    MissingArrowFile { expected: PathBuf },
    /// The sensor container does not exist; `expected` is the path that was checked.
    MissingSensorContainer { expected: PathBuf },
    /// No file in the container matched sample `name`; `expected` is the
    /// relative path the sample was expected to have.
    MissingFile { name: String, expected: PathBuf },
    /// A file in the container that no sample in the Arrow file resolved to.
    UnreferencedFile { path: PathBuf },
    /// Free-form structural problem described by `message`.
    // NOTE(review): this variant is not constructed anywhere in the visible code.
    InvalidStructure { message: String },
}
372
373impl std::fmt::Display for ValidationIssue {
374 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
375 match self {
376 ValidationIssue::MissingArrowFile { expected } => {
377 write!(f, "Missing Arrow file: {:?}", expected)
378 }
379 ValidationIssue::MissingSensorContainer { expected } => {
380 write!(f, "Missing sensor container directory: {:?}", expected)
381 }
382 ValidationIssue::MissingFile { name, expected } => {
383 write!(f, "Missing file for sample '{}': {:?}", name, expected)
384 }
385 ValidationIssue::UnreferencedFile { path } => {
386 write!(f, "Unreferenced file in container: {:?}", path)
387 }
388 ValidationIssue::InvalidStructure { message } => {
389 write!(f, "Invalid structure: {}", message)
390 }
391 }
392 }
393}
394
/// Checks that a dataset directory follows the expected layout and reports
/// every deviation found.
///
/// For a directory named `<name>` the function expects `<name>/<name>.arrow`
/// and a container `<name>/<name>`. If either is missing, a single issue
/// describing it is returned and nothing else is checked. Otherwise the result
/// lists, in this order:
///
/// 1. a `MissingFile` for every Arrow sample with no matching file, then
/// 2. an `UnreferencedFile` for every container file with a `jpg`, `jpeg`,
///    `png`, `pcd` or `bin` extension (case-insensitive) that no sample
///    resolved to.
///
/// # Errors
///
/// Returns `Error::InvalidParameters` when `dataset_dir` has no UTF-8 final
/// component; errors from resolving the Arrow file are propagated.
#[cfg(feature = "polars")]
pub fn validate_dataset_structure(dataset_dir: &Path) -> Result<Vec<ValidationIssue>, Error> {
    let Some(dataset_name) = dataset_dir.file_name().and_then(|n| n.to_str()) else {
        return Err(Error::InvalidParameters(
            "Invalid dataset directory path".to_owned(),
        ));
    };

    let arrow_path = dataset_dir.join(format!("{}.arrow", dataset_name));
    if !arrow_path.exists() {
        // Nothing further can be validated without the Arrow file.
        return Ok(vec![ValidationIssue::MissingArrowFile {
            expected: arrow_path,
        }]);
    }

    let container_path = dataset_dir.join(dataset_name);
    if !container_path.exists() {
        return Ok(vec![ValidationIssue::MissingSensorContainer {
            expected: container_path,
        }]);
    }

    let resolved = resolve_files_with_container(&arrow_path, &container_path)?;

    let mut issues = Vec::new();
    let mut referenced: std::collections::HashSet<PathBuf> = std::collections::HashSet::new();

    for sample in &resolved {
        if let Some(found) = &sample.path {
            referenced.insert(found.clone());
        } else {
            issues.push(ValidationIssue::MissingFile {
                name: sample.name.clone(),
                expected: sample.expected_path.clone(),
            });
        }
    }

    // Extensions of the sensor files that must be referenced by some sample.
    let is_sensor_file = |candidate: &Path| {
        candidate
            .extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| {
                let lowered = ext.to_lowercase();
                ["jpg", "jpeg", "png", "pcd", "bin"].contains(&lowered.as_str())
            })
    };

    let container_files = WalkDir::new(&container_path)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file());

    for entry in container_files {
        let path = entry.path().to_path_buf();
        if !is_sensor_file(&path) || referenced.contains(&path) {
            continue;
        }
        issues.push(ValidationIssue::UnreferencedFile { path });
    }

    Ok(issues)
}
504
/// Scans `folder` recursively and writes an Arrow IPC file at `output` that
/// lists one row per sensor file found.
///
/// Files with a `jpg`, `jpeg`, `png`, `pcd` or `bin` extension
/// (case-insensitive) are included. For each file, `parse_image_filename`
/// supplies the `name` and `frame` values; `detect_sequences` is forwarded to
/// it. The remaining columns (`object_id`, `label`, `label_index`, `group`,
/// `mask`, `box2d`, `box3d`) are written entirely null.
///
/// Rows are emitted in sorted path order so that the generated file does not
/// depend on the directory traversal order of the underlying filesystem.
///
/// Missing parent directories of `output` are created.
///
/// Returns the number of files listed.
///
/// # Errors
///
/// * `Error::InvalidParameters` when no matching file is found or when
///   writing the Arrow data fails.
/// * Errors from casting or assembling the columns, creating directories, or
///   creating the output file are propagated via `?`.
#[cfg(feature = "polars")]
pub fn generate_arrow_from_folder(
    folder: &Path,
    output: &Path,
    detect_sequences: bool,
) -> Result<usize, Error> {
    use polars::prelude::*;
    use std::io::BufWriter;

    let mut image_files: Vec<PathBuf> = WalkDir::new(folder)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_file())
        .filter(|e| {
            e.path()
                .extension()
                .and_then(|ext| ext.to_str())
                .map(|ext| {
                    matches!(
                        ext.to_lowercase().as_str(),
                        "jpg" | "jpeg" | "png" | "pcd" | "bin"
                    )
                })
                .unwrap_or(false)
        })
        .map(|e| e.path().to_path_buf())
        .collect();

    if image_files.is_empty() {
        return Err(Error::InvalidParameters(
            "No image files found in folder".to_owned(),
        ));
    }

    // Directory traversal order is filesystem dependent; sorting makes the
    // row order (and therefore the output file) reproducible.
    image_files.sort_unstable();

    let (names, frames): (Vec<String>, Vec<Option<u64>>) = image_files
        .iter()
        .map(|path| parse_image_filename(path, folder, detect_sequences))
        .unzip();

    let name_series = Series::new("name".into(), &names);
    let frame_series = Series::new("frame".into(), &frames);

    // Annotation columns carry no data yet: one null per row.
    let null_strings: Vec<Option<&str>> = vec![None; names.len()];
    let null_u64s: Vec<Option<u64>> = vec![None; names.len()];

    let object_id_series = Series::new("object_id".into(), &null_strings);
    let label_series = Series::new("label".into(), &null_strings);
    let label_index_series = Series::new("label_index".into(), &null_u64s);
    let group_series = Series::new("group".into(), &null_strings);

    let null_series_vec: Vec<Option<Series>> = vec![None; names.len()];

    // Nested columns are built as null series and cast to their final types.
    let mask_series = Series::new("mask".into(), null_series_vec.clone())
        .cast(&DataType::List(Box::new(DataType::Float32)))?;

    let box2d_series = Series::new("box2d".into(), null_series_vec.clone())
        .cast(&DataType::Array(Box::new(DataType::Float32), 4))?;

    let box3d_series = Series::new("box3d".into(), null_series_vec)
        .cast(&DataType::Array(Box::new(DataType::Float32), 6))?;

    let mut df = DataFrame::new(vec![
        name_series.into(),
        frame_series.into(),
        object_id_series.into(),
        label_series.into(),
        label_index_series.into(),
        group_series.into(),
        mask_series.into(),
        box2d_series.into(),
        box3d_series.into(),
    ])?;

    if let Some(parent) = output.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let file = File::create(output)?;
    let writer = BufWriter::new(file);
    IpcWriter::new(writer)
        .finish(&mut df)
        .map_err(|e| Error::InvalidParameters(format!("Failed to write Arrow file: {}", e)))?;

    Ok(image_files.len())
}
639
/// Derives a sample name and an optional frame number from a file path.
///
/// The file stem of `path` is used (falling back to `"unknown"` when there is
/// no stem or it is not valid UTF-8) and a trailing `.camera` is removed, so
/// `sample.camera.jpeg` yields the stem `sample`.
///
/// When `detect_sequences` is `true` and the stem has the form
/// `<name>_<digits>` with a non-empty `<name>`, the result is
/// `(<name>, Some(<digits as u64>))`. In every other case, including a
/// numeric suffix too large for `u64`, the whole stem is returned with `None`.
///
/// `root` does not affect the result: a matching stem is treated as a
/// sequence frame whether or not the file lives in a sub-directory. The
/// parameter is kept so existing callers continue to compile.
fn parse_image_filename(path: &Path, root: &Path, detect_sequences: bool) -> (String, Option<u64>) {
    // Intentionally unused; see the doc comment.
    let _ = root;

    let stem = path
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("unknown");

    let clean_stem = stem.strip_suffix(".camera").unwrap_or(stem);

    if detect_sequences {
        if let Some((name_part, frame_str)) = clean_stem.rsplit_once('_') {
            // `str::parse::<u64>` also accepts a leading '+', which is not a
            // frame suffix this module ever writes, so require plain digits.
            let digits_only =
                !frame_str.is_empty() && frame_str.bytes().all(|b| b.is_ascii_digit());

            // An empty name (stem like "_001") would produce a nameless
            // sample; treat such stems as ordinary standalone files.
            if digits_only && !name_part.is_empty() {
                if let Ok(frame) = frame_str.parse::<u64>() {
                    return (name_part.to_string(), Some(frame));
                }
            }
        }
    }

    (clean_stem.to_string(), None)
}
677
/// Returns the sensor container path for a dataset directory: a child of
/// `dataset_dir` that carries the same name as the directory itself.
///
/// Returns `None` when `dataset_dir` has no final component or that component
/// is not valid UTF-8.
pub fn get_sensor_container_path(dataset_dir: &Path) -> Option<PathBuf> {
    dataset_dir
        .file_name()
        .and_then(|component| component.to_str())
        .map(|dataset_name| dataset_dir.join(dataset_name))
}
691
/// Returns the Arrow file path for a dataset directory:
/// `<dataset_dir>/<directory name>.arrow`.
///
/// Returns `None` when `dataset_dir` has no final component or that component
/// is not valid UTF-8.
pub fn get_arrow_path(dataset_dir: &Path) -> Option<PathBuf> {
    dataset_dir
        .file_name()
        .and_then(|component| component.to_str())
        .map(|dataset_name| dataset_dir.join(format!("{}.arrow", dataset_name)))
}
705
// Unit tests for path helpers, file-name parsing, the container file index,
// and (behind the `polars` feature) Arrow generation, resolution and
// dataset validation.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    /// Writes a small fixed JPEG byte sequence to `path`, creating parent
    /// directories as needed. Panics on any I/O failure.
    fn create_test_image(path: &Path) {
        // Embedded fixture: starts with the JPEG SOI marker (0xFF 0xD8) and
        // ends with the EOI marker (0xFF 0xD9).
        let jpeg_data: &[u8] = &[
            0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46, 0x00, 0x01, 0x01, 0x00,
            0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0xFF, 0xDB, 0x00, 0x43, 0x00, 0x08, 0x06, 0x06,
            0x07, 0x06, 0x05, 0x08, 0x07, 0x07, 0x07, 0x09, 0x09, 0x08, 0x0A, 0x0C, 0x14, 0x0D,
            0x0C, 0x0B, 0x0B, 0x0C, 0x19, 0x12, 0x13, 0x0F, 0x14, 0x1D, 0x1A, 0x1F, 0x1E, 0x1D,
            0x1A, 0x1C, 0x1C, 0x20, 0x24, 0x2E, 0x27, 0x20, 0x22, 0x2C, 0x23, 0x1C, 0x1C, 0x28,
            0x37, 0x29, 0x2C, 0x30, 0x31, 0x34, 0x34, 0x34, 0x1F, 0x27, 0x39, 0x3D, 0x38, 0x32,
            0x3C, 0x2E, 0x33, 0x34, 0x32, 0xFF, 0xC0, 0x00, 0x0B, 0x08, 0x00, 0x01, 0x00, 0x01,
            0x01, 0x01, 0x11, 0x00, 0xFF, 0xC4, 0x00, 0x1F, 0x00, 0x00, 0x01, 0x05, 0x01, 0x01,
            0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02,
            0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xFF, 0xC4, 0x00, 0xB5, 0x10,
            0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03, 0x05, 0x05, 0x04, 0x04, 0x00, 0x00,
            0x01, 0x7D, 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06,
            0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, 0x23, 0x42,
            0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16,
            0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x34, 0x35, 0x36, 0x37,
            0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x53, 0x54, 0x55,
            0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x73,
            0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
            0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5,
            0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA,
            0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
            0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA,
            0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFF, 0xDA, 0x00, 0x08,
            0x01, 0x01, 0x00, 0x00, 0x3F, 0x00, 0xFB, 0xD5, 0xDB, 0x20, 0xA8, 0xF1, 0x4D, 0x9E,
            0xBA, 0x79, 0xC5, 0x14, 0x51, 0x40, 0xFF, 0xD9,
        ];

        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        let mut file = File::create(path).unwrap();
        file.write_all(jpeg_data).unwrap();
    }

    // The Arrow file is named after the dataset directory.
    #[test]
    fn test_get_arrow_path() {
        let dir = Path::new("/data/my_dataset");
        let arrow = get_arrow_path(dir).unwrap();
        assert_eq!(arrow, PathBuf::from("/data/my_dataset/my_dataset.arrow"));
    }

    // The sensor container is a child directory with the dataset's own name.
    #[test]
    fn test_get_sensor_container_path() {
        let dir = Path::new("/data/my_dataset");
        let container = get_sensor_container_path(dir).unwrap();
        assert_eq!(container, PathBuf::from("/data/my_dataset/my_dataset"));
    }

    // A stem without an underscore is never treated as a sequence frame.
    #[test]
    fn test_parse_image_filename_standalone() {
        let root = Path::new("/data");
        let path = Path::new("/data/image.jpg");

        let (name, frame) = parse_image_filename(path, root, true);
        assert_eq!(name, "image");
        assert_eq!(frame, None);
    }

    // The `.camera` part of a compound extension is stripped from the name.
    #[test]
    fn test_parse_image_filename_camera_extension() {
        let root = Path::new("/data");
        let path = Path::new("/data/sample.camera.jpeg");

        let (name, frame) = parse_image_filename(path, root, true);
        assert_eq!(name, "sample");
        assert_eq!(frame, None);
    }

    // `<name>_<number>` is split into name and frame when detection is on.
    #[test]
    fn test_parse_image_filename_sequence() {
        let root = Path::new("/data");
        let path = Path::new("/data/seq/seq_001.camera.jpeg");

        let (name, frame) = parse_image_filename(path, root, true);
        assert_eq!(name, "seq");
        assert_eq!(frame, Some(1));
    }

    // With detection off the numeric suffix stays part of the name.
    #[test]
    fn test_parse_image_filename_no_sequence_detection() {
        let root = Path::new("/data");
        let path = Path::new("/data/seq/seq_001.camera.jpeg");

        let (name, frame) = parse_image_filename(path, root, false);
        assert_eq!(name, "seq_001");
        assert_eq!(frame, None);
    }

    // Files are indexed under both their full file name and their bare stem,
    // including files in sub-directories.
    #[test]
    fn test_build_file_index() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        create_test_image(&root.join("image1.jpg"));
        create_test_image(&root.join("sub/image2.camera.jpeg"));

        let index = build_file_index(root).unwrap();

        // Keys for the full file names.
        assert!(index.contains_key("image1.jpg"));
        assert!(index.contains_key("image2.camera.jpeg"));

        // Keys for the stems with `.camera` removed.
        assert!(index.contains_key("image1"));
        assert!(index.contains_key("image2"));
    }

    // Lookup succeeds for a standalone sample and a sequence frame, and fails
    // for a name with no file.
    #[test]
    fn test_find_matching_file() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        create_test_image(&root.join("sample.camera.jpeg"));
        create_test_image(&root.join("seq/seq_001.camera.jpeg"));

        let index = build_file_index(root).unwrap();

        let found = find_matching_file(&index, "sample", None);
        assert!(found.is_some());

        let found = find_matching_file(&index, "seq", Some(1));
        assert!(found.is_some());

        let found = find_matching_file(&index, "nonexistent", None);
        assert!(found.is_none());
    }

    // Generating from a folder with four images yields a readable Arrow file
    // with four rows and the expected columns.
    #[cfg(feature = "polars")]
    #[test]
    fn test_generate_arrow_from_folder() {
        use polars::prelude::*;

        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        let images_dir = root.join("images");
        create_test_image(&images_dir.join("photo1.jpg"));
        create_test_image(&images_dir.join("photo2.png"));
        create_test_image(&images_dir.join("seq/seq_001.camera.jpeg"));
        create_test_image(&images_dir.join("seq/seq_002.camera.jpeg"));

        let arrow_path = root.join("output.arrow");
        let count = generate_arrow_from_folder(&images_dir, &arrow_path, true).unwrap();

        assert_eq!(count, 4);
        assert!(arrow_path.exists());

        // Read the file back to check its shape.
        let mut file = File::open(&arrow_path).unwrap();
        let df = IpcReader::new(&mut file).finish().unwrap();

        assert_eq!(df.height(), 4);
        assert!(df.column("name").is_ok());
        assert!(df.column("frame").is_ok());
        assert!(df.column("label").is_ok());
    }

    // Every distinct sample name in the Arrow file gets an entry.
    #[cfg(feature = "polars")]
    #[test]
    fn test_resolve_arrow_files() {
        use polars::prelude::*;
        use std::io::BufWriter;

        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Two standalone samples and one sequence frame.
        let names = Series::new("name".into(), &["sample1", "sample2", "seq"]);
        let frames: Vec<Option<u64>> = vec![None, None, Some(1)];
        let frame_series = Series::new("frame".into(), &frames);

        let mut df = DataFrame::new(vec![names.into(), frame_series.into()]).unwrap();

        let arrow_path = root.join("test.arrow");
        let file = File::create(&arrow_path).unwrap();
        let writer = BufWriter::new(file);
        IpcWriter::new(writer).finish(&mut df).unwrap();

        let resolved = resolve_arrow_files(&arrow_path).unwrap();

        assert_eq!(resolved.len(), 3);
        assert!(resolved.contains_key("sample1"));
        assert!(resolved.contains_key("sample2"));
        assert!(resolved.contains_key("seq"));
    }

    // A dataset whose only sample has a matching file reports no missing files.
    #[cfg(feature = "polars")]
    #[test]
    fn test_validate_dataset_structure_valid() {
        use polars::prelude::*;
        use std::io::BufWriter;

        let temp_dir = TempDir::new().unwrap();
        let dataset_dir = temp_dir.path().join("my_dataset");
        std::fs::create_dir_all(&dataset_dir).unwrap();

        let names = Series::new("name".into(), &["image1"]);
        let frames: Vec<Option<u64>> = vec![None];
        let frame_series = Series::new("frame".into(), &frames);

        let mut df = DataFrame::new(vec![names.into(), frame_series.into()]).unwrap();

        let arrow_path = dataset_dir.join("my_dataset.arrow");
        let file = File::create(&arrow_path).unwrap();
        let writer = BufWriter::new(file);
        IpcWriter::new(writer).finish(&mut df).unwrap();

        // The container directory shares the dataset's name.
        let container = dataset_dir.join("my_dataset");
        create_test_image(&container.join("image1.camera.jpeg"));

        let issues = validate_dataset_structure(&dataset_dir).unwrap();

        let missing_files: Vec<_> = issues
            .iter()
            .filter(|i| matches!(i, ValidationIssue::MissingFile { .. }))
            .collect();
        assert!(
            missing_files.is_empty(),
            "Unexpected missing files: {:?}",
            missing_files
        );
    }

    // An empty dataset directory reports exactly one issue: the missing Arrow file.
    #[cfg(feature = "polars")]
    #[test]
    fn test_validate_dataset_structure_missing_arrow() {
        let temp_dir = TempDir::new().unwrap();
        let dataset_dir = temp_dir.path().join("my_dataset");
        std::fs::create_dir_all(&dataset_dir).unwrap();

        let issues = validate_dataset_structure(&dataset_dir).unwrap();

        assert_eq!(issues.len(), 1);
        assert!(matches!(
            &issues[0],
            ValidationIssue::MissingArrowFile { .. }
        ));
    }

    // Spot-check of the supported suffix list.
    #[test]
    fn test_image_extensions() {
        assert!(IMAGE_EXTENSIONS.contains(&"jpg"));
        assert!(IMAGE_EXTENSIONS.contains(&"jpeg"));
        assert!(IMAGE_EXTENSIONS.contains(&"png"));
        assert!(IMAGE_EXTENSIONS.contains(&"camera.jpeg"));
    }

    // The rendered message includes both the sample name and the expected path.
    #[test]
    fn test_validation_issue_display() {
        let issue = ValidationIssue::MissingFile {
            name: "test".to_string(),
            expected: PathBuf::from("test.jpg"),
        };
        let display = format!("{}", issue);
        assert!(display.contains("test"));
        assert!(display.contains("test.jpg"));
    }
}