edgefirst_client/
dataset.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright © 2025 Au-Zone Technologies. All Rights Reserved.
3
4use std::{collections::HashMap, fmt::Display};
5
6use crate::{
7    Client, Error,
8    api::{AnnotationSetID, DatasetID, ProjectID, SampleID},
9};
10use chrono::{DateTime, Utc};
11use serde::{Deserialize, Serialize};
12
13#[cfg(feature = "polars")]
14use polars::prelude::*;
15
/// File types supported in EdgeFirst Studio datasets.
///
/// Represents the different types of sensor data files that can be stored
/// and processed in a dataset. EdgeFirst Studio supports various modalities
/// including visual images and different forms of LiDAR and radar data.
///
/// Every variant has a canonical string form (listed on the variant). That
/// string is what the `Display` implementation writes and what the
/// `TryFrom<&str>` / `FromStr` implementations accept.
///
/// # Examples
///
/// ```rust
/// use edgefirst_client::FileType;
///
/// // Create file types from strings
/// let image_type: FileType = "image".try_into().unwrap();
/// let lidar_type: FileType = "lidar.pcd".try_into().unwrap();
///
/// // Display file types
/// println!("Processing {} files", image_type); // "Processing image files"
///
/// // Use in dataset operations - example usage
/// let file_type = FileType::Image;
/// match file_type {
///     FileType::Image => println!("Processing image files"),
///     FileType::LidarPcd => println!("Processing LiDAR point cloud files"),
///     _ => println!("Processing other sensor data"),
/// }
/// ```
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum FileType {
    /// Standard image files (JPEG, PNG, etc.). String form: `"image"`.
    Image,
    /// LiDAR point cloud data files (.pcd format). String form: `"lidar.pcd"`.
    LidarPcd,
    /// LiDAR depth images (.png format). String form: `"lidar.png"`.
    LidarDepth,
    /// LiDAR reflectance images (.jpg format). String form: `"lidar.jpg"`.
    LidarReflect,
    /// Radar point cloud data files (.pcd format). String form: `"radar.pcd"`.
    RadarPcd,
    /// Radar cube data files (.png format). String form: `"radar.png"`.
    RadarCube,
}
57
58impl std::fmt::Display for FileType {
59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60        let value = match self {
61            FileType::Image => "image",
62            FileType::LidarPcd => "lidar.pcd",
63            FileType::LidarDepth => "lidar.png",
64            FileType::LidarReflect => "lidar.jpg",
65            FileType::RadarPcd => "radar.pcd",
66            FileType::RadarCube => "radar.png",
67        };
68        write!(f, "{}", value)
69    }
70}
71
72impl TryFrom<&str> for FileType {
73    type Error = crate::Error;
74
75    fn try_from(s: &str) -> Result<Self, Self::Error> {
76        match s {
77            "image" => Ok(FileType::Image),
78            "lidar.pcd" => Ok(FileType::LidarPcd),
79            "lidar.png" => Ok(FileType::LidarDepth),
80            "lidar.jpg" => Ok(FileType::LidarReflect),
81            "radar.pcd" => Ok(FileType::RadarPcd),
82            "radar.png" => Ok(FileType::RadarCube),
83            _ => Err(crate::Error::InvalidFileType(s.to_string())),
84        }
85    }
86}
87
88impl std::str::FromStr for FileType {
89    type Err = crate::Error;
90
91    fn from_str(s: &str) -> Result<Self, Self::Err> {
92        s.try_into()
93    }
94}
95
/// Annotation types supported for labeling data in EdgeFirst Studio.
///
/// Represents the different types of annotations that can be applied to
/// sensor data for machine learning tasks. Each type corresponds to a
/// different annotation geometry and use case.
///
/// Each variant has a client-side string form (used by `Display` and
/// `TryFrom<&str>`) and a server-side name (see `as_server_type`); both are
/// listed on the variants. Converting from `String` / `&String` with `From`
/// never fails: unrecognized names fall back to `Box2d`.
///
/// # Examples
///
/// ```rust
/// use edgefirst_client::AnnotationType;
///
/// // Create annotation types from strings (using TryFrom)
/// let box_2d: AnnotationType = "box2d".try_into().unwrap();
/// let segmentation: AnnotationType = "mask".try_into().unwrap();
///
/// // Or use From with String
/// let box_2d = AnnotationType::from("box2d".to_string());
/// let segmentation = AnnotationType::from("mask".to_string());
///
/// // Display annotation types
/// println!("Annotation type: {}", box_2d); // "Annotation type: box2d"
///
/// // Use in matching and processing
/// let annotation_type = AnnotationType::Box2d;
/// match annotation_type {
///     AnnotationType::Box2d => println!("Processing 2D bounding boxes"),
///     AnnotationType::Box3d => println!("Processing 3D bounding boxes"),
///     AnnotationType::Mask => println!("Processing segmentation masks"),
/// }
/// ```
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum AnnotationType {
    /// 2D bounding boxes for object detection in images.
    /// Client string: `"box2d"`; server name: `"box"`.
    Box2d,
    /// 3D bounding boxes for object detection in 3D space (LiDAR, etc.).
    /// Client string: `"box3d"`; server name: `"box3d"`.
    Box3d,
    /// Pixel-level segmentation masks for semantic/instance segmentation.
    /// Client string: `"mask"`; server name: `"seg"`.
    Mask,
}
135
136impl TryFrom<&str> for AnnotationType {
137    type Error = crate::Error;
138
139    fn try_from(s: &str) -> Result<Self, Self::Error> {
140        match s {
141            "box2d" => Ok(AnnotationType::Box2d),
142            "box3d" => Ok(AnnotationType::Box3d),
143            "mask" => Ok(AnnotationType::Mask),
144            _ => Err(crate::Error::InvalidAnnotationType(s.to_string())),
145        }
146    }
147}
148
149impl From<String> for AnnotationType {
150    fn from(s: String) -> Self {
151        // For backward compatibility, default to Box2d if invalid
152        s.as_str().try_into().unwrap_or(AnnotationType::Box2d)
153    }
154}
155
156impl From<&String> for AnnotationType {
157    fn from(s: &String) -> Self {
158        // For backward compatibility, default to Box2d if invalid
159        s.as_str().try_into().unwrap_or(AnnotationType::Box2d)
160    }
161}
162
163impl AnnotationType {
164    /// Returns the server API type name for this annotation type.
165    ///
166    /// The server uses different naming conventions than the client:
167    /// - `Box2d` → `"box"` (server) vs `"box2d"` (client display)
168    /// - `Box3d` → `"box3d"` (same)
169    /// - `Mask` → `"seg"` (server) vs `"mask"` (client display)
170    pub fn as_server_type(&self) -> &'static str {
171        match self {
172            AnnotationType::Box2d => "box",
173            AnnotationType::Box3d => "box3d",
174            AnnotationType::Mask => "seg",
175        }
176    }
177}
178
179impl std::fmt::Display for AnnotationType {
180    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
181        let value = match self {
182            AnnotationType::Box2d => "box2d",
183            AnnotationType::Box3d => "box3d",
184            AnnotationType::Mask => "mask",
185        };
186        write!(f, "{}", value)
187    }
188}
189
/// A dataset in EdgeFirst Studio containing sensor data and annotations.
///
/// Datasets are collections of multi-modal sensor data (images, LiDAR, radar)
/// along with their corresponding annotations (bounding boxes, segmentation
/// masks, 3D annotations). Datasets belong to projects and can be used for
/// training and validation of machine learning models.
///
/// # Features
///
/// - **Multi-modal Data**: Support for images, LiDAR point clouds, radar data
/// - **Rich Annotations**: 2D/3D bounding boxes, segmentation masks
/// - **Metadata**: Timestamps, sensor configurations, calibration data
/// - **Version Control**: Track changes and maintain data lineage
/// - **Format Conversion**: Export to popular ML frameworks
///
/// # Examples
///
/// ```no_run
/// use edgefirst_client::{Client, Dataset, DatasetID};
/// use std::str::FromStr;
///
/// # async fn example() -> Result<(), edgefirst_client::Error> {
/// # let client = Client::new()?;
/// // Get dataset information
/// let dataset_id = DatasetID::from_str("ds-abc123")?;
/// let dataset = client.dataset(dataset_id).await?;
/// println!("Dataset: {}", dataset.name());
///
/// // Access dataset metadata
/// println!("Dataset ID: {}", dataset.id());
/// println!("Description: {}", dataset.description());
/// println!("Created: {}", dataset.created());
///
/// // Work with dataset data would require additional methods
/// // that are implemented in the full API
/// # Ok(())
/// # }
/// ```
#[derive(Deserialize, Clone, Debug)]
pub struct Dataset {
    /// Unique identifier of this dataset.
    id: DatasetID,
    /// Identifier of the project this dataset belongs to.
    project_id: ProjectID,
    /// Dataset name; shown after the ID by the `Display` implementation.
    name: String,
    /// Dataset description text.
    description: String,
    /// Cloud key string as received from the server.
    /// NOTE(review): its exact meaning is not visible in this file — confirm.
    cloud_key: String,
    /// Creation timestamp; read from the `createdAt` field when deserializing.
    #[serde(rename = "createdAt")]
    created: DateTime<Utc>,
}
238
239impl Display for Dataset {
240    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
241        write!(f, "{} {}", self.id, self.name)
242    }
243}
244
245impl Dataset {
246    pub fn id(&self) -> DatasetID {
247        self.id
248    }
249
250    pub fn project_id(&self) -> ProjectID {
251        self.project_id
252    }
253
254    pub fn name(&self) -> &str {
255        &self.name
256    }
257
258    pub fn description(&self) -> &str {
259        &self.description
260    }
261
262    pub fn cloud_key(&self) -> &str {
263        &self.cloud_key
264    }
265
266    pub fn created(&self) -> &DateTime<Utc> {
267        &self.created
268    }
269
270    pub async fn project(&self, client: &Client) -> Result<crate::api::Project, Error> {
271        client.project(self.project_id).await
272    }
273
274    pub async fn annotation_sets(&self, client: &Client) -> Result<Vec<AnnotationSet>, Error> {
275        client.annotation_sets(self.id).await
276    }
277
278    pub async fn labels(&self, client: &Client) -> Result<Vec<Label>, Error> {
279        client.labels(self.id).await
280    }
281
282    pub async fn add_label(&self, client: &Client, name: &str) -> Result<(), Error> {
283        client.add_label(self.id, name).await
284    }
285
286    pub async fn remove_label(&self, client: &Client, name: &str) -> Result<(), Error> {
287        let labels = self.labels(client).await?;
288        let label = labels
289            .iter()
290            .find(|l| l.name() == name)
291            .ok_or_else(|| Error::MissingLabel(name.to_string()))?;
292        client.remove_label(label.id()).await
293    }
294}
295
296/// The AnnotationSet class represents a collection of annotations in a dataset.
297/// A dataset can have multiple annotation sets, each containing annotations for
298/// different tasks or purposes.
299#[derive(Deserialize)]
300pub struct AnnotationSet {
301    id: AnnotationSetID,
302    dataset_id: DatasetID,
303    name: String,
304    description: String,
305    #[serde(rename = "date")]
306    created: DateTime<Utc>,
307}
308
309impl Display for AnnotationSet {
310    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
311        write!(f, "{} {}", self.id, self.name)
312    }
313}
314
315impl AnnotationSet {
316    pub fn id(&self) -> AnnotationSetID {
317        self.id
318    }
319
320    pub fn dataset_id(&self) -> DatasetID {
321        self.dataset_id
322    }
323
324    pub fn name(&self) -> &str {
325        &self.name
326    }
327
328    pub fn description(&self) -> &str {
329        &self.description
330    }
331
332    pub fn created(&self) -> DateTime<Utc> {
333        self.created
334    }
335
336    pub async fn dataset(&self, client: &Client) -> Result<Dataset, Error> {
337        client.dataset(self.dataset_id).await
338    }
339}
340
/// A sample in a dataset, typically representing a single image with metadata
/// and optional sensor data.
///
/// Each sample has a unique ID, image reference, and can include additional
/// sensor data like LiDAR, radar, or depth maps. Samples can also have
/// associated annotations.
///
/// Optional fields that are `None`, and `files` / `annotations` when empty,
/// are left out of the serialized output.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Sample {
    /// Sample identifier, if one has been set.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<SampleID>,
    /// Dataset split (train, val, test) - stored in Arrow metadata, not used
    /// for directory structure.
    /// API field name discrepancy: samples.populate2 expects "group", but
    /// samples.list returns "group_name".
    // NOTE(review): the `alias` repeats the deserialize name ("group_name"),
    // so a serialized "group" key does not appear to be read back on
    // deserialization — confirm whether that round trip is needed.
    #[serde(
        alias = "group_name",
        rename(serialize = "group", deserialize = "group_name"),
        skip_serializing_if = "Option::is_none"
    )]
    pub group: Option<String>,
    /// Name of the sequence, if set.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_name: Option<String>,
    /// UUID of the sequence, if set.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_uuid: Option<String>,
    /// Description of the sequence, if set.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_description: Option<String>,
    /// Frame number, if set. When deserializing, a missing, null or negative
    /// value becomes `None` (see `deserialize_frame_number`).
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_frame_number"
    )]
    pub frame_number: Option<u32>,
    /// UUID of the sample, if set.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub uuid: Option<String>,
    /// Image file name, if set; also used to derive `Sample::name`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_name: Option<String>,
    /// URL of the image, if set; passed to `resolve_file_url` by
    /// `Sample::download`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_url: Option<String>,
    /// Width, if known (presumably the image width in pixels — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub width: Option<u32>,
    /// Height, if known (presumably the image height in pixels — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub height: Option<u32>,
    /// Timestamp associated with the sample, if set.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub date: Option<DateTime<Utc>>,
    /// Source string of the sample, if set.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// Camera location and pose (GPS + IMU data).
    /// Serialized as "sensors" for API compatibility with populate endpoint.
    #[serde(rename = "sensors", skip_serializing_if = "Option::is_none")]
    pub location: Option<Location>,
    /// Image degradation type (blur, occlusion, weather, etc.).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub degradation: Option<String>,
    /// Additional sensor files (LiDAR, radar, depth maps, etc.).
    /// When deserializing from samples.list: Vec<SampleFile>
    /// When serializing for samples.populate2: HashMap<String, String>
    /// (file_type -> filename)
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
        serialize_with = "serialize_files",
        deserialize_with = "deserialize_files"
    )]
    pub files: Vec<SampleFile>,
    /// Annotations attached to this sample. Serialized as a flat array;
    /// deserialization accepts either an array or a map of arrays (see
    /// `deserialize_annotations`).
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
        serialize_with = "serialize_annotations",
        deserialize_with = "deserialize_annotations"
    )]
    pub annotations: Vec<Annotation>,
}
413
414// Custom deserializer for frame_number - converts -1 to None
415// Server returns -1 for non-sequence samples, but clients should see None
416fn deserialize_frame_number<'de, D>(deserializer: D) -> Result<Option<u32>, D::Error>
417where
418    D: serde::Deserializer<'de>,
419{
420    use serde::Deserialize;
421
422    let value = Option::<i32>::deserialize(deserializer)?;
423    Ok(value.and_then(|v| if v < 0 { None } else { Some(v as u32) }))
424}
425
426// Custom serializer for files field - converts Vec<SampleFile> to
427// HashMap<String, String>
428fn serialize_files<S>(files: &[SampleFile], serializer: S) -> Result<S::Ok, S::Error>
429where
430    S: serde::Serializer,
431{
432    use serde::Serialize;
433    let map: HashMap<String, String> = files
434        .iter()
435        .filter_map(|f| {
436            f.filename()
437                .map(|filename| (f.file_type().to_string(), filename.to_string()))
438        })
439        .collect();
440    map.serialize(serializer)
441}
442
443// Custom deserializer for files field - converts HashMap or Vec to
444// Vec<SampleFile>
445fn deserialize_files<'de, D>(deserializer: D) -> Result<Vec<SampleFile>, D::Error>
446where
447    D: serde::Deserializer<'de>,
448{
449    use serde::Deserialize;
450
451    #[derive(Deserialize)]
452    #[serde(untagged)]
453    enum FilesFormat {
454        Vec(Vec<SampleFile>),
455        Map(HashMap<String, String>),
456    }
457
458    let value = Option::<FilesFormat>::deserialize(deserializer)?;
459    Ok(value
460        .map(|v| match v {
461            FilesFormat::Vec(files) => files,
462            FilesFormat::Map(map) => convert_files_map_to_vec(map),
463        })
464        .unwrap_or_default())
465}
466
467// Custom serializer for annotations field - serializes to a flat
468// Vec<Annotation> to match the updated samples.populate2 contract (annotations
469// array)
470fn serialize_annotations<S>(annotations: &Vec<Annotation>, serializer: S) -> Result<S::Ok, S::Error>
471where
472    S: serde::Serializer,
473{
474    serde::Serialize::serialize(annotations, serializer)
475}
476
477// Custom deserializer for annotations field - converts server format back to
478// Vec<Annotation>
479fn deserialize_annotations<'de, D>(deserializer: D) -> Result<Vec<Annotation>, D::Error>
480where
481    D: serde::Deserializer<'de>,
482{
483    use serde::Deserialize;
484
485    #[derive(Deserialize)]
486    #[serde(untagged)]
487    enum AnnotationsFormat {
488        Vec(Vec<Annotation>),
489        Map(HashMap<String, Vec<Annotation>>),
490    }
491
492    let value = Option::<AnnotationsFormat>::deserialize(deserializer)?;
493    Ok(value
494        .map(|v| match v {
495            AnnotationsFormat::Vec(annotations) => annotations,
496            AnnotationsFormat::Map(map) => convert_annotations_map_to_vec(map),
497        })
498        .unwrap_or_default())
499}
500
501impl Display for Sample {
502    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
503        write!(
504            f,
505            "{} {}",
506            self.id
507                .map(|id| id.to_string())
508                .unwrap_or_else(|| "unknown".to_string()),
509            self.image_name().unwrap_or("unknown")
510        )
511    }
512}
513
514impl Default for Sample {
515    fn default() -> Self {
516        Self::new()
517    }
518}
519
520impl Sample {
521    /// Creates a new empty sample.
522    pub fn new() -> Self {
523        Self {
524            id: None,
525            group: None,
526            sequence_name: None,
527            sequence_uuid: None,
528            sequence_description: None,
529            frame_number: None,
530            uuid: None,
531            image_name: None,
532            image_url: None,
533            width: None,
534            height: None,
535            date: None,
536            source: None,
537            location: None,
538            degradation: None,
539            files: vec![],
540            annotations: vec![],
541        }
542    }
543
544    pub fn id(&self) -> Option<SampleID> {
545        self.id
546    }
547
548    pub fn name(&self) -> Option<String> {
549        self.image_name.as_ref().map(|n| extract_sample_name(n))
550    }
551
552    pub fn group(&self) -> Option<&String> {
553        self.group.as_ref()
554    }
555
556    pub fn sequence_name(&self) -> Option<&String> {
557        self.sequence_name.as_ref()
558    }
559
560    pub fn sequence_uuid(&self) -> Option<&String> {
561        self.sequence_uuid.as_ref()
562    }
563
564    pub fn sequence_description(&self) -> Option<&String> {
565        self.sequence_description.as_ref()
566    }
567
568    pub fn frame_number(&self) -> Option<u32> {
569        self.frame_number
570    }
571
572    pub fn uuid(&self) -> Option<&String> {
573        self.uuid.as_ref()
574    }
575
576    pub fn image_name(&self) -> Option<&str> {
577        self.image_name.as_deref()
578    }
579
580    pub fn image_url(&self) -> Option<&str> {
581        self.image_url.as_deref()
582    }
583
584    pub fn width(&self) -> Option<u32> {
585        self.width
586    }
587
588    pub fn height(&self) -> Option<u32> {
589        self.height
590    }
591
592    pub fn date(&self) -> Option<DateTime<Utc>> {
593        self.date
594    }
595
596    pub fn source(&self) -> Option<&String> {
597        self.source.as_ref()
598    }
599
600    pub fn location(&self) -> Option<&Location> {
601        self.location.as_ref()
602    }
603
604    pub fn files(&self) -> &[SampleFile] {
605        &self.files
606    }
607
608    pub fn annotations(&self) -> &[Annotation] {
609        &self.annotations
610    }
611
612    pub fn with_annotations(mut self, annotations: Vec<Annotation>) -> Self {
613        self.annotations = annotations;
614        self
615    }
616
617    pub fn with_frame_number(mut self, frame_number: Option<u32>) -> Self {
618        self.frame_number = frame_number;
619        self
620    }
621
622    pub async fn download(
623        &self,
624        client: &Client,
625        file_type: FileType,
626    ) -> Result<Option<Vec<u8>>, Error> {
627        let url = resolve_file_url(&file_type, self.image_url.as_deref(), &self.files);
628
629        Ok(match url {
630            Some(url) => Some(client.download(url).await?),
631            None => None,
632        })
633    }
634}
635
/// A file associated with a sample (e.g., LiDAR point cloud, radar data).
///
/// For samples retrieved from the server, this contains the file type and URL.
/// For samples being populated to the server, this can be a type and filename.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct SampleFile {
    /// File type string (serialized under the key `type`).
    r#type: String,
    /// URL of the file, if known; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    url: Option<String>,
    /// File name, if known; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    filename: Option<String>,
}
648
649impl SampleFile {
650    /// Creates a new sample file with type and URL (for downloaded samples).
651    pub fn with_url(file_type: String, url: String) -> Self {
652        Self {
653            r#type: file_type,
654            url: Some(url),
655            filename: None,
656        }
657    }
658
659    /// Creates a new sample file with type and filename (for populate API).
660    pub fn with_filename(file_type: String, filename: String) -> Self {
661        Self {
662            r#type: file_type,
663            url: None,
664            filename: Some(filename),
665        }
666    }
667
668    pub fn file_type(&self) -> &str {
669        &self.r#type
670    }
671
672    pub fn url(&self) -> Option<&str> {
673        self.url.as_deref()
674    }
675
676    pub fn filename(&self) -> Option<&str> {
677        self.filename.as_deref()
678    }
679}
680
/// Location and pose information for a sample.
///
/// Contains GPS coordinates and IMU orientation data describing where and how
/// the camera was positioned when capturing the sample.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Location {
    /// GPS position, if available; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gps: Option<GpsData>,
    /// IMU orientation, if available; omitted from serialized output when
    /// `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub imu: Option<ImuData>,
}
692
/// GPS location data (latitude and longitude).
///
/// Use `GpsData::validate` to check that both values are in range.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct GpsData {
    /// Latitude in degrees.
    pub lat: f64,
    /// Longitude in degrees.
    pub lon: f64,
}
699
700impl GpsData {
701    /// Validate GPS coordinates are within valid ranges.
702    ///
703    /// Checks if latitude and longitude values are within valid geographic
704    /// ranges. Helps catch data corruption or API issues early.
705    ///
706    /// # Returns
707    /// `Ok(())` if valid, `Err(String)` with descriptive error message
708    /// otherwise
709    ///
710    /// # Valid Ranges
711    /// - Latitude: -90.0 to +90.0 degrees
712    /// - Longitude: -180.0 to +180.0 degrees
713    ///
714    /// # Examples
715    /// ```
716    /// use edgefirst_client::GpsData;
717    ///
718    /// let gps = GpsData {
719    ///     lat: 37.7749,
720    ///     lon: -122.4194,
721    /// };
722    /// assert!(gps.validate().is_ok());
723    ///
724    /// let bad_gps = GpsData {
725    ///     lat: 100.0,
726    ///     lon: 0.0,
727    /// };
728    /// assert!(bad_gps.validate().is_err());
729    /// ```
730    pub fn validate(&self) -> Result<(), String> {
731        validate_gps_coordinates(self.lat, self.lon)
732    }
733}
734
/// IMU orientation data (roll, pitch, yaw in degrees).
///
/// Use `ImuData::validate` to check that all three angles are in range.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ImuData {
    /// Roll angle in degrees.
    pub roll: f64,
    /// Pitch angle in degrees.
    pub pitch: f64,
    /// Yaw angle in degrees.
    pub yaw: f64,
}
742
743impl ImuData {
744    /// Validate IMU orientation angles are within valid ranges.
745    ///
746    /// Checks if roll, pitch, and yaw values are finite and within reasonable
747    /// ranges. Helps catch data corruption or sensor errors early.
748    ///
749    /// # Returns
750    /// `Ok(())` if valid, `Err(String)` with descriptive error message
751    /// otherwise
752    ///
753    /// # Valid Ranges
754    /// - Roll: -180.0 to +180.0 degrees
755    /// - Pitch: -90.0 to +90.0 degrees (typical gimbal lock range)
756    /// - Yaw: -180.0 to +180.0 degrees (or 0 to 360, normalized)
757    ///
758    /// # Examples
759    /// ```
760    /// use edgefirst_client::ImuData;
761    ///
762    /// let imu = ImuData {
763    ///     roll: 10.0,
764    ///     pitch: 5.0,
765    ///     yaw: 90.0,
766    /// };
767    /// assert!(imu.validate().is_ok());
768    ///
769    /// let bad_imu = ImuData {
770    ///     roll: 200.0,
771    ///     pitch: 0.0,
772    ///     yaw: 0.0,
773    /// };
774    /// assert!(bad_imu.validate().is_err());
775    /// ```
776    pub fn validate(&self) -> Result<(), String> {
777        validate_imu_orientation(self.roll, self.pitch, self.yaw)
778    }
779}
780
/// Associates a type with a string name.
///
/// Implemented in this file by `Box2d` (`"box2d"`), `Box3d` (`"box3d"`) and
/// `Mask` (`"mask"`).
// NOTE(review): dead_code is allowed here, presumably because the trait is not
// used in every build configuration — confirm and document the reason.
#[allow(dead_code)]
pub trait TypeName {
    /// Returns the name for the implementing type as an owned string.
    fn type_name() -> String;
}
785
/// A 3D box stored as a center point plus width, height and length.
///
/// `Box3d::new` takes the center coordinates; `left`, `top` and `front` are
/// derived by subtracting half of the matching extent from the center.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct Box3d {
    /// Center x coordinate.
    x: f32,
    /// Center y coordinate.
    y: f32,
    /// Center z coordinate.
    z: f32,
    /// Width (extent along x).
    w: f32,
    /// Height (extent along y).
    h: f32,
    /// Length (extent along z).
    l: f32,
}
795
796impl TypeName for Box3d {
797    fn type_name() -> String {
798        "box3d".to_owned()
799    }
800}
801
802impl Box3d {
803    pub fn new(cx: f32, cy: f32, cz: f32, width: f32, height: f32, length: f32) -> Self {
804        Self {
805            x: cx,
806            y: cy,
807            z: cz,
808            w: width,
809            h: height,
810            l: length,
811        }
812    }
813
814    pub fn width(&self) -> f32 {
815        self.w
816    }
817
818    pub fn height(&self) -> f32 {
819        self.h
820    }
821
822    pub fn length(&self) -> f32 {
823        self.l
824    }
825
826    pub fn cx(&self) -> f32 {
827        self.x
828    }
829
830    pub fn cy(&self) -> f32 {
831        self.y
832    }
833
834    pub fn cz(&self) -> f32 {
835        self.z
836    }
837
838    pub fn left(&self) -> f32 {
839        self.x - self.w / 2.0
840    }
841
842    pub fn top(&self) -> f32 {
843        self.y - self.h / 2.0
844    }
845
846    pub fn front(&self) -> f32 {
847        self.z - self.l / 2.0
848    }
849}
850
/// A 2D box stored as its left/top corner plus width and height.
///
/// `Box2d::new` takes the left and top coordinates; `cx` and `cy` are derived
/// by adding half of the matching extent.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct Box2d {
    /// Height (extent along y).
    h: f32,
    /// Width (extent along x).
    w: f32,
    /// Left edge (x coordinate of the corner).
    x: f32,
    /// Top edge (y coordinate of the corner).
    y: f32,
}
858
859impl TypeName for Box2d {
860    fn type_name() -> String {
861        "box2d".to_owned()
862    }
863}
864
865impl Box2d {
866    pub fn new(left: f32, top: f32, width: f32, height: f32) -> Self {
867        Self {
868            x: left,
869            y: top,
870            w: width,
871            h: height,
872        }
873    }
874
875    pub fn width(&self) -> f32 {
876        self.w
877    }
878
879    pub fn height(&self) -> f32 {
880        self.h
881    }
882
883    pub fn left(&self) -> f32 {
884        self.x
885    }
886
887    pub fn top(&self) -> f32 {
888        self.y
889    }
890
891    pub fn cx(&self) -> f32 {
892        self.x + self.w / 2.0
893    }
894
895    pub fn cy(&self) -> f32 {
896        self.y + self.h / 2.0
897    }
898}
899
/// A segmentation mask described by polygon rings.
///
/// Serialization is implemented by hand in this file: a mask is written as
/// the bare polygon array, and can be read back from either that array or an
/// object with a `polygon` key.
#[derive(Clone, Debug, PartialEq)]
pub struct Mask {
    /// Polygon rings; each ring is a list of `(x, y)` points.
    pub polygon: Vec<Vec<(f32, f32)>>,
}
904
905impl TypeName for Mask {
906    fn type_name() -> String {
907        "mask".to_owned()
908    }
909}
910
911impl Mask {
912    pub fn new(polygon: Vec<Vec<(f32, f32)>>) -> Self {
913        Self { polygon }
914    }
915}
916
917impl serde::Serialize for Mask {
918    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
919    where
920        S: serde::Serializer,
921    {
922        serde::Serialize::serialize(&self.polygon, serializer)
923    }
924}
925
926impl<'de> serde::Deserialize<'de> for Mask {
927    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
928    where
929        D: serde::Deserializer<'de>,
930    {
931        // First, deserialize to a raw JSON value to handle various formats
932        let value = serde_json::Value::deserialize(deserializer)?;
933
934        // Try to extract polygon data from various formats
935        let polygon_value = if let Some(obj) = value.as_object() {
936            // Format: {"polygon": [...]}
937            obj.get("polygon")
938                .cloned()
939                .unwrap_or(serde_json::Value::Null)
940        } else {
941            // Format: [[...]] (direct array)
942            value
943        };
944
945        // Parse the polygon array, filtering out null/invalid values
946        let polygon = parse_polygon_value(&polygon_value);
947
948        Ok(Self { polygon })
949    }
950}
951
952/// Parse polygon value from JSON, handling malformed data gracefully.
953///
954/// Handles multiple formats:
955/// - `[[[x,y],[x,y],...]]` - 3D array with point pairs (correct format)
956/// - `[[x,y,x,y,...]]` - 2D array with flat coords (COCO format, legacy)
957/// - `[[null,null,...]]` - corrupted data (returns empty)
958/// - `null` - missing data (returns empty)
959fn parse_polygon_value(value: &serde_json::Value) -> Vec<Vec<(f32, f32)>> {
960    let Some(outer_array) = value.as_array() else {
961        return vec![];
962    };
963
964    let mut result = Vec::new();
965
966    for ring in outer_array {
967        let Some(ring_array) = ring.as_array() else {
968            continue;
969        };
970
971        // Check if this is a 3D array (point pairs) or 2D array (flat coords)
972        let is_3d = ring_array
973            .first()
974            .map(|first| first.is_array())
975            .unwrap_or(false);
976
977        let points: Vec<(f32, f32)> = if is_3d {
978            // 3D format: [[x1,y1], [x2,y2], ...]
979            ring_array
980                .iter()
981                .filter_map(|point| {
982                    let arr = point.as_array()?;
983                    if arr.len() >= 2 {
984                        let x = arr[0].as_f64()? as f32;
985                        let y = arr[1].as_f64()? as f32;
986                        if x.is_finite() && y.is_finite() {
987                            Some((x, y))
988                        } else {
989                            None
990                        }
991                    } else {
992                        None
993                    }
994                })
995                .collect()
996        } else {
997            // 2D format (flat): [x1, y1, x2, y2, ...]
998            ring_array
999                .chunks(2)
1000                .filter_map(|chunk| {
1001                    if chunk.len() >= 2 {
1002                        let x = chunk[0].as_f64()? as f32;
1003                        let y = chunk[1].as_f64()? as f32;
1004                        if x.is_finite() && y.is_finite() {
1005                            Some((x, y))
1006                        } else {
1007                            None
1008                        }
1009                    } else {
1010                        None
1011                    }
1012                })
1013                .collect()
1014        };
1015
1016        // Only add rings with at least 3 valid points
1017        if points.len() >= 3 {
1018            result.push(points);
1019        }
1020    }
1021
1022    result
1023}
1024
/// Helper struct for deserializing annotations from the server.
///
/// The server sends bounding box coordinates as flat fields (x, y, w, h) at the
/// annotation level, but we want to store them as a nested Box2d struct.
///
/// Every field is optional and defaults to `None` when absent from the input.
#[derive(Deserialize)]
struct AnnotationRaw {
    /// Identifier of the sample the annotation belongs to.
    #[serde(default)]
    sample_id: Option<SampleID>,
    /// Name attached to the annotation.
    #[serde(default)]
    name: Option<String>,
    /// Sequence name attached to the annotation.
    #[serde(default)]
    sequence_name: Option<String>,
    /// Frame number.
    // NOTE(review): unlike `Sample::frame_number`, this is read directly as
    // u32, so a negative value (the server is said to send -1 for non-sequence
    // samples) would be a deserialization error here — confirm the server
    // never sends one for annotations.
    #[serde(default)]
    frame_number: Option<u32>,
    /// Dataset split; read from the `group_name` field.
    #[serde(rename = "group_name", default)]
    group: Option<String>,
    /// Object identifier; read from `object_reference` or `object_id`.
    #[serde(rename = "object_reference", alias = "object_id", default)]
    object_id: Option<String>,
    /// Label name.
    #[serde(default)]
    label_name: Option<String>,
    /// Label index.
    #[serde(default)]
    label_index: Option<u64>,
    // Nested box2d format (if server sends it this way)
    #[serde(default)]
    box2d: Option<Box2d>,
    /// Nested 3D box, if present.
    #[serde(default)]
    box3d: Option<Box3d>,
    /// Segmentation mask, if present.
    #[serde(default)]
    mask: Option<Mask>,
    // Flat box2d fields from server (x, y, w, h at annotation level)
    #[serde(default)]
    x: Option<f64>,
    #[serde(default)]
    y: Option<f64>,
    #[serde(default)]
    w: Option<f64>,
    #[serde(default)]
    h: Option<f64>,
}
1064
/// A single annotation: optional identification fields plus optional 2D box,
/// 3D box and mask geometry.
///
/// Only `Serialize` is derived; deserialization is implemented by hand (via
/// `AnnotationRaw`) so that flat `x`/`y`/`w`/`h` fields can be accepted as a
/// `box2d`. Every field is skipped during serialization when it is `None`.
#[derive(Serialize, Clone, Debug)]
pub struct Annotation {
    #[serde(skip_serializing_if = "Option::is_none")]
    sample_id: Option<SampleID>,
    #[serde(skip_serializing_if = "Option::is_none")]
    name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    sequence_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    frame_number: Option<u32>,
    /// Dataset split (train, val, test) - matches `Sample.group`.
    /// JSON field name: "group_name" (Studio API uses this name for both upload
    /// and download).
    #[serde(rename = "group_name", skip_serializing_if = "Option::is_none")]
    group: Option<String>,
    /// Object tracking identifier across frames.
    /// JSON field name: "object_reference" for upload (populate), "object_id"
    /// for download (list).
    ///
    /// This struct is serialized under the name "object_reference".
    #[serde(
        rename = "object_reference",
        alias = "object_id",
        skip_serializing_if = "Option::is_none"
    )]
    object_id: Option<String>,
    /// Label name; exposed through `label()` / `set_label()`.
    #[serde(skip_serializing_if = "Option::is_none")]
    label_name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    label_index: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    box2d: Option<Box2d>,
    #[serde(skip_serializing_if = "Option::is_none")]
    box3d: Option<Box3d>,
    #[serde(skip_serializing_if = "Option::is_none")]
    mask: Option<Mask>,
}
1100
1101impl<'de> serde::Deserialize<'de> for Annotation {
1102    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1103    where
1104        D: serde::Deserializer<'de>,
1105    {
1106        // Deserialize to AnnotationRaw first to handle server format differences
1107        let raw: AnnotationRaw = serde::Deserialize::deserialize(deserializer)?;
1108
1109        // Prefer nested box2d if present, otherwise construct from flat x/y/w/h
1110        let box2d = raw.box2d.or_else(|| match (raw.x, raw.y, raw.w, raw.h) {
1111            (Some(x), Some(y), Some(w), Some(h)) if w > 0.0 && h > 0.0 => {
1112                Some(Box2d::new(x as f32, y as f32, w as f32, h as f32))
1113            }
1114            _ => None,
1115        });
1116
1117        Ok(Annotation {
1118            sample_id: raw.sample_id,
1119            name: raw.name,
1120            sequence_name: raw.sequence_name,
1121            frame_number: raw.frame_number,
1122            group: raw.group,
1123            object_id: raw.object_id,
1124            label_name: raw.label_name,
1125            label_index: raw.label_index,
1126            box2d,
1127            box3d: raw.box3d,
1128            mask: raw.mask,
1129        })
1130    }
1131}
1132
1133impl Default for Annotation {
1134    fn default() -> Self {
1135        Self::new()
1136    }
1137}
1138
1139impl Annotation {
1140    pub fn new() -> Self {
1141        Self {
1142            sample_id: None,
1143            name: None,
1144            sequence_name: None,
1145            frame_number: None,
1146            group: None,
1147            object_id: None,
1148            label_name: None,
1149            label_index: None,
1150            box2d: None,
1151            box3d: None,
1152            mask: None,
1153        }
1154    }
1155
1156    pub fn set_sample_id(&mut self, sample_id: Option<SampleID>) {
1157        self.sample_id = sample_id;
1158    }
1159
1160    pub fn sample_id(&self) -> Option<SampleID> {
1161        self.sample_id
1162    }
1163
1164    pub fn set_name(&mut self, name: Option<String>) {
1165        self.name = name;
1166    }
1167
1168    pub fn name(&self) -> Option<&String> {
1169        self.name.as_ref()
1170    }
1171
1172    pub fn set_sequence_name(&mut self, sequence_name: Option<String>) {
1173        self.sequence_name = sequence_name;
1174    }
1175
1176    pub fn sequence_name(&self) -> Option<&String> {
1177        self.sequence_name.as_ref()
1178    }
1179
1180    pub fn set_frame_number(&mut self, frame_number: Option<u32>) {
1181        self.frame_number = frame_number;
1182    }
1183
1184    pub fn frame_number(&self) -> Option<u32> {
1185        self.frame_number
1186    }
1187
1188    pub fn set_group(&mut self, group: Option<String>) {
1189        self.group = group;
1190    }
1191
1192    pub fn group(&self) -> Option<&String> {
1193        self.group.as_ref()
1194    }
1195
1196    pub fn object_id(&self) -> Option<&String> {
1197        self.object_id.as_ref()
1198    }
1199
1200    pub fn set_object_id(&mut self, object_id: Option<String>) {
1201        self.object_id = object_id;
1202    }
1203
1204    #[deprecated(note = "renamed to object_id")]
1205    pub fn object_reference(&self) -> Option<&String> {
1206        self.object_id()
1207    }
1208
1209    #[deprecated(note = "renamed to set_object_id")]
1210    pub fn set_object_reference(&mut self, object_reference: Option<String>) {
1211        self.set_object_id(object_reference);
1212    }
1213
1214    pub fn label(&self) -> Option<&String> {
1215        self.label_name.as_ref()
1216    }
1217
1218    pub fn set_label(&mut self, label_name: Option<String>) {
1219        self.label_name = label_name;
1220    }
1221
1222    pub fn label_index(&self) -> Option<u64> {
1223        self.label_index
1224    }
1225
1226    pub fn set_label_index(&mut self, label_index: Option<u64>) {
1227        self.label_index = label_index;
1228    }
1229
1230    pub fn box2d(&self) -> Option<&Box2d> {
1231        self.box2d.as_ref()
1232    }
1233
1234    pub fn set_box2d(&mut self, box2d: Option<Box2d>) {
1235        self.box2d = box2d;
1236    }
1237
1238    pub fn box3d(&self) -> Option<&Box3d> {
1239        self.box3d.as_ref()
1240    }
1241
1242    pub fn set_box3d(&mut self, box3d: Option<Box3d>) {
1243        self.box3d = box3d;
1244    }
1245
1246    pub fn mask(&self) -> Option<&Mask> {
1247        self.mask.as_ref()
1248    }
1249
1250    pub fn set_mask(&mut self, mask: Option<Mask>) {
1251        self.mask = mask;
1252    }
1253}
1254
/// A label record attached to a dataset.
///
/// Fields are private; use the accessor methods on `Label` to read them and
/// the async `set_name` / `set_index` / `remove` methods to change the label
/// through a `Client`.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Label {
    // Identifier of the label itself; this is what `remove` passes to the client.
    id: u64,
    // Identifier of the dataset this label record refers to.
    dataset_id: DatasetID,
    // Label index — presumably the class index that annotations reference via
    // `label_index`; TODO confirm.
    index: u64,
    // Label name; this is the text printed by the `Display` impl.
    name: String,
}
1262
1263impl Label {
1264    pub fn id(&self) -> u64 {
1265        self.id
1266    }
1267
1268    pub fn dataset_id(&self) -> DatasetID {
1269        self.dataset_id
1270    }
1271
1272    pub fn index(&self) -> u64 {
1273        self.index
1274    }
1275
1276    pub fn name(&self) -> &str {
1277        &self.name
1278    }
1279
1280    pub async fn remove(&self, client: &Client) -> Result<(), Error> {
1281        client.remove_label(self.id()).await
1282    }
1283
1284    pub async fn set_name(&mut self, client: &Client, name: &str) -> Result<(), Error> {
1285        self.name = name.to_string();
1286        client.update_label(self).await
1287    }
1288
1289    pub async fn set_index(&mut self, client: &Client, index: u64) -> Result<(), Error> {
1290        self.index = index;
1291        client.update_label(self).await
1292    }
1293}
1294
1295impl Display for Label {
1296    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1297        write!(f, "{}", self.name())
1298    }
1299}
1300
/// A single entry of [`NewLabel::labels`]: a label described by its name only.
///
/// Serialize-only; presumably part of the label-creation request body — TODO
/// confirm against the client call that sends it.
#[derive(Serialize, Clone, Debug)]
pub struct NewLabelObject {
    /// Name of the label.
    pub name: String,
}
1305
/// A dataset identifier paired with a list of labels given by name.
///
/// Serialize-only; presumably the request payload used to add labels to a
/// dataset — TODO confirm against the client call that sends it.
#[derive(Serialize, Clone, Debug)]
pub struct NewLabel {
    /// Dataset the labels are associated with.
    pub dataset_id: DatasetID,
    /// Labels, each described by name.
    pub labels: Vec<NewLabelObject>,
}
1311
/// A dataset group for organizing samples into logical subsets.
///
/// Groups are used to partition samples within a dataset for different purposes
/// such as training, validation, and testing. Each sample can belong to at most
/// one group at a time.
///
/// Only `Deserialize` is derived for this type, so values are read from
/// server responses; the type itself cannot be serialized.
///
/// # Common Group Names
///
/// - `"train"` - Training data for model fitting
/// - `"val"` - Validation data for hyperparameter tuning
/// - `"test"` - Test data for final evaluation
///
/// # Examples
///
/// ```rust,no_run
/// use edgefirst_client::{Client, DatasetID};
///
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let client = Client::new()?.with_token_path(None)?;
/// let dataset_id: DatasetID = "ds-123".try_into()?;
///
/// // List all groups in the dataset
/// let groups = client.groups(dataset_id).await?;
/// for group in groups {
///     println!("Group [{}]: {}", group.id, group.name);
/// }
/// # Ok(())
/// # }
/// ```
#[derive(Deserialize, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Group {
    /// The unique numeric identifier for this group.
    ///
    /// Group IDs are assigned by the server and are unique within an
    /// organization.
    pub id: u64,

    /// The human-readable name of the group.
    ///
    /// Common names include "train", "val", "test", but any string is valid.
    pub name: String,
}
1354
/// Derives the `(name, frame)` pair used for an annotation's DataFrame row.
///
/// Returns `None` when the annotation has no name, or when its name has no
/// UTF-8 file stem; this applies to sequence annotations as well. Otherwise:
///
/// - sequence annotation: `(sequence_name, frame_number)`
/// - any other annotation: `(file stem of name, None)`
#[cfg(feature = "polars")]
fn extract_annotation_name(ann: &Annotation) -> Option<(String, Option<u32>)> {
    // The stem is resolved up front so a missing or unusable name rejects the
    // annotation even when a sequence name is available.
    let stem = std::path::Path::new(ann.name.as_deref()?)
        .file_stem()?
        .to_str()?;

    if let Some(sequence) = ann.sequence_name.as_ref() {
        return Some((sequence.clone(), ann.frame_number));
    }
    Some((stem.to_string(), None))
}
1369
/// Builds the polars `Series` named "mask" for one annotation mask.
///
/// The values are whatever `flatten_polygon_coordinates` produces for the
/// mask's polygon.
#[cfg(feature = "polars")]
fn convert_mask_to_series(mask: &Mask) -> Series {
    let coordinates = flatten_polygon_coordinates(&mask.polygon);
    polars::series::Series::new("mask".into(), coordinates)
}
1377
/// Create a DataFrame from a slice of annotations (2025.01 schema).
///
/// **DEPRECATED**: Use [`samples_dataframe()`] instead for full 2025.10 schema
/// support including optional metadata columns (size, location, pose,
/// degradation).
///
/// This function generates a DataFrame with the original 9-column schema.
/// It remains functional for backward compatibility but does not include
/// new optional columns added in version 2025.10.
///
/// Annotations for which no name can be derived (no `name`, or a name without
/// a UTF-8 file stem) are silently skipped and produce no row.
///
/// # Schema (2025.01)
///
/// - `name`: Sample name, or the sequence name for sequence annotations
///   (String)
/// - `frame`: Frame number (built from `Option<u32>` values, so presumably
///   UInt32 rather than UInt64 — TODO confirm against the schema)
/// - `object_id`: Object tracking ID (String)
/// - `label`: Object label (Categorical)
/// - `label_index`: Label index (UInt64)
/// - `group`: Dataset group (Categorical)
/// - `mask`: Segmentation mask (List<Float32>)
/// - `box2d`: 2D bounding box [cx, cy, w, h] (Array<Float32, 4>)
/// - `box3d`: 3D bounding box [x, y, z, w, h, l] (Array<Float32, 6>)
///
/// # Errors
///
/// Returns an error when one of the column casts or the final
/// `DataFrame::new` call fails.
///
/// # Migration
///
/// ```rust,no_run
/// use edgefirst_client::{Client, samples_dataframe};
///
/// # async fn example() -> Result<(), edgefirst_client::Error> {
/// # let client = Client::new()?;
/// # let dataset_id = 1.into();
/// # let annotation_set_id = 1.into();
/// # let groups = vec![];
/// # let types = vec![];
/// // OLD (deprecated):
/// let annotations = client
///     .annotations(annotation_set_id, &groups, &types, None)
///     .await?;
/// let df = edgefirst_client::annotations_dataframe(&annotations)?;
///
/// // NEW (recommended):
/// let samples = client
///     .samples(
///         dataset_id,
///         Some(annotation_set_id),
///         &types,
///         &groups,
///         &[],
///         None,
///     )
///     .await?;
/// let df = samples_dataframe(&samples)?;
/// # Ok(())
/// # }
/// ```
#[deprecated(
    since = "0.8.0",
    note = "Use `samples_dataframe()` for complete 2025.10 schema support"
)]
#[cfg(feature = "polars")]
pub fn annotations_dataframe(annotations: &[Annotation]) -> Result<DataFrame, Error> {
    use itertools::Itertools;

    // Build one 9-tuple per annotation, then split the tuples column-wise.
    let (names, frames, objects, labels, label_indices, groups, masks, boxes2d, boxes3d) =
        annotations
            .iter()
            .filter_map(|ann| {
                // Annotations without a derivable name are dropped here.
                let (name, frame) = extract_annotation_name(ann)?;

                let masks = ann.mask.as_ref().map(convert_mask_to_series);

                // Box columns are stored as small per-row Series and cast to
                // fixed-size arrays below.
                let box2d = ann.box2d.as_ref().map(|box2d| {
                    Series::new(
                        "box2d".into(),
                        [box2d.cx(), box2d.cy(), box2d.width(), box2d.height()],
                    )
                });

                let box3d = ann.box3d.as_ref().map(|box3d| {
                    Series::new(
                        "box3d".into(),
                        [box3d.x, box3d.y, box3d.z, box3d.w, box3d.h, box3d.l],
                    )
                });

                Some((
                    name,
                    frame,
                    ann.object_id().cloned(),
                    ann.label_name.clone(),
                    ann.label_index,
                    ann.group.clone(),
                    masks,
                    box2d,
                    box3d,
                ))
            })
            .multiunzip::<(
                Vec<_>, // names
                Vec<_>, // frames
                Vec<_>, // objects
                Vec<_>, // labels
                Vec<_>, // label_indices
                Vec<_>, // groups
                Vec<_>, // masks
                Vec<_>, // boxes2d
                Vec<_>, // boxes3d
            )>();
    let names = Series::new("name".into(), names).into();
    let frames = Series::new("frame".into(), frames).into();
    let objects = Series::new("object_id".into(), objects).into();
    // Labels and groups are cast to categoricals with a U8 physical type.
    let labels = Series::new("label".into(), labels)
        .cast(&DataType::Categorical(
            Categories::new("labels".into(), "labels".into(), CategoricalPhysical::U8),
            Arc::new(CategoricalMapping::new(u8::MAX as usize)),
        ))?
        .into();
    let label_indices = Series::new("label_index".into(), label_indices).into();
    let groups = Series::new("group".into(), groups)
        .cast(&DataType::Categorical(
            Categories::new("groups".into(), "groups".into(), CategoricalPhysical::U8),
            Arc::new(CategoricalMapping::new(u8::MAX as usize)),
        ))?
        .into();
    let masks = Series::new("mask".into(), masks)
        .cast(&DataType::List(Box::new(DataType::Float32)))?
        .into();
    let boxes2d = Series::new("box2d".into(), boxes2d)
        .cast(&DataType::Array(Box::new(DataType::Float32), 4))?
        .into();
    let boxes3d = Series::new("box3d".into(), boxes3d)
        .cast(&DataType::Array(Box::new(DataType::Float32), 6))?
        .into();

    // Column order here defines the DataFrame's column order.
    Ok(DataFrame::new(vec![
        names,
        frames,
        objects,
        labels,
        label_indices,
        groups,
        masks,
        boxes2d,
        boxes3d,
    ])?)
}
1523
/// Create a DataFrame from a slice of samples with complete 2025.10 schema.
///
/// This function generates a DataFrame with all 13 columns including optional
/// sample metadata (size, location, pose, degradation). Each annotation in
/// each sample becomes one row in the DataFrame.
///
/// A sample without annotations contributes a single row whose annotation
/// columns are null. Rows are skipped when no name can be derived: for an
/// annotation-less sample that is decided from the sample's `image_name`, for
/// annotated samples from each annotation's own `name`.
///
/// # Schema (2025.10)
///
/// - `name`: Sample name, or the sequence name for sequence data (String)
/// - `frame`: Frame number (built from `Option<u32>` values, so presumably
///   UInt32 rather than UInt64 — TODO confirm against the schema)
/// - `object_id`: Object tracking ID (String)
/// - `label`: Object label (Categorical)
/// - `label_index`: Label index (UInt64)
/// - `group`: Dataset group (Categorical), taken from the sample
/// - `mask`: Segmentation mask (List<Float32>)
/// - `box2d`: 2D bounding box [cx, cy, w, h] (Array<Float32, 4>)
/// - `box3d`: 3D bounding box [x, y, z, w, h, l] (Array<Float32, 6>)
/// - `size`: Image size [width, height] (Array<UInt32, 2>) - OPTIONAL
/// - `location`: GPS [lat, lon] (Array<Float32, 2>) - OPTIONAL
/// - `pose`: IMU [yaw, pitch, roll] (Array<Float32, 3>) - OPTIONAL
/// - `degradation`: Image degradation (String) - OPTIONAL
///
/// # Errors
///
/// Returns an error when one of the column casts or the final
/// `DataFrame::new` call fails.
///
/// # Example
///
/// ```rust,no_run
/// use edgefirst_client::{Client, samples_dataframe};
///
/// # async fn example() -> Result<(), edgefirst_client::Error> {
/// # let client = Client::new()?;
/// # let dataset_id = 1.into();
/// # let annotation_set_id = 1.into();
/// let samples = client
///     .samples(dataset_id, Some(annotation_set_id), &[], &[], &[], None)
///     .await?;
/// let df = samples_dataframe(&samples)?;
/// println!("DataFrame shape: {:?}", df.shape());
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "polars")]
pub fn samples_dataframe(samples: &[Sample]) -> Result<DataFrame, Error> {
    // Flatten samples into annotation rows with sample metadata
    let rows: Vec<_> = samples
        .iter()
        .flat_map(|sample| {
            // Extract sample metadata once per sample
            // `size` is only emitted when both width and height are known.
            let size = match (sample.width, sample.height) {
                (Some(w), Some(h)) => Some(vec![w, h]),
                _ => None,
            };

            // GPS and IMU values are narrowed to f32 for the DataFrame.
            let location = sample.location.as_ref().and_then(|loc| {
                loc.gps
                    .as_ref()
                    .map(|gps| vec![gps.lat as f32, gps.lon as f32])
            });

            let pose = sample.location.as_ref().and_then(|loc| {
                loc.imu
                    .as_ref()
                    .map(|imu| vec![imu.yaw as f32, imu.pitch as f32, imu.roll as f32])
            });

            let degradation = sample.degradation.clone();

            // If no annotations, create one row for the sample (null annotations)
            if sample.annotations.is_empty() {
                let (name, frame) = match extract_annotation_name_from_sample(sample) {
                    Some(nf) => nf,
                    // No usable image name: the sample contributes no row.
                    None => return vec![],
                };

                return vec![(
                    name,
                    frame,
                    None,                 // object_id placeholder for now
                    None,                 // label
                    None,                 // label_index
                    sample.group.clone(), // group
                    None,                 // mask
                    None,                 // box2d
                    None,                 // box3d
                    size.clone(),
                    location.clone(),
                    pose.clone(),
                    degradation.clone(),
                )];
            }

            // Create one row per annotation
            sample
                .annotations
                .iter()
                .filter_map(|ann| {
                    // The row name comes from the annotation itself, not from
                    // the enclosing sample; unnamed annotations are skipped.
                    let (name, frame) = extract_annotation_name(ann)?;

                    let mask = ann.mask.as_ref().map(convert_mask_to_series);

                    let box2d = ann.box2d.as_ref().map(|box2d| {
                        Series::new(
                            "box2d".into(),
                            [box2d.cx(), box2d.cy(), box2d.width(), box2d.height()],
                        )
                    });

                    let box3d = ann.box3d.as_ref().map(|box3d| {
                        Series::new(
                            "box3d".into(),
                            [box3d.x, box3d.y, box3d.z, box3d.w, box3d.h, box3d.l],
                        )
                    });

                    Some((
                        name,
                        frame,
                        ann.object_id().cloned(),
                        ann.label_name.clone(),
                        ann.label_index,
                        sample.group.clone(), // Group is on Sample, not Annotation
                        mask,
                        box2d,
                        box3d,
                        size.clone(),
                        location.clone(),
                        pose.clone(),
                        degradation.clone(),
                    ))
                })
                .collect::<Vec<_>>()
        })
        .collect();

    // Manually unzip into separate vectors
    let mut names = Vec::new();
    let mut frames = Vec::new();
    let mut objects = Vec::new();
    let mut labels = Vec::new();
    let mut label_indices = Vec::new();
    let mut groups = Vec::new();
    let mut masks = Vec::new();
    let mut boxes2d = Vec::new();
    let mut boxes3d = Vec::new();
    let mut sizes = Vec::new();
    let mut locations = Vec::new();
    let mut poses = Vec::new();
    let mut degradations = Vec::new();

    for (
        name,
        frame,
        object,
        label,
        label_index,
        group,
        mask,
        box2d,
        box3d,
        size,
        location,
        pose,
        degradation,
    ) in rows
    {
        names.push(name);
        frames.push(frame);
        objects.push(object);
        labels.push(label);
        label_indices.push(label_index);
        groups.push(group);
        masks.push(mask);
        boxes2d.push(box2d);
        boxes3d.push(box3d);
        sizes.push(size);
        locations.push(location);
        poses.push(pose);
        degradations.push(degradation);
    }

    // Build DataFrame columns
    let names = Series::new("name".into(), names).into();
    let frames = Series::new("frame".into(), frames).into();
    let objects = Series::new("object_id".into(), objects).into();

    // Column name: "label" (NOT "label_name")
    let labels = Series::new("label".into(), labels)
        .cast(&DataType::Categorical(
            Categories::new("labels".into(), "labels".into(), CategoricalPhysical::U8),
            Arc::new(CategoricalMapping::new(u8::MAX as usize)),
        ))?
        .into();

    let label_indices = Series::new("label_index".into(), label_indices).into();

    // Column name: "group" (NOT "group_name")
    let groups = Series::new("group".into(), groups)
        .cast(&DataType::Categorical(
            Categories::new("groups".into(), "groups".into(), CategoricalPhysical::U8),
            Arc::new(CategoricalMapping::new(u8::MAX as usize)),
        ))?
        .into();

    let masks = Series::new("mask".into(), masks)
        .cast(&DataType::List(Box::new(DataType::Float32)))?
        .into();
    let boxes2d = Series::new("box2d".into(), boxes2d)
        .cast(&DataType::Array(Box::new(DataType::Float32), 4))?
        .into();
    let boxes3d = Series::new("box3d".into(), boxes3d)
        .cast(&DataType::Array(Box::new(DataType::Float32), 6))?
        .into();

    // NEW: Optional columns (2025.10)
    // Convert Vec<Option<Vec<T>>> to Vec<Option<Series>> for array columns
    let size_series: Vec<Option<Series>> = sizes
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("size".into(), vec)))
        .collect();
    let sizes = Series::new("size".into(), size_series)
        .cast(&DataType::Array(Box::new(DataType::UInt32), 2))?
        .into();

    let location_series: Vec<Option<Series>> = locations
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("location".into(), vec)))
        .collect();
    let locations = Series::new("location".into(), location_series)
        .cast(&DataType::Array(Box::new(DataType::Float32), 2))?
        .into();

    let pose_series: Vec<Option<Series>> = poses
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("pose".into(), vec)))
        .collect();
    let poses = Series::new("pose".into(), pose_series)
        .cast(&DataType::Array(Box::new(DataType::Float32), 3))?
        .into();

    let degradations = Series::new("degradation".into(), degradations).into();

    // Column order here defines the DataFrame's column order.
    Ok(DataFrame::new(vec![
        names,
        frames,
        objects,
        labels,
        label_indices,
        groups,
        masks,
        boxes2d,
        boxes3d,
        sizes,
        locations,
        poses,
        degradations,
    ])?)
}
1779
/// Derives the `(name, frame)` pair for a sample that has no annotations.
///
/// Returns `None` when the sample has no image name, or when that name has no
/// UTF-8 file stem; this applies to sequence samples as well. Otherwise:
///
/// - sequence sample: `(sequence_name, frame_number)`
/// - any other sample: `(file stem of image_name, None)`
#[cfg(feature = "polars")]
fn extract_annotation_name_from_sample(sample: &Sample) -> Option<(String, Option<u32>)> {
    use std::path::Path;

    // Resolved first so an unusable image name rejects the sample even when a
    // sequence name is available.
    let image_name = sample.image_name.as_ref()?;
    let stem = Path::new(image_name).file_stem()?.to_str()?;

    if let Some(sequence) = &sample.sequence_name {
        return Some((sequence.clone(), sample.frame_number));
    }
    Some((stem.to_string(), None))
}
1795
1796// ============================================================================
1797// PURE FUNCTIONS FOR TESTABLE CORE LOGIC
1798// ============================================================================
1799
/// Extract sample name from image filename by:
/// 1. Removing file extension (everything after last dot)
/// 2. Removing a trailing `.camera` suffix if present
///
/// Only a `.camera` that ends the extension-less name is removed; a
/// `.camera` occurring in the middle of the name is left untouched. Neither
/// step is applied when it would leave an empty name.
///
/// # Examples
/// - "scene_001.camera.jpg" → "scene_001"
/// - "image.jpg" → "image"
/// - ".jpg" → ".jpg" (preserves filenames starting with dot)
/// - "scene.camera_front.jpg" → "scene.camera_front" (not a suffix)
fn extract_sample_name(image_name: &str) -> String {
    // Step 1: drop the extension, unless nothing would remain (".jpg").
    let stem = match image_name.rsplit_once('.') {
        Some((stem, _extension)) if !stem.is_empty() => stem,
        _ => image_name,
    };

    // Step 2: drop a trailing ".camera", unless nothing would remain.
    match stem.strip_suffix(".camera") {
        Some(base) if !base.is_empty() => base.to_string(),
        _ => stem.to_string(),
    }
}
1834
1835/// Resolve file URL for a given file type from sample data.
1836///
1837/// Pure function that extracts the URL resolution logic from
1838/// `Sample::download()`. Returns `Some(url)` if the file exists, `None`
1839/// otherwise.
1840///
1841/// # Examples
1842/// - Image: Uses `image_url` field
1843/// - Other files: Searches `files` array by type matching
1844///
1845/// # Arguments
1846/// * `file_type` - The type of file to resolve (e.g., "image", "lidar.pcd")
1847/// * `image_url` - The sample's image URL (for FileType::Image)
1848/// * `files` - The sample's file list (for other file types)
1849fn resolve_file_url<'a>(
1850    file_type: &FileType,
1851    image_url: Option<&'a str>,
1852    files: &'a [SampleFile],
1853) -> Option<&'a str> {
1854    match file_type {
1855        FileType::Image => image_url,
1856        file => files
1857            .iter()
1858            .find(|f| f.r#type == file.to_string())
1859            .and_then(|f| f.url.as_deref()),
1860    }
1861}
1862
1863// ============================================================================
1864// DESERIALIZATION FORMAT CONVERSION HELPERS
1865// ============================================================================
1866
1867/// Convert files HashMap format to Vec<SampleFile>.
1868///
1869/// Pure function that handles the conversion from the server's populate API
1870/// format (HashMap<String, String>) to the internal Vec<SampleFile>
1871/// representation.
1872///
1873/// # Arguments
1874/// * `map` - HashMap where key is file type (e.g., "lidar.pcd") and value is
1875///   filename
1876fn convert_files_map_to_vec(map: HashMap<String, String>) -> Vec<SampleFile> {
1877    map.into_iter()
1878        .map(|(file_type, filename)| SampleFile::with_filename(file_type, filename))
1879        .collect()
1880}
1881
1882/// Convert annotations grouped format to flat Vec<Annotation>.
1883///
1884/// Pure function that handles the conversion from the server's legacy format
1885/// (HashMap<String, Vec<Annotation>>) to the flat Vec<Annotation>
1886/// representation.
1887///
1888/// # Arguments
1889/// * `map` - HashMap where keys are annotation types ("bbox", "box3d", "mask")
1890fn convert_annotations_map_to_vec(map: HashMap<String, Vec<Annotation>>) -> Vec<Annotation> {
1891    let mut all_annotations = Vec::new();
1892    if let Some(bbox_anns) = map.get("bbox") {
1893        all_annotations.extend(bbox_anns.clone());
1894    }
1895    if let Some(box3d_anns) = map.get("box3d") {
1896        all_annotations.extend(box3d_anns.clone());
1897    }
1898    if let Some(mask_anns) = map.get("mask") {
1899        all_annotations.extend(mask_anns.clone());
1900    }
1901    all_annotations
1902}
1903
1904// ============================================================================
1905// GPS/IMU VALIDATION HELPERS
1906// ============================================================================
1907
/// Check that a latitude/longitude pair is finite and geographically valid.
///
/// Pure function. Both values are tested for finiteness before either range
/// is tested, and latitude is always examined before longitude, so the first
/// failing check in that order determines the error message.
///
/// # Arguments
/// * `lat` - Latitude in degrees
/// * `lon` - Longitude in degrees
///
/// # Returns
/// `Ok(())` if valid, `Err(String)` with descriptive error message otherwise
///
/// # Valid Ranges
/// - Latitude: -90.0 to +90.0 degrees (inclusive)
/// - Longitude: -180.0 to +180.0 degrees (inclusive)
fn validate_gps_coordinates(lat: f64, lon: f64) -> Result<(), String> {
    match (lat, lon) {
        (lat, _) if !lat.is_finite() => Err(format!("GPS latitude is not finite: {}", lat)),
        (_, lon) if !lon.is_finite() => Err(format!("GPS longitude is not finite: {}", lon)),
        // Both values are finite from here on, so a magnitude test is
        // equivalent to the inclusive range check.
        (lat, _) if lat.abs() > 90.0 => {
            Err(format!("GPS latitude out of range [-90, 90]: {}", lat))
        }
        (_, lon) if lon.abs() > 180.0 => {
            Err(format!("GPS longitude out of range [-180, 180]: {}", lon))
        }
        _ => Ok(()),
    }
}
1938
/// Check that IMU orientation angles are finite and within range.
///
/// Pure function: all three angles are first verified to be finite (in the
/// order roll, pitch, yaw), then checked against the ranges listed below in
/// the same order. The first failing check determines the error.
///
/// # Arguments
/// * `roll` - Roll angle in degrees
/// * `pitch` - Pitch angle in degrees
/// * `yaw` - Yaw angle in degrees
///
/// # Returns
/// `Ok(())` when every check passes, otherwise `Err(String)` describing the
/// first check that failed
///
/// # Valid Ranges
/// - Roll: -180.0 to +180.0 degrees (inclusive)
/// - Pitch: -90.0 to +90.0 degrees (inclusive)
/// - Yaw: -180.0 to +180.0 degrees (inclusive)
fn validate_imu_orientation(roll: f64, pitch: f64, yaw: f64) -> Result<(), String> {
    // Only meaningful once the value is known to be finite
    let exceeds = |angle: f64, limit: f64| angle.abs() > limit;

    if !roll.is_finite() {
        Err(format!("IMU roll is not finite: {}", roll))
    } else if !pitch.is_finite() {
        Err(format!("IMU pitch is not finite: {}", pitch))
    } else if !yaw.is_finite() {
        Err(format!("IMU yaw is not finite: {}", yaw))
    } else if exceeds(roll, 180.0) {
        Err(format!("IMU roll out of range [-180, 180]: {}", roll))
    } else if exceeds(pitch, 90.0) {
        Err(format!("IMU pitch out of range [-90, 90]: {}", pitch))
    } else if exceeds(yaw, 180.0) {
        Err(format!("IMU yaw out of range [-180, 180]: {}", yaw))
    } else {
        Ok(())
    }
}
1978
1979// ============================================================================
1980// MASK POLYGON CONVERSION HELPERS
1981// ============================================================================
1982
1983/// Flatten polygon coordinates into a flat vector of f32 values for Polars
1984/// Series.
1985///
1986/// Converts nested polygon structure into a flat list of coordinates with
1987/// NaN separators between polygons:
1988/// - Input: [[(x1, y1), (x2, y2)], [(x3, y3)]]
1989/// - Output: [x1, y1, x2, y2, NaN, x3, y3]
1990#[cfg(feature = "polars")]
fn flatten_polygon_coordinates(polygons: &[Vec<(f32, f32)>]) -> Vec<f32> {
    // Two values per vertex plus one separator between neighbouring polygons
    let vertex_total: usize = polygons.iter().map(Vec::len).sum();
    let mut flat = Vec::with_capacity(2 * vertex_total + polygons.len().saturating_sub(1));

    for (index, ring) in polygons.iter().enumerate() {
        // The NaN separator goes in front of every polygon except the first,
        // so the output never ends with a separator for the last polygon
        if index > 0 {
            flat.push(f32::NAN);
        }
        flat.extend(ring.iter().flat_map(|&(x, y)| [x, y]));
    }

    flat
}
2012
2013/// Unflatten coordinates with NaN separators back to nested polygon
2014/// structure.
2015///
2016/// Converts flat list of coordinates with NaN separators back to nested
2017/// polygon structure (inverse of flatten_polygon_coordinates):
2018/// - Input: [x1, y1, x2, y2, NaN, x3, y3]
2019/// - Output: [[(x1, y1), (x2, y2)], [(x3, y3)]]
2020///
2021/// This function is used when parsing Arrow files to reconstruct the nested
2022/// polygon format required by the EdgeFirst Studio API.
2023///
2024/// # Examples
2025///
2026/// ```rust
2027/// use edgefirst_client::unflatten_polygon_coordinates;
2028///
2029/// let coords = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0];
2030/// let polygons = unflatten_polygon_coordinates(&coords);
2031///
2032/// assert_eq!(polygons.len(), 2);
2033/// assert_eq!(polygons[0], vec![(1.0, 2.0), (3.0, 4.0)]);
2034/// assert_eq!(polygons[1], vec![(5.0, 6.0)]);
2035/// ```
2036#[cfg(feature = "polars")]
pub fn unflatten_polygon_coordinates(coords: &[f32]) -> Vec<Vec<(f32, f32)>> {
    coords
        // Every NaN terminates the current polygon. Splitting first
        // guarantees a separator is never consumed as the y half of a
        // vertex when a polygon has an odd number of values.
        .split(|value| value.is_nan())
        .map(|segment| {
            // Pair values as (x, y); a dangling last value in a segment
            // (malformed data) is dropped.
            segment
                .chunks_exact(2)
                .map(|pair| (pair[0], pair[1]))
                .collect::<Vec<_>>()
        })
        // Leading, trailing or repeated separators produce no polygon
        .filter(|polygon| !polygon.is_empty())
        .collect()
}
2067
2068#[cfg(test)]
2069mod tests {
2070    use super::*;
2071    use std::str::FromStr;
2072
2073    // ============================================================================
2074    // TEST HELPER FUNCTIONS (Pure Logic for Testing)
2075    // ============================================================================
2076
2077    /// Flatten legacy grouped annotation format to a single vector.
2078    ///
2079    /// Converts HashMap<String, Vec<Annotation>> (with bbox/box3d/mask keys)
2080    /// into a flat Vec<Annotation> in deterministic order.
2081    fn flatten_annotation_map(
2082        map: std::collections::HashMap<String, Vec<Annotation>>,
2083    ) -> Vec<Annotation> {
2084        let mut all_annotations = Vec::new();
2085
2086        // Process in fixed order for deterministic results
2087        for key in ["bbox", "box3d", "mask"] {
2088            if let Some(mut anns) = map.get(key).cloned() {
2089                all_annotations.append(&mut anns);
2090            }
2091        }
2092
2093        all_annotations
2094    }
2095
2096    /// Get the JSON field name for the Annotation group field (for tests).
2097    fn annotation_group_field_name() -> &'static str {
2098        "group_name"
2099    }
2100
2101    /// Get the JSON field name for the Annotation object_id field (for tests).
2102    fn annotation_object_id_field_name() -> &'static str {
2103        "object_reference"
2104    }
2105
2106    /// Get the accepted alias for the Annotation object_id field (for tests).
2107    fn annotation_object_id_alias() -> &'static str {
2108        "object_id"
2109    }
2110
2111    /// Validate that annotation field names match expected values in JSON (for
2112    /// tests).
2113    fn validate_annotation_field_names(
2114        json_str: &str,
2115        expected_group: bool,
2116        expected_object_ref: bool,
2117    ) -> Result<(), String> {
2118        if expected_group && !json_str.contains("\"group_name\"") {
2119            return Err("Missing expected field: group_name".to_string());
2120        }
2121        if expected_object_ref && !json_str.contains("\"object_reference\"") {
2122            return Err("Missing expected field: object_reference".to_string());
2123        }
2124        Ok(())
2125    }
2126
2127    // ==== FileType Conversion Tests ====
2128    #[test]
2129    fn test_file_type_conversions() {
2130        let cases = vec![
2131            (FileType::Image, "image"),
2132            (FileType::LidarPcd, "lidar.pcd"),
2133            (FileType::LidarDepth, "lidar.png"),
2134            (FileType::LidarReflect, "lidar.jpg"),
2135            (FileType::RadarPcd, "radar.pcd"),
2136            (FileType::RadarCube, "radar.png"),
2137        ];
2138
2139        // Test: Display → to_string()
2140        for (file_type, expected_str) in &cases {
2141            assert_eq!(file_type.to_string(), *expected_str);
2142        }
2143
2144        // Test: try_from() string parsing
2145        for (file_type, type_str) in &cases {
2146            assert_eq!(FileType::try_from(*type_str).unwrap(), *file_type);
2147        }
2148
2149        // Test: FromStr trait
2150        for (file_type, type_str) in &cases {
2151            assert_eq!(FileType::from_str(type_str).unwrap(), *file_type);
2152        }
2153
2154        // Test: Invalid input
2155        assert!(FileType::try_from("invalid").is_err());
2156
2157        // Test: Round-trip (Display → try_from)
2158        for (file_type, _) in &cases {
2159            let s = file_type.to_string();
2160            let parsed = FileType::try_from(s.as_str()).unwrap();
2161            assert_eq!(parsed, *file_type);
2162        }
2163    }
2164
2165    // ==== AnnotationType Conversion Tests ====
2166    #[test]
2167    fn test_annotation_type_conversions() {
2168        let cases = vec![
2169            (AnnotationType::Box2d, "box2d"),
2170            (AnnotationType::Box3d, "box3d"),
2171            (AnnotationType::Mask, "mask"),
2172        ];
2173
2174        // Test: Display → to_string()
2175        for (ann_type, expected_str) in &cases {
2176            assert_eq!(ann_type.to_string(), *expected_str);
2177        }
2178
2179        // Test: try_from() string parsing
2180        for (ann_type, type_str) in &cases {
2181            assert_eq!(AnnotationType::try_from(*type_str).unwrap(), *ann_type);
2182        }
2183
2184        // Test: From<String> (backward compatibility)
2185        assert_eq!(
2186            AnnotationType::from("box2d".to_string()),
2187            AnnotationType::Box2d
2188        );
2189        assert_eq!(
2190            AnnotationType::from("box3d".to_string()),
2191            AnnotationType::Box3d
2192        );
2193        assert_eq!(
2194            AnnotationType::from("mask".to_string()),
2195            AnnotationType::Mask
2196        );
2197
2198        // Invalid defaults to Box2d for backward compatibility
2199        assert_eq!(
2200            AnnotationType::from("invalid".to_string()),
2201            AnnotationType::Box2d
2202        );
2203
2204        // Test: Invalid input
2205        assert!(AnnotationType::try_from("invalid").is_err());
2206
2207        // Test: Round-trip (Display → try_from)
2208        for (ann_type, _) in &cases {
2209            let s = ann_type.to_string();
2210            let parsed = AnnotationType::try_from(s.as_str()).unwrap();
2211            assert_eq!(parsed, *ann_type);
2212        }
2213    }
2214
2215    // ==== Pure Function: extract_sample_name Tests ====
2216    #[test]
2217    fn test_extract_sample_name_with_extension_and_camera() {
2218        assert_eq!(extract_sample_name("scene_001.camera.jpg"), "scene_001");
2219    }
2220
2221    #[test]
2222    fn test_extract_sample_name_multiple_dots() {
2223        assert_eq!(extract_sample_name("image.v2.camera.png"), "image.v2");
2224    }
2225
2226    #[test]
2227    fn test_extract_sample_name_extension_only() {
2228        assert_eq!(extract_sample_name("test.jpg"), "test");
2229    }
2230
2231    #[test]
2232    fn test_extract_sample_name_no_extension() {
2233        assert_eq!(extract_sample_name("test"), "test");
2234    }
2235
2236    #[test]
2237    fn test_extract_sample_name_edge_case_dot_prefix() {
2238        assert_eq!(extract_sample_name(".jpg"), ".jpg");
2239    }
2240
2241    // ==== File URL Resolution Tests ====
2242    #[test]
2243    fn test_resolve_file_url_image_type() {
2244        let image_url = Some("https://example.com/image.jpg");
2245        let files = vec![];
2246        let result = resolve_file_url(&FileType::Image, image_url, &files);
2247        assert_eq!(result, Some("https://example.com/image.jpg"));
2248    }
2249
2250    #[test]
2251    fn test_resolve_file_url_lidar_pcd() {
2252        let image_url = Some("https://example.com/image.jpg");
2253        let files = vec![
2254            SampleFile::with_url(
2255                "lidar.pcd".to_string(),
2256                "https://example.com/file.pcd".to_string(),
2257            ),
2258            SampleFile::with_url(
2259                "radar.pcd".to_string(),
2260                "https://example.com/radar.pcd".to_string(),
2261            ),
2262        ];
2263        let result = resolve_file_url(&FileType::LidarPcd, image_url, &files);
2264        assert_eq!(result, Some("https://example.com/file.pcd"));
2265    }
2266
2267    #[test]
2268    fn test_resolve_file_url_not_found() {
2269        let image_url = Some("https://example.com/image.jpg");
2270        let files = vec![SampleFile::with_url(
2271            "lidar.pcd".to_string(),
2272            "https://example.com/file.pcd".to_string(),
2273        )];
2274        // Requesting radar.pcd which doesn't exist in files
2275        let result = resolve_file_url(&FileType::RadarPcd, image_url, &files);
2276        assert_eq!(result, None);
2277    }
2278
2279    #[test]
2280    fn test_resolve_file_url_no_image_url() {
2281        let image_url = None;
2282        let files = vec![];
2283        let result = resolve_file_url(&FileType::Image, image_url, &files);
2284        assert_eq!(result, None);
2285    }
2286
2287    // ==== Format Conversion Tests ====
2288    #[test]
2289    fn test_convert_files_map_to_vec_single_file() {
2290        let mut map = HashMap::new();
2291        map.insert("lidar.pcd".to_string(), "scan001.pcd".to_string());
2292
2293        let files = convert_files_map_to_vec(map);
2294        assert_eq!(files.len(), 1);
2295        assert_eq!(files[0].file_type(), "lidar.pcd");
2296        assert_eq!(files[0].filename(), Some("scan001.pcd"));
2297    }
2298
2299    #[test]
2300    fn test_convert_files_map_to_vec_multiple_files() {
2301        let mut map = HashMap::new();
2302        map.insert("lidar.pcd".to_string(), "scan.pcd".to_string());
2303        map.insert("radar.pcd".to_string(), "radar.pcd".to_string());
2304
2305        let files = convert_files_map_to_vec(map);
2306        assert_eq!(files.len(), 2);
2307    }
2308
2309    #[test]
2310    fn test_convert_files_map_to_vec_empty() {
2311        let map = HashMap::new();
2312        let files = convert_files_map_to_vec(map);
2313        assert_eq!(files.len(), 0);
2314    }
2315
2316    #[test]
2317    fn test_convert_annotations_map_to_vec_with_bbox() {
2318        let mut map = HashMap::new();
2319        let bbox_ann = Annotation::new();
2320        map.insert("bbox".to_string(), vec![bbox_ann.clone()]);
2321
2322        let annotations = convert_annotations_map_to_vec(map);
2323        assert_eq!(annotations.len(), 1);
2324    }
2325
2326    #[test]
2327    fn test_convert_annotations_map_to_vec_all_types() {
2328        let mut map = HashMap::new();
2329        map.insert("bbox".to_string(), vec![Annotation::new()]);
2330        map.insert("box3d".to_string(), vec![Annotation::new()]);
2331        map.insert("mask".to_string(), vec![Annotation::new()]);
2332
2333        let annotations = convert_annotations_map_to_vec(map);
2334        assert_eq!(annotations.len(), 3);
2335    }
2336
2337    #[test]
2338    fn test_convert_annotations_map_to_vec_empty() {
2339        let map = HashMap::new();
2340        let annotations = convert_annotations_map_to_vec(map);
2341        assert_eq!(annotations.len(), 0);
2342    }
2343
2344    #[test]
2345    fn test_convert_annotations_map_to_vec_unknown_type_ignored() {
2346        let mut map = HashMap::new();
2347        map.insert("unknown".to_string(), vec![Annotation::new()]);
2348
2349        let annotations = convert_annotations_map_to_vec(map);
2350        // Unknown types are ignored
2351        assert_eq!(annotations.len(), 0);
2352    }
2353
2354    // ==== Annotation Field Mapping Tests ====
2355    #[test]
2356    fn test_annotation_group_field_name() {
2357        assert_eq!(annotation_group_field_name(), "group_name");
2358    }
2359
2360    #[test]
2361    fn test_annotation_object_id_field_name() {
2362        assert_eq!(annotation_object_id_field_name(), "object_reference");
2363    }
2364
2365    #[test]
2366    fn test_annotation_object_id_alias() {
2367        assert_eq!(annotation_object_id_alias(), "object_id");
2368    }
2369
2370    #[test]
2371    fn test_validate_annotation_field_names_success() {
2372        let json = r#"{"group_name":"train","object_reference":"obj1"}"#;
2373        assert!(validate_annotation_field_names(json, true, true).is_ok());
2374    }
2375
2376    #[test]
2377    fn test_validate_annotation_field_names_missing_group() {
2378        let json = r#"{"object_reference":"obj1"}"#;
2379        let result = validate_annotation_field_names(json, true, false);
2380        assert!(result.is_err());
2381        assert!(result.unwrap_err().contains("group_name"));
2382    }
2383
2384    #[test]
2385    fn test_validate_annotation_field_names_missing_object_ref() {
2386        let json = r#"{"group_name":"train"}"#;
2387        let result = validate_annotation_field_names(json, false, true);
2388        assert!(result.is_err());
2389        assert!(result.unwrap_err().contains("object_reference"));
2390    }
2391
2392    #[test]
2393    fn test_annotation_serialization_field_names() {
2394        // Test that Annotation serializes with correct field names
2395        let mut ann = Annotation::new();
2396        ann.set_group(Some("train".to_string()));
2397        ann.set_object_id(Some("obj1".to_string()));
2398
2399        let json = serde_json::to_string(&ann).unwrap();
2400        // Verify JSON contains correct field names
2401        assert!(validate_annotation_field_names(&json, true, true).is_ok());
2402    }
2403
2404    // ==== GPS/IMU Validation Tests ====
2405    #[test]
2406    fn test_validate_gps_coordinates_valid() {
2407        assert!(validate_gps_coordinates(37.7749, -122.4194).is_ok()); // San Francisco
2408        assert!(validate_gps_coordinates(0.0, 0.0).is_ok()); // Null Island
2409        assert!(validate_gps_coordinates(90.0, 180.0).is_ok()); // Edge cases
2410        assert!(validate_gps_coordinates(-90.0, -180.0).is_ok()); // Edge cases
2411    }
2412
2413    #[test]
2414    fn test_validate_gps_coordinates_invalid_latitude() {
2415        let result = validate_gps_coordinates(91.0, 0.0);
2416        assert!(result.is_err());
2417        assert!(result.unwrap_err().contains("latitude out of range"));
2418
2419        let result = validate_gps_coordinates(-91.0, 0.0);
2420        assert!(result.is_err());
2421        assert!(result.unwrap_err().contains("latitude out of range"));
2422    }
2423
2424    #[test]
2425    fn test_validate_gps_coordinates_invalid_longitude() {
2426        let result = validate_gps_coordinates(0.0, 181.0);
2427        assert!(result.is_err());
2428        assert!(result.unwrap_err().contains("longitude out of range"));
2429
2430        let result = validate_gps_coordinates(0.0, -181.0);
2431        assert!(result.is_err());
2432        assert!(result.unwrap_err().contains("longitude out of range"));
2433    }
2434
2435    #[test]
2436    fn test_validate_gps_coordinates_non_finite() {
2437        let result = validate_gps_coordinates(f64::NAN, 0.0);
2438        assert!(result.is_err());
2439        assert!(result.unwrap_err().contains("not finite"));
2440
2441        let result = validate_gps_coordinates(0.0, f64::INFINITY);
2442        assert!(result.is_err());
2443        assert!(result.unwrap_err().contains("not finite"));
2444    }
2445
2446    #[test]
2447    fn test_validate_imu_orientation_valid() {
2448        assert!(validate_imu_orientation(0.0, 0.0, 0.0).is_ok());
2449        assert!(validate_imu_orientation(45.0, 30.0, 90.0).is_ok());
2450        assert!(validate_imu_orientation(180.0, 90.0, -180.0).is_ok()); // Edge cases
2451        assert!(validate_imu_orientation(-180.0, -90.0, 180.0).is_ok()); // Edge cases
2452    }
2453
2454    #[test]
2455    fn test_validate_imu_orientation_invalid_roll() {
2456        let result = validate_imu_orientation(181.0, 0.0, 0.0);
2457        assert!(result.is_err());
2458        assert!(result.unwrap_err().contains("roll out of range"));
2459
2460        let result = validate_imu_orientation(-181.0, 0.0, 0.0);
2461        assert!(result.is_err());
2462    }
2463
2464    #[test]
2465    fn test_validate_imu_orientation_invalid_pitch() {
2466        let result = validate_imu_orientation(0.0, 91.0, 0.0);
2467        assert!(result.is_err());
2468        assert!(result.unwrap_err().contains("pitch out of range"));
2469
2470        let result = validate_imu_orientation(0.0, -91.0, 0.0);
2471        assert!(result.is_err());
2472    }
2473
2474    #[test]
2475    fn test_validate_imu_orientation_non_finite() {
2476        let result = validate_imu_orientation(f64::NAN, 0.0, 0.0);
2477        assert!(result.is_err());
2478        assert!(result.unwrap_err().contains("not finite"));
2479
2480        let result = validate_imu_orientation(0.0, f64::INFINITY, 0.0);
2481        assert!(result.is_err());
2482
2483        let result = validate_imu_orientation(0.0, 0.0, f64::NEG_INFINITY);
2484        assert!(result.is_err());
2485    }
2486
2487    // ==== Polygon Flattening Tests ====
2488    #[test]
2489    #[cfg(feature = "polars")]
2490    fn test_flatten_polygon_coordinates_single_polygon() {
2491        let polygons = vec![vec![(1.0, 2.0), (3.0, 4.0)]];
2492        let result = flatten_polygon_coordinates(&polygons);
2493
2494        // Should have x1, y1, x2, y2 (no trailing NaN)
2495        assert_eq!(result.len(), 4);
2496        assert_eq!(&result[..4], &[1.0, 2.0, 3.0, 4.0]);
2497    }
2498
2499    #[test]
2500    #[cfg(feature = "polars")]
2501    fn test_flatten_polygon_coordinates_multiple_polygons() {
2502        let polygons = vec![vec![(1.0, 2.0), (3.0, 4.0)], vec![(5.0, 6.0), (7.0, 8.0)]];
2503        let result = flatten_polygon_coordinates(&polygons);
2504
2505        // x1, y1, x2, y2, NaN, x3, y3, x4, y4 (no trailing NaN)
2506        assert_eq!(result.len(), 9);
2507        assert_eq!(&result[..4], &[1.0, 2.0, 3.0, 4.0]);
2508        assert!(result[4].is_nan()); // NaN separator
2509        assert_eq!(&result[5..9], &[5.0, 6.0, 7.0, 8.0]);
2510    }
2511
2512    #[test]
2513    #[cfg(feature = "polars")]
2514    fn test_flatten_polygon_coordinates_empty() {
2515        let polygons: Vec<Vec<(f32, f32)>> = vec![];
2516        let result = flatten_polygon_coordinates(&polygons);
2517
2518        assert_eq!(result.len(), 0);
2519    }
2520
2521    // ==== Polygon Unflattening Tests ====
2522    #[test]
2523    #[cfg(feature = "polars")]
2524    fn test_unflatten_polygon_coordinates_single_polygon() {
2525        let coords = vec![1.0, 2.0, 3.0, 4.0];
2526        let result = unflatten_polygon_coordinates(&coords);
2527
2528        assert_eq!(result.len(), 1);
2529        assert_eq!(result[0].len(), 2);
2530        assert_eq!(result[0][0], (1.0, 2.0));
2531        assert_eq!(result[0][1], (3.0, 4.0));
2532    }
2533
2534    #[test]
2535    #[cfg(feature = "polars")]
2536    fn test_unflatten_polygon_coordinates_multiple_polygons() {
2537        let coords = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0, 7.0, 8.0];
2538        let result = unflatten_polygon_coordinates(&coords);
2539
2540        assert_eq!(result.len(), 2);
2541        assert_eq!(result[0].len(), 2);
2542        assert_eq!(result[0][0], (1.0, 2.0));
2543        assert_eq!(result[0][1], (3.0, 4.0));
2544        assert_eq!(result[1].len(), 2);
2545        assert_eq!(result[1][0], (5.0, 6.0));
2546        assert_eq!(result[1][1], (7.0, 8.0));
2547    }
2548
2549    #[test]
2550    #[cfg(feature = "polars")]
2551    fn test_unflatten_polygon_coordinates_roundtrip() {
2552        // Test that flatten -> unflatten produces the same result
2553        let original = vec![vec![(1.0, 2.0), (3.0, 4.0)], vec![(5.0, 6.0), (7.0, 8.0)]];
2554        let flattened = flatten_polygon_coordinates(&original);
2555        let result = unflatten_polygon_coordinates(&flattened);
2556
2557        assert_eq!(result, original);
2558    }
2559
2560    // ==== Annotation Format Flattening Tests ====
2561    #[test]
2562    fn test_flatten_annotation_map_all_types() {
2563        use std::collections::HashMap;
2564
2565        let mut map = HashMap::new();
2566
2567        // Create test annotations
2568        let mut bbox_ann = Annotation::new();
2569        bbox_ann.set_label(Some("bbox_label".to_string()));
2570
2571        let mut box3d_ann = Annotation::new();
2572        box3d_ann.set_label(Some("box3d_label".to_string()));
2573
2574        let mut mask_ann = Annotation::new();
2575        mask_ann.set_label(Some("mask_label".to_string()));
2576
2577        map.insert("bbox".to_string(), vec![bbox_ann.clone()]);
2578        map.insert("box3d".to_string(), vec![box3d_ann.clone()]);
2579        map.insert("mask".to_string(), vec![mask_ann.clone()]);
2580
2581        let result = flatten_annotation_map(map);
2582
2583        assert_eq!(result.len(), 3);
2584        // Check ordering: bbox, box3d, mask
2585        assert_eq!(result[0].label(), Some(&"bbox_label".to_string()));
2586        assert_eq!(result[1].label(), Some(&"box3d_label".to_string()));
2587        assert_eq!(result[2].label(), Some(&"mask_label".to_string()));
2588    }
2589
2590    #[test]
2591    fn test_flatten_annotation_map_single_type() {
2592        use std::collections::HashMap;
2593
2594        let mut map = HashMap::new();
2595        let mut bbox_ann = Annotation::new();
2596        bbox_ann.set_label(Some("test".to_string()));
2597        map.insert("bbox".to_string(), vec![bbox_ann]);
2598
2599        let result = flatten_annotation_map(map);
2600
2601        assert_eq!(result.len(), 1);
2602        assert_eq!(result[0].label(), Some(&"test".to_string()));
2603    }
2604
2605    #[test]
2606    fn test_flatten_annotation_map_empty() {
2607        use std::collections::HashMap;
2608
2609        let map = HashMap::new();
2610        let result = flatten_annotation_map(map);
2611
2612        assert_eq!(result.len(), 0);
2613    }
2614
2615    #[test]
2616    fn test_flatten_annotation_map_deterministic_order() {
2617        use std::collections::HashMap;
2618
2619        let mut map = HashMap::new();
2620
2621        let mut bbox_ann = Annotation::new();
2622        bbox_ann.set_label(Some("bbox".to_string()));
2623
2624        let mut box3d_ann = Annotation::new();
2625        box3d_ann.set_label(Some("box3d".to_string()));
2626
2627        let mut mask_ann = Annotation::new();
2628        mask_ann.set_label(Some("mask".to_string()));
2629
2630        // Insert in reverse order to test deterministic ordering
2631        map.insert("mask".to_string(), vec![mask_ann]);
2632        map.insert("box3d".to_string(), vec![box3d_ann]);
2633        map.insert("bbox".to_string(), vec![bbox_ann]);
2634
2635        let result = flatten_annotation_map(map);
2636
2637        // Should be bbox, box3d, mask regardless of insertion order
2638        assert_eq!(result.len(), 3);
2639        assert_eq!(result[0].label(), Some(&"bbox".to_string()));
2640        assert_eq!(result[1].label(), Some(&"box3d".to_string()));
2641        assert_eq!(result[2].label(), Some(&"mask".to_string()));
2642    }
2643
2644    // ==== Box2d Tests ====
2645    #[test]
2646    fn test_box2d_construction_and_accessors() {
2647        // Test case 1: Basic construction with positive coordinates
2648        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
2649        assert_eq!(
2650            (bbox.left(), bbox.top(), bbox.width(), bbox.height()),
2651            (10.0, 20.0, 100.0, 50.0)
2652        );
2653
2654        // Test case 2: Center calculations
2655        assert_eq!((bbox.cx(), bbox.cy()), (60.0, 45.0)); // 10+50, 20+25
2656
2657        // Test case 3: Zero origin
2658        let bbox = Box2d::new(0.0, 0.0, 640.0, 480.0);
2659        assert_eq!(
2660            (bbox.left(), bbox.top(), bbox.width(), bbox.height()),
2661            (0.0, 0.0, 640.0, 480.0)
2662        );
2663        assert_eq!((bbox.cx(), bbox.cy()), (320.0, 240.0));
2664    }
2665
2666    #[test]
2667    fn test_box2d_center_calculation() {
2668        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
2669
2670        // Center = position + size/2
2671        assert_eq!(bbox.cx(), 60.0); // 10 + 100/2
2672        assert_eq!(bbox.cy(), 45.0); // 20 + 50/2
2673    }
2674
2675    #[test]
2676    fn test_box2d_zero_dimensions() {
2677        let bbox = Box2d::new(10.0, 20.0, 0.0, 0.0);
2678
2679        // When width/height are zero, center = position
2680        assert_eq!(bbox.cx(), 10.0);
2681        assert_eq!(bbox.cy(), 20.0);
2682    }
2683
2684    #[test]
2685    fn test_box2d_negative_dimensions() {
2686        let bbox = Box2d::new(100.0, 100.0, -50.0, -50.0);
2687
2688        // Negative dimensions create inverted boxes (valid edge case)
2689        assert_eq!(bbox.width(), -50.0);
2690        assert_eq!(bbox.height(), -50.0);
2691        assert_eq!(bbox.cx(), 75.0); // 100 + (-50)/2
2692        assert_eq!(bbox.cy(), 75.0); // 100 + (-50)/2
2693    }
2694
2695    // ==== Box3d Tests ====
2696    #[test]
2697    fn test_box3d_construction_and_accessors() {
2698        // Test case 1: Basic 3D construction
2699        let bbox = Box3d::new(1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
2700        assert_eq!((bbox.cx(), bbox.cy(), bbox.cz()), (1.0, 2.0, 3.0));
2701        assert_eq!(
2702            (bbox.width(), bbox.height(), bbox.length()),
2703            (4.0, 5.0, 6.0)
2704        );
2705
2706        // Test case 2: Corners calculation with offset center
2707        let bbox = Box3d::new(10.0, 20.0, 30.0, 4.0, 6.0, 8.0);
2708        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (8.0, 17.0, 26.0)); // 10-2, 20-3, 30-4
2709
2710        // Test case 3: Center at origin with negative corners
2711        let bbox = Box3d::new(0.0, 0.0, 0.0, 2.0, 3.0, 4.0);
2712        assert_eq!((bbox.cx(), bbox.cy(), bbox.cz()), (0.0, 0.0, 0.0));
2713        assert_eq!(
2714            (bbox.width(), bbox.height(), bbox.length()),
2715            (2.0, 3.0, 4.0)
2716        );
2717        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (-1.0, -1.5, -2.0));
2718    }
2719
2720    #[test]
2721    fn test_box3d_center_calculation() {
2722        let bbox = Box3d::new(10.0, 20.0, 30.0, 100.0, 50.0, 40.0);
2723
2724        // Center values as specified in constructor
2725        assert_eq!(bbox.cx(), 10.0);
2726        assert_eq!(bbox.cy(), 20.0);
2727        assert_eq!(bbox.cz(), 30.0);
2728    }
2729
2730    #[test]
2731    fn test_box3d_zero_dimensions() {
2732        let bbox = Box3d::new(5.0, 10.0, 15.0, 0.0, 0.0, 0.0);
2733
2734        // When all dimensions are zero, corners = center
2735        assert_eq!(bbox.cx(), 5.0);
2736        assert_eq!(bbox.cy(), 10.0);
2737        assert_eq!(bbox.cz(), 15.0);
2738        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (5.0, 10.0, 15.0));
2739    }
2740
2741    #[test]
2742    fn test_box3d_negative_dimensions() {
2743        let bbox = Box3d::new(100.0, 100.0, 100.0, -50.0, -50.0, -50.0);
2744
2745        // Negative dimensions create inverted boxes
2746        assert_eq!(bbox.width(), -50.0);
2747        assert_eq!(bbox.height(), -50.0);
2748        assert_eq!(bbox.length(), -50.0);
2749        assert_eq!(
2750            (bbox.left(), bbox.top(), bbox.front()),
2751            (125.0, 125.0, 125.0)
2752        );
2753    }
2754
2755    // ==== Mask Tests ====
2756    #[test]
2757    fn test_mask_creation_and_deserialization() {
2758        // Test case 1: Direct construction
2759        let polygon = vec![vec![(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0)]];
2760        let mask = Mask::new(polygon.clone());
2761        assert_eq!(mask.polygon, polygon);
2762
2763        // Test case 2: Deserialization from legacy format
2764        let legacy = serde_json::json!({
2765            "mask": {
2766                "polygon": [[
2767                    [0.0_f32, 0.0_f32],
2768                    [1.0_f32, 0.0_f32],
2769                    [1.0_f32, 1.0_f32]
2770                ]]
2771            }
2772        });
2773
2774        #[derive(serde::Deserialize)]
2775        struct Wrapper {
2776            mask: Mask,
2777        }
2778
2779        let parsed: Wrapper = serde_json::from_value(legacy).unwrap();
2780        assert_eq!(parsed.mask.polygon.len(), 1);
2781        assert_eq!(parsed.mask.polygon[0].len(), 3);
2782    }
2783
2784    // ==== Sample Tests ====
2785    #[test]
2786    fn test_sample_construction_and_accessors() {
2787        // Test case 1: New sample is empty
2788        let sample = Sample::new();
2789        assert_eq!(sample.id(), None);
2790        assert_eq!(sample.image_name(), None);
2791        assert_eq!(sample.width(), None);
2792        assert_eq!(sample.height(), None);
2793
2794        // Test case 2: Sample with populated fields
2795        let mut sample = Sample::new();
2796        sample.image_name = Some("test.jpg".to_string());
2797        sample.width = Some(1920);
2798        sample.height = Some(1080);
2799        sample.group = Some("group1".to_string());
2800
2801        assert_eq!(sample.image_name(), Some("test.jpg"));
2802        assert_eq!(sample.width(), Some(1920));
2803        assert_eq!(sample.height(), Some(1080));
2804        assert_eq!(sample.group(), Some(&"group1".to_string()));
2805    }
2806
2807    #[test]
2808    fn test_sample_name_extraction_from_image_name() {
2809        let mut sample = Sample::new();
2810
2811        // Test case 1: Basic image name with extension
2812        sample.image_name = Some("test_image.jpg".to_string());
2813        assert_eq!(sample.name(), Some("test_image".to_string()));
2814
2815        // Test case 2: Image name with .camera suffix
2816        sample.image_name = Some("test_image.camera.jpg".to_string());
2817        assert_eq!(sample.name(), Some("test_image".to_string()));
2818
2819        // Test case 3: Image name without extension
2820        sample.image_name = Some("test_image".to_string());
2821        assert_eq!(sample.name(), Some("test_image".to_string()));
2822    }
2823
2824    // ==== Annotation Tests ====
2825    #[test]
2826    fn test_annotation_construction_and_setters() {
2827        // Test case 1: New annotation is empty
2828        let ann = Annotation::new();
2829        assert_eq!(ann.sample_id(), None);
2830        assert_eq!(ann.label(), None);
2831        assert_eq!(ann.box2d(), None);
2832        assert_eq!(ann.box3d(), None);
2833        assert_eq!(ann.mask(), None);
2834
2835        // Test case 2: Setting annotation fields
2836        let mut ann = Annotation::new();
2837        ann.set_label(Some("car".to_string()));
2838        assert_eq!(ann.label(), Some(&"car".to_string()));
2839
2840        ann.set_label_index(Some(42));
2841        assert_eq!(ann.label_index(), Some(42));
2842
2843        // Test case 3: Setting bounding box
2844        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
2845        ann.set_box2d(Some(bbox.clone()));
2846        assert!(ann.box2d().is_some());
2847        assert_eq!(ann.box2d().unwrap().left(), 10.0);
2848    }
2849
2850    // ==== SampleFile Tests ====
2851    #[test]
2852    fn test_sample_file_with_url_and_filename() {
2853        // Test case 1: SampleFile with URL
2854        let file = SampleFile::with_url(
2855            "lidar.pcd".to_string(),
2856            "https://example.com/file.pcd".to_string(),
2857        );
2858        assert_eq!(file.file_type(), "lidar.pcd");
2859        assert_eq!(file.url(), Some("https://example.com/file.pcd"));
2860        assert_eq!(file.filename(), None);
2861
2862        // Test case 2: SampleFile with local filename
2863        let file = SampleFile::with_filename("image".to_string(), "test.jpg".to_string());
2864        assert_eq!(file.file_type(), "image");
2865        assert_eq!(file.filename(), Some("test.jpg"));
2866        assert_eq!(file.url(), None);
2867    }
2868
2869    // ==== Label Tests ====
2870    #[test]
2871    fn test_label_deserialization_and_accessors() {
2872        use serde_json::json;
2873
2874        // Test case 1: Label deserialization and accessors
2875        let label_json = json!({
2876            "id": 123,
2877            "dataset_id": 456,
2878            "index": 5,
2879            "name": "car"
2880        });
2881
2882        let label: Label = serde_json::from_value(label_json).unwrap();
2883        assert_eq!(label.id(), 123);
2884        assert_eq!(label.index(), 5);
2885        assert_eq!(label.name(), "car");
2886        assert_eq!(label.to_string(), "car");
2887        assert_eq!(format!("{}", label), "car");
2888
2889        // Test case 2: Different label
2890        let label_json = json!({
2891            "id": 1,
2892            "dataset_id": 100,
2893            "index": 0,
2894            "name": "person"
2895        });
2896
2897        let label: Label = serde_json::from_value(label_json).unwrap();
2898        assert_eq!(format!("{}", label), "person");
2899    }
2900
2901    // ==== Annotation Serialization Tests ====
2902    #[test]
2903    fn test_annotation_serialization_with_mask_and_box() {
2904        let polygon = vec![vec![
2905            (0.0_f32, 0.0_f32),
2906            (1.0_f32, 0.0_f32),
2907            (1.0_f32, 1.0_f32),
2908        ]];
2909
2910        let mut annotation = Annotation::new();
2911        annotation.set_label(Some("test".to_string()));
2912        annotation.set_box2d(Some(Box2d::new(10.0, 20.0, 30.0, 40.0)));
2913        annotation.set_mask(Some(Mask::new(polygon)));
2914
2915        let mut sample = Sample::new();
2916        sample.annotations.push(annotation);
2917
2918        let json = serde_json::to_value(&sample).unwrap();
2919        let annotations = json
2920            .get("annotations")
2921            .and_then(|value| value.as_array())
2922            .expect("annotations serialized as array");
2923        assert_eq!(annotations.len(), 1);
2924
2925        let annotation_json = annotations[0].as_object().expect("annotation object");
2926        assert!(annotation_json.contains_key("box2d"));
2927        assert!(annotation_json.contains_key("mask"));
2928        assert!(!annotation_json.contains_key("x"));
2929        assert!(
2930            annotation_json
2931                .get("mask")
2932                .and_then(|value| value.as_array())
2933                .is_some()
2934        );
2935    }
2936
2937    #[test]
2938    fn test_frame_number_negative_one_deserializes_as_none() {
2939        // Server returns frame_number: -1 for non-sequence samples
2940        // This should deserialize as None for the client
2941        let json = r#"{
2942            "uuid": "test-uuid",
2943            "frame_number": -1
2944        }"#;
2945
2946        let sample: Sample = serde_json::from_str(json).unwrap();
2947        assert_eq!(sample.frame_number, None);
2948    }
2949
2950    #[test]
2951    fn test_frame_number_positive_value_deserializes_correctly() {
2952        // Valid frame numbers should deserialize normally
2953        let json = r#"{
2954            "uuid": "test-uuid",
2955            "frame_number": 5
2956        }"#;
2957
2958        let sample: Sample = serde_json::from_str(json).unwrap();
2959        assert_eq!(sample.frame_number, Some(5));
2960    }
2961
2962    #[test]
2963    fn test_frame_number_null_deserializes_as_none() {
2964        // Explicit null should also be None
2965        let json = r#"{
2966            "uuid": "test-uuid",
2967            "frame_number": null
2968        }"#;
2969
2970        let sample: Sample = serde_json::from_str(json).unwrap();
2971        assert_eq!(sample.frame_number, None);
2972    }
2973
2974    #[test]
2975    fn test_frame_number_missing_deserializes_as_none() {
2976        // Missing field should be None
2977        let json = r#"{
2978            "uuid": "test-uuid"
2979        }"#;
2980
2981        let sample: Sample = serde_json::from_str(json).unwrap();
2982        assert_eq!(sample.frame_number, None);
2983    }
2984
2985    // =========================================================================
2986    // samples_dataframe tests - CRITICAL: Verify group preservation
2987    // =========================================================================
2988
2989    #[cfg(feature = "polars")]
2990    #[test]
2991    fn test_samples_dataframe_preserves_group_for_samples_without_annotations() {
2992        use polars::prelude::*;
2993
2994        // Create sample WITH annotations
2995        let mut sample_with_ann = Sample::new();
2996        sample_with_ann.image_name = Some("annotated.jpg".to_string());
2997        sample_with_ann.group = Some("train".to_string());
2998        let mut annotation = Annotation::new();
2999        annotation.set_label(Some("car".to_string()));
3000        annotation.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
3001        annotation.set_name(Some("annotated".to_string()));
3002        sample_with_ann.annotations = vec![annotation];
3003
3004        // Create sample WITHOUT annotations (this is the critical case)
3005        let mut sample_no_ann = Sample::new();
3006        sample_no_ann.image_name = Some("unannotated.jpg".to_string());
3007        sample_no_ann.group = Some("val".to_string()); // Should be preserved!
3008        sample_no_ann.annotations = vec![]; // Empty annotations
3009
3010        let samples = vec![sample_with_ann, sample_no_ann];
3011
3012        // Convert to DataFrame
3013        let df = samples_dataframe(&samples).expect("Failed to create DataFrame");
3014
3015        // Verify we have 2 rows (one per sample)
3016        assert_eq!(df.height(), 2, "Expected 2 rows (one per sample)");
3017
3018        // Get the group column
3019        let groups_col = df.column("group").expect("group column should exist");
3020        let groups_cast = groups_col.cast(&DataType::String).expect("cast to string");
3021        let groups = groups_cast.str().expect("as str");
3022
3023        // Find the row for "unannotated" and verify it has group "val"
3024        let names_col = df.column("name").expect("name column should exist");
3025        let names_cast = names_col.cast(&DataType::String).expect("cast to string");
3026        let names = names_cast.str().expect("as str");
3027
3028        let mut found_unannotated = false;
3029        for idx in 0..df.height() {
3030            if let Some(name) = names.get(idx)
3031                && name == "unannotated"
3032            {
3033                found_unannotated = true;
3034                let group = groups.get(idx);
3035                assert_eq!(
3036                    group,
3037                    Some("val"),
3038                    "CRITICAL: Sample 'unannotated' without annotations must have group 'val'"
3039                );
3040            }
3041        }
3042
3043        assert!(
3044            found_unannotated,
3045            "Did not find 'unannotated' sample in DataFrame - \
3046             this means samples without annotations are not being included"
3047        );
3048    }
3049
3050    #[cfg(feature = "polars")]
3051    #[test]
3052    fn test_samples_dataframe_includes_all_samples_even_without_annotations() {
3053        // Verify that samples without annotations still appear in the DataFrame
3054        // with null annotation fields but WITH their group field populated
3055
3056        let mut sample1 = Sample::new();
3057        sample1.image_name = Some("with_ann.jpg".to_string());
3058        sample1.group = Some("train".to_string());
3059        let mut ann = Annotation::new();
3060        ann.set_label(Some("person".to_string()));
3061        ann.set_box2d(Some(Box2d::new(0.0, 0.0, 0.5, 0.5)));
3062        ann.set_name(Some("with_ann".to_string()));
3063        sample1.annotations = vec![ann];
3064
3065        let mut sample2 = Sample::new();
3066        sample2.image_name = Some("no_ann_train.jpg".to_string());
3067        sample2.group = Some("train".to_string());
3068        sample2.annotations = vec![];
3069
3070        let mut sample3 = Sample::new();
3071        sample3.image_name = Some("no_ann_val.jpg".to_string());
3072        sample3.group = Some("val".to_string());
3073        sample3.annotations = vec![];
3074
3075        let samples = vec![sample1, sample2, sample3];
3076
3077        let df = samples_dataframe(&samples).expect("Failed to create DataFrame");
3078
3079        // We should have exactly 3 rows - one per sample
3080        assert_eq!(
3081            df.height(),
3082            3,
3083            "Expected 3 rows (samples without annotations should create one row each)"
3084        );
3085
3086        // Check that all groups are present
3087        let groups_col = df.column("group").expect("group column");
3088        let groups_cast = groups_col.cast(&polars::prelude::DataType::String).unwrap();
3089        let groups = groups_cast.str().unwrap();
3090
3091        let mut train_count = 0;
3092        let mut val_count = 0;
3093
3094        for idx in 0..df.height() {
3095            match groups.get(idx) {
3096                Some("train") => train_count += 1,
3097                Some("val") => val_count += 1,
3098                other => panic!(
3099                    "Unexpected group value at row {}: {:?}. \
3100                     All samples should have their group preserved.",
3101                    idx, other
3102                ),
3103            }
3104        }
3105
3106        assert_eq!(train_count, 2, "Expected 2 samples in 'train' group");
3107        assert_eq!(val_count, 1, "Expected 1 sample in 'val' group");
3108    }
3109
3110    #[cfg(feature = "polars")]
3111    #[test]
3112    fn test_samples_dataframe_group_is_not_null_for_samples_with_group() {
3113        // CRITICAL: Even when a sample has no annotations, if it has a group,
3114        // that group must NOT be null in the DataFrame
3115
3116        let mut sample = Sample::new();
3117        sample.image_name = Some("test.jpg".to_string());
3118        sample.group = Some("test_group".to_string());
3119        sample.annotations = vec![];
3120
3121        let df = samples_dataframe(&[sample]).expect("Failed to create DataFrame");
3122
3123        let groups_col = df.column("group").expect("group column");
3124
3125        // The group column should have NO nulls because our sample has a group
3126        assert_eq!(
3127            groups_col.null_count(),
3128            0,
3129            "Sample with group='test_group' but no annotations has NULL group in DataFrame. \
3130             This is a bug in samples_dataframe - group must be preserved!"
3131        );
3132    }
3133
3134    #[cfg(feature = "polars")]
3135    #[test]
3136    fn test_samples_dataframe_group_consistent_across_all_rows_for_same_image() {
3137        use polars::prelude::*;
3138
3139        // Test that when a sample has multiple annotations, ALL rows have
3140        // the same group value (not just the first one)
3141
3142        let mut sample = Sample::new();
3143        sample.image_name = Some("multi_ann.jpg".to_string());
3144        sample.group = Some("train".to_string());
3145
3146        // Add multiple annotations
3147        let mut ann1 = Annotation::new();
3148        ann1.set_label(Some("car".to_string()));
3149        ann1.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
3150        ann1.set_name(Some("multi_ann".to_string()));
3151
3152        let mut ann2 = Annotation::new();
3153        ann2.set_label(Some("truck".to_string()));
3154        ann2.set_box2d(Some(Box2d::new(0.5, 0.6, 0.2, 0.2)));
3155        ann2.set_name(Some("multi_ann".to_string()));
3156
3157        let mut ann3 = Annotation::new();
3158        ann3.set_label(Some("bus".to_string()));
3159        ann3.set_box2d(Some(Box2d::new(0.7, 0.8, 0.1, 0.1)));
3160        ann3.set_name(Some("multi_ann".to_string()));
3161
3162        sample.annotations = vec![ann1, ann2, ann3];
3163
3164        let df = samples_dataframe(&[sample]).expect("Failed to create DataFrame");
3165
3166        // Should have 3 rows (one per annotation)
3167        assert_eq!(df.height(), 3, "Expected 3 rows (one per annotation)");
3168
3169        // ALL rows should have the group "train" (not just the first one)
3170        let groups_col = df.column("group").expect("group column");
3171        let groups_cast = groups_col.cast(&DataType::String).expect("cast to string");
3172        let groups = groups_cast.str().expect("as str");
3173
3174        // No nulls allowed
3175        assert_eq!(groups_col.null_count(), 0, "No rows should have null group");
3176
3177        // All rows should have the same group
3178        for idx in 0..df.height() {
3179            let group = groups.get(idx);
3180            assert_eq!(
3181                group,
3182                Some("train"),
3183                "Row {} should have group 'train', got {:?}. \
3184                 All rows for the same image must have identical group values.",
3185                idx,
3186                group
3187            );
3188        }
3189    }
3190}