Skip to main content

edgefirst_client/
dataset.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright © 2025 Au-Zone Technologies. All Rights Reserved.
3
4use std::{collections::HashMap, fmt::Display};
5
6use crate::{
7    Client, Error,
8    api::{AnnotationSetID, DatasetID, ProjectID, SampleID},
9    mask::MaskData,
10};
11use chrono::{DateTime, Utc};
12use serde::{Deserialize, Serialize};
13
14#[cfg(feature = "polars")]
15use polars::prelude::*;
16
/// File types supported in EdgeFirst Studio datasets.
///
/// Represents the different types of sensor data files that can be stored
/// and processed in a dataset. EdgeFirst Studio supports various modalities
/// including visual images and different forms of LiDAR and radar data.
///
/// # String Representations
///
/// This enum has two string representations:
/// - **Display** (`fmt::Display`): Returns the server API type name (e.g.,
///   `"lidar.depth"`) used when making API requests to EdgeFirst Studio.
/// - **file_extension()**: Returns the file extension for saving (e.g.,
///   `"lidar.png"`) which may differ from the API type name.
///
/// | Variant        | `Display` (API name) | `file_extension()` |
/// |----------------|----------------------|--------------------|
/// | `Image`        | `image`              | `jpg`              |
/// | `LidarPcd`     | `lidar.pcd`          | `lidar.pcd`        |
/// | `LidarDepth`   | `lidar.depth`        | `lidar.png`        |
/// | `LidarReflect` | `lidar.reflect`      | `lidar.jpg`        |
/// | `RadarPcd`     | `radar.pcd`          | `radar.pcd`        |
/// | `RadarCube`    | `radar.png`          | `radar.png`        |
/// | `All`          | `all`                | *(empty string)*   |
///
/// # Examples
///
/// ```rust
/// use edgefirst_client::FileType;
///
/// // Create file types from strings
/// let image_type: FileType = "image".try_into().unwrap();
/// let lidar_type: FileType = "lidar.pcd".try_into().unwrap();
///
/// // Display file types
/// println!("Processing {} files", image_type); // "Processing image files"
///
/// // Use in dataset operations - example usage
/// let file_type = FileType::Image;
/// match file_type {
///     FileType::Image => println!("Processing image files"),
///     FileType::LidarPcd => println!("Processing LiDAR point cloud files"),
///     _ => println!("Processing other sensor data"),
/// }
/// ```
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum FileType {
    /// Standard image files (JPEG, PNG, etc.)
    Image,
    /// LiDAR point cloud data files (.pcd format)
    LidarPcd,
    /// LiDAR depth images (.png format)
    LidarDepth,
    /// LiDAR reflectance images (.jpg format)
    LidarReflect,
    /// Radar point cloud data files (.pcd format)
    RadarPcd,
    /// Radar cube data files (.png format)
    RadarCube,
    /// Wildcard for every sensor type. It is not a concrete file type:
    /// `FileType::expand_types` replaces it with the list returned by
    /// `FileType::all_sensor_types`.
    All,
}
68
69impl std::fmt::Display for FileType {
70    /// Returns the server API type name for this file type.
71    /// Used when making API requests to the server.
72    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73        let value = match self {
74            FileType::Image => "image",
75            FileType::LidarPcd => "lidar.pcd",
76            FileType::LidarDepth => "lidar.depth",
77            FileType::LidarReflect => "lidar.reflect",
78            FileType::RadarPcd => "radar.pcd",
79            FileType::RadarCube => "radar.png",
80            FileType::All => "all",
81        };
82        write!(f, "{}", value)
83    }
84}
85
86impl FileType {
87    /// Returns the file extension to use when saving downloaded files.
88    /// This may differ from the API type name (e.g., lidar.depth → lidar.png).
89    pub fn file_extension(&self) -> &'static str {
90        match self {
91            FileType::Image => "jpg", // Will be overridden by infer detection
92            FileType::LidarPcd => "lidar.pcd",
93            FileType::LidarDepth => "lidar.png",
94            FileType::LidarReflect => "lidar.jpg",
95            FileType::RadarPcd => "radar.pcd",
96            FileType::RadarCube => "radar.png",
97            FileType::All => "",
98        }
99    }
100}
101
102impl TryFrom<&str> for FileType {
103    type Error = crate::Error;
104
105    fn try_from(s: &str) -> Result<Self, Self::Error> {
106        match s {
107            "image" => Ok(FileType::Image),
108            "lidar.pcd" => Ok(FileType::LidarPcd),
109            // Accept CLI names (lidar.png), server names (lidar.depth), and aliases
110            "lidar.png" | "lidar.depth" | "depth.png" | "depthmap" => Ok(FileType::LidarDepth),
111            "lidar.jpg" | "lidar.jpeg" | "lidar.reflect" => Ok(FileType::LidarReflect),
112            "radar.pcd" | "pcd" => Ok(FileType::RadarPcd),
113            "radar.png" | "cube" => Ok(FileType::RadarCube),
114            "all" => Ok(FileType::All),
115            _ => Err(crate::Error::InvalidFileType(s.to_string())),
116        }
117    }
118}
119
120impl std::str::FromStr for FileType {
121    type Err = crate::Error;
122
123    fn from_str(s: &str) -> Result<Self, Self::Err> {
124        s.try_into()
125    }
126}
127
128impl FileType {
129    /// Returns all concrete sensor file types (excludes `All`).
130    ///
131    /// This is useful for expanding the `All` variant or listing available
132    /// types.
133    ///
134    /// # Example
135    ///
136    /// ```rust
137    /// use edgefirst_client::FileType;
138    ///
139    /// let all_types = FileType::all_sensor_types();
140    /// assert!(all_types.contains(&FileType::Image));
141    /// assert!(!all_types.contains(&FileType::All));
142    /// ```
143    pub fn all_sensor_types() -> Vec<FileType> {
144        vec![
145            FileType::Image,
146            FileType::LidarPcd,
147            FileType::LidarDepth,
148            FileType::LidarReflect,
149            FileType::RadarPcd,
150            FileType::RadarCube,
151        ]
152    }
153
154    /// Returns all valid type names as strings for help text.
155    ///
156    /// # Example
157    ///
158    /// ```rust
159    /// use edgefirst_client::FileType;
160    ///
161    /// let names = FileType::type_names();
162    /// assert!(names.contains(&"image"));
163    /// assert!(names.contains(&"all"));
164    /// ```
165    pub fn type_names() -> Vec<&'static str> {
166        vec![
167            "image",
168            "lidar.pcd",
169            "lidar.png",
170            "lidar.jpg",
171            "radar.pcd",
172            "radar.png",
173            "all",
174        ]
175    }
176
177    /// Expands a list of file types, replacing `All` with all concrete sensor
178    /// types.
179    ///
180    /// If the input contains `FileType::All`, returns all sensor types.
181    /// Otherwise, returns the input types unchanged.
182    ///
183    /// # Example
184    ///
185    /// ```rust
186    /// use edgefirst_client::FileType;
187    ///
188    /// let types = vec![FileType::All];
189    /// let expanded = FileType::expand_types(&types);
190    /// assert_eq!(expanded.len(), 6); // All concrete sensor types
191    ///
192    /// let types = vec![FileType::Image, FileType::LidarPcd];
193    /// let expanded = FileType::expand_types(&types);
194    /// assert_eq!(expanded.len(), 2); // Unchanged
195    /// ```
196    pub fn expand_types(types: &[FileType]) -> Vec<FileType> {
197        if types.contains(&FileType::All) {
198            FileType::all_sensor_types()
199        } else {
200            types.to_vec()
201        }
202    }
203}
204
/// Annotation types supported for labeling data in EdgeFirst Studio.
///
/// Represents the different types of annotations that can be applied to
/// sensor data for machine learning tasks. Each type corresponds to a
/// different annotation geometry and use case.
///
/// # String Representations
///
/// - **Display** writes the client-side name: `box2d`, `box3d`, `polygon`,
///   `mask`.
/// - **as_server_type()** returns the server API name: `box`, `box3d`, `seg`,
///   `seg`.
/// - **Parsing** (`TryFrom<&str>`) accepts `box2d`, `box3d`, `polygon`, `seg`,
///   `mask` and `raster`. For backward compatibility `"mask"` parses to
///   `Polygon`; only `"raster"` parses to `Mask`. As a consequence the
///   `Display` output of `Mask` does not parse back to `Mask`.
///
/// # Examples
///
/// ```rust
/// use edgefirst_client::AnnotationType;
///
/// // Create annotation types from strings (using TryFrom)
/// let box_2d: AnnotationType = "box2d".try_into().unwrap();
/// let segmentation: AnnotationType = "polygon".try_into().unwrap();
///
/// // Or use From with String
/// let box_2d = AnnotationType::from("box2d".to_string());
/// let segmentation = AnnotationType::from("polygon".to_string());
///
/// // Display annotation types
/// println!("Annotation type: {}", box_2d); // "Annotation type: box2d"
///
/// // Use in matching and processing
/// let annotation_type = AnnotationType::Box2d;
/// match annotation_type {
///     AnnotationType::Box2d => println!("Processing 2D bounding boxes"),
///     AnnotationType::Box3d => println!("Processing 3D bounding boxes"),
///     AnnotationType::Polygon => println!("Processing polygon contours"),
///     AnnotationType::Mask => println!("Processing raster pixel masks"),
/// }
/// ```
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum AnnotationType {
    /// 2D bounding boxes for object detection in images
    Box2d,
    /// 3D bounding boxes for object detection in 3D space (LiDAR, etc.)
    Box3d,
    /// Vector polygon contours for instance segmentation
    Polygon,
    /// Raster pixel masks for semantic/instance segmentation
    Mask,
}
247
248impl TryFrom<&str> for AnnotationType {
249    type Error = crate::Error;
250
251    fn try_from(s: &str) -> Result<Self, Self::Error> {
252        match s {
253            "box2d" => Ok(AnnotationType::Box2d),
254            "box3d" => Ok(AnnotationType::Box3d),
255            "polygon" => Ok(AnnotationType::Polygon),
256            "seg" => Ok(AnnotationType::Polygon),
257            "mask" => Ok(AnnotationType::Polygon), // backward compat
258            "raster" => Ok(AnnotationType::Mask),
259            _ => Err(crate::Error::InvalidAnnotationType(s.to_string())),
260        }
261    }
262}
263
264impl From<String> for AnnotationType {
265    fn from(s: String) -> Self {
266        // For backward compatibility, default to Box2d if invalid
267        s.as_str().try_into().unwrap_or(AnnotationType::Box2d)
268    }
269}
270
271impl From<&String> for AnnotationType {
272    fn from(s: &String) -> Self {
273        // For backward compatibility, default to Box2d if invalid
274        s.as_str().try_into().unwrap_or(AnnotationType::Box2d)
275    }
276}
277
278impl AnnotationType {
279    /// Returns the server API type name for this annotation type.
280    ///
281    /// The server uses different naming conventions than the client:
282    /// - `Box2d` → `"box"` (server) vs `"box2d"` (client display)
283    /// - `Box3d` → `"box3d"` (same)
284    /// - `Polygon` → `"seg"` (server) vs `"polygon"` (client display)
285    /// - `Mask` → `"seg"` (server) vs `"mask"` (client display)
286    pub fn as_server_type(&self) -> &'static str {
287        match self {
288            AnnotationType::Box2d => "box",
289            AnnotationType::Box3d => "box3d",
290            AnnotationType::Polygon => "seg",
291            AnnotationType::Mask => "seg",
292        }
293    }
294}
295
296impl std::fmt::Display for AnnotationType {
297    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
298        let value = match self {
299            AnnotationType::Box2d => "box2d",
300            AnnotationType::Box3d => "box3d",
301            AnnotationType::Polygon => "polygon",
302            AnnotationType::Mask => "mask",
303        };
304        write!(f, "{}", value)
305    }
306}
307
/// A dataset in EdgeFirst Studio containing sensor data and annotations.
///
/// Datasets are collections of multi-modal sensor data (images, LiDAR, radar)
/// along with their corresponding annotations (bounding boxes, segmentation
/// masks, 3D annotations). Datasets belong to projects and can be used for
/// training and validation of machine learning models.
///
/// # Features
///
/// - **Multi-modal Data**: Support for images, LiDAR point clouds, radar data
/// - **Rich Annotations**: 2D/3D bounding boxes, segmentation masks
/// - **Metadata**: Timestamps, sensor configurations, calibration data
/// - **Version Control**: Track changes and maintain data lineage
/// - **Format Conversion**: Export to popular ML frameworks
///
/// # Examples
///
/// ```no_run
/// use edgefirst_client::{Client, Dataset, DatasetID};
/// use std::str::FromStr;
///
/// # async fn example() -> Result<(), edgefirst_client::Error> {
/// # let client = Client::new()?;
/// // Get dataset information
/// let dataset_id = DatasetID::from_str("ds-abc123")?;
/// let dataset = client.dataset(dataset_id).await?;
/// println!("Dataset: {}", dataset.name());
///
/// // Access dataset metadata
/// println!("Dataset ID: {}", dataset.id());
/// println!("Description: {}", dataset.description());
/// println!("Created: {}", dataset.created());
///
/// // Work with dataset data would require additional methods
/// // that are implemented in the full API
/// # Ok(())
/// # }
/// ```
#[derive(Deserialize, Clone, Debug)]
pub struct Dataset {
    /// Unique identifier of the dataset.
    id: DatasetID,
    /// Identifier of the project this dataset belongs to.
    project_id: ProjectID,
    /// Dataset name; printed after the id by the `Display` impl.
    name: String,
    /// Free-form description of the dataset.
    description: String,
    /// NOTE(review): presumably the key locating the dataset in cloud
    /// storage; confirm against the server API.
    cloud_key: String,
    /// Creation timestamp; read from the `createdAt` field of the payload.
    #[serde(rename = "createdAt")]
    created: DateTime<Utc>,
}
356
357impl Display for Dataset {
358    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
359        write!(f, "{} {}", self.id, self.name)
360    }
361}
362
363impl Dataset {
364    pub fn id(&self) -> DatasetID {
365        self.id
366    }
367
368    pub fn project_id(&self) -> ProjectID {
369        self.project_id
370    }
371
372    pub fn name(&self) -> &str {
373        &self.name
374    }
375
376    pub fn description(&self) -> &str {
377        &self.description
378    }
379
380    pub fn cloud_key(&self) -> &str {
381        &self.cloud_key
382    }
383
384    pub fn created(&self) -> &DateTime<Utc> {
385        &self.created
386    }
387
388    pub async fn project(&self, client: &Client) -> Result<crate::api::Project, Error> {
389        client.project(self.project_id).await
390    }
391
392    pub async fn annotation_sets(&self, client: &Client) -> Result<Vec<AnnotationSet>, Error> {
393        client.annotation_sets(self.id).await
394    }
395
396    pub async fn labels(&self, client: &Client) -> Result<Vec<Label>, Error> {
397        client.labels(self.id).await
398    }
399
400    pub async fn add_label(&self, client: &Client, name: &str) -> Result<(), Error> {
401        client.add_label(self.id, name).await
402    }
403
404    pub async fn add_label_with_index(
405        &self,
406        client: &Client,
407        name: &str,
408        index: u64,
409    ) -> Result<(), Error> {
410        client.add_label_with_index(self.id, name, index).await
411    }
412
413    pub async fn remove_label(&self, client: &Client, name: &str) -> Result<(), Error> {
414        let labels = self.labels(client).await?;
415        let label = labels
416            .iter()
417            .find(|l| l.name() == name)
418            .ok_or_else(|| Error::MissingLabel(name.to_string()))?;
419        client.remove_label(label.id()).await
420    }
421}
422
/// A named collection of annotations within a dataset.
///
/// A dataset can have multiple annotation sets, each containing annotations for
/// different tasks or purposes. Every set records the dataset it belongs to
/// (`dataset_id`).
#[derive(Deserialize)]
pub struct AnnotationSet {
    /// Unique identifier of the annotation set.
    id: AnnotationSetID,
    /// Identifier of the dataset this annotation set belongs to.
    dataset_id: DatasetID,
    /// Annotation set name; printed after the id by the `Display` impl.
    name: String,
    /// Free-form description of the annotation set.
    description: String,
    /// Creation timestamp; read from the `date` field of the payload.
    #[serde(rename = "date")]
    created: DateTime<Utc>,
}
435
436impl Display for AnnotationSet {
437    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
438        write!(f, "{} {}", self.id, self.name)
439    }
440}
441
442impl AnnotationSet {
443    pub fn id(&self) -> AnnotationSetID {
444        self.id
445    }
446
447    pub fn dataset_id(&self) -> DatasetID {
448        self.dataset_id
449    }
450
451    pub fn name(&self) -> &str {
452        &self.name
453    }
454
455    pub fn description(&self) -> &str {
456        &self.description
457    }
458
459    pub fn created(&self) -> DateTime<Utc> {
460        self.created
461    }
462
463    pub async fn dataset(&self, client: &Client) -> Result<Dataset, Error> {
464        client.dataset(self.dataset_id).await
465    }
466}
467
/// Pipeline timing measurements for a sample, in nanoseconds.
///
/// Each field records the wall-clock duration of one pipeline stage.
/// Populated from Arrow metadata; not part of the Studio JSON-RPC API.
///
/// Every stage is optional: a `None` field means no measurement is available
/// for that stage, and `Timing::default()` has all stages unset.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Timing {
    /// Duration of the data-loading stage (nanoseconds).
    pub load: Option<i64>,
    /// Duration of the preprocessing stage (nanoseconds).
    pub preprocess: Option<i64>,
    /// Duration of the inference stage (nanoseconds).
    pub inference: Option<i64>,
    /// Duration of the decoding / postprocessing stage (nanoseconds).
    pub decode: Option<i64>,
}
483
/// A sample in a dataset, typically representing a single image with metadata
/// and optional sensor data.
///
/// Each sample has a unique ID, image reference, and can include additional
/// sensor data like LiDAR, radar, or depth maps. Samples can also have
/// associated annotations.
///
/// # Serialization
///
/// Only `Serialize` is derived here. Deserialization is implemented by hand:
/// the payload is first read into the private `SampleRaw` struct and then
/// converted, which is how the `sensors` field gets split into `files` and
/// `location`. Unset optional fields and empty lists are omitted when
/// serializing.
#[derive(Serialize, Clone, Debug)]
pub struct Sample {
    /// Identifier of the sample, if it has one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<SampleID>,
    /// Dataset split (train, val, test) - stored in Arrow metadata, not used
    /// for directory structure.
    /// API field name discrepancy: samples.populate2 expects "group", but
    /// samples.list returns "group_name".
    // Only the serialize-side settings matter for the derived impl; reading
    // "group_name" is handled by `SampleRaw`.
    #[serde(
        alias = "group_name",
        rename(serialize = "group", deserialize = "group_name"),
        skip_serializing_if = "Option::is_none"
    )]
    pub group: Option<String>,
    /// Name of the sequence this sample is part of, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_name: Option<String>,
    /// UUID of the sequence this sample is part of, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_uuid: Option<String>,
    /// Description of the sequence this sample is part of, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_description: Option<String>,
    /// Frame number within the sequence. `None` for non-sequence samples: the
    /// server reports those as `-1`, which is mapped to `None` on input.
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_frame_number"
    )]
    pub frame_number: Option<u32>,
    /// UUID of the sample, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub uuid: Option<String>,
    /// File name of the sample's image.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_name: Option<String>,
    /// URL of the sample's image.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_url: Option<String>,
    /// Image width (presumably in pixels; confirm against the server API).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub width: Option<u32>,
    /// Image height (presumably in pixels; confirm against the server API).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub height: Option<u32>,
    /// Timestamp associated with the sample (UTC).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub date: Option<DateTime<Utc>>,
    /// Source of the sample, as reported by the server.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// Camera location and pose (GPS + IMU data).
    /// Location data is extracted from the "sensors" field during
    /// deserialization. When uploading samples, this field is serialized
    /// as "sensors" to match the samples.populate2 API format.
    #[serde(skip_serializing_if = "Option::is_none", rename(serialize = "sensors"))]
    pub location: Option<Location>,
    /// Image degradation type (blur, occlusion, weather, etc.).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub degradation: Option<String>,
    /// LVIS: label_index values for categories verified absent from this image.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neg_label_indices: Option<Vec<u32>>,
    /// LVIS: label_index values for categories with incomplete annotation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub not_exhaustive_label_indices: Option<Vec<u32>>,
    /// Additional sensor files (LiDAR, radar, depth maps, etc.).
    /// Deserialization is handled by custom Deserialize impl which extracts
    /// files from the "sensors" field. Serialization converts to HashMap for
    /// samples.populate2 API.
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
        serialize_with = "serialize_files"
    )]
    pub files: Vec<SampleFile>,
    /// Annotations associated with this sample.
    /// Deserialization is handled by custom Deserialize impl.
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
        serialize_with = "serialize_annotations"
    )]
    pub annotations: Vec<Annotation>,
    /// Pipeline timing measurements (populated from Arrow, not from Studio
    /// JSON-RPC). Never serialized.
    #[serde(skip)]
    pub timing: Option<Timing>,
}
568
569// Custom deserializer for frame_number - converts -1 to None
570// Server returns -1 for non-sequence samples, but clients should see None
571fn deserialize_frame_number<'de, D>(deserializer: D) -> Result<Option<u32>, D::Error>
572where
573    D: serde::Deserializer<'de>,
574{
575    use serde::Deserialize;
576
577    let value = Option::<i32>::deserialize(deserializer)?;
578    Ok(value.and_then(|v| if v < 0 { None } else { Some(v as u32) }))
579}
580
/// Reports whether `s` starts with an `http://` or `https://` scheme.
///
/// This is a prefix check only (case-sensitive, no further validation). It is
/// used to tell pre-signed download URLs apart from inline base64/JSON data.
fn is_valid_url(s: &str) -> bool {
    const SCHEMES: [&str; 2] = ["http://", "https://"];
    SCHEMES.iter().any(|scheme| s.starts_with(*scheme))
}
586
587// Custom serializer for files field - converts Vec<SampleFile> to
588// HashMap<String, String>
589fn serialize_files<S>(files: &[SampleFile], serializer: S) -> Result<S::Ok, S::Error>
590where
591    S: serde::Serializer,
592{
593    use serde::Serialize;
594    let map: HashMap<String, String> = files
595        .iter()
596        .filter_map(|f| {
597            f.filename()
598                .map(|filename| (f.file_type().to_string(), filename.to_string()))
599        })
600        .collect();
601    map.serialize(serializer)
602}
603
604// Custom serializer for annotations field - serializes to a flat
605// Vec<Annotation> to match the updated samples.populate2 contract (annotations
606// array)
607fn serialize_annotations<S>(annotations: &Vec<Annotation>, serializer: S) -> Result<S::Ok, S::Error>
608where
609    S: serde::Serializer,
610{
611    serde::Serialize::serialize(annotations, serializer)
612}
613
614// Custom deserializer for annotations field - converts server format back to
615// Vec<Annotation>
616fn deserialize_annotations<'de, D>(deserializer: D) -> Result<Vec<Annotation>, D::Error>
617where
618    D: serde::Deserializer<'de>,
619{
620    use serde::Deserialize;
621
622    #[derive(Deserialize)]
623    #[serde(untagged)]
624    enum AnnotationsFormat {
625        Vec(Vec<Annotation>),
626        Map(HashMap<String, Vec<Annotation>>),
627    }
628
629    let value = Option::<AnnotationsFormat>::deserialize(deserializer)?;
630    Ok(value
631        .map(|v| match v {
632            AnnotationsFormat::Vec(annotations) => annotations,
633            AnnotationsFormat::Map(map) => convert_annotations_map_to_vec(map),
634        })
635        .unwrap_or_default())
636}
637
/// Intermediate struct for deserializing sensors data that may contain both
/// file references (URLs/data) and location data (GPS/IMU).
///
/// Produced by `deserialize_sensors_data`; the default value has no files and
/// no location.
#[derive(Debug, Default)]
struct SensorsData {
    /// Sensor files found in the payload, in the order they were encountered.
    files: Vec<SampleFile>,
    /// Location built from `gps` / `imu` entries, if any were present.
    location: Option<Location>,
}
645
646/// Deserialize sensors field into both files and location data.
647fn deserialize_sensors_data(value: Option<serde_json::Value>) -> SensorsData {
648    use serde_json::Value;
649
650    /// Create a SampleFile from a string value, distinguishing URL vs inline
651    /// data.
652    fn create_sample_file(file_type: String, value: String) -> SampleFile {
653        if is_valid_url(&value) {
654            SampleFile::with_url(file_type, value)
655        } else {
656            SampleFile::with_data(file_type, value)
657        }
658    }
659
660    /// Create a SampleFile from any JSON value, converting non-strings to JSON.
661    fn create_sample_file_from_value(file_type: String, value: Value) -> Option<SampleFile> {
662        match value {
663            Value::String(s) => Some(create_sample_file(file_type, s)),
664            Value::Object(_) | Value::Array(_) => {
665                // Inline JSON data (legacy format) - serialize to string
666                serde_json::to_string(&value)
667                    .ok()
668                    .map(|data| SampleFile::with_data(file_type, data))
669            }
670            _ => None,
671        }
672    }
673
674    /// Try to extract Location from a JSON object containing gps/imu keys.
675    fn extract_location(map: &serde_json::Map<String, Value>) -> Option<Location> {
676        let gps = map
677            .get("gps")
678            .and_then(|v| serde_json::from_value::<GpsData>(v.clone()).ok());
679        let imu = map
680            .get("imu")
681            .and_then(|v| serde_json::from_value::<ImuData>(v.clone()).ok());
682
683        if gps.is_some() || imu.is_some() {
684            Some(Location { gps, imu })
685        } else {
686            None
687        }
688    }
689
690    let mut result = SensorsData::default();
691
692    match value {
693        None => result,
694        Some(Value::Array(arr)) => {
695            // Array of single-key objects: [{"radar.png": "url"}, {"gps": {...}}, ...]
696            for item in arr {
697                if let Value::Object(map) = item {
698                    // Check if this looks like a SampleFile object (has "type" key)
699                    if map.contains_key("type") {
700                        // Try to parse as SampleFile
701                        if let Ok(file) =
702                            serde_json::from_value::<SampleFile>(Value::Object(map.clone()))
703                        {
704                            result.files.push(file);
705                        }
706                    } else {
707                        // Check for location data (gps/imu)
708                        if let Some(loc) = extract_location(&map) {
709                            // Merge with existing location
710                            if let Some(ref mut existing) = result.location {
711                                if loc.gps.is_some() {
712                                    existing.gps = loc.gps;
713                                }
714                                if loc.imu.is_some() {
715                                    existing.imu = loc.imu;
716                                }
717                            } else {
718                                result.location = Some(loc);
719                            }
720                        } else {
721                            // Single-key object: {file_type: url_or_data}
722                            for (file_type, value) in map {
723                                if let Some(file) = create_sample_file_from_value(file_type, value)
724                                {
725                                    result.files.push(file);
726                                }
727                            }
728                        }
729                    }
730                }
731            }
732            result
733        }
734        Some(Value::Object(map)) => {
735            // Check if this contains location data (gps or imu keys with object values)
736            if let Some(loc) = extract_location(&map) {
737                result.location = Some(loc);
738            }
739
740            // Also extract any file references (non-location keys)
741            for (key, value) in map {
742                if key != "gps"
743                    && key != "imu"
744                    && let Some(file) = create_sample_file_from_value(key, value)
745                {
746                    result.files.push(file);
747                }
748            }
749            result
750        }
751        Some(_) => result,
752    }
753}
754
/// Raw sample structure for deserialization.
/// This mirrors Sample but deserializes sensors into a combined struct
/// that captures both files and location data.
///
/// `Sample`'s hand-written `Deserialize` impl reads this struct first and then
/// converts it with `Sample::from`, which splits `sensors` into `files` and
/// `location`.
#[derive(Deserialize)]
struct SampleRaw {
    #[serde(default)]
    id: Option<SampleID>,
    // Accepted as either "group" or "group_name" (samples.list uses the
    // latter).
    #[serde(alias = "group_name")]
    group: Option<String>,
    sequence_name: Option<String>,
    sequence_uuid: Option<String>,
    sequence_description: Option<String>,
    // Negative values (the server's -1 sentinel) become None.
    #[serde(default, deserialize_with = "deserialize_frame_number")]
    frame_number: Option<u32>,
    uuid: Option<String>,
    image_name: Option<String>,
    image_url: Option<String>,
    width: Option<u32>,
    height: Option<u32>,
    date: Option<DateTime<Utc>>,
    source: Option<String>,
    degradation: Option<String>,
    #[serde(default)]
    neg_label_indices: Option<Vec<u32>>,
    #[serde(default)]
    not_exhaustive_label_indices: Option<Vec<u32>>,
    /// Raw sensors JSON - will be processed into files + location
    // NOTE(review): the alias repeats the field's own name and looks
    // redundant; confirm before removing.
    #[serde(default, alias = "sensors")]
    sensors: Option<serde_json::Value>,
    // Accepts either a flat array or a map of arrays; missing means empty.
    #[serde(default, deserialize_with = "deserialize_annotations")]
    annotations: Vec<Annotation>,
}
787
788impl From<SampleRaw> for Sample {
789    fn from(raw: SampleRaw) -> Self {
790        let sensors_data = deserialize_sensors_data(raw.sensors);
791
792        Sample {
793            id: raw.id,
794            group: raw.group,
795            sequence_name: raw.sequence_name,
796            sequence_uuid: raw.sequence_uuid,
797            sequence_description: raw.sequence_description,
798            frame_number: raw.frame_number,
799            uuid: raw.uuid,
800            image_name: raw.image_name,
801            image_url: raw.image_url,
802            width: raw.width,
803            height: raw.height,
804            date: raw.date,
805            source: raw.source,
806            location: sensors_data.location,
807            degradation: raw.degradation,
808            neg_label_indices: raw.neg_label_indices,
809            not_exhaustive_label_indices: raw.not_exhaustive_label_indices,
810            files: sensors_data.files,
811            annotations: raw.annotations,
812            timing: None,
813        }
814    }
815}
816
817impl<'de> serde::Deserialize<'de> for Sample {
818    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
819    where
820        D: serde::Deserializer<'de>,
821    {
822        let raw = SampleRaw::deserialize(deserializer)?;
823        Ok(Sample::from(raw))
824    }
825}
826
827impl Display for Sample {
828    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
829        write!(
830            f,
831            "{} {}",
832            self.id
833                .map(|id| id.to_string())
834                .unwrap_or_else(|| "unknown".to_string()),
835            self.image_name().unwrap_or("unknown")
836        )
837    }
838}
839
840impl Default for Sample {
841    fn default() -> Self {
842        Self::new()
843    }
844}
845
846impl Sample {
847    /// Creates a new empty sample.
848    pub fn new() -> Self {
849        Self {
850            id: None,
851            group: None,
852            sequence_name: None,
853            sequence_uuid: None,
854            sequence_description: None,
855            frame_number: None,
856            uuid: None,
857            image_name: None,
858            image_url: None,
859            width: None,
860            height: None,
861            date: None,
862            source: None,
863            location: None,
864            degradation: None,
865            neg_label_indices: None,
866            not_exhaustive_label_indices: None,
867            files: vec![],
868            annotations: vec![],
869            timing: None,
870        }
871    }
872
873    pub fn id(&self) -> Option<SampleID> {
874        self.id
875    }
876
877    pub fn name(&self) -> Option<String> {
878        self.image_name.as_ref().map(|n| extract_sample_name(n))
879    }
880
881    pub fn group(&self) -> Option<&String> {
882        self.group.as_ref()
883    }
884
885    pub fn sequence_name(&self) -> Option<&String> {
886        self.sequence_name.as_ref()
887    }
888
889    pub fn sequence_uuid(&self) -> Option<&String> {
890        self.sequence_uuid.as_ref()
891    }
892
893    pub fn sequence_description(&self) -> Option<&String> {
894        self.sequence_description.as_ref()
895    }
896
897    pub fn frame_number(&self) -> Option<u32> {
898        self.frame_number
899    }
900
901    pub fn uuid(&self) -> Option<&String> {
902        self.uuid.as_ref()
903    }
904
905    pub fn image_name(&self) -> Option<&str> {
906        self.image_name.as_deref()
907    }
908
909    pub fn image_url(&self) -> Option<&str> {
910        self.image_url.as_deref()
911    }
912
913    pub fn width(&self) -> Option<u32> {
914        self.width
915    }
916
917    pub fn height(&self) -> Option<u32> {
918        self.height
919    }
920
921    pub fn date(&self) -> Option<DateTime<Utc>> {
922        self.date
923    }
924
925    pub fn source(&self) -> Option<&String> {
926        self.source.as_ref()
927    }
928
929    pub fn location(&self) -> Option<&Location> {
930        self.location.as_ref()
931    }
932
933    pub fn files(&self) -> &[SampleFile] {
934        &self.files
935    }
936
937    pub fn annotations(&self) -> &[Annotation] {
938        &self.annotations
939    }
940
941    pub fn with_annotations(mut self, annotations: Vec<Annotation>) -> Self {
942        self.annotations = annotations;
943        self
944    }
945
946    pub fn with_frame_number(mut self, frame_number: Option<u32>) -> Self {
947        self.frame_number = frame_number;
948        self
949    }
950
951    /// Downloads a file of the specified type for this sample.
952    ///
953    /// Supports both newer datasets (pre-signed URLs) and legacy datasets
954    /// (inline base64-encoded data):
955    /// 1. First tries to download from URL if available
956    /// 2. Falls back to decoding inline base64 data for legacy datasets
957    pub async fn download(
958        &self,
959        client: &Client,
960        file_type: FileType,
961    ) -> Result<Option<Vec<u8>>, Error> {
962        use base64::{Engine, engine::general_purpose::STANDARD};
963
964        // Handle image type separately (uses image_url field)
965        if file_type == FileType::Image {
966            if let Some(url) = self.image_url.as_deref()
967                && is_valid_url(url)
968            {
969                return Ok(Some(client.download(url).await?));
970            }
971            return Ok(None);
972        }
973
974        // Find the matching file for this type
975        let file = resolve_file(&file_type, &self.files);
976
977        match file {
978            Some(f) => {
979                // Prefer URL (newer datasets)
980                if let Some(url) = f.url() {
981                    return Ok(Some(client.download(url).await?));
982                }
983
984                // Fall back to inline data (legacy datasets)
985                if let Some(data) = f.data() {
986                    // Legacy data can be in several formats:
987                    // 1. Base64-encoded JSON: "eyJyYWRhci5wY2QiOi..." -> {"radar.pcd": "content"}
988                    // 2. Direct JSON wrapper: {"radar.pcd": "content"}
989                    // 3. Raw content (PCD text, etc.)
990
991                    // Try base64 decode first
992                    let decoded = if let Ok(bytes) = STANDARD.decode(data) {
993                        // Check if decoded bytes are UTF-8 JSON
994                        if let Ok(text) = String::from_utf8(bytes.clone()) {
995                            if text.starts_with('{') {
996                                // It's JSON - use the text for further processing
997                                text
998                            } else {
999                                // Non-JSON binary data - return as-is
1000                                return Ok(Some(bytes));
1001                            }
1002                        } else {
1003                            // Binary data - return as-is
1004                            return Ok(Some(bytes));
1005                        }
1006                    } else {
1007                        // Not base64 - use original data
1008                        data.to_string()
1009                    };
1010
1011                    // Try to unwrap JSON wrapper: {"type_name": "content"}
1012                    let content = if decoded.starts_with('{') {
1013                        if let Ok(json) = serde_json::from_str::<serde_json::Value>(&decoded) {
1014                            if let Some(obj) = json.as_object() {
1015                                obj.values()
1016                                    .next()
1017                                    .and_then(|v| v.as_str())
1018                                    .map(|s| s.to_string())
1019                                    .unwrap_or(decoded)
1020                            } else {
1021                                decoded
1022                            }
1023                        } else {
1024                            decoded
1025                        }
1026                    } else {
1027                        decoded
1028                    };
1029
1030                    return Ok(Some(content.as_bytes().to_vec()));
1031                }
1032
1033                Ok(None)
1034            }
1035            None => Ok(None),
1036        }
1037    }
1038}
1039
/// A file associated with a sample (e.g., LiDAR point cloud, radar data).
///
/// For samples retrieved from the server, this contains the file type and URL.
/// For samples being populated to the server, this can be a type and filename.
///
/// Legacy datasets may have inline base64-encoded data instead of URLs.
/// The `data` field stores this inline content for fallback when no URL exists.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct SampleFile {
    /// File type name as a string (e.g., "image", "lidar.pcd").
    r#type: String,
    /// URL of the file, when available. Omitted from serialized output when
    /// `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    url: Option<String>,
    /// Filename of the file, when available. Omitted from serialized output
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    filename: Option<String>,
    /// Inline base64-encoded data for legacy datasets without pre-signed URLs.
    ///
    /// Never filled in by the derived deserializer (`skip_deserializing`); it
    /// is only set through [`SampleFile::with_data`].
    #[serde(skip_serializing_if = "Option::is_none", skip_deserializing)]
    data: Option<String>,
    /// Raw bytes for direct upload (e.g., from ZIP archives).
    /// This field is not serialized - it's only used during the upload process.
    #[serde(skip)]
    bytes: Option<Vec<u8>>,
}
1062
1063impl SampleFile {
1064    /// Creates a new sample file with type and URL (for newer datasets).
1065    pub fn with_url(file_type: String, url: String) -> Self {
1066        Self {
1067            r#type: file_type,
1068            url: Some(url),
1069            filename: None,
1070            data: None,
1071            bytes: None,
1072        }
1073    }
1074
1075    /// Creates a new sample file with type and filename (for populate API).
1076    pub fn with_filename(file_type: String, filename: String) -> Self {
1077        Self {
1078            r#type: file_type,
1079            url: None,
1080            filename: Some(filename),
1081            data: None,
1082            bytes: None,
1083        }
1084    }
1085
1086    /// Creates a new sample file with inline data (for legacy datasets).
1087    pub fn with_data(file_type: String, data: String) -> Self {
1088        Self {
1089            r#type: file_type,
1090            url: None,
1091            filename: None,
1092            data: Some(data),
1093            bytes: None,
1094        }
1095    }
1096
1097    /// Creates a new sample file with raw bytes for direct upload.
1098    ///
1099    /// This is useful for uploading files from ZIP archives without extracting
1100    /// to disk first. The bytes are uploaded directly to the presigned URL.
1101    ///
1102    /// # Arguments
1103    /// * `file_type` - The type of file (e.g., "image", "lidar.pcd")
1104    /// * `filename` - The filename to use for the upload
1105    /// * `bytes` - The raw file bytes
1106    pub fn with_bytes(file_type: String, filename: String, bytes: Vec<u8>) -> Self {
1107        Self {
1108            r#type: file_type,
1109            url: None,
1110            filename: Some(filename),
1111            data: None,
1112            bytes: Some(bytes),
1113        }
1114    }
1115
1116    pub fn file_type(&self) -> &str {
1117        &self.r#type
1118    }
1119
1120    pub fn url(&self) -> Option<&str> {
1121        self.url.as_deref()
1122    }
1123
1124    pub fn filename(&self) -> Option<&str> {
1125        self.filename.as_deref()
1126    }
1127
1128    /// Returns inline base64-encoded data (for legacy datasets).
1129    pub fn data(&self) -> Option<&str> {
1130        self.data.as_deref()
1131    }
1132
1133    /// Returns raw bytes for direct upload (from ZIP archives, etc.).
1134    pub fn bytes(&self) -> Option<&[u8]> {
1135        self.bytes.as_deref()
1136    }
1137}
1138
/// Location and pose information for a sample.
///
/// Contains GPS coordinates and IMU orientation data describing where and how
/// the camera was positioned when capturing the sample.
///
/// Both parts are optional and each is omitted from serialized output when
/// `None`.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Location {
    /// GPS coordinates, if available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gps: Option<GpsData>,
    /// IMU orientation, if available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub imu: Option<ImuData>,
}
1150
/// GPS location data (latitude and longitude).
///
/// Use [`GpsData::validate`] to check that the values are in range.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct GpsData {
    /// Latitude in degrees.
    pub lat: f64,
    /// Longitude in degrees.
    pub lon: f64,
}
1157
1158impl GpsData {
1159    /// Validate GPS coordinates are within valid ranges.
1160    ///
1161    /// Checks if latitude and longitude values are within valid geographic
1162    /// ranges. Helps catch data corruption or API issues early.
1163    ///
1164    /// # Returns
1165    /// `Ok(())` if valid, `Err(String)` with descriptive error message
1166    /// otherwise
1167    ///
1168    /// # Valid Ranges
1169    /// - Latitude: -90.0 to +90.0 degrees
1170    /// - Longitude: -180.0 to +180.0 degrees
1171    ///
1172    /// # Examples
1173    /// ```
1174    /// use edgefirst_client::GpsData;
1175    ///
1176    /// let gps = GpsData {
1177    ///     lat: 37.7749,
1178    ///     lon: -122.4194,
1179    /// };
1180    /// assert!(gps.validate().is_ok());
1181    ///
1182    /// let bad_gps = GpsData {
1183    ///     lat: 100.0,
1184    ///     lon: 0.0,
1185    /// };
1186    /// assert!(bad_gps.validate().is_err());
1187    /// ```
1188    pub fn validate(&self) -> Result<(), String> {
1189        validate_gps_coordinates(self.lat, self.lon)
1190    }
1191}
1192
/// IMU orientation data (roll, pitch, yaw in degrees).
///
/// Use [`ImuData::validate`] to check that the values are in range.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ImuData {
    /// Roll angle in degrees.
    pub roll: f64,
    /// Pitch angle in degrees.
    pub pitch: f64,
    /// Yaw angle in degrees.
    pub yaw: f64,
}
1200
1201impl ImuData {
1202    /// Validate IMU orientation angles are within valid ranges.
1203    ///
1204    /// Checks if roll, pitch, and yaw values are finite and within reasonable
1205    /// ranges. Helps catch data corruption or sensor errors early.
1206    ///
1207    /// # Returns
1208    /// `Ok(())` if valid, `Err(String)` with descriptive error message
1209    /// otherwise
1210    ///
1211    /// # Valid Ranges
1212    /// - Roll: -180.0 to +180.0 degrees
1213    /// - Pitch: -90.0 to +90.0 degrees (typical gimbal lock range)
1214    /// - Yaw: -180.0 to +180.0 degrees (or 0 to 360, normalized)
1215    ///
1216    /// # Examples
1217    /// ```
1218    /// use edgefirst_client::ImuData;
1219    ///
1220    /// let imu = ImuData {
1221    ///     roll: 10.0,
1222    ///     pitch: 5.0,
1223    ///     yaw: 90.0,
1224    /// };
1225    /// assert!(imu.validate().is_ok());
1226    ///
1227    /// let bad_imu = ImuData {
1228    ///     roll: 200.0,
1229    ///     pitch: 0.0,
1230    ///     yaw: 0.0,
1231    /// };
1232    /// assert!(bad_imu.validate().is_err());
1233    /// ```
1234    pub fn validate(&self) -> Result<(), String> {
1235        validate_imu_orientation(self.roll, self.pitch, self.yaw)
1236    }
1237}
1238
/// Associates a string type name with a type.
///
/// Implemented in this file by `Box3d`, `Box2d`, and `Polygon`, which return
/// `"box3d"`, `"box2d"`, and `"polygon"` respectively.
#[allow(dead_code)]
pub trait TypeName {
    /// Returns the type name as an owned string.
    fn type_name() -> String;
}
1243
/// A 3D box stored as a center point plus width, height, and length.
///
/// Field meanings follow [`Box3d::new`], which stores its `cx`, `cy`, `cz`,
/// `width`, `height`, and `length` arguments in `x`, `y`, `z`, `w`, `h`, and
/// `l`.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct Box3d {
    /// Center x coordinate.
    x: f32,
    /// Center y coordinate.
    y: f32,
    /// Center z coordinate.
    z: f32,
    /// Width (extent along x).
    w: f32,
    /// Height (extent along y).
    h: f32,
    /// Length (extent along z).
    l: f32,
}
1253
1254impl TypeName for Box3d {
1255    fn type_name() -> String {
1256        "box3d".to_owned()
1257    }
1258}
1259
1260impl Box3d {
1261    pub fn new(cx: f32, cy: f32, cz: f32, width: f32, height: f32, length: f32) -> Self {
1262        Self {
1263            x: cx,
1264            y: cy,
1265            z: cz,
1266            w: width,
1267            h: height,
1268            l: length,
1269        }
1270    }
1271
1272    pub fn width(&self) -> f32 {
1273        self.w
1274    }
1275
1276    pub fn height(&self) -> f32 {
1277        self.h
1278    }
1279
1280    pub fn length(&self) -> f32 {
1281        self.l
1282    }
1283
1284    pub fn cx(&self) -> f32 {
1285        self.x
1286    }
1287
1288    pub fn cy(&self) -> f32 {
1289        self.y
1290    }
1291
1292    pub fn cz(&self) -> f32 {
1293        self.z
1294    }
1295
1296    pub fn left(&self) -> f32 {
1297        self.x - self.w / 2.0
1298    }
1299
1300    pub fn top(&self) -> f32 {
1301        self.y - self.h / 2.0
1302    }
1303
1304    pub fn front(&self) -> f32 {
1305        self.z - self.l / 2.0
1306    }
1307}
1308
/// A 2D box stored as its left/top corner plus width and height.
///
/// Field meanings follow [`Box2d::new`], which stores its `left`, `top`,
/// `width`, and `height` arguments in `x`, `y`, `w`, and `h`.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct Box2d {
    /// Height.
    h: f32,
    /// Width.
    w: f32,
    /// Left edge x coordinate.
    x: f32,
    /// Top edge y coordinate.
    y: f32,
}
1316
1317impl TypeName for Box2d {
1318    fn type_name() -> String {
1319        "box2d".to_owned()
1320    }
1321}
1322
1323impl Box2d {
1324    pub fn new(left: f32, top: f32, width: f32, height: f32) -> Self {
1325        Self {
1326            x: left,
1327            y: top,
1328            w: width,
1329            h: height,
1330        }
1331    }
1332
1333    pub fn width(&self) -> f32 {
1334        self.w
1335    }
1336
1337    pub fn height(&self) -> f32 {
1338        self.h
1339    }
1340
1341    pub fn left(&self) -> f32 {
1342        self.x
1343    }
1344
1345    pub fn top(&self) -> f32 {
1346        self.y
1347    }
1348
1349    pub fn cx(&self) -> f32 {
1350        self.x + self.w / 2.0
1351    }
1352
1353    pub fn cy(&self) -> f32 {
1354        self.y + self.h / 2.0
1355    }
1356}
1357
/// A polygon made of one or more rings of `(x, y)` points.
///
/// Serialization and deserialization are hand-written for this type rather
/// than derived.
#[derive(Clone, Debug, PartialEq)]
pub struct Polygon {
    /// The rings of the polygon; each ring is a list of `(x, y)` points.
    pub rings: Vec<Vec<(f32, f32)>>,
}
1362
1363impl TypeName for Polygon {
1364    fn type_name() -> String {
1365        "polygon".to_owned()
1366    }
1367}
1368
1369impl Polygon {
1370    pub fn new(rings: Vec<Vec<(f32, f32)>>) -> Self {
1371        Self { rings }
1372    }
1373}
1374
1375impl serde::Serialize for Polygon {
1376    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1377    where
1378        S: serde::Serializer,
1379    {
1380        serde::Serialize::serialize(&self.rings, serializer)
1381    }
1382}
1383
1384impl<'de> serde::Deserialize<'de> for Polygon {
1385    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1386    where
1387        D: serde::Deserializer<'de>,
1388    {
1389        // First, deserialize to a raw JSON value to handle various formats
1390        let value = serde_json::Value::deserialize(deserializer)?;
1391
1392        // Try to extract polygon data from various formats
1393        let polygon_value = if let Some(obj) = value.as_object() {
1394            // Format: {"polygon": [...]} or {"rings": [...]}
1395            obj.get("rings")
1396                .or_else(|| obj.get("polygon"))
1397                .cloned()
1398                .unwrap_or(serde_json::Value::Null)
1399        } else {
1400            // Format: [[...]] (direct array)
1401            value
1402        };
1403
1404        // Parse the polygon array, filtering out null/invalid values
1405        let rings = parse_polygon_value(&polygon_value);
1406
1407        Ok(Self { rings })
1408    }
1409}
1410
1411/// Parse polygon value from JSON, handling malformed data gracefully.
1412///
1413/// Handles multiple formats:
1414/// - `[[[x,y],[x,y],...]]` - 3D array with point pairs (correct format)
1415/// - `[[x,y,x,y,...]]` - 2D array with flat coords (COCO format, legacy)
1416/// - `[[null,null,...]]` - corrupted data (returns empty)
1417/// - `null` - missing data (returns empty)
1418fn parse_polygon_value(value: &serde_json::Value) -> Vec<Vec<(f32, f32)>> {
1419    let Some(outer_array) = value.as_array() else {
1420        return vec![];
1421    };
1422
1423    let mut result = Vec::new();
1424
1425    for ring in outer_array {
1426        let Some(ring_array) = ring.as_array() else {
1427            continue;
1428        };
1429
1430        // Check if this is a 3D array (point pairs) or 2D array (flat coords)
1431        let is_3d = ring_array
1432            .first()
1433            .map(|first| first.is_array())
1434            .unwrap_or(false);
1435
1436        let points: Vec<(f32, f32)> = if is_3d {
1437            // 3D format: [[x1,y1], [x2,y2], ...]
1438            ring_array
1439                .iter()
1440                .filter_map(|point| {
1441                    let arr = point.as_array()?;
1442                    if arr.len() >= 2 {
1443                        let x = arr[0].as_f64()? as f32;
1444                        let y = arr[1].as_f64()? as f32;
1445                        if x.is_finite() && y.is_finite() {
1446                            Some((x, y))
1447                        } else {
1448                            None
1449                        }
1450                    } else {
1451                        None
1452                    }
1453                })
1454                .collect()
1455        } else {
1456            // 2D format (flat): [x1, y1, x2, y2, ...]
1457            ring_array
1458                .chunks(2)
1459                .filter_map(|chunk| {
1460                    if chunk.len() >= 2 {
1461                        let x = chunk[0].as_f64()? as f32;
1462                        let y = chunk[1].as_f64()? as f32;
1463                        if x.is_finite() && y.is_finite() {
1464                            Some((x, y))
1465                        } else {
1466                            None
1467                        }
1468                    } else {
1469                        None
1470                    }
1471                })
1472                .collect()
1473        };
1474
1475        // Only add rings with at least 3 valid points
1476        if points.len() >= 3 {
1477            result.push(points);
1478        }
1479    }
1480
1481    result
1482}
1483
/// Helper struct for deserializing annotations from the server.
///
/// The server sends bounding box coordinates as flat fields (x, y, w, h) at the
/// annotation level, but we want to store them as a nested Box2d struct.
///
/// Every field is optional and defaults to `None` when absent from the input.
#[derive(Deserialize)]
struct AnnotationRaw {
    /// ID of the sample the annotation belongs to.
    #[serde(default)]
    sample_id: Option<SampleID>,
    /// Name associated with the annotation.
    #[serde(default)]
    name: Option<String>,
    /// Sequence name, if any.
    #[serde(default)]
    sequence_name: Option<String>,
    /// Frame number, if any.
    #[serde(default)]
    frame_number: Option<u32>,
    /// Group, read from the `group_name` key.
    #[serde(rename = "group_name", default)]
    group: Option<String>,
    /// Object identifier, read from `object_reference` or `object_id`.
    #[serde(rename = "object_reference", alias = "object_id", default)]
    object_id: Option<String>,
    /// Label name, if any.
    #[serde(default)]
    label_name: Option<String>,
    /// Label index, if any.
    #[serde(default)]
    label_index: Option<u64>,
    /// Crowd flag, if any.
    #[serde(default)]
    iscrowd: Option<bool>,
    /// Category frequency, if any.
    #[serde(default)]
    category_frequency: Option<String>,
    // Nested box2d format (if server sends it this way)
    #[serde(default)]
    box2d: Option<Box2d>,
    /// Nested 3D box, if any.
    #[serde(default)]
    box3d: Option<Box3d>,
    /// Polygon, read from the `polygon` key or its `mask` alias.
    #[serde(default, alias = "mask")]
    polygon: Option<Polygon>,
    // Flat box2d fields from server (x, y, w, h at annotation level)
    #[serde(default)]
    x: Option<f64>,
    #[serde(default)]
    y: Option<f64>,
    #[serde(default)]
    w: Option<f64>,
    #[serde(default)]
    h: Option<f64>,
}
1527
/// An annotation attached to a sample.
///
/// Only `Serialize` is derived; deserialization is implemented by hand and
/// goes through `AnnotationRaw`. All `Option` fields are omitted from
/// serialized output when `None`, and `mask` is never serialized.
#[derive(Serialize, Clone, Debug)]
pub struct Annotation {
    /// ID of the sample the annotation belongs to.
    #[serde(skip_serializing_if = "Option::is_none")]
    sample_id: Option<SampleID>,
    /// Name associated with the annotation (`extract_annotation_name` treats
    /// it as a path-like file name).
    #[serde(skip_serializing_if = "Option::is_none")]
    name: Option<String>,
    /// Sequence name, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    sequence_name: Option<String>,
    /// Frame number, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    frame_number: Option<u32>,
    /// Dataset split (train, val, test) - matches `Sample.group`.
    /// JSON field name: "group_name" (Studio API uses this name for both upload
    /// and download).
    #[serde(rename = "group_name", skip_serializing_if = "Option::is_none")]
    group: Option<String>,
    /// Object tracking identifier across frames.
    /// JSON field name: "object_reference" for upload (populate), "object_id"
    /// for download (list).
    #[serde(
        rename = "object_reference",
        alias = "object_id",
        skip_serializing_if = "Option::is_none"
    )]
    object_id: Option<String>,
    /// Label name, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    label_name: Option<String>,
    /// Label index, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    label_index: Option<u64>,
    /// COCO crowd flag: true = crowd region, false = single instance.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    iscrowd: Option<bool>,
    /// LVIS frequency group: "f" (frequent), "c" (common), "r" (rare).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    category_frequency: Option<String>,
    /// 2D bounding box, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    box2d: Option<Box2d>,
    /// 3D bounding box, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    box3d: Option<Box3d>,
    /// Polygon vertices for instance segmentation.
    ///
    /// Wire name is `mask` for historical reasons: the Rust field was
    /// renamed from `mask: Mask` to `polygon: Polygon` after the
    /// `samples.populate2` contract was already locked in, and the server
    /// still expects the key to be `mask`. Uploads that emit `polygon`
    /// here get silently dropped. Deserialisation accepts both names
    /// because `AnnotationRaw` carries `alias = "mask"`.
    #[serde(rename(serialize = "mask"), skip_serializing_if = "Option::is_none")]
    polygon: Option<Polygon>,
    /// PNG-encoded raster mask (populated from Arrow, not from Studio JSON-RPC).
    #[serde(skip)]
    mask: Option<MaskData>,
    /// Detection confidence score for box2d (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    box2d_score: Option<f32>,
    /// Detection confidence score for box3d (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    box3d_score: Option<f32>,
    /// Confidence score for polygon (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    polygon_score: Option<f32>,
    /// Confidence score for mask (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    mask_score: Option<f32>,
}
1592
1593impl<'de> serde::Deserialize<'de> for Annotation {
1594    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1595    where
1596        D: serde::Deserializer<'de>,
1597    {
1598        // Deserialize to AnnotationRaw first to handle server format differences
1599        let raw: AnnotationRaw = serde::Deserialize::deserialize(deserializer)?;
1600
1601        // Prefer nested box2d if present, otherwise construct from flat x/y/w/h
1602        let box2d = raw.box2d.or_else(|| match (raw.x, raw.y, raw.w, raw.h) {
1603            (Some(x), Some(y), Some(w), Some(h)) if w > 0.0 && h > 0.0 => {
1604                Some(Box2d::new(x as f32, y as f32, w as f32, h as f32))
1605            }
1606            _ => None,
1607        });
1608
1609        Ok(Annotation {
1610            sample_id: raw.sample_id,
1611            name: raw.name,
1612            sequence_name: raw.sequence_name,
1613            frame_number: raw.frame_number,
1614            group: raw.group,
1615            object_id: raw.object_id,
1616            label_name: raw.label_name,
1617            label_index: raw.label_index,
1618            iscrowd: raw.iscrowd,
1619            category_frequency: raw.category_frequency,
1620            box2d,
1621            box3d: raw.box3d,
1622            polygon: raw.polygon,
1623            mask: None,
1624            box2d_score: None,
1625            box3d_score: None,
1626            polygon_score: None,
1627            mask_score: None,
1628        })
1629    }
1630}
1631
1632impl Default for Annotation {
1633    fn default() -> Self {
1634        Self::new()
1635    }
1636}
1637
1638impl Annotation {
1639    pub fn new() -> Self {
1640        Self {
1641            sample_id: None,
1642            name: None,
1643            sequence_name: None,
1644            frame_number: None,
1645            group: None,
1646            object_id: None,
1647            label_name: None,
1648            label_index: None,
1649            iscrowd: None,
1650            category_frequency: None,
1651            box2d: None,
1652            box3d: None,
1653            polygon: None,
1654            mask: None,
1655            box2d_score: None,
1656            box3d_score: None,
1657            polygon_score: None,
1658            mask_score: None,
1659        }
1660    }
1661
1662    pub fn set_sample_id(&mut self, sample_id: Option<SampleID>) {
1663        self.sample_id = sample_id;
1664    }
1665
1666    pub fn sample_id(&self) -> Option<SampleID> {
1667        self.sample_id
1668    }
1669
1670    pub fn set_name(&mut self, name: Option<String>) {
1671        self.name = name;
1672    }
1673
1674    pub fn name(&self) -> Option<&String> {
1675        self.name.as_ref()
1676    }
1677
1678    pub fn set_sequence_name(&mut self, sequence_name: Option<String>) {
1679        self.sequence_name = sequence_name;
1680    }
1681
1682    pub fn sequence_name(&self) -> Option<&String> {
1683        self.sequence_name.as_ref()
1684    }
1685
1686    pub fn set_frame_number(&mut self, frame_number: Option<u32>) {
1687        self.frame_number = frame_number;
1688    }
1689
1690    pub fn frame_number(&self) -> Option<u32> {
1691        self.frame_number
1692    }
1693
1694    pub fn set_group(&mut self, group: Option<String>) {
1695        self.group = group;
1696    }
1697
1698    pub fn group(&self) -> Option<&String> {
1699        self.group.as_ref()
1700    }
1701
1702    pub fn object_id(&self) -> Option<&String> {
1703        self.object_id.as_ref()
1704    }
1705
1706    pub fn set_object_id(&mut self, object_id: Option<String>) {
1707        self.object_id = object_id;
1708    }
1709
1710    pub fn label(&self) -> Option<&String> {
1711        self.label_name.as_ref()
1712    }
1713
1714    pub fn set_label(&mut self, label_name: Option<String>) {
1715        self.label_name = label_name;
1716    }
1717
1718    pub fn label_index(&self) -> Option<u64> {
1719        self.label_index
1720    }
1721
1722    pub fn set_label_index(&mut self, label_index: Option<u64>) {
1723        self.label_index = label_index;
1724    }
1725
1726    pub fn iscrowd(&self) -> Option<bool> {
1727        self.iscrowd
1728    }
1729
1730    pub fn set_iscrowd(&mut self, iscrowd: Option<bool>) {
1731        self.iscrowd = iscrowd;
1732    }
1733
1734    pub fn category_frequency(&self) -> Option<&String> {
1735        self.category_frequency.as_ref()
1736    }
1737
1738    pub fn set_category_frequency(&mut self, category_frequency: Option<String>) {
1739        self.category_frequency = category_frequency;
1740    }
1741
1742    pub fn box2d(&self) -> Option<&Box2d> {
1743        self.box2d.as_ref()
1744    }
1745
1746    pub fn set_box2d(&mut self, box2d: Option<Box2d>) {
1747        self.box2d = box2d;
1748    }
1749
1750    pub fn box3d(&self) -> Option<&Box3d> {
1751        self.box3d.as_ref()
1752    }
1753
1754    pub fn set_box3d(&mut self, box3d: Option<Box3d>) {
1755        self.box3d = box3d;
1756    }
1757
1758    pub fn polygon(&self) -> Option<&Polygon> {
1759        self.polygon.as_ref()
1760    }
1761
1762    pub fn set_polygon(&mut self, polygon: Option<Polygon>) {
1763        self.polygon = polygon;
1764    }
1765
1766    pub fn mask(&self) -> Option<&MaskData> {
1767        self.mask.as_ref()
1768    }
1769
1770    pub fn set_mask(&mut self, mask: Option<MaskData>) {
1771        self.mask = mask;
1772    }
1773
1774    pub fn box2d_score(&self) -> Option<f32> {
1775        self.box2d_score
1776    }
1777
1778    pub fn set_box2d_score(&mut self, score: Option<f32>) {
1779        self.box2d_score = score;
1780    }
1781
1782    pub fn box3d_score(&self) -> Option<f32> {
1783        self.box3d_score
1784    }
1785
1786    pub fn set_box3d_score(&mut self, score: Option<f32>) {
1787        self.box3d_score = score;
1788    }
1789
1790    pub fn polygon_score(&self) -> Option<f32> {
1791        self.polygon_score
1792    }
1793
1794    pub fn set_polygon_score(&mut self, score: Option<f32>) {
1795        self.polygon_score = score;
1796    }
1797
1798    pub fn mask_score(&self) -> Option<f32> {
1799        self.mask_score
1800    }
1801
1802    pub fn set_mask_score(&mut self, score: Option<f32>) {
1803        self.mask_score = score;
1804    }
1805}
1806
/// A label belonging to a dataset.
///
/// Fields are private; read them through the accessor methods and change the
/// name or index through the async setters, which also call the client.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Label {
    /// Numeric identifier of the label.
    id: u64,
    /// Identifier of the dataset the label belongs to.
    dataset_id: DatasetID,
    /// Index of the label.
    index: u64,
    /// Name of the label.
    name: String,
}
1814
1815impl Label {
1816    pub fn id(&self) -> u64 {
1817        self.id
1818    }
1819
1820    pub fn dataset_id(&self) -> DatasetID {
1821        self.dataset_id
1822    }
1823
1824    pub fn index(&self) -> u64 {
1825        self.index
1826    }
1827
1828    pub fn name(&self) -> &str {
1829        &self.name
1830    }
1831
1832    pub async fn remove(&self, client: &Client) -> Result<(), Error> {
1833        client.remove_label(self.id()).await
1834    }
1835
1836    pub async fn set_name(&mut self, client: &Client, name: &str) -> Result<(), Error> {
1837        self.name = name.to_string();
1838        client.update_label(self).await
1839    }
1840
1841    pub async fn set_index(&mut self, client: &Client, index: u64) -> Result<(), Error> {
1842        self.index = index;
1843        client.update_label(self).await
1844    }
1845}
1846
1847impl Display for Label {
1848    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1849        write!(f, "{}", self.name())
1850    }
1851}
1852
/// A single label entry inside a [`NewLabel`] payload.
///
/// Serialize-only.
#[derive(Serialize, Clone, Debug)]
pub struct NewLabelObject {
    /// Name of the label.
    pub name: String,
}
1857
/// Serialize-only payload pairing a dataset with a list of labels.
///
/// NOTE(review): presumably the request body for creating labels in a
/// dataset; confirm against the client call that builds it.
#[derive(Serialize, Clone, Debug)]
pub struct NewLabel {
    /// Identifier of the dataset the labels are for.
    pub dataset_id: DatasetID,
    /// The label entries.
    pub labels: Vec<NewLabelObject>,
}
1863
/// A dataset group for organizing samples into logical subsets.
///
/// Groups are used to partition samples within a dataset for different purposes
/// such as training, validation, and testing. Each sample can belong to at most
/// one group at a time.
///
/// This type is deserialize-only: it derives `Deserialize` but not
/// `Serialize`.
///
/// # Common Group Names
///
/// - `"train"` - Training data for model fitting
/// - `"val"` - Validation data for hyperparameter tuning
/// - `"test"` - Test data for final evaluation
///
/// # Examples
///
/// ```rust,no_run
/// use edgefirst_client::{Client, DatasetID};
///
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let client = Client::new()?.with_token_path(None)?;
/// let dataset_id: DatasetID = "ds-123".try_into()?;
///
/// // List all groups in the dataset
/// let groups = client.groups(dataset_id).await?;
/// for group in groups {
///     println!("Group [{}]: {}", group.id, group.name);
/// }
/// # Ok(())
/// # }
/// ```
#[derive(Deserialize, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Group {
    /// The unique numeric identifier for this group.
    ///
    /// Group IDs are assigned by the server and are unique within an
    /// organization.
    pub id: u64,

    /// The human-readable name of the group.
    ///
    /// Common names include "train", "val", "test", but any string is valid.
    pub name: String,
}
1906
/// Derive the `(name, frame)` pair for a row from an annotation.
///
/// Returns `None` when the annotation has no name, or when the name has no
/// file stem or the stem is not valid UTF-8 — this applies to sequence
/// annotations too, because the name is inspected before the sequence.
#[cfg(feature = "polars")]
fn extract_annotation_name(ann: &Annotation) -> Option<(String, Option<u32>)> {
    let stem = std::path::Path::new(ann.name.as_ref()?)
        .file_stem()?
        .to_str()?;

    // Sequence annotations are keyed by sequence name plus frame number;
    // everything else is keyed by the file stem alone.
    let row = if let Some(sequence) = &ann.sequence_name {
        (sequence.clone(), ann.frame_number)
    } else {
        (stem.to_owned(), None)
    };
    Some(row)
}
1921
/// Turn a polygon into the nested-list Series used by the 2026.04 schema.
///
/// The result has one entry per ring; each entry is a Series of interleaved
/// coordinates `[x1, y1, x2, y2, ...]`. Both the outer and the inner Series
/// are created with an empty name.
#[cfg(feature = "polars")]
fn convert_polygon_to_nested_series(polygon: &Polygon) -> Series {
    let mut rings: Vec<Option<Series>> = Vec::new();
    for ring in polygon.rings.iter() {
        // Flatten the (x, y) pairs of this ring into x, y, x, y, ...
        let mut flat: Vec<f32> = Vec::new();
        for &(x, y) in ring.iter() {
            flat.push(x);
            flat.push(y);
        }
        rings.push(Some(Series::new("".into(), flat)));
    }
    Series::new("".into(), rings)
}
1937
/// Build a DataFrame in the 2026.04 schema from a slice of samples.
///
/// Every annotation contributes one row. A sample without annotations
/// contributes a single row whose annotation columns are null. Rows for
/// which no name can be derived are skipped. Once all columns are built,
/// any column whose values are all null is dropped, so the result only
/// holds columns that carry data. The `name` column is always present.
///
/// # Schema (2026.04)
///
/// - `name`: Sample name (String) - ALWAYS PRESENT
/// - `frame`: Frame number (UInt32)
/// - `object_id`: Object tracking ID (String)
/// - `label`: Object label (Categorical)
/// - `label_index`: Label index (UInt64)
/// - `group`: Dataset group (Categorical)
/// - `polygon`: Segmentation polygon rings (List<List<Float32>>)
/// - `box2d`: 2D bounding box [cx, cy, w, h] (Array<Float32, 4>)
/// - `box3d`: 3D bounding box [x, y, z, w, h, l] (Array<Float32, 6>)
/// - `mask`: PNG-encoded raster mask (Binary)
/// - `box2d_score`: Box2d confidence (Float32)
/// - `box3d_score`: Box3d confidence (Float32)
/// - `polygon_score`: Polygon confidence (Float32)
/// - `mask_score`: Mask confidence (Float32)
/// - `size`: Image size [width, height] (Array<UInt32, 2>)
/// - `location`: GPS [lat, lon] (Array<Float32, 2>)
/// - `pose`: IMU [yaw, pitch, roll] (Array<Float32, 3>)
/// - `degradation`: Image degradation (String)
/// - `iscrowd`: COCO crowd flag (Boolean)
/// - `category_frequency`: LVIS frequency group (Categorical)
/// - `neg_label_indices`: Verified-absent label indices (List<UInt32>)
/// - `not_exhaustive_label_indices`: Incomplete label indices (List<UInt32>)
/// - `timing`: Pipeline timing (Struct{load, preprocess, inference, decode} of Int64)
///
/// # Errors
///
/// Propagates any Polars error raised while casting a column to its schema
/// type, while assembling the `timing` struct, or while building the final
/// frame.
///
/// # Example
///
/// ```rust,no_run
/// use edgefirst_client::{Client, samples_dataframe};
///
/// # async fn example() -> Result<(), edgefirst_client::Error> {
/// # let client = Client::new()?;
/// # let dataset_id = 1.into();
/// # let annotation_set_id = 1.into();
/// let samples = client
///     .samples(dataset_id, Some(annotation_set_id), &[], &[], &[], None)
///     .await?;
/// let df = samples_dataframe(&samples)?;
/// println!("DataFrame shape: {:?}", df.shape());
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "polars")]
pub fn samples_dataframe(samples: &[Sample]) -> Result<DataFrame, Error> {
    // Cast a string column to a named categorical with the given physical
    // index type and category capacity.
    fn categorical_column(
        column: &'static str,
        categories: &'static str,
        physical: CategoricalPhysical,
        capacity: usize,
        values: Vec<Option<String>>,
    ) -> Result<Column, Error> {
        let dtype = DataType::Categorical(
            Categories::new(categories.into(), categories.into(), physical),
            Arc::new(CategoricalMapping::with_hasher(
                capacity,
                Default::default(),
            )),
        );
        Ok(Series::new(column.into(), values).cast(&dtype)?.into())
    }

    // Build a column from per-row optional Series and cast it to `dtype`.
    fn nested_column(
        column: &'static str,
        rows: Vec<Option<Series>>,
        dtype: DataType,
    ) -> Result<Column, Error> {
        Ok(Series::new(column.into(), rows).cast(&dtype)?.into())
    }

    // One accumulator per output column; every row pushes to all of them.
    let mut names: Vec<String> = Vec::new();
    let mut frames: Vec<Option<u32>> = Vec::new();
    let mut objects: Vec<Option<String>> = Vec::new();
    let mut labels: Vec<Option<String>> = Vec::new();
    let mut label_indices: Vec<Option<u64>> = Vec::new();
    let mut groups: Vec<Option<String>> = Vec::new();
    let mut polygons: Vec<Option<Series>> = Vec::new();
    let mut boxes2d: Vec<Option<Series>> = Vec::new();
    let mut boxes3d: Vec<Option<Series>> = Vec::new();
    let mut mask_bytes: Vec<Option<Vec<u8>>> = Vec::new();
    let mut box2d_scores: Vec<Option<f32>> = Vec::new();
    let mut box3d_scores: Vec<Option<f32>> = Vec::new();
    let mut polygon_scores: Vec<Option<f32>> = Vec::new();
    let mut mask_scores: Vec<Option<f32>> = Vec::new();
    let mut sizes: Vec<Option<Vec<u32>>> = Vec::new();
    let mut locations: Vec<Option<Vec<f32>>> = Vec::new();
    let mut poses: Vec<Option<Vec<f32>>> = Vec::new();
    let mut degradations: Vec<Option<String>> = Vec::new();
    let mut iscrowds: Vec<Option<bool>> = Vec::new();
    let mut category_frequencies: Vec<Option<String>> = Vec::new();
    let mut neg_label_indices_vec: Vec<Option<Vec<u32>>> = Vec::new();
    let mut not_exhaustive_label_indices_vec: Vec<Option<Vec<u32>>> = Vec::new();
    let mut timing_load: Vec<Option<i64>> = Vec::new();
    let mut timing_preprocess: Vec<Option<i64>> = Vec::new();
    let mut timing_inference: Vec<Option<i64>> = Vec::new();
    let mut timing_decode: Vec<Option<i64>> = Vec::new();

    for sample in samples {
        // Sample-level values, computed once and repeated on every row.
        let size = sample
            .width
            .zip(sample.height)
            .map(|(w, h)| vec![w, h]);

        let geo = sample.location.as_ref();
        let location = geo
            .and_then(|loc| loc.gps.as_ref())
            .map(|gps| vec![gps.lat as f32, gps.lon as f32]);
        let pose = geo
            .and_then(|loc| loc.imu.as_ref())
            .map(|imu| vec![imu.yaw as f32, imu.pitch as f32, imu.roll as f32]);

        let timing = sample.timing.as_ref();
        let t_load = timing.and_then(|t| t.load);
        let t_preprocess = timing.and_then(|t| t.preprocess);
        let t_inference = timing.and_then(|t| t.inference);
        let t_decode = timing.and_then(|t| t.decode);

        // A sample without annotations yields at most one row, named after
        // the sample itself and carrying no annotation.
        let bare_row: Option<(String, Option<u32>, Option<&Annotation>)> =
            if sample.annotations.is_empty() {
                extract_annotation_name_from_sample(sample).map(|(name, frame)| (name, frame, None))
            } else {
                None
            };

        // Otherwise one row per annotation that has a usable name. This
        // iterator is empty exactly when `bare_row` may be populated.
        let annotated_rows = sample.annotations.iter().filter_map(|ann| {
            extract_annotation_name(ann).map(|(name, frame)| (name, frame, Some(ann)))
        });

        for (name, frame, ann) in bare_row.into_iter().chain(annotated_rows) {
            names.push(name);
            frames.push(frame);

            // Annotation-level fields: null when the row has no annotation.
            objects.push(ann.and_then(|a| a.object_id().cloned()));
            labels.push(ann.and_then(|a| a.label_name.clone()));
            label_indices.push(ann.and_then(|a| a.label_index));
            polygons.push(
                ann.and_then(|a| a.polygon.as_ref())
                    .map(convert_polygon_to_nested_series),
            );
            boxes2d.push(
                ann.and_then(|a| a.box2d.as_ref())
                    .map(|b| Series::new("box2d".into(), [b.cx(), b.cy(), b.width(), b.height()])),
            );
            boxes3d.push(
                ann.and_then(|a| a.box3d.as_ref())
                    .map(|b| Series::new("box3d".into(), [b.x, b.y, b.z, b.w, b.h, b.l])),
            );
            mask_bytes.push(
                ann.and_then(|a| a.mask.as_ref())
                    .map(|m| m.as_bytes().to_vec()),
            );
            box2d_scores.push(ann.and_then(|a| a.box2d_score()));
            box3d_scores.push(ann.and_then(|a| a.box3d_score()));
            polygon_scores.push(ann.and_then(|a| a.polygon_score()));
            mask_scores.push(ann.and_then(|a| a.mask_score()));
            iscrowds.push(ann.and_then(|a| a.iscrowd));
            category_frequencies.push(ann.and_then(|a| a.category_frequency.clone()));

            // Sample-level fields: identical on every row of this sample.
            groups.push(sample.group.clone());
            sizes.push(size.clone());
            locations.push(location.clone());
            poses.push(pose.clone());
            degradations.push(sample.degradation.clone());
            neg_label_indices_vec.push(sample.neg_label_indices.clone());
            not_exhaustive_label_indices_vec.push(sample.not_exhaustive_label_indices.clone());
            timing_load.push(t_load);
            timing_preprocess.push(t_preprocess);
            timing_inference.push(t_inference);
            timing_decode.push(t_decode);
        }
    }

    let row_count = names.len();

    let names_col = Column::from(Series::new("name".into(), names));
    let frames_col = Column::from(Series::new("frame".into(), frames));
    let objects_col = Column::from(Series::new("object_id".into(), objects));

    // The column is called "label", not "label_name".
    //
    // A U16 physical type lets taxonomies with more than 255 labels fit
    // (LVIS v1 has 1,203 categories); it caps at 65,535 and costs one extra
    // byte per row compared to U8.
    let labels_col = categorical_column(
        "label",
        "labels",
        CategoricalPhysical::U16,
        u16::MAX as usize,
        labels,
    )?;

    let label_indices_col = Column::from(Series::new("label_index".into(), label_indices));

    // The column is called "group", not "group_name".
    let groups_col = categorical_column(
        "group",
        "groups",
        CategoricalPhysical::U8,
        u8::MAX as usize,
        groups,
    )?;

    // Polygon: List(List(Float32)) — nested rings.
    // Series::new() over Vec<Option<Series>> panics when the Some entries are
    // list[f32] while the None entries infer as list[null], so the all-null
    // case is handled apart and every present entry is cast to a typed list
    // before the outer Series is built.
    let polygons_col: Column = if polygons.iter().any(Option::is_some) {
        let ring_list = DataType::List(Box::new(DataType::Float32));
        let typed_polygons: Vec<Option<Series>> = polygons
            .into_iter()
            .map(|entry| entry.map(|rings| rings.cast(&ring_list).unwrap_or(rings)))
            .collect();
        Series::new("polygon".into(), &typed_polygons)
            .cast(&DataType::List(Box::new(ring_list)))?
            .into()
    } else {
        // Entirely null: emit a null column and let the drop rule remove it.
        Series::new_null("polygon".into(), polygons.len()).into()
    };

    let boxes2d_col = nested_column(
        "box2d",
        boxes2d,
        DataType::Array(Box::new(DataType::Float32), 4),
    )?;
    let boxes3d_col = nested_column(
        "box3d",
        boxes3d,
        DataType::Array(Box::new(DataType::Float32), 6),
    )?;

    // Mask: Binary (raw PNG bytes)
    let mask_col = Column::from(Series::new("mask".into(), mask_bytes));

    // Score columns: Float32
    let box2d_score_col = Column::from(Series::new("box2d_score".into(), box2d_scores));
    let box3d_score_col = Column::from(Series::new("box3d_score".into(), box3d_scores));
    let polygon_score_col = Column::from(Series::new("polygon_score".into(), polygon_scores));
    let mask_score_col = Column::from(Series::new("mask_score".into(), mask_scores));

    // Optional metadata columns (2025.10)
    let sizes_col = nested_column(
        "size",
        sizes
            .into_iter()
            .map(|row| row.map(|dims| Series::new("size".into(), dims)))
            .collect(),
        DataType::Array(Box::new(DataType::UInt32), 2),
    )?;
    let locations_col = nested_column(
        "location",
        locations
            .into_iter()
            .map(|row| row.map(|lat_lon| Series::new("location".into(), lat_lon)))
            .collect(),
        DataType::Array(Box::new(DataType::Float32), 2),
    )?;
    let poses_col = nested_column(
        "pose",
        poses
            .into_iter()
            .map(|row| row.map(|angles| Series::new("pose".into(), angles)))
            .collect(),
        DataType::Array(Box::new(DataType::Float32), 3),
    )?;

    let degradations_col = Column::from(Series::new("degradation".into(), degradations));

    // LVIS extension columns
    let iscrowds_col = Column::from(Series::new("iscrowd".into(), iscrowds));

    let category_frequencies_col = categorical_column(
        "category_frequency",
        "cat_freq",
        CategoricalPhysical::U8,
        u8::MAX as usize,
        category_frequencies,
    )?;

    let neg_label_indices_col = nested_column(
        "neg_label_indices",
        neg_label_indices_vec
            .into_iter()
            .map(|row| row.map(|indices| Series::new("neg_label_indices".into(), indices)))
            .collect(),
        DataType::List(Box::new(DataType::UInt32)),
    )?;
    let not_exhaustive_label_indices_col = nested_column(
        "not_exhaustive_label_indices",
        not_exhaustive_label_indices_vec
            .into_iter()
            .map(|row| {
                row.map(|indices| Series::new("not_exhaustive_label_indices".into(), indices))
            })
            .collect(),
        DataType::List(Box::new(DataType::UInt32)),
    )?;

    // Timing: Struct{load, preprocess, inference, decode} of Int64
    let timing_fields = [
        Series::new("load".into(), &timing_load),
        Series::new("preprocess".into(), &timing_preprocess),
        Series::new("inference".into(), &timing_inference),
        Series::new("decode".into(), &timing_decode),
    ];
    let timing_col: Column =
        StructChunked::from_series("timing".into(), row_count, timing_fields.iter())?
            .into_series()
            .into();

    // Gather every column in schema order, then drop those where ALL values
    // are null ("name" is always kept).
    let mut columns: Vec<Column> = vec![
        names_col,
        frames_col,
        objects_col,
        labels_col,
        label_indices_col,
        groups_col,
        polygons_col,
        boxes2d_col,
        boxes3d_col,
        mask_col,
        box2d_score_col,
        box3d_score_col,
        polygon_score_col,
        mask_score_col,
        sizes_col,
        locations_col,
        poses_col,
        degradations_col,
        iscrowds_col,
        category_frequencies_col,
        neg_label_indices_col,
        not_exhaustive_label_indices_col,
        timing_col,
    ];
    columns.retain(|col| col.name() == "name" || !is_all_null_column(col));

    Ok(DataFrame::new(row_count, columns)?)
}
2316
/// Reports whether a column carries no data at all.
///
/// An empty column and a column whose null count equals its length both
/// count as all-null. A `Struct` column is additionally all-null when every
/// one of its fields is entirely null, even if the outer rows themselves
/// are not null.
#[cfg(feature = "polars")]
fn is_all_null_column(col: &Column) -> bool {
    let len = col.len();
    if len == 0 || col.null_count() == len {
        return true;
    }

    match col.dtype() {
        // Struct rows can be non-null while every inner field is null.
        DataType::Struct(..) => match col.as_materialized_series().struct_() {
            Ok(fields) => fields
                .fields_as_series()
                .iter()
                .all(|field| field.null_count() == field.len()),
            Err(_) => false,
        },
        _ => false,
    }
}
2339
/// Derive the `(name, frame)` pair for a row directly from a sample; used
/// for samples that have no annotations.
///
/// Returns `None` when the sample has no image name, or when that name has
/// no file stem or the stem is not valid UTF-8 — this applies to sequence
/// samples too, because the image name is inspected before the sequence.
#[cfg(feature = "polars")]
fn extract_annotation_name_from_sample(sample: &Sample) -> Option<(String, Option<u32>)> {
    let stem = std::path::Path::new(sample.image_name.as_ref()?)
        .file_stem()?
        .to_str()?;

    // Sequence samples are keyed by sequence name plus frame number;
    // everything else is keyed by the file stem alone.
    let row = if let Some(sequence) = &sample.sequence_name {
        (sequence.clone(), sample.frame_number)
    } else {
        (stem.to_owned(), None)
    };
    Some(row)
}
2355
2356// ============================================================================
2357// PURE FUNCTIONS FOR TESTABLE CORE LOGIC
2358// ============================================================================
2359
/// Extract the sample name from an image filename by:
/// 1. Removing the file extension (everything after the last dot)
/// 2. Removing a trailing `.camera` suffix if present
///
/// Neither step is applied when it would leave an empty name. Only a
/// `.camera` that ends the name is removed; a `.camera` followed by more
/// characters (for example `.cameraman`) is left untouched.
///
/// # Examples
/// - "scene_001.camera.jpg" → "scene_001"
/// - "image.jpg" → "image"
/// - ".jpg" → ".jpg" (preserves filenames starting with dot)
/// - "rig.cameraman.png" → "rig.cameraman" (not a `.camera` suffix)
fn extract_sample_name(image_name: &str) -> String {
    // Step 1: drop the extension unless nothing would remain (".jpg" case).
    let stem = match image_name.rsplit_once('.') {
        Some((stem, _extension)) if !stem.is_empty() => stem,
        _ => image_name,
    };

    // Step 2: drop a trailing ".camera" unless nothing would remain.
    // `strip_suffix` only matches at the very end, so names that merely
    // contain ".camera" somewhere in the middle are not truncated.
    let base = match stem.strip_suffix(".camera") {
        Some(base) if !base.is_empty() => base,
        _ => stem,
    };

    base.to_string()
}
2394
2395/// Resolve a file for a given file type from sample data.
2396///
2397/// Returns the matching `SampleFile` if found, which may contain either
2398/// a URL (newer datasets) or inline data (legacy datasets).
2399///
2400/// # Arguments
2401/// * `file_type` - The type of file to resolve (e.g., LidarPcd, RadarPcd)
2402/// * `files` - The sample's file list
2403fn resolve_file<'a>(file_type: &FileType, files: &'a [SampleFile]) -> Option<&'a SampleFile> {
2404    match file_type {
2405        FileType::Image => None, // Image uses image_url field, not files
2406        FileType::All => None,   // All should be expanded before calling this
2407        file => {
2408            // Get all possible names for this file type (primary + aliases)
2409            let type_names = file_type_names(file);
2410            files
2411                .iter()
2412                .find(|f| type_names.contains(&f.r#type.as_str()))
2413        }
2414    }
2415}
2416
2417/// Returns all possible server-side names for a file type.
2418/// The server uses specific naming conventions in the STUDIO_DB_TYPE_MAP.
2419fn file_type_names(file_type: &FileType) -> Vec<&'static str> {
2420    match file_type {
2421        FileType::Image => vec!["image"],
2422        FileType::LidarPcd => vec!["lidar.pcd"],
2423        FileType::LidarDepth => vec!["lidar.depth", "depth.png", "depthmap"],
2424        FileType::LidarReflect => vec!["lidar.reflect"],
2425        FileType::RadarPcd => vec!["radar.pcd", "pcd"],
2426        FileType::RadarCube => vec!["radar.png", "cube"],
2427        FileType::All => vec![],
2428    }
2429}
2430
2431// ============================================================================
2432// DESERIALIZATION FORMAT CONVERSION HELPERS
2433// ============================================================================
2434
2435/// Convert annotations grouped format to flat Vec<Annotation>.
2436///
2437/// Pure function that handles the conversion from the server's legacy format
2438/// (HashMap<String, Vec<Annotation>>) to the flat Vec<Annotation>
2439/// representation.
2440///
2441/// # Arguments
2442/// * `map` - HashMap where keys are annotation types ("bbox", "box3d", "mask")
2443fn convert_annotations_map_to_vec(map: HashMap<String, Vec<Annotation>>) -> Vec<Annotation> {
2444    let mut all_annotations = Vec::new();
2445    if let Some(bbox_anns) = map.get("bbox") {
2446        all_annotations.extend(bbox_anns.clone());
2447    }
2448    if let Some(box3d_anns) = map.get("box3d") {
2449        all_annotations.extend(box3d_anns.clone());
2450    }
2451    if let Some(mask_anns) = map.get("mask") {
2452        all_annotations.extend(mask_anns.clone());
2453    }
2454    all_annotations
2455}
2456
2457// ============================================================================
2458// GPS/IMU VALIDATION HELPERS
2459// ============================================================================
2460
/// Check that a GPS position is finite and inside the geographic ranges.
///
/// Pure function. Both values are checked for finiteness first (latitude,
/// then longitude); only afterwards are the ranges checked, in the same
/// order. The first failing check determines the error.
///
/// # Arguments
/// * `lat` - Latitude in degrees
/// * `lon` - Longitude in degrees
///
/// # Returns
/// `Ok(())` if valid, `Err(String)` with a descriptive error message
/// otherwise
///
/// # Valid Ranges
/// - Latitude: -90.0 to +90.0 degrees (inclusive)
/// - Longitude: -180.0 to +180.0 degrees (inclusive)
fn validate_gps_coordinates(lat: f64, lon: f64) -> Result<(), String> {
    // (label, value, symmetric limit in degrees)
    let axes = [("latitude", lat, 90_i32), ("longitude", lon, 180_i32)];

    for (label, value, _) in axes {
        if !value.is_finite() {
            return Err(format!("GPS {} is not finite: {}", label, value));
        }
    }

    for (label, value, limit) in axes {
        // Values are finite here, so a magnitude test equals the range test.
        if value.abs() > f64::from(limit) {
            return Err(format!(
                "GPS {} out of range [-{}, {}]: {}",
                label, limit, limit, value
            ));
        }
    }

    Ok(())
}
2491
/// Check that IMU orientation angles are finite and inside their ranges.
///
/// Pure function. All three angles are checked for finiteness first (roll,
/// pitch, yaw); only afterwards are the ranges checked, in the same order.
/// The first failing check determines the error.
///
/// # Arguments
/// * `roll` - Roll angle in degrees
/// * `pitch` - Pitch angle in degrees
/// * `yaw` - Yaw angle in degrees
///
/// # Returns
/// `Ok(())` if valid, `Err(String)` with a descriptive error message
/// otherwise
///
/// # Valid Ranges
/// - Roll: -180.0 to +180.0 degrees (inclusive)
/// - Pitch: -90.0 to +90.0 degrees (inclusive)
/// - Yaw: -180.0 to +180.0 degrees (inclusive); a yaw expressed in the
///   0 to 360 convention is rejected once it exceeds 180
fn validate_imu_orientation(roll: f64, pitch: f64, yaw: f64) -> Result<(), String> {
    // (label, value, symmetric limit in degrees)
    let axes = [
        ("roll", roll, 180_i32),
        ("pitch", pitch, 90_i32),
        ("yaw", yaw, 180_i32),
    ];

    for (label, value, _) in axes {
        if !value.is_finite() {
            return Err(format!("IMU {} is not finite: {}", label, value));
        }
    }

    for (label, value, limit) in axes {
        // Values are finite here, so a magnitude test equals the range test.
        if value.abs() > f64::from(limit) {
            return Err(format!(
                "IMU {} out of range [-{}, {}]: {}",
                label, limit, limit, value
            ));
        }
    }

    Ok(())
}
2531
2532// ============================================================================
2533// MASK POLYGON CONVERSION HELPERS
2534// ============================================================================
2535
2536/// Unflatten coordinates with NaN separators back to nested polygon
2537/// structure.
2538///
2539/// Converts flat list of coordinates with NaN separators back to nested
2540/// polygon structure:
2541/// - Input: [x1, y1, x2, y2, NaN, x3, y3]
2542/// - Output: [[(x1, y1), (x2, y2)], [(x3, y3)]]
2543///
2544/// This function is used when parsing Arrow files to reconstruct the nested
2545/// polygon format required by the EdgeFirst Studio API.
2546///
2547/// # Examples
2548///
2549/// ```rust
2550/// use edgefirst_client::unflatten_polygon_coordinates;
2551///
2552/// let coords = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0];
2553/// let polygons = unflatten_polygon_coordinates(&coords);
2554///
2555/// assert_eq!(polygons.len(), 2);
2556/// assert_eq!(polygons[0], vec![(1.0, 2.0), (3.0, 4.0)]);
2557/// assert_eq!(polygons[1], vec![(5.0, 6.0)]);
2558/// ```
2559#[cfg(feature = "polars")]
pub fn unflatten_polygon_coordinates(coords: &[f32]) -> Vec<Vec<(f32, f32)>> {
    let mut rings: Vec<Vec<(f32, f32)>> = Vec::new();
    let mut ring: Vec<(f32, f32)> = Vec::new();
    let mut rest = coords;

    loop {
        match rest {
            [] => break,
            // A complete point: both x and y are real numbers.
            [x, y, tail @ ..] if !x.is_nan() && !y.is_nan() => {
                ring.push((*x, *y));
                rest = tail;
            }
            // Anything else consumes exactly one value:
            // - a NaN separator closes the ring collected so far (if any);
            // - an x whose y is NaN is malformed and dropped, leaving the NaN
            //   to act as a separator on the next pass;
            // - a single trailing value without a partner is dropped.
            [head, tail @ ..] => {
                if head.is_nan() && !ring.is_empty() {
                    rings.push(std::mem::take(&mut ring));
                }
                rest = tail;
            }
        }
    }

    // Flush the ring that was still open when the input ended.
    if !ring.is_empty() {
        rings.push(ring);
    }

    rings
}
2593
2594#[cfg(test)]
2595mod tests {
2596    use super::*;
2597
2598    // ============================================================================
2599    // TEST HELPER FUNCTIONS (Pure Logic for Testing)
2600    // ============================================================================
2601
2602    /// Flatten legacy grouped annotation format to a single vector.
2603    ///
2604    /// Converts HashMap<String, Vec<Annotation>> (with bbox/box3d/mask keys)
2605    /// into a flat Vec<Annotation> in deterministic order.
2606    fn flatten_annotation_map(
2607        map: std::collections::HashMap<String, Vec<Annotation>>,
2608    ) -> Vec<Annotation> {
2609        let mut all_annotations = Vec::new();
2610
2611        // Process in fixed order for deterministic results
2612        for key in ["bbox", "box3d", "mask"] {
2613            if let Some(mut anns) = map.get(key).cloned() {
2614                all_annotations.append(&mut anns);
2615            }
2616        }
2617
2618        all_annotations
2619    }
2620
2621    /// Get the JSON field name for the Annotation group field (for tests).
2622    fn annotation_group_field_name() -> &'static str {
2623        "group_name"
2624    }
2625
2626    /// Get the JSON field name for the Annotation object_id field (for tests).
2627    fn annotation_object_id_field_name() -> &'static str {
2628        "object_reference"
2629    }
2630
2631    /// Get the accepted alias for the Annotation object_id field (for tests).
2632    fn annotation_object_id_alias() -> &'static str {
2633        "object_id"
2634    }
2635
2636    /// Validate that annotation field names match expected values in JSON (for
2637    /// tests).
2638    fn validate_annotation_field_names(
2639        json_str: &str,
2640        expected_group: bool,
2641        expected_object_ref: bool,
2642    ) -> Result<(), String> {
2643        if expected_group && !json_str.contains("\"group_name\"") {
2644            return Err("Missing expected field: group_name".to_string());
2645        }
2646        if expected_object_ref && !json_str.contains("\"object_reference\"") {
2647            return Err("Missing expected field: object_reference".to_string());
2648        }
2649        Ok(())
2650    }
2651
2652    // ==== FileType Conversion Tests ====
2653    #[test]
2654    fn test_file_type_conversions() {
2655        // to_string() returns server API type names
2656        let api_cases = vec![
2657            (FileType::Image, "image"),
2658            (FileType::LidarPcd, "lidar.pcd"),
2659            (FileType::LidarDepth, "lidar.depth"),
2660            (FileType::LidarReflect, "lidar.reflect"),
2661            (FileType::RadarPcd, "radar.pcd"),
2662            (FileType::RadarCube, "radar.png"),
2663        ];
2664
2665        // file_extension() returns file extensions for saving
2666        let ext_cases = vec![
2667            (FileType::Image, "jpg"),
2668            (FileType::LidarPcd, "lidar.pcd"),
2669            (FileType::LidarDepth, "lidar.png"),
2670            (FileType::LidarReflect, "lidar.jpg"),
2671            (FileType::RadarPcd, "radar.pcd"),
2672            (FileType::RadarCube, "radar.png"),
2673        ];
2674
2675        // Test: Display → to_string() returns server API names
2676        for (file_type, expected_str) in &api_cases {
2677            assert_eq!(file_type.to_string(), *expected_str);
2678        }
2679
2680        // Test: file_extension() returns correct extensions
2681        for (file_type, expected_ext) in &ext_cases {
2682            assert_eq!(file_type.file_extension(), *expected_ext);
2683        }
2684
2685        // Test: try_from() string parsing (accepts multiple aliases)
2686        assert_eq!(
2687            FileType::try_from("lidar.depth").unwrap(),
2688            FileType::LidarDepth
2689        );
2690        assert_eq!(
2691            FileType::try_from("lidar.png").unwrap(),
2692            FileType::LidarDepth
2693        );
2694        assert_eq!(
2695            FileType::try_from("depth.png").unwrap(),
2696            FileType::LidarDepth
2697        );
2698        assert_eq!(
2699            FileType::try_from("lidar.reflect").unwrap(),
2700            FileType::LidarReflect
2701        );
2702        assert_eq!(
2703            FileType::try_from("lidar.jpg").unwrap(),
2704            FileType::LidarReflect
2705        );
2706        assert_eq!(
2707            FileType::try_from("lidar.jpeg").unwrap(),
2708            FileType::LidarReflect
2709        );
2710
2711        // Test: Invalid input
2712        assert!(FileType::try_from("invalid").is_err());
2713
2714        // Test: Round-trip (Display → try_from)
2715        for (file_type, _) in &api_cases {
2716            let s = file_type.to_string();
2717            let parsed = FileType::try_from(s.as_str()).unwrap();
2718            assert_eq!(parsed, *file_type);
2719        }
2720    }
2721
2722    // ==== AnnotationType Conversion Tests ====
2723    #[test]
2724    fn test_annotation_type_conversions() {
2725        let cases = vec![
2726            (AnnotationType::Box2d, "box2d"),
2727            (AnnotationType::Box3d, "box3d"),
2728            (AnnotationType::Polygon, "polygon"),
2729            (AnnotationType::Mask, "mask"),
2730        ];
2731
2732        // Test: Display → to_string()
2733        for (ann_type, expected_str) in &cases {
2734            assert_eq!(ann_type.to_string(), *expected_str);
2735        }
2736
2737        // Test: try_from() string parsing
2738        assert_eq!(
2739            AnnotationType::try_from("box2d").unwrap(),
2740            AnnotationType::Box2d
2741        );
2742        assert_eq!(
2743            AnnotationType::try_from("box3d").unwrap(),
2744            AnnotationType::Box3d
2745        );
2746        assert_eq!(
2747            AnnotationType::try_from("polygon").unwrap(),
2748            AnnotationType::Polygon
2749        );
2750        // "mask" maps to Polygon for backward compat
2751        assert_eq!(
2752            AnnotationType::try_from("mask").unwrap(),
2753            AnnotationType::Polygon
2754        );
2755        // "raster" maps to Mask
2756        assert_eq!(
2757            AnnotationType::try_from("raster").unwrap(),
2758            AnnotationType::Mask
2759        );
2760
2761        // Test: From<String> (backward compatibility)
2762        assert_eq!(
2763            AnnotationType::from("box2d".to_string()),
2764            AnnotationType::Box2d
2765        );
2766        assert_eq!(
2767            AnnotationType::from("box3d".to_string()),
2768            AnnotationType::Box3d
2769        );
2770        assert_eq!(
2771            AnnotationType::from("polygon".to_string()),
2772            AnnotationType::Polygon
2773        );
2774        // "mask" string maps to Polygon for backward compat
2775        assert_eq!(
2776            AnnotationType::from("mask".to_string()),
2777            AnnotationType::Polygon
2778        );
2779
2780        // Invalid defaults to Box2d for backward compatibility
2781        assert_eq!(
2782            AnnotationType::from("invalid".to_string()),
2783            AnnotationType::Box2d
2784        );
2785
2786        // Test: Invalid input
2787        assert!(AnnotationType::try_from("invalid").is_err());
2788
2789        // Test: Round-trip (Display → try_from)
2790        // Note: Polygon round-trips ("polygon" → Polygon), but Mask does not
2791        // because "mask" → Polygon (backward compat). Mask displays as "mask"
2792        // but parses to Polygon.
2793        assert_eq!(
2794            AnnotationType::try_from(AnnotationType::Box2d.to_string().as_str()).unwrap(),
2795            AnnotationType::Box2d
2796        );
2797        assert_eq!(
2798            AnnotationType::try_from(AnnotationType::Box3d.to_string().as_str()).unwrap(),
2799            AnnotationType::Box3d
2800        );
2801        assert_eq!(
2802            AnnotationType::try_from(AnnotationType::Polygon.to_string().as_str()).unwrap(),
2803            AnnotationType::Polygon
2804        );
2805    }
2806
2807    // ==== Pure Function: extract_sample_name Tests ====
2808    #[test]
2809    fn test_extract_sample_name_with_extension_and_camera() {
2810        assert_eq!(extract_sample_name("scene_001.camera.jpg"), "scene_001");
2811    }
2812
2813    #[test]
2814    fn test_extract_sample_name_multiple_dots() {
2815        assert_eq!(extract_sample_name("image.v2.camera.png"), "image.v2");
2816    }
2817
2818    #[test]
2819    fn test_extract_sample_name_extension_only() {
2820        assert_eq!(extract_sample_name("test.jpg"), "test");
2821    }
2822
2823    #[test]
2824    fn test_extract_sample_name_no_extension() {
2825        assert_eq!(extract_sample_name("test"), "test");
2826    }
2827
2828    #[test]
2829    fn test_extract_sample_name_edge_case_dot_prefix() {
2830        assert_eq!(extract_sample_name(".jpg"), ".jpg");
2831    }
2832
2833    // ==== File Resolution Tests ====
2834    #[test]
2835    fn test_resolve_file_image_type_returns_none() {
2836        // Image type uses image_url field, not files array
2837        let files = vec![];
2838        let result = resolve_file(&FileType::Image, &files);
2839        assert!(result.is_none());
2840    }
2841
2842    #[test]
2843    fn test_resolve_file_lidar_pcd() {
2844        let files = vec![
2845            SampleFile::with_url(
2846                "lidar.pcd".to_string(),
2847                "https://example.com/file.pcd".to_string(),
2848            ),
2849            SampleFile::with_url(
2850                "radar.pcd".to_string(),
2851                "https://example.com/radar.pcd".to_string(),
2852            ),
2853        ];
2854        let result = resolve_file(&FileType::LidarPcd, &files);
2855        assert!(result.is_some());
2856        assert_eq!(result.unwrap().url(), Some("https://example.com/file.pcd"));
2857    }
2858
2859    #[test]
2860    fn test_resolve_file_not_found() {
2861        let files = vec![SampleFile::with_url(
2862            "lidar.pcd".to_string(),
2863            "https://example.com/file.pcd".to_string(),
2864        )];
2865        // Requesting radar.pcd which doesn't exist in files
2866        let result = resolve_file(&FileType::RadarPcd, &files);
2867        assert!(result.is_none());
2868    }
2869
2870    #[test]
2871    fn test_resolve_file_lidar_depth() {
2872        // Server returns "lidar.depth" for LiDAR depth data
2873        let files = vec![SampleFile::with_url(
2874            "lidar.depth".to_string(),
2875            "https://example.com/depth.png".to_string(),
2876        )];
2877        let result = resolve_file(&FileType::LidarDepth, &files);
2878        assert!(result.is_some());
2879        assert_eq!(result.unwrap().url(), Some("https://example.com/depth.png"));
2880    }
2881
2882    #[test]
2883    fn test_resolve_file_lidar_reflect() {
2884        // Server returns "lidar.reflect" for LiDAR reflectance data
2885        let files = vec![SampleFile::with_url(
2886            "lidar.reflect".to_string(),
2887            "https://example.com/reflect.png".to_string(),
2888        )];
2889        let result = resolve_file(&FileType::LidarReflect, &files);
2890        assert!(result.is_some());
2891        assert_eq!(
2892            result.unwrap().url(),
2893            Some("https://example.com/reflect.png")
2894        );
2895    }
2896
2897    #[test]
2898    fn test_resolve_file_radar_cube() {
2899        // Server returns "radar.png" or "cube" for radar cube data
2900        let files = vec![SampleFile::with_url(
2901            "radar.png".to_string(),
2902            "https://example.com/radar.png".to_string(),
2903        )];
2904        let result = resolve_file(&FileType::RadarCube, &files);
2905        assert!(result.is_some());
2906        assert_eq!(result.unwrap().url(), Some("https://example.com/radar.png"));
2907    }
2908
2909    #[test]
2910    fn test_resolve_file_with_inline_data() {
2911        // Legacy datasets may have inline data instead of URLs
2912        let files = vec![SampleFile::with_data(
2913            "radar.pcd".to_string(),
2914            "SGVsbG8gV29ybGQ=".to_string(), // base64 "Hello World"
2915        )];
2916        let result = resolve_file(&FileType::RadarPcd, &files);
2917        assert!(result.is_some());
2918        let file = result.unwrap();
2919        assert!(file.url().is_none());
2920        assert_eq!(file.data(), Some("SGVsbG8gV29ybGQ="));
2921    }
2922
2923    #[test]
2924    fn test_convert_annotations_map_to_vec_with_bbox() {
2925        let mut map = HashMap::new();
2926        let bbox_ann = Annotation::new();
2927        map.insert("bbox".to_string(), vec![bbox_ann.clone()]);
2928
2929        let annotations = convert_annotations_map_to_vec(map);
2930        assert_eq!(annotations.len(), 1);
2931    }
2932
2933    #[test]
2934    fn test_convert_annotations_map_to_vec_all_types() {
2935        let mut map = HashMap::new();
2936        map.insert("bbox".to_string(), vec![Annotation::new()]);
2937        map.insert("box3d".to_string(), vec![Annotation::new()]);
2938        map.insert("mask".to_string(), vec![Annotation::new()]);
2939
2940        let annotations = convert_annotations_map_to_vec(map);
2941        assert_eq!(annotations.len(), 3);
2942    }
2943
2944    #[test]
2945    fn test_convert_annotations_map_to_vec_empty() {
2946        let map = HashMap::new();
2947        let annotations = convert_annotations_map_to_vec(map);
2948        assert_eq!(annotations.len(), 0);
2949    }
2950
2951    #[test]
2952    fn test_convert_annotations_map_to_vec_unknown_type_ignored() {
2953        let mut map = HashMap::new();
2954        map.insert("unknown".to_string(), vec![Annotation::new()]);
2955
2956        let annotations = convert_annotations_map_to_vec(map);
2957        // Unknown types are ignored
2958        assert_eq!(annotations.len(), 0);
2959    }
2960
2961    // ==== Annotation Field Mapping Tests ====
2962    #[test]
2963    fn test_annotation_group_field_name() {
2964        assert_eq!(annotation_group_field_name(), "group_name");
2965    }
2966
2967    #[test]
2968    fn test_annotation_object_id_field_name() {
2969        assert_eq!(annotation_object_id_field_name(), "object_reference");
2970    }
2971
2972    #[test]
2973    fn test_annotation_object_id_alias() {
2974        assert_eq!(annotation_object_id_alias(), "object_id");
2975    }
2976
2977    #[test]
2978    fn test_validate_annotation_field_names_success() {
2979        let json = r#"{"group_name":"train","object_reference":"obj1"}"#;
2980        assert!(validate_annotation_field_names(json, true, true).is_ok());
2981    }
2982
2983    #[test]
2984    fn test_validate_annotation_field_names_missing_group() {
2985        let json = r#"{"object_reference":"obj1"}"#;
2986        let result = validate_annotation_field_names(json, true, false);
2987        assert!(result.is_err());
2988        assert!(result.unwrap_err().contains("group_name"));
2989    }
2990
2991    #[test]
2992    fn test_validate_annotation_field_names_missing_object_ref() {
2993        let json = r#"{"group_name":"train"}"#;
2994        let result = validate_annotation_field_names(json, false, true);
2995        assert!(result.is_err());
2996        assert!(result.unwrap_err().contains("object_reference"));
2997    }
2998
2999    #[test]
3000    fn test_annotation_serialization_field_names() {
3001        // Test that Annotation serializes with correct field names
3002        let mut ann = Annotation::new();
3003        ann.set_group(Some("train".to_string()));
3004        ann.set_object_id(Some("obj1".to_string()));
3005
3006        let json = serde_json::to_string(&ann).unwrap();
3007        // Verify JSON contains correct field names
3008        assert!(validate_annotation_field_names(&json, true, true).is_ok());
3009    }
3010
3011    // ==== GPS/IMU Validation Tests ====
3012    #[test]
3013    fn test_validate_gps_coordinates_valid() {
3014        assert!(validate_gps_coordinates(37.7749, -122.4194).is_ok()); // San Francisco
3015        assert!(validate_gps_coordinates(0.0, 0.0).is_ok()); // Null Island
3016        assert!(validate_gps_coordinates(90.0, 180.0).is_ok()); // Edge cases
3017        assert!(validate_gps_coordinates(-90.0, -180.0).is_ok()); // Edge cases
3018    }
3019
3020    #[test]
3021    fn test_validate_gps_coordinates_invalid_latitude() {
3022        let result = validate_gps_coordinates(91.0, 0.0);
3023        assert!(result.is_err());
3024        assert!(result.unwrap_err().contains("latitude out of range"));
3025
3026        let result = validate_gps_coordinates(-91.0, 0.0);
3027        assert!(result.is_err());
3028        assert!(result.unwrap_err().contains("latitude out of range"));
3029    }
3030
3031    #[test]
3032    fn test_validate_gps_coordinates_invalid_longitude() {
3033        let result = validate_gps_coordinates(0.0, 181.0);
3034        assert!(result.is_err());
3035        assert!(result.unwrap_err().contains("longitude out of range"));
3036
3037        let result = validate_gps_coordinates(0.0, -181.0);
3038        assert!(result.is_err());
3039        assert!(result.unwrap_err().contains("longitude out of range"));
3040    }
3041
3042    #[test]
3043    fn test_validate_gps_coordinates_non_finite() {
3044        let result = validate_gps_coordinates(f64::NAN, 0.0);
3045        assert!(result.is_err());
3046        assert!(result.unwrap_err().contains("not finite"));
3047
3048        let result = validate_gps_coordinates(0.0, f64::INFINITY);
3049        assert!(result.is_err());
3050        assert!(result.unwrap_err().contains("not finite"));
3051    }
3052
3053    #[test]
3054    fn test_validate_imu_orientation_valid() {
3055        assert!(validate_imu_orientation(0.0, 0.0, 0.0).is_ok());
3056        assert!(validate_imu_orientation(45.0, 30.0, 90.0).is_ok());
3057        assert!(validate_imu_orientation(180.0, 90.0, -180.0).is_ok()); // Edge cases
3058        assert!(validate_imu_orientation(-180.0, -90.0, 180.0).is_ok()); // Edge cases
3059    }
3060
3061    #[test]
3062    fn test_validate_imu_orientation_invalid_roll() {
3063        let result = validate_imu_orientation(181.0, 0.0, 0.0);
3064        assert!(result.is_err());
3065        assert!(result.unwrap_err().contains("roll out of range"));
3066
3067        let result = validate_imu_orientation(-181.0, 0.0, 0.0);
3068        assert!(result.is_err());
3069    }
3070
3071    #[test]
3072    fn test_validate_imu_orientation_invalid_pitch() {
3073        let result = validate_imu_orientation(0.0, 91.0, 0.0);
3074        assert!(result.is_err());
3075        assert!(result.unwrap_err().contains("pitch out of range"));
3076
3077        let result = validate_imu_orientation(0.0, -91.0, 0.0);
3078        assert!(result.is_err());
3079    }
3080
3081    #[test]
3082    fn test_validate_imu_orientation_non_finite() {
3083        let result = validate_imu_orientation(f64::NAN, 0.0, 0.0);
3084        assert!(result.is_err());
3085        assert!(result.unwrap_err().contains("not finite"));
3086
3087        let result = validate_imu_orientation(0.0, f64::INFINITY, 0.0);
3088        assert!(result.is_err());
3089
3090        let result = validate_imu_orientation(0.0, 0.0, f64::NEG_INFINITY);
3091        assert!(result.is_err());
3092    }
3093
3094    // ==== Polygon Unflattening Tests ====
3095    #[test]
3096    #[cfg(feature = "polars")]
3097    fn test_unflatten_polygon_coordinates_single_polygon() {
3098        let coords = vec![1.0, 2.0, 3.0, 4.0];
3099        let result = unflatten_polygon_coordinates(&coords);
3100
3101        assert_eq!(result.len(), 1);
3102        assert_eq!(result[0].len(), 2);
3103        assert_eq!(result[0][0], (1.0, 2.0));
3104        assert_eq!(result[0][1], (3.0, 4.0));
3105    }
3106
3107    #[test]
3108    #[cfg(feature = "polars")]
3109    fn test_unflatten_polygon_coordinates_multiple_polygons() {
3110        let coords = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0, 7.0, 8.0];
3111        let result = unflatten_polygon_coordinates(&coords);
3112
3113        assert_eq!(result.len(), 2);
3114        assert_eq!(result[0].len(), 2);
3115        assert_eq!(result[0][0], (1.0, 2.0));
3116        assert_eq!(result[0][1], (3.0, 4.0));
3117        assert_eq!(result[1].len(), 2);
3118        assert_eq!(result[1][0], (5.0, 6.0));
3119        assert_eq!(result[1][1], (7.0, 8.0));
3120    }
3121
3122    #[test]
3123    #[cfg(feature = "polars")]
3124    fn test_unflatten_polygon_coordinates_roundtrip() {
3125        // Test that unflatten correctly reconstructs from NaN-separated flat coords
3126        let flat = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0, 7.0, 8.0];
3127        let result = unflatten_polygon_coordinates(&flat);
3128
3129        let expected = vec![vec![(1.0, 2.0), (3.0, 4.0)], vec![(5.0, 6.0), (7.0, 8.0)]];
3130        assert_eq!(result, expected);
3131    }
3132
3133    // ==== Annotation Format Flattening Tests ====
3134    #[test]
3135    fn test_flatten_annotation_map_all_types() {
3136        use std::collections::HashMap;
3137
3138        let mut map = HashMap::new();
3139
3140        // Create test annotations
3141        let mut bbox_ann = Annotation::new();
3142        bbox_ann.set_label(Some("bbox_label".to_string()));
3143
3144        let mut box3d_ann = Annotation::new();
3145        box3d_ann.set_label(Some("box3d_label".to_string()));
3146
3147        let mut mask_ann = Annotation::new();
3148        mask_ann.set_label(Some("mask_label".to_string()));
3149
3150        map.insert("bbox".to_string(), vec![bbox_ann.clone()]);
3151        map.insert("box3d".to_string(), vec![box3d_ann.clone()]);
3152        map.insert("mask".to_string(), vec![mask_ann.clone()]);
3153
3154        let result = flatten_annotation_map(map);
3155
3156        assert_eq!(result.len(), 3);
3157        // Check ordering: bbox, box3d, mask
3158        assert_eq!(result[0].label(), Some(&"bbox_label".to_string()));
3159        assert_eq!(result[1].label(), Some(&"box3d_label".to_string()));
3160        assert_eq!(result[2].label(), Some(&"mask_label".to_string()));
3161    }
3162
3163    #[test]
3164    fn test_flatten_annotation_map_single_type() {
3165        use std::collections::HashMap;
3166
3167        let mut map = HashMap::new();
3168        let mut bbox_ann = Annotation::new();
3169        bbox_ann.set_label(Some("test".to_string()));
3170        map.insert("bbox".to_string(), vec![bbox_ann]);
3171
3172        let result = flatten_annotation_map(map);
3173
3174        assert_eq!(result.len(), 1);
3175        assert_eq!(result[0].label(), Some(&"test".to_string()));
3176    }
3177
3178    #[test]
3179    fn test_flatten_annotation_map_empty() {
3180        use std::collections::HashMap;
3181
3182        let map = HashMap::new();
3183        let result = flatten_annotation_map(map);
3184
3185        assert_eq!(result.len(), 0);
3186    }
3187
3188    #[test]
3189    fn test_flatten_annotation_map_deterministic_order() {
3190        use std::collections::HashMap;
3191
3192        let mut map = HashMap::new();
3193
3194        let mut bbox_ann = Annotation::new();
3195        bbox_ann.set_label(Some("bbox".to_string()));
3196
3197        let mut box3d_ann = Annotation::new();
3198        box3d_ann.set_label(Some("box3d".to_string()));
3199
3200        let mut mask_ann = Annotation::new();
3201        mask_ann.set_label(Some("mask".to_string()));
3202
3203        // Insert in reverse order to test deterministic ordering
3204        map.insert("mask".to_string(), vec![mask_ann]);
3205        map.insert("box3d".to_string(), vec![box3d_ann]);
3206        map.insert("bbox".to_string(), vec![bbox_ann]);
3207
3208        let result = flatten_annotation_map(map);
3209
3210        // Should be bbox, box3d, mask regardless of insertion order
3211        assert_eq!(result.len(), 3);
3212        assert_eq!(result[0].label(), Some(&"bbox".to_string()));
3213        assert_eq!(result[1].label(), Some(&"box3d".to_string()));
3214        assert_eq!(result[2].label(), Some(&"mask".to_string()));
3215    }
3216
3217    // ==== Box2d Tests ====
3218    #[test]
3219    fn test_box2d_construction_and_accessors() {
3220        // Test case 1: Basic construction with positive coordinates
3221        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
3222        assert_eq!(
3223            (bbox.left(), bbox.top(), bbox.width(), bbox.height()),
3224            (10.0, 20.0, 100.0, 50.0)
3225        );
3226
3227        // Test case 2: Center calculations
3228        assert_eq!((bbox.cx(), bbox.cy()), (60.0, 45.0)); // 10+50, 20+25
3229
3230        // Test case 3: Zero origin
3231        let bbox = Box2d::new(0.0, 0.0, 640.0, 480.0);
3232        assert_eq!(
3233            (bbox.left(), bbox.top(), bbox.width(), bbox.height()),
3234            (0.0, 0.0, 640.0, 480.0)
3235        );
3236        assert_eq!((bbox.cx(), bbox.cy()), (320.0, 240.0));
3237    }
3238
3239    #[test]
3240    fn test_box2d_center_calculation() {
3241        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
3242
3243        // Center = position + size/2
3244        assert_eq!(bbox.cx(), 60.0); // 10 + 100/2
3245        assert_eq!(bbox.cy(), 45.0); // 20 + 50/2
3246    }
3247
3248    #[test]
3249    fn test_box2d_zero_dimensions() {
3250        let bbox = Box2d::new(10.0, 20.0, 0.0, 0.0);
3251
3252        // When width/height are zero, center = position
3253        assert_eq!(bbox.cx(), 10.0);
3254        assert_eq!(bbox.cy(), 20.0);
3255    }
3256
3257    #[test]
3258    fn test_box2d_negative_dimensions() {
3259        let bbox = Box2d::new(100.0, 100.0, -50.0, -50.0);
3260
3261        // Negative dimensions create inverted boxes (valid edge case)
3262        assert_eq!(bbox.width(), -50.0);
3263        assert_eq!(bbox.height(), -50.0);
3264        assert_eq!(bbox.cx(), 75.0); // 100 + (-50)/2
3265        assert_eq!(bbox.cy(), 75.0); // 100 + (-50)/2
3266    }
3267
3268    // ==== Box3d Tests ====
3269    #[test]
3270    fn test_box3d_construction_and_accessors() {
3271        // Test case 1: Basic 3D construction
3272        let bbox = Box3d::new(1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
3273        assert_eq!((bbox.cx(), bbox.cy(), bbox.cz()), (1.0, 2.0, 3.0));
3274        assert_eq!(
3275            (bbox.width(), bbox.height(), bbox.length()),
3276            (4.0, 5.0, 6.0)
3277        );
3278
3279        // Test case 2: Corners calculation with offset center
3280        let bbox = Box3d::new(10.0, 20.0, 30.0, 4.0, 6.0, 8.0);
3281        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (8.0, 17.0, 26.0)); // 10-2, 20-3, 30-4
3282
3283        // Test case 3: Center at origin with negative corners
3284        let bbox = Box3d::new(0.0, 0.0, 0.0, 2.0, 3.0, 4.0);
3285        assert_eq!((bbox.cx(), bbox.cy(), bbox.cz()), (0.0, 0.0, 0.0));
3286        assert_eq!(
3287            (bbox.width(), bbox.height(), bbox.length()),
3288            (2.0, 3.0, 4.0)
3289        );
3290        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (-1.0, -1.5, -2.0));
3291    }
3292
3293    #[test]
3294    fn test_box3d_center_calculation() {
3295        let bbox = Box3d::new(10.0, 20.0, 30.0, 100.0, 50.0, 40.0);
3296
3297        // Center values as specified in constructor
3298        assert_eq!(bbox.cx(), 10.0);
3299        assert_eq!(bbox.cy(), 20.0);
3300        assert_eq!(bbox.cz(), 30.0);
3301    }
3302
3303    #[test]
3304    fn test_box3d_zero_dimensions() {
3305        let bbox = Box3d::new(5.0, 10.0, 15.0, 0.0, 0.0, 0.0);
3306
3307        // When all dimensions are zero, corners = center
3308        assert_eq!(bbox.cx(), 5.0);
3309        assert_eq!(bbox.cy(), 10.0);
3310        assert_eq!(bbox.cz(), 15.0);
3311        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (5.0, 10.0, 15.0));
3312    }
3313
3314    #[test]
3315    fn test_box3d_negative_dimensions() {
3316        let bbox = Box3d::new(100.0, 100.0, 100.0, -50.0, -50.0, -50.0);
3317
3318        // Negative dimensions create inverted boxes
3319        assert_eq!(bbox.width(), -50.0);
3320        assert_eq!(bbox.height(), -50.0);
3321        assert_eq!(bbox.length(), -50.0);
3322        assert_eq!(
3323            (bbox.left(), bbox.top(), bbox.front()),
3324            (125.0, 125.0, 125.0)
3325        );
3326    }
3327
3328    // ==== Polygon Tests ====
3329    #[test]
3330    fn test_polygon_creation_and_deserialization() {
3331        // Test case 1: Direct construction
3332        let rings = vec![vec![(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0)]];
3333        let polygon = Polygon::new(rings.clone());
3334        assert_eq!(polygon.rings, rings);
3335
3336        // Test case 2: Deserialization from legacy format (field name "polygon")
3337        let legacy = serde_json::json!({
3338            "polygon": {
3339                "polygon": [[
3340                    [0.0_f32, 0.0_f32],
3341                    [1.0_f32, 0.0_f32],
3342                    [1.0_f32, 1.0_f32]
3343                ]]
3344            }
3345        });
3346
3347        #[derive(serde::Deserialize)]
3348        struct Wrapper {
3349            polygon: Polygon,
3350        }
3351
3352        let parsed: Wrapper = serde_json::from_value(legacy).unwrap();
3353        assert_eq!(parsed.polygon.rings.len(), 1);
3354        assert_eq!(parsed.polygon.rings[0].len(), 3);
3355    }
3356
3357    // ==== Sample Tests ====
3358    #[test]
3359    fn test_sample_construction_and_accessors() {
3360        // Test case 1: New sample is empty
3361        let sample = Sample::new();
3362        assert_eq!(sample.id(), None);
3363        assert_eq!(sample.image_name(), None);
3364        assert_eq!(sample.width(), None);
3365        assert_eq!(sample.height(), None);
3366
3367        // Test case 2: Sample with populated fields
3368        let mut sample = Sample::new();
3369        sample.image_name = Some("test.jpg".to_string());
3370        sample.width = Some(1920);
3371        sample.height = Some(1080);
3372        sample.group = Some("group1".to_string());
3373
3374        assert_eq!(sample.image_name(), Some("test.jpg"));
3375        assert_eq!(sample.width(), Some(1920));
3376        assert_eq!(sample.height(), Some(1080));
3377        assert_eq!(sample.group(), Some(&"group1".to_string()));
3378    }
3379
3380    #[test]
3381    fn test_sample_name_extraction_from_image_name() {
3382        let mut sample = Sample::new();
3383
3384        // Test case 1: Basic image name with extension
3385        sample.image_name = Some("test_image.jpg".to_string());
3386        assert_eq!(sample.name(), Some("test_image".to_string()));
3387
3388        // Test case 2: Image name with .camera suffix
3389        sample.image_name = Some("test_image.camera.jpg".to_string());
3390        assert_eq!(sample.name(), Some("test_image".to_string()));
3391
3392        // Test case 3: Image name without extension
3393        sample.image_name = Some("test_image".to_string());
3394        assert_eq!(sample.name(), Some("test_image".to_string()));
3395    }
3396
3397    // ==== Annotation Tests ====
3398    #[test]
3399    fn test_annotation_construction_and_setters() {
3400        // Test case 1: New annotation is empty
3401        let ann = Annotation::new();
3402        assert_eq!(ann.sample_id(), None);
3403        assert_eq!(ann.label(), None);
3404        assert_eq!(ann.box2d(), None);
3405        assert_eq!(ann.box3d(), None);
3406        assert_eq!(ann.polygon(), None);
3407
3408        // Test case 2: Setting annotation fields
3409        let mut ann = Annotation::new();
3410        ann.set_label(Some("car".to_string()));
3411        assert_eq!(ann.label(), Some(&"car".to_string()));
3412
3413        ann.set_label_index(Some(42));
3414        assert_eq!(ann.label_index(), Some(42));
3415
3416        // Test case 3: Setting bounding box
3417        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
3418        ann.set_box2d(Some(bbox.clone()));
3419        assert!(ann.box2d().is_some());
3420        assert_eq!(ann.box2d().unwrap().left(), 10.0);
3421    }
3422
3423    // ==== SampleFile Tests ====
3424    #[test]
3425    fn test_sample_file_with_url_and_filename() {
3426        // Test case 1: SampleFile with URL
3427        let file = SampleFile::with_url(
3428            "lidar.pcd".to_string(),
3429            "https://example.com/file.pcd".to_string(),
3430        );
3431        assert_eq!(file.file_type(), "lidar.pcd");
3432        assert_eq!(file.url(), Some("https://example.com/file.pcd"));
3433        assert_eq!(file.filename(), None);
3434
3435        // Test case 2: SampleFile with local filename
3436        let file = SampleFile::with_filename("image".to_string(), "test.jpg".to_string());
3437        assert_eq!(file.file_type(), "image");
3438        assert_eq!(file.filename(), Some("test.jpg"));
3439        assert_eq!(file.url(), None);
3440    }
3441
3442    // ==== Sample GPS/IMU Deserialization Tests ====
/// Deserializes a sample whose `sensors` array holds a GPS entry, an IMU
/// entry and one file entry, then checks where each of them ends up.
#[test]
fn test_sample_deserializes_gps_imu_from_sensors() {
    use serde_json::json;

    let payload = json!({
        "id": 123,
        "image_name": "test.jpg",
        "sensors": [
            {"gps": {"lat": 37.7749, "lon": -122.4194}},
            {"imu": {"roll": 1.5, "pitch": 2.5, "yaw": 3.5}},
            {"radar.pcd": "https://example.com/radar.pcd"}
        ]
    });
    let parsed: Sample = serde_json::from_value(payload).unwrap();

    // GPS and IMU readings must surface through the `location` field.
    assert!(parsed.location.is_some());
    let place = parsed.location.as_ref().unwrap();

    // GPS coordinates, compared with a small tolerance.
    assert!(place.gps.is_some());
    let fix = place.gps.as_ref().unwrap();
    assert!((fix.lat - 37.7749).abs() < 0.0001);
    assert!((fix.lon - (-122.4194)).abs() < 0.0001);

    // IMU orientation, compared with the same tolerance.
    assert!(place.imu.is_some());
    let attitude = place.imu.as_ref().unwrap();
    assert!((attitude.roll - 1.5).abs() < 0.0001);
    assert!((attitude.pitch - 2.5).abs() < 0.0001);
    assert!((attitude.yaw - 3.5).abs() < 0.0001);

    // The remaining (non-GPS/IMU) sensor entry becomes a file.
    let files = &parsed.files;
    assert_eq!(files.len(), 1);
    assert_eq!(files[0].file_type(), "radar.pcd");
    assert_eq!(files[0].url(), Some("https://example.com/radar.pcd"));
}
3482
/// A `sensors` array containing only a GPS entry yields a location with GPS
/// data and no IMU data.
#[test]
fn test_sample_deserializes_gps_only() {
    use serde_json::json;

    let payload = json!({
        "id": 456,
        "sensors": [
            {"gps": {"lat": 40.7128, "lon": -74.0060}}
        ]
    });
    let parsed: Sample = serde_json::from_value(payload).unwrap();

    assert!(parsed.location.is_some());
    let place = parsed.location.as_ref().unwrap();

    // GPS present, IMU absent.
    assert!(place.gps.is_some());
    assert!(place.imu.is_none());

    let fix = place.gps.as_ref().unwrap();
    assert!((fix.lat - 40.7128).abs() < 0.0001);
    assert!((fix.lon - (-74.0060)).abs() < 0.0001);
}
3507
/// When `sensors` holds only file entries (no GPS/IMU), the sample has no
/// location and every entry is kept as a file.
#[test]
fn test_sample_deserializes_without_location() {
    use serde_json::json;

    let payload = json!({
        "id": 789,
        "sensors": [
            {"radar.pcd": "https://example.com/radar.pcd"},
            {"lidar.pcd": "https://example.com/lidar.pcd"}
        ]
    });
    let parsed: Sample = serde_json::from_value(payload).unwrap();

    // Nothing to build a location from.
    assert!(parsed.location.is_none());

    // Both sensor entries were turned into files.
    assert_eq!(parsed.files.len(), 2);
}
3529
3530    // ==== Label Tests ====
/// Deserializes two labels from JSON and checks the accessors and the
/// `Display` output.
#[test]
fn test_label_deserialization_and_accessors() {
    use serde_json::json;

    // First label: every accessor plus both string-formatting routes.
    let car: Label = serde_json::from_value(json!({
        "id": 123,
        "dataset_id": 456,
        "index": 5,
        "name": "car"
    }))
    .unwrap();
    assert_eq!(car.id(), 123);
    assert_eq!(car.index(), 5);
    assert_eq!(car.name(), "car");
    assert_eq!(car.to_string(), "car");
    assert_eq!(format!("{car}"), "car");

    // Second label: only the formatted name is checked.
    let person: Label = serde_json::from_value(json!({
        "id": 1,
        "dataset_id": 100,
        "index": 0,
        "name": "person"
    }))
    .unwrap();
    assert_eq!(format!("{person}"), "person");
}
3561
3562    // ==== Annotation Serialization Tests ====
/// Serializes a sample holding one annotation (label, 2D box, polygon) and
/// inspects the keys of the resulting annotation JSON object.
#[test]
fn test_annotation_serialization_with_mask_and_box() {
    let mut annotation = Annotation::new();
    annotation.set_label(Some("test".to_string()));
    annotation.set_box2d(Some(Box2d::new(10.0, 20.0, 30.0, 40.0)));
    // A single three-point ring.
    annotation.set_polygon(Some(Polygon::new(vec![vec![
        (0.0_f32, 0.0_f32),
        (1.0_f32, 0.0_f32),
        (1.0_f32, 1.0_f32),
    ]])));

    let mut sample = Sample::new();
    sample.annotations.push(annotation);

    let serialized = serde_json::to_value(&sample).unwrap();
    let entries = serialized
        .get("annotations")
        .and_then(serde_json::Value::as_array)
        .expect("annotations serialized as array");
    assert_eq!(entries.len(), 1);

    let fields = entries[0].as_object().expect("annotation object");
    assert!(fields.contains_key("box2d"));
    // Wire contract: samples.populate2 reads the polygon geometry from the
    // "mask" key. The Rust struct was renamed from Mask to Polygon, but the
    // wire format did not follow; emitting "polygon" is what caused polygons
    // to be silently dropped on upload.
    assert!(
        fields.contains_key("mask"),
        "Annotation must serialise polygon under 'mask' key for samples.populate2; got keys: {:?}",
        fields.keys().collect::<Vec<_>>()
    );
    assert!(!fields.contains_key("polygon"));
    assert!(!fields.contains_key("x"));
    // The "mask" value must be a JSON array.
    assert!(fields.get("mask").is_some_and(|value| value.is_array()));
}
3606
/// `frame_number: -1` (what the server sends for non-sequence samples) must
/// be exposed to the client as `None`.
#[test]
fn test_frame_number_negative_one_deserializes_as_none() {
    let payload = r#"{
            "uuid": "test-uuid",
            "frame_number": -1
        }"#;

    let parsed: Sample = serde_json::from_str(payload).unwrap();
    assert_eq!(parsed.frame_number, None);
}
3619
/// A valid frame number passes through deserialization unchanged.
#[test]
fn test_frame_number_positive_value_deserializes_correctly() {
    let payload = r#"{
            "uuid": "test-uuid",
            "frame_number": 5
        }"#;

    let parsed: Sample = serde_json::from_str(payload).unwrap();
    assert_eq!(parsed.frame_number, Some(5));
}
3631
/// An explicit JSON `null` for `frame_number` deserializes as `None`.
#[test]
fn test_frame_number_null_deserializes_as_none() {
    let payload = r#"{
            "uuid": "test-uuid",
            "frame_number": null
        }"#;

    let parsed: Sample = serde_json::from_str(payload).unwrap();
    assert_eq!(parsed.frame_number, None);
}
3643
/// Leaving `frame_number` out of the JSON entirely also yields `None`.
#[test]
fn test_frame_number_missing_deserializes_as_none() {
    let payload = r#"{
            "uuid": "test-uuid"
        }"#;

    let parsed: Sample = serde_json::from_str(payload).unwrap();
    assert_eq!(parsed.frame_number, None);
}
3654
3655    // =========================================================================
3656    // samples_dataframe tests - CRITICAL: Verify group preservation
3657    // =========================================================================
3658
/// A sample with no annotations must still appear in the DataFrame, and its
/// row must carry the sample's group.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_preserves_group_for_samples_without_annotations() {
    use polars::prelude::*;

    // Annotated sample in group "train".
    let mut labelled = Annotation::new();
    labelled.set_label(Some("car".to_string()));
    labelled.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
    labelled.set_name(Some("annotated".to_string()));

    let mut annotated = Sample::new();
    annotated.image_name = Some("annotated.jpg".to_string());
    annotated.group = Some("train".to_string());
    annotated.annotations = vec![labelled];

    // The critical case: no annotations at all, yet group "val" must survive.
    let mut bare = Sample::new();
    bare.image_name = Some("unannotated.jpg".to_string());
    bare.group = Some("val".to_string());
    bare.annotations = vec![];

    let samples = vec![annotated, bare];
    let df = samples_dataframe(&samples).expect("Failed to create DataFrame");

    // One row per sample.
    assert_eq!(df.height(), 2, "Expected 2 rows (one per sample)");

    // Read the "group" and "name" columns as strings.
    let group_column = df.column("group").expect("group column should exist");
    let group_strings = group_column
        .cast(&DataType::String)
        .expect("cast to string");
    let groups = group_strings.str().expect("as str");

    let name_column = df.column("name").expect("name column should exist");
    let name_strings = name_column.cast(&DataType::String).expect("cast to string");
    let names = name_strings.str().expect("as str");

    // Locate the "unannotated" row(s) and check the group on each.
    let mut found_unannotated = false;
    for row in 0..df.height() {
        if names.get(row) != Some("unannotated") {
            continue;
        }
        found_unannotated = true;
        assert_eq!(
            groups.get(row),
            Some("val"),
            "CRITICAL: Sample 'unannotated' without annotations must have group 'val'"
        );
    }

    assert!(
        found_unannotated,
        "Did not find 'unannotated' sample in DataFrame - \
         this means samples without annotations are not being included"
    );
}
3719
/// Samples without annotations each contribute one row, and every row keeps
/// its group value.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_includes_all_samples_even_without_annotations() {
    // One annotated sample in "train" ...
    let mut person = Annotation::new();
    person.set_label(Some("person".to_string()));
    person.set_box2d(Some(Box2d::new(0.0, 0.0, 0.5, 0.5)));
    person.set_name(Some("with_ann".to_string()));

    let mut annotated = Sample::new();
    annotated.image_name = Some("with_ann.jpg".to_string());
    annotated.group = Some("train".to_string());
    annotated.annotations = vec![person];

    // ... plus two samples with no annotations, one per group.
    let mut bare_train = Sample::new();
    bare_train.image_name = Some("no_ann_train.jpg".to_string());
    bare_train.group = Some("train".to_string());
    bare_train.annotations = vec![];

    let mut bare_val = Sample::new();
    bare_val.image_name = Some("no_ann_val.jpg".to_string());
    bare_val.group = Some("val".to_string());
    bare_val.annotations = vec![];

    let samples = vec![annotated, bare_train, bare_val];
    let df = samples_dataframe(&samples).expect("Failed to create DataFrame");

    // Exactly one row per sample.
    assert_eq!(
        df.height(),
        3,
        "Expected 3 rows (samples without annotations should create one row each)"
    );

    let group_column = df.column("group").expect("group column");
    let group_strings = group_column
        .cast(&polars::prelude::DataType::String)
        .unwrap();
    let groups = group_strings.str().unwrap();

    // Tally rows per group; anything else (including null) is a failure.
    let mut train_count = 0;
    let mut val_count = 0;
    for row in 0..df.height() {
        let value = groups.get(row);
        if value == Some("train") {
            train_count += 1;
        } else if value == Some("val") {
            val_count += 1;
        } else {
            panic!(
                "Unexpected group value at row {}: {:?}. \
                 All samples should have their group preserved.",
                row, value
            );
        }
    }

    assert_eq!(train_count, 2, "Expected 2 samples in 'train' group");
    assert_eq!(val_count, 1, "Expected 1 sample in 'val' group");
}
3779
/// CRITICAL: a sample that has a group but no annotations must not end up
/// with a null group in the DataFrame.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_group_is_not_null_for_samples_with_group() {
    let mut lone = Sample::new();
    lone.image_name = Some("test.jpg".to_string());
    lone.group = Some("test_group".to_string());
    lone.annotations = vec![];

    let df = samples_dataframe(&[lone]).expect("Failed to create DataFrame");
    let group_column = df.column("group").expect("group column");

    // The only sample has a group, so the column may not contain any null.
    assert_eq!(
        group_column.null_count(),
        0,
        "Sample with group='test_group' but no annotations has NULL group in DataFrame. \
         This is a bug in samples_dataframe - group must be preserved!"
    );
}
3803
/// A sample with several annotations produces one row per annotation, and
/// every one of those rows must carry the sample's group.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_group_consistent_across_all_rows_for_same_image() {
    use polars::prelude::*;

    let mut sample = Sample::new();
    sample.image_name = Some("multi_ann.jpg".to_string());
    sample.group = Some("train".to_string());

    // Three annotations that differ only in label and box.
    let specs = [
        ("car", Box2d::new(0.1, 0.2, 0.3, 0.4)),
        ("truck", Box2d::new(0.5, 0.6, 0.2, 0.2)),
        ("bus", Box2d::new(0.7, 0.8, 0.1, 0.1)),
    ];
    sample.annotations = specs
        .into_iter()
        .map(|(label, bbox)| {
            let mut ann = Annotation::new();
            ann.set_label(Some(label.to_string()));
            ann.set_box2d(Some(bbox));
            ann.set_name(Some("multi_ann".to_string()));
            ann
        })
        .collect();

    let df = samples_dataframe(&[sample]).expect("Failed to create DataFrame");

    // One row per annotation.
    assert_eq!(df.height(), 3, "Expected 3 rows (one per annotation)");

    let group_column = df.column("group").expect("group column");
    let group_strings = group_column
        .cast(&DataType::String)
        .expect("cast to string");
    let groups = group_strings.str().expect("as str");

    // No row may have a null group ...
    assert_eq!(
        group_column.null_count(),
        0,
        "No rows should have null group"
    );

    // ... and each row must be "train", not only the first.
    for row in 0..df.height() {
        let value = groups.get(row);
        assert_eq!(
            value,
            Some("train"),
            "Row {} should have group 'train', got {:?}. \
             All rows for the same image must have identical group values.",
            row,
            value
        );
    }
}
3860
/// LVIS-related columns appear when they hold data, while columns that are
/// entirely null are dropped.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_lvis_columns() {
    let mut lvis_ann = Annotation::new();
    lvis_ann.set_name(Some("test".to_string()));
    lvis_ann.set_label(Some("person".to_string()));
    lvis_ann.set_label_index(Some(1));
    lvis_ann.set_iscrowd(Some(false));
    lvis_ann.set_category_frequency(Some("f".to_string()));

    let sample = Sample {
        image_name: Some("test.jpg".to_string()),
        width: Some(640),
        height: Some(480),
        annotations: vec![lvis_ann],
        neg_label_indices: Some(vec![5, 12]),
        not_exhaustive_label_indices: Some(vec![3]),
        ..Default::default()
    };

    let df = samples_dataframe(&[sample]).unwrap();

    // Columns backed by data must be present.
    let expected_present = [
        ("iscrowd", "iscrowd column missing"),
        ("category_frequency", "category_frequency column missing"),
        ("neg_label_indices", "neg_label_indices column missing"),
        (
            "not_exhaustive_label_indices",
            "not_exhaustive_label_indices column missing",
        ),
    ];
    for (column, message) in expected_present {
        assert!(df.column(column).is_ok(), "{}", message);
    }

    // All-null columns should be dropped (polygon, box2d, box3d, mask, scores, etc.)
    let expected_absent = [
        ("polygon", "polygon column should be dropped (all null)"),
        ("box2d", "box2d column should be dropped (all null)"),
    ];
    for (column, message) in expected_absent {
        assert!(df.column(column).is_err(), "{}", message);
    }
}
3908
/// A freshly created annotation serializes without the LVIS field names.
#[test]
fn test_annotation_serialization_skips_lvis_fields() {
    let serialized = serde_json::to_string(&Annotation::new()).unwrap();

    let omitted = [
        ("iscrowd", "iscrowd should be omitted when None"),
        (
            "category_frequency",
            "category_frequency should be omitted when None",
        ),
    ];
    for (field, message) in omitted {
        assert!(!serialized.contains(field), "{}", message);
    }
}
3922
/// A freshly created sample serializes without the LVIS field names.
#[test]
fn test_sample_serialization_skips_lvis_fields() {
    let serialized = serde_json::to_string(&Sample::new()).unwrap();

    let omitted = [
        (
            "neg_label_indices",
            "neg_label_indices should be omitted when None",
        ),
        (
            "not_exhaustive_label_indices",
            "not_exhaustive_label_indices should be omitted when None",
        ),
    ];
    for (field, message) in omitted {
        assert!(!serialized.contains(field), "{}", message);
    }
}
3936
/// The score fields of a default annotation start as `None` and hold the
/// values assigned to them.
#[test]
fn test_annotation_score_fields() {
    let mut scored = Annotation::default();

    // Defaults: no scores at all.
    assert!(scored.box2d_score.is_none());
    assert!(scored.polygon_score.is_none());
    assert!(scored.mask_score.is_none());

    // Assign each score, then read it back.
    scored.box2d_score = Some(0.95);
    scored.polygon_score = Some(0.87);
    scored.mask_score = Some(0.42);

    assert_eq!(scored.box2d_score, Some(0.95));
    assert_eq!(scored.polygon_score, Some(0.87));
    assert_eq!(scored.mask_score, Some(0.42));
}
3950
/// `Timing` keeps the values it is built with, and its default leaves
/// `load` unset.
#[test]
fn test_timing_struct() {
    let populated = Timing {
        load: Some(1_000_000),
        preprocess: Some(2_000_000),
        inference: Some(50_000_000),
        decode: Some(3_000_000),
    };
    assert_eq!(populated.inference, Some(50_000_000));

    let empty = Timing::default();
    assert!(empty.load.is_none());
}
3964
/// A default sample has no timing; assigning one makes it present.
#[test]
fn test_sample_timing() {
    let mut timed = Sample::default();
    assert!(timed.timing.is_none());

    let load_only = Timing {
        load: Some(1_000_000),
        ..Default::default()
    };
    timed.timing = Some(load_only);
    assert!(timed.timing.is_some());
}
3975
3976    // =========================================================================
3977    // samples_dataframe 2026.04 schema tests
3978    // =========================================================================
3979
/// An annotation with a polygon produces a "polygon" column; a "mask"
/// column, if present at all, must be of Binary type.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_polygon_column() {
    let ring = vec![(0.1, 0.2), (0.3, 0.4), (0.5, 0.6)];

    let mut outlined = Annotation::new();
    outlined.set_name(Some("test".to_string()));
    outlined.set_polygon(Some(Polygon::new(vec![ring])));

    let sample = Sample {
        image_name: Some("test.jpg".to_string()),
        annotations: vec![outlined],
        ..Default::default()
    };

    let df = samples_dataframe(&[sample]).unwrap();

    // 2026.04: polygon column exists with nested List(List(Float32))
    assert!(df.column("polygon").is_ok(), "Should have polygon column");

    // No MaskData was set, so the old float-based "mask" column must not
    // exist. Should a "mask" column be present, it has to be the Binary one
    // from MaskData rather than a list of floats.
    if let Ok(mask_column) = df.column("mask") {
        assert_eq!(
            mask_column.dtype(),
            &polars::prelude::DataType::Binary,
            "mask column must be Binary type (PNG bytes), not float list"
        );
    }
}
4013
/// With a single sample that only has an image name, "name" is kept and
/// every all-null column is dropped.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_column_presence_drops_all_null() {
    // Only a name is set; there are no annotations.
    let name_only = Sample {
        image_name: Some("test.jpg".to_string()),
        ..Default::default()
    };

    let df = samples_dataframe(&[name_only]).unwrap();

    // "name" is the one column that must always exist.
    assert!(df.column("name").is_ok(), "name column must always exist");

    // Every column below is entirely null and must therefore be missing.
    let dropped = [
        ("polygon", "All-null polygon should be dropped"),
        ("box2d", "All-null box2d should be dropped"),
        ("box3d", "All-null box3d should be dropped"),
        ("mask", "All-null mask should be dropped"),
        ("box2d_score", "All-null score columns should be dropped"),
        ("timing", "All-null timing should be dropped"),
    ];
    for (column, message) in dropped {
        assert!(df.column(column).is_err(), "{}", message);
    }
}
4054
/// Samples that carry width/height produce a "size" column whose rows read
/// back as `[width, height]`.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_size_column() {
    let full_hd = Sample {
        image_name: Some("img1.jpg".to_string()),
        width: Some(1920),
        height: Some(1080),
        ..Default::default()
    };
    let vga = Sample {
        image_name: Some("img2.jpg".to_string()),
        width: Some(640),
        height: Some(480),
        ..Default::default()
    };

    let df = samples_dataframe(&[full_hd, vga]).unwrap();

    // The column has data, so the all-null rule must not remove it.
    let size_column = df
        .column("size")
        .expect("size column should be present when width/height are set");
    assert_eq!(size_column.len(), 2);

    // Each row should be an Array(UInt32, 2) with [width, height]
    let sizes = size_column
        .array()
        .expect("size column should be Array dtype");
    let expected_rows: [Vec<u32>; 2] = [vec![1920, 1080], vec![640, 480]];
    for (row, expected) in expected_rows.into_iter().enumerate() {
        let series = sizes.get_as_series(row).unwrap();
        let values: Vec<u32> = series.u32().unwrap().into_no_null_iter().collect();
        assert_eq!(values, expected);
    }
}
4090
/// If only some samples have dimensions, the "size" column is still present
/// and is null for the samples without them.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_size_column_partial() {
    let sized = Sample {
        image_name: Some("img1.jpg".to_string()),
        width: Some(1920),
        height: Some(1080),
        ..Default::default()
    };
    // Deliberately leaves width/height unset.
    let unsized_sample = Sample {
        image_name: Some("img2.jpg".to_string()),
        ..Default::default()
    };

    let df = samples_dataframe(&[sized, unsized_sample]).unwrap();

    // Not all null, so the column must survive.
    let size_column = df
        .column("size")
        .expect("size column should be present when at least one sample has dimensions");
    assert_eq!(size_column.len(), 2);
    assert_eq!(size_column.null_count(), 1, "one row should be null");
}
4116
/// Score columns exist only for the scores that were set, and the box2d
/// score value is read back unchanged.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_score_columns() {
    let triangle = vec![(0.0, 0.0), (1.0, 0.0), (1.0, 1.0)];

    let mut scored = Annotation::new();
    scored.set_name(Some("test".to_string()));
    scored.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
    scored.set_box2d_score(Some(0.95));
    scored.set_polygon(Some(Polygon::new(vec![triangle])));
    scored.set_polygon_score(Some(0.87));

    let sample = Sample {
        image_name: Some("test.jpg".to_string()),
        annotations: vec![scored],
        ..Default::default()
    };

    let df = samples_dataframe(&[sample]).unwrap();

    // Scores that were set must have a column.
    let present = [
        ("box2d_score", "box2d_score column missing"),
        ("polygon_score", "polygon_score column missing"),
    ];
    for (column, message) in present {
        assert!(df.column(column).is_ok(), "{}", message);
    }

    // Scores that were never set are all null and must be dropped.
    let absent = [
        ("box3d_score", "box3d_score should be dropped (all null)"),
        ("mask_score", "mask_score should be dropped (all null)"),
    ];
    for (column, message) in absent {
        assert!(df.column(column).is_err(), "{}", message);
    }

    // The stored box2d score comes back as-is.
    let first_score = df
        .column("box2d_score")
        .unwrap()
        .f32()
        .unwrap()
        .get(0);
    assert_eq!(first_score, Some(0.95));
}
4164
/// A sample with timing data produces a "timing" column of Struct type.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_timing_column() {
    let mut person = Annotation::new();
    person.set_name(Some("test".to_string()));
    person.set_label(Some("person".to_string()));

    let full_timing = Timing {
        load: Some(1_000_000),
        preprocess: Some(2_000_000),
        inference: Some(50_000_000),
        decode: Some(3_000_000),
    };
    let sample = Sample {
        image_name: Some("test.jpg".to_string()),
        annotations: vec![person],
        timing: Some(full_timing),
        ..Default::default()
    };

    let df = samples_dataframe(&[sample]).unwrap();

    // The column has data, so it must be present ...
    assert!(df.column("timing").is_ok(), "timing column missing");

    // ... and it must be a struct.
    let timing_column = df.column("timing").unwrap();
    let dtype = timing_column.dtype();
    assert!(
        matches!(dtype, polars::prelude::DataType::Struct(..)),
        "timing column should be Struct type, got {:?}",
        dtype
    );
}
4197
/// An annotation carrying `MaskData` produces a non-null "mask" column of
/// Binary type.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_mask_binary_column() {
    // Create a small valid PNG via MaskData::encode
    let pixels = vec![0u8, 255, 128, 64];
    let encoded = MaskData::encode(&pixels, 2, 2, 8).unwrap();

    let mut masked = Annotation::new();
    masked.set_name(Some("test".to_string()));
    masked.set_mask(Some(encoded));

    let sample = Sample {
        image_name: Some("test.jpg".to_string()),
        annotations: vec![masked],
        ..Default::default()
    };

    let df = samples_dataframe(&[sample]).unwrap();

    // The mask column must exist, be Binary, and hold a value.
    let mask_column = df.column("mask").unwrap();
    assert_eq!(
        mask_column.dtype(),
        &polars::prelude::DataType::Binary,
        "mask column should be Binary"
    );
    assert_eq!(
        mask_column.null_count(),
        0,
        "mask value should not be null"
    );
}
4225
4226    // =========================================================================
4227    // AnnotationType "seg" alias test
4228    // =========================================================================
4229
/// The string "seg" converts to `AnnotationType::Polygon`.
#[test]
fn test_annotation_type_seg_alias() {
    let parsed = AnnotationType::try_from("seg").unwrap();
    assert_eq!(
        parsed,
        AnnotationType::Polygon,
        "\"seg\" should map to Polygon for server round-trip"
    );
}
4238
4239    // =========================================================================
4240    // Timing edge case tests
4241    // =========================================================================
4242
/// A timing value with only `load` set is enough to keep the "timing"
/// column.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_timing_partial() {
    let mut person = Annotation::new();
    person.set_name(Some("test".to_string()));
    person.set_label(Some("person".to_string()));

    // Only `load` is populated; the remaining fields stay at their defaults.
    let load_only = Timing {
        load: Some(1000),
        ..Default::default()
    };
    let sample = Sample {
        image_name: Some("test.jpg".to_string()),
        annotations: vec![person],
        timing: Some(load_only),
        ..Default::default()
    };

    let df = samples_dataframe(&[sample]).unwrap();

    // One non-null field is enough for the column to be kept.
    assert!(
        df.column("timing").is_ok(),
        "timing column should be present when partial data exists"
    );
}
4269
/// When no sample has timing data, the "timing" column is left out.
#[cfg(feature = "polars")]
#[test]
fn test_samples_dataframe_timing_all_none_omitted() {
    let mut person = Annotation::new();
    person.set_name(Some("test".to_string()));
    person.set_label(Some("person".to_string()));

    // The only sample explicitly has no timing.
    let untimed = Sample {
        image_name: Some("test.jpg".to_string()),
        annotations: vec![person],
        timing: None,
        ..Default::default()
    };

    let df = samples_dataframe(&[untimed]).unwrap();

    assert!(
        df.column("timing").is_err(),
        "timing column should be omitted when all samples have timing: None"
    );
}
4292
4293    // =========================================================================
4294    // Score boundary tests
4295    // =========================================================================
4296
4297    #[cfg(feature = "polars")]
4298    #[test]
4299    fn test_samples_dataframe_score_zero_survives() {
4300        // score = 0.0 must be non-null in the output (not confused with None)
4301        let mut ann = Annotation::new();
4302        ann.set_name(Some("test".to_string()));
4303        ann.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
4304        ann.set_box2d_score(Some(0.0));
4305
4306        let sample = Sample {
4307            image_name: Some("test.jpg".to_string()),
4308            annotations: vec![ann],
4309            ..Default::default()
4310        };
4311
4312        let df = samples_dataframe(&[sample]).unwrap();
4313
4314        let scores = df.column("box2d_score").unwrap();
4315        let val = scores.f32().unwrap().get(0);
4316        assert_eq!(val, Some(0.0), "score of 0.0 should survive as non-null");
4317    }
4318
4319    #[cfg(feature = "polars")]
4320    #[test]
4321    fn test_samples_dataframe_score_one_survives() {
4322        let mut ann = Annotation::new();
4323        ann.set_name(Some("test".to_string()));
4324        ann.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
4325        ann.set_box2d_score(Some(1.0));
4326
4327        let sample = Sample {
4328            image_name: Some("test.jpg".to_string()),
4329            annotations: vec![ann],
4330            ..Default::default()
4331        };
4332
4333        let df = samples_dataframe(&[sample]).unwrap();
4334
4335        let scores = df.column("box2d_score").unwrap();
4336        let val = scores.f32().unwrap().get(0);
4337        assert_eq!(val, Some(1.0), "score of 1.0 should survive as non-null");
4338    }
4339}