Skip to main content

edgefirst_client/
dataset.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright © 2025 Au-Zone Technologies. All Rights Reserved.
3
4use std::{collections::HashMap, fmt::Display};
5
6use crate::{
7    Client, Error,
8    api::{AnnotationSetID, DatasetID, ProjectID, SampleID},
9    mask::MaskData,
10};
11use chrono::{DateTime, Utc};
12use serde::{Deserialize, Serialize};
13
14#[cfg(feature = "polars")]
15use polars::prelude::*;
16
/// File types supported in EdgeFirst Studio datasets.
///
/// Represents the different types of sensor data files that can be stored
/// and processed in a dataset. EdgeFirst Studio supports various modalities
/// including visual images and different forms of LiDAR and radar data.
///
/// # String Representations
///
/// This enum has two string representations:
/// - **Display** (`fmt::Display`): Returns the server API type name (e.g.,
///   `"lidar.depth"`) used when making API requests to EdgeFirst Studio.
/// - **file_extension()**: Returns the file extension for saving (e.g.,
///   `"lidar.png"`) which may differ from the API type name.
///
/// # Parsing
///
/// `TryFrom<&str>` and `FromStr` accept every `Display` name, the CLI-style
/// names such as `"lidar.png"` and `"lidar.jpg"`, and a few short aliases
/// (`"depthmap"`, `"pcd"`, `"cube"`, ...). Matching is exact and
/// case-sensitive; anything else yields `Error::InvalidFileType`.
///
/// # Examples
///
/// ```rust
/// use edgefirst_client::FileType;
///
/// // Create file types from strings
/// let image_type: FileType = "image".try_into().unwrap();
/// let lidar_type: FileType = "lidar.pcd".try_into().unwrap();
///
/// // Display file types
/// println!("Processing {} files", image_type); // "Processing image files"
///
/// // Use in dataset operations - example usage
/// let file_type = FileType::Image;
/// match file_type {
///     FileType::Image => println!("Processing image files"),
///     FileType::LidarPcd => println!("Processing LiDAR point cloud files"),
///     _ => println!("Processing other sensor data"),
/// }
/// ```
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum FileType {
    /// Standard image files (JPEG, PNG, etc.)
    Image,
    /// LiDAR point cloud data files (.pcd format)
    LidarPcd,
    /// LiDAR depth images (.png format)
    LidarDepth,
    /// LiDAR reflectance images (.jpg format)
    LidarReflect,
    /// Radar point cloud data files (.pcd format)
    RadarPcd,
    /// Radar cube data files (.png format)
    RadarCube,
    /// All sensor types - a wildcard that `FileType::expand_types` replaces
    /// with every concrete variant. It has no file extension of its own.
    All,
}
68
69impl std::fmt::Display for FileType {
70    /// Returns the server API type name for this file type.
71    /// Used when making API requests to the server.
72    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73        let value = match self {
74            FileType::Image => "image",
75            FileType::LidarPcd => "lidar.pcd",
76            FileType::LidarDepth => "lidar.depth",
77            FileType::LidarReflect => "lidar.reflect",
78            FileType::RadarPcd => "radar.pcd",
79            FileType::RadarCube => "radar.png",
80            FileType::All => "all",
81        };
82        write!(f, "{}", value)
83    }
84}
85
86impl FileType {
87    /// Returns the file extension to use when saving downloaded files.
88    /// This may differ from the API type name (e.g., lidar.depth → lidar.png).
89    pub fn file_extension(&self) -> &'static str {
90        match self {
91            FileType::Image => "jpg", // Will be overridden by infer detection
92            FileType::LidarPcd => "lidar.pcd",
93            FileType::LidarDepth => "lidar.png",
94            FileType::LidarReflect => "lidar.jpg",
95            FileType::RadarPcd => "radar.pcd",
96            FileType::RadarCube => "radar.png",
97            FileType::All => "",
98        }
99    }
100}
101
102impl TryFrom<&str> for FileType {
103    type Error = crate::Error;
104
105    fn try_from(s: &str) -> Result<Self, Self::Error> {
106        match s {
107            "image" => Ok(FileType::Image),
108            "lidar.pcd" => Ok(FileType::LidarPcd),
109            // Accept CLI names (lidar.png), server names (lidar.depth), and aliases
110            "lidar.png" | "lidar.depth" | "depth.png" | "depthmap" => Ok(FileType::LidarDepth),
111            "lidar.jpg" | "lidar.jpeg" | "lidar.reflect" => Ok(FileType::LidarReflect),
112            "radar.pcd" | "pcd" => Ok(FileType::RadarPcd),
113            "radar.png" | "cube" => Ok(FileType::RadarCube),
114            "all" => Ok(FileType::All),
115            _ => Err(crate::Error::InvalidFileType(s.to_string())),
116        }
117    }
118}
119
120impl std::str::FromStr for FileType {
121    type Err = crate::Error;
122
123    fn from_str(s: &str) -> Result<Self, Self::Err> {
124        s.try_into()
125    }
126}
127
128impl FileType {
129    /// Returns all concrete sensor file types (excludes `All`).
130    ///
131    /// This is useful for expanding the `All` variant or listing available
132    /// types.
133    ///
134    /// # Example
135    ///
136    /// ```rust
137    /// use edgefirst_client::FileType;
138    ///
139    /// let all_types = FileType::all_sensor_types();
140    /// assert!(all_types.contains(&FileType::Image));
141    /// assert!(!all_types.contains(&FileType::All));
142    /// ```
143    pub fn all_sensor_types() -> Vec<FileType> {
144        vec![
145            FileType::Image,
146            FileType::LidarPcd,
147            FileType::LidarDepth,
148            FileType::LidarReflect,
149            FileType::RadarPcd,
150            FileType::RadarCube,
151        ]
152    }
153
154    /// Returns all valid type names as strings for help text.
155    ///
156    /// # Example
157    ///
158    /// ```rust
159    /// use edgefirst_client::FileType;
160    ///
161    /// let names = FileType::type_names();
162    /// assert!(names.contains(&"image"));
163    /// assert!(names.contains(&"all"));
164    /// ```
165    pub fn type_names() -> Vec<&'static str> {
166        vec![
167            "image",
168            "lidar.pcd",
169            "lidar.png",
170            "lidar.jpg",
171            "radar.pcd",
172            "radar.png",
173            "all",
174        ]
175    }
176
177    /// Expands a list of file types, replacing `All` with all concrete sensor
178    /// types.
179    ///
180    /// If the input contains `FileType::All`, returns all sensor types.
181    /// Otherwise, returns the input types unchanged.
182    ///
183    /// # Example
184    ///
185    /// ```rust
186    /// use edgefirst_client::FileType;
187    ///
188    /// let types = vec![FileType::All];
189    /// let expanded = FileType::expand_types(&types);
190    /// assert_eq!(expanded.len(), 6); // All concrete sensor types
191    ///
192    /// let types = vec![FileType::Image, FileType::LidarPcd];
193    /// let expanded = FileType::expand_types(&types);
194    /// assert_eq!(expanded.len(), 2); // Unchanged
195    /// ```
196    pub fn expand_types(types: &[FileType]) -> Vec<FileType> {
197        if types.contains(&FileType::All) {
198            FileType::all_sensor_types()
199        } else {
200            types.to_vec()
201        }
202    }
203}
204
/// Annotation types supported for labeling data in EdgeFirst Studio.
///
/// Represents the different types of annotations that can be applied to
/// sensor data for machine learning tasks. Each type corresponds to a
/// different annotation geometry and use case.
///
/// # Parsing
///
/// `TryFrom<&str>` accepts `"box2d"`, `"box3d"`, `"polygon"`, `"seg"`,
/// `"mask"` and `"raster"`. Note that `"mask"` parses to
/// [`AnnotationType::Polygon`] for backward compatibility, while `"raster"`
/// selects [`AnnotationType::Mask`]. Because `Display` prints `Mask` as
/// `"mask"`, a `Mask` value does not round-trip through its display string.
///
/// The infallible `From<String>` / `From<&String>` conversions fall back to
/// [`AnnotationType::Box2d`] when the string is not recognised.
///
/// # Examples
///
/// ```rust
/// use edgefirst_client::AnnotationType;
///
/// // Create annotation types from strings (using TryFrom)
/// let box_2d: AnnotationType = "box2d".try_into().unwrap();
/// let segmentation: AnnotationType = "polygon".try_into().unwrap();
///
/// // Or use From with String
/// let box_2d = AnnotationType::from("box2d".to_string());
/// let segmentation = AnnotationType::from("polygon".to_string());
///
/// // Display annotation types
/// println!("Annotation type: {}", box_2d); // "Annotation type: box2d"
///
/// // Use in matching and processing
/// let annotation_type = AnnotationType::Box2d;
/// match annotation_type {
///     AnnotationType::Box2d => println!("Processing 2D bounding boxes"),
///     AnnotationType::Box3d => println!("Processing 3D bounding boxes"),
///     AnnotationType::Polygon => println!("Processing polygon contours"),
///     AnnotationType::Mask => println!("Processing raster pixel masks"),
/// }
/// ```
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum AnnotationType {
    /// 2D bounding boxes for object detection in images
    Box2d,
    /// 3D bounding boxes for object detection in 3D space (LiDAR, etc.)
    Box3d,
    /// Vector polygon contours for instance segmentation
    Polygon,
    /// Raster pixel masks for semantic/instance segmentation
    Mask,
}
247
248impl TryFrom<&str> for AnnotationType {
249    type Error = crate::Error;
250
251    fn try_from(s: &str) -> Result<Self, Self::Error> {
252        match s {
253            "box2d" => Ok(AnnotationType::Box2d),
254            "box3d" => Ok(AnnotationType::Box3d),
255            "polygon" => Ok(AnnotationType::Polygon),
256            "seg" => Ok(AnnotationType::Polygon),
257            "mask" => Ok(AnnotationType::Polygon), // backward compat
258            "raster" => Ok(AnnotationType::Mask),
259            _ => Err(crate::Error::InvalidAnnotationType(s.to_string())),
260        }
261    }
262}
263
264impl From<String> for AnnotationType {
265    fn from(s: String) -> Self {
266        // For backward compatibility, default to Box2d if invalid
267        s.as_str().try_into().unwrap_or(AnnotationType::Box2d)
268    }
269}
270
271impl From<&String> for AnnotationType {
272    fn from(s: &String) -> Self {
273        // For backward compatibility, default to Box2d if invalid
274        s.as_str().try_into().unwrap_or(AnnotationType::Box2d)
275    }
276}
277
278impl AnnotationType {
279    /// Returns the server API type name for this annotation type.
280    ///
281    /// The server uses different naming conventions than the client:
282    /// - `Box2d` → `"box"` (server) vs `"box2d"` (client display)
283    /// - `Box3d` → `"box3d"` (same)
284    /// - `Polygon` → `"seg"` (server) vs `"polygon"` (client display)
285    /// - `Mask` → `"seg"` (server) vs `"mask"` (client display)
286    pub fn as_server_type(&self) -> &'static str {
287        match self {
288            AnnotationType::Box2d => "box",
289            AnnotationType::Box3d => "box3d",
290            AnnotationType::Polygon => "seg",
291            AnnotationType::Mask => "seg",
292        }
293    }
294}
295
296impl std::fmt::Display for AnnotationType {
297    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
298        let value = match self {
299            AnnotationType::Box2d => "box2d",
300            AnnotationType::Box3d => "box3d",
301            AnnotationType::Polygon => "polygon",
302            AnnotationType::Mask => "mask",
303        };
304        write!(f, "{}", value)
305    }
306}
307
/// A dataset in EdgeFirst Studio containing sensor data and annotations.
///
/// Datasets are collections of multi-modal sensor data (images, LiDAR, radar)
/// along with their corresponding annotations (bounding boxes, segmentation
/// masks, 3D annotations). Datasets belong to projects and can be used for
/// training and validation of machine learning models.
///
/// Values of this type are obtained by deserialization; all fields are
/// private and exposed through the accessor methods on `Dataset`.
///
/// # Features
///
/// - **Multi-modal Data**: Support for images, LiDAR point clouds, radar data
/// - **Rich Annotations**: 2D/3D bounding boxes, segmentation masks
/// - **Metadata**: Timestamps, sensor configurations, calibration data
/// - **Version Control**: Track changes and maintain data lineage
/// - **Format Conversion**: Export to popular ML frameworks
///
/// # Examples
///
/// ```no_run
/// use edgefirst_client::{Client, Dataset, DatasetID};
/// use std::str::FromStr;
///
/// # async fn example() -> Result<(), edgefirst_client::Error> {
/// # let client = Client::new()?;
/// // Get dataset information
/// let dataset_id = DatasetID::from_str("ds-abc123")?;
/// let dataset = client.dataset(dataset_id).await?;
/// println!("Dataset: {}", dataset.name());
///
/// // Access dataset metadata
/// println!("Dataset ID: {}", dataset.id());
/// println!("Description: {}", dataset.description());
/// println!("Created: {}", dataset.created());
///
/// // Work with dataset data would require additional methods
/// // that are implemented in the full API
/// # Ok(())
/// # }
/// ```
#[derive(Deserialize, Clone, Debug)]
pub struct Dataset {
    /// Identifier of this dataset.
    id: DatasetID,
    /// Identifier of the project that owns this dataset.
    project_id: ProjectID,
    /// Dataset name; printed after the ID by the `Display` impl.
    name: String,
    /// Free-form description text.
    description: String,
    // NOTE(review): the meaning of `cloud_key` is not visible in this file;
    // presumably a storage key on the server side - confirm before relying on it.
    cloud_key: String,
    /// Creation timestamp, read from the `createdAt` key of the payload.
    #[serde(rename = "createdAt")]
    created: DateTime<Utc>,
}
356
357impl Display for Dataset {
358    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
359        write!(f, "{} {}", self.id, self.name)
360    }
361}
362
363impl Dataset {
364    pub fn id(&self) -> DatasetID {
365        self.id
366    }
367
368    pub fn project_id(&self) -> ProjectID {
369        self.project_id
370    }
371
372    pub fn name(&self) -> &str {
373        &self.name
374    }
375
376    pub fn description(&self) -> &str {
377        &self.description
378    }
379
380    pub fn cloud_key(&self) -> &str {
381        &self.cloud_key
382    }
383
384    pub fn created(&self) -> &DateTime<Utc> {
385        &self.created
386    }
387
388    pub async fn project(&self, client: &Client) -> Result<crate::api::Project, Error> {
389        client.project(self.project_id).await
390    }
391
392    pub async fn annotation_sets(&self, client: &Client) -> Result<Vec<AnnotationSet>, Error> {
393        client.annotation_sets(self.id).await
394    }
395
396    pub async fn labels(&self, client: &Client) -> Result<Vec<Label>, Error> {
397        client.labels(self.id).await
398    }
399
400    pub async fn add_label(&self, client: &Client, name: &str) -> Result<(), Error> {
401        client.add_label(self.id, name).await
402    }
403
404    pub async fn remove_label(&self, client: &Client, name: &str) -> Result<(), Error> {
405        let labels = self.labels(client).await?;
406        let label = labels
407            .iter()
408            .find(|l| l.name() == name)
409            .ok_or_else(|| Error::MissingLabel(name.to_string()))?;
410        client.remove_label(label.id()).await
411    }
412}
413
414/// The AnnotationSet class represents a collection of annotations in a dataset.
415/// A dataset can have multiple annotation sets, each containing annotations for
416/// different tasks or purposes.
417#[derive(Deserialize)]
418pub struct AnnotationSet {
419    id: AnnotationSetID,
420    dataset_id: DatasetID,
421    name: String,
422    description: String,
423    #[serde(rename = "date")]
424    created: DateTime<Utc>,
425}
426
427impl Display for AnnotationSet {
428    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
429        write!(f, "{} {}", self.id, self.name)
430    }
431}
432
433impl AnnotationSet {
434    pub fn id(&self) -> AnnotationSetID {
435        self.id
436    }
437
438    pub fn dataset_id(&self) -> DatasetID {
439        self.dataset_id
440    }
441
442    pub fn name(&self) -> &str {
443        &self.name
444    }
445
446    pub fn description(&self) -> &str {
447        &self.description
448    }
449
450    pub fn created(&self) -> DateTime<Utc> {
451        self.created
452    }
453
454    pub async fn dataset(&self, client: &Client) -> Result<Dataset, Error> {
455        client.dataset(self.dataset_id).await
456    }
457}
458
/// Pipeline timing measurements for a sample, in nanoseconds.
///
/// Each field records the wall-clock duration of one pipeline stage.
/// Populated from Arrow metadata; not part of the Studio JSON-RPC API.
///
/// Every stage is optional; `Timing::default()` has all four set to `None`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Timing {
    /// Duration of the data-loading stage (nanoseconds).
    pub load: Option<i64>,
    /// Duration of the preprocessing stage (nanoseconds).
    pub preprocess: Option<i64>,
    /// Duration of the inference stage (nanoseconds).
    pub inference: Option<i64>,
    /// Duration of the decoding / postprocessing stage (nanoseconds).
    pub decode: Option<i64>,
}
474
/// A sample in a dataset, typically representing a single image with metadata
/// and optional sensor data.
///
/// Each sample has a unique ID, image reference, and can include additional
/// sensor data like LiDAR, radar, or depth maps. Samples can also have
/// associated annotations.
///
/// # Serialization
///
/// Only `Serialize` is derived here; `Deserialize` is implemented by hand via
/// the intermediate `SampleRaw` struct. As a consequence the
/// deserialize-oriented serde attributes on the fields below (`alias`,
/// `rename(deserialize = ..)`, `default`, `deserialize_with`) do not affect
/// the derived `Serialize` impl.
///
/// When serializing:
/// - every `Option` field is omitted when it is `None`;
/// - `group` is written under the key `"group"`;
/// - `location` is written under the key `"sensors"`;
/// - `files` and `annotations` are omitted when empty and otherwise go
///   through `serialize_files` / `serialize_annotations`;
/// - `timing` is never written.
#[derive(Serialize, Clone, Debug)]
pub struct Sample {
    /// Sample identifier, if one has been assigned.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<SampleID>,
    /// Dataset split (train, val, test) - stored in Arrow metadata, not used
    /// for directory structure.
    /// API field name discrepancy: samples.populate2 expects "group", but
    /// samples.list returns "group_name".
    #[serde(
        alias = "group_name",
        rename(serialize = "group", deserialize = "group_name"),
        skip_serializing_if = "Option::is_none"
    )]
    pub group: Option<String>,
    /// Sequence name, if present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_name: Option<String>,
    /// Sequence UUID, if present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_uuid: Option<String>,
    /// Sequence description, if present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_description: Option<String>,
    /// Frame number within a sequence. The server's `-1` marker for
    /// non-sequence samples is mapped to `None` during deserialization.
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_frame_number"
    )]
    pub frame_number: Option<u32>,
    /// Sample UUID, if present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub uuid: Option<String>,
    /// Image file name; also used by the `Display` impl.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_name: Option<String>,
    /// URL of the image, if present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_url: Option<String>,
    /// Image width. NOTE(review): presumably pixels - confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub width: Option<u32>,
    /// Image height. NOTE(review): presumably pixels - confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub height: Option<u32>,
    /// Timestamp associated with the sample (UTC), if present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub date: Option<DateTime<Utc>>,
    /// Source string for the sample, if present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// Camera location and pose (GPS + IMU data).
    /// Location data is extracted from the "sensors" field during
    /// deserialization. When uploading samples, this field is serialized
    /// as "sensors" to match the samples.populate2 API format.
    #[serde(skip_serializing_if = "Option::is_none", rename(serialize = "sensors"))]
    pub location: Option<Location>,
    /// Image degradation type (blur, occlusion, weather, etc.).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub degradation: Option<String>,
    /// LVIS: label_index values for categories verified absent from this image.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neg_label_indices: Option<Vec<u32>>,
    /// LVIS: label_index values for categories with incomplete annotation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub not_exhaustive_label_indices: Option<Vec<u32>>,
    /// Additional sensor files (LiDAR, radar, depth maps, etc.).
    /// Deserialization is handled by custom Deserialize impl which extracts
    /// files from the "sensors" field. Serialization converts to HashMap for
    /// samples.populate2 API.
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
        serialize_with = "serialize_files"
    )]
    pub files: Vec<SampleFile>,
    /// Annotations associated with this sample.
    /// Deserialization is handled by custom Deserialize impl.
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
        serialize_with = "serialize_annotations"
    )]
    pub annotations: Vec<Annotation>,
    /// Pipeline timing measurements (populated from Arrow, not from Studio
    /// JSON-RPC).
    #[serde(skip)]
    pub timing: Option<Timing>,
}
559
560// Custom deserializer for frame_number - converts -1 to None
561// Server returns -1 for non-sequence samples, but clients should see None
562fn deserialize_frame_number<'de, D>(deserializer: D) -> Result<Option<u32>, D::Error>
563where
564    D: serde::Deserializer<'de>,
565{
566    use serde::Deserialize;
567
568    let value = Option::<i32>::deserialize(deserializer)?;
569    Ok(value.and_then(|v| if v < 0 { None } else { Some(v as u32) }))
570}
571
/// Reports whether `s` looks like a downloadable URL, i.e. begins with
/// `http://` or `https://` (case-sensitive).
///
/// This is how pre-signed URLs are told apart from inline base64/JSON data.
fn is_valid_url(s: &str) -> bool {
    const SCHEMES: [&str; 2] = ["http://", "https://"];
    SCHEMES.iter().any(|&scheme| s.starts_with(scheme))
}
577
578// Custom serializer for files field - converts Vec<SampleFile> to
579// HashMap<String, String>
580fn serialize_files<S>(files: &[SampleFile], serializer: S) -> Result<S::Ok, S::Error>
581where
582    S: serde::Serializer,
583{
584    use serde::Serialize;
585    let map: HashMap<String, String> = files
586        .iter()
587        .filter_map(|f| {
588            f.filename()
589                .map(|filename| (f.file_type().to_string(), filename.to_string()))
590        })
591        .collect();
592    map.serialize(serializer)
593}
594
595// Custom serializer for annotations field - serializes to a flat
596// Vec<Annotation> to match the updated samples.populate2 contract (annotations
597// array)
598fn serialize_annotations<S>(annotations: &Vec<Annotation>, serializer: S) -> Result<S::Ok, S::Error>
599where
600    S: serde::Serializer,
601{
602    serde::Serialize::serialize(annotations, serializer)
603}
604
605// Custom deserializer for annotations field - converts server format back to
606// Vec<Annotation>
607fn deserialize_annotations<'de, D>(deserializer: D) -> Result<Vec<Annotation>, D::Error>
608where
609    D: serde::Deserializer<'de>,
610{
611    use serde::Deserialize;
612
613    #[derive(Deserialize)]
614    #[serde(untagged)]
615    enum AnnotationsFormat {
616        Vec(Vec<Annotation>),
617        Map(HashMap<String, Vec<Annotation>>),
618    }
619
620    let value = Option::<AnnotationsFormat>::deserialize(deserializer)?;
621    Ok(value
622        .map(|v| match v {
623            AnnotationsFormat::Vec(annotations) => annotations,
624            AnnotationsFormat::Map(map) => convert_annotations_map_to_vec(map),
625        })
626        .unwrap_or_default())
627}
628
/// Intermediate struct for deserializing sensors data that may contain both
/// file references (URLs/data) and location data (GPS/IMU).
///
/// Produced by `deserialize_sensors_data`; the default value has no files and
/// no location.
#[derive(Debug, Default)]
struct SensorsData {
    /// Sensor files found in the payload, as URL references or inline data.
    files: Vec<SampleFile>,
    /// Location assembled from `gps` / `imu` entries, when at least one of
    /// them was present and parseable.
    location: Option<Location>,
}
636
637/// Deserialize sensors field into both files and location data.
638fn deserialize_sensors_data(value: Option<serde_json::Value>) -> SensorsData {
639    use serde_json::Value;
640
641    /// Create a SampleFile from a string value, distinguishing URL vs inline
642    /// data.
643    fn create_sample_file(file_type: String, value: String) -> SampleFile {
644        if is_valid_url(&value) {
645            SampleFile::with_url(file_type, value)
646        } else {
647            SampleFile::with_data(file_type, value)
648        }
649    }
650
651    /// Create a SampleFile from any JSON value, converting non-strings to JSON.
652    fn create_sample_file_from_value(file_type: String, value: Value) -> Option<SampleFile> {
653        match value {
654            Value::String(s) => Some(create_sample_file(file_type, s)),
655            Value::Object(_) | Value::Array(_) => {
656                // Inline JSON data (legacy format) - serialize to string
657                serde_json::to_string(&value)
658                    .ok()
659                    .map(|data| SampleFile::with_data(file_type, data))
660            }
661            _ => None,
662        }
663    }
664
665    /// Try to extract Location from a JSON object containing gps/imu keys.
666    fn extract_location(map: &serde_json::Map<String, Value>) -> Option<Location> {
667        let gps = map
668            .get("gps")
669            .and_then(|v| serde_json::from_value::<GpsData>(v.clone()).ok());
670        let imu = map
671            .get("imu")
672            .and_then(|v| serde_json::from_value::<ImuData>(v.clone()).ok());
673
674        if gps.is_some() || imu.is_some() {
675            Some(Location { gps, imu })
676        } else {
677            None
678        }
679    }
680
681    let mut result = SensorsData::default();
682
683    match value {
684        None => result,
685        Some(Value::Array(arr)) => {
686            // Array of single-key objects: [{"radar.png": "url"}, {"gps": {...}}, ...]
687            for item in arr {
688                if let Value::Object(map) = item {
689                    // Check if this looks like a SampleFile object (has "type" key)
690                    if map.contains_key("type") {
691                        // Try to parse as SampleFile
692                        if let Ok(file) =
693                            serde_json::from_value::<SampleFile>(Value::Object(map.clone()))
694                        {
695                            result.files.push(file);
696                        }
697                    } else {
698                        // Check for location data (gps/imu)
699                        if let Some(loc) = extract_location(&map) {
700                            // Merge with existing location
701                            if let Some(ref mut existing) = result.location {
702                                if loc.gps.is_some() {
703                                    existing.gps = loc.gps;
704                                }
705                                if loc.imu.is_some() {
706                                    existing.imu = loc.imu;
707                                }
708                            } else {
709                                result.location = Some(loc);
710                            }
711                        } else {
712                            // Single-key object: {file_type: url_or_data}
713                            for (file_type, value) in map {
714                                if let Some(file) = create_sample_file_from_value(file_type, value)
715                                {
716                                    result.files.push(file);
717                                }
718                            }
719                        }
720                    }
721                }
722            }
723            result
724        }
725        Some(Value::Object(map)) => {
726            // Check if this contains location data (gps or imu keys with object values)
727            if let Some(loc) = extract_location(&map) {
728                result.location = Some(loc);
729            }
730
731            // Also extract any file references (non-location keys)
732            for (key, value) in map {
733                if key != "gps"
734                    && key != "imu"
735                    && let Some(file) = create_sample_file_from_value(key, value)
736                {
737                    result.files.push(file);
738                }
739            }
740            result
741        }
742        Some(_) => result,
743    }
744}
745
/// Raw sample structure for deserialization.
/// This mirrors Sample but deserializes sensors into a combined struct
/// that captures both files and location data.
///
/// `Sample`'s `Deserialize` impl parses into this type first and then
/// converts with `From<SampleRaw> for Sample`, which splits `sensors` into
/// `files` and `location`.
#[derive(Deserialize)]
struct SampleRaw {
    #[serde(default)]
    id: Option<SampleID>,
    /// Accepted under either `group` or `group_name`.
    #[serde(alias = "group_name")]
    group: Option<String>,
    sequence_name: Option<String>,
    sequence_uuid: Option<String>,
    sequence_description: Option<String>,
    /// Negative values (the server's `-1` marker) are mapped to `None`.
    #[serde(default, deserialize_with = "deserialize_frame_number")]
    frame_number: Option<u32>,
    uuid: Option<String>,
    image_name: Option<String>,
    image_url: Option<String>,
    width: Option<u32>,
    height: Option<u32>,
    date: Option<DateTime<Utc>>,
    source: Option<String>,
    degradation: Option<String>,
    #[serde(default)]
    neg_label_indices: Option<Vec<u32>>,
    #[serde(default)]
    not_exhaustive_label_indices: Option<Vec<u32>>,
    /// Raw sensors JSON - will be processed into files + location
    // NOTE(review): the alias repeats the field's own name and looks
    // redundant - confirm before removing.
    #[serde(default, alias = "sensors")]
    sensors: Option<serde_json::Value>,
    /// Accepts an array or a keyed map of arrays; missing/null becomes empty.
    #[serde(default, deserialize_with = "deserialize_annotations")]
    annotations: Vec<Annotation>,
}
778
779impl From<SampleRaw> for Sample {
780    fn from(raw: SampleRaw) -> Self {
781        let sensors_data = deserialize_sensors_data(raw.sensors);
782
783        Sample {
784            id: raw.id,
785            group: raw.group,
786            sequence_name: raw.sequence_name,
787            sequence_uuid: raw.sequence_uuid,
788            sequence_description: raw.sequence_description,
789            frame_number: raw.frame_number,
790            uuid: raw.uuid,
791            image_name: raw.image_name,
792            image_url: raw.image_url,
793            width: raw.width,
794            height: raw.height,
795            date: raw.date,
796            source: raw.source,
797            location: sensors_data.location,
798            degradation: raw.degradation,
799            neg_label_indices: raw.neg_label_indices,
800            not_exhaustive_label_indices: raw.not_exhaustive_label_indices,
801            files: sensors_data.files,
802            annotations: raw.annotations,
803            timing: None,
804        }
805    }
806}
807
808impl<'de> serde::Deserialize<'de> for Sample {
809    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
810    where
811        D: serde::Deserializer<'de>,
812    {
813        let raw = SampleRaw::deserialize(deserializer)?;
814        Ok(Sample::from(raw))
815    }
816}
817
818impl Display for Sample {
819    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
820        write!(
821            f,
822            "{} {}",
823            self.id
824                .map(|id| id.to_string())
825                .unwrap_or_else(|| "unknown".to_string()),
826            self.image_name().unwrap_or("unknown")
827        )
828    }
829}
830
831impl Default for Sample {
832    fn default() -> Self {
833        Self::new()
834    }
835}
836
837impl Sample {
838    /// Creates a new empty sample.
839    pub fn new() -> Self {
840        Self {
841            id: None,
842            group: None,
843            sequence_name: None,
844            sequence_uuid: None,
845            sequence_description: None,
846            frame_number: None,
847            uuid: None,
848            image_name: None,
849            image_url: None,
850            width: None,
851            height: None,
852            date: None,
853            source: None,
854            location: None,
855            degradation: None,
856            neg_label_indices: None,
857            not_exhaustive_label_indices: None,
858            files: vec![],
859            annotations: vec![],
860            timing: None,
861        }
862    }
863
864    pub fn id(&self) -> Option<SampleID> {
865        self.id
866    }
867
868    pub fn name(&self) -> Option<String> {
869        self.image_name.as_ref().map(|n| extract_sample_name(n))
870    }
871
872    pub fn group(&self) -> Option<&String> {
873        self.group.as_ref()
874    }
875
876    pub fn sequence_name(&self) -> Option<&String> {
877        self.sequence_name.as_ref()
878    }
879
880    pub fn sequence_uuid(&self) -> Option<&String> {
881        self.sequence_uuid.as_ref()
882    }
883
884    pub fn sequence_description(&self) -> Option<&String> {
885        self.sequence_description.as_ref()
886    }
887
888    pub fn frame_number(&self) -> Option<u32> {
889        self.frame_number
890    }
891
892    pub fn uuid(&self) -> Option<&String> {
893        self.uuid.as_ref()
894    }
895
896    pub fn image_name(&self) -> Option<&str> {
897        self.image_name.as_deref()
898    }
899
900    pub fn image_url(&self) -> Option<&str> {
901        self.image_url.as_deref()
902    }
903
904    pub fn width(&self) -> Option<u32> {
905        self.width
906    }
907
908    pub fn height(&self) -> Option<u32> {
909        self.height
910    }
911
912    pub fn date(&self) -> Option<DateTime<Utc>> {
913        self.date
914    }
915
916    pub fn source(&self) -> Option<&String> {
917        self.source.as_ref()
918    }
919
920    pub fn location(&self) -> Option<&Location> {
921        self.location.as_ref()
922    }
923
924    pub fn files(&self) -> &[SampleFile] {
925        &self.files
926    }
927
928    pub fn annotations(&self) -> &[Annotation] {
929        &self.annotations
930    }
931
932    pub fn with_annotations(mut self, annotations: Vec<Annotation>) -> Self {
933        self.annotations = annotations;
934        self
935    }
936
937    pub fn with_frame_number(mut self, frame_number: Option<u32>) -> Self {
938        self.frame_number = frame_number;
939        self
940    }
941
942    /// Downloads a file of the specified type for this sample.
943    ///
944    /// Supports both newer datasets (pre-signed URLs) and legacy datasets
945    /// (inline base64-encoded data):
946    /// 1. First tries to download from URL if available
947    /// 2. Falls back to decoding inline base64 data for legacy datasets
948    pub async fn download(
949        &self,
950        client: &Client,
951        file_type: FileType,
952    ) -> Result<Option<Vec<u8>>, Error> {
953        use base64::{Engine, engine::general_purpose::STANDARD};
954
955        // Handle image type separately (uses image_url field)
956        if file_type == FileType::Image {
957            if let Some(url) = self.image_url.as_deref()
958                && is_valid_url(url)
959            {
960                return Ok(Some(client.download(url).await?));
961            }
962            return Ok(None);
963        }
964
965        // Find the matching file for this type
966        let file = resolve_file(&file_type, &self.files);
967
968        match file {
969            Some(f) => {
970                // Prefer URL (newer datasets)
971                if let Some(url) = f.url() {
972                    return Ok(Some(client.download(url).await?));
973                }
974
975                // Fall back to inline data (legacy datasets)
976                if let Some(data) = f.data() {
977                    // Legacy data can be in several formats:
978                    // 1. Base64-encoded JSON: "eyJyYWRhci5wY2QiOi..." -> {"radar.pcd": "content"}
979                    // 2. Direct JSON wrapper: {"radar.pcd": "content"}
980                    // 3. Raw content (PCD text, etc.)
981
982                    // Try base64 decode first
983                    let decoded = if let Ok(bytes) = STANDARD.decode(data) {
984                        // Check if decoded bytes are UTF-8 JSON
985                        if let Ok(text) = String::from_utf8(bytes.clone()) {
986                            if text.starts_with('{') {
987                                // It's JSON - use the text for further processing
988                                text
989                            } else {
990                                // Non-JSON binary data - return as-is
991                                return Ok(Some(bytes));
992                            }
993                        } else {
994                            // Binary data - return as-is
995                            return Ok(Some(bytes));
996                        }
997                    } else {
998                        // Not base64 - use original data
999                        data.to_string()
1000                    };
1001
1002                    // Try to unwrap JSON wrapper: {"type_name": "content"}
1003                    let content = if decoded.starts_with('{') {
1004                        if let Ok(json) = serde_json::from_str::<serde_json::Value>(&decoded) {
1005                            if let Some(obj) = json.as_object() {
1006                                obj.values()
1007                                    .next()
1008                                    .and_then(|v| v.as_str())
1009                                    .map(|s| s.to_string())
1010                                    .unwrap_or(decoded)
1011                            } else {
1012                                decoded
1013                            }
1014                        } else {
1015                            decoded
1016                        }
1017                    } else {
1018                        decoded
1019                    };
1020
1021                    return Ok(Some(content.as_bytes().to_vec()));
1022                }
1023
1024                Ok(None)
1025            }
1026            None => Ok(None),
1027        }
1028    }
1029}
1030
/// A file associated with a sample (e.g., LiDAR point cloud, radar data).
///
/// For samples retrieved from the server, this contains the file type and URL.
/// For samples being populated to the server, this can be a type and filename.
///
/// Legacy datasets may have inline base64-encoded data instead of URLs.
/// The `data` field stores this inline content for fallback when no URL exists.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct SampleFile {
    /// File type name as a plain string (e.g., "image", "lidar.pcd").
    r#type: String,
    /// URL of the file; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    url: Option<String>,
    /// File name; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    filename: Option<String>,
    /// Inline base64-encoded data for legacy datasets without pre-signed URLs.
    ///
    /// Marked `skip_deserializing`, so the derived `Deserialize` never fills
    /// this field; within this type it is only set by `with_data`.
    #[serde(skip_serializing_if = "Option::is_none", skip_deserializing)]
    data: Option<String>,
    /// Raw bytes for direct upload (e.g., from ZIP archives).
    /// This field is not serialized - it's only used during the upload process.
    #[serde(skip)]
    bytes: Option<Vec<u8>>,
}
1053
1054impl SampleFile {
1055    /// Creates a new sample file with type and URL (for newer datasets).
1056    pub fn with_url(file_type: String, url: String) -> Self {
1057        Self {
1058            r#type: file_type,
1059            url: Some(url),
1060            filename: None,
1061            data: None,
1062            bytes: None,
1063        }
1064    }
1065
1066    /// Creates a new sample file with type and filename (for populate API).
1067    pub fn with_filename(file_type: String, filename: String) -> Self {
1068        Self {
1069            r#type: file_type,
1070            url: None,
1071            filename: Some(filename),
1072            data: None,
1073            bytes: None,
1074        }
1075    }
1076
1077    /// Creates a new sample file with inline data (for legacy datasets).
1078    pub fn with_data(file_type: String, data: String) -> Self {
1079        Self {
1080            r#type: file_type,
1081            url: None,
1082            filename: None,
1083            data: Some(data),
1084            bytes: None,
1085        }
1086    }
1087
1088    /// Creates a new sample file with raw bytes for direct upload.
1089    ///
1090    /// This is useful for uploading files from ZIP archives without extracting
1091    /// to disk first. The bytes are uploaded directly to the presigned URL.
1092    ///
1093    /// # Arguments
1094    /// * `file_type` - The type of file (e.g., "image", "lidar.pcd")
1095    /// * `filename` - The filename to use for the upload
1096    /// * `bytes` - The raw file bytes
1097    pub fn with_bytes(file_type: String, filename: String, bytes: Vec<u8>) -> Self {
1098        Self {
1099            r#type: file_type,
1100            url: None,
1101            filename: Some(filename),
1102            data: None,
1103            bytes: Some(bytes),
1104        }
1105    }
1106
1107    pub fn file_type(&self) -> &str {
1108        &self.r#type
1109    }
1110
1111    pub fn url(&self) -> Option<&str> {
1112        self.url.as_deref()
1113    }
1114
1115    pub fn filename(&self) -> Option<&str> {
1116        self.filename.as_deref()
1117    }
1118
1119    /// Returns inline base64-encoded data (for legacy datasets).
1120    pub fn data(&self) -> Option<&str> {
1121        self.data.as_deref()
1122    }
1123
1124    /// Returns raw bytes for direct upload (from ZIP archives, etc.).
1125    pub fn bytes(&self) -> Option<&[u8]> {
1126        self.bytes.as_deref()
1127    }
1128}
1129
/// Location and pose information for a sample.
///
/// Contains GPS coordinates and IMU orientation data describing where and how
/// the camera was positioned when capturing the sample.
///
/// Both parts are optional; a part that is `None` is left out of the
/// serialized output entirely.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Location {
    /// GPS coordinates, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gps: Option<GpsData>,
    /// IMU orientation, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub imu: Option<ImuData>,
}
1141
/// GPS location data (latitude and longitude).
///
/// Values are not checked on construction or deserialization; call
/// `validate` to check them.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct GpsData {
    /// Latitude in degrees.
    pub lat: f64,
    /// Longitude in degrees.
    pub lon: f64,
}
1148
1149impl GpsData {
1150    /// Validate GPS coordinates are within valid ranges.
1151    ///
1152    /// Checks if latitude and longitude values are within valid geographic
1153    /// ranges. Helps catch data corruption or API issues early.
1154    ///
1155    /// # Returns
1156    /// `Ok(())` if valid, `Err(String)` with descriptive error message
1157    /// otherwise
1158    ///
1159    /// # Valid Ranges
1160    /// - Latitude: -90.0 to +90.0 degrees
1161    /// - Longitude: -180.0 to +180.0 degrees
1162    ///
1163    /// # Examples
1164    /// ```
1165    /// use edgefirst_client::GpsData;
1166    ///
1167    /// let gps = GpsData {
1168    ///     lat: 37.7749,
1169    ///     lon: -122.4194,
1170    /// };
1171    /// assert!(gps.validate().is_ok());
1172    ///
1173    /// let bad_gps = GpsData {
1174    ///     lat: 100.0,
1175    ///     lon: 0.0,
1176    /// };
1177    /// assert!(bad_gps.validate().is_err());
1178    /// ```
1179    pub fn validate(&self) -> Result<(), String> {
1180        validate_gps_coordinates(self.lat, self.lon)
1181    }
1182}
1183
/// IMU orientation data (roll, pitch, yaw in degrees).
///
/// Values are not checked on construction or deserialization; call
/// `validate` to check them.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ImuData {
    /// Roll angle in degrees.
    pub roll: f64,
    /// Pitch angle in degrees.
    pub pitch: f64,
    /// Yaw angle in degrees.
    pub yaw: f64,
}
1191
1192impl ImuData {
1193    /// Validate IMU orientation angles are within valid ranges.
1194    ///
1195    /// Checks if roll, pitch, and yaw values are finite and within reasonable
1196    /// ranges. Helps catch data corruption or sensor errors early.
1197    ///
1198    /// # Returns
1199    /// `Ok(())` if valid, `Err(String)` with descriptive error message
1200    /// otherwise
1201    ///
1202    /// # Valid Ranges
1203    /// - Roll: -180.0 to +180.0 degrees
1204    /// - Pitch: -90.0 to +90.0 degrees (typical gimbal lock range)
1205    /// - Yaw: -180.0 to +180.0 degrees (or 0 to 360, normalized)
1206    ///
1207    /// # Examples
1208    /// ```
1209    /// use edgefirst_client::ImuData;
1210    ///
1211    /// let imu = ImuData {
1212    ///     roll: 10.0,
1213    ///     pitch: 5.0,
1214    ///     yaw: 90.0,
1215    /// };
1216    /// assert!(imu.validate().is_ok());
1217    ///
1218    /// let bad_imu = ImuData {
1219    ///     roll: 200.0,
1220    ///     pitch: 0.0,
1221    ///     yaw: 0.0,
1222    /// };
1223    /// assert!(bad_imu.validate().is_err());
1224    /// ```
1225    pub fn validate(&self) -> Result<(), String> {
1226        validate_imu_orientation(self.roll, self.pitch, self.yaw)
1227    }
1228}
1229
/// Associates a static, lowercase type-name string with a type.
///
/// Implemented in this file for `Box3d` ("box3d"), `Box2d` ("box2d") and
/// `Polygon` ("polygon").
// NOTE(review): `dead_code` is allowed presumably because the trait is not
// used in every build configuration - confirm before removing the attribute.
#[allow(dead_code)]
pub trait TypeName {
    /// Returns the type-name string for the implementing type.
    fn type_name() -> String;
}
1234
/// A 3D bounding box stored as a center point plus extents.
///
/// `x`, `y`, `z` hold the box center and `w`, `h`, `l` its width, height and
/// length, as established by `Box3d::new` and the `left`/`top`/`front`
/// accessors (which subtract half an extent from the center).
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct Box3d {
    /// Center X coordinate.
    x: f32,
    /// Center Y coordinate.
    y: f32,
    /// Center Z coordinate.
    z: f32,
    /// Width (extent along X).
    w: f32,
    /// Height (extent along Y).
    h: f32,
    /// Length (extent along Z).
    l: f32,
}
1244
1245impl TypeName for Box3d {
1246    fn type_name() -> String {
1247        "box3d".to_owned()
1248    }
1249}
1250
1251impl Box3d {
1252    pub fn new(cx: f32, cy: f32, cz: f32, width: f32, height: f32, length: f32) -> Self {
1253        Self {
1254            x: cx,
1255            y: cy,
1256            z: cz,
1257            w: width,
1258            h: height,
1259            l: length,
1260        }
1261    }
1262
1263    pub fn width(&self) -> f32 {
1264        self.w
1265    }
1266
1267    pub fn height(&self) -> f32 {
1268        self.h
1269    }
1270
1271    pub fn length(&self) -> f32 {
1272        self.l
1273    }
1274
1275    pub fn cx(&self) -> f32 {
1276        self.x
1277    }
1278
1279    pub fn cy(&self) -> f32 {
1280        self.y
1281    }
1282
1283    pub fn cz(&self) -> f32 {
1284        self.z
1285    }
1286
1287    pub fn left(&self) -> f32 {
1288        self.x - self.w / 2.0
1289    }
1290
1291    pub fn top(&self) -> f32 {
1292        self.y - self.h / 2.0
1293    }
1294
1295    pub fn front(&self) -> f32 {
1296        self.z - self.l / 2.0
1297    }
1298}
1299
/// A 2D bounding box stored as a top-left corner plus extents.
///
/// Unlike `Box3d`, `x` and `y` hold the left/top corner rather than the
/// center: `Box2d::new` takes `left`/`top`, and `cx`/`cy` add half an extent.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct Box2d {
    /// Height of the box.
    h: f32,
    /// Width of the box.
    w: f32,
    /// Left edge (X of the top-left corner).
    x: f32,
    /// Top edge (Y of the top-left corner).
    y: f32,
}
1307
1308impl TypeName for Box2d {
1309    fn type_name() -> String {
1310        "box2d".to_owned()
1311    }
1312}
1313
1314impl Box2d {
1315    pub fn new(left: f32, top: f32, width: f32, height: f32) -> Self {
1316        Self {
1317            x: left,
1318            y: top,
1319            w: width,
1320            h: height,
1321        }
1322    }
1323
1324    pub fn width(&self) -> f32 {
1325        self.w
1326    }
1327
1328    pub fn height(&self) -> f32 {
1329        self.h
1330    }
1331
1332    pub fn left(&self) -> f32 {
1333        self.x
1334    }
1335
1336    pub fn top(&self) -> f32 {
1337        self.y
1338    }
1339
1340    pub fn cx(&self) -> f32 {
1341        self.x + self.w / 2.0
1342    }
1343
1344    pub fn cy(&self) -> f32 {
1345        self.y + self.h / 2.0
1346    }
1347}
1348
/// A polygon described by one or more rings of `(x, y)` points.
///
/// Serialization and deserialization are implemented by hand for this type
/// rather than derived.
#[derive(Clone, Debug, PartialEq)]
pub struct Polygon {
    /// The rings of the polygon; each ring is a list of `(x, y)` points.
    pub rings: Vec<Vec<(f32, f32)>>,
}
1353
1354impl TypeName for Polygon {
1355    fn type_name() -> String {
1356        "polygon".to_owned()
1357    }
1358}
1359
1360impl Polygon {
1361    pub fn new(rings: Vec<Vec<(f32, f32)>>) -> Self {
1362        Self { rings }
1363    }
1364}
1365
1366impl serde::Serialize for Polygon {
1367    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1368    where
1369        S: serde::Serializer,
1370    {
1371        serde::Serialize::serialize(&self.rings, serializer)
1372    }
1373}
1374
1375impl<'de> serde::Deserialize<'de> for Polygon {
1376    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1377    where
1378        D: serde::Deserializer<'de>,
1379    {
1380        // First, deserialize to a raw JSON value to handle various formats
1381        let value = serde_json::Value::deserialize(deserializer)?;
1382
1383        // Try to extract polygon data from various formats
1384        let polygon_value = if let Some(obj) = value.as_object() {
1385            // Format: {"polygon": [...]} or {"rings": [...]}
1386            obj.get("rings")
1387                .or_else(|| obj.get("polygon"))
1388                .cloned()
1389                .unwrap_or(serde_json::Value::Null)
1390        } else {
1391            // Format: [[...]] (direct array)
1392            value
1393        };
1394
1395        // Parse the polygon array, filtering out null/invalid values
1396        let rings = parse_polygon_value(&polygon_value);
1397
1398        Ok(Self { rings })
1399    }
1400}
1401
1402/// Parse polygon value from JSON, handling malformed data gracefully.
1403///
1404/// Handles multiple formats:
1405/// - `[[[x,y],[x,y],...]]` - 3D array with point pairs (correct format)
1406/// - `[[x,y,x,y,...]]` - 2D array with flat coords (COCO format, legacy)
1407/// - `[[null,null,...]]` - corrupted data (returns empty)
1408/// - `null` - missing data (returns empty)
1409fn parse_polygon_value(value: &serde_json::Value) -> Vec<Vec<(f32, f32)>> {
1410    let Some(outer_array) = value.as_array() else {
1411        return vec![];
1412    };
1413
1414    let mut result = Vec::new();
1415
1416    for ring in outer_array {
1417        let Some(ring_array) = ring.as_array() else {
1418            continue;
1419        };
1420
1421        // Check if this is a 3D array (point pairs) or 2D array (flat coords)
1422        let is_3d = ring_array
1423            .first()
1424            .map(|first| first.is_array())
1425            .unwrap_or(false);
1426
1427        let points: Vec<(f32, f32)> = if is_3d {
1428            // 3D format: [[x1,y1], [x2,y2], ...]
1429            ring_array
1430                .iter()
1431                .filter_map(|point| {
1432                    let arr = point.as_array()?;
1433                    if arr.len() >= 2 {
1434                        let x = arr[0].as_f64()? as f32;
1435                        let y = arr[1].as_f64()? as f32;
1436                        if x.is_finite() && y.is_finite() {
1437                            Some((x, y))
1438                        } else {
1439                            None
1440                        }
1441                    } else {
1442                        None
1443                    }
1444                })
1445                .collect()
1446        } else {
1447            // 2D format (flat): [x1, y1, x2, y2, ...]
1448            ring_array
1449                .chunks(2)
1450                .filter_map(|chunk| {
1451                    if chunk.len() >= 2 {
1452                        let x = chunk[0].as_f64()? as f32;
1453                        let y = chunk[1].as_f64()? as f32;
1454                        if x.is_finite() && y.is_finite() {
1455                            Some((x, y))
1456                        } else {
1457                            None
1458                        }
1459                    } else {
1460                        None
1461                    }
1462                })
1463                .collect()
1464        };
1465
1466        // Only add rings with at least 3 valid points
1467        if points.len() >= 3 {
1468            result.push(points);
1469        }
1470    }
1471
1472    result
1473}
1474
/// Helper struct for deserializing annotations from the server.
///
/// The server sends bounding box coordinates as flat fields (x, y, w, h) at the
/// annotation level, but we want to store them as a nested Box2d struct.
///
/// Every field is optional and defaults to `None` when absent from the input.
#[derive(Deserialize)]
struct AnnotationRaw {
    #[serde(default)]
    sample_id: Option<SampleID>,
    #[serde(default)]
    name: Option<String>,
    #[serde(default)]
    sequence_name: Option<String>,
    #[serde(default)]
    frame_number: Option<u32>,
    // Read from the `group_name` key.
    #[serde(rename = "group_name", default)]
    group: Option<String>,
    // Read from `object_reference`, with `object_id` accepted as an alias.
    #[serde(rename = "object_reference", alias = "object_id", default)]
    object_id: Option<String>,
    #[serde(default)]
    label_name: Option<String>,
    #[serde(default)]
    label_index: Option<u64>,
    #[serde(default)]
    iscrowd: Option<bool>,
    #[serde(default)]
    category_frequency: Option<String>,
    // Nested box2d format (if server sends it this way)
    #[serde(default)]
    box2d: Option<Box2d>,
    #[serde(default)]
    box3d: Option<Box3d>,
    // Read from `polygon`, with `mask` accepted as an alias.
    #[serde(default, alias = "mask")]
    polygon: Option<Polygon>,
    // Flat box2d fields from server (x, y, w, h at annotation level)
    #[serde(default)]
    x: Option<f64>,
    #[serde(default)]
    y: Option<f64>,
    #[serde(default)]
    w: Option<f64>,
    #[serde(default)]
    h: Option<f64>,
}
1518
/// A single annotation attached to a sample.
///
/// Only `Serialize` is derived here; deserialization is implemented by hand
/// and goes through `AnnotationRaw`. Fields that are `None` are omitted from
/// the serialized output.
// NOTE(review): the `alias` and `default` serde attributes below only matter
// for a derived `Deserialize`, which this struct does not have - presumably
// kept as documentation of the accepted wire names; confirm before relying
// on them.
#[derive(Serialize, Clone, Debug)]
pub struct Annotation {
    /// ID of the sample this annotation belongs to.
    #[serde(skip_serializing_if = "Option::is_none")]
    sample_id: Option<SampleID>,
    /// Name associated with the annotation.
    #[serde(skip_serializing_if = "Option::is_none")]
    name: Option<String>,
    /// Name of the sequence the annotation belongs to, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    sequence_name: Option<String>,
    /// Frame number within the sequence, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    frame_number: Option<u32>,
    /// Dataset split (train, val, test) - matches `Sample.group`.
    /// JSON field name: "group_name" (Studio API uses this name for both upload
    /// and download).
    #[serde(rename = "group_name", skip_serializing_if = "Option::is_none")]
    group: Option<String>,
    /// Object tracking identifier across frames.
    /// JSON field name: "object_reference" for upload (populate), "object_id"
    /// for download (list).
    #[serde(
        rename = "object_reference",
        alias = "object_id",
        skip_serializing_if = "Option::is_none"
    )]
    object_id: Option<String>,
    /// Name of the label assigned to the annotation.
    #[serde(skip_serializing_if = "Option::is_none")]
    label_name: Option<String>,
    /// Index of the label assigned to the annotation.
    #[serde(skip_serializing_if = "Option::is_none")]
    label_index: Option<u64>,
    /// COCO crowd flag: true = crowd region, false = single instance.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    iscrowd: Option<bool>,
    /// LVIS frequency group: "f" (frequent), "c" (common), "r" (rare).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    category_frequency: Option<String>,
    /// 2D bounding box, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    box2d: Option<Box2d>,
    /// 3D bounding box, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    box3d: Option<Box3d>,
    /// Polygon vertices for instance segmentation.
    ///
    /// Wire name is `mask` for historical reasons: the Rust field was
    /// renamed from `mask: Mask` to `polygon: Polygon` after the
    /// `samples.populate2` contract was already locked in, and the server
    /// still expects the key to be `mask`. Uploads that emit `polygon`
    /// here get silently dropped. Deserialisation accepts both names
    /// because `AnnotationRaw` carries `alias = "mask"`.
    #[serde(
        rename(serialize = "mask"),
        skip_serializing_if = "Option::is_none"
    )]
    polygon: Option<Polygon>,
    /// PNG-encoded raster mask (populated from Arrow, not from Studio JSON-RPC).
    /// Never serialized (`serde(skip)`).
    #[serde(skip)]
    mask: Option<MaskData>,
    /// Detection confidence score for box2d (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    box2d_score: Option<f32>,
    /// Detection confidence score for box3d (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    box3d_score: Option<f32>,
    /// Confidence score for polygon (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    polygon_score: Option<f32>,
    /// Confidence score for mask (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    mask_score: Option<f32>,
}
1586
1587impl<'de> serde::Deserialize<'de> for Annotation {
1588    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1589    where
1590        D: serde::Deserializer<'de>,
1591    {
1592        // Deserialize to AnnotationRaw first to handle server format differences
1593        let raw: AnnotationRaw = serde::Deserialize::deserialize(deserializer)?;
1594
1595        // Prefer nested box2d if present, otherwise construct from flat x/y/w/h
1596        let box2d = raw.box2d.or_else(|| match (raw.x, raw.y, raw.w, raw.h) {
1597            (Some(x), Some(y), Some(w), Some(h)) if w > 0.0 && h > 0.0 => {
1598                Some(Box2d::new(x as f32, y as f32, w as f32, h as f32))
1599            }
1600            _ => None,
1601        });
1602
1603        Ok(Annotation {
1604            sample_id: raw.sample_id,
1605            name: raw.name,
1606            sequence_name: raw.sequence_name,
1607            frame_number: raw.frame_number,
1608            group: raw.group,
1609            object_id: raw.object_id,
1610            label_name: raw.label_name,
1611            label_index: raw.label_index,
1612            iscrowd: raw.iscrowd,
1613            category_frequency: raw.category_frequency,
1614            box2d,
1615            box3d: raw.box3d,
1616            polygon: raw.polygon,
1617            mask: None,
1618            box2d_score: None,
1619            box3d_score: None,
1620            polygon_score: None,
1621            mask_score: None,
1622        })
1623    }
1624}
1625
1626impl Default for Annotation {
1627    fn default() -> Self {
1628        Self::new()
1629    }
1630}
1631
1632impl Annotation {
1633    pub fn new() -> Self {
1634        Self {
1635            sample_id: None,
1636            name: None,
1637            sequence_name: None,
1638            frame_number: None,
1639            group: None,
1640            object_id: None,
1641            label_name: None,
1642            label_index: None,
1643            iscrowd: None,
1644            category_frequency: None,
1645            box2d: None,
1646            box3d: None,
1647            polygon: None,
1648            mask: None,
1649            box2d_score: None,
1650            box3d_score: None,
1651            polygon_score: None,
1652            mask_score: None,
1653        }
1654    }
1655
1656    pub fn set_sample_id(&mut self, sample_id: Option<SampleID>) {
1657        self.sample_id = sample_id;
1658    }
1659
1660    pub fn sample_id(&self) -> Option<SampleID> {
1661        self.sample_id
1662    }
1663
1664    pub fn set_name(&mut self, name: Option<String>) {
1665        self.name = name;
1666    }
1667
1668    pub fn name(&self) -> Option<&String> {
1669        self.name.as_ref()
1670    }
1671
1672    pub fn set_sequence_name(&mut self, sequence_name: Option<String>) {
1673        self.sequence_name = sequence_name;
1674    }
1675
1676    pub fn sequence_name(&self) -> Option<&String> {
1677        self.sequence_name.as_ref()
1678    }
1679
1680    pub fn set_frame_number(&mut self, frame_number: Option<u32>) {
1681        self.frame_number = frame_number;
1682    }
1683
1684    pub fn frame_number(&self) -> Option<u32> {
1685        self.frame_number
1686    }
1687
1688    pub fn set_group(&mut self, group: Option<String>) {
1689        self.group = group;
1690    }
1691
1692    pub fn group(&self) -> Option<&String> {
1693        self.group.as_ref()
1694    }
1695
1696    pub fn object_id(&self) -> Option<&String> {
1697        self.object_id.as_ref()
1698    }
1699
1700    pub fn set_object_id(&mut self, object_id: Option<String>) {
1701        self.object_id = object_id;
1702    }
1703
1704    pub fn label(&self) -> Option<&String> {
1705        self.label_name.as_ref()
1706    }
1707
1708    pub fn set_label(&mut self, label_name: Option<String>) {
1709        self.label_name = label_name;
1710    }
1711
1712    pub fn label_index(&self) -> Option<u64> {
1713        self.label_index
1714    }
1715
1716    pub fn set_label_index(&mut self, label_index: Option<u64>) {
1717        self.label_index = label_index;
1718    }
1719
1720    pub fn iscrowd(&self) -> Option<bool> {
1721        self.iscrowd
1722    }
1723
1724    pub fn set_iscrowd(&mut self, iscrowd: Option<bool>) {
1725        self.iscrowd = iscrowd;
1726    }
1727
1728    pub fn category_frequency(&self) -> Option<&String> {
1729        self.category_frequency.as_ref()
1730    }
1731
1732    pub fn set_category_frequency(&mut self, category_frequency: Option<String>) {
1733        self.category_frequency = category_frequency;
1734    }
1735
1736    pub fn box2d(&self) -> Option<&Box2d> {
1737        self.box2d.as_ref()
1738    }
1739
1740    pub fn set_box2d(&mut self, box2d: Option<Box2d>) {
1741        self.box2d = box2d;
1742    }
1743
1744    pub fn box3d(&self) -> Option<&Box3d> {
1745        self.box3d.as_ref()
1746    }
1747
1748    pub fn set_box3d(&mut self, box3d: Option<Box3d>) {
1749        self.box3d = box3d;
1750    }
1751
1752    pub fn polygon(&self) -> Option<&Polygon> {
1753        self.polygon.as_ref()
1754    }
1755
1756    pub fn set_polygon(&mut self, polygon: Option<Polygon>) {
1757        self.polygon = polygon;
1758    }
1759
1760    pub fn mask(&self) -> Option<&MaskData> {
1761        self.mask.as_ref()
1762    }
1763
1764    pub fn set_mask(&mut self, mask: Option<MaskData>) {
1765        self.mask = mask;
1766    }
1767
1768    pub fn box2d_score(&self) -> Option<f32> {
1769        self.box2d_score
1770    }
1771
1772    pub fn set_box2d_score(&mut self, score: Option<f32>) {
1773        self.box2d_score = score;
1774    }
1775
1776    pub fn box3d_score(&self) -> Option<f32> {
1777        self.box3d_score
1778    }
1779
1780    pub fn set_box3d_score(&mut self, score: Option<f32>) {
1781        self.box3d_score = score;
1782    }
1783
1784    pub fn polygon_score(&self) -> Option<f32> {
1785        self.polygon_score
1786    }
1787
1788    pub fn set_polygon_score(&mut self, score: Option<f32>) {
1789        self.polygon_score = score;
1790    }
1791
1792    pub fn mask_score(&self) -> Option<f32> {
1793        self.mask_score
1794    }
1795
1796    pub fn set_mask_score(&mut self, score: Option<f32>) {
1797        self.mask_score = score;
1798    }
1799}
1800
/// A label belonging to a dataset.
///
/// Fields are private; use the accessor methods to read them and the async
/// `set_name` / `set_index` / `remove` methods to change the label through a
/// `Client`.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Label {
    /// Numeric identifier of the label.
    id: u64,
    /// Identifier of the dataset the label belongs to.
    dataset_id: DatasetID,
    /// Index of the label.
    index: u64,
    /// Human-readable label name.
    name: String,
}
1808
1809impl Label {
1810    pub fn id(&self) -> u64 {
1811        self.id
1812    }
1813
1814    pub fn dataset_id(&self) -> DatasetID {
1815        self.dataset_id
1816    }
1817
1818    pub fn index(&self) -> u64 {
1819        self.index
1820    }
1821
1822    pub fn name(&self) -> &str {
1823        &self.name
1824    }
1825
1826    pub async fn remove(&self, client: &Client) -> Result<(), Error> {
1827        client.remove_label(self.id()).await
1828    }
1829
1830    pub async fn set_name(&mut self, client: &Client, name: &str) -> Result<(), Error> {
1831        self.name = name.to_string();
1832        client.update_label(self).await
1833    }
1834
1835    pub async fn set_index(&mut self, client: &Client, index: u64) -> Result<(), Error> {
1836        self.index = index;
1837        client.update_label(self).await
1838    }
1839}
1840
1841impl Display for Label {
1842    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1843        write!(f, "{}", self.name())
1844    }
1845}
1846
/// Serializable description of a label to create, carrying only its name.
// NOTE(review): presumably one element of a label-creation request payload
// (see `NewLabel`) - confirm against the client call site.
#[derive(Serialize, Clone, Debug)]
pub struct NewLabelObject {
    /// Name of the label.
    pub name: String,
}
1851
/// Serializable batch of new labels for a single dataset.
// NOTE(review): presumably the request payload for creating labels - confirm
// against the client call site.
#[derive(Serialize, Clone, Debug)]
pub struct NewLabel {
    /// Dataset the labels belong to.
    pub dataset_id: DatasetID,
    /// The labels, one entry per label name.
    pub labels: Vec<NewLabelObject>,
}
1857
/// A dataset group for organizing samples into logical subsets.
///
/// Groups are used to partition samples within a dataset for different purposes
/// such as training, validation, and testing. Each sample can belong to at most
/// one group at a time.
///
/// This type only derives `Deserialize`: it is read from API responses and
/// has no serialized form of its own.
///
/// # Common Group Names
///
/// - `"train"` - Training data for model fitting
/// - `"val"` - Validation data for hyperparameter tuning
/// - `"test"` - Test data for final evaluation
///
/// # Examples
///
/// ```rust,no_run
/// use edgefirst_client::{Client, DatasetID};
///
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let client = Client::new()?.with_token_path(None)?;
/// let dataset_id: DatasetID = "ds-123".try_into()?;
///
/// // List all groups in the dataset
/// let groups = client.groups(dataset_id).await?;
/// for group in groups {
///     println!("Group [{}]: {}", group.id, group.name);
/// }
/// # Ok(())
/// # }
/// ```
#[derive(Deserialize, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Group {
    /// The unique numeric identifier for this group.
    ///
    /// Group IDs are assigned by the server and are unique within an
    /// organization.
    pub id: u64,

    /// The human-readable name of the group.
    ///
    /// Common names include "train", "val", "test", but any string is valid.
    pub name: String,
}
1900
/// Derives the DataFrame row name and optional frame number for an annotation.
///
/// Returns `None` when the annotation has no `name`, when that name has no
/// file stem, or when the stem is not valid UTF-8. Otherwise:
///
/// - annotations with a `sequence_name` yield `(sequence_name, frame_number)`;
/// - all other annotations yield `(file stem of name, None)`.
#[cfg(feature = "polars")]
fn extract_annotation_name(ann: &Annotation) -> Option<(String, Option<u32>)> {
    use std::path::Path;

    // The stem is resolved up front, so an unusable `name` rejects the
    // annotation even when it belongs to a sequence.
    let stem = Path::new(ann.name.as_ref()?).file_stem()?.to_str()?;

    let row = if let Some(sequence) = &ann.sequence_name {
        (sequence.clone(), ann.frame_number)
    } else {
        (stem.to_string(), None)
    };
    Some(row)
}
1915
/// Builds the nested Series representation of a polygon used by the 2026.04
/// schema.
///
/// Every ring of `polygon` is flattened into an unnamed Series of interleaved
/// `[x1, y1, x2, y2, ...]` floats; the rings are then wrapped, in order, into
/// one unnamed outer Series.
#[cfg(feature = "polars")]
fn convert_polygon_to_nested_series(polygon: &Polygon) -> Series {
    let mut rings: Vec<Option<Series>> = Vec::new();
    for ring in polygon.rings.iter() {
        // Interleave the ring's points as x, y, x, y, ...
        let mut flat: Vec<f32> = Vec::new();
        for &(x, y) in ring.iter() {
            flat.push(x);
            flat.push(y);
        }
        rings.push(Some(Series::new("".into(), flat)));
    }
    Series::new("".into(), rings)
}
1931
/// Create a DataFrame from a slice of samples with the 2026.04 schema.
///
/// Each annotation in each sample becomes one row. A sample without any
/// annotations contributes a single row whose annotation fields are null.
/// Annotations (or annotation-less samples) for which no row name can be
/// derived are skipped entirely.
///
/// Columns where every value is null are automatically dropped, so the result
/// only contains columns that carry data. The `name` column is always present.
/// For an empty input every column is empty and therefore only `name` remains.
///
/// # Schema (2026.04)
///
/// - `name`: Sample name (String) - ALWAYS PRESENT. For sequence data this is
///   the sequence name, otherwise the file stem of the annotation/image name.
/// - `frame`: Frame number (UInt32)
/// - `object_id`: Object tracking ID (String)
/// - `label`: Object label (Categorical)
/// - `label_index`: Label index (UInt64)
/// - `group`: Dataset group (Categorical)
/// - `polygon`: Segmentation polygon rings (List<List<Float32>>)
/// - `box2d`: 2D bounding box [cx, cy, w, h] (Array<Float32, 4>)
/// - `box3d`: 3D bounding box [x, y, z, w, h, l] (Array<Float32, 6>)
/// - `mask`: PNG-encoded raster mask (Binary)
/// - `box2d_score`: Box2d confidence (Float32)
/// - `box3d_score`: Box3d confidence (Float32)
/// - `polygon_score`: Polygon confidence (Float32)
/// - `mask_score`: Mask confidence (Float32)
/// - `size`: Image size [width, height] (Array<UInt32, 2>)
/// - `location`: GPS [lat, lon] (Array<Float32, 2>)
/// - `pose`: IMU [yaw, pitch, roll] (Array<Float32, 3>)
/// - `degradation`: Image degradation (String)
/// - `iscrowd`: COCO crowd flag (Boolean)
/// - `category_frequency`: LVIS frequency group (Categorical)
/// - `neg_label_indices`: Verified-absent label indices (List<UInt32>)
/// - `not_exhaustive_label_indices`: Incomplete label indices (List<UInt32>)
/// - `timing`: Pipeline timing (Struct{load, preprocess, inference, decode} of Int64)
///
/// # Errors
///
/// Returns an error when any of the column casts, the construction of the
/// `timing` struct column, or the final `DataFrame::new` call fails; the
/// underlying error is propagated with `?`.
///
/// # Example
///
/// ```rust,no_run
/// use edgefirst_client::{Client, samples_dataframe};
///
/// # async fn example() -> Result<(), edgefirst_client::Error> {
/// # let client = Client::new()?;
/// # let dataset_id = 1.into();
/// # let annotation_set_id = 1.into();
/// let samples = client
///     .samples(dataset_id, Some(annotation_set_id), &[], &[], &[], None)
///     .await?;
/// let df = samples_dataframe(&samples)?;
/// println!("DataFrame shape: {:?}", df.shape());
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "polars")]
pub fn samples_dataframe(samples: &[Sample]) -> Result<DataFrame, Error> {
    // Collect per-row vectors directly while iterating samples.
    // Every vector receives exactly one push per emitted row, so they all end
    // up with the same length.
    let mut names: Vec<String> = Vec::new();
    let mut frames: Vec<Option<u32>> = Vec::new();
    let mut objects: Vec<Option<String>> = Vec::new();
    let mut labels: Vec<Option<String>> = Vec::new();
    let mut label_indices: Vec<Option<u64>> = Vec::new();
    let mut groups: Vec<Option<String>> = Vec::new();
    let mut polygons: Vec<Option<Series>> = Vec::new();
    let mut boxes2d: Vec<Option<Series>> = Vec::new();
    let mut boxes3d: Vec<Option<Series>> = Vec::new();
    let mut mask_bytes: Vec<Option<Vec<u8>>> = Vec::new();
    let mut box2d_scores: Vec<Option<f32>> = Vec::new();
    let mut box3d_scores: Vec<Option<f32>> = Vec::new();
    let mut polygon_scores: Vec<Option<f32>> = Vec::new();
    let mut mask_scores: Vec<Option<f32>> = Vec::new();
    let mut sizes: Vec<Option<Vec<u32>>> = Vec::new();
    let mut locations: Vec<Option<Vec<f32>>> = Vec::new();
    let mut poses: Vec<Option<Vec<f32>>> = Vec::new();
    let mut degradations: Vec<Option<String>> = Vec::new();
    let mut iscrowds: Vec<Option<bool>> = Vec::new();
    let mut category_frequencies: Vec<Option<String>> = Vec::new();
    let mut neg_label_indices_vec: Vec<Option<Vec<u32>>> = Vec::new();
    let mut not_exhaustive_label_indices_vec: Vec<Option<Vec<u32>>> = Vec::new();
    let mut timing_load: Vec<Option<i64>> = Vec::new();
    let mut timing_preprocess: Vec<Option<i64>> = Vec::new();
    let mut timing_inference: Vec<Option<i64>> = Vec::new();
    let mut timing_decode: Vec<Option<i64>> = Vec::new();

    for sample in samples {
        // Extract sample metadata once per sample
        // Size is only recorded when both width and height are known.
        let size = match (sample.width, sample.height) {
            (Some(w), Some(h)) => Some(vec![w, h]),
            _ => None,
        };

        // GPS as [lat, lon], converted to f32 to match the Float32 schema.
        let location = sample.location.as_ref().and_then(|loc| {
            loc.gps
                .as_ref()
                .map(|gps| vec![gps.lat as f32, gps.lon as f32])
        });

        // IMU as [yaw, pitch, roll], converted to f32 to match the Float32 schema.
        let pose = sample.location.as_ref().and_then(|loc| {
            loc.imu
                .as_ref()
                .map(|imu| vec![imu.yaw as f32, imu.pitch as f32, imu.roll as f32])
        });

        let degradation = sample.degradation.clone();

        // Timing from the sample (same for all rows of this sample)
        let t_load = sample.timing.as_ref().and_then(|t| t.load);
        let t_preprocess = sample.timing.as_ref().and_then(|t| t.preprocess);
        let t_inference = sample.timing.as_ref().and_then(|t| t.inference);
        let t_decode = sample.timing.as_ref().and_then(|t| t.decode);

        // Helper to push shared sample-level fields.
        // Expanded inline at both call sites below (once per emitted row).
        macro_rules! push_sample_fields {
            () => {
                sizes.push(size.clone());
                locations.push(location.clone());
                poses.push(pose.clone());
                degradations.push(degradation.clone());
                neg_label_indices_vec.push(sample.neg_label_indices.clone());
                not_exhaustive_label_indices_vec.push(sample.not_exhaustive_label_indices.clone());
                timing_load.push(t_load);
                timing_preprocess.push(t_preprocess);
                timing_inference.push(t_inference);
                timing_decode.push(t_decode);
            };
        }

        if sample.annotations.is_empty() {
            // One row for the sample with null annotation fields.
            // A sample whose name cannot be derived produces no row at all.
            let (name, frame) = match extract_annotation_name_from_sample(sample) {
                Some(nf) => nf,
                None => continue,
            };

            names.push(name);
            frames.push(frame);
            objects.push(None);
            labels.push(None);
            label_indices.push(None);
            groups.push(sample.group.clone());
            polygons.push(None);
            boxes2d.push(None);
            boxes3d.push(None);
            mask_bytes.push(None);
            box2d_scores.push(None);
            box3d_scores.push(None);
            polygon_scores.push(None);
            mask_scores.push(None);
            iscrowds.push(None);
            category_frequencies.push(None);
            push_sample_fields!();
        } else {
            // One row per annotation
            for ann in &sample.annotations {
                // Annotations without a derivable name are skipped (no row).
                let (name, frame) = match extract_annotation_name(ann) {
                    Some(nf) => nf,
                    None => continue,
                };

                let polygon = ann.polygon.as_ref().map(convert_polygon_to_nested_series);

                // Box geometry is stored per row as a small Series; the
                // column is cast to a fixed-size Array further below.
                let box2d = ann
                    .box2d
                    .as_ref()
                    .map(|b| Series::new("box2d".into(), [b.cx(), b.cy(), b.width(), b.height()]));

                let box3d = ann
                    .box3d
                    .as_ref()
                    .map(|b| Series::new("box3d".into(), [b.x, b.y, b.z, b.w, b.h, b.l]));

                names.push(name);
                frames.push(frame);
                objects.push(ann.object_id().cloned());
                labels.push(ann.label_name.clone());
                label_indices.push(ann.label_index);
                // The group comes from the sample, not from the annotation.
                groups.push(sample.group.clone());
                polygons.push(polygon);
                boxes2d.push(box2d);
                boxes3d.push(box3d);
                mask_bytes.push(ann.mask.as_ref().map(|m| m.as_bytes().to_vec()));
                box2d_scores.push(ann.box2d_score());
                box3d_scores.push(ann.box3d_score());
                polygon_scores.push(ann.polygon_score());
                mask_scores.push(ann.mask_score());
                iscrowds.push(ann.iscrowd);
                category_frequencies.push(ann.category_frequency.clone());
                push_sample_fields!();
            }
        }
    }

    // Build DataFrame columns
    let names_col: Column = Series::new("name".into(), names).into();
    let frames_col: Column = Series::new("frame".into(), frames).into();
    let objects_col: Column = Series::new("object_id".into(), objects).into();

    // Column name: "label" (NOT "label_name")
    //
    // Physical is U16 so taxonomies larger than 255 labels fit (LVIS v1 has
    // 1,203 categories). U16 caps at 65,535 — comfortably above any realistic
    // object-detection taxonomy — and only costs one extra byte per row vs U8.
    let labels_col: Column = Series::new("label".into(), labels)
        .cast(&DataType::Categorical(
            Categories::new("labels".into(), "labels".into(), CategoricalPhysical::U16),
            Arc::new(CategoricalMapping::with_hasher(
                u16::MAX as usize,
                Default::default(),
            )),
        ))?
        .into();

    let label_indices_col: Column = Series::new("label_index".into(), label_indices).into();

    // Column name: "group" (NOT "group_name")
    let groups_col: Column = Series::new("group".into(), groups)
        .cast(&DataType::Categorical(
            Categories::new("groups".into(), "groups".into(), CategoricalPhysical::U8),
            Arc::new(CategoricalMapping::with_hasher(
                u8::MAX as usize,
                Default::default(),
            )),
        ))?
        .into();

    // Polygon: List(List(Float32)) — nested rings
    // The all-null case is handled separately and present entries are cast
    // explicitly to avoid a Polars dtype mismatch when mixing Some/None entries:
    // Series::new() with Vec<Option<Series>> panics when Some entries are list[f32] but None
    // entries infer as list[null].
    let polygons_col: Column = if polygons.iter().all(|p| p.is_none()) {
        // All null — create a null column that the drop rule will remove
        Series::new_null("polygon".into(), polygons.len()).into()
    } else {
        // Cast every present polygon Series to List(Float32) so all Some entries
        // share one dtype; if that cast fails the original Series is kept as-is.
        // None entries are passed through unchanged.
        let typed_polygons: Vec<Option<Series>> = polygons
            .into_iter()
            .map(|opt| {
                opt.map(|s| {
                    s.cast(&DataType::List(Box::new(DataType::Float32)))
                        .unwrap_or(s)
                })
            })
            .collect();
        Series::new("polygon".into(), &typed_polygons)
            .cast(&DataType::List(Box::new(DataType::List(Box::new(
                DataType::Float32,
            )))))?
            .into()
    };

    // Boxes: fixed-size Float32 arrays (4 values for 2D, 6 values for 3D).
    let boxes2d_col: Column = Series::new("box2d".into(), boxes2d)
        .cast(&DataType::Array(Box::new(DataType::Float32), 4))?
        .into();
    let boxes3d_col: Column = Series::new("box3d".into(), boxes3d)
        .cast(&DataType::Array(Box::new(DataType::Float32), 6))?
        .into();

    // Mask: Binary (raw PNG bytes)
    let mask_col: Column = Series::new("mask".into(), mask_bytes).into();

    // Score columns: Float32
    let box2d_score_col: Column = Series::new("box2d_score".into(), box2d_scores).into();
    let box3d_score_col: Column = Series::new("box3d_score".into(), box3d_scores).into();
    let polygon_score_col: Column = Series::new("polygon_score".into(), polygon_scores).into();
    let mask_score_col: Column = Series::new("mask_score".into(), mask_scores).into();

    // Optional metadata columns (2025.10)
    // Each per-row Vec is wrapped in a Series, then the column is cast to a
    // fixed-size Array of the documented width.
    let size_series: Vec<Option<Series>> = sizes
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("size".into(), vec)))
        .collect();
    let sizes_col: Column = Series::new("size".into(), size_series)
        .cast(&DataType::Array(Box::new(DataType::UInt32), 2))?
        .into();

    let location_series: Vec<Option<Series>> = locations
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("location".into(), vec)))
        .collect();
    let locations_col: Column = Series::new("location".into(), location_series)
        .cast(&DataType::Array(Box::new(DataType::Float32), 2))?
        .into();

    let pose_series: Vec<Option<Series>> = poses
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("pose".into(), vec)))
        .collect();
    let poses_col: Column = Series::new("pose".into(), pose_series)
        .cast(&DataType::Array(Box::new(DataType::Float32), 3))?
        .into();

    let degradations_col: Column = Series::new("degradation".into(), degradations).into();

    // LVIS extension columns
    let iscrowds_col: Column = Series::new("iscrowd".into(), iscrowds).into();

    let category_frequencies_col: Column =
        Series::new("category_frequency".into(), category_frequencies)
            .cast(&DataType::Categorical(
                Categories::new(
                    "cat_freq".into(),
                    "cat_freq".into(),
                    CategoricalPhysical::U8,
                ),
                Arc::new(CategoricalMapping::with_hasher(
                    u8::MAX as usize,
                    Default::default(),
                )),
            ))?
            .into();

    // Variable-length index lists: List(UInt32)
    let neg_label_indices_series: Vec<Option<Series>> = neg_label_indices_vec
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("neg_label_indices".into(), vec)))
        .collect();
    let neg_label_indices_col: Column =
        Series::new("neg_label_indices".into(), neg_label_indices_series)
            .cast(&DataType::List(Box::new(DataType::UInt32)))?
            .into();

    let not_exhaustive_label_indices_series: Vec<Option<Series>> = not_exhaustive_label_indices_vec
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("not_exhaustive_label_indices".into(), vec)))
        .collect();
    let not_exhaustive_label_indices_col: Column = Series::new(
        "not_exhaustive_label_indices".into(),
        not_exhaustive_label_indices_series,
    )
    .cast(&DataType::List(Box::new(DataType::UInt32)))?
    .into();

    // Timing: Struct{load, preprocess, inference, decode} of Int64
    // The row count is taken from the already-built `frame` column.
    let timing_col: Column = StructChunked::from_series(
        "timing".into(),
        frames_col.len(),
        [
            Series::new("load".into(), &timing_load),
            Series::new("preprocess".into(), &timing_preprocess),
            Series::new("inference".into(), &timing_inference),
            Series::new("decode".into(), &timing_decode),
        ]
        .iter(),
    )?
    .into_series()
    .into();

    // Collect all columns, then drop any where ALL values are null (except "name")
    let all_columns: Vec<Column> = vec![
        names_col,
        frames_col,
        objects_col,
        labels_col,
        label_indices_col,
        groups_col,
        polygons_col,
        boxes2d_col,
        boxes3d_col,
        mask_col,
        box2d_score_col,
        box3d_score_col,
        polygon_score_col,
        mask_score_col,
        sizes_col,
        locations_col,
        poses_col,
        degradations_col,
        iscrowds_col,
        category_frequencies_col,
        neg_label_indices_col,
        not_exhaustive_label_indices_col,
        timing_col,
    ];

    // Row count of the frame, read from the first column (`name`).
    let height = all_columns.first().map(|c| c.len()).unwrap_or(0);

    // `name` is kept unconditionally; every other column must carry data.
    let non_empty_columns: Vec<Column> = all_columns
        .into_iter()
        .filter(|col| col.name() == "name" || !is_all_null_column(col))
        .collect();

    Ok(DataFrame::new(height, non_empty_columns)?)
}
2310
/// Reports whether a column carries no data at all.
///
/// A column counts as all-null when it is empty or when every value is null.
/// A `Struct` column additionally counts as all-null when each of its inner
/// fields is entirely null, even if the outer rows themselves are not null.
#[cfg(feature = "polars")]
fn is_all_null_column(col: &Column) -> bool {
    if col.is_empty() || col.null_count() == col.len() {
        return true;
    }

    // Only struct columns need the deeper, per-field inspection.
    if !matches!(col.dtype(), DataType::Struct(..)) {
        return false;
    }

    match col.as_materialized_series().struct_() {
        Ok(inner) => inner
            .fields_as_series()
            .iter()
            .all(|field| field.null_count() == field.len()),
        Err(_) => false,
    }
}
2333
/// Derives the DataFrame row name and optional frame number for a sample that
/// has no annotations.
///
/// Returns `None` when the sample has no `image_name`, when that name has no
/// file stem, or when the stem is not valid UTF-8. Otherwise:
///
/// - samples with a `sequence_name` yield `(sequence_name, frame_number)`;
/// - all other samples yield `(file stem of image_name, None)`.
#[cfg(feature = "polars")]
fn extract_annotation_name_from_sample(sample: &Sample) -> Option<(String, Option<u32>)> {
    use std::path::Path;

    // The stem is resolved up front, so an unusable `image_name` rejects the
    // sample even when it belongs to a sequence.
    let stem = Path::new(sample.image_name.as_ref()?)
        .file_stem()?
        .to_str()?;

    let row = if let Some(sequence) = &sample.sequence_name {
        (sequence.clone(), sample.frame_number)
    } else {
        (stem.to_string(), None)
    };
    Some(row)
}
2349
2350// ============================================================================
2351// PURE FUNCTIONS FOR TESTABLE CORE LOGIC
2352// ============================================================================
2353
/// Extract sample name from image filename by:
/// 1. Removing the file extension (everything after the last dot)
/// 2. Removing a trailing `.camera` suffix if present
///
/// Either step is skipped when it would leave an empty name, so filenames
/// that start with a dot are preserved. `.camera` is only removed when it is
/// a true suffix of the extension-less name; an occurrence in the middle of
/// the name (e.g. `.camera_front`) is left untouched.
///
/// # Arguments
/// * `image_name` - The image filename to derive the sample name from
///
/// # Returns
/// The derived sample name as an owned `String`.
///
/// # Examples
/// - "scene_001.camera.jpg" → "scene_001"
/// - "image.jpg" → "image"
/// - ".jpg" → ".jpg" (preserves filenames starting with dot)
/// - "rig.camera_front.jpg" → "rig.camera_front" (`.camera` is not a suffix)
fn extract_sample_name(image_name: &str) -> String {
    // Step 1: Remove file extension (but preserve filenames starting with dot)
    let stem = match image_name.rsplit_once('.') {
        Some((stem, _extension)) if !stem.is_empty() => stem,
        _ => image_name,
    };

    // Step 2: Remove .camera only when it terminates the name and something
    // would remain afterwards.
    match stem.strip_suffix(".camera") {
        Some(base) if !base.is_empty() => base.to_string(),
        _ => stem.to_string(),
    }
}
2388
2389/// Resolve a file for a given file type from sample data.
2390///
2391/// Returns the matching `SampleFile` if found, which may contain either
2392/// a URL (newer datasets) or inline data (legacy datasets).
2393///
2394/// # Arguments
2395/// * `file_type` - The type of file to resolve (e.g., LidarPcd, RadarPcd)
2396/// * `files` - The sample's file list
2397fn resolve_file<'a>(file_type: &FileType, files: &'a [SampleFile]) -> Option<&'a SampleFile> {
2398    match file_type {
2399        FileType::Image => None, // Image uses image_url field, not files
2400        FileType::All => None,   // All should be expanded before calling this
2401        file => {
2402            // Get all possible names for this file type (primary + aliases)
2403            let type_names = file_type_names(file);
2404            files
2405                .iter()
2406                .find(|f| type_names.contains(&f.r#type.as_str()))
2407        }
2408    }
2409}
2410
2411/// Returns all possible server-side names for a file type.
2412/// The server uses specific naming conventions in the STUDIO_DB_TYPE_MAP.
2413fn file_type_names(file_type: &FileType) -> Vec<&'static str> {
2414    match file_type {
2415        FileType::Image => vec!["image"],
2416        FileType::LidarPcd => vec!["lidar.pcd"],
2417        FileType::LidarDepth => vec!["lidar.depth", "depth.png", "depthmap"],
2418        FileType::LidarReflect => vec!["lidar.reflect"],
2419        FileType::RadarPcd => vec!["radar.pcd", "pcd"],
2420        FileType::RadarCube => vec!["radar.png", "cube"],
2421        FileType::All => vec![],
2422    }
2423}
2424
2425// ============================================================================
2426// DESERIALIZATION FORMAT CONVERSION HELPERS
2427// ============================================================================
2428
2429/// Convert annotations grouped format to flat Vec<Annotation>.
2430///
2431/// Pure function that handles the conversion from the server's legacy format
2432/// (HashMap<String, Vec<Annotation>>) to the flat Vec<Annotation>
2433/// representation.
2434///
2435/// # Arguments
2436/// * `map` - HashMap where keys are annotation types ("bbox", "box3d", "mask")
2437fn convert_annotations_map_to_vec(map: HashMap<String, Vec<Annotation>>) -> Vec<Annotation> {
2438    let mut all_annotations = Vec::new();
2439    if let Some(bbox_anns) = map.get("bbox") {
2440        all_annotations.extend(bbox_anns.clone());
2441    }
2442    if let Some(box3d_anns) = map.get("box3d") {
2443        all_annotations.extend(box3d_anns.clone());
2444    }
2445    if let Some(mask_anns) = map.get("mask") {
2446        all_annotations.extend(mask_anns.clone());
2447    }
2448    all_annotations
2449}
2450
2451// ============================================================================
2452// GPS/IMU VALIDATION HELPERS
2453// ============================================================================
2454
/// Check that a GPS fix is finite and inside the valid geographic ranges.
///
/// Pure function intended to catch data corruption or API issues early.
/// Finiteness of both values is checked before either range, so a
/// non-finite longitude is reported ahead of an out-of-range latitude.
///
/// # Arguments
/// * `lat` - Latitude in degrees
/// * `lon` - Longitude in degrees
///
/// # Returns
/// `Ok(())` if valid, `Err(String)` with descriptive error message otherwise
///
/// # Valid Ranges
/// - Latitude: -90.0 to +90.0 degrees
/// - Longitude: -180.0 to +180.0 degrees
fn validate_gps_coordinates(lat: f64, lon: f64) -> Result<(), String> {
    match (lat, lon) {
        (lat, _) if !lat.is_finite() => Err(format!("GPS latitude is not finite: {}", lat)),
        (_, lon) if !lon.is_finite() => Err(format!("GPS longitude is not finite: {}", lon)),
        // Both values are finite from here on, so a magnitude test is
        // equivalent to the inclusive range check.
        (lat, _) if lat.abs() > 90.0 => {
            Err(format!("GPS latitude out of range [-90, 90]: {}", lat))
        }
        (_, lon) if lon.abs() > 180.0 => {
            Err(format!("GPS longitude out of range [-180, 180]: {}", lon))
        }
        _ => Ok(()),
    }
}
2485
/// Check that IMU orientation angles are finite and inside the valid ranges.
///
/// Pure function intended to catch data corruption or sensor errors early.
/// All three finiteness checks run before any range check, in the order
/// roll, pitch, yaw; the first failing check determines the error message.
///
/// # Arguments
/// * `roll` - Roll angle in degrees
/// * `pitch` - Pitch angle in degrees
/// * `yaw` - Yaw angle in degrees
///
/// # Returns
/// `Ok(())` if valid, `Err(String)` with descriptive error message otherwise
///
/// # Valid Ranges
/// - Roll: -180.0 to +180.0 degrees
/// - Pitch: -90.0 to +90.0 degrees (typical gimbal lock range)
/// - Yaw: -180.0 to +180.0 degrees (or 0 to 360, normalized)
fn validate_imu_orientation(roll: f64, pitch: f64, yaw: f64) -> Result<(), String> {
    let failure = if !roll.is_finite() {
        Some(format!("IMU roll is not finite: {}", roll))
    } else if !pitch.is_finite() {
        Some(format!("IMU pitch is not finite: {}", pitch))
    } else if !yaw.is_finite() {
        Some(format!("IMU yaw is not finite: {}", yaw))
    } else if roll.abs() > 180.0 {
        // All angles are finite here, so a magnitude test is equivalent to
        // the inclusive range check.
        Some(format!("IMU roll out of range [-180, 180]: {}", roll))
    } else if pitch.abs() > 90.0 {
        Some(format!("IMU pitch out of range [-90, 90]: {}", pitch))
    } else if yaw.abs() > 180.0 {
        Some(format!("IMU yaw out of range [-180, 180]: {}", yaw))
    } else {
        None
    };

    failure.map_or(Ok(()), Err)
}
2525
2526// ============================================================================
2527// MASK POLYGON CONVERSION HELPERS
2528// ============================================================================
2529
/// Rebuild nested polygon rings from a flat, NaN-separated coordinate list.
///
/// The flat list interleaves x and y values and uses NaN as a separator
/// between polygons:
/// - Input: [x1, y1, x2, y2, NaN, x3, y3]
/// - Output: [[(x1, y1), (x2, y2)], [(x3, y3)]]
///
/// Within each NaN-delimited run the values are paired up from the start of
/// the run; a leftover unpaired value at the end of a run is dropped. Runs
/// that yield no complete pair (including empty runs produced by leading,
/// trailing or consecutive NaNs) produce no polygon.
///
/// This function is used when parsing Arrow files to reconstruct the nested
/// polygon format required by the EdgeFirst Studio API.
///
/// # Examples
///
/// ```rust
/// use edgefirst_client::unflatten_polygon_coordinates;
///
/// let coords = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0];
/// let polygons = unflatten_polygon_coordinates(&coords);
///
/// assert_eq!(polygons.len(), 2);
/// assert_eq!(polygons[0], vec![(1.0, 2.0), (3.0, 4.0)]);
/// assert_eq!(polygons[1], vec![(5.0, 6.0)]);
/// ```
#[cfg(feature = "polars")]
pub fn unflatten_polygon_coordinates(coords: &[f32]) -> Vec<Vec<(f32, f32)>> {
    coords
        // Every NaN ends the current run and starts a new one.
        .split(|value| value.is_nan())
        .map(|run| {
            // Pair values up from the start of the run; `chunks_exact`
            // discards an odd trailing value.
            run.chunks_exact(2)
                .map(|pair| (pair[0], pair[1]))
                .collect::<Vec<(f32, f32)>>()
        })
        .filter(|ring| !ring.is_empty())
        .collect()
}
2587
2588#[cfg(test)]
2589mod tests {
2590    use super::*;
2591
2592    // ============================================================================
2593    // TEST HELPER FUNCTIONS (Pure Logic for Testing)
2594    // ============================================================================
2595
2596    /// Flatten legacy grouped annotation format to a single vector.
2597    ///
2598    /// Converts HashMap<String, Vec<Annotation>> (with bbox/box3d/mask keys)
2599    /// into a flat Vec<Annotation> in deterministic order.
2600    fn flatten_annotation_map(
2601        map: std::collections::HashMap<String, Vec<Annotation>>,
2602    ) -> Vec<Annotation> {
2603        let mut all_annotations = Vec::new();
2604
2605        // Process in fixed order for deterministic results
2606        for key in ["bbox", "box3d", "mask"] {
2607            if let Some(mut anns) = map.get(key).cloned() {
2608                all_annotations.append(&mut anns);
2609            }
2610        }
2611
2612        all_annotations
2613    }
2614
2615    /// Get the JSON field name for the Annotation group field (for tests).
2616    fn annotation_group_field_name() -> &'static str {
2617        "group_name"
2618    }
2619
2620    /// Get the JSON field name for the Annotation object_id field (for tests).
2621    fn annotation_object_id_field_name() -> &'static str {
2622        "object_reference"
2623    }
2624
2625    /// Get the accepted alias for the Annotation object_id field (for tests).
2626    fn annotation_object_id_alias() -> &'static str {
2627        "object_id"
2628    }
2629
2630    /// Validate that annotation field names match expected values in JSON (for
2631    /// tests).
2632    fn validate_annotation_field_names(
2633        json_str: &str,
2634        expected_group: bool,
2635        expected_object_ref: bool,
2636    ) -> Result<(), String> {
2637        if expected_group && !json_str.contains("\"group_name\"") {
2638            return Err("Missing expected field: group_name".to_string());
2639        }
2640        if expected_object_ref && !json_str.contains("\"object_reference\"") {
2641            return Err("Missing expected field: object_reference".to_string());
2642        }
2643        Ok(())
2644    }
2645
2646    // ==== FileType Conversion Tests ====
2647    #[test]
2648    fn test_file_type_conversions() {
2649        // to_string() returns server API type names
2650        let api_cases = vec![
2651            (FileType::Image, "image"),
2652            (FileType::LidarPcd, "lidar.pcd"),
2653            (FileType::LidarDepth, "lidar.depth"),
2654            (FileType::LidarReflect, "lidar.reflect"),
2655            (FileType::RadarPcd, "radar.pcd"),
2656            (FileType::RadarCube, "radar.png"),
2657        ];
2658
2659        // file_extension() returns file extensions for saving
2660        let ext_cases = vec![
2661            (FileType::Image, "jpg"),
2662            (FileType::LidarPcd, "lidar.pcd"),
2663            (FileType::LidarDepth, "lidar.png"),
2664            (FileType::LidarReflect, "lidar.jpg"),
2665            (FileType::RadarPcd, "radar.pcd"),
2666            (FileType::RadarCube, "radar.png"),
2667        ];
2668
2669        // Test: Display → to_string() returns server API names
2670        for (file_type, expected_str) in &api_cases {
2671            assert_eq!(file_type.to_string(), *expected_str);
2672        }
2673
2674        // Test: file_extension() returns correct extensions
2675        for (file_type, expected_ext) in &ext_cases {
2676            assert_eq!(file_type.file_extension(), *expected_ext);
2677        }
2678
2679        // Test: try_from() string parsing (accepts multiple aliases)
2680        assert_eq!(
2681            FileType::try_from("lidar.depth").unwrap(),
2682            FileType::LidarDepth
2683        );
2684        assert_eq!(
2685            FileType::try_from("lidar.png").unwrap(),
2686            FileType::LidarDepth
2687        );
2688        assert_eq!(
2689            FileType::try_from("depth.png").unwrap(),
2690            FileType::LidarDepth
2691        );
2692        assert_eq!(
2693            FileType::try_from("lidar.reflect").unwrap(),
2694            FileType::LidarReflect
2695        );
2696        assert_eq!(
2697            FileType::try_from("lidar.jpg").unwrap(),
2698            FileType::LidarReflect
2699        );
2700        assert_eq!(
2701            FileType::try_from("lidar.jpeg").unwrap(),
2702            FileType::LidarReflect
2703        );
2704
2705        // Test: Invalid input
2706        assert!(FileType::try_from("invalid").is_err());
2707
2708        // Test: Round-trip (Display → try_from)
2709        for (file_type, _) in &api_cases {
2710            let s = file_type.to_string();
2711            let parsed = FileType::try_from(s.as_str()).unwrap();
2712            assert_eq!(parsed, *file_type);
2713        }
2714    }
2715
2716    // ==== AnnotationType Conversion Tests ====
2717    #[test]
2718    fn test_annotation_type_conversions() {
2719        let cases = vec![
2720            (AnnotationType::Box2d, "box2d"),
2721            (AnnotationType::Box3d, "box3d"),
2722            (AnnotationType::Polygon, "polygon"),
2723            (AnnotationType::Mask, "mask"),
2724        ];
2725
2726        // Test: Display → to_string()
2727        for (ann_type, expected_str) in &cases {
2728            assert_eq!(ann_type.to_string(), *expected_str);
2729        }
2730
2731        // Test: try_from() string parsing
2732        assert_eq!(
2733            AnnotationType::try_from("box2d").unwrap(),
2734            AnnotationType::Box2d
2735        );
2736        assert_eq!(
2737            AnnotationType::try_from("box3d").unwrap(),
2738            AnnotationType::Box3d
2739        );
2740        assert_eq!(
2741            AnnotationType::try_from("polygon").unwrap(),
2742            AnnotationType::Polygon
2743        );
2744        // "mask" maps to Polygon for backward compat
2745        assert_eq!(
2746            AnnotationType::try_from("mask").unwrap(),
2747            AnnotationType::Polygon
2748        );
2749        // "raster" maps to Mask
2750        assert_eq!(
2751            AnnotationType::try_from("raster").unwrap(),
2752            AnnotationType::Mask
2753        );
2754
2755        // Test: From<String> (backward compatibility)
2756        assert_eq!(
2757            AnnotationType::from("box2d".to_string()),
2758            AnnotationType::Box2d
2759        );
2760        assert_eq!(
2761            AnnotationType::from("box3d".to_string()),
2762            AnnotationType::Box3d
2763        );
2764        assert_eq!(
2765            AnnotationType::from("polygon".to_string()),
2766            AnnotationType::Polygon
2767        );
2768        // "mask" string maps to Polygon for backward compat
2769        assert_eq!(
2770            AnnotationType::from("mask".to_string()),
2771            AnnotationType::Polygon
2772        );
2773
2774        // Invalid defaults to Box2d for backward compatibility
2775        assert_eq!(
2776            AnnotationType::from("invalid".to_string()),
2777            AnnotationType::Box2d
2778        );
2779
2780        // Test: Invalid input
2781        assert!(AnnotationType::try_from("invalid").is_err());
2782
2783        // Test: Round-trip (Display → try_from)
2784        // Note: Polygon round-trips ("polygon" → Polygon), but Mask does not
2785        // because "mask" → Polygon (backward compat). Mask displays as "mask"
2786        // but parses to Polygon.
2787        assert_eq!(
2788            AnnotationType::try_from(AnnotationType::Box2d.to_string().as_str()).unwrap(),
2789            AnnotationType::Box2d
2790        );
2791        assert_eq!(
2792            AnnotationType::try_from(AnnotationType::Box3d.to_string().as_str()).unwrap(),
2793            AnnotationType::Box3d
2794        );
2795        assert_eq!(
2796            AnnotationType::try_from(AnnotationType::Polygon.to_string().as_str()).unwrap(),
2797            AnnotationType::Polygon
2798        );
2799    }
2800
2801    // ==== Pure Function: extract_sample_name Tests ====
2802    #[test]
2803    fn test_extract_sample_name_with_extension_and_camera() {
2804        assert_eq!(extract_sample_name("scene_001.camera.jpg"), "scene_001");
2805    }
2806
2807    #[test]
2808    fn test_extract_sample_name_multiple_dots() {
2809        assert_eq!(extract_sample_name("image.v2.camera.png"), "image.v2");
2810    }
2811
2812    #[test]
2813    fn test_extract_sample_name_extension_only() {
2814        assert_eq!(extract_sample_name("test.jpg"), "test");
2815    }
2816
2817    #[test]
2818    fn test_extract_sample_name_no_extension() {
2819        assert_eq!(extract_sample_name("test"), "test");
2820    }
2821
2822    #[test]
2823    fn test_extract_sample_name_edge_case_dot_prefix() {
2824        assert_eq!(extract_sample_name(".jpg"), ".jpg");
2825    }
2826
2827    // ==== File Resolution Tests ====
2828    #[test]
2829    fn test_resolve_file_image_type_returns_none() {
2830        // Image type uses image_url field, not files array
2831        let files = vec![];
2832        let result = resolve_file(&FileType::Image, &files);
2833        assert!(result.is_none());
2834    }
2835
2836    #[test]
2837    fn test_resolve_file_lidar_pcd() {
2838        let files = vec![
2839            SampleFile::with_url(
2840                "lidar.pcd".to_string(),
2841                "https://example.com/file.pcd".to_string(),
2842            ),
2843            SampleFile::with_url(
2844                "radar.pcd".to_string(),
2845                "https://example.com/radar.pcd".to_string(),
2846            ),
2847        ];
2848        let result = resolve_file(&FileType::LidarPcd, &files);
2849        assert!(result.is_some());
2850        assert_eq!(result.unwrap().url(), Some("https://example.com/file.pcd"));
2851    }
2852
2853    #[test]
2854    fn test_resolve_file_not_found() {
2855        let files = vec![SampleFile::with_url(
2856            "lidar.pcd".to_string(),
2857            "https://example.com/file.pcd".to_string(),
2858        )];
2859        // Requesting radar.pcd which doesn't exist in files
2860        let result = resolve_file(&FileType::RadarPcd, &files);
2861        assert!(result.is_none());
2862    }
2863
2864    #[test]
2865    fn test_resolve_file_lidar_depth() {
2866        // Server returns "lidar.depth" for LiDAR depth data
2867        let files = vec![SampleFile::with_url(
2868            "lidar.depth".to_string(),
2869            "https://example.com/depth.png".to_string(),
2870        )];
2871        let result = resolve_file(&FileType::LidarDepth, &files);
2872        assert!(result.is_some());
2873        assert_eq!(result.unwrap().url(), Some("https://example.com/depth.png"));
2874    }
2875
2876    #[test]
2877    fn test_resolve_file_lidar_reflect() {
2878        // Server returns "lidar.reflect" for LiDAR reflectance data
2879        let files = vec![SampleFile::with_url(
2880            "lidar.reflect".to_string(),
2881            "https://example.com/reflect.png".to_string(),
2882        )];
2883        let result = resolve_file(&FileType::LidarReflect, &files);
2884        assert!(result.is_some());
2885        assert_eq!(
2886            result.unwrap().url(),
2887            Some("https://example.com/reflect.png")
2888        );
2889    }
2890
2891    #[test]
2892    fn test_resolve_file_radar_cube() {
2893        // Server returns "radar.png" or "cube" for radar cube data
2894        let files = vec![SampleFile::with_url(
2895            "radar.png".to_string(),
2896            "https://example.com/radar.png".to_string(),
2897        )];
2898        let result = resolve_file(&FileType::RadarCube, &files);
2899        assert!(result.is_some());
2900        assert_eq!(result.unwrap().url(), Some("https://example.com/radar.png"));
2901    }
2902
2903    #[test]
2904    fn test_resolve_file_with_inline_data() {
2905        // Legacy datasets may have inline data instead of URLs
2906        let files = vec![SampleFile::with_data(
2907            "radar.pcd".to_string(),
2908            "SGVsbG8gV29ybGQ=".to_string(), // base64 "Hello World"
2909        )];
2910        let result = resolve_file(&FileType::RadarPcd, &files);
2911        assert!(result.is_some());
2912        let file = result.unwrap();
2913        assert!(file.url().is_none());
2914        assert_eq!(file.data(), Some("SGVsbG8gV29ybGQ="));
2915    }
2916
2917    #[test]
2918    fn test_convert_annotations_map_to_vec_with_bbox() {
2919        let mut map = HashMap::new();
2920        let bbox_ann = Annotation::new();
2921        map.insert("bbox".to_string(), vec![bbox_ann.clone()]);
2922
2923        let annotations = convert_annotations_map_to_vec(map);
2924        assert_eq!(annotations.len(), 1);
2925    }
2926
2927    #[test]
2928    fn test_convert_annotations_map_to_vec_all_types() {
2929        let mut map = HashMap::new();
2930        map.insert("bbox".to_string(), vec![Annotation::new()]);
2931        map.insert("box3d".to_string(), vec![Annotation::new()]);
2932        map.insert("mask".to_string(), vec![Annotation::new()]);
2933
2934        let annotations = convert_annotations_map_to_vec(map);
2935        assert_eq!(annotations.len(), 3);
2936    }
2937
2938    #[test]
2939    fn test_convert_annotations_map_to_vec_empty() {
2940        let map = HashMap::new();
2941        let annotations = convert_annotations_map_to_vec(map);
2942        assert_eq!(annotations.len(), 0);
2943    }
2944
2945    #[test]
2946    fn test_convert_annotations_map_to_vec_unknown_type_ignored() {
2947        let mut map = HashMap::new();
2948        map.insert("unknown".to_string(), vec![Annotation::new()]);
2949
2950        let annotations = convert_annotations_map_to_vec(map);
2951        // Unknown types are ignored
2952        assert_eq!(annotations.len(), 0);
2953    }
2954
2955    // ==== Annotation Field Mapping Tests ====
2956    #[test]
2957    fn test_annotation_group_field_name() {
2958        assert_eq!(annotation_group_field_name(), "group_name");
2959    }
2960
2961    #[test]
2962    fn test_annotation_object_id_field_name() {
2963        assert_eq!(annotation_object_id_field_name(), "object_reference");
2964    }
2965
2966    #[test]
2967    fn test_annotation_object_id_alias() {
2968        assert_eq!(annotation_object_id_alias(), "object_id");
2969    }
2970
2971    #[test]
2972    fn test_validate_annotation_field_names_success() {
2973        let json = r#"{"group_name":"train","object_reference":"obj1"}"#;
2974        assert!(validate_annotation_field_names(json, true, true).is_ok());
2975    }
2976
2977    #[test]
2978    fn test_validate_annotation_field_names_missing_group() {
2979        let json = r#"{"object_reference":"obj1"}"#;
2980        let result = validate_annotation_field_names(json, true, false);
2981        assert!(result.is_err());
2982        assert!(result.unwrap_err().contains("group_name"));
2983    }
2984
2985    #[test]
2986    fn test_validate_annotation_field_names_missing_object_ref() {
2987        let json = r#"{"group_name":"train"}"#;
2988        let result = validate_annotation_field_names(json, false, true);
2989        assert!(result.is_err());
2990        assert!(result.unwrap_err().contains("object_reference"));
2991    }
2992
2993    #[test]
2994    fn test_annotation_serialization_field_names() {
2995        // Test that Annotation serializes with correct field names
2996        let mut ann = Annotation::new();
2997        ann.set_group(Some("train".to_string()));
2998        ann.set_object_id(Some("obj1".to_string()));
2999
3000        let json = serde_json::to_string(&ann).unwrap();
3001        // Verify JSON contains correct field names
3002        assert!(validate_annotation_field_names(&json, true, true).is_ok());
3003    }
3004
3005    // ==== GPS/IMU Validation Tests ====
3006    #[test]
3007    fn test_validate_gps_coordinates_valid() {
3008        assert!(validate_gps_coordinates(37.7749, -122.4194).is_ok()); // San Francisco
3009        assert!(validate_gps_coordinates(0.0, 0.0).is_ok()); // Null Island
3010        assert!(validate_gps_coordinates(90.0, 180.0).is_ok()); // Edge cases
3011        assert!(validate_gps_coordinates(-90.0, -180.0).is_ok()); // Edge cases
3012    }
3013
3014    #[test]
3015    fn test_validate_gps_coordinates_invalid_latitude() {
3016        let result = validate_gps_coordinates(91.0, 0.0);
3017        assert!(result.is_err());
3018        assert!(result.unwrap_err().contains("latitude out of range"));
3019
3020        let result = validate_gps_coordinates(-91.0, 0.0);
3021        assert!(result.is_err());
3022        assert!(result.unwrap_err().contains("latitude out of range"));
3023    }
3024
3025    #[test]
3026    fn test_validate_gps_coordinates_invalid_longitude() {
3027        let result = validate_gps_coordinates(0.0, 181.0);
3028        assert!(result.is_err());
3029        assert!(result.unwrap_err().contains("longitude out of range"));
3030
3031        let result = validate_gps_coordinates(0.0, -181.0);
3032        assert!(result.is_err());
3033        assert!(result.unwrap_err().contains("longitude out of range"));
3034    }
3035
3036    #[test]
3037    fn test_validate_gps_coordinates_non_finite() {
3038        let result = validate_gps_coordinates(f64::NAN, 0.0);
3039        assert!(result.is_err());
3040        assert!(result.unwrap_err().contains("not finite"));
3041
3042        let result = validate_gps_coordinates(0.0, f64::INFINITY);
3043        assert!(result.is_err());
3044        assert!(result.unwrap_err().contains("not finite"));
3045    }
3046
3047    #[test]
3048    fn test_validate_imu_orientation_valid() {
3049        assert!(validate_imu_orientation(0.0, 0.0, 0.0).is_ok());
3050        assert!(validate_imu_orientation(45.0, 30.0, 90.0).is_ok());
3051        assert!(validate_imu_orientation(180.0, 90.0, -180.0).is_ok()); // Edge cases
3052        assert!(validate_imu_orientation(-180.0, -90.0, 180.0).is_ok()); // Edge cases
3053    }
3054
3055    #[test]
3056    fn test_validate_imu_orientation_invalid_roll() {
3057        let result = validate_imu_orientation(181.0, 0.0, 0.0);
3058        assert!(result.is_err());
3059        assert!(result.unwrap_err().contains("roll out of range"));
3060
3061        let result = validate_imu_orientation(-181.0, 0.0, 0.0);
3062        assert!(result.is_err());
3063    }
3064
3065    #[test]
3066    fn test_validate_imu_orientation_invalid_pitch() {
3067        let result = validate_imu_orientation(0.0, 91.0, 0.0);
3068        assert!(result.is_err());
3069        assert!(result.unwrap_err().contains("pitch out of range"));
3070
3071        let result = validate_imu_orientation(0.0, -91.0, 0.0);
3072        assert!(result.is_err());
3073    }
3074
3075    #[test]
3076    fn test_validate_imu_orientation_non_finite() {
3077        let result = validate_imu_orientation(f64::NAN, 0.0, 0.0);
3078        assert!(result.is_err());
3079        assert!(result.unwrap_err().contains("not finite"));
3080
3081        let result = validate_imu_orientation(0.0, f64::INFINITY, 0.0);
3082        assert!(result.is_err());
3083
3084        let result = validate_imu_orientation(0.0, 0.0, f64::NEG_INFINITY);
3085        assert!(result.is_err());
3086    }
3087
3088    // ==== Polygon Unflattening Tests ====
3089    #[test]
3090    #[cfg(feature = "polars")]
3091    fn test_unflatten_polygon_coordinates_single_polygon() {
3092        let coords = vec![1.0, 2.0, 3.0, 4.0];
3093        let result = unflatten_polygon_coordinates(&coords);
3094
3095        assert_eq!(result.len(), 1);
3096        assert_eq!(result[0].len(), 2);
3097        assert_eq!(result[0][0], (1.0, 2.0));
3098        assert_eq!(result[0][1], (3.0, 4.0));
3099    }
3100
3101    #[test]
3102    #[cfg(feature = "polars")]
3103    fn test_unflatten_polygon_coordinates_multiple_polygons() {
3104        let coords = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0, 7.0, 8.0];
3105        let result = unflatten_polygon_coordinates(&coords);
3106
3107        assert_eq!(result.len(), 2);
3108        assert_eq!(result[0].len(), 2);
3109        assert_eq!(result[0][0], (1.0, 2.0));
3110        assert_eq!(result[0][1], (3.0, 4.0));
3111        assert_eq!(result[1].len(), 2);
3112        assert_eq!(result[1][0], (5.0, 6.0));
3113        assert_eq!(result[1][1], (7.0, 8.0));
3114    }
3115
3116    #[test]
3117    #[cfg(feature = "polars")]
3118    fn test_unflatten_polygon_coordinates_roundtrip() {
3119        // Test that unflatten correctly reconstructs from NaN-separated flat coords
3120        let flat = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0, 7.0, 8.0];
3121        let result = unflatten_polygon_coordinates(&flat);
3122
3123        let expected = vec![vec![(1.0, 2.0), (3.0, 4.0)], vec![(5.0, 6.0), (7.0, 8.0)]];
3124        assert_eq!(result, expected);
3125    }
3126
3127    // ==== Annotation Format Flattening Tests ====
3128    #[test]
3129    fn test_flatten_annotation_map_all_types() {
3130        use std::collections::HashMap;
3131
3132        let mut map = HashMap::new();
3133
3134        // Create test annotations
3135        let mut bbox_ann = Annotation::new();
3136        bbox_ann.set_label(Some("bbox_label".to_string()));
3137
3138        let mut box3d_ann = Annotation::new();
3139        box3d_ann.set_label(Some("box3d_label".to_string()));
3140
3141        let mut mask_ann = Annotation::new();
3142        mask_ann.set_label(Some("mask_label".to_string()));
3143
3144        map.insert("bbox".to_string(), vec![bbox_ann.clone()]);
3145        map.insert("box3d".to_string(), vec![box3d_ann.clone()]);
3146        map.insert("mask".to_string(), vec![mask_ann.clone()]);
3147
3148        let result = flatten_annotation_map(map);
3149
3150        assert_eq!(result.len(), 3);
3151        // Check ordering: bbox, box3d, mask
3152        assert_eq!(result[0].label(), Some(&"bbox_label".to_string()));
3153        assert_eq!(result[1].label(), Some(&"box3d_label".to_string()));
3154        assert_eq!(result[2].label(), Some(&"mask_label".to_string()));
3155    }
3156
3157    #[test]
3158    fn test_flatten_annotation_map_single_type() {
3159        use std::collections::HashMap;
3160
3161        let mut map = HashMap::new();
3162        let mut bbox_ann = Annotation::new();
3163        bbox_ann.set_label(Some("test".to_string()));
3164        map.insert("bbox".to_string(), vec![bbox_ann]);
3165
3166        let result = flatten_annotation_map(map);
3167
3168        assert_eq!(result.len(), 1);
3169        assert_eq!(result[0].label(), Some(&"test".to_string()));
3170    }
3171
3172    #[test]
3173    fn test_flatten_annotation_map_empty() {
3174        use std::collections::HashMap;
3175
3176        let map = HashMap::new();
3177        let result = flatten_annotation_map(map);
3178
3179        assert_eq!(result.len(), 0);
3180    }
3181
3182    #[test]
3183    fn test_flatten_annotation_map_deterministic_order() {
3184        use std::collections::HashMap;
3185
3186        let mut map = HashMap::new();
3187
3188        let mut bbox_ann = Annotation::new();
3189        bbox_ann.set_label(Some("bbox".to_string()));
3190
3191        let mut box3d_ann = Annotation::new();
3192        box3d_ann.set_label(Some("box3d".to_string()));
3193
3194        let mut mask_ann = Annotation::new();
3195        mask_ann.set_label(Some("mask".to_string()));
3196
3197        // Insert in reverse order to test deterministic ordering
3198        map.insert("mask".to_string(), vec![mask_ann]);
3199        map.insert("box3d".to_string(), vec![box3d_ann]);
3200        map.insert("bbox".to_string(), vec![bbox_ann]);
3201
3202        let result = flatten_annotation_map(map);
3203
3204        // Should be bbox, box3d, mask regardless of insertion order
3205        assert_eq!(result.len(), 3);
3206        assert_eq!(result[0].label(), Some(&"bbox".to_string()));
3207        assert_eq!(result[1].label(), Some(&"box3d".to_string()));
3208        assert_eq!(result[2].label(), Some(&"mask".to_string()));
3209    }
3210
3211    // ==== Box2d Tests ====
3212    #[test]
3213    fn test_box2d_construction_and_accessors() {
3214        // Test case 1: Basic construction with positive coordinates
3215        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
3216        assert_eq!(
3217            (bbox.left(), bbox.top(), bbox.width(), bbox.height()),
3218            (10.0, 20.0, 100.0, 50.0)
3219        );
3220
3221        // Test case 2: Center calculations
3222        assert_eq!((bbox.cx(), bbox.cy()), (60.0, 45.0)); // 10+50, 20+25
3223
3224        // Test case 3: Zero origin
3225        let bbox = Box2d::new(0.0, 0.0, 640.0, 480.0);
3226        assert_eq!(
3227            (bbox.left(), bbox.top(), bbox.width(), bbox.height()),
3228            (0.0, 0.0, 640.0, 480.0)
3229        );
3230        assert_eq!((bbox.cx(), bbox.cy()), (320.0, 240.0));
3231    }
3232
3233    #[test]
3234    fn test_box2d_center_calculation() {
3235        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
3236
3237        // Center = position + size/2
3238        assert_eq!(bbox.cx(), 60.0); // 10 + 100/2
3239        assert_eq!(bbox.cy(), 45.0); // 20 + 50/2
3240    }
3241
3242    #[test]
3243    fn test_box2d_zero_dimensions() {
3244        let bbox = Box2d::new(10.0, 20.0, 0.0, 0.0);
3245
3246        // When width/height are zero, center = position
3247        assert_eq!(bbox.cx(), 10.0);
3248        assert_eq!(bbox.cy(), 20.0);
3249    }
3250
3251    #[test]
3252    fn test_box2d_negative_dimensions() {
3253        let bbox = Box2d::new(100.0, 100.0, -50.0, -50.0);
3254
3255        // Negative dimensions create inverted boxes (valid edge case)
3256        assert_eq!(bbox.width(), -50.0);
3257        assert_eq!(bbox.height(), -50.0);
3258        assert_eq!(bbox.cx(), 75.0); // 100 + (-50)/2
3259        assert_eq!(bbox.cy(), 75.0); // 100 + (-50)/2
3260    }
3261
3262    // ==== Box3d Tests ====
3263    #[test]
3264    fn test_box3d_construction_and_accessors() {
3265        // Test case 1: Basic 3D construction
3266        let bbox = Box3d::new(1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
3267        assert_eq!((bbox.cx(), bbox.cy(), bbox.cz()), (1.0, 2.0, 3.0));
3268        assert_eq!(
3269            (bbox.width(), bbox.height(), bbox.length()),
3270            (4.0, 5.0, 6.0)
3271        );
3272
3273        // Test case 2: Corners calculation with offset center
3274        let bbox = Box3d::new(10.0, 20.0, 30.0, 4.0, 6.0, 8.0);
3275        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (8.0, 17.0, 26.0)); // 10-2, 20-3, 30-4
3276
3277        // Test case 3: Center at origin with negative corners
3278        let bbox = Box3d::new(0.0, 0.0, 0.0, 2.0, 3.0, 4.0);
3279        assert_eq!((bbox.cx(), bbox.cy(), bbox.cz()), (0.0, 0.0, 0.0));
3280        assert_eq!(
3281            (bbox.width(), bbox.height(), bbox.length()),
3282            (2.0, 3.0, 4.0)
3283        );
3284        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (-1.0, -1.5, -2.0));
3285    }
3286
3287    #[test]
3288    fn test_box3d_center_calculation() {
3289        let bbox = Box3d::new(10.0, 20.0, 30.0, 100.0, 50.0, 40.0);
3290
3291        // Center values as specified in constructor
3292        assert_eq!(bbox.cx(), 10.0);
3293        assert_eq!(bbox.cy(), 20.0);
3294        assert_eq!(bbox.cz(), 30.0);
3295    }
3296
3297    #[test]
3298    fn test_box3d_zero_dimensions() {
3299        let bbox = Box3d::new(5.0, 10.0, 15.0, 0.0, 0.0, 0.0);
3300
3301        // When all dimensions are zero, corners = center
3302        assert_eq!(bbox.cx(), 5.0);
3303        assert_eq!(bbox.cy(), 10.0);
3304        assert_eq!(bbox.cz(), 15.0);
3305        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (5.0, 10.0, 15.0));
3306    }
3307
3308    #[test]
3309    fn test_box3d_negative_dimensions() {
3310        let bbox = Box3d::new(100.0, 100.0, 100.0, -50.0, -50.0, -50.0);
3311
3312        // Negative dimensions create inverted boxes
3313        assert_eq!(bbox.width(), -50.0);
3314        assert_eq!(bbox.height(), -50.0);
3315        assert_eq!(bbox.length(), -50.0);
3316        assert_eq!(
3317            (bbox.left(), bbox.top(), bbox.front()),
3318            (125.0, 125.0, 125.0)
3319        );
3320    }
3321
3322    // ==== Polygon Tests ====
3323    #[test]
3324    fn test_polygon_creation_and_deserialization() {
3325        // Test case 1: Direct construction
3326        let rings = vec![vec![(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0)]];
3327        let polygon = Polygon::new(rings.clone());
3328        assert_eq!(polygon.rings, rings);
3329
3330        // Test case 2: Deserialization from legacy format (field name "polygon")
3331        let legacy = serde_json::json!({
3332            "polygon": {
3333                "polygon": [[
3334                    [0.0_f32, 0.0_f32],
3335                    [1.0_f32, 0.0_f32],
3336                    [1.0_f32, 1.0_f32]
3337                ]]
3338            }
3339        });
3340
3341        #[derive(serde::Deserialize)]
3342        struct Wrapper {
3343            polygon: Polygon,
3344        }
3345
3346        let parsed: Wrapper = serde_json::from_value(legacy).unwrap();
3347        assert_eq!(parsed.polygon.rings.len(), 1);
3348        assert_eq!(parsed.polygon.rings[0].len(), 3);
3349    }
3350
3351    // ==== Sample Tests ====
3352    #[test]
3353    fn test_sample_construction_and_accessors() {
3354        // Test case 1: New sample is empty
3355        let sample = Sample::new();
3356        assert_eq!(sample.id(), None);
3357        assert_eq!(sample.image_name(), None);
3358        assert_eq!(sample.width(), None);
3359        assert_eq!(sample.height(), None);
3360
3361        // Test case 2: Sample with populated fields
3362        let mut sample = Sample::new();
3363        sample.image_name = Some("test.jpg".to_string());
3364        sample.width = Some(1920);
3365        sample.height = Some(1080);
3366        sample.group = Some("group1".to_string());
3367
3368        assert_eq!(sample.image_name(), Some("test.jpg"));
3369        assert_eq!(sample.width(), Some(1920));
3370        assert_eq!(sample.height(), Some(1080));
3371        assert_eq!(sample.group(), Some(&"group1".to_string()));
3372    }
3373
3374    #[test]
3375    fn test_sample_name_extraction_from_image_name() {
3376        let mut sample = Sample::new();
3377
3378        // Test case 1: Basic image name with extension
3379        sample.image_name = Some("test_image.jpg".to_string());
3380        assert_eq!(sample.name(), Some("test_image".to_string()));
3381
3382        // Test case 2: Image name with .camera suffix
3383        sample.image_name = Some("test_image.camera.jpg".to_string());
3384        assert_eq!(sample.name(), Some("test_image".to_string()));
3385
3386        // Test case 3: Image name without extension
3387        sample.image_name = Some("test_image".to_string());
3388        assert_eq!(sample.name(), Some("test_image".to_string()));
3389    }
3390
3391    // ==== Annotation Tests ====
3392    #[test]
3393    fn test_annotation_construction_and_setters() {
3394        // Test case 1: New annotation is empty
3395        let ann = Annotation::new();
3396        assert_eq!(ann.sample_id(), None);
3397        assert_eq!(ann.label(), None);
3398        assert_eq!(ann.box2d(), None);
3399        assert_eq!(ann.box3d(), None);
3400        assert_eq!(ann.polygon(), None);
3401
3402        // Test case 2: Setting annotation fields
3403        let mut ann = Annotation::new();
3404        ann.set_label(Some("car".to_string()));
3405        assert_eq!(ann.label(), Some(&"car".to_string()));
3406
3407        ann.set_label_index(Some(42));
3408        assert_eq!(ann.label_index(), Some(42));
3409
3410        // Test case 3: Setting bounding box
3411        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
3412        ann.set_box2d(Some(bbox.clone()));
3413        assert!(ann.box2d().is_some());
3414        assert_eq!(ann.box2d().unwrap().left(), 10.0);
3415    }
3416
3417    // ==== SampleFile Tests ====
3418    #[test]
3419    fn test_sample_file_with_url_and_filename() {
3420        // Test case 1: SampleFile with URL
3421        let file = SampleFile::with_url(
3422            "lidar.pcd".to_string(),
3423            "https://example.com/file.pcd".to_string(),
3424        );
3425        assert_eq!(file.file_type(), "lidar.pcd");
3426        assert_eq!(file.url(), Some("https://example.com/file.pcd"));
3427        assert_eq!(file.filename(), None);
3428
3429        // Test case 2: SampleFile with local filename
3430        let file = SampleFile::with_filename("image".to_string(), "test.jpg".to_string());
3431        assert_eq!(file.file_type(), "image");
3432        assert_eq!(file.filename(), Some("test.jpg"));
3433        assert_eq!(file.url(), None);
3434    }
3435
3436    // ==== Sample GPS/IMU Deserialization Tests ====
3437    #[test]
3438    fn test_sample_deserializes_gps_imu_from_sensors() {
3439        use serde_json::json;
3440
3441        // Test: GPS and IMU data in sensors array is extracted to location field
3442        let sample_json = json!({
3443            "id": 123,
3444            "image_name": "test.jpg",
3445            "sensors": [
3446                {"gps": {"lat": 37.7749, "lon": -122.4194}},
3447                {"imu": {"roll": 1.5, "pitch": 2.5, "yaw": 3.5}},
3448                {"radar.pcd": "https://example.com/radar.pcd"}
3449            ]
3450        });
3451
3452        let sample: Sample = serde_json::from_value(sample_json).unwrap();
3453
3454        // Verify location was extracted
3455        assert!(sample.location.is_some());
3456        let location = sample.location.as_ref().unwrap();
3457
3458        // Verify GPS data
3459        assert!(location.gps.is_some());
3460        let gps = location.gps.as_ref().unwrap();
3461        assert!((gps.lat - 37.7749).abs() < 0.0001);
3462        assert!((gps.lon - (-122.4194)).abs() < 0.0001);
3463
3464        // Verify IMU data
3465        assert!(location.imu.is_some());
3466        let imu = location.imu.as_ref().unwrap();
3467        assert!((imu.roll - 1.5).abs() < 0.0001);
3468        assert!((imu.pitch - 2.5).abs() < 0.0001);
3469        assert!((imu.yaw - 3.5).abs() < 0.0001);
3470
3471        // Verify files were also extracted (non-GPS/IMU entries)
3472        assert_eq!(sample.files.len(), 1);
3473        assert_eq!(sample.files[0].file_type(), "radar.pcd");
3474        assert_eq!(sample.files[0].url(), Some("https://example.com/radar.pcd"));
3475    }
3476
3477    #[test]
3478    fn test_sample_deserializes_gps_only() {
3479        use serde_json::json;
3480
3481        // Test: Only GPS data in sensors
3482        let sample_json = json!({
3483            "id": 456,
3484            "sensors": [
3485                {"gps": {"lat": 40.7128, "lon": -74.0060}}
3486            ]
3487        });
3488
3489        let sample: Sample = serde_json::from_value(sample_json).unwrap();
3490
3491        assert!(sample.location.is_some());
3492        let location = sample.location.as_ref().unwrap();
3493
3494        assert!(location.gps.is_some());
3495        assert!(location.imu.is_none());
3496
3497        let gps = location.gps.as_ref().unwrap();
3498        assert!((gps.lat - 40.7128).abs() < 0.0001);
3499        assert!((gps.lon - (-74.0060)).abs() < 0.0001);
3500    }
3501
3502    #[test]
3503    fn test_sample_deserializes_without_location() {
3504        use serde_json::json;
3505
3506        // Test: Sample with only file sensors (no GPS/IMU)
3507        let sample_json = json!({
3508            "id": 789,
3509            "sensors": [
3510                {"radar.pcd": "https://example.com/radar.pcd"},
3511                {"lidar.pcd": "https://example.com/lidar.pcd"}
3512            ]
3513        });
3514
3515        let sample: Sample = serde_json::from_value(sample_json).unwrap();
3516
3517        // No location data
3518        assert!(sample.location.is_none());
3519
3520        // Both files extracted
3521        assert_eq!(sample.files.len(), 2);
3522    }
3523
3524    // ==== Label Tests ====
3525    #[test]
3526    fn test_label_deserialization_and_accessors() {
3527        use serde_json::json;
3528
3529        // Test case 1: Label deserialization and accessors
3530        let label_json = json!({
3531            "id": 123,
3532            "dataset_id": 456,
3533            "index": 5,
3534            "name": "car"
3535        });
3536
3537        let label: Label = serde_json::from_value(label_json).unwrap();
3538        assert_eq!(label.id(), 123);
3539        assert_eq!(label.index(), 5);
3540        assert_eq!(label.name(), "car");
3541        assert_eq!(label.to_string(), "car");
3542        assert_eq!(format!("{}", label), "car");
3543
3544        // Test case 2: Different label
3545        let label_json = json!({
3546            "id": 1,
3547            "dataset_id": 100,
3548            "index": 0,
3549            "name": "person"
3550        });
3551
3552        let label: Label = serde_json::from_value(label_json).unwrap();
3553        assert_eq!(format!("{}", label), "person");
3554    }
3555
3556    // ==== Annotation Serialization Tests ====
3557    #[test]
3558    fn test_annotation_serialization_with_mask_and_box() {
3559        let polygon = vec![vec![
3560            (0.0_f32, 0.0_f32),
3561            (1.0_f32, 0.0_f32),
3562            (1.0_f32, 1.0_f32),
3563        ]];
3564
3565        let mut annotation = Annotation::new();
3566        annotation.set_label(Some("test".to_string()));
3567        annotation.set_box2d(Some(Box2d::new(10.0, 20.0, 30.0, 40.0)));
3568        annotation.set_polygon(Some(Polygon::new(polygon)));
3569
3570        let mut sample = Sample::new();
3571        sample.annotations.push(annotation);
3572
3573        let json = serde_json::to_value(&sample).unwrap();
3574        let annotations = json
3575            .get("annotations")
3576            .and_then(|value| value.as_array())
3577            .expect("annotations serialized as array");
3578        assert_eq!(annotations.len(), 1);
3579
3580        let annotation_json = annotations[0].as_object().expect("annotation object");
3581        assert!(annotation_json.contains_key("box2d"));
3582        // samples.populate2 expects the polygon geometry under the "mask" key
3583        // (historical: struct was renamed Rust-side from Mask to Polygon but
3584        // the wire contract did not follow). Emitting "polygon" here is what
3585        // caused polygons to be silently dropped on upload.
3586        assert!(
3587            annotation_json.contains_key("mask"),
3588            "Annotation must serialise polygon under 'mask' key for samples.populate2; got keys: {:?}",
3589            annotation_json.keys().collect::<Vec<_>>()
3590        );
3591        assert!(!annotation_json.contains_key("polygon"));
3592        assert!(!annotation_json.contains_key("x"));
3593        assert!(
3594            annotation_json
3595                .get("mask")
3596                .and_then(|value| value.as_array())
3597                .is_some()
3598        );
3599    }
3600
3601    #[test]
3602    fn test_frame_number_negative_one_deserializes_as_none() {
3603        // Server returns frame_number: -1 for non-sequence samples
3604        // This should deserialize as None for the client
3605        let json = r#"{
3606            "uuid": "test-uuid",
3607            "frame_number": -1
3608        }"#;
3609
3610        let sample: Sample = serde_json::from_str(json).unwrap();
3611        assert_eq!(sample.frame_number, None);
3612    }
3613
3614    #[test]
3615    fn test_frame_number_positive_value_deserializes_correctly() {
3616        // Valid frame numbers should deserialize normally
3617        let json = r#"{
3618            "uuid": "test-uuid",
3619            "frame_number": 5
3620        }"#;
3621
3622        let sample: Sample = serde_json::from_str(json).unwrap();
3623        assert_eq!(sample.frame_number, Some(5));
3624    }
3625
3626    #[test]
3627    fn test_frame_number_null_deserializes_as_none() {
3628        // Explicit null should also be None
3629        let json = r#"{
3630            "uuid": "test-uuid",
3631            "frame_number": null
3632        }"#;
3633
3634        let sample: Sample = serde_json::from_str(json).unwrap();
3635        assert_eq!(sample.frame_number, None);
3636    }
3637
3638    #[test]
3639    fn test_frame_number_missing_deserializes_as_none() {
3640        // Missing field should be None
3641        let json = r#"{
3642            "uuid": "test-uuid"
3643        }"#;
3644
3645        let sample: Sample = serde_json::from_str(json).unwrap();
3646        assert_eq!(sample.frame_number, None);
3647    }
3648
3649    // =========================================================================
3650    // samples_dataframe tests - CRITICAL: Verify group preservation
3651    // =========================================================================
3652
3653    #[cfg(feature = "polars")]
3654    #[test]
3655    fn test_samples_dataframe_preserves_group_for_samples_without_annotations() {
3656        use polars::prelude::*;
3657
3658        // Create sample WITH annotations
3659        let mut sample_with_ann = Sample::new();
3660        sample_with_ann.image_name = Some("annotated.jpg".to_string());
3661        sample_with_ann.group = Some("train".to_string());
3662        let mut annotation = Annotation::new();
3663        annotation.set_label(Some("car".to_string()));
3664        annotation.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
3665        annotation.set_name(Some("annotated".to_string()));
3666        sample_with_ann.annotations = vec![annotation];
3667
3668        // Create sample WITHOUT annotations (this is the critical case)
3669        let mut sample_no_ann = Sample::new();
3670        sample_no_ann.image_name = Some("unannotated.jpg".to_string());
3671        sample_no_ann.group = Some("val".to_string()); // Should be preserved!
3672        sample_no_ann.annotations = vec![]; // Empty annotations
3673
3674        let samples = vec![sample_with_ann, sample_no_ann];
3675
3676        // Convert to DataFrame
3677        let df = samples_dataframe(&samples).expect("Failed to create DataFrame");
3678
3679        // Verify we have 2 rows (one per sample)
3680        assert_eq!(df.height(), 2, "Expected 2 rows (one per sample)");
3681
3682        // Get the group column
3683        let groups_col = df.column("group").expect("group column should exist");
3684        let groups_cast = groups_col.cast(&DataType::String).expect("cast to string");
3685        let groups = groups_cast.str().expect("as str");
3686
3687        // Find the row for "unannotated" and verify it has group "val"
3688        let names_col = df.column("name").expect("name column should exist");
3689        let names_cast = names_col.cast(&DataType::String).expect("cast to string");
3690        let names = names_cast.str().expect("as str");
3691
3692        let mut found_unannotated = false;
3693        for idx in 0..df.height() {
3694            if let Some(name) = names.get(idx)
3695                && name == "unannotated"
3696            {
3697                found_unannotated = true;
3698                let group = groups.get(idx);
3699                assert_eq!(
3700                    group,
3701                    Some("val"),
3702                    "CRITICAL: Sample 'unannotated' without annotations must have group 'val'"
3703                );
3704            }
3705        }
3706
3707        assert!(
3708            found_unannotated,
3709            "Did not find 'unannotated' sample in DataFrame - \
3710             this means samples without annotations are not being included"
3711        );
3712    }
3713
3714    #[cfg(feature = "polars")]
3715    #[test]
3716    fn test_samples_dataframe_includes_all_samples_even_without_annotations() {
3717        // Verify that samples without annotations still appear in the DataFrame
3718        // with null annotation fields but WITH their group field populated
3719
3720        let mut sample1 = Sample::new();
3721        sample1.image_name = Some("with_ann.jpg".to_string());
3722        sample1.group = Some("train".to_string());
3723        let mut ann = Annotation::new();
3724        ann.set_label(Some("person".to_string()));
3725        ann.set_box2d(Some(Box2d::new(0.0, 0.0, 0.5, 0.5)));
3726        ann.set_name(Some("with_ann".to_string()));
3727        sample1.annotations = vec![ann];
3728
3729        let mut sample2 = Sample::new();
3730        sample2.image_name = Some("no_ann_train.jpg".to_string());
3731        sample2.group = Some("train".to_string());
3732        sample2.annotations = vec![];
3733
3734        let mut sample3 = Sample::new();
3735        sample3.image_name = Some("no_ann_val.jpg".to_string());
3736        sample3.group = Some("val".to_string());
3737        sample3.annotations = vec![];
3738
3739        let samples = vec![sample1, sample2, sample3];
3740
3741        let df = samples_dataframe(&samples).expect("Failed to create DataFrame");
3742
3743        // We should have exactly 3 rows - one per sample
3744        assert_eq!(
3745            df.height(),
3746            3,
3747            "Expected 3 rows (samples without annotations should create one row each)"
3748        );
3749
3750        // Check that all groups are present
3751        let groups_col = df.column("group").expect("group column");
3752        let groups_cast = groups_col.cast(&polars::prelude::DataType::String).unwrap();
3753        let groups = groups_cast.str().unwrap();
3754
3755        let mut train_count = 0;
3756        let mut val_count = 0;
3757
3758        for idx in 0..df.height() {
3759            match groups.get(idx) {
3760                Some("train") => train_count += 1,
3761                Some("val") => val_count += 1,
3762                other => panic!(
3763                    "Unexpected group value at row {}: {:?}. \
3764                     All samples should have their group preserved.",
3765                    idx, other
3766                ),
3767            }
3768        }
3769
3770        assert_eq!(train_count, 2, "Expected 2 samples in 'train' group");
3771        assert_eq!(val_count, 1, "Expected 1 sample in 'val' group");
3772    }
3773
3774    #[cfg(feature = "polars")]
3775    #[test]
3776    fn test_samples_dataframe_group_is_not_null_for_samples_with_group() {
3777        // CRITICAL: Even when a sample has no annotations, if it has a group,
3778        // that group must NOT be null in the DataFrame
3779
3780        let mut sample = Sample::new();
3781        sample.image_name = Some("test.jpg".to_string());
3782        sample.group = Some("test_group".to_string());
3783        sample.annotations = vec![];
3784
3785        let df = samples_dataframe(&[sample]).expect("Failed to create DataFrame");
3786
3787        let groups_col = df.column("group").expect("group column");
3788
3789        // The group column should have NO nulls because our sample has a group
3790        assert_eq!(
3791            groups_col.null_count(),
3792            0,
3793            "Sample with group='test_group' but no annotations has NULL group in DataFrame. \
3794             This is a bug in samples_dataframe - group must be preserved!"
3795        );
3796    }
3797
3798    #[cfg(feature = "polars")]
3799    #[test]
3800    fn test_samples_dataframe_group_consistent_across_all_rows_for_same_image() {
3801        use polars::prelude::*;
3802
3803        // Test that when a sample has multiple annotations, ALL rows have
3804        // the same group value (not just the first one)
3805
3806        let mut sample = Sample::new();
3807        sample.image_name = Some("multi_ann.jpg".to_string());
3808        sample.group = Some("train".to_string());
3809
3810        // Add multiple annotations
3811        let mut ann1 = Annotation::new();
3812        ann1.set_label(Some("car".to_string()));
3813        ann1.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
3814        ann1.set_name(Some("multi_ann".to_string()));
3815
3816        let mut ann2 = Annotation::new();
3817        ann2.set_label(Some("truck".to_string()));
3818        ann2.set_box2d(Some(Box2d::new(0.5, 0.6, 0.2, 0.2)));
3819        ann2.set_name(Some("multi_ann".to_string()));
3820
3821        let mut ann3 = Annotation::new();
3822        ann3.set_label(Some("bus".to_string()));
3823        ann3.set_box2d(Some(Box2d::new(0.7, 0.8, 0.1, 0.1)));
3824        ann3.set_name(Some("multi_ann".to_string()));
3825
3826        sample.annotations = vec![ann1, ann2, ann3];
3827
3828        let df = samples_dataframe(&[sample]).expect("Failed to create DataFrame");
3829
3830        // Should have 3 rows (one per annotation)
3831        assert_eq!(df.height(), 3, "Expected 3 rows (one per annotation)");
3832
3833        // ALL rows should have the group "train" (not just the first one)
3834        let groups_col = df.column("group").expect("group column");
3835        let groups_cast = groups_col.cast(&DataType::String).expect("cast to string");
3836        let groups = groups_cast.str().expect("as str");
3837
3838        // No nulls allowed
3839        assert_eq!(groups_col.null_count(), 0, "No rows should have null group");
3840
3841        // All rows should have the same group
3842        for idx in 0..df.height() {
3843            let group = groups.get(idx);
3844            assert_eq!(
3845                group,
3846                Some("train"),
3847                "Row {} should have group 'train', got {:?}. \
3848                 All rows for the same image must have identical group values.",
3849                idx,
3850                group
3851            );
3852        }
3853    }
3854
3855    #[cfg(feature = "polars")]
3856    #[test]
3857    fn test_samples_dataframe_lvis_columns() {
3858        let mut ann = Annotation::new();
3859        ann.set_name(Some("test".to_string()));
3860        ann.set_label(Some("person".to_string()));
3861        ann.set_label_index(Some(1));
3862        ann.set_iscrowd(Some(false));
3863        ann.set_category_frequency(Some("f".to_string()));
3864
3865        let sample = Sample {
3866            image_name: Some("test.jpg".to_string()),
3867            width: Some(640),
3868            height: Some(480),
3869            annotations: vec![ann],
3870            neg_label_indices: Some(vec![5, 12]),
3871            not_exhaustive_label_indices: Some(vec![3]),
3872            ..Default::default()
3873        };
3874
3875        let df = samples_dataframe(&[sample]).unwrap();
3876
3877        // Verify LVIS columns are present (they have data)
3878        assert!(df.column("iscrowd").is_ok(), "iscrowd column missing");
3879        assert!(
3880            df.column("category_frequency").is_ok(),
3881            "category_frequency column missing"
3882        );
3883        assert!(
3884            df.column("neg_label_indices").is_ok(),
3885            "neg_label_indices column missing"
3886        );
3887        assert!(
3888            df.column("not_exhaustive_label_indices").is_ok(),
3889            "not_exhaustive_label_indices column missing"
3890        );
3891
3892        // All-null columns should be dropped (polygon, box2d, box3d, mask, scores, etc.)
3893        assert!(
3894            df.column("polygon").is_err(),
3895            "polygon column should be dropped (all null)"
3896        );
3897        assert!(
3898            df.column("box2d").is_err(),
3899            "box2d column should be dropped (all null)"
3900        );
3901    }
3902
3903    #[test]
3904    fn test_annotation_serialization_skips_lvis_fields() {
3905        let ann = Annotation::new();
3906        let json = serde_json::to_string(&ann).unwrap();
3907        assert!(
3908            !json.contains("iscrowd"),
3909            "iscrowd should be omitted when None"
3910        );
3911        assert!(
3912            !json.contains("category_frequency"),
3913            "category_frequency should be omitted when None"
3914        );
3915    }
3916
3917    #[test]
3918    fn test_sample_serialization_skips_lvis_fields() {
3919        let sample = Sample::new();
3920        let json = serde_json::to_string(&sample).unwrap();
3921        assert!(
3922            !json.contains("neg_label_indices"),
3923            "neg_label_indices should be omitted when None"
3924        );
3925        assert!(
3926            !json.contains("not_exhaustive_label_indices"),
3927            "not_exhaustive_label_indices should be omitted when None"
3928        );
3929    }
3930
3931    #[test]
3932    fn test_annotation_score_fields() {
3933        let mut ann = Annotation::default();
3934        assert!(ann.box2d_score.is_none());
3935        assert!(ann.polygon_score.is_none());
3936        assert!(ann.mask_score.is_none());
3937        ann.box2d_score = Some(0.95);
3938        ann.polygon_score = Some(0.87);
3939        ann.mask_score = Some(0.42);
3940        assert_eq!(ann.box2d_score, Some(0.95));
3941        assert_eq!(ann.polygon_score, Some(0.87));
3942        assert_eq!(ann.mask_score, Some(0.42));
3943    }
3944
3945    #[test]
3946    fn test_timing_struct() {
3947        let timing = Timing {
3948            load: Some(1_000_000),
3949            preprocess: Some(2_000_000),
3950            inference: Some(50_000_000),
3951            decode: Some(3_000_000),
3952        };
3953        assert_eq!(timing.inference, Some(50_000_000));
3954
3955        let default = Timing::default();
3956        assert!(default.load.is_none());
3957    }
3958
3959    #[test]
3960    fn test_sample_timing() {
3961        let mut sample = Sample::default();
3962        assert!(sample.timing.is_none());
3963        sample.timing = Some(Timing {
3964            load: Some(1_000_000),
3965            ..Default::default()
3966        });
3967        assert!(sample.timing.is_some());
3968    }
3969
3970    // =========================================================================
3971    // samples_dataframe 2026.04 schema tests
3972    // =========================================================================
3973
3974    #[cfg(feature = "polars")]
3975    #[test]
3976    fn test_samples_dataframe_polygon_column() {
3977        let mut ann = Annotation::new();
3978        ann.set_name(Some("test".to_string()));
3979        ann.set_polygon(Some(Polygon::new(vec![vec![
3980            (0.1, 0.2),
3981            (0.3, 0.4),
3982            (0.5, 0.6),
3983        ]])));
3984
3985        let sample = Sample {
3986            image_name: Some("test.jpg".to_string()),
3987            annotations: vec![ann],
3988            ..Default::default()
3989        };
3990
3991        let df = samples_dataframe(&[sample]).unwrap();
3992
3993        // 2026.04: polygon column exists with nested List(List(Float32))
3994        assert!(df.column("polygon").is_ok(), "Should have polygon column");
3995
3996        // The old "mask" column with float data should NOT exist (no MaskData set)
3997        // If mask column exists, it would be Binary type from MaskData, not floats
3998        if let Ok(mask_col) = df.column("mask") {
3999            // If it exists, it must be Binary type, not List(Float32)
4000            assert_eq!(
4001                mask_col.dtype(),
4002                &polars::prelude::DataType::Binary,
4003                "mask column must be Binary type (PNG bytes), not float list"
4004            );
4005        }
4006    }
4007
4008    #[cfg(feature = "polars")]
4009    #[test]
4010    fn test_samples_dataframe_column_presence_drops_all_null() {
4011        // Sample with only a name, no annotations
4012        let sample = Sample {
4013            image_name: Some("test.jpg".to_string()),
4014            ..Default::default()
4015        };
4016
4017        let df = samples_dataframe(&[sample]).unwrap();
4018
4019        // name is always present
4020        assert!(df.column("name").is_ok(), "name column must always exist");
4021
4022        // All-null columns should be dropped
4023        assert!(
4024            df.column("polygon").is_err(),
4025            "All-null polygon should be dropped"
4026        );
4027        assert!(
4028            df.column("box2d").is_err(),
4029            "All-null box2d should be dropped"
4030        );
4031        assert!(
4032            df.column("box3d").is_err(),
4033            "All-null box3d should be dropped"
4034        );
4035        assert!(
4036            df.column("mask").is_err(),
4037            "All-null mask should be dropped"
4038        );
4039        assert!(
4040            df.column("box2d_score").is_err(),
4041            "All-null score columns should be dropped"
4042        );
4043        assert!(
4044            df.column("timing").is_err(),
4045            "All-null timing should be dropped"
4046        );
4047    }
4048
4049    #[cfg(feature = "polars")]
4050    #[test]
4051    fn test_samples_dataframe_score_columns() {
4052        let mut ann = Annotation::new();
4053        ann.set_name(Some("test".to_string()));
4054        ann.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
4055        ann.set_box2d_score(Some(0.95));
4056        ann.set_polygon(Some(Polygon::new(vec![vec![
4057            (0.0, 0.0),
4058            (1.0, 0.0),
4059            (1.0, 1.0),
4060        ]])));
4061        ann.set_polygon_score(Some(0.87));
4062
4063        let sample = Sample {
4064            image_name: Some("test.jpg".to_string()),
4065            annotations: vec![ann],
4066            ..Default::default()
4067        };
4068
4069        let df = samples_dataframe(&[sample]).unwrap();
4070
4071        // Score columns with data should be present
4072        assert!(
4073            df.column("box2d_score").is_ok(),
4074            "box2d_score column missing"
4075        );
4076        assert!(
4077            df.column("polygon_score").is_ok(),
4078            "polygon_score column missing"
4079        );
4080
4081        // Score columns with no data should be dropped
4082        assert!(
4083            df.column("box3d_score").is_err(),
4084            "box3d_score should be dropped (all null)"
4085        );
4086        assert!(
4087            df.column("mask_score").is_err(),
4088            "mask_score should be dropped (all null)"
4089        );
4090
4091        // Verify score values
4092        let box2d_scores = df.column("box2d_score").unwrap();
4093        let val = box2d_scores.f32().unwrap().get(0);
4094        assert_eq!(val, Some(0.95));
4095    }
4096
4097    #[cfg(feature = "polars")]
4098    #[test]
4099    fn test_samples_dataframe_timing_column() {
4100        let mut ann = Annotation::new();
4101        ann.set_name(Some("test".to_string()));
4102        ann.set_label(Some("person".to_string()));
4103
4104        let sample = Sample {
4105            image_name: Some("test.jpg".to_string()),
4106            annotations: vec![ann],
4107            timing: Some(Timing {
4108                load: Some(1_000_000),
4109                preprocess: Some(2_000_000),
4110                inference: Some(50_000_000),
4111                decode: Some(3_000_000),
4112            }),
4113            ..Default::default()
4114        };
4115
4116        let df = samples_dataframe(&[sample]).unwrap();
4117
4118        // Timing column should exist (has data)
4119        assert!(df.column("timing").is_ok(), "timing column missing");
4120
4121        // Verify it is a struct type
4122        let timing_col = df.column("timing").unwrap();
4123        assert!(
4124            matches!(timing_col.dtype(), polars::prelude::DataType::Struct(..)),
4125            "timing column should be Struct type, got {:?}",
4126            timing_col.dtype()
4127        );
4128    }
4129
4130    #[cfg(feature = "polars")]
4131    #[test]
4132    fn test_samples_dataframe_mask_binary_column() {
4133        let mut ann = Annotation::new();
4134        ann.set_name(Some("test".to_string()));
4135        // Create a small valid PNG via MaskData::encode
4136        let pixels = vec![0u8, 255, 128, 64];
4137        let mask_data = MaskData::encode(&pixels, 2, 2, 8).unwrap();
4138        ann.set_mask(Some(mask_data));
4139
4140        let sample = Sample {
4141            image_name: Some("test.jpg".to_string()),
4142            annotations: vec![ann],
4143            ..Default::default()
4144        };
4145
4146        let df = samples_dataframe(&[sample]).unwrap();
4147
4148        // mask column should exist with Binary type
4149        let mask_col = df.column("mask").unwrap();
4150        assert_eq!(
4151            mask_col.dtype(),
4152            &polars::prelude::DataType::Binary,
4153            "mask column should be Binary"
4154        );
4155        assert_eq!(mask_col.null_count(), 0, "mask value should not be null");
4156    }
4157
4158    // =========================================================================
4159    // AnnotationType "seg" alias test
4160    // =========================================================================
4161
4162    #[test]
4163    fn test_annotation_type_seg_alias() {
4164        assert_eq!(
4165            AnnotationType::try_from("seg").unwrap(),
4166            AnnotationType::Polygon,
4167            "\"seg\" should map to Polygon for server round-trip"
4168        );
4169    }
4170
4171    // =========================================================================
4172    // Timing edge case tests
4173    // =========================================================================
4174
4175    #[cfg(feature = "polars")]
4176    #[test]
4177    fn test_samples_dataframe_timing_partial() {
4178        // Timing with only load set; other fields None
4179        let mut ann = Annotation::new();
4180        ann.set_name(Some("test".to_string()));
4181        ann.set_label(Some("person".to_string()));
4182
4183        let sample = Sample {
4184            image_name: Some("test.jpg".to_string()),
4185            annotations: vec![ann],
4186            timing: Some(Timing {
4187                load: Some(1000),
4188                ..Default::default()
4189            }),
4190            ..Default::default()
4191        };
4192
4193        let df = samples_dataframe(&[sample]).unwrap();
4194
4195        // Timing column should be present because at least one field is non-null
4196        assert!(
4197            df.column("timing").is_ok(),
4198            "timing column should be present when partial data exists"
4199        );
4200    }
4201
4202    #[cfg(feature = "polars")]
4203    #[test]
4204    fn test_samples_dataframe_timing_all_none_omitted() {
4205        // All samples have timing: None — timing column should be omitted
4206        let mut ann = Annotation::new();
4207        ann.set_name(Some("test".to_string()));
4208        ann.set_label(Some("person".to_string()));
4209
4210        let sample = Sample {
4211            image_name: Some("test.jpg".to_string()),
4212            annotations: vec![ann],
4213            timing: None,
4214            ..Default::default()
4215        };
4216
4217        let df = samples_dataframe(&[sample]).unwrap();
4218
4219        assert!(
4220            df.column("timing").is_err(),
4221            "timing column should be omitted when all samples have timing: None"
4222        );
4223    }
4224
4225    // =========================================================================
4226    // Score boundary tests
4227    // =========================================================================
4228
4229    #[cfg(feature = "polars")]
4230    #[test]
4231    fn test_samples_dataframe_score_zero_survives() {
4232        // score = 0.0 must be non-null in the output (not confused with None)
4233        let mut ann = Annotation::new();
4234        ann.set_name(Some("test".to_string()));
4235        ann.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
4236        ann.set_box2d_score(Some(0.0));
4237
4238        let sample = Sample {
4239            image_name: Some("test.jpg".to_string()),
4240            annotations: vec![ann],
4241            ..Default::default()
4242        };
4243
4244        let df = samples_dataframe(&[sample]).unwrap();
4245
4246        let scores = df.column("box2d_score").unwrap();
4247        let val = scores.f32().unwrap().get(0);
4248        assert_eq!(val, Some(0.0), "score of 0.0 should survive as non-null");
4249    }
4250
4251    #[cfg(feature = "polars")]
4252    #[test]
4253    fn test_samples_dataframe_score_one_survives() {
4254        let mut ann = Annotation::new();
4255        ann.set_name(Some("test".to_string()));
4256        ann.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
4257        ann.set_box2d_score(Some(1.0));
4258
4259        let sample = Sample {
4260            image_name: Some("test.jpg".to_string()),
4261            annotations: vec![ann],
4262            ..Default::default()
4263        };
4264
4265        let df = samples_dataframe(&[sample]).unwrap();
4266
4267        let scores = df.column("box2d_score").unwrap();
4268        let val = scores.f32().unwrap().get(0);
4269        assert_eq!(val, Some(1.0), "score of 1.0 should survive as non-null");
4270    }
4271}