Skip to main content

edgefirst_client/
dataset.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright © 2025 Au-Zone Technologies. All Rights Reserved.
3
4use std::{collections::HashMap, fmt::Display};
5
6use crate::{
7    Client, Error,
8    api::{AnnotationSetID, DatasetID, ProjectID, SampleID},
9    mask::MaskData,
10};
11use chrono::{DateTime, Utc};
12use serde::{Deserialize, Serialize};
13
14#[cfg(feature = "polars")]
15use polars::prelude::*;
16
17/// File types supported in EdgeFirst Studio datasets.
18///
19/// Represents the different types of sensor data files that can be stored
20/// and processed in a dataset. EdgeFirst Studio supports various modalities
21/// including visual images and different forms of LiDAR and radar data.
22///
23/// # String Representations
24///
25/// This enum has two string representations:
26/// - **Display** (`fmt::Display`): Returns the server API type name (e.g.,
27///   `"lidar.depth"`) used when making API requests to EdgeFirst Studio.
28/// - **file_extension()**: Returns the file extension for saving (e.g.,
29///   `"lidar.png"`) which may differ from the API type name.
30///
31/// # Examples
32///
33/// ```rust
34/// use edgefirst_client::FileType;
35///
36/// // Create file types from strings
37/// let image_type: FileType = "image".try_into().unwrap();
38/// let lidar_type: FileType = "lidar.pcd".try_into().unwrap();
39///
40/// // Display file types
41/// println!("Processing {} files", image_type); // "Processing image files"
42///
43/// // Use in dataset operations - example usage
44/// let file_type = FileType::Image;
45/// match file_type {
46///     FileType::Image => println!("Processing image files"),
47///     FileType::LidarPcd => println!("Processing LiDAR point cloud files"),
48///     _ => println!("Processing other sensor data"),
49/// }
50/// ```
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum FileType {
    /// Standard image files (JPEG, PNG, etc.); API type name `"image"`.
    Image,
    /// LiDAR point cloud data files (.pcd format); API type name
    /// `"lidar.pcd"`.
    LidarPcd,
    /// LiDAR depth images (.png format); API type name `"lidar.depth"`,
    /// saved with the `"lidar.png"` extension.
    LidarDepth,
    /// LiDAR reflectance images (.jpg format); API type name
    /// `"lidar.reflect"`, saved with the `"lidar.jpg"` extension.
    LidarReflect,
    /// Radar point cloud data files (.pcd format); API type name
    /// `"radar.pcd"`.
    RadarPcd,
    /// Radar cube data files (.png format); API type name `"radar.png"`.
    RadarCube,
    /// All sensor types - expands to all known file types (see
    /// `FileType::expand_types`); it has no file extension of its own.
    All,
}
68
69impl std::fmt::Display for FileType {
70    /// Returns the server API type name for this file type.
71    /// Used when making API requests to the server.
72    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73        let value = match self {
74            FileType::Image => "image",
75            FileType::LidarPcd => "lidar.pcd",
76            FileType::LidarDepth => "lidar.depth",
77            FileType::LidarReflect => "lidar.reflect",
78            FileType::RadarPcd => "radar.pcd",
79            FileType::RadarCube => "radar.png",
80            FileType::All => "all",
81        };
82        write!(f, "{}", value)
83    }
84}
85
86impl FileType {
87    /// Returns the file extension to use when saving downloaded files.
88    /// This may differ from the API type name (e.g., lidar.depth → lidar.png).
89    pub fn file_extension(&self) -> &'static str {
90        match self {
91            FileType::Image => "jpg", // Will be overridden by infer detection
92            FileType::LidarPcd => "lidar.pcd",
93            FileType::LidarDepth => "lidar.png",
94            FileType::LidarReflect => "lidar.jpg",
95            FileType::RadarPcd => "radar.pcd",
96            FileType::RadarCube => "radar.png",
97            FileType::All => "",
98        }
99    }
100}
101
102impl TryFrom<&str> for FileType {
103    type Error = crate::Error;
104
105    fn try_from(s: &str) -> Result<Self, Self::Error> {
106        match s {
107            "image" => Ok(FileType::Image),
108            "lidar.pcd" => Ok(FileType::LidarPcd),
109            // Accept CLI names (lidar.png), server names (lidar.depth), and aliases
110            "lidar.png" | "lidar.depth" | "depth.png" | "depthmap" => Ok(FileType::LidarDepth),
111            "lidar.jpg" | "lidar.jpeg" | "lidar.reflect" => Ok(FileType::LidarReflect),
112            "radar.pcd" | "pcd" => Ok(FileType::RadarPcd),
113            "radar.png" | "cube" => Ok(FileType::RadarCube),
114            "all" => Ok(FileType::All),
115            _ => Err(crate::Error::InvalidFileType(s.to_string())),
116        }
117    }
118}
119
120impl std::str::FromStr for FileType {
121    type Err = crate::Error;
122
123    fn from_str(s: &str) -> Result<Self, Self::Err> {
124        s.try_into()
125    }
126}
127
128impl FileType {
129    /// Returns all concrete sensor file types (excludes `All`).
130    ///
131    /// This is useful for expanding the `All` variant or listing available
132    /// types.
133    ///
134    /// # Example
135    ///
136    /// ```rust
137    /// use edgefirst_client::FileType;
138    ///
139    /// let all_types = FileType::all_sensor_types();
140    /// assert!(all_types.contains(&FileType::Image));
141    /// assert!(!all_types.contains(&FileType::All));
142    /// ```
143    pub fn all_sensor_types() -> Vec<FileType> {
144        vec![
145            FileType::Image,
146            FileType::LidarPcd,
147            FileType::LidarDepth,
148            FileType::LidarReflect,
149            FileType::RadarPcd,
150            FileType::RadarCube,
151        ]
152    }
153
154    /// Returns all valid type names as strings for help text.
155    ///
156    /// # Example
157    ///
158    /// ```rust
159    /// use edgefirst_client::FileType;
160    ///
161    /// let names = FileType::type_names();
162    /// assert!(names.contains(&"image"));
163    /// assert!(names.contains(&"all"));
164    /// ```
165    pub fn type_names() -> Vec<&'static str> {
166        vec![
167            "image",
168            "lidar.pcd",
169            "lidar.png",
170            "lidar.jpg",
171            "radar.pcd",
172            "radar.png",
173            "all",
174        ]
175    }
176
177    /// Expands a list of file types, replacing `All` with all concrete sensor
178    /// types.
179    ///
180    /// If the input contains `FileType::All`, returns all sensor types.
181    /// Otherwise, returns the input types unchanged.
182    ///
183    /// # Example
184    ///
185    /// ```rust
186    /// use edgefirst_client::FileType;
187    ///
188    /// let types = vec![FileType::All];
189    /// let expanded = FileType::expand_types(&types);
190    /// assert_eq!(expanded.len(), 6); // All concrete sensor types
191    ///
192    /// let types = vec![FileType::Image, FileType::LidarPcd];
193    /// let expanded = FileType::expand_types(&types);
194    /// assert_eq!(expanded.len(), 2); // Unchanged
195    /// ```
196    pub fn expand_types(types: &[FileType]) -> Vec<FileType> {
197        if types.contains(&FileType::All) {
198            FileType::all_sensor_types()
199        } else {
200            types.to_vec()
201        }
202    }
203}
204
205/// Annotation types supported for labeling data in EdgeFirst Studio.
206///
207/// Represents the different types of annotations that can be applied to
208/// sensor data for machine learning tasks. Each type corresponds to a
209/// different annotation geometry and use case.
210///
211/// # Examples
212///
213/// ```rust
214/// use edgefirst_client::AnnotationType;
215///
216/// // Create annotation types from strings (using TryFrom)
217/// let box_2d: AnnotationType = "box2d".try_into().unwrap();
218/// let segmentation: AnnotationType = "polygon".try_into().unwrap();
219///
220/// // Or use From with String
221/// let box_2d = AnnotationType::from("box2d".to_string());
222/// let segmentation = AnnotationType::from("polygon".to_string());
223///
224/// // Display annotation types
225/// println!("Annotation type: {}", box_2d); // "Annotation type: box2d"
226///
227/// // Use in matching and processing
228/// let annotation_type = AnnotationType::Box2d;
229/// match annotation_type {
230///     AnnotationType::Box2d => println!("Processing 2D bounding boxes"),
231///     AnnotationType::Box3d => println!("Processing 3D bounding boxes"),
232///     AnnotationType::Polygon => println!("Processing polygon contours"),
233///     AnnotationType::Mask => println!("Processing raster pixel masks"),
234/// }
235/// ```
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum AnnotationType {
    /// 2D bounding boxes for object detection in images.
    /// Displayed as `"box2d"`; the server type name is `"box"`.
    Box2d,
    /// 3D bounding boxes for object detection in 3D space (LiDAR, etc.).
    /// Displayed as `"box3d"`; the server type name is also `"box3d"`.
    Box3d,
    /// Vector polygon contours for instance segmentation.
    /// Displayed as `"polygon"`; the server type name is `"seg"`.
    Polygon,
    /// Raster pixel masks for semantic/instance segmentation.
    /// Displayed as `"mask"`; the server type name is `"seg"`. Note that the
    /// string `"mask"` parses to `Polygon` (backward compatibility); this
    /// variant is parsed from `"raster"`.
    Mask,
}
247
248impl TryFrom<&str> for AnnotationType {
249    type Error = crate::Error;
250
251    fn try_from(s: &str) -> Result<Self, Self::Error> {
252        match s {
253            "box2d" => Ok(AnnotationType::Box2d),
254            "box3d" => Ok(AnnotationType::Box3d),
255            "polygon" => Ok(AnnotationType::Polygon),
256            "seg" => Ok(AnnotationType::Polygon),
257            "mask" => Ok(AnnotationType::Polygon), // backward compat
258            "raster" => Ok(AnnotationType::Mask),
259            _ => Err(crate::Error::InvalidAnnotationType(s.to_string())),
260        }
261    }
262}
263
264impl From<String> for AnnotationType {
265    fn from(s: String) -> Self {
266        // For backward compatibility, default to Box2d if invalid
267        s.as_str().try_into().unwrap_or(AnnotationType::Box2d)
268    }
269}
270
271impl From<&String> for AnnotationType {
272    fn from(s: &String) -> Self {
273        // For backward compatibility, default to Box2d if invalid
274        s.as_str().try_into().unwrap_or(AnnotationType::Box2d)
275    }
276}
277
278impl AnnotationType {
279    /// Returns the server API type name for this annotation type.
280    ///
281    /// The server uses different naming conventions than the client:
282    /// - `Box2d` → `"box"` (server) vs `"box2d"` (client display)
283    /// - `Box3d` → `"box3d"` (same)
284    /// - `Polygon` → `"seg"` (server) vs `"polygon"` (client display)
285    /// - `Mask` → `"seg"` (server) vs `"mask"` (client display)
286    pub fn as_server_type(&self) -> &'static str {
287        match self {
288            AnnotationType::Box2d => "box",
289            AnnotationType::Box3d => "box3d",
290            AnnotationType::Polygon => "seg",
291            AnnotationType::Mask => "seg",
292        }
293    }
294}
295
296impl std::fmt::Display for AnnotationType {
297    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
298        let value = match self {
299            AnnotationType::Box2d => "box2d",
300            AnnotationType::Box3d => "box3d",
301            AnnotationType::Polygon => "polygon",
302            AnnotationType::Mask => "mask",
303        };
304        write!(f, "{}", value)
305    }
306}
307
308/// A dataset in EdgeFirst Studio containing sensor data and annotations.
309///
310/// Datasets are collections of multi-modal sensor data (images, LiDAR, radar)
311/// along with their corresponding annotations (bounding boxes, segmentation
312/// masks, 3D annotations). Datasets belong to projects and can be used for
313/// training and validation of machine learning models.
314///
315/// # Features
316///
317/// - **Multi-modal Data**: Support for images, LiDAR point clouds, radar data
318/// - **Rich Annotations**: 2D/3D bounding boxes, segmentation masks
319/// - **Metadata**: Timestamps, sensor configurations, calibration data
320/// - **Version Control**: Track changes and maintain data lineage
321/// - **Format Conversion**: Export to popular ML frameworks
322///
323/// # Examples
324///
325/// ```no_run
326/// use edgefirst_client::{Client, Dataset, DatasetID};
327/// use std::str::FromStr;
328///
329/// # async fn example() -> Result<(), edgefirst_client::Error> {
330/// # let client = Client::new()?;
331/// // Get dataset information
332/// let dataset_id = DatasetID::from_str("ds-abc123")?;
333/// let dataset = client.dataset(dataset_id).await?;
334/// println!("Dataset: {}", dataset.name());
335///
336/// // Access dataset metadata
337/// println!("Dataset ID: {}", dataset.id());
338/// println!("Description: {}", dataset.description());
339/// println!("Created: {}", dataset.created());
340///
341/// // Work with dataset data would require additional methods
342/// // that are implemented in the full API
343/// # Ok(())
344/// # }
345/// ```
#[derive(Deserialize, Clone, Debug)]
pub struct Dataset {
    /// Unique identifier of the dataset.
    id: DatasetID,
    /// Identifier of the project this dataset belongs to.
    project_id: ProjectID,
    /// Dataset name; printed after the ID by the `Display` impl.
    name: String,
    /// Description text of the dataset.
    description: String,
    /// NOTE(review): presumably the cloud storage key under which the
    /// dataset's data lives — confirm against the server API.
    cloud_key: String,
    /// Creation timestamp; read from the `createdAt` field when
    /// deserializing.
    #[serde(rename = "createdAt")]
    created: DateTime<Utc>,
}
356
357impl Display for Dataset {
358    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
359        write!(f, "{} {}", self.id, self.name)
360    }
361}
362
363impl Dataset {
364    pub fn id(&self) -> DatasetID {
365        self.id
366    }
367
368    pub fn project_id(&self) -> ProjectID {
369        self.project_id
370    }
371
372    pub fn name(&self) -> &str {
373        &self.name
374    }
375
376    pub fn description(&self) -> &str {
377        &self.description
378    }
379
380    pub fn cloud_key(&self) -> &str {
381        &self.cloud_key
382    }
383
384    pub fn created(&self) -> &DateTime<Utc> {
385        &self.created
386    }
387
388    pub async fn project(&self, client: &Client) -> Result<crate::api::Project, Error> {
389        client.project(self.project_id).await
390    }
391
392    pub async fn annotation_sets(&self, client: &Client) -> Result<Vec<AnnotationSet>, Error> {
393        client.annotation_sets(self.id).await
394    }
395
396    pub async fn labels(&self, client: &Client) -> Result<Vec<Label>, Error> {
397        client.labels(self.id).await
398    }
399
400    pub async fn add_label(&self, client: &Client, name: &str) -> Result<(), Error> {
401        client.add_label(self.id, name).await
402    }
403
404    pub async fn remove_label(&self, client: &Client, name: &str) -> Result<(), Error> {
405        let labels = self.labels(client).await?;
406        let label = labels
407            .iter()
408            .find(|l| l.name() == name)
409            .ok_or_else(|| Error::MissingLabel(name.to_string()))?;
410        client.remove_label(label.id()).await
411    }
412}
413
414/// The AnnotationSet class represents a collection of annotations in a dataset.
415/// A dataset can have multiple annotation sets, each containing annotations for
416/// different tasks or purposes.
#[derive(Deserialize)]
pub struct AnnotationSet {
    /// Unique identifier of the annotation set.
    id: AnnotationSetID,
    /// Identifier of the dataset this annotation set belongs to.
    dataset_id: DatasetID,
    /// Annotation set name; printed after the ID by the `Display` impl.
    name: String,
    /// Description text of the annotation set.
    description: String,
    /// Creation timestamp; read from the `date` field when deserializing.
    #[serde(rename = "date")]
    created: DateTime<Utc>,
}
426
427impl Display for AnnotationSet {
428    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
429        write!(f, "{} {}", self.id, self.name)
430    }
431}
432
433impl AnnotationSet {
434    pub fn id(&self) -> AnnotationSetID {
435        self.id
436    }
437
438    pub fn dataset_id(&self) -> DatasetID {
439        self.dataset_id
440    }
441
442    pub fn name(&self) -> &str {
443        &self.name
444    }
445
446    pub fn description(&self) -> &str {
447        &self.description
448    }
449
450    pub fn created(&self) -> DateTime<Utc> {
451        self.created
452    }
453
454    pub async fn dataset(&self, client: &Client) -> Result<Dataset, Error> {
455        client.dataset(self.dataset_id).await
456    }
457}
458
/// Pipeline timing measurements for a sample, in nanoseconds.
///
/// Each field records the wall-clock duration of one pipeline stage.
/// Populated from Arrow metadata; not part of the Studio JSON-RPC API.
///
/// Every stage is optional; the derived `Default` leaves all of them unset
/// (`None`).
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Timing {
    /// Duration of the data-loading stage (nanoseconds).
    pub load: Option<i64>,
    /// Duration of the preprocessing stage (nanoseconds).
    pub preprocess: Option<i64>,
    /// Duration of the inference stage (nanoseconds).
    pub inference: Option<i64>,
    /// Duration of the decoding / postprocessing stage (nanoseconds).
    pub decode: Option<i64>,
}
474
475/// A sample in a dataset, typically representing a single image with metadata
476/// and optional sensor data.
477///
478/// Each sample has a unique ID, image reference, and can include additional
479/// sensor data like LiDAR, radar, or depth maps. Samples can also have
480/// associated annotations.
#[derive(Serialize, Clone, Debug)]
pub struct Sample {
    /// Sample identifier; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<SampleID>,
    /// Dataset split (train, val, test) - stored in Arrow metadata, not used
    /// for directory structure.
    /// API field name discrepancy: samples.populate2 expects "group", but
    /// samples.list returns "group_name".
    ///
    /// NOTE(review): only `Serialize` is derived here; `Deserialize` for
    /// `Sample` is implemented by hand through `SampleRaw`, so the
    /// `alias`/`deserialize` parts of this attribute do not drive
    /// deserialization — confirm before relying on them.
    #[serde(
        alias = "group_name",
        rename(serialize = "group", deserialize = "group_name"),
        skip_serializing_if = "Option::is_none"
    )]
    pub group: Option<String>,
    /// Name of the sequence this sample belongs to, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_name: Option<String>,
    /// UUID of the sequence this sample belongs to, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_uuid: Option<String>,
    /// Description of the sequence this sample belongs to, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sequence_description: Option<String>,
    /// Frame number within a sequence; `None` for non-sequence samples (the
    /// server's -1 is mapped to `None` when deserializing).
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_frame_number"
    )]
    pub frame_number: Option<u32>,
    /// UUID string of the sample, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub uuid: Option<String>,
    /// Image file name; `Sample::name()` derives the sample name from it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_name: Option<String>,
    /// URL of the image, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_url: Option<String>,
    /// Image width. NOTE(review): presumably in pixels — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub width: Option<u32>,
    /// Image height. NOTE(review): presumably in pixels — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub height: Option<u32>,
    /// Timestamp associated with the sample (UTC), if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub date: Option<DateTime<Utc>>,
    /// Source of the sample. NOTE(review): exact meaning is not visible in
    /// this file — confirm against the API documentation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// Camera location and pose (GPS + IMU data).
    /// Location data is extracted from the "sensors" field during
    /// deserialization. When uploading samples, this field is serialized
    /// as "sensors" to match the samples.populate2 API format.
    #[serde(skip_serializing_if = "Option::is_none", rename(serialize = "sensors"))]
    pub location: Option<Location>,
    /// Image degradation type (blur, occlusion, weather, etc.).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub degradation: Option<String>,
    /// LVIS: label_index values for categories verified absent from this image.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neg_label_indices: Option<Vec<u32>>,
    /// LVIS: label_index values for categories with incomplete annotation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub not_exhaustive_label_indices: Option<Vec<u32>>,
    /// Additional sensor files (LiDAR, radar, depth maps, etc.).
    /// Deserialization is handled by custom Deserialize impl which extracts
    /// files from the "sensors" field. Serialization converts to HashMap for
    /// samples.populate2 API.
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
        serialize_with = "serialize_files"
    )]
    pub files: Vec<SampleFile>,
    /// Annotations associated with this sample.
    /// Deserialization is handled by custom Deserialize impl.
    #[serde(
        default,
        skip_serializing_if = "Vec::is_empty",
        serialize_with = "serialize_annotations"
    )]
    pub annotations: Vec<Annotation>,
    /// Pipeline timing measurements (populated from Arrow, not from Studio
    /// JSON-RPC). Never serialized (`serde(skip)`).
    #[serde(skip)]
    pub timing: Option<Timing>,
}
559
560// Custom deserializer for frame_number - converts -1 to None
561// Server returns -1 for non-sequence samples, but clients should see None
562fn deserialize_frame_number<'de, D>(deserializer: D) -> Result<Option<u32>, D::Error>
563where
564    D: serde::Deserializer<'de>,
565{
566    use serde::Deserialize;
567
568    let value = Option::<i32>::deserialize(deserializer)?;
569    Ok(value.and_then(|v| if v < 0 { None } else { Some(v as u32) }))
570}
571
/// Reports whether `s` looks like a downloadable URL, i.e. starts with the
/// `http://` or `https://` scheme prefix (case-sensitive).
/// Used to tell pre-signed URLs apart from inline base64/JSON data.
fn is_valid_url(s: &str) -> bool {
    const SCHEMES: [&str; 2] = ["http://", "https://"];
    SCHEMES.iter().any(|scheme| s.starts_with(*scheme))
}
577
578// Custom serializer for files field - converts Vec<SampleFile> to
579// HashMap<String, String>
580fn serialize_files<S>(files: &[SampleFile], serializer: S) -> Result<S::Ok, S::Error>
581where
582    S: serde::Serializer,
583{
584    use serde::Serialize;
585    let map: HashMap<String, String> = files
586        .iter()
587        .filter_map(|f| {
588            f.filename()
589                .map(|filename| (f.file_type().to_string(), filename.to_string()))
590        })
591        .collect();
592    map.serialize(serializer)
593}
594
595// Custom serializer for annotations field - serializes to a flat
596// Vec<Annotation> to match the updated samples.populate2 contract (annotations
597// array)
598fn serialize_annotations<S>(annotations: &Vec<Annotation>, serializer: S) -> Result<S::Ok, S::Error>
599where
600    S: serde::Serializer,
601{
602    serde::Serialize::serialize(annotations, serializer)
603}
604
605// Custom deserializer for annotations field - converts server format back to
606// Vec<Annotation>
607fn deserialize_annotations<'de, D>(deserializer: D) -> Result<Vec<Annotation>, D::Error>
608where
609    D: serde::Deserializer<'de>,
610{
611    use serde::Deserialize;
612
613    #[derive(Deserialize)]
614    #[serde(untagged)]
615    enum AnnotationsFormat {
616        Vec(Vec<Annotation>),
617        Map(HashMap<String, Vec<Annotation>>),
618    }
619
620    let value = Option::<AnnotationsFormat>::deserialize(deserializer)?;
621    Ok(value
622        .map(|v| match v {
623            AnnotationsFormat::Vec(annotations) => annotations,
624            AnnotationsFormat::Map(map) => convert_annotations_map_to_vec(map),
625        })
626        .unwrap_or_default())
627}
628
/// Intermediate struct for deserializing sensors data that may contain both
/// file references (URLs/data) and location data (GPS/IMU).
///
/// The derived `Default` is the empty result: no files and no location.
#[derive(Debug, Default)]
struct SensorsData {
    /// Sensor files found in the sensors payload, in the order encountered.
    files: Vec<SampleFile>,
    /// Location built from `gps`/`imu` entries, if any were parsed.
    location: Option<Location>,
}
636
637/// Deserialize sensors field into both files and location data.
638fn deserialize_sensors_data(value: Option<serde_json::Value>) -> SensorsData {
639    use serde_json::Value;
640
641    /// Create a SampleFile from a string value, distinguishing URL vs inline
642    /// data.
643    fn create_sample_file(file_type: String, value: String) -> SampleFile {
644        if is_valid_url(&value) {
645            SampleFile::with_url(file_type, value)
646        } else {
647            SampleFile::with_data(file_type, value)
648        }
649    }
650
651    /// Create a SampleFile from any JSON value, converting non-strings to JSON.
652    fn create_sample_file_from_value(file_type: String, value: Value) -> Option<SampleFile> {
653        match value {
654            Value::String(s) => Some(create_sample_file(file_type, s)),
655            Value::Object(_) | Value::Array(_) => {
656                // Inline JSON data (legacy format) - serialize to string
657                serde_json::to_string(&value)
658                    .ok()
659                    .map(|data| SampleFile::with_data(file_type, data))
660            }
661            _ => None,
662        }
663    }
664
665    /// Try to extract Location from a JSON object containing gps/imu keys.
666    fn extract_location(map: &serde_json::Map<String, Value>) -> Option<Location> {
667        let gps = map
668            .get("gps")
669            .and_then(|v| serde_json::from_value::<GpsData>(v.clone()).ok());
670        let imu = map
671            .get("imu")
672            .and_then(|v| serde_json::from_value::<ImuData>(v.clone()).ok());
673
674        if gps.is_some() || imu.is_some() {
675            Some(Location { gps, imu })
676        } else {
677            None
678        }
679    }
680
681    let mut result = SensorsData::default();
682
683    match value {
684        None => result,
685        Some(Value::Array(arr)) => {
686            // Array of single-key objects: [{"radar.png": "url"}, {"gps": {...}}, ...]
687            for item in arr {
688                if let Value::Object(map) = item {
689                    // Check if this looks like a SampleFile object (has "type" key)
690                    if map.contains_key("type") {
691                        // Try to parse as SampleFile
692                        if let Ok(file) =
693                            serde_json::from_value::<SampleFile>(Value::Object(map.clone()))
694                        {
695                            result.files.push(file);
696                        }
697                    } else {
698                        // Check for location data (gps/imu)
699                        if let Some(loc) = extract_location(&map) {
700                            // Merge with existing location
701                            if let Some(ref mut existing) = result.location {
702                                if loc.gps.is_some() {
703                                    existing.gps = loc.gps;
704                                }
705                                if loc.imu.is_some() {
706                                    existing.imu = loc.imu;
707                                }
708                            } else {
709                                result.location = Some(loc);
710                            }
711                        } else {
712                            // Single-key object: {file_type: url_or_data}
713                            for (file_type, value) in map {
714                                if let Some(file) = create_sample_file_from_value(file_type, value)
715                                {
716                                    result.files.push(file);
717                                }
718                            }
719                        }
720                    }
721                }
722            }
723            result
724        }
725        Some(Value::Object(map)) => {
726            // Check if this contains location data (gps or imu keys with object values)
727            if let Some(loc) = extract_location(&map) {
728                result.location = Some(loc);
729            }
730
731            // Also extract any file references (non-location keys)
732            for (key, value) in map {
733                if key != "gps"
734                    && key != "imu"
735                    && let Some(file) = create_sample_file_from_value(key, value)
736                {
737                    result.files.push(file);
738                }
739            }
740            result
741        }
742        Some(_) => result,
743    }
744}
745
/// Raw sample structure for deserialization.
/// This mirrors Sample but deserializes sensors into a combined struct
/// that captures both files and location data.
///
/// `Sample`'s `Deserialize` impl reads this struct first and then converts it
/// with `From<SampleRaw> for Sample`.
#[derive(Deserialize)]
struct SampleRaw {
    /// Sample identifier; `None` when absent.
    #[serde(default)]
    id: Option<SampleID>,
    /// Dataset split; accepted under either "group" or "group_name".
    #[serde(alias = "group_name")]
    group: Option<String>,
    sequence_name: Option<String>,
    sequence_uuid: Option<String>,
    sequence_description: Option<String>,
    /// Negative values are mapped to `None` by `deserialize_frame_number`.
    #[serde(default, deserialize_with = "deserialize_frame_number")]
    frame_number: Option<u32>,
    uuid: Option<String>,
    image_name: Option<String>,
    image_url: Option<String>,
    width: Option<u32>,
    height: Option<u32>,
    date: Option<DateTime<Utc>>,
    source: Option<String>,
    degradation: Option<String>,
    #[serde(default)]
    neg_label_indices: Option<Vec<u32>>,
    #[serde(default)]
    not_exhaustive_label_indices: Option<Vec<u32>>,
    /// Raw sensors JSON - will be processed into files + location
    // NOTE(review): the alias repeats the field's own name, so it looks
    // redundant — confirm whether another input key was intended.
    #[serde(default, alias = "sensors")]
    sensors: Option<serde_json::Value>,
    /// Accepts either an array or a keyed map of annotations; see
    /// `deserialize_annotations`.
    #[serde(default, deserialize_with = "deserialize_annotations")]
    annotations: Vec<Annotation>,
}
778
779impl From<SampleRaw> for Sample {
780    fn from(raw: SampleRaw) -> Self {
781        let sensors_data = deserialize_sensors_data(raw.sensors);
782
783        Sample {
784            id: raw.id,
785            group: raw.group,
786            sequence_name: raw.sequence_name,
787            sequence_uuid: raw.sequence_uuid,
788            sequence_description: raw.sequence_description,
789            frame_number: raw.frame_number,
790            uuid: raw.uuid,
791            image_name: raw.image_name,
792            image_url: raw.image_url,
793            width: raw.width,
794            height: raw.height,
795            date: raw.date,
796            source: raw.source,
797            location: sensors_data.location,
798            degradation: raw.degradation,
799            neg_label_indices: raw.neg_label_indices,
800            not_exhaustive_label_indices: raw.not_exhaustive_label_indices,
801            files: sensors_data.files,
802            annotations: raw.annotations,
803            timing: None,
804        }
805    }
806}
807
808impl<'de> serde::Deserialize<'de> for Sample {
809    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
810    where
811        D: serde::Deserializer<'de>,
812    {
813        let raw = SampleRaw::deserialize(deserializer)?;
814        Ok(Sample::from(raw))
815    }
816}
817
818impl Display for Sample {
819    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
820        write!(
821            f,
822            "{} {}",
823            self.id
824                .map(|id| id.to_string())
825                .unwrap_or_else(|| "unknown".to_string()),
826            self.image_name().unwrap_or("unknown")
827        )
828    }
829}
830
831impl Default for Sample {
832    fn default() -> Self {
833        Self::new()
834    }
835}
836
837impl Sample {
838    /// Creates a new empty sample.
839    pub fn new() -> Self {
840        Self {
841            id: None,
842            group: None,
843            sequence_name: None,
844            sequence_uuid: None,
845            sequence_description: None,
846            frame_number: None,
847            uuid: None,
848            image_name: None,
849            image_url: None,
850            width: None,
851            height: None,
852            date: None,
853            source: None,
854            location: None,
855            degradation: None,
856            neg_label_indices: None,
857            not_exhaustive_label_indices: None,
858            files: vec![],
859            annotations: vec![],
860            timing: None,
861        }
862    }
863
    /// Returns the sample ID, or `None` if it is not set.
    pub fn id(&self) -> Option<SampleID> {
        self.id
    }
867
868    pub fn name(&self) -> Option<String> {
869        self.image_name.as_ref().map(|n| extract_sample_name(n))
870    }
871
    /// Returns the dataset split (group) of the sample, if set.
    pub fn group(&self) -> Option<&String> {
        self.group.as_ref()
    }
875
    /// Returns the name of the sequence this sample belongs to, if set.
    pub fn sequence_name(&self) -> Option<&String> {
        self.sequence_name.as_ref()
    }
879
    /// Returns the UUID of the sequence this sample belongs to, if set.
    pub fn sequence_uuid(&self) -> Option<&String> {
        self.sequence_uuid.as_ref()
    }
883
884    pub fn sequence_description(&self) -> Option<&String> {
885        self.sequence_description.as_ref()
886    }
887
888    pub fn frame_number(&self) -> Option<u32> {
889        self.frame_number
890    }
891
892    pub fn uuid(&self) -> Option<&String> {
893        self.uuid.as_ref()
894    }
895
896    pub fn image_name(&self) -> Option<&str> {
897        self.image_name.as_deref()
898    }
899
900    pub fn image_url(&self) -> Option<&str> {
901        self.image_url.as_deref()
902    }
903
904    pub fn width(&self) -> Option<u32> {
905        self.width
906    }
907
908    pub fn height(&self) -> Option<u32> {
909        self.height
910    }
911
912    pub fn date(&self) -> Option<DateTime<Utc>> {
913        self.date
914    }
915
916    pub fn source(&self) -> Option<&String> {
917        self.source.as_ref()
918    }
919
920    pub fn location(&self) -> Option<&Location> {
921        self.location.as_ref()
922    }
923
924    pub fn files(&self) -> &[SampleFile] {
925        &self.files
926    }
927
928    pub fn annotations(&self) -> &[Annotation] {
929        &self.annotations
930    }
931
932    pub fn with_annotations(mut self, annotations: Vec<Annotation>) -> Self {
933        self.annotations = annotations;
934        self
935    }
936
937    pub fn with_frame_number(mut self, frame_number: Option<u32>) -> Self {
938        self.frame_number = frame_number;
939        self
940    }
941
942    /// Downloads a file of the specified type for this sample.
943    ///
944    /// Supports both newer datasets (pre-signed URLs) and legacy datasets
945    /// (inline base64-encoded data):
946    /// 1. First tries to download from URL if available
947    /// 2. Falls back to decoding inline base64 data for legacy datasets
948    pub async fn download(
949        &self,
950        client: &Client,
951        file_type: FileType,
952    ) -> Result<Option<Vec<u8>>, Error> {
953        use base64::{Engine, engine::general_purpose::STANDARD};
954
955        // Handle image type separately (uses image_url field)
956        if file_type == FileType::Image {
957            if let Some(url) = self.image_url.as_deref()
958                && is_valid_url(url)
959            {
960                return Ok(Some(client.download(url).await?));
961            }
962            return Ok(None);
963        }
964
965        // Find the matching file for this type
966        let file = resolve_file(&file_type, &self.files);
967
968        match file {
969            Some(f) => {
970                // Prefer URL (newer datasets)
971                if let Some(url) = f.url() {
972                    return Ok(Some(client.download(url).await?));
973                }
974
975                // Fall back to inline data (legacy datasets)
976                if let Some(data) = f.data() {
977                    // Legacy data can be in several formats:
978                    // 1. Base64-encoded JSON: "eyJyYWRhci5wY2QiOi..." -> {"radar.pcd": "content"}
979                    // 2. Direct JSON wrapper: {"radar.pcd": "content"}
980                    // 3. Raw content (PCD text, etc.)
981
982                    // Try base64 decode first
983                    let decoded = if let Ok(bytes) = STANDARD.decode(data) {
984                        // Check if decoded bytes are UTF-8 JSON
985                        if let Ok(text) = String::from_utf8(bytes.clone()) {
986                            if text.starts_with('{') {
987                                // It's JSON - use the text for further processing
988                                text
989                            } else {
990                                // Non-JSON binary data - return as-is
991                                return Ok(Some(bytes));
992                            }
993                        } else {
994                            // Binary data - return as-is
995                            return Ok(Some(bytes));
996                        }
997                    } else {
998                        // Not base64 - use original data
999                        data.to_string()
1000                    };
1001
1002                    // Try to unwrap JSON wrapper: {"type_name": "content"}
1003                    let content = if decoded.starts_with('{') {
1004                        if let Ok(json) = serde_json::from_str::<serde_json::Value>(&decoded) {
1005                            if let Some(obj) = json.as_object() {
1006                                obj.values()
1007                                    .next()
1008                                    .and_then(|v| v.as_str())
1009                                    .map(|s| s.to_string())
1010                                    .unwrap_or(decoded)
1011                            } else {
1012                                decoded
1013                            }
1014                        } else {
1015                            decoded
1016                        }
1017                    } else {
1018                        decoded
1019                    };
1020
1021                    return Ok(Some(content.as_bytes().to_vec()));
1022                }
1023
1024                Ok(None)
1025            }
1026            None => Ok(None),
1027        }
1028    }
1029}
1030
/// A file associated with a sample (e.g., LiDAR point cloud, radar data).
///
/// For samples retrieved from the server, this contains the file type and URL.
/// For samples being populated to the server, this can be a type and filename.
///
/// Legacy datasets may have inline base64-encoded data instead of URLs.
/// The `data` field stores this inline content for fallback when no URL exists.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct SampleFile {
    /// File type name (e.g., "image", "lidar.pcd").
    r#type: String,
    /// URL of the file; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    url: Option<String>,
    /// Filename of the file; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    filename: Option<String>,
    /// Inline base64-encoded data for legacy datasets without pre-signed URLs.
    ///
    /// Marked `skip_deserializing`: serde never fills this field when a
    /// `SampleFile` is deserialized, so it is only set by code in this crate.
    #[serde(skip_serializing_if = "Option::is_none", skip_deserializing)]
    data: Option<String>,
    /// Raw bytes for direct upload (e.g., from ZIP archives).
    /// This field is not serialized - it's only used during the upload process.
    #[serde(skip)]
    bytes: Option<Vec<u8>>,
}
1053
1054impl SampleFile {
1055    /// Creates a new sample file with type and URL (for newer datasets).
1056    pub fn with_url(file_type: String, url: String) -> Self {
1057        Self {
1058            r#type: file_type,
1059            url: Some(url),
1060            filename: None,
1061            data: None,
1062            bytes: None,
1063        }
1064    }
1065
1066    /// Creates a new sample file with type and filename (for populate API).
1067    pub fn with_filename(file_type: String, filename: String) -> Self {
1068        Self {
1069            r#type: file_type,
1070            url: None,
1071            filename: Some(filename),
1072            data: None,
1073            bytes: None,
1074        }
1075    }
1076
1077    /// Creates a new sample file with inline data (for legacy datasets).
1078    pub fn with_data(file_type: String, data: String) -> Self {
1079        Self {
1080            r#type: file_type,
1081            url: None,
1082            filename: None,
1083            data: Some(data),
1084            bytes: None,
1085        }
1086    }
1087
1088    /// Creates a new sample file with raw bytes for direct upload.
1089    ///
1090    /// This is useful for uploading files from ZIP archives without extracting
1091    /// to disk first. The bytes are uploaded directly to the presigned URL.
1092    ///
1093    /// # Arguments
1094    /// * `file_type` - The type of file (e.g., "image", "lidar.pcd")
1095    /// * `filename` - The filename to use for the upload
1096    /// * `bytes` - The raw file bytes
1097    pub fn with_bytes(file_type: String, filename: String, bytes: Vec<u8>) -> Self {
1098        Self {
1099            r#type: file_type,
1100            url: None,
1101            filename: Some(filename),
1102            data: None,
1103            bytes: Some(bytes),
1104        }
1105    }
1106
1107    pub fn file_type(&self) -> &str {
1108        &self.r#type
1109    }
1110
1111    pub fn url(&self) -> Option<&str> {
1112        self.url.as_deref()
1113    }
1114
1115    pub fn filename(&self) -> Option<&str> {
1116        self.filename.as_deref()
1117    }
1118
1119    /// Returns inline base64-encoded data (for legacy datasets).
1120    pub fn data(&self) -> Option<&str> {
1121        self.data.as_deref()
1122    }
1123
1124    /// Returns raw bytes for direct upload (from ZIP archives, etc.).
1125    pub fn bytes(&self) -> Option<&[u8]> {
1126        self.bytes.as_deref()
1127    }
1128}
1129
/// Location and pose information for a sample.
///
/// Contains GPS coordinates and IMU orientation data describing where and how
/// the camera was positioned when capturing the sample.
///
/// Both parts are optional; a part that is `None` is left out of the
/// serialized output.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Location {
    /// GPS coordinates, if available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gps: Option<GpsData>,
    /// IMU orientation, if available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub imu: Option<ImuData>,
}
1141
/// GPS location data (latitude and longitude).
///
/// The values are stored as given; use `GpsData::validate` to check them.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct GpsData {
    /// Latitude (serialized as `lat`).
    pub lat: f64,
    /// Longitude (serialized as `lon`).
    pub lon: f64,
}
1148
1149impl GpsData {
1150    /// Validate GPS coordinates are within valid ranges.
1151    ///
1152    /// Checks if latitude and longitude values are within valid geographic
1153    /// ranges. Helps catch data corruption or API issues early.
1154    ///
1155    /// # Returns
1156    /// `Ok(())` if valid, `Err(String)` with descriptive error message
1157    /// otherwise
1158    ///
1159    /// # Valid Ranges
1160    /// - Latitude: -90.0 to +90.0 degrees
1161    /// - Longitude: -180.0 to +180.0 degrees
1162    ///
1163    /// # Examples
1164    /// ```
1165    /// use edgefirst_client::GpsData;
1166    ///
1167    /// let gps = GpsData {
1168    ///     lat: 37.7749,
1169    ///     lon: -122.4194,
1170    /// };
1171    /// assert!(gps.validate().is_ok());
1172    ///
1173    /// let bad_gps = GpsData {
1174    ///     lat: 100.0,
1175    ///     lon: 0.0,
1176    /// };
1177    /// assert!(bad_gps.validate().is_err());
1178    /// ```
1179    pub fn validate(&self) -> Result<(), String> {
1180        validate_gps_coordinates(self.lat, self.lon)
1181    }
1182}
1183
/// IMU orientation data (roll, pitch, yaw in degrees).
///
/// The values are stored as given; use `ImuData::validate` to check them.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ImuData {
    /// Roll angle in degrees.
    pub roll: f64,
    /// Pitch angle in degrees.
    pub pitch: f64,
    /// Yaw angle in degrees.
    pub yaw: f64,
}
1191
1192impl ImuData {
1193    /// Validate IMU orientation angles are within valid ranges.
1194    ///
1195    /// Checks if roll, pitch, and yaw values are finite and within reasonable
1196    /// ranges. Helps catch data corruption or sensor errors early.
1197    ///
1198    /// # Returns
1199    /// `Ok(())` if valid, `Err(String)` with descriptive error message
1200    /// otherwise
1201    ///
1202    /// # Valid Ranges
1203    /// - Roll: -180.0 to +180.0 degrees
1204    /// - Pitch: -90.0 to +90.0 degrees (typical gimbal lock range)
1205    /// - Yaw: -180.0 to +180.0 degrees (or 0 to 360, normalized)
1206    ///
1207    /// # Examples
1208    /// ```
1209    /// use edgefirst_client::ImuData;
1210    ///
1211    /// let imu = ImuData {
1212    ///     roll: 10.0,
1213    ///     pitch: 5.0,
1214    ///     yaw: 90.0,
1215    /// };
1216    /// assert!(imu.validate().is_ok());
1217    ///
1218    /// let bad_imu = ImuData {
1219    ///     roll: 200.0,
1220    ///     pitch: 0.0,
1221    ///     yaw: 0.0,
1222    /// };
1223    /// assert!(bad_imu.validate().is_err());
1224    /// ```
1225    pub fn validate(&self) -> Result<(), String> {
1226        validate_imu_orientation(self.roll, self.pitch, self.yaw)
1227    }
1228}
1229
/// Associates a static, string-valued type name with a type.
///
/// The name is obtained from the type itself (`T::type_name()`), not from a
/// value.
// NOTE(review): `dead_code` is presumably allowed because the trait is not
// used in every build configuration - confirm and record the reason here.
#[allow(dead_code)]
pub trait TypeName {
    /// Returns the name associated with the implementing type.
    fn type_name() -> String;
}
1234
/// A 3D box stored as a center point (`x`, `y`, `z`) and its extents
/// (`w`, `h`, `l`).
///
/// The short field names are also the serialized key names.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct Box3d {
    /// Center X coordinate.
    x: f32,
    /// Center Y coordinate.
    y: f32,
    /// Center Z coordinate.
    z: f32,
    /// Width (extent along X).
    w: f32,
    /// Height (extent along Y).
    h: f32,
    /// Length (extent along Z).
    l: f32,
}
1244
1245impl TypeName for Box3d {
1246    fn type_name() -> String {
1247        "box3d".to_owned()
1248    }
1249}
1250
1251impl Box3d {
1252    pub fn new(cx: f32, cy: f32, cz: f32, width: f32, height: f32, length: f32) -> Self {
1253        Self {
1254            x: cx,
1255            y: cy,
1256            z: cz,
1257            w: width,
1258            h: height,
1259            l: length,
1260        }
1261    }
1262
1263    pub fn width(&self) -> f32 {
1264        self.w
1265    }
1266
1267    pub fn height(&self) -> f32 {
1268        self.h
1269    }
1270
1271    pub fn length(&self) -> f32 {
1272        self.l
1273    }
1274
1275    pub fn cx(&self) -> f32 {
1276        self.x
1277    }
1278
1279    pub fn cy(&self) -> f32 {
1280        self.y
1281    }
1282
1283    pub fn cz(&self) -> f32 {
1284        self.z
1285    }
1286
1287    pub fn left(&self) -> f32 {
1288        self.x - self.w / 2.0
1289    }
1290
1291    pub fn top(&self) -> f32 {
1292        self.y - self.h / 2.0
1293    }
1294
1295    pub fn front(&self) -> f32 {
1296        self.z - self.l / 2.0
1297    }
1298}
1299
/// A 2D box stored as its left/top corner (`x`, `y`) and its size
/// (`w`, `h`).
///
/// The short field names are also the serialized key names.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct Box2d {
    /// Height of the box.
    h: f32,
    /// Width of the box.
    w: f32,
    /// Left edge (X coordinate of the top-left corner).
    x: f32,
    /// Top edge (Y coordinate of the top-left corner).
    y: f32,
}
1307
1308impl TypeName for Box2d {
1309    fn type_name() -> String {
1310        "box2d".to_owned()
1311    }
1312}
1313
1314impl Box2d {
1315    pub fn new(left: f32, top: f32, width: f32, height: f32) -> Self {
1316        Self {
1317            x: left,
1318            y: top,
1319            w: width,
1320            h: height,
1321        }
1322    }
1323
1324    pub fn width(&self) -> f32 {
1325        self.w
1326    }
1327
1328    pub fn height(&self) -> f32 {
1329        self.h
1330    }
1331
1332    pub fn left(&self) -> f32 {
1333        self.x
1334    }
1335
1336    pub fn top(&self) -> f32 {
1337        self.y
1338    }
1339
1340    pub fn cx(&self) -> f32 {
1341        self.x + self.w / 2.0
1342    }
1343
1344    pub fn cy(&self) -> f32 {
1345        self.y + self.h / 2.0
1346    }
1347}
1348
/// A polygon made of one or more rings of `(x, y)` points.
///
/// Serde support is implemented by hand for this type rather than derived.
#[derive(Clone, Debug, PartialEq)]
pub struct Polygon {
    /// The rings of the polygon; each ring is a list of `(x, y)` points.
    pub rings: Vec<Vec<(f32, f32)>>,
}
1353
1354impl TypeName for Polygon {
1355    fn type_name() -> String {
1356        "polygon".to_owned()
1357    }
1358}
1359
1360impl Polygon {
1361    pub fn new(rings: Vec<Vec<(f32, f32)>>) -> Self {
1362        Self { rings }
1363    }
1364}
1365
1366impl serde::Serialize for Polygon {
1367    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1368    where
1369        S: serde::Serializer,
1370    {
1371        serde::Serialize::serialize(&self.rings, serializer)
1372    }
1373}
1374
1375impl<'de> serde::Deserialize<'de> for Polygon {
1376    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1377    where
1378        D: serde::Deserializer<'de>,
1379    {
1380        // First, deserialize to a raw JSON value to handle various formats
1381        let value = serde_json::Value::deserialize(deserializer)?;
1382
1383        // Try to extract polygon data from various formats
1384        let polygon_value = if let Some(obj) = value.as_object() {
1385            // Format: {"polygon": [...]} or {"rings": [...]}
1386            obj.get("rings")
1387                .or_else(|| obj.get("polygon"))
1388                .cloned()
1389                .unwrap_or(serde_json::Value::Null)
1390        } else {
1391            // Format: [[...]] (direct array)
1392            value
1393        };
1394
1395        // Parse the polygon array, filtering out null/invalid values
1396        let rings = parse_polygon_value(&polygon_value);
1397
1398        Ok(Self { rings })
1399    }
1400}
1401
1402/// Parse polygon value from JSON, handling malformed data gracefully.
1403///
1404/// Handles multiple formats:
1405/// - `[[[x,y],[x,y],...]]` - 3D array with point pairs (correct format)
1406/// - `[[x,y,x,y,...]]` - 2D array with flat coords (COCO format, legacy)
1407/// - `[[null,null,...]]` - corrupted data (returns empty)
1408/// - `null` - missing data (returns empty)
1409fn parse_polygon_value(value: &serde_json::Value) -> Vec<Vec<(f32, f32)>> {
1410    let Some(outer_array) = value.as_array() else {
1411        return vec![];
1412    };
1413
1414    let mut result = Vec::new();
1415
1416    for ring in outer_array {
1417        let Some(ring_array) = ring.as_array() else {
1418            continue;
1419        };
1420
1421        // Check if this is a 3D array (point pairs) or 2D array (flat coords)
1422        let is_3d = ring_array
1423            .first()
1424            .map(|first| first.is_array())
1425            .unwrap_or(false);
1426
1427        let points: Vec<(f32, f32)> = if is_3d {
1428            // 3D format: [[x1,y1], [x2,y2], ...]
1429            ring_array
1430                .iter()
1431                .filter_map(|point| {
1432                    let arr = point.as_array()?;
1433                    if arr.len() >= 2 {
1434                        let x = arr[0].as_f64()? as f32;
1435                        let y = arr[1].as_f64()? as f32;
1436                        if x.is_finite() && y.is_finite() {
1437                            Some((x, y))
1438                        } else {
1439                            None
1440                        }
1441                    } else {
1442                        None
1443                    }
1444                })
1445                .collect()
1446        } else {
1447            // 2D format (flat): [x1, y1, x2, y2, ...]
1448            ring_array
1449                .chunks(2)
1450                .filter_map(|chunk| {
1451                    if chunk.len() >= 2 {
1452                        let x = chunk[0].as_f64()? as f32;
1453                        let y = chunk[1].as_f64()? as f32;
1454                        if x.is_finite() && y.is_finite() {
1455                            Some((x, y))
1456                        } else {
1457                            None
1458                        }
1459                    } else {
1460                        None
1461                    }
1462                })
1463                .collect()
1464        };
1465
1466        // Only add rings with at least 3 valid points
1467        if points.len() >= 3 {
1468            result.push(points);
1469        }
1470    }
1471
1472    result
1473}
1474
/// Helper struct for deserializing annotations from the server.
///
/// The server sends bounding box coordinates as flat fields (x, y, w, h) at the
/// annotation level, but we want to store them as a nested Box2d struct.
///
/// Every field is optional and defaults to `None` when absent from the input.
#[derive(Deserialize)]
struct AnnotationRaw {
    /// ID of the sample the annotation belongs to.
    #[serde(default)]
    sample_id: Option<SampleID>,
    /// Name stored with the annotation.
    #[serde(default)]
    name: Option<String>,
    /// Sequence name stored with the annotation.
    #[serde(default)]
    sequence_name: Option<String>,
    /// Frame number stored with the annotation.
    #[serde(default)]
    frame_number: Option<u32>,
    /// Group, read from the `group_name` key.
    #[serde(rename = "group_name", default)]
    group: Option<String>,
    /// Object identifier, read from `object_reference` or `object_id`.
    #[serde(rename = "object_reference", alias = "object_id", default)]
    object_id: Option<String>,
    /// Label name.
    #[serde(default)]
    label_name: Option<String>,
    /// Label index.
    #[serde(default)]
    label_index: Option<u64>,
    /// Crowd flag.
    #[serde(default)]
    iscrowd: Option<bool>,
    /// Category frequency value.
    #[serde(default)]
    category_frequency: Option<String>,
    // Nested box2d format (if server sends it this way)
    #[serde(default)]
    box2d: Option<Box2d>,
    /// Nested 3D box.
    #[serde(default)]
    box3d: Option<Box3d>,
    /// Polygon, read from the `polygon` or `mask` key.
    #[serde(default, alias = "mask")]
    polygon: Option<Polygon>,
    // Flat box2d fields from server (x, y, w, h at annotation level)
    #[serde(default)]
    x: Option<f64>,
    #[serde(default)]
    y: Option<f64>,
    #[serde(default)]
    w: Option<f64>,
    #[serde(default)]
    h: Option<f64>,
}
1518
/// A single annotation attached to a sample.
///
/// Only `Serialize` is derived; deserialization is implemented by hand and
/// goes through `AnnotationRaw`. Fields that are `None` are omitted from the
/// serialized output.
// NOTE(review): because `Deserialize` is not derived here, the
// deserialize-only serde attributes below (`alias`, `default`) appear to have
// no effect on this struct - confirm before relying on them.
#[derive(Serialize, Clone, Debug)]
pub struct Annotation {
    /// ID of the sample this annotation belongs to.
    #[serde(skip_serializing_if = "Option::is_none")]
    sample_id: Option<SampleID>,
    /// Name stored with the annotation.
    #[serde(skip_serializing_if = "Option::is_none")]
    name: Option<String>,
    /// Sequence name stored with the annotation.
    #[serde(skip_serializing_if = "Option::is_none")]
    sequence_name: Option<String>,
    /// Frame number stored with the annotation.
    #[serde(skip_serializing_if = "Option::is_none")]
    frame_number: Option<u32>,
    /// Dataset split (train, val, test) - matches `Sample.group`.
    /// JSON field name: "group_name" (Studio API uses this name for both upload
    /// and download).
    #[serde(rename = "group_name", skip_serializing_if = "Option::is_none")]
    group: Option<String>,
    /// Object tracking identifier across frames.
    /// JSON field name: "object_reference" for upload (populate), "object_id"
    /// for download (list).
    #[serde(
        rename = "object_reference",
        alias = "object_id",
        skip_serializing_if = "Option::is_none"
    )]
    object_id: Option<String>,
    /// Label name.
    #[serde(skip_serializing_if = "Option::is_none")]
    label_name: Option<String>,
    /// Label index.
    #[serde(skip_serializing_if = "Option::is_none")]
    label_index: Option<u64>,
    /// COCO crowd flag: true = crowd region, false = single instance.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    iscrowd: Option<bool>,
    /// LVIS frequency group: "f" (frequent), "c" (common), "r" (rare).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    category_frequency: Option<String>,
    /// 2D bounding box.
    #[serde(skip_serializing_if = "Option::is_none")]
    box2d: Option<Box2d>,
    /// 3D bounding box.
    #[serde(skip_serializing_if = "Option::is_none")]
    box3d: Option<Box3d>,
    /// Polygon vertices for instance segmentation.
    ///
    /// Wire name is `mask` for historical reasons: the Rust field was
    /// renamed from `mask: Mask` to `polygon: Polygon` after the
    /// `samples.populate2` contract was already locked in, and the server
    /// still expects the key to be `mask`. Uploads that emit `polygon`
    /// here get silently dropped. Deserialisation accepts both names
    /// because `AnnotationRaw` carries `alias = "mask"`.
    #[serde(rename(serialize = "mask"), skip_serializing_if = "Option::is_none")]
    polygon: Option<Polygon>,
    /// PNG-encoded raster mask (populated from Arrow, not from Studio JSON-RPC).
    #[serde(skip)]
    mask: Option<MaskData>,
    /// Detection confidence score for box2d (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    box2d_score: Option<f32>,
    /// Detection confidence score for box3d (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    box3d_score: Option<f32>,
    /// Confidence score for polygon (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    polygon_score: Option<f32>,
    /// Confidence score for mask (0..1).
    #[serde(skip_serializing_if = "Option::is_none")]
    mask_score: Option<f32>,
}
1583
1584impl<'de> serde::Deserialize<'de> for Annotation {
1585    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1586    where
1587        D: serde::Deserializer<'de>,
1588    {
1589        // Deserialize to AnnotationRaw first to handle server format differences
1590        let raw: AnnotationRaw = serde::Deserialize::deserialize(deserializer)?;
1591
1592        // Prefer nested box2d if present, otherwise construct from flat x/y/w/h
1593        let box2d = raw.box2d.or_else(|| match (raw.x, raw.y, raw.w, raw.h) {
1594            (Some(x), Some(y), Some(w), Some(h)) if w > 0.0 && h > 0.0 => {
1595                Some(Box2d::new(x as f32, y as f32, w as f32, h as f32))
1596            }
1597            _ => None,
1598        });
1599
1600        Ok(Annotation {
1601            sample_id: raw.sample_id,
1602            name: raw.name,
1603            sequence_name: raw.sequence_name,
1604            frame_number: raw.frame_number,
1605            group: raw.group,
1606            object_id: raw.object_id,
1607            label_name: raw.label_name,
1608            label_index: raw.label_index,
1609            iscrowd: raw.iscrowd,
1610            category_frequency: raw.category_frequency,
1611            box2d,
1612            box3d: raw.box3d,
1613            polygon: raw.polygon,
1614            mask: None,
1615            box2d_score: None,
1616            box3d_score: None,
1617            polygon_score: None,
1618            mask_score: None,
1619        })
1620    }
1621}
1622
1623impl Default for Annotation {
1624    fn default() -> Self {
1625        Self::new()
1626    }
1627}
1628
1629impl Annotation {
1630    pub fn new() -> Self {
1631        Self {
1632            sample_id: None,
1633            name: None,
1634            sequence_name: None,
1635            frame_number: None,
1636            group: None,
1637            object_id: None,
1638            label_name: None,
1639            label_index: None,
1640            iscrowd: None,
1641            category_frequency: None,
1642            box2d: None,
1643            box3d: None,
1644            polygon: None,
1645            mask: None,
1646            box2d_score: None,
1647            box3d_score: None,
1648            polygon_score: None,
1649            mask_score: None,
1650        }
1651    }
1652
1653    pub fn set_sample_id(&mut self, sample_id: Option<SampleID>) {
1654        self.sample_id = sample_id;
1655    }
1656
1657    pub fn sample_id(&self) -> Option<SampleID> {
1658        self.sample_id
1659    }
1660
1661    pub fn set_name(&mut self, name: Option<String>) {
1662        self.name = name;
1663    }
1664
1665    pub fn name(&self) -> Option<&String> {
1666        self.name.as_ref()
1667    }
1668
1669    pub fn set_sequence_name(&mut self, sequence_name: Option<String>) {
1670        self.sequence_name = sequence_name;
1671    }
1672
1673    pub fn sequence_name(&self) -> Option<&String> {
1674        self.sequence_name.as_ref()
1675    }
1676
1677    pub fn set_frame_number(&mut self, frame_number: Option<u32>) {
1678        self.frame_number = frame_number;
1679    }
1680
1681    pub fn frame_number(&self) -> Option<u32> {
1682        self.frame_number
1683    }
1684
1685    pub fn set_group(&mut self, group: Option<String>) {
1686        self.group = group;
1687    }
1688
1689    pub fn group(&self) -> Option<&String> {
1690        self.group.as_ref()
1691    }
1692
1693    pub fn object_id(&self) -> Option<&String> {
1694        self.object_id.as_ref()
1695    }
1696
1697    pub fn set_object_id(&mut self, object_id: Option<String>) {
1698        self.object_id = object_id;
1699    }
1700
1701    pub fn label(&self) -> Option<&String> {
1702        self.label_name.as_ref()
1703    }
1704
1705    pub fn set_label(&mut self, label_name: Option<String>) {
1706        self.label_name = label_name;
1707    }
1708
1709    pub fn label_index(&self) -> Option<u64> {
1710        self.label_index
1711    }
1712
1713    pub fn set_label_index(&mut self, label_index: Option<u64>) {
1714        self.label_index = label_index;
1715    }
1716
1717    pub fn iscrowd(&self) -> Option<bool> {
1718        self.iscrowd
1719    }
1720
1721    pub fn set_iscrowd(&mut self, iscrowd: Option<bool>) {
1722        self.iscrowd = iscrowd;
1723    }
1724
1725    pub fn category_frequency(&self) -> Option<&String> {
1726        self.category_frequency.as_ref()
1727    }
1728
1729    pub fn set_category_frequency(&mut self, category_frequency: Option<String>) {
1730        self.category_frequency = category_frequency;
1731    }
1732
1733    pub fn box2d(&self) -> Option<&Box2d> {
1734        self.box2d.as_ref()
1735    }
1736
1737    pub fn set_box2d(&mut self, box2d: Option<Box2d>) {
1738        self.box2d = box2d;
1739    }
1740
1741    pub fn box3d(&self) -> Option<&Box3d> {
1742        self.box3d.as_ref()
1743    }
1744
1745    pub fn set_box3d(&mut self, box3d: Option<Box3d>) {
1746        self.box3d = box3d;
1747    }
1748
1749    pub fn polygon(&self) -> Option<&Polygon> {
1750        self.polygon.as_ref()
1751    }
1752
1753    pub fn set_polygon(&mut self, polygon: Option<Polygon>) {
1754        self.polygon = polygon;
1755    }
1756
1757    pub fn mask(&self) -> Option<&MaskData> {
1758        self.mask.as_ref()
1759    }
1760
1761    pub fn set_mask(&mut self, mask: Option<MaskData>) {
1762        self.mask = mask;
1763    }
1764
1765    pub fn box2d_score(&self) -> Option<f32> {
1766        self.box2d_score
1767    }
1768
1769    pub fn set_box2d_score(&mut self, score: Option<f32>) {
1770        self.box2d_score = score;
1771    }
1772
1773    pub fn box3d_score(&self) -> Option<f32> {
1774        self.box3d_score
1775    }
1776
1777    pub fn set_box3d_score(&mut self, score: Option<f32>) {
1778        self.box3d_score = score;
1779    }
1780
1781    pub fn polygon_score(&self) -> Option<f32> {
1782        self.polygon_score
1783    }
1784
1785    pub fn set_polygon_score(&mut self, score: Option<f32>) {
1786        self.polygon_score = score;
1787    }
1788
1789    pub fn mask_score(&self) -> Option<f32> {
1790        self.mask_score
1791    }
1792
1793    pub fn set_mask_score(&mut self, score: Option<f32>) {
1794        self.mask_score = score;
1795    }
1796}
1797
/// A label belonging to a dataset.
///
/// Fields are private; use the accessor methods to read them.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Label {
    /// Numeric identifier of the label.
    id: u64,
    /// ID of the dataset the label belongs to.
    dataset_id: DatasetID,
    /// Index of the label.
    index: u64,
    /// Name of the label.
    name: String,
}
1805
1806impl Label {
1807    pub fn id(&self) -> u64 {
1808        self.id
1809    }
1810
1811    pub fn dataset_id(&self) -> DatasetID {
1812        self.dataset_id
1813    }
1814
1815    pub fn index(&self) -> u64 {
1816        self.index
1817    }
1818
1819    pub fn name(&self) -> &str {
1820        &self.name
1821    }
1822
1823    pub async fn remove(&self, client: &Client) -> Result<(), Error> {
1824        client.remove_label(self.id()).await
1825    }
1826
1827    pub async fn set_name(&mut self, client: &Client, name: &str) -> Result<(), Error> {
1828        self.name = name.to_string();
1829        client.update_label(self).await
1830    }
1831
1832    pub async fn set_index(&mut self, client: &Client, index: u64) -> Result<(), Error> {
1833        self.index = index;
1834        client.update_label(self).await
1835    }
1836}
1837
1838impl Display for Label {
1839    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1840        write!(f, "{}", self.name())
1841    }
1842}
1843
/// Serializable description of a single label to create, carrying only its
/// name.
#[derive(Serialize, Clone, Debug)]
pub struct NewLabelObject {
    /// Name of the label.
    pub name: String,
}
1848
/// Serializable payload pairing a dataset ID with a list of labels to
/// create.
#[derive(Serialize, Clone, Debug)]
pub struct NewLabel {
    /// ID of the dataset the labels are for.
    pub dataset_id: DatasetID,
    /// The labels, each described by its name.
    pub labels: Vec<NewLabelObject>,
}
1854
/// A dataset group for organizing samples into logical subsets.
///
/// Groups are used to partition samples within a dataset for different purposes
/// such as training, validation, and testing. Each sample can belong to at most
/// one group at a time.
///
/// This type derives `Deserialize` but not `Serialize`.
///
/// # Common Group Names
///
/// - `"train"` - Training data for model fitting
/// - `"val"` - Validation data for hyperparameter tuning
/// - `"test"` - Test data for final evaluation
///
/// # Examples
///
/// ```rust,no_run
/// use edgefirst_client::{Client, DatasetID};
///
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let client = Client::new()?.with_token_path(None)?;
/// let dataset_id: DatasetID = "ds-123".try_into()?;
///
/// // List all groups in the dataset
/// let groups = client.groups(dataset_id).await?;
/// for group in groups {
///     println!("Group [{}]: {}", group.id, group.name);
/// }
/// # Ok(())
/// # }
/// ```
#[derive(Deserialize, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Group {
    /// The unique numeric identifier for this group.
    ///
    /// Group IDs are assigned by the server and are unique within an
    /// organization.
    pub id: u64,

    /// The human-readable name of the group.
    ///
    /// Common names include "train", "val", "test", but any string is valid.
    pub name: String,
}
1897
/// Derive the `(name, frame)` pair used for an annotation's DataFrame row.
///
/// Returns `None` when the annotation has no name, or when that name has no
/// UTF-8 file stem. Otherwise:
/// - an annotation with a sequence name yields the sequence name together
///   with the annotation's frame number;
/// - any other annotation yields the file stem of its name and no frame.
#[cfg(feature = "polars")]
fn extract_annotation_name(ann: &Annotation) -> Option<(String, Option<u32>)> {
    // The stem is resolved up front, so an annotation without a usable name
    // is rejected even when it carries a sequence name.
    let stem = std::path::Path::new(ann.name.as_ref()?)
        .file_stem()?
        .to_str()?;

    let row_key = if let Some(sequence) = &ann.sequence_name {
        (sequence.clone(), ann.frame_number)
    } else {
        (stem.to_string(), None)
    };
    Some(row_key)
}
1912
/// Build the nested-list Series for one polygon (2026.04 schema).
///
/// Every ring of the polygon becomes one inner Series of interleaved
/// `[x1, y1, x2, y2, ...]` floats; the returned Series holds those inner
/// Series in ring order. Both the inner and outer Series are unnamed.
#[cfg(feature = "polars")]
fn convert_polygon_to_nested_series(polygon: &Polygon) -> Series {
    let mut rings: Vec<Option<Series>> = Vec::new();
    for ring in polygon.rings.iter() {
        // Interleave the points of this ring as x, y, x, y, ...
        let mut flat: Vec<f32> = Vec::new();
        for &(x, y) in ring.iter() {
            flat.push(x);
            flat.push(y);
        }
        rings.push(Some(Series::new("".into(), flat)));
    }
    Series::new("".into(), rings)
}
1928
/// Create a DataFrame from a slice of samples with the 2026.04 schema.
///
/// Each annotation in each sample becomes one row. A sample without any
/// annotations still contributes a single row whose annotation columns are
/// null. Columns where every value is null are automatically dropped, so the
/// result only contains columns that carry data. The `name` column is always
/// present.
///
/// Rows are silently skipped when no row name can be derived: an annotation
/// for which `extract_annotation_name` returns `None`, or an annotation-free
/// sample for which `extract_annotation_name_from_sample` returns `None`.
///
/// # Schema (2026.04)
///
/// - `name`: Sample name (String) - ALWAYS PRESENT
/// - `frame`: Frame number (UInt32)
/// - `object_id`: Object tracking ID (String)
/// - `label`: Object label (Categorical)
/// - `label_index`: Label index (UInt64)
/// - `group`: Dataset group (Categorical)
/// - `polygon`: Segmentation polygon rings (List<List<Float32>>)
/// - `box2d`: 2D bounding box [cx, cy, w, h] (Array<Float32, 4>)
/// - `box3d`: 3D bounding box [x, y, z, w, h, l] (Array<Float32, 6>)
/// - `mask`: PNG-encoded raster mask (Binary)
/// - `box2d_score`: Box2d confidence (Float32)
/// - `box3d_score`: Box3d confidence (Float32)
/// - `polygon_score`: Polygon confidence (Float32)
/// - `mask_score`: Mask confidence (Float32)
/// - `size`: Image size [width, height] (Array<UInt32, 2>)
/// - `location`: GPS [lat, lon] (Array<Float32, 2>)
/// - `pose`: IMU [yaw, pitch, roll] (Array<Float32, 3>)
/// - `degradation`: Image degradation (String)
/// - `iscrowd`: COCO crowd flag (Boolean)
/// - `category_frequency`: LVIS frequency group (Categorical)
/// - `neg_label_indices`: Verified-absent label indices (List<UInt32>)
/// - `not_exhaustive_label_indices`: Incomplete label indices (List<UInt32>)
/// - `timing`: Pipeline timing (Struct{load, preprocess, inference, decode} of Int64)
///
/// # Errors
///
/// Returns an error when any of the column casts, the construction of the
/// `timing` struct column, or the final `DataFrame::new` call fails.
///
/// # Example
///
/// ```rust,no_run
/// use edgefirst_client::{Client, samples_dataframe};
///
/// # async fn example() -> Result<(), edgefirst_client::Error> {
/// # let client = Client::new()?;
/// # let dataset_id = 1.into();
/// # let annotation_set_id = 1.into();
/// let samples = client
///     .samples(dataset_id, Some(annotation_set_id), &[], &[], &[], None)
///     .await?;
/// let df = samples_dataframe(&samples)?;
/// println!("DataFrame shape: {:?}", df.shape());
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "polars")]
pub fn samples_dataframe(samples: &[Sample]) -> Result<DataFrame, Error> {
    // Collect per-row vectors directly while iterating samples.
    // Invariant: every vector below receives exactly one push per emitted
    // row, so they all end up with the same length.
    let mut names: Vec<String> = Vec::new();
    let mut frames: Vec<Option<u32>> = Vec::new();
    let mut objects: Vec<Option<String>> = Vec::new();
    let mut labels: Vec<Option<String>> = Vec::new();
    let mut label_indices: Vec<Option<u64>> = Vec::new();
    let mut groups: Vec<Option<String>> = Vec::new();
    let mut polygons: Vec<Option<Series>> = Vec::new();
    let mut boxes2d: Vec<Option<Series>> = Vec::new();
    let mut boxes3d: Vec<Option<Series>> = Vec::new();
    let mut mask_bytes: Vec<Option<Vec<u8>>> = Vec::new();
    let mut box2d_scores: Vec<Option<f32>> = Vec::new();
    let mut box3d_scores: Vec<Option<f32>> = Vec::new();
    let mut polygon_scores: Vec<Option<f32>> = Vec::new();
    let mut mask_scores: Vec<Option<f32>> = Vec::new();
    let mut sizes: Vec<Option<Vec<u32>>> = Vec::new();
    let mut locations: Vec<Option<Vec<f32>>> = Vec::new();
    let mut poses: Vec<Option<Vec<f32>>> = Vec::new();
    let mut degradations: Vec<Option<String>> = Vec::new();
    let mut iscrowds: Vec<Option<bool>> = Vec::new();
    let mut category_frequencies: Vec<Option<String>> = Vec::new();
    let mut neg_label_indices_vec: Vec<Option<Vec<u32>>> = Vec::new();
    let mut not_exhaustive_label_indices_vec: Vec<Option<Vec<u32>>> = Vec::new();
    let mut timing_load: Vec<Option<i64>> = Vec::new();
    let mut timing_preprocess: Vec<Option<i64>> = Vec::new();
    let mut timing_inference: Vec<Option<i64>> = Vec::new();
    let mut timing_decode: Vec<Option<i64>> = Vec::new();

    for sample in samples {
        // Extract sample metadata once per sample.
        // `size` is only populated when both width and height are known.
        let size = match (sample.width, sample.height) {
            (Some(w), Some(h)) => Some(vec![w, h]),
            _ => None,
        };

        // GPS as [lat, lon]; the source values are narrowed to f32 to match
        // the Float32 column type.
        let location = sample.location.as_ref().and_then(|loc| {
            loc.gps
                .as_ref()
                .map(|gps| vec![gps.lat as f32, gps.lon as f32])
        });

        // IMU as [yaw, pitch, roll], likewise narrowed to f32.
        let pose = sample.location.as_ref().and_then(|loc| {
            loc.imu
                .as_ref()
                .map(|imu| vec![imu.yaw as f32, imu.pitch as f32, imu.roll as f32])
        });

        let degradation = sample.degradation.clone();

        // Timing from the sample (same for all rows of this sample)
        let t_load = sample.timing.as_ref().and_then(|t| t.load);
        let t_preprocess = sample.timing.as_ref().and_then(|t| t.preprocess);
        let t_inference = sample.timing.as_ref().and_then(|t| t.inference);
        let t_decode = sample.timing.as_ref().and_then(|t| t.decode);

        // Helper to push shared sample-level fields.
        // Invoked once per emitted row, after the row-specific pushes, so
        // that the sample-level values are repeated on every row of a sample.
        macro_rules! push_sample_fields {
            () => {
                sizes.push(size.clone());
                locations.push(location.clone());
                poses.push(pose.clone());
                degradations.push(degradation.clone());
                neg_label_indices_vec.push(sample.neg_label_indices.clone());
                not_exhaustive_label_indices_vec.push(sample.not_exhaustive_label_indices.clone());
                timing_load.push(t_load);
                timing_preprocess.push(t_preprocess);
                timing_inference.push(t_inference);
                timing_decode.push(t_decode);
            };
        }

        if sample.annotations.is_empty() {
            // One row for the sample with null annotation fields.
            // The whole sample is skipped when no name can be derived.
            let (name, frame) = match extract_annotation_name_from_sample(sample) {
                Some(nf) => nf,
                None => continue,
            };

            names.push(name);
            frames.push(frame);
            objects.push(None);
            labels.push(None);
            label_indices.push(None);
            groups.push(sample.group.clone());
            polygons.push(None);
            boxes2d.push(None);
            boxes3d.push(None);
            mask_bytes.push(None);
            box2d_scores.push(None);
            box3d_scores.push(None);
            polygon_scores.push(None);
            mask_scores.push(None);
            iscrowds.push(None);
            category_frequencies.push(None);
            push_sample_fields!();
        } else {
            // One row per annotation
            for ann in &sample.annotations {
                // Only this annotation is skipped when it has no usable
                // name; the remaining annotations are still processed.
                let (name, frame) = match extract_annotation_name(ann) {
                    Some(nf) => nf,
                    None => continue,
                };

                let polygon = ann.polygon.as_ref().map(convert_polygon_to_nested_series);

                // Box geometry is stored as a small per-row Series and cast
                // to a fixed-size Array column after the loop.
                let box2d = ann
                    .box2d
                    .as_ref()
                    .map(|b| Series::new("box2d".into(), [b.cx(), b.cy(), b.width(), b.height()]));

                let box3d = ann
                    .box3d
                    .as_ref()
                    .map(|b| Series::new("box3d".into(), [b.x, b.y, b.z, b.w, b.h, b.l]));

                names.push(name);
                frames.push(frame);
                objects.push(ann.object_id().cloned());
                labels.push(ann.label_name.clone());
                label_indices.push(ann.label_index);
                groups.push(sample.group.clone());
                polygons.push(polygon);
                boxes2d.push(box2d);
                boxes3d.push(box3d);
                mask_bytes.push(ann.mask.as_ref().map(|m| m.as_bytes().to_vec()));
                box2d_scores.push(ann.box2d_score());
                box3d_scores.push(ann.box3d_score());
                polygon_scores.push(ann.polygon_score());
                mask_scores.push(ann.mask_score());
                iscrowds.push(ann.iscrowd);
                category_frequencies.push(ann.category_frequency.clone());
                push_sample_fields!();
            }
        }
    }

    // Build DataFrame columns
    let names_col: Column = Series::new("name".into(), names).into();
    let frames_col: Column = Series::new("frame".into(), frames).into();
    let objects_col: Column = Series::new("object_id".into(), objects).into();

    // Column name: "label" (NOT "label_name")
    //
    // Physical is U16 so taxonomies larger than 255 labels fit (LVIS v1 has
    // 1,203 categories). U16 caps at 65,535 — comfortably above any realistic
    // object-detection taxonomy — and only costs one extra byte per row vs U8.
    let labels_col: Column = Series::new("label".into(), labels)
        .cast(&DataType::Categorical(
            Categories::new("labels".into(), "labels".into(), CategoricalPhysical::U16),
            Arc::new(CategoricalMapping::with_hasher(
                u16::MAX as usize,
                Default::default(),
            )),
        ))?
        .into();

    let label_indices_col: Column = Series::new("label_index".into(), label_indices).into();

    // Column name: "group" (NOT "group_name")
    // Physical is U8 here, unlike the U16 used for labels above.
    let groups_col: Column = Series::new("group".into(), groups)
        .cast(&DataType::Categorical(
            Categories::new("groups".into(), "groups".into(), CategoricalPhysical::U8),
            Arc::new(CategoricalMapping::with_hasher(
                u8::MAX as usize,
                Default::default(),
            )),
        ))?
        .into();

    // Polygon: List(List(Float32)) — nested rings
    // The column is built in two steps to avoid a Polars dtype mismatch when
    // mixing Some/None entries: per the original author, Series::new() with
    // Vec<Option<Series>> panics when Some entries are list[f32] but None
    // entries infer as list[null].
    let polygons_col: Column = if polygons.iter().all(|p| p.is_none()) {
        // All null — create a null column that the drop rule will remove
        Series::new_null("polygon".into(), polygons.len()).into()
    } else {
        // Cast every present polygon to List(Float32) first; if that inner
        // cast fails the uncast Series is kept and the outer cast below
        // decides the outcome. None entries are passed through unchanged.
        let typed_polygons: Vec<Option<Series>> = polygons
            .into_iter()
            .map(|opt| {
                opt.map(|s| {
                    s.cast(&DataType::List(Box::new(DataType::Float32)))
                        .unwrap_or(s)
                })
            })
            .collect();
        Series::new("polygon".into(), &typed_polygons)
            .cast(&DataType::List(Box::new(DataType::List(Box::new(
                DataType::Float32,
            )))))?
            .into()
    };

    // Boxes: fixed-size Float32 arrays (4 values for 2D, 6 for 3D).
    let boxes2d_col: Column = Series::new("box2d".into(), boxes2d)
        .cast(&DataType::Array(Box::new(DataType::Float32), 4))?
        .into();
    let boxes3d_col: Column = Series::new("box3d".into(), boxes3d)
        .cast(&DataType::Array(Box::new(DataType::Float32), 6))?
        .into();

    // Mask: Binary (raw PNG bytes)
    let mask_col: Column = Series::new("mask".into(), mask_bytes).into();

    // Score columns: Float32
    let box2d_score_col: Column = Series::new("box2d_score".into(), box2d_scores).into();
    let box3d_score_col: Column = Series::new("box3d_score".into(), box3d_scores).into();
    let polygon_score_col: Column = Series::new("polygon_score".into(), polygon_scores).into();
    let mask_score_col: Column = Series::new("mask_score".into(), mask_scores).into();

    // Optional metadata columns (2025.10)
    // Each per-row Vec is wrapped in its own Series, then the column is cast
    // to a fixed-size Array of the documented width.
    let size_series: Vec<Option<Series>> = sizes
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("size".into(), vec)))
        .collect();
    let sizes_col: Column = Series::new("size".into(), size_series)
        .cast(&DataType::Array(Box::new(DataType::UInt32), 2))?
        .into();

    let location_series: Vec<Option<Series>> = locations
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("location".into(), vec)))
        .collect();
    let locations_col: Column = Series::new("location".into(), location_series)
        .cast(&DataType::Array(Box::new(DataType::Float32), 2))?
        .into();

    let pose_series: Vec<Option<Series>> = poses
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("pose".into(), vec)))
        .collect();
    let poses_col: Column = Series::new("pose".into(), pose_series)
        .cast(&DataType::Array(Box::new(DataType::Float32), 3))?
        .into();

    let degradations_col: Column = Series::new("degradation".into(), degradations).into();

    // LVIS extension columns
    let iscrowds_col: Column = Series::new("iscrowd".into(), iscrowds).into();

    let category_frequencies_col: Column =
        Series::new("category_frequency".into(), category_frequencies)
            .cast(&DataType::Categorical(
                Categories::new(
                    "cat_freq".into(),
                    "cat_freq".into(),
                    CategoricalPhysical::U8,
                ),
                Arc::new(CategoricalMapping::with_hasher(
                    u8::MAX as usize,
                    Default::default(),
                )),
            ))?
            .into();

    // Label-index lists are variable length, hence List rather than Array.
    let neg_label_indices_series: Vec<Option<Series>> = neg_label_indices_vec
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("neg_label_indices".into(), vec)))
        .collect();
    let neg_label_indices_col: Column =
        Series::new("neg_label_indices".into(), neg_label_indices_series)
            .cast(&DataType::List(Box::new(DataType::UInt32)))?
            .into();

    let not_exhaustive_label_indices_series: Vec<Option<Series>> = not_exhaustive_label_indices_vec
        .into_iter()
        .map(|opt_vec| opt_vec.map(|vec| Series::new("not_exhaustive_label_indices".into(), vec)))
        .collect();
    let not_exhaustive_label_indices_col: Column = Series::new(
        "not_exhaustive_label_indices".into(),
        not_exhaustive_label_indices_series,
    )
    .cast(&DataType::List(Box::new(DataType::UInt32)))?
    .into();

    // Timing: Struct{load, preprocess, inference, decode} of Int64
    // The struct length is taken from `frames_col`, which has one entry per
    // emitted row like every other column.
    let timing_col: Column = StructChunked::from_series(
        "timing".into(),
        frames_col.len(),
        [
            Series::new("load".into(), &timing_load),
            Series::new("preprocess".into(), &timing_preprocess),
            Series::new("inference".into(), &timing_inference),
            Series::new("decode".into(), &timing_decode),
        ]
        .iter(),
    )?
    .into_series()
    .into();

    // Collect all columns, then drop any where ALL values are null (except "name").
    // The order here is the column order of the resulting DataFrame.
    let all_columns: Vec<Column> = vec![
        names_col,
        frames_col,
        objects_col,
        labels_col,
        label_indices_col,
        groups_col,
        polygons_col,
        boxes2d_col,
        boxes3d_col,
        mask_col,
        box2d_score_col,
        box3d_score_col,
        polygon_score_col,
        mask_score_col,
        sizes_col,
        locations_col,
        poses_col,
        degradations_col,
        iscrowds_col,
        category_frequencies_col,
        neg_label_indices_col,
        not_exhaustive_label_indices_col,
        timing_col,
    ];

    // Row count is read from the first column ("name") before filtering, so
    // it stays correct even if every other column is dropped.
    let height = all_columns.first().map(|c| c.len()).unwrap_or(0);

    let non_empty_columns: Vec<Column> = all_columns
        .into_iter()
        .filter(|col| col.name() == "name" || !is_all_null_column(col))
        .collect();

    Ok(DataFrame::new(height, non_empty_columns)?)
}
2307
/// Reports whether a column carries no data at all.
///
/// A column counts as all-null when it is empty, when every row is null, or,
/// for `Struct` columns, when **all** of its inner fields are individually
/// all-null (the outer struct rows themselves may be non-null in that case).
#[cfg(feature = "polars")]
fn is_all_null_column(col: &Column) -> bool {
    let row_count = col.len();
    if col.is_empty() || col.null_count() == row_count {
        return true;
    }

    // Only struct columns need the deeper per-field inspection.
    if !matches!(col.dtype(), DataType::Struct(..)) {
        return false;
    }
    match col.as_materialized_series().struct_() {
        Ok(inner) => inner
            .fields_as_series()
            .iter()
            .all(|field| field.null_count() == field.len()),
        Err(_) => false,
    }
}
2330
/// Derive the `(name, frame)` pair for a sample that has no annotations.
///
/// Returns `None` when the sample has no image name, or when that name has no
/// UTF-8 file stem. A sample with a sequence name yields the sequence name
/// and the sample's frame number; any other sample yields the image file stem
/// and no frame.
#[cfg(feature = "polars")]
fn extract_annotation_name_from_sample(sample: &Sample) -> Option<(String, Option<u32>)> {
    // Resolved first: a sample without a usable image name is rejected even
    // when it belongs to a sequence.
    let stem = std::path::Path::new(sample.image_name.as_ref()?)
        .file_stem()?
        .to_str()?;

    Some(match sample.sequence_name.as_ref() {
        Some(sequence) => (sequence.clone(), sample.frame_number),
        None => (stem.to_string(), None),
    })
}
2346
2347// ============================================================================
2348// PURE FUNCTIONS FOR TESTABLE CORE LOGIC
2349// ============================================================================
2350
/// Extract sample name from image filename by:
/// 1. Removing file extension (everything after last dot)
/// 2. Removing .camera suffix if present
///
/// Neither step is applied when it would leave an empty name. The `.camera`
/// marker is only removed when it is the very end of the extension-less name;
/// a `.camera` that merely appears inside the name (for example
/// `"shot.cameraman"`) is left untouched.
///
/// # Examples
/// - "scene_001.camera.jpg" → "scene_001"
/// - "image.jpg" → "image"
/// - ".jpg" → ".jpg" (preserves filenames starting with dot)
/// - "shot.cameraman.jpg" → "shot.cameraman" (not a `.camera` suffix)
fn extract_sample_name(image_name: &str) -> String {
    // Step 1: Remove file extension (but preserve filenames starting with dot)
    let stem = match image_name.rsplit_once('.') {
        Some((stem, _extension)) if !stem.is_empty() => stem,
        _ => image_name,
    };

    // Step 2: Remove a trailing ".camera" only. `strip_suffix` is used rather
    // than splitting on the substring so that names merely containing
    // ".camera" are not truncated.
    match stem.strip_suffix(".camera") {
        Some(base) if !base.is_empty() => base.to_string(),
        _ => stem.to_string(),
    }
}
2385
2386/// Resolve a file for a given file type from sample data.
2387///
2388/// Returns the matching `SampleFile` if found, which may contain either
2389/// a URL (newer datasets) or inline data (legacy datasets).
2390///
2391/// # Arguments
2392/// * `file_type` - The type of file to resolve (e.g., LidarPcd, RadarPcd)
2393/// * `files` - The sample's file list
2394fn resolve_file<'a>(file_type: &FileType, files: &'a [SampleFile]) -> Option<&'a SampleFile> {
2395    match file_type {
2396        FileType::Image => None, // Image uses image_url field, not files
2397        FileType::All => None,   // All should be expanded before calling this
2398        file => {
2399            // Get all possible names for this file type (primary + aliases)
2400            let type_names = file_type_names(file);
2401            files
2402                .iter()
2403                .find(|f| type_names.contains(&f.r#type.as_str()))
2404        }
2405    }
2406}
2407
2408/// Returns all possible server-side names for a file type.
2409/// The server uses specific naming conventions in the STUDIO_DB_TYPE_MAP.
2410fn file_type_names(file_type: &FileType) -> Vec<&'static str> {
2411    match file_type {
2412        FileType::Image => vec!["image"],
2413        FileType::LidarPcd => vec!["lidar.pcd"],
2414        FileType::LidarDepth => vec!["lidar.depth", "depth.png", "depthmap"],
2415        FileType::LidarReflect => vec!["lidar.reflect"],
2416        FileType::RadarPcd => vec!["radar.pcd", "pcd"],
2417        FileType::RadarCube => vec!["radar.png", "cube"],
2418        FileType::All => vec![],
2419    }
2420}
2421
2422// ============================================================================
2423// DESERIALIZATION FORMAT CONVERSION HELPERS
2424// ============================================================================
2425
2426/// Convert annotations grouped format to flat Vec<Annotation>.
2427///
2428/// Pure function that handles the conversion from the server's legacy format
2429/// (HashMap<String, Vec<Annotation>>) to the flat Vec<Annotation>
2430/// representation.
2431///
2432/// # Arguments
2433/// * `map` - HashMap where keys are annotation types ("bbox", "box3d", "mask")
2434fn convert_annotations_map_to_vec(map: HashMap<String, Vec<Annotation>>) -> Vec<Annotation> {
2435    let mut all_annotations = Vec::new();
2436    if let Some(bbox_anns) = map.get("bbox") {
2437        all_annotations.extend(bbox_anns.clone());
2438    }
2439    if let Some(box3d_anns) = map.get("box3d") {
2440        all_annotations.extend(box3d_anns.clone());
2441    }
2442    if let Some(mask_anns) = map.get("mask") {
2443        all_annotations.extend(mask_anns.clone());
2444    }
2445    all_annotations
2446}
2447
2448// ============================================================================
2449// GPS/IMU VALIDATION HELPERS
2450// ============================================================================
2451
/// Check that a GPS fix lies inside the valid geographic ranges.
///
/// Pure function intended to catch data corruption or API issues early.
/// Finiteness of both values is checked before either range, so a NaN or
/// infinite coordinate is always reported as "not finite" rather than
/// "out of range".
///
/// # Arguments
/// * `lat` - Latitude in degrees
/// * `lon` - Longitude in degrees
///
/// # Returns
/// `Ok(())` if valid, `Err(String)` with a descriptive message for the first
/// failing check otherwise
///
/// # Valid Ranges
/// - Latitude: -90.0 to +90.0 degrees (inclusive)
/// - Longitude: -180.0 to +180.0 degrees (inclusive)
fn validate_gps_coordinates(lat: f64, lon: f64) -> Result<(), String> {
    let lat_bounds = -90.0..=90.0;
    let lon_bounds = -180.0..=180.0;

    let problem = if !lat.is_finite() {
        Some(format!("GPS latitude is not finite: {}", lat))
    } else if !lon.is_finite() {
        Some(format!("GPS longitude is not finite: {}", lon))
    } else if !lat_bounds.contains(&lat) {
        Some(format!("GPS latitude out of range [-90, 90]: {}", lat))
    } else if !lon_bounds.contains(&lon) {
        Some(format!("GPS longitude out of range [-180, 180]: {}", lon))
    } else {
        None
    };

    match problem {
        Some(message) => Err(message),
        None => Ok(()),
    }
}
2482
/// Check that IMU orientation angles are finite and inside their ranges.
///
/// Pure function intended to catch data corruption or sensor errors early.
/// All three angles are checked for finiteness (roll, pitch, yaw) before any
/// range is checked (again roll, pitch, yaw); the first failing check
/// determines the error message.
///
/// # Arguments
/// * `roll` - Roll angle in degrees
/// * `pitch` - Pitch angle in degrees
/// * `yaw` - Yaw angle in degrees
///
/// # Returns
/// `Ok(())` if valid, `Err(String)` with descriptive error message otherwise
///
/// # Valid Ranges
/// - Roll: -180.0 to +180.0 degrees
/// - Pitch: -90.0 to +90.0 degrees (typical gimbal lock range)
/// - Yaw: -180.0 to +180.0 degrees (a yaw expressed in the 0 to 360
///   convention must be normalized first; values above 180 are rejected)
fn validate_imu_orientation(roll: f64, pitch: f64, yaw: f64) -> Result<(), String> {
    let half_turn = -180.0..=180.0;
    let quarter_turn = -90.0..=90.0;

    let problem = if !roll.is_finite() {
        Some(format!("IMU roll is not finite: {}", roll))
    } else if !pitch.is_finite() {
        Some(format!("IMU pitch is not finite: {}", pitch))
    } else if !yaw.is_finite() {
        Some(format!("IMU yaw is not finite: {}", yaw))
    } else if !half_turn.contains(&roll) {
        Some(format!("IMU roll out of range [-180, 180]: {}", roll))
    } else if !quarter_turn.contains(&pitch) {
        Some(format!("IMU pitch out of range [-90, 90]: {}", pitch))
    } else if !half_turn.contains(&yaw) {
        Some(format!("IMU yaw out of range [-180, 180]: {}", yaw))
    } else {
        None
    };

    match problem {
        Some(message) => Err(message),
        None => Ok(()),
    }
}
2522
2523// ============================================================================
2524// MASK POLYGON CONVERSION HELPERS
2525// ============================================================================
2526
2527/// Unflatten coordinates with NaN separators back to nested polygon
2528/// structure.
2529///
2530/// Converts flat list of coordinates with NaN separators back to nested
2531/// polygon structure:
2532/// - Input: [x1, y1, x2, y2, NaN, x3, y3]
2533/// - Output: [[(x1, y1), (x2, y2)], [(x3, y3)]]
2534///
2535/// This function is used when parsing Arrow files to reconstruct the nested
2536/// polygon format required by the EdgeFirst Studio API.
2537///
2538/// # Examples
2539///
2540/// ```rust
2541/// use edgefirst_client::unflatten_polygon_coordinates;
2542///
2543/// let coords = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0];
2544/// let polygons = unflatten_polygon_coordinates(&coords);
2545///
2546/// assert_eq!(polygons.len(), 2);
2547/// assert_eq!(polygons[0], vec![(1.0, 2.0), (3.0, 4.0)]);
2548/// assert_eq!(polygons[1], vec![(5.0, 6.0)]);
2549/// ```
2550#[cfg(feature = "polars")]
pub fn unflatten_polygon_coordinates(coords: &[f32]) -> Vec<Vec<(f32, f32)>> {
    let mut rings: Vec<Vec<(f32, f32)>> = Vec::new();
    let mut ring: Vec<(f32, f32)> = Vec::new();
    let mut rest = coords;

    // Consume the slice from the front, one separator, pair, or stray value
    // at a time.
    loop {
        rest = match rest {
            [] => break,
            // NaN separator: close the ring in progress (empty rings are
            // never emitted) and move past the separator.
            [head, tail @ ..] if head.is_nan() => {
                if !ring.is_empty() {
                    rings.push(std::mem::take(&mut ring));
                }
                tail
            }
            // A complete (x, y) pair where neither value is NaN.
            [x, y, tail @ ..] if !y.is_nan() => {
                ring.push((*x, *y));
                tail
            }
            // Either an x whose y is NaN (malformed; the NaN is handled as a
            // separator on the next pass) or an odd trailing value: drop it.
            [_, tail @ ..] => tail,
        };
    }

    // Keep the final ring when the input did not end with a separator.
    if !ring.is_empty() {
        rings.push(ring);
    }

    rings
}
2584
2585#[cfg(test)]
2586mod tests {
2587    use super::*;
2588
2589    // ============================================================================
2590    // TEST HELPER FUNCTIONS (Pure Logic for Testing)
2591    // ============================================================================
2592
2593    /// Flatten legacy grouped annotation format to a single vector.
2594    ///
2595    /// Converts HashMap<String, Vec<Annotation>> (with bbox/box3d/mask keys)
2596    /// into a flat Vec<Annotation> in deterministic order.
2597    fn flatten_annotation_map(
2598        map: std::collections::HashMap<String, Vec<Annotation>>,
2599    ) -> Vec<Annotation> {
2600        let mut all_annotations = Vec::new();
2601
2602        // Process in fixed order for deterministic results
2603        for key in ["bbox", "box3d", "mask"] {
2604            if let Some(mut anns) = map.get(key).cloned() {
2605                all_annotations.append(&mut anns);
2606            }
2607        }
2608
2609        all_annotations
2610    }
2611
    /// Get the JSON field name for the Annotation group field (for tests).
    ///
    /// Always returns the literal `"group_name"`.
    fn annotation_group_field_name() -> &'static str {
        "group_name"
    }
2616
    /// Get the JSON field name for the Annotation object_id field (for tests).
    ///
    /// Always returns the literal `"object_reference"`.
    fn annotation_object_id_field_name() -> &'static str {
        "object_reference"
    }
2621
    /// Get the accepted alias for the Annotation object_id field (for tests).
    ///
    /// Always returns the literal `"object_id"`.
    fn annotation_object_id_alias() -> &'static str {
        "object_id"
    }
2626
2627    /// Validate that annotation field names match expected values in JSON (for
2628    /// tests).
2629    fn validate_annotation_field_names(
2630        json_str: &str,
2631        expected_group: bool,
2632        expected_object_ref: bool,
2633    ) -> Result<(), String> {
2634        if expected_group && !json_str.contains("\"group_name\"") {
2635            return Err("Missing expected field: group_name".to_string());
2636        }
2637        if expected_object_ref && !json_str.contains("\"object_reference\"") {
2638            return Err("Missing expected field: object_reference".to_string());
2639        }
2640        Ok(())
2641    }
2642
2643    // ==== FileType Conversion Tests ====
2644    #[test]
2645    fn test_file_type_conversions() {
2646        // to_string() returns server API type names
2647        let api_cases = vec![
2648            (FileType::Image, "image"),
2649            (FileType::LidarPcd, "lidar.pcd"),
2650            (FileType::LidarDepth, "lidar.depth"),
2651            (FileType::LidarReflect, "lidar.reflect"),
2652            (FileType::RadarPcd, "radar.pcd"),
2653            (FileType::RadarCube, "radar.png"),
2654        ];
2655
2656        // file_extension() returns file extensions for saving
2657        let ext_cases = vec![
2658            (FileType::Image, "jpg"),
2659            (FileType::LidarPcd, "lidar.pcd"),
2660            (FileType::LidarDepth, "lidar.png"),
2661            (FileType::LidarReflect, "lidar.jpg"),
2662            (FileType::RadarPcd, "radar.pcd"),
2663            (FileType::RadarCube, "radar.png"),
2664        ];
2665
2666        // Test: Display → to_string() returns server API names
2667        for (file_type, expected_str) in &api_cases {
2668            assert_eq!(file_type.to_string(), *expected_str);
2669        }
2670
2671        // Test: file_extension() returns correct extensions
2672        for (file_type, expected_ext) in &ext_cases {
2673            assert_eq!(file_type.file_extension(), *expected_ext);
2674        }
2675
2676        // Test: try_from() string parsing (accepts multiple aliases)
2677        assert_eq!(
2678            FileType::try_from("lidar.depth").unwrap(),
2679            FileType::LidarDepth
2680        );
2681        assert_eq!(
2682            FileType::try_from("lidar.png").unwrap(),
2683            FileType::LidarDepth
2684        );
2685        assert_eq!(
2686            FileType::try_from("depth.png").unwrap(),
2687            FileType::LidarDepth
2688        );
2689        assert_eq!(
2690            FileType::try_from("lidar.reflect").unwrap(),
2691            FileType::LidarReflect
2692        );
2693        assert_eq!(
2694            FileType::try_from("lidar.jpg").unwrap(),
2695            FileType::LidarReflect
2696        );
2697        assert_eq!(
2698            FileType::try_from("lidar.jpeg").unwrap(),
2699            FileType::LidarReflect
2700        );
2701
2702        // Test: Invalid input
2703        assert!(FileType::try_from("invalid").is_err());
2704
2705        // Test: Round-trip (Display → try_from)
2706        for (file_type, _) in &api_cases {
2707            let s = file_type.to_string();
2708            let parsed = FileType::try_from(s.as_str()).unwrap();
2709            assert_eq!(parsed, *file_type);
2710        }
2711    }
2712
2713    // ==== AnnotationType Conversion Tests ====
2714    #[test]
2715    fn test_annotation_type_conversions() {
2716        let cases = vec![
2717            (AnnotationType::Box2d, "box2d"),
2718            (AnnotationType::Box3d, "box3d"),
2719            (AnnotationType::Polygon, "polygon"),
2720            (AnnotationType::Mask, "mask"),
2721        ];
2722
2723        // Test: Display → to_string()
2724        for (ann_type, expected_str) in &cases {
2725            assert_eq!(ann_type.to_string(), *expected_str);
2726        }
2727
2728        // Test: try_from() string parsing
2729        assert_eq!(
2730            AnnotationType::try_from("box2d").unwrap(),
2731            AnnotationType::Box2d
2732        );
2733        assert_eq!(
2734            AnnotationType::try_from("box3d").unwrap(),
2735            AnnotationType::Box3d
2736        );
2737        assert_eq!(
2738            AnnotationType::try_from("polygon").unwrap(),
2739            AnnotationType::Polygon
2740        );
2741        // "mask" maps to Polygon for backward compat
2742        assert_eq!(
2743            AnnotationType::try_from("mask").unwrap(),
2744            AnnotationType::Polygon
2745        );
2746        // "raster" maps to Mask
2747        assert_eq!(
2748            AnnotationType::try_from("raster").unwrap(),
2749            AnnotationType::Mask
2750        );
2751
2752        // Test: From<String> (backward compatibility)
2753        assert_eq!(
2754            AnnotationType::from("box2d".to_string()),
2755            AnnotationType::Box2d
2756        );
2757        assert_eq!(
2758            AnnotationType::from("box3d".to_string()),
2759            AnnotationType::Box3d
2760        );
2761        assert_eq!(
2762            AnnotationType::from("polygon".to_string()),
2763            AnnotationType::Polygon
2764        );
2765        // "mask" string maps to Polygon for backward compat
2766        assert_eq!(
2767            AnnotationType::from("mask".to_string()),
2768            AnnotationType::Polygon
2769        );
2770
2771        // Invalid defaults to Box2d for backward compatibility
2772        assert_eq!(
2773            AnnotationType::from("invalid".to_string()),
2774            AnnotationType::Box2d
2775        );
2776
2777        // Test: Invalid input
2778        assert!(AnnotationType::try_from("invalid").is_err());
2779
2780        // Test: Round-trip (Display → try_from)
2781        // Note: Polygon round-trips ("polygon" → Polygon), but Mask does not
2782        // because "mask" → Polygon (backward compat). Mask displays as "mask"
2783        // but parses to Polygon.
2784        assert_eq!(
2785            AnnotationType::try_from(AnnotationType::Box2d.to_string().as_str()).unwrap(),
2786            AnnotationType::Box2d
2787        );
2788        assert_eq!(
2789            AnnotationType::try_from(AnnotationType::Box3d.to_string().as_str()).unwrap(),
2790            AnnotationType::Box3d
2791        );
2792        assert_eq!(
2793            AnnotationType::try_from(AnnotationType::Polygon.to_string().as_str()).unwrap(),
2794            AnnotationType::Polygon
2795        );
2796    }
2797
2798    // ==== Pure Function: extract_sample_name Tests ====
2799    #[test]
2800    fn test_extract_sample_name_with_extension_and_camera() {
2801        assert_eq!(extract_sample_name("scene_001.camera.jpg"), "scene_001");
2802    }
2803
2804    #[test]
2805    fn test_extract_sample_name_multiple_dots() {
2806        assert_eq!(extract_sample_name("image.v2.camera.png"), "image.v2");
2807    }
2808
2809    #[test]
2810    fn test_extract_sample_name_extension_only() {
2811        assert_eq!(extract_sample_name("test.jpg"), "test");
2812    }
2813
2814    #[test]
2815    fn test_extract_sample_name_no_extension() {
2816        assert_eq!(extract_sample_name("test"), "test");
2817    }
2818
2819    #[test]
2820    fn test_extract_sample_name_edge_case_dot_prefix() {
2821        assert_eq!(extract_sample_name(".jpg"), ".jpg");
2822    }
2823
2824    // ==== File Resolution Tests ====
2825    #[test]
2826    fn test_resolve_file_image_type_returns_none() {
2827        // Image type uses image_url field, not files array
2828        let files = vec![];
2829        let result = resolve_file(&FileType::Image, &files);
2830        assert!(result.is_none());
2831    }
2832
2833    #[test]
2834    fn test_resolve_file_lidar_pcd() {
2835        let files = vec![
2836            SampleFile::with_url(
2837                "lidar.pcd".to_string(),
2838                "https://example.com/file.pcd".to_string(),
2839            ),
2840            SampleFile::with_url(
2841                "radar.pcd".to_string(),
2842                "https://example.com/radar.pcd".to_string(),
2843            ),
2844        ];
2845        let result = resolve_file(&FileType::LidarPcd, &files);
2846        assert!(result.is_some());
2847        assert_eq!(result.unwrap().url(), Some("https://example.com/file.pcd"));
2848    }
2849
2850    #[test]
2851    fn test_resolve_file_not_found() {
2852        let files = vec![SampleFile::with_url(
2853            "lidar.pcd".to_string(),
2854            "https://example.com/file.pcd".to_string(),
2855        )];
2856        // Requesting radar.pcd which doesn't exist in files
2857        let result = resolve_file(&FileType::RadarPcd, &files);
2858        assert!(result.is_none());
2859    }
2860
2861    #[test]
2862    fn test_resolve_file_lidar_depth() {
2863        // Server returns "lidar.depth" for LiDAR depth data
2864        let files = vec![SampleFile::with_url(
2865            "lidar.depth".to_string(),
2866            "https://example.com/depth.png".to_string(),
2867        )];
2868        let result = resolve_file(&FileType::LidarDepth, &files);
2869        assert!(result.is_some());
2870        assert_eq!(result.unwrap().url(), Some("https://example.com/depth.png"));
2871    }
2872
2873    #[test]
2874    fn test_resolve_file_lidar_reflect() {
2875        // Server returns "lidar.reflect" for LiDAR reflectance data
2876        let files = vec![SampleFile::with_url(
2877            "lidar.reflect".to_string(),
2878            "https://example.com/reflect.png".to_string(),
2879        )];
2880        let result = resolve_file(&FileType::LidarReflect, &files);
2881        assert!(result.is_some());
2882        assert_eq!(
2883            result.unwrap().url(),
2884            Some("https://example.com/reflect.png")
2885        );
2886    }
2887
2888    #[test]
2889    fn test_resolve_file_radar_cube() {
2890        // Server returns "radar.png" or "cube" for radar cube data
2891        let files = vec![SampleFile::with_url(
2892            "radar.png".to_string(),
2893            "https://example.com/radar.png".to_string(),
2894        )];
2895        let result = resolve_file(&FileType::RadarCube, &files);
2896        assert!(result.is_some());
2897        assert_eq!(result.unwrap().url(), Some("https://example.com/radar.png"));
2898    }
2899
2900    #[test]
2901    fn test_resolve_file_with_inline_data() {
2902        // Legacy datasets may have inline data instead of URLs
2903        let files = vec![SampleFile::with_data(
2904            "radar.pcd".to_string(),
2905            "SGVsbG8gV29ybGQ=".to_string(), // base64 "Hello World"
2906        )];
2907        let result = resolve_file(&FileType::RadarPcd, &files);
2908        assert!(result.is_some());
2909        let file = result.unwrap();
2910        assert!(file.url().is_none());
2911        assert_eq!(file.data(), Some("SGVsbG8gV29ybGQ="));
2912    }
2913
2914    #[test]
2915    fn test_convert_annotations_map_to_vec_with_bbox() {
2916        let mut map = HashMap::new();
2917        let bbox_ann = Annotation::new();
2918        map.insert("bbox".to_string(), vec![bbox_ann.clone()]);
2919
2920        let annotations = convert_annotations_map_to_vec(map);
2921        assert_eq!(annotations.len(), 1);
2922    }
2923
2924    #[test]
2925    fn test_convert_annotations_map_to_vec_all_types() {
2926        let mut map = HashMap::new();
2927        map.insert("bbox".to_string(), vec![Annotation::new()]);
2928        map.insert("box3d".to_string(), vec![Annotation::new()]);
2929        map.insert("mask".to_string(), vec![Annotation::new()]);
2930
2931        let annotations = convert_annotations_map_to_vec(map);
2932        assert_eq!(annotations.len(), 3);
2933    }
2934
2935    #[test]
2936    fn test_convert_annotations_map_to_vec_empty() {
2937        let map = HashMap::new();
2938        let annotations = convert_annotations_map_to_vec(map);
2939        assert_eq!(annotations.len(), 0);
2940    }
2941
2942    #[test]
2943    fn test_convert_annotations_map_to_vec_unknown_type_ignored() {
2944        let mut map = HashMap::new();
2945        map.insert("unknown".to_string(), vec![Annotation::new()]);
2946
2947        let annotations = convert_annotations_map_to_vec(map);
2948        // Unknown types are ignored
2949        assert_eq!(annotations.len(), 0);
2950    }
2951
2952    // ==== Annotation Field Mapping Tests ====
2953    #[test]
2954    fn test_annotation_group_field_name() {
2955        assert_eq!(annotation_group_field_name(), "group_name");
2956    }
2957
2958    #[test]
2959    fn test_annotation_object_id_field_name() {
2960        assert_eq!(annotation_object_id_field_name(), "object_reference");
2961    }
2962
2963    #[test]
2964    fn test_annotation_object_id_alias() {
2965        assert_eq!(annotation_object_id_alias(), "object_id");
2966    }
2967
2968    #[test]
2969    fn test_validate_annotation_field_names_success() {
2970        let json = r#"{"group_name":"train","object_reference":"obj1"}"#;
2971        assert!(validate_annotation_field_names(json, true, true).is_ok());
2972    }
2973
2974    #[test]
2975    fn test_validate_annotation_field_names_missing_group() {
2976        let json = r#"{"object_reference":"obj1"}"#;
2977        let result = validate_annotation_field_names(json, true, false);
2978        assert!(result.is_err());
2979        assert!(result.unwrap_err().contains("group_name"));
2980    }
2981
2982    #[test]
2983    fn test_validate_annotation_field_names_missing_object_ref() {
2984        let json = r#"{"group_name":"train"}"#;
2985        let result = validate_annotation_field_names(json, false, true);
2986        assert!(result.is_err());
2987        assert!(result.unwrap_err().contains("object_reference"));
2988    }
2989
2990    #[test]
2991    fn test_annotation_serialization_field_names() {
2992        // Test that Annotation serializes with correct field names
2993        let mut ann = Annotation::new();
2994        ann.set_group(Some("train".to_string()));
2995        ann.set_object_id(Some("obj1".to_string()));
2996
2997        let json = serde_json::to_string(&ann).unwrap();
2998        // Verify JSON contains correct field names
2999        assert!(validate_annotation_field_names(&json, true, true).is_ok());
3000    }
3001
3002    // ==== GPS/IMU Validation Tests ====
3003    #[test]
3004    fn test_validate_gps_coordinates_valid() {
3005        assert!(validate_gps_coordinates(37.7749, -122.4194).is_ok()); // San Francisco
3006        assert!(validate_gps_coordinates(0.0, 0.0).is_ok()); // Null Island
3007        assert!(validate_gps_coordinates(90.0, 180.0).is_ok()); // Edge cases
3008        assert!(validate_gps_coordinates(-90.0, -180.0).is_ok()); // Edge cases
3009    }
3010
3011    #[test]
3012    fn test_validate_gps_coordinates_invalid_latitude() {
3013        let result = validate_gps_coordinates(91.0, 0.0);
3014        assert!(result.is_err());
3015        assert!(result.unwrap_err().contains("latitude out of range"));
3016
3017        let result = validate_gps_coordinates(-91.0, 0.0);
3018        assert!(result.is_err());
3019        assert!(result.unwrap_err().contains("latitude out of range"));
3020    }
3021
3022    #[test]
3023    fn test_validate_gps_coordinates_invalid_longitude() {
3024        let result = validate_gps_coordinates(0.0, 181.0);
3025        assert!(result.is_err());
3026        assert!(result.unwrap_err().contains("longitude out of range"));
3027
3028        let result = validate_gps_coordinates(0.0, -181.0);
3029        assert!(result.is_err());
3030        assert!(result.unwrap_err().contains("longitude out of range"));
3031    }
3032
3033    #[test]
3034    fn test_validate_gps_coordinates_non_finite() {
3035        let result = validate_gps_coordinates(f64::NAN, 0.0);
3036        assert!(result.is_err());
3037        assert!(result.unwrap_err().contains("not finite"));
3038
3039        let result = validate_gps_coordinates(0.0, f64::INFINITY);
3040        assert!(result.is_err());
3041        assert!(result.unwrap_err().contains("not finite"));
3042    }
3043
3044    #[test]
3045    fn test_validate_imu_orientation_valid() {
3046        assert!(validate_imu_orientation(0.0, 0.0, 0.0).is_ok());
3047        assert!(validate_imu_orientation(45.0, 30.0, 90.0).is_ok());
3048        assert!(validate_imu_orientation(180.0, 90.0, -180.0).is_ok()); // Edge cases
3049        assert!(validate_imu_orientation(-180.0, -90.0, 180.0).is_ok()); // Edge cases
3050    }
3051
3052    #[test]
3053    fn test_validate_imu_orientation_invalid_roll() {
3054        let result = validate_imu_orientation(181.0, 0.0, 0.0);
3055        assert!(result.is_err());
3056        assert!(result.unwrap_err().contains("roll out of range"));
3057
3058        let result = validate_imu_orientation(-181.0, 0.0, 0.0);
3059        assert!(result.is_err());
3060    }
3061
3062    #[test]
3063    fn test_validate_imu_orientation_invalid_pitch() {
3064        let result = validate_imu_orientation(0.0, 91.0, 0.0);
3065        assert!(result.is_err());
3066        assert!(result.unwrap_err().contains("pitch out of range"));
3067
3068        let result = validate_imu_orientation(0.0, -91.0, 0.0);
3069        assert!(result.is_err());
3070    }
3071
3072    #[test]
3073    fn test_validate_imu_orientation_non_finite() {
3074        let result = validate_imu_orientation(f64::NAN, 0.0, 0.0);
3075        assert!(result.is_err());
3076        assert!(result.unwrap_err().contains("not finite"));
3077
3078        let result = validate_imu_orientation(0.0, f64::INFINITY, 0.0);
3079        assert!(result.is_err());
3080
3081        let result = validate_imu_orientation(0.0, 0.0, f64::NEG_INFINITY);
3082        assert!(result.is_err());
3083    }
3084
3085    // ==== Polygon Unflattening Tests ====
3086    #[test]
3087    #[cfg(feature = "polars")]
3088    fn test_unflatten_polygon_coordinates_single_polygon() {
3089        let coords = vec![1.0, 2.0, 3.0, 4.0];
3090        let result = unflatten_polygon_coordinates(&coords);
3091
3092        assert_eq!(result.len(), 1);
3093        assert_eq!(result[0].len(), 2);
3094        assert_eq!(result[0][0], (1.0, 2.0));
3095        assert_eq!(result[0][1], (3.0, 4.0));
3096    }
3097
3098    #[test]
3099    #[cfg(feature = "polars")]
3100    fn test_unflatten_polygon_coordinates_multiple_polygons() {
3101        let coords = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0, 7.0, 8.0];
3102        let result = unflatten_polygon_coordinates(&coords);
3103
3104        assert_eq!(result.len(), 2);
3105        assert_eq!(result[0].len(), 2);
3106        assert_eq!(result[0][0], (1.0, 2.0));
3107        assert_eq!(result[0][1], (3.0, 4.0));
3108        assert_eq!(result[1].len(), 2);
3109        assert_eq!(result[1][0], (5.0, 6.0));
3110        assert_eq!(result[1][1], (7.0, 8.0));
3111    }
3112
3113    #[test]
3114    #[cfg(feature = "polars")]
3115    fn test_unflatten_polygon_coordinates_roundtrip() {
3116        // Test that unflatten correctly reconstructs from NaN-separated flat coords
3117        let flat = vec![1.0, 2.0, 3.0, 4.0, f32::NAN, 5.0, 6.0, 7.0, 8.0];
3118        let result = unflatten_polygon_coordinates(&flat);
3119
3120        let expected = vec![vec![(1.0, 2.0), (3.0, 4.0)], vec![(5.0, 6.0), (7.0, 8.0)]];
3121        assert_eq!(result, expected);
3122    }
3123
3124    // ==== Annotation Format Flattening Tests ====
3125    #[test]
3126    fn test_flatten_annotation_map_all_types() {
3127        use std::collections::HashMap;
3128
3129        let mut map = HashMap::new();
3130
3131        // Create test annotations
3132        let mut bbox_ann = Annotation::new();
3133        bbox_ann.set_label(Some("bbox_label".to_string()));
3134
3135        let mut box3d_ann = Annotation::new();
3136        box3d_ann.set_label(Some("box3d_label".to_string()));
3137
3138        let mut mask_ann = Annotation::new();
3139        mask_ann.set_label(Some("mask_label".to_string()));
3140
3141        map.insert("bbox".to_string(), vec![bbox_ann.clone()]);
3142        map.insert("box3d".to_string(), vec![box3d_ann.clone()]);
3143        map.insert("mask".to_string(), vec![mask_ann.clone()]);
3144
3145        let result = flatten_annotation_map(map);
3146
3147        assert_eq!(result.len(), 3);
3148        // Check ordering: bbox, box3d, mask
3149        assert_eq!(result[0].label(), Some(&"bbox_label".to_string()));
3150        assert_eq!(result[1].label(), Some(&"box3d_label".to_string()));
3151        assert_eq!(result[2].label(), Some(&"mask_label".to_string()));
3152    }
3153
3154    #[test]
3155    fn test_flatten_annotation_map_single_type() {
3156        use std::collections::HashMap;
3157
3158        let mut map = HashMap::new();
3159        let mut bbox_ann = Annotation::new();
3160        bbox_ann.set_label(Some("test".to_string()));
3161        map.insert("bbox".to_string(), vec![bbox_ann]);
3162
3163        let result = flatten_annotation_map(map);
3164
3165        assert_eq!(result.len(), 1);
3166        assert_eq!(result[0].label(), Some(&"test".to_string()));
3167    }
3168
3169    #[test]
3170    fn test_flatten_annotation_map_empty() {
3171        use std::collections::HashMap;
3172
3173        let map = HashMap::new();
3174        let result = flatten_annotation_map(map);
3175
3176        assert_eq!(result.len(), 0);
3177    }
3178
3179    #[test]
3180    fn test_flatten_annotation_map_deterministic_order() {
3181        use std::collections::HashMap;
3182
3183        let mut map = HashMap::new();
3184
3185        let mut bbox_ann = Annotation::new();
3186        bbox_ann.set_label(Some("bbox".to_string()));
3187
3188        let mut box3d_ann = Annotation::new();
3189        box3d_ann.set_label(Some("box3d".to_string()));
3190
3191        let mut mask_ann = Annotation::new();
3192        mask_ann.set_label(Some("mask".to_string()));
3193
3194        // Insert in reverse order to test deterministic ordering
3195        map.insert("mask".to_string(), vec![mask_ann]);
3196        map.insert("box3d".to_string(), vec![box3d_ann]);
3197        map.insert("bbox".to_string(), vec![bbox_ann]);
3198
3199        let result = flatten_annotation_map(map);
3200
3201        // Should be bbox, box3d, mask regardless of insertion order
3202        assert_eq!(result.len(), 3);
3203        assert_eq!(result[0].label(), Some(&"bbox".to_string()));
3204        assert_eq!(result[1].label(), Some(&"box3d".to_string()));
3205        assert_eq!(result[2].label(), Some(&"mask".to_string()));
3206    }
3207
3208    // ==== Box2d Tests ====
3209    #[test]
3210    fn test_box2d_construction_and_accessors() {
3211        // Test case 1: Basic construction with positive coordinates
3212        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
3213        assert_eq!(
3214            (bbox.left(), bbox.top(), bbox.width(), bbox.height()),
3215            (10.0, 20.0, 100.0, 50.0)
3216        );
3217
3218        // Test case 2: Center calculations
3219        assert_eq!((bbox.cx(), bbox.cy()), (60.0, 45.0)); // 10+50, 20+25
3220
3221        // Test case 3: Zero origin
3222        let bbox = Box2d::new(0.0, 0.0, 640.0, 480.0);
3223        assert_eq!(
3224            (bbox.left(), bbox.top(), bbox.width(), bbox.height()),
3225            (0.0, 0.0, 640.0, 480.0)
3226        );
3227        assert_eq!((bbox.cx(), bbox.cy()), (320.0, 240.0));
3228    }
3229
3230    #[test]
3231    fn test_box2d_center_calculation() {
3232        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
3233
3234        // Center = position + size/2
3235        assert_eq!(bbox.cx(), 60.0); // 10 + 100/2
3236        assert_eq!(bbox.cy(), 45.0); // 20 + 50/2
3237    }
3238
3239    #[test]
3240    fn test_box2d_zero_dimensions() {
3241        let bbox = Box2d::new(10.0, 20.0, 0.0, 0.0);
3242
3243        // When width/height are zero, center = position
3244        assert_eq!(bbox.cx(), 10.0);
3245        assert_eq!(bbox.cy(), 20.0);
3246    }
3247
3248    #[test]
3249    fn test_box2d_negative_dimensions() {
3250        let bbox = Box2d::new(100.0, 100.0, -50.0, -50.0);
3251
3252        // Negative dimensions create inverted boxes (valid edge case)
3253        assert_eq!(bbox.width(), -50.0);
3254        assert_eq!(bbox.height(), -50.0);
3255        assert_eq!(bbox.cx(), 75.0); // 100 + (-50)/2
3256        assert_eq!(bbox.cy(), 75.0); // 100 + (-50)/2
3257    }
3258
3259    // ==== Box3d Tests ====
3260    #[test]
3261    fn test_box3d_construction_and_accessors() {
3262        // Test case 1: Basic 3D construction
3263        let bbox = Box3d::new(1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
3264        assert_eq!((bbox.cx(), bbox.cy(), bbox.cz()), (1.0, 2.0, 3.0));
3265        assert_eq!(
3266            (bbox.width(), bbox.height(), bbox.length()),
3267            (4.0, 5.0, 6.0)
3268        );
3269
3270        // Test case 2: Corners calculation with offset center
3271        let bbox = Box3d::new(10.0, 20.0, 30.0, 4.0, 6.0, 8.0);
3272        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (8.0, 17.0, 26.0)); // 10-2, 20-3, 30-4
3273
3274        // Test case 3: Center at origin with negative corners
3275        let bbox = Box3d::new(0.0, 0.0, 0.0, 2.0, 3.0, 4.0);
3276        assert_eq!((bbox.cx(), bbox.cy(), bbox.cz()), (0.0, 0.0, 0.0));
3277        assert_eq!(
3278            (bbox.width(), bbox.height(), bbox.length()),
3279            (2.0, 3.0, 4.0)
3280        );
3281        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (-1.0, -1.5, -2.0));
3282    }
3283
3284    #[test]
3285    fn test_box3d_center_calculation() {
3286        let bbox = Box3d::new(10.0, 20.0, 30.0, 100.0, 50.0, 40.0);
3287
3288        // Center values as specified in constructor
3289        assert_eq!(bbox.cx(), 10.0);
3290        assert_eq!(bbox.cy(), 20.0);
3291        assert_eq!(bbox.cz(), 30.0);
3292    }
3293
3294    #[test]
3295    fn test_box3d_zero_dimensions() {
3296        let bbox = Box3d::new(5.0, 10.0, 15.0, 0.0, 0.0, 0.0);
3297
3298        // When all dimensions are zero, corners = center
3299        assert_eq!(bbox.cx(), 5.0);
3300        assert_eq!(bbox.cy(), 10.0);
3301        assert_eq!(bbox.cz(), 15.0);
3302        assert_eq!((bbox.left(), bbox.top(), bbox.front()), (5.0, 10.0, 15.0));
3303    }
3304
3305    #[test]
3306    fn test_box3d_negative_dimensions() {
3307        let bbox = Box3d::new(100.0, 100.0, 100.0, -50.0, -50.0, -50.0);
3308
3309        // Negative dimensions create inverted boxes
3310        assert_eq!(bbox.width(), -50.0);
3311        assert_eq!(bbox.height(), -50.0);
3312        assert_eq!(bbox.length(), -50.0);
3313        assert_eq!(
3314            (bbox.left(), bbox.top(), bbox.front()),
3315            (125.0, 125.0, 125.0)
3316        );
3317    }
3318
3319    // ==== Polygon Tests ====
3320    #[test]
3321    fn test_polygon_creation_and_deserialization() {
3322        // Test case 1: Direct construction
3323        let rings = vec![vec![(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0)]];
3324        let polygon = Polygon::new(rings.clone());
3325        assert_eq!(polygon.rings, rings);
3326
3327        // Test case 2: Deserialization from legacy format (field name "polygon")
3328        let legacy = serde_json::json!({
3329            "polygon": {
3330                "polygon": [[
3331                    [0.0_f32, 0.0_f32],
3332                    [1.0_f32, 0.0_f32],
3333                    [1.0_f32, 1.0_f32]
3334                ]]
3335            }
3336        });
3337
3338        #[derive(serde::Deserialize)]
3339        struct Wrapper {
3340            polygon: Polygon,
3341        }
3342
3343        let parsed: Wrapper = serde_json::from_value(legacy).unwrap();
3344        assert_eq!(parsed.polygon.rings.len(), 1);
3345        assert_eq!(parsed.polygon.rings[0].len(), 3);
3346    }
3347
3348    // ==== Sample Tests ====
3349    #[test]
3350    fn test_sample_construction_and_accessors() {
3351        // Test case 1: New sample is empty
3352        let sample = Sample::new();
3353        assert_eq!(sample.id(), None);
3354        assert_eq!(sample.image_name(), None);
3355        assert_eq!(sample.width(), None);
3356        assert_eq!(sample.height(), None);
3357
3358        // Test case 2: Sample with populated fields
3359        let mut sample = Sample::new();
3360        sample.image_name = Some("test.jpg".to_string());
3361        sample.width = Some(1920);
3362        sample.height = Some(1080);
3363        sample.group = Some("group1".to_string());
3364
3365        assert_eq!(sample.image_name(), Some("test.jpg"));
3366        assert_eq!(sample.width(), Some(1920));
3367        assert_eq!(sample.height(), Some(1080));
3368        assert_eq!(sample.group(), Some(&"group1".to_string()));
3369    }
3370
3371    #[test]
3372    fn test_sample_name_extraction_from_image_name() {
3373        let mut sample = Sample::new();
3374
3375        // Test case 1: Basic image name with extension
3376        sample.image_name = Some("test_image.jpg".to_string());
3377        assert_eq!(sample.name(), Some("test_image".to_string()));
3378
3379        // Test case 2: Image name with .camera suffix
3380        sample.image_name = Some("test_image.camera.jpg".to_string());
3381        assert_eq!(sample.name(), Some("test_image".to_string()));
3382
3383        // Test case 3: Image name without extension
3384        sample.image_name = Some("test_image".to_string());
3385        assert_eq!(sample.name(), Some("test_image".to_string()));
3386    }
3387
3388    // ==== Annotation Tests ====
3389    #[test]
3390    fn test_annotation_construction_and_setters() {
3391        // Test case 1: New annotation is empty
3392        let ann = Annotation::new();
3393        assert_eq!(ann.sample_id(), None);
3394        assert_eq!(ann.label(), None);
3395        assert_eq!(ann.box2d(), None);
3396        assert_eq!(ann.box3d(), None);
3397        assert_eq!(ann.polygon(), None);
3398
3399        // Test case 2: Setting annotation fields
3400        let mut ann = Annotation::new();
3401        ann.set_label(Some("car".to_string()));
3402        assert_eq!(ann.label(), Some(&"car".to_string()));
3403
3404        ann.set_label_index(Some(42));
3405        assert_eq!(ann.label_index(), Some(42));
3406
3407        // Test case 3: Setting bounding box
3408        let bbox = Box2d::new(10.0, 20.0, 100.0, 50.0);
3409        ann.set_box2d(Some(bbox.clone()));
3410        assert!(ann.box2d().is_some());
3411        assert_eq!(ann.box2d().unwrap().left(), 10.0);
3412    }
3413
3414    // ==== SampleFile Tests ====
3415    #[test]
3416    fn test_sample_file_with_url_and_filename() {
3417        // Test case 1: SampleFile with URL
3418        let file = SampleFile::with_url(
3419            "lidar.pcd".to_string(),
3420            "https://example.com/file.pcd".to_string(),
3421        );
3422        assert_eq!(file.file_type(), "lidar.pcd");
3423        assert_eq!(file.url(), Some("https://example.com/file.pcd"));
3424        assert_eq!(file.filename(), None);
3425
3426        // Test case 2: SampleFile with local filename
3427        let file = SampleFile::with_filename("image".to_string(), "test.jpg".to_string());
3428        assert_eq!(file.file_type(), "image");
3429        assert_eq!(file.filename(), Some("test.jpg"));
3430        assert_eq!(file.url(), None);
3431    }
3432
3433    // ==== Sample GPS/IMU Deserialization Tests ====
3434    #[test]
3435    fn test_sample_deserializes_gps_imu_from_sensors() {
3436        use serde_json::json;
3437
3438        // Test: GPS and IMU data in sensors array is extracted to location field
3439        let sample_json = json!({
3440            "id": 123,
3441            "image_name": "test.jpg",
3442            "sensors": [
3443                {"gps": {"lat": 37.7749, "lon": -122.4194}},
3444                {"imu": {"roll": 1.5, "pitch": 2.5, "yaw": 3.5}},
3445                {"radar.pcd": "https://example.com/radar.pcd"}
3446            ]
3447        });
3448
3449        let sample: Sample = serde_json::from_value(sample_json).unwrap();
3450
3451        // Verify location was extracted
3452        assert!(sample.location.is_some());
3453        let location = sample.location.as_ref().unwrap();
3454
3455        // Verify GPS data
3456        assert!(location.gps.is_some());
3457        let gps = location.gps.as_ref().unwrap();
3458        assert!((gps.lat - 37.7749).abs() < 0.0001);
3459        assert!((gps.lon - (-122.4194)).abs() < 0.0001);
3460
3461        // Verify IMU data
3462        assert!(location.imu.is_some());
3463        let imu = location.imu.as_ref().unwrap();
3464        assert!((imu.roll - 1.5).abs() < 0.0001);
3465        assert!((imu.pitch - 2.5).abs() < 0.0001);
3466        assert!((imu.yaw - 3.5).abs() < 0.0001);
3467
3468        // Verify files were also extracted (non-GPS/IMU entries)
3469        assert_eq!(sample.files.len(), 1);
3470        assert_eq!(sample.files[0].file_type(), "radar.pcd");
3471        assert_eq!(sample.files[0].url(), Some("https://example.com/radar.pcd"));
3472    }
3473
3474    #[test]
3475    fn test_sample_deserializes_gps_only() {
3476        use serde_json::json;
3477
3478        // Test: Only GPS data in sensors
3479        let sample_json = json!({
3480            "id": 456,
3481            "sensors": [
3482                {"gps": {"lat": 40.7128, "lon": -74.0060}}
3483            ]
3484        });
3485
3486        let sample: Sample = serde_json::from_value(sample_json).unwrap();
3487
3488        assert!(sample.location.is_some());
3489        let location = sample.location.as_ref().unwrap();
3490
3491        assert!(location.gps.is_some());
3492        assert!(location.imu.is_none());
3493
3494        let gps = location.gps.as_ref().unwrap();
3495        assert!((gps.lat - 40.7128).abs() < 0.0001);
3496        assert!((gps.lon - (-74.0060)).abs() < 0.0001);
3497    }
3498
3499    #[test]
3500    fn test_sample_deserializes_without_location() {
3501        use serde_json::json;
3502
3503        // Test: Sample with only file sensors (no GPS/IMU)
3504        let sample_json = json!({
3505            "id": 789,
3506            "sensors": [
3507                {"radar.pcd": "https://example.com/radar.pcd"},
3508                {"lidar.pcd": "https://example.com/lidar.pcd"}
3509            ]
3510        });
3511
3512        let sample: Sample = serde_json::from_value(sample_json).unwrap();
3513
3514        // No location data
3515        assert!(sample.location.is_none());
3516
3517        // Both files extracted
3518        assert_eq!(sample.files.len(), 2);
3519    }
3520
3521    // ==== Label Tests ====
3522    #[test]
3523    fn test_label_deserialization_and_accessors() {
3524        use serde_json::json;
3525
3526        // Test case 1: Label deserialization and accessors
3527        let label_json = json!({
3528            "id": 123,
3529            "dataset_id": 456,
3530            "index": 5,
3531            "name": "car"
3532        });
3533
3534        let label: Label = serde_json::from_value(label_json).unwrap();
3535        assert_eq!(label.id(), 123);
3536        assert_eq!(label.index(), 5);
3537        assert_eq!(label.name(), "car");
3538        assert_eq!(label.to_string(), "car");
3539        assert_eq!(format!("{}", label), "car");
3540
3541        // Test case 2: Different label
3542        let label_json = json!({
3543            "id": 1,
3544            "dataset_id": 100,
3545            "index": 0,
3546            "name": "person"
3547        });
3548
3549        let label: Label = serde_json::from_value(label_json).unwrap();
3550        assert_eq!(format!("{}", label), "person");
3551    }
3552
3553    // ==== Annotation Serialization Tests ====
3554    #[test]
3555    fn test_annotation_serialization_with_mask_and_box() {
3556        let polygon = vec![vec![
3557            (0.0_f32, 0.0_f32),
3558            (1.0_f32, 0.0_f32),
3559            (1.0_f32, 1.0_f32),
3560        ]];
3561
3562        let mut annotation = Annotation::new();
3563        annotation.set_label(Some("test".to_string()));
3564        annotation.set_box2d(Some(Box2d::new(10.0, 20.0, 30.0, 40.0)));
3565        annotation.set_polygon(Some(Polygon::new(polygon)));
3566
3567        let mut sample = Sample::new();
3568        sample.annotations.push(annotation);
3569
3570        let json = serde_json::to_value(&sample).unwrap();
3571        let annotations = json
3572            .get("annotations")
3573            .and_then(|value| value.as_array())
3574            .expect("annotations serialized as array");
3575        assert_eq!(annotations.len(), 1);
3576
3577        let annotation_json = annotations[0].as_object().expect("annotation object");
3578        assert!(annotation_json.contains_key("box2d"));
3579        // samples.populate2 expects the polygon geometry under the "mask" key
3580        // (historical: struct was renamed Rust-side from Mask to Polygon but
3581        // the wire contract did not follow). Emitting "polygon" here is what
3582        // caused polygons to be silently dropped on upload.
3583        assert!(
3584            annotation_json.contains_key("mask"),
3585            "Annotation must serialise polygon under 'mask' key for samples.populate2; got keys: {:?}",
3586            annotation_json.keys().collect::<Vec<_>>()
3587        );
3588        assert!(!annotation_json.contains_key("polygon"));
3589        assert!(!annotation_json.contains_key("x"));
3590        assert!(
3591            annotation_json
3592                .get("mask")
3593                .and_then(|value| value.as_array())
3594                .is_some()
3595        );
3596    }
3597
3598    #[test]
3599    fn test_frame_number_negative_one_deserializes_as_none() {
3600        // Server returns frame_number: -1 for non-sequence samples
3601        // This should deserialize as None for the client
3602        let json = r#"{
3603            "uuid": "test-uuid",
3604            "frame_number": -1
3605        }"#;
3606
3607        let sample: Sample = serde_json::from_str(json).unwrap();
3608        assert_eq!(sample.frame_number, None);
3609    }
3610
3611    #[test]
3612    fn test_frame_number_positive_value_deserializes_correctly() {
3613        // Valid frame numbers should deserialize normally
3614        let json = r#"{
3615            "uuid": "test-uuid",
3616            "frame_number": 5
3617        }"#;
3618
3619        let sample: Sample = serde_json::from_str(json).unwrap();
3620        assert_eq!(sample.frame_number, Some(5));
3621    }
3622
3623    #[test]
3624    fn test_frame_number_null_deserializes_as_none() {
3625        // Explicit null should also be None
3626        let json = r#"{
3627            "uuid": "test-uuid",
3628            "frame_number": null
3629        }"#;
3630
3631        let sample: Sample = serde_json::from_str(json).unwrap();
3632        assert_eq!(sample.frame_number, None);
3633    }
3634
3635    #[test]
3636    fn test_frame_number_missing_deserializes_as_none() {
3637        // Missing field should be None
3638        let json = r#"{
3639            "uuid": "test-uuid"
3640        }"#;
3641
3642        let sample: Sample = serde_json::from_str(json).unwrap();
3643        assert_eq!(sample.frame_number, None);
3644    }
3645
3646    // =========================================================================
3647    // samples_dataframe tests - CRITICAL: Verify group preservation
3648    // =========================================================================
3649
3650    #[cfg(feature = "polars")]
3651    #[test]
3652    fn test_samples_dataframe_preserves_group_for_samples_without_annotations() {
3653        use polars::prelude::*;
3654
3655        // Create sample WITH annotations
3656        let mut sample_with_ann = Sample::new();
3657        sample_with_ann.image_name = Some("annotated.jpg".to_string());
3658        sample_with_ann.group = Some("train".to_string());
3659        let mut annotation = Annotation::new();
3660        annotation.set_label(Some("car".to_string()));
3661        annotation.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
3662        annotation.set_name(Some("annotated".to_string()));
3663        sample_with_ann.annotations = vec![annotation];
3664
3665        // Create sample WITHOUT annotations (this is the critical case)
3666        let mut sample_no_ann = Sample::new();
3667        sample_no_ann.image_name = Some("unannotated.jpg".to_string());
3668        sample_no_ann.group = Some("val".to_string()); // Should be preserved!
3669        sample_no_ann.annotations = vec![]; // Empty annotations
3670
3671        let samples = vec![sample_with_ann, sample_no_ann];
3672
3673        // Convert to DataFrame
3674        let df = samples_dataframe(&samples).expect("Failed to create DataFrame");
3675
3676        // Verify we have 2 rows (one per sample)
3677        assert_eq!(df.height(), 2, "Expected 2 rows (one per sample)");
3678
3679        // Get the group column
3680        let groups_col = df.column("group").expect("group column should exist");
3681        let groups_cast = groups_col.cast(&DataType::String).expect("cast to string");
3682        let groups = groups_cast.str().expect("as str");
3683
3684        // Find the row for "unannotated" and verify it has group "val"
3685        let names_col = df.column("name").expect("name column should exist");
3686        let names_cast = names_col.cast(&DataType::String).expect("cast to string");
3687        let names = names_cast.str().expect("as str");
3688
3689        let mut found_unannotated = false;
3690        for idx in 0..df.height() {
3691            if let Some(name) = names.get(idx)
3692                && name == "unannotated"
3693            {
3694                found_unannotated = true;
3695                let group = groups.get(idx);
3696                assert_eq!(
3697                    group,
3698                    Some("val"),
3699                    "CRITICAL: Sample 'unannotated' without annotations must have group 'val'"
3700                );
3701            }
3702        }
3703
3704        assert!(
3705            found_unannotated,
3706            "Did not find 'unannotated' sample in DataFrame - \
3707             this means samples without annotations are not being included"
3708        );
3709    }
3710
3711    #[cfg(feature = "polars")]
3712    #[test]
3713    fn test_samples_dataframe_includes_all_samples_even_without_annotations() {
3714        // Verify that samples without annotations still appear in the DataFrame
3715        // with null annotation fields but WITH their group field populated
3716
3717        let mut sample1 = Sample::new();
3718        sample1.image_name = Some("with_ann.jpg".to_string());
3719        sample1.group = Some("train".to_string());
3720        let mut ann = Annotation::new();
3721        ann.set_label(Some("person".to_string()));
3722        ann.set_box2d(Some(Box2d::new(0.0, 0.0, 0.5, 0.5)));
3723        ann.set_name(Some("with_ann".to_string()));
3724        sample1.annotations = vec![ann];
3725
3726        let mut sample2 = Sample::new();
3727        sample2.image_name = Some("no_ann_train.jpg".to_string());
3728        sample2.group = Some("train".to_string());
3729        sample2.annotations = vec![];
3730
3731        let mut sample3 = Sample::new();
3732        sample3.image_name = Some("no_ann_val.jpg".to_string());
3733        sample3.group = Some("val".to_string());
3734        sample3.annotations = vec![];
3735
3736        let samples = vec![sample1, sample2, sample3];
3737
3738        let df = samples_dataframe(&samples).expect("Failed to create DataFrame");
3739
3740        // We should have exactly 3 rows - one per sample
3741        assert_eq!(
3742            df.height(),
3743            3,
3744            "Expected 3 rows (samples without annotations should create one row each)"
3745        );
3746
3747        // Check that all groups are present
3748        let groups_col = df.column("group").expect("group column");
3749        let groups_cast = groups_col.cast(&polars::prelude::DataType::String).unwrap();
3750        let groups = groups_cast.str().unwrap();
3751
3752        let mut train_count = 0;
3753        let mut val_count = 0;
3754
3755        for idx in 0..df.height() {
3756            match groups.get(idx) {
3757                Some("train") => train_count += 1,
3758                Some("val") => val_count += 1,
3759                other => panic!(
3760                    "Unexpected group value at row {}: {:?}. \
3761                     All samples should have their group preserved.",
3762                    idx, other
3763                ),
3764            }
3765        }
3766
3767        assert_eq!(train_count, 2, "Expected 2 samples in 'train' group");
3768        assert_eq!(val_count, 1, "Expected 1 sample in 'val' group");
3769    }
3770
3771    #[cfg(feature = "polars")]
3772    #[test]
3773    fn test_samples_dataframe_group_is_not_null_for_samples_with_group() {
3774        // CRITICAL: Even when a sample has no annotations, if it has a group,
3775        // that group must NOT be null in the DataFrame
3776
3777        let mut sample = Sample::new();
3778        sample.image_name = Some("test.jpg".to_string());
3779        sample.group = Some("test_group".to_string());
3780        sample.annotations = vec![];
3781
3782        let df = samples_dataframe(&[sample]).expect("Failed to create DataFrame");
3783
3784        let groups_col = df.column("group").expect("group column");
3785
3786        // The group column should have NO nulls because our sample has a group
3787        assert_eq!(
3788            groups_col.null_count(),
3789            0,
3790            "Sample with group='test_group' but no annotations has NULL group in DataFrame. \
3791             This is a bug in samples_dataframe - group must be preserved!"
3792        );
3793    }
3794
3795    #[cfg(feature = "polars")]
3796    #[test]
3797    fn test_samples_dataframe_group_consistent_across_all_rows_for_same_image() {
3798        use polars::prelude::*;
3799
3800        // Test that when a sample has multiple annotations, ALL rows have
3801        // the same group value (not just the first one)
3802
3803        let mut sample = Sample::new();
3804        sample.image_name = Some("multi_ann.jpg".to_string());
3805        sample.group = Some("train".to_string());
3806
3807        // Add multiple annotations
3808        let mut ann1 = Annotation::new();
3809        ann1.set_label(Some("car".to_string()));
3810        ann1.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
3811        ann1.set_name(Some("multi_ann".to_string()));
3812
3813        let mut ann2 = Annotation::new();
3814        ann2.set_label(Some("truck".to_string()));
3815        ann2.set_box2d(Some(Box2d::new(0.5, 0.6, 0.2, 0.2)));
3816        ann2.set_name(Some("multi_ann".to_string()));
3817
3818        let mut ann3 = Annotation::new();
3819        ann3.set_label(Some("bus".to_string()));
3820        ann3.set_box2d(Some(Box2d::new(0.7, 0.8, 0.1, 0.1)));
3821        ann3.set_name(Some("multi_ann".to_string()));
3822
3823        sample.annotations = vec![ann1, ann2, ann3];
3824
3825        let df = samples_dataframe(&[sample]).expect("Failed to create DataFrame");
3826
3827        // Should have 3 rows (one per annotation)
3828        assert_eq!(df.height(), 3, "Expected 3 rows (one per annotation)");
3829
3830        // ALL rows should have the group "train" (not just the first one)
3831        let groups_col = df.column("group").expect("group column");
3832        let groups_cast = groups_col.cast(&DataType::String).expect("cast to string");
3833        let groups = groups_cast.str().expect("as str");
3834
3835        // No nulls allowed
3836        assert_eq!(groups_col.null_count(), 0, "No rows should have null group");
3837
3838        // All rows should have the same group
3839        for idx in 0..df.height() {
3840            let group = groups.get(idx);
3841            assert_eq!(
3842                group,
3843                Some("train"),
3844                "Row {} should have group 'train', got {:?}. \
3845                 All rows for the same image must have identical group values.",
3846                idx,
3847                group
3848            );
3849        }
3850    }
3851
3852    #[cfg(feature = "polars")]
3853    #[test]
3854    fn test_samples_dataframe_lvis_columns() {
3855        let mut ann = Annotation::new();
3856        ann.set_name(Some("test".to_string()));
3857        ann.set_label(Some("person".to_string()));
3858        ann.set_label_index(Some(1));
3859        ann.set_iscrowd(Some(false));
3860        ann.set_category_frequency(Some("f".to_string()));
3861
3862        let sample = Sample {
3863            image_name: Some("test.jpg".to_string()),
3864            width: Some(640),
3865            height: Some(480),
3866            annotations: vec![ann],
3867            neg_label_indices: Some(vec![5, 12]),
3868            not_exhaustive_label_indices: Some(vec![3]),
3869            ..Default::default()
3870        };
3871
3872        let df = samples_dataframe(&[sample]).unwrap();
3873
3874        // Verify LVIS columns are present (they have data)
3875        assert!(df.column("iscrowd").is_ok(), "iscrowd column missing");
3876        assert!(
3877            df.column("category_frequency").is_ok(),
3878            "category_frequency column missing"
3879        );
3880        assert!(
3881            df.column("neg_label_indices").is_ok(),
3882            "neg_label_indices column missing"
3883        );
3884        assert!(
3885            df.column("not_exhaustive_label_indices").is_ok(),
3886            "not_exhaustive_label_indices column missing"
3887        );
3888
3889        // All-null columns should be dropped (polygon, box2d, box3d, mask, scores, etc.)
3890        assert!(
3891            df.column("polygon").is_err(),
3892            "polygon column should be dropped (all null)"
3893        );
3894        assert!(
3895            df.column("box2d").is_err(),
3896            "box2d column should be dropped (all null)"
3897        );
3898    }
3899
3900    #[test]
3901    fn test_annotation_serialization_skips_lvis_fields() {
3902        let ann = Annotation::new();
3903        let json = serde_json::to_string(&ann).unwrap();
3904        assert!(
3905            !json.contains("iscrowd"),
3906            "iscrowd should be omitted when None"
3907        );
3908        assert!(
3909            !json.contains("category_frequency"),
3910            "category_frequency should be omitted when None"
3911        );
3912    }
3913
3914    #[test]
3915    fn test_sample_serialization_skips_lvis_fields() {
3916        let sample = Sample::new();
3917        let json = serde_json::to_string(&sample).unwrap();
3918        assert!(
3919            !json.contains("neg_label_indices"),
3920            "neg_label_indices should be omitted when None"
3921        );
3922        assert!(
3923            !json.contains("not_exhaustive_label_indices"),
3924            "not_exhaustive_label_indices should be omitted when None"
3925        );
3926    }
3927
3928    #[test]
3929    fn test_annotation_score_fields() {
3930        let mut ann = Annotation::default();
3931        assert!(ann.box2d_score.is_none());
3932        assert!(ann.polygon_score.is_none());
3933        assert!(ann.mask_score.is_none());
3934        ann.box2d_score = Some(0.95);
3935        ann.polygon_score = Some(0.87);
3936        ann.mask_score = Some(0.42);
3937        assert_eq!(ann.box2d_score, Some(0.95));
3938        assert_eq!(ann.polygon_score, Some(0.87));
3939        assert_eq!(ann.mask_score, Some(0.42));
3940    }
3941
3942    #[test]
3943    fn test_timing_struct() {
3944        let timing = Timing {
3945            load: Some(1_000_000),
3946            preprocess: Some(2_000_000),
3947            inference: Some(50_000_000),
3948            decode: Some(3_000_000),
3949        };
3950        assert_eq!(timing.inference, Some(50_000_000));
3951
3952        let default = Timing::default();
3953        assert!(default.load.is_none());
3954    }
3955
3956    #[test]
3957    fn test_sample_timing() {
3958        let mut sample = Sample::default();
3959        assert!(sample.timing.is_none());
3960        sample.timing = Some(Timing {
3961            load: Some(1_000_000),
3962            ..Default::default()
3963        });
3964        assert!(sample.timing.is_some());
3965    }
3966
3967    // =========================================================================
3968    // samples_dataframe 2026.04 schema tests
3969    // =========================================================================
3970
3971    #[cfg(feature = "polars")]
3972    #[test]
3973    fn test_samples_dataframe_polygon_column() {
3974        let mut ann = Annotation::new();
3975        ann.set_name(Some("test".to_string()));
3976        ann.set_polygon(Some(Polygon::new(vec![vec![
3977            (0.1, 0.2),
3978            (0.3, 0.4),
3979            (0.5, 0.6),
3980        ]])));
3981
3982        let sample = Sample {
3983            image_name: Some("test.jpg".to_string()),
3984            annotations: vec![ann],
3985            ..Default::default()
3986        };
3987
3988        let df = samples_dataframe(&[sample]).unwrap();
3989
3990        // 2026.04: polygon column exists with nested List(List(Float32))
3991        assert!(df.column("polygon").is_ok(), "Should have polygon column");
3992
3993        // The old "mask" column with float data should NOT exist (no MaskData set)
3994        // If mask column exists, it would be Binary type from MaskData, not floats
3995        if let Ok(mask_col) = df.column("mask") {
3996            // If it exists, it must be Binary type, not List(Float32)
3997            assert_eq!(
3998                mask_col.dtype(),
3999                &polars::prelude::DataType::Binary,
4000                "mask column must be Binary type (PNG bytes), not float list"
4001            );
4002        }
4003    }
4004
4005    #[cfg(feature = "polars")]
4006    #[test]
4007    fn test_samples_dataframe_column_presence_drops_all_null() {
4008        // Sample with only a name, no annotations
4009        let sample = Sample {
4010            image_name: Some("test.jpg".to_string()),
4011            ..Default::default()
4012        };
4013
4014        let df = samples_dataframe(&[sample]).unwrap();
4015
4016        // name is always present
4017        assert!(df.column("name").is_ok(), "name column must always exist");
4018
4019        // All-null columns should be dropped
4020        assert!(
4021            df.column("polygon").is_err(),
4022            "All-null polygon should be dropped"
4023        );
4024        assert!(
4025            df.column("box2d").is_err(),
4026            "All-null box2d should be dropped"
4027        );
4028        assert!(
4029            df.column("box3d").is_err(),
4030            "All-null box3d should be dropped"
4031        );
4032        assert!(
4033            df.column("mask").is_err(),
4034            "All-null mask should be dropped"
4035        );
4036        assert!(
4037            df.column("box2d_score").is_err(),
4038            "All-null score columns should be dropped"
4039        );
4040        assert!(
4041            df.column("timing").is_err(),
4042            "All-null timing should be dropped"
4043        );
4044    }
4045
4046    #[cfg(feature = "polars")]
4047    #[test]
4048    fn test_samples_dataframe_size_column() {
4049        // Samples with width/height should produce the size column
4050        let sample1 = Sample {
4051            image_name: Some("img1.jpg".to_string()),
4052            width: Some(1920),
4053            height: Some(1080),
4054            ..Default::default()
4055        };
4056        let sample2 = Sample {
4057            image_name: Some("img2.jpg".to_string()),
4058            width: Some(640),
4059            height: Some(480),
4060            ..Default::default()
4061        };
4062
4063        let df = samples_dataframe(&[sample1, sample2]).unwrap();
4064
4065        // Size column should be present (not dropped by all-null rule)
4066        let size_col = df
4067            .column("size")
4068            .expect("size column should be present when width/height are set");
4069        assert_eq!(size_col.len(), 2);
4070
4071        // Each row should be an Array(UInt32, 2) with [width, height]
4072        let arr = size_col.array().expect("size column should be Array dtype");
4073        let row0 = arr.get_as_series(0).unwrap();
4074        let row0_vals: Vec<u32> = row0.u32().unwrap().into_no_null_iter().collect();
4075        assert_eq!(row0_vals, vec![1920, 1080]);
4076
4077        let row1 = arr.get_as_series(1).unwrap();
4078        let row1_vals: Vec<u32> = row1.u32().unwrap().into_no_null_iter().collect();
4079        assert_eq!(row1_vals, vec![640, 480]);
4080    }
4081
4082    #[cfg(feature = "polars")]
4083    #[test]
4084    fn test_samples_dataframe_size_column_partial() {
4085        // When only some samples have dimensions, size column should still be present
4086        let sample1 = Sample {
4087            image_name: Some("img1.jpg".to_string()),
4088            width: Some(1920),
4089            height: Some(1080),
4090            ..Default::default()
4091        };
4092        let sample2 = Sample {
4093            image_name: Some("img2.jpg".to_string()),
4094            // No width/height
4095            ..Default::default()
4096        };
4097
4098        let df = samples_dataframe(&[sample1, sample2]).unwrap();
4099
4100        // Size column should be present (not all null)
4101        let size_col = df
4102            .column("size")
4103            .expect("size column should be present when at least one sample has dimensions");
4104        assert_eq!(size_col.len(), 2);
4105        assert_eq!(size_col.null_count(), 1, "one row should be null");
4106    }
4107
4108    #[cfg(feature = "polars")]
4109    #[test]
4110    fn test_samples_dataframe_score_columns() {
4111        let mut ann = Annotation::new();
4112        ann.set_name(Some("test".to_string()));
4113        ann.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
4114        ann.set_box2d_score(Some(0.95));
4115        ann.set_polygon(Some(Polygon::new(vec![vec![
4116            (0.0, 0.0),
4117            (1.0, 0.0),
4118            (1.0, 1.0),
4119        ]])));
4120        ann.set_polygon_score(Some(0.87));
4121
4122        let sample = Sample {
4123            image_name: Some("test.jpg".to_string()),
4124            annotations: vec![ann],
4125            ..Default::default()
4126        };
4127
4128        let df = samples_dataframe(&[sample]).unwrap();
4129
4130        // Score columns with data should be present
4131        assert!(
4132            df.column("box2d_score").is_ok(),
4133            "box2d_score column missing"
4134        );
4135        assert!(
4136            df.column("polygon_score").is_ok(),
4137            "polygon_score column missing"
4138        );
4139
4140        // Score columns with no data should be dropped
4141        assert!(
4142            df.column("box3d_score").is_err(),
4143            "box3d_score should be dropped (all null)"
4144        );
4145        assert!(
4146            df.column("mask_score").is_err(),
4147            "mask_score should be dropped (all null)"
4148        );
4149
4150        // Verify score values
4151        let box2d_scores = df.column("box2d_score").unwrap();
4152        let val = box2d_scores.f32().unwrap().get(0);
4153        assert_eq!(val, Some(0.95));
4154    }
4155
4156    #[cfg(feature = "polars")]
4157    #[test]
4158    fn test_samples_dataframe_timing_column() {
4159        let mut ann = Annotation::new();
4160        ann.set_name(Some("test".to_string()));
4161        ann.set_label(Some("person".to_string()));
4162
4163        let sample = Sample {
4164            image_name: Some("test.jpg".to_string()),
4165            annotations: vec![ann],
4166            timing: Some(Timing {
4167                load: Some(1_000_000),
4168                preprocess: Some(2_000_000),
4169                inference: Some(50_000_000),
4170                decode: Some(3_000_000),
4171            }),
4172            ..Default::default()
4173        };
4174
4175        let df = samples_dataframe(&[sample]).unwrap();
4176
4177        // Timing column should exist (has data)
4178        assert!(df.column("timing").is_ok(), "timing column missing");
4179
4180        // Verify it is a struct type
4181        let timing_col = df.column("timing").unwrap();
4182        assert!(
4183            matches!(timing_col.dtype(), polars::prelude::DataType::Struct(..)),
4184            "timing column should be Struct type, got {:?}",
4185            timing_col.dtype()
4186        );
4187    }
4188
4189    #[cfg(feature = "polars")]
4190    #[test]
4191    fn test_samples_dataframe_mask_binary_column() {
4192        let mut ann = Annotation::new();
4193        ann.set_name(Some("test".to_string()));
4194        // Create a small valid PNG via MaskData::encode
4195        let pixels = vec![0u8, 255, 128, 64];
4196        let mask_data = MaskData::encode(&pixels, 2, 2, 8).unwrap();
4197        ann.set_mask(Some(mask_data));
4198
4199        let sample = Sample {
4200            image_name: Some("test.jpg".to_string()),
4201            annotations: vec![ann],
4202            ..Default::default()
4203        };
4204
4205        let df = samples_dataframe(&[sample]).unwrap();
4206
4207        // mask column should exist with Binary type
4208        let mask_col = df.column("mask").unwrap();
4209        assert_eq!(
4210            mask_col.dtype(),
4211            &polars::prelude::DataType::Binary,
4212            "mask column should be Binary"
4213        );
4214        assert_eq!(mask_col.null_count(), 0, "mask value should not be null");
4215    }
4216
4217    // =========================================================================
4218    // AnnotationType "seg" alias test
4219    // =========================================================================
4220
4221    #[test]
4222    fn test_annotation_type_seg_alias() {
4223        assert_eq!(
4224            AnnotationType::try_from("seg").unwrap(),
4225            AnnotationType::Polygon,
4226            "\"seg\" should map to Polygon for server round-trip"
4227        );
4228    }
4229
4230    // =========================================================================
4231    // Timing edge case tests
4232    // =========================================================================
4233
4234    #[cfg(feature = "polars")]
4235    #[test]
4236    fn test_samples_dataframe_timing_partial() {
4237        // Timing with only load set; other fields None
4238        let mut ann = Annotation::new();
4239        ann.set_name(Some("test".to_string()));
4240        ann.set_label(Some("person".to_string()));
4241
4242        let sample = Sample {
4243            image_name: Some("test.jpg".to_string()),
4244            annotations: vec![ann],
4245            timing: Some(Timing {
4246                load: Some(1000),
4247                ..Default::default()
4248            }),
4249            ..Default::default()
4250        };
4251
4252        let df = samples_dataframe(&[sample]).unwrap();
4253
4254        // Timing column should be present because at least one field is non-null
4255        assert!(
4256            df.column("timing").is_ok(),
4257            "timing column should be present when partial data exists"
4258        );
4259    }
4260
4261    #[cfg(feature = "polars")]
4262    #[test]
4263    fn test_samples_dataframe_timing_all_none_omitted() {
4264        // All samples have timing: None — timing column should be omitted
4265        let mut ann = Annotation::new();
4266        ann.set_name(Some("test".to_string()));
4267        ann.set_label(Some("person".to_string()));
4268
4269        let sample = Sample {
4270            image_name: Some("test.jpg".to_string()),
4271            annotations: vec![ann],
4272            timing: None,
4273            ..Default::default()
4274        };
4275
4276        let df = samples_dataframe(&[sample]).unwrap();
4277
4278        assert!(
4279            df.column("timing").is_err(),
4280            "timing column should be omitted when all samples have timing: None"
4281        );
4282    }
4283
4284    // =========================================================================
4285    // Score boundary tests
4286    // =========================================================================
4287
4288    #[cfg(feature = "polars")]
4289    #[test]
4290    fn test_samples_dataframe_score_zero_survives() {
4291        // score = 0.0 must be non-null in the output (not confused with None)
4292        let mut ann = Annotation::new();
4293        ann.set_name(Some("test".to_string()));
4294        ann.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
4295        ann.set_box2d_score(Some(0.0));
4296
4297        let sample = Sample {
4298            image_name: Some("test.jpg".to_string()),
4299            annotations: vec![ann],
4300            ..Default::default()
4301        };
4302
4303        let df = samples_dataframe(&[sample]).unwrap();
4304
4305        let scores = df.column("box2d_score").unwrap();
4306        let val = scores.f32().unwrap().get(0);
4307        assert_eq!(val, Some(0.0), "score of 0.0 should survive as non-null");
4308    }
4309
4310    #[cfg(feature = "polars")]
4311    #[test]
4312    fn test_samples_dataframe_score_one_survives() {
4313        let mut ann = Annotation::new();
4314        ann.set_name(Some("test".to_string()));
4315        ann.set_box2d(Some(Box2d::new(0.1, 0.2, 0.3, 0.4)));
4316        ann.set_box2d_score(Some(1.0));
4317
4318        let sample = Sample {
4319            image_name: Some("test.jpg".to_string()),
4320            annotations: vec![ann],
4321            ..Default::default()
4322        };
4323
4324        let df = samples_dataframe(&[sample]).unwrap();
4325
4326        let scores = df.column("box2d_score").unwrap();
4327        let val = scores.f32().unwrap().get(0);
4328        assert_eq!(val, Some(1.0), "score of 1.0 should survive as non-null");
4329    }
4330}