Skip to main content

nuviz_cli/data/
ply.rs

1use std::collections::HashMap;
2use std::fs::File;
3use std::io::{BufRead, BufReader, Read};
4use std::path::Path;
5
6use anyhow::{bail, Context, Result};
7
/// In-memory contents of the vertex element of a parsed PLY file.
///
/// All numeric attributes are widened (or narrowed) to `f32` while reading,
/// regardless of the storage type declared in the header.
#[derive(Debug)]
pub struct PlyData {
    /// Vertex count declared by the header's `element vertex` line.
    pub num_vertices: usize,
    /// `[x, y, z]` position of every vertex.
    pub positions: Vec<[f32; 3]>,
    /// `[red, green, blue]` per vertex; present only when all three channels exist.
    pub colors: Option<Vec<[u8; 3]>>,
    /// Values of the `opacity` property, if the file has one.
    pub opacities: Option<Vec<f32>>,
    /// `[scale_0, scale_1, scale_2]` per vertex; present only when all three exist.
    pub scales: Option<Vec<[f32; 3]>>,
    /// `[rot_0, rot_1, rot_2, rot_3]` per vertex; present only when all four exist.
    #[allow(dead_code)]
    pub rotations: Option<Vec<[f32; 4]>>,
    /// Spherical-harmonics degree inferred from the `f_dc_*` / `f_rest_*` properties.
    pub sh_degree: Option<u32>,
    /// Columns of every vertex property that is not otherwise recognised, keyed by name.
    pub custom_properties: HashMap<String, Vec<f32>>,
    /// Size of the source file on disk, in bytes.
    pub file_size_bytes: u64,
}
22
/// Body encoding announced by the header's `format` line.
#[derive(Debug, Clone, Copy, PartialEq)]
enum PlyFormat {
    /// `format ascii`: one whitespace-separated text row per vertex.
    Ascii,
    /// `format binary_little_endian`: packed little-endian records.
    BinaryLittleEndian,
    /// `format binary_big_endian`: recognised in the header, rejected by `parse_ply`.
    BinaryBigEndian,
}
29
30#[derive(Debug, Clone)]
31struct PropertyDef {
32    name: String,
33    dtype: PropertyType,
34}
35
/// Scalar storage types that a vertex property may declare.
#[derive(Debug, Clone, Copy)]
enum PropertyType {
    Float32,
    Float64,
    UChar,
    Int32,
    UInt32,
    Int16,
    UInt16,
    Int8,
}

impl PropertyType {
    /// Map a PLY type keyword (either the classic or the sized spelling) to
    /// its variant; returns `None` for any other keyword.
    fn from_str(s: &str) -> Option<Self> {
        let parsed = match s {
            "char" | "int8" => Self::Int8,
            "uchar" | "uint8" => Self::UChar,
            "short" | "int16" => Self::Int16,
            "ushort" | "uint16" => Self::UInt16,
            "int" | "int32" => Self::Int32,
            "uint" | "uint32" => Self::UInt32,
            "float" | "float32" => Self::Float32,
            "double" | "float64" => Self::Float64,
            _ => return None,
        };
        Some(parsed)
    }

    /// Number of bytes one value of this type occupies in a binary PLY body.
    fn byte_size(self) -> usize {
        use std::mem::size_of;

        match self {
            Self::Int8 => size_of::<i8>(),
            Self::UChar => size_of::<u8>(),
            Self::Int16 => size_of::<i16>(),
            Self::UInt16 => size_of::<u16>(),
            Self::Int32 => size_of::<i32>(),
            Self::UInt32 => size_of::<u32>(),
            Self::Float32 => size_of::<f32>(),
            Self::Float64 => size_of::<f64>(),
        }
    }
}
72
73/// Parse a PLY file from the given path.
74pub fn parse_ply(path: &Path) -> Result<PlyData> {
75    let file_size_bytes = std::fs::metadata(path)
76        .with_context(|| format!("Cannot read PLY file: {}", path.display()))?
77        .len();
78
79    let file = File::open(path)?;
80    let mut reader = BufReader::new(file);
81
82    // Parse header
83    let (format, num_vertices, properties) = parse_header(&mut reader)?;
84
85    // Parse vertex data
86    let raw = match format {
87        PlyFormat::BinaryLittleEndian => {
88            read_binary_vertices(&mut reader, num_vertices, &properties)?
89        }
90        PlyFormat::Ascii => read_ascii_vertices(&mut reader, num_vertices, &properties)?,
91        PlyFormat::BinaryBigEndian => bail!("Binary big-endian PLY is not supported"),
92    };
93
94    // Extract structured fields
95    let ply = extract_fields(raw, &properties, num_vertices, file_size_bytes)?;
96    Ok(ply)
97}
98
99/// Parse PLY header, returning format, vertex count, and property definitions.
100fn parse_header(reader: &mut BufReader<File>) -> Result<(PlyFormat, usize, Vec<PropertyDef>)> {
101    let mut line = String::new();
102    let mut format = None;
103    let mut num_vertices = 0usize;
104    let mut properties = Vec::new();
105    let mut in_vertex_element = false;
106
107    // Verify magic number
108    reader.read_line(&mut line)?;
109    if line.trim() != "ply" {
110        bail!("Not a PLY file (missing 'ply' magic)");
111    }
112
113    loop {
114        line.clear();
115        let bytes_read = reader.read_line(&mut line)?;
116        if bytes_read == 0 {
117            bail!("Unexpected EOF in PLY header (missing 'end_header')");
118        }
119
120        let trimmed = line.trim();
121
122        if trimmed == "end_header" {
123            break;
124        }
125
126        let parts: Vec<&str> = trimmed.split_whitespace().collect();
127        if parts.is_empty() {
128            continue;
129        }
130
131        match parts[0] {
132            "format" => {
133                if parts.len() < 3 {
134                    bail!("Invalid format line: {trimmed}");
135                }
136                format = Some(match parts[1] {
137                    "ascii" => PlyFormat::Ascii,
138                    "binary_little_endian" => PlyFormat::BinaryLittleEndian,
139                    "binary_big_endian" => PlyFormat::BinaryBigEndian,
140                    other => bail!("Unknown PLY format: {other}"),
141                });
142            }
143            "element" => {
144                if parts.len() >= 3 && parts[1] == "vertex" {
145                    num_vertices = parts[2].parse().context("Invalid vertex count")?;
146                    in_vertex_element = true;
147                } else {
148                    in_vertex_element = false;
149                }
150            }
151            "property" => {
152                if !in_vertex_element {
153                    continue;
154                }
155                // Skip list properties (e.g., face vertex_indices)
156                if parts.len() >= 2 && parts[1] == "list" {
157                    continue;
158                }
159                if parts.len() < 3 {
160                    continue;
161                }
162                let dtype = PropertyType::from_str(parts[1]);
163                if let Some(dt) = dtype {
164                    properties.push(PropertyDef {
165                        name: parts[2].to_string(),
166                        dtype: dt,
167                    });
168                }
169            }
170            _ => {} // Ignore comments, obj_info, etc.
171        }
172    }
173
174    let format = format.ok_or_else(|| anyhow::anyhow!("No format declaration in PLY header"))?;
175
176    if num_vertices == 0 {
177        bail!("No vertex element found in PLY header");
178    }
179
180    Ok((format, num_vertices, properties))
181}
182
183/// Read binary vertex data into a flat Vec of f32 per property per vertex.
184fn read_binary_vertices(
185    reader: &mut BufReader<File>,
186    num_vertices: usize,
187    properties: &[PropertyDef],
188) -> Result<Vec<Vec<f32>>> {
189    let row_size: usize = properties.iter().map(|p| p.dtype.byte_size()).sum();
190    let total_bytes = row_size * num_vertices;
191
192    let mut raw_buf = vec![0u8; total_bytes];
193    reader.read_exact(&mut raw_buf)?;
194
195    let mut columns: Vec<Vec<f32>> = properties
196        .iter()
197        .map(|_| Vec::with_capacity(num_vertices))
198        .collect();
199
200    let mut offset = 0;
201    for _ in 0..num_vertices {
202        for (col_idx, prop) in properties.iter().enumerate() {
203            let value = read_value_le(&raw_buf[offset..], prop.dtype);
204            columns[col_idx].push(value);
205            offset += prop.dtype.byte_size();
206        }
207    }
208
209    Ok(columns)
210}
211
212/// Read a single value from a byte slice in little-endian format, returning as f32.
213fn read_value_le(buf: &[u8], dtype: PropertyType) -> f32 {
214    match dtype {
215        PropertyType::Float32 => f32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]),
216        PropertyType::Float64 => f64::from_le_bytes([
217            buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
218        ]) as f32,
219        PropertyType::UChar => buf[0] as f32,
220        PropertyType::Int8 => buf[0] as i8 as f32,
221        PropertyType::Int16 => i16::from_le_bytes([buf[0], buf[1]]) as f32,
222        PropertyType::UInt16 => u16::from_le_bytes([buf[0], buf[1]]) as f32,
223        PropertyType::Int32 => i32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]) as f32,
224        PropertyType::UInt32 => u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]) as f32,
225    }
226}
227
228/// Read ASCII vertex data.
229fn read_ascii_vertices(
230    reader: &mut BufReader<File>,
231    num_vertices: usize,
232    properties: &[PropertyDef],
233) -> Result<Vec<Vec<f32>>> {
234    let mut columns: Vec<Vec<f32>> = properties
235        .iter()
236        .map(|_| Vec::with_capacity(num_vertices))
237        .collect();
238    let mut line = String::new();
239
240    for row_idx in 0..num_vertices {
241        line.clear();
242        reader.read_line(&mut line)?;
243        let values: Vec<&str> = line.split_whitespace().collect();
244
245        if values.len() < properties.len() {
246            bail!(
247                "ASCII PLY row {} has {} values, expected {}",
248                row_idx,
249                values.len(),
250                properties.len()
251            );
252        }
253
254        for (col_idx, prop) in properties.iter().enumerate() {
255            let v: f32 = match prop.dtype {
256                PropertyType::UChar | PropertyType::Int8 => {
257                    values[col_idx].parse::<i32>().unwrap_or(0) as f32
258                }
259                _ => values[col_idx].parse().unwrap_or(0.0),
260            };
261            columns[col_idx].push(v);
262        }
263    }
264
265    Ok(columns)
266}
267
268/// Extract structured fields from raw column data.
269fn extract_fields(
270    columns: Vec<Vec<f32>>,
271    properties: &[PropertyDef],
272    num_vertices: usize,
273    file_size_bytes: u64,
274) -> Result<PlyData> {
275    let prop_index: HashMap<&str, usize> = properties
276        .iter()
277        .enumerate()
278        .map(|(i, p)| (p.name.as_str(), i))
279        .collect();
280
281    // Positions (required)
282    let x_idx = prop_index
283        .get("x")
284        .ok_or_else(|| anyhow::anyhow!("Missing 'x' property"))?;
285    let y_idx = prop_index
286        .get("y")
287        .ok_or_else(|| anyhow::anyhow!("Missing 'y' property"))?;
288    let z_idx = prop_index
289        .get("z")
290        .ok_or_else(|| anyhow::anyhow!("Missing 'z' property"))?;
291
292    let positions: Vec<[f32; 3]> = columns[*x_idx]
293        .iter()
294        .zip(columns[*y_idx].iter())
295        .zip(columns[*z_idx].iter())
296        .map(|((&x, &y), &z)| [x, y, z])
297        .collect();
298
299    // Colors (optional)
300    let colors = if let (Some(&ri), Some(&gi), Some(&bi)) = (
301        prop_index.get("red"),
302        prop_index.get("green"),
303        prop_index.get("blue"),
304    ) {
305        let v: Vec<[u8; 3]> = columns[ri]
306            .iter()
307            .zip(columns[gi].iter())
308            .zip(columns[bi].iter())
309            .map(|((&r, &g), &b)| [r as u8, g as u8, b as u8])
310            .collect();
311        Some(v)
312    } else {
313        None
314    };
315
316    // Opacities
317    let opacities = prop_index.get("opacity").map(|&idx| columns[idx].clone());
318
319    // Scales (3DGS)
320    let scales = if let (Some(&s0), Some(&s1), Some(&s2)) = (
321        prop_index.get("scale_0"),
322        prop_index.get("scale_1"),
323        prop_index.get("scale_2"),
324    ) {
325        let v: Vec<[f32; 3]> = columns[s0]
326            .iter()
327            .zip(columns[s1].iter())
328            .zip(columns[s2].iter())
329            .map(|((&a, &b), &c)| [a, b, c])
330            .collect();
331        Some(v)
332    } else {
333        None
334    };
335
336    // Rotations (3DGS quaternion)
337    let rotations = if let (Some(&r0), Some(&r1), Some(&r2), Some(&r3)) = (
338        prop_index.get("rot_0"),
339        prop_index.get("rot_1"),
340        prop_index.get("rot_2"),
341        prop_index.get("rot_3"),
342    ) {
343        let v: Vec<[f32; 4]> = columns[r0]
344            .iter()
345            .zip(columns[r1].iter())
346            .zip(columns[r2].iter())
347            .zip(columns[r3].iter())
348            .map(|(((&a, &b), &c), &d)| [a, b, c, d])
349            .collect();
350        Some(v)
351    } else {
352        None
353    };
354
355    // SH degree: inferred from f_rest_* count
356    // degree 0: 0 rest, degree 1: 9, degree 2: 24, degree 3: 45
357    let sh_rest_count = prop_index
358        .keys()
359        .filter(|k| k.starts_with("f_rest_"))
360        .count();
361    let sh_degree = match sh_rest_count {
362        0 => {
363            if prop_index.contains_key("f_dc_0") {
364                Some(0)
365            } else {
366                None
367            }
368        }
369        n if n <= 9 => Some(1),
370        n if n <= 24 => Some(2),
371        _ => Some(3),
372    };
373
374    // Collect remaining properties as custom
375    let known_props = [
376        "x", "y", "z", "red", "green", "blue", "opacity", "scale_0", "scale_1", "scale_2", "rot_0",
377        "rot_1", "rot_2", "rot_3", "nx", "ny", "nz", "alpha",
378    ];
379    let mut custom_properties = HashMap::new();
380    for (i, prop) in properties.iter().enumerate() {
381        if !known_props.contains(&prop.name.as_str())
382            && !prop.name.starts_with("f_dc_")
383            && !prop.name.starts_with("f_rest_")
384        {
385            custom_properties.insert(prop.name.clone(), columns[i].clone());
386        }
387    }
388
389    Ok(PlyData {
390        num_vertices,
391        positions,
392        colors,
393        opacities,
394        scales,
395        rotations,
396        sh_degree,
397        custom_properties,
398        file_size_bytes,
399    })
400}
401
402/// Compute statistics for a PLY file without loading all data into memory.
403/// For now delegates to parse_ply; streaming version can be added later.
404pub fn compute_ply_stats(path: &Path) -> Result<PlyStats> {
405    let ply = parse_ply(path)?;
406
407    let mut min_pos = [f32::MAX; 3];
408    let mut max_pos = [f32::MIN; 3];
409
410    for pos in &ply.positions {
411        for i in 0..3 {
412            min_pos[i] = min_pos[i].min(pos[i]);
413            max_pos[i] = max_pos[i].max(pos[i]);
414        }
415    }
416
417    let opacity_stats = ply.opacities.as_ref().map(|ops| {
418        let (sum, sum_sq, min, max) = ops.iter().fold(
419            (0.0f64, 0.0f64, f32::MAX, f32::MIN),
420            |(s, sq, mn, mx), &v| (s + v as f64, sq + v as f64 * v as f64, mn.min(v), mx.max(v)),
421        );
422        let n = ops.len() as f64;
423        let mean = sum / n;
424        let std = ((sum_sq / n) - mean * mean).max(0.0).sqrt();
425        let near_transparent = ops.iter().filter(|&&v| v < 0.01).count();
426        AttributeStats {
427            mean: mean as f32,
428            std: std as f32,
429            min,
430            max,
431            special_count: near_transparent,
432        }
433    });
434
435    let scale_stats = ply.scales.as_ref().map(|scales| {
436        let magnitudes: Vec<f32> = scales
437            .iter()
438            .map(|s| (s[0] * s[0] + s[1] * s[1] + s[2] * s[2]).sqrt())
439            .collect();
440        let (sum, sum_sq, min, max) = magnitudes.iter().fold(
441            (0.0f64, 0.0f64, f32::MAX, f32::MIN),
442            |(s, sq, mn, mx), &v| (s + v as f64, sq + v as f64 * v as f64, mn.min(v), mx.max(v)),
443        );
444        let n = magnitudes.len() as f64;
445        let mean = sum / n;
446        let std = ((sum_sq / n) - mean * mean).max(0.0).sqrt();
447        let outliers = magnitudes
448            .iter()
449            .filter(|&&v| (v as f64 - mean).abs() > 3.0 * std)
450            .count();
451        AttributeStats {
452            mean: mean as f32,
453            std: std as f32,
454            min,
455            max,
456            special_count: outliers,
457        }
458    });
459
460    Ok(PlyStats {
461        num_vertices: ply.num_vertices,
462        bounding_box: (min_pos, max_pos),
463        sh_degree: ply.sh_degree,
464        has_colors: ply.colors.is_some(),
465        opacity_stats,
466        scale_stats,
467        file_size_bytes: ply.file_size_bytes,
468        custom_property_count: ply.custom_properties.len(),
469        opacities: ply.opacities,
470        scales: ply.scales,
471    })
472}
473
474/// Summary statistics for a PLY file.
475#[derive(Debug)]
476pub struct PlyStats {
477    pub num_vertices: usize,
478    pub bounding_box: ([f32; 3], [f32; 3]),
479    pub sh_degree: Option<u32>,
480    pub has_colors: bool,
481    pub opacity_stats: Option<AttributeStats>,
482    pub scale_stats: Option<AttributeStats>,
483    pub file_size_bytes: u64,
484    pub custom_property_count: usize,
485    /// Raw opacity values (for histogram rendering)
486    pub opacities: Option<Vec<f32>>,
487    /// Raw scale values (for histogram rendering)
488    pub scales: Option<Vec<[f32; 3]>>,
489}
490
/// Descriptive statistics of one per-vertex attribute.
#[derive(Debug)]
pub struct AttributeStats {
    /// Arithmetic mean.
    pub mean: f32,
    /// Standard deviation.
    pub std: f32,
    /// Smallest value.
    pub min: f32,
    /// Largest value.
    pub max: f32,
    /// Context-dependent: near-transparent count for opacity, outlier count for scale
    pub special_count: usize,
}
500
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use std::path::PathBuf;

    /// Write `test.ply` into `dir`: an ASCII file holding only x/y/z positions.
    fn write_ascii_ply(dir: &Path, vertices: &[[f32; 3]]) -> PathBuf {
        let mut contents = format!(
            "ply\nformat ascii 1.0\nelement vertex {}\nproperty float x\nproperty float y\nproperty float z\nend_header\n",
            vertices.len()
        );
        for [x, y, z] in vertices {
            contents.push_str(&format!("{} {} {}\n", x, y, z));
        }

        let path = dir.join("test.ply");
        std::fs::write(&path, contents).unwrap();
        path
    }

    /// Write `test_bin.ply` into `dir`: three little-endian vertices with
    /// position, color and opacity.
    fn write_binary_ply(dir: &Path) -> PathBuf {
        let header = "ply\nformat binary_little_endian 1.0\nelement vertex 3\nproperty float x\nproperty float y\nproperty float z\nproperty uchar red\nproperty uchar green\nproperty uchar blue\nproperty float opacity\nend_header\n";
        let mut bytes = header.as_bytes().to_vec();

        // Record layout per vertex: x, y, z, r, g, b, opacity.
        for i in 0..3u32 {
            let t = i as f32;
            bytes.extend_from_slice(&t.to_le_bytes());
            bytes.extend_from_slice(&(t * 2.0).to_le_bytes());
            bytes.extend_from_slice(&(t * 3.0).to_le_bytes());
            bytes.extend_from_slice(&[255u8, 128, 64]);
            bytes.extend_from_slice(&(0.5f32 + t * 0.2).to_le_bytes());
        }

        let path = dir.join("test_bin.ply");
        let mut file = File::create(&path).unwrap();
        file.write_all(&bytes).unwrap();
        path
    }

    #[test]
    fn test_parse_ascii_ply() {
        let dir = tempfile::tempdir().unwrap();
        let vertices = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]];
        let path = write_ascii_ply(dir.path(), &vertices);

        let ply = parse_ply(&path).unwrap();

        assert_eq!(ply.num_vertices, 3);
        assert_eq!(ply.positions.len(), 3);
        assert!((ply.positions[0][0] - 1.0).abs() < f32::EPSILON);
        assert!((ply.positions[2][2] - 9.0).abs() < f32::EPSILON);
        assert!(ply.colors.is_none());
        assert!(ply.opacities.is_none());
    }

    #[test]
    fn test_parse_binary_ply() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_binary_ply(dir.path());

        let ply = parse_ply(&path).unwrap();

        assert_eq!(ply.num_vertices, 3);
        let colors = ply.colors.as_ref().expect("colors should be present");
        assert_eq!(colors[0], [255, 128, 64]);
        let opacities = ply.opacities.as_ref().expect("opacities should be present");
        assert!((opacities[0] - 0.5).abs() < 0.01);
    }

    #[test]
    fn test_parse_ply_missing_file() {
        assert!(parse_ply(Path::new("/nonexistent/test.ply")).is_err());
    }

    #[test]
    fn test_parse_ply_not_ply() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("bad.ply");
        std::fs::write(&path, "not a ply file").unwrap();

        assert!(parse_ply(&path).is_err());
    }

    #[test]
    fn test_parse_ply_no_end_header() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("noend.ply");
        let truncated_header = "ply\nformat ascii 1.0\nelement vertex 1\nproperty float x\n";
        std::fs::write(&path, truncated_header).unwrap();

        assert!(parse_ply(&path).is_err());
    }

    #[test]
    fn test_sh_degree_detection() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("sh.ply");

        let mut contents = String::from("ply\nformat ascii 1.0\nelement vertex 1\n");
        for name in ["x", "y", "z", "f_dc_0", "f_dc_1", "f_dc_2"] {
            contents.push_str(&format!("property float {}\n", name));
        }
        for i in 0..9 {
            contents.push_str(&format!("property float f_rest_{}\n", i));
        }
        contents.push_str("end_header\n");
        // 1 vertex: x y z f_dc_0..2 f_rest_0..8
        contents.push_str("0 0 0 0.1 0.2 0.3 0 0 0 0 0 0 0 0 0\n");
        std::fs::write(&path, contents).unwrap();

        let ply = parse_ply(&path).unwrap();
        assert_eq!(ply.sh_degree, Some(1));
    }

    #[test]
    fn test_compute_ply_stats() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_binary_ply(dir.path());

        let stats = compute_ply_stats(&path).unwrap();

        assert_eq!(stats.num_vertices, 3);
        assert!(stats.has_colors);
        assert!(stats.opacity_stats.is_some());
    }
}