Skip to main content

copc_reader/
lib.rs

1//! Pure-Rust COPC reader.
2//!
3//! Parses LAS/COPC metadata and exposes chunked-LAZ point iteration over COPC
4//! hierarchy entries.
5
6#![forbid(unsafe_code)]
7
8mod points;
9
10use std::collections::{BTreeMap, HashSet};
11use std::fs::File;
12use std::io::{Read, Seek, SeekFrom};
13use std::path::Path;
14
15use byteorder::{LittleEndian, ReadBytesExt};
16use copc_core::{
17    CopcInfo, Entry, EntryAvailability, Error, HierarchyPage, Result, VoxelKey,
18    HIERARCHY_ENTRY_BYTES,
19};
20use las::{Transform, Vector};
21use laz::LazVlr;
22
23pub use copc_core::{
24    ColumnData, ColumnSelection, ColumnSpec, ColumnView, LasColumnBatch, LasDimension, ScalarType,
25};
26pub use points::{BoundsSelection, CopcReader, LodSelection, PointIter, PointQuery};
27
/// Minimum accepted LAS header size in bytes; smaller headers are rejected as not LAS 1.4.
const LAS_HEADER_SIZE_14: u16 = 375;
/// Size in bytes of one VLR header as parsed by `read_vlrs` (2 + 16 + 2 + 2 + 32).
const VLR_HEADER_BYTES: u64 = 54;
/// Size in bytes of one EVLR header as parsed by `read_evlr_refs` (2 + 16 + 2 + 8 + 32).
const EVLR_HEADER_BYTES: u64 = 60;
/// Largest VLR count accepted from the header before any VLR is read.
const MAX_VLR_COUNT: u32 = 4_096;
/// Largest EVLR count accepted from the header before any EVLR is read.
const MAX_EVLR_COUNT: u32 = 4_096;
/// Largest single hierarchy page, in bytes, that will be loaded (64 MiB).
const MAX_HIERARCHY_PAGE_BYTES: u64 = 64 * 1024 * 1024;
/// Largest cumulative size, in bytes, of all hierarchy pages loaded for one file (256 MiB).
const MAX_HIERARCHY_TOTAL_BYTES: u64 = 256 * 1024 * 1024;
35
/// A parsed COPC file.
///
/// Holds the metadata decoded by [`CopcFile::from_reader`]; it does not keep the
/// underlying reader.
#[derive(Debug, Clone)]
pub struct CopcFile {
    // LAS header fields decoded from the start of the file.
    header: LasHeader,
    // Decoded payload of the VLR with user id "copc" and record id 1.
    copc_info: CopcInfo,
    // Decoded payload of the VLR with user id "laszip encoded" and record id 22204.
    laszip_vlr: LazVlr,
    // The hierarchy page located at `copc_info.root_hier_offset`.
    root_hierarchy: HierarchyPage,
    // Entries from the root page and every child page reached from it, keyed by
    // voxel key; a later insertion for the same key replaces the earlier one.
    hierarchy: BTreeMap<VoxelKey, Entry>,
}
45
/// Minimal LAS header fields needed by COPC callers.
///
/// Every field holds the little-endian value exactly as `read_las_header` decodes
/// it from the file; the byte offsets below are absolute file offsets.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LasHeader {
    /// Point data record format byte (offset 104), stored unmasked.
    pub point_data_record_format: u8,
    /// Point data record length field (offset 105).
    pub point_data_record_length: u16,
    /// Offset-to-point-data field (offset 96); also used as the end of the VLR section.
    pub offset_to_point_data: u32,
    /// Number of VLRs declared by the header (offset 100).
    pub number_of_vlrs: u32,
    /// X scale factor (offset 131).
    pub x_scale_factor: f64,
    /// Y scale factor (offset 139).
    pub y_scale_factor: f64,
    /// Z scale factor (offset 147).
    pub z_scale_factor: f64,
    /// X offset (offset 155).
    pub x_offset: f64,
    /// Y offset (offset 163).
    pub y_offset: f64,
    /// Z offset (offset 171).
    pub z_offset: f64,
    // The bounds follow the transforms in the file; each max is stored before its
    // min (max x, min x, max y, min y, max z, min z starting at offset 179).
    pub min_x: f64,
    pub max_x: f64,
    pub min_y: f64,
    pub max_y: f64,
    pub min_z: f64,
    pub max_z: f64,
    /// Offset of the first EVLR header (offset 235); zero is treated as "no EVLRs".
    pub offset_to_first_evlr: u64,
    /// Number of EVLRs declared by the header (offset 243).
    pub number_of_evlrs: u32,
    /// 64-bit point count (offset 247).
    pub number_of_points: u64,
}
69
/// A VLR whose payload was kept in memory by `read_vlrs`.
#[derive(Debug, Clone)]
struct Vlr {
    // User id with everything from the first NUL byte onward removed.
    user_id: String,
    // Record id from the VLR header.
    record_id: u16,
    // Payload bytes that follow the VLR header.
    data: Vec<u8>,
}
76
/// Location of one EVLR found by `read_evlr_refs`; the payload itself is not loaded.
#[derive(Debug, Clone, Copy)]
struct EvlrRef {
    // Raw 16-byte user id field, including any NUL padding.
    user_id: [u8; 16],
    // Record id from the EVLR header.
    record_id: u16,
    // Absolute file offset of the first payload byte (just past the EVLR header).
    data_offset: u64,
}
83
84impl CopcFile {
85    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
86        let mut file = File::open(path.as_ref()).map_err(|e| Error::io("open COPC file", e))?;
87        Self::from_reader(&mut file)
88    }
89
90    pub fn from_reader<R: Read + Seek>(reader: &mut R) -> Result<Self> {
91        let file_len = reader_len(reader)?;
92        let header = read_las_header(reader, file_len)?;
93        let vlrs = read_vlrs(
94            reader,
95            header.number_of_vlrs,
96            file_len,
97            u64::from(header.offset_to_point_data),
98        )?;
99        let copc_info_vlr = vlrs
100            .iter()
101            .find(|vlr| vlr.user_id == "copc" && vlr.record_id == 1)
102            .ok_or_else(|| Error::InvalidData("missing COPC info VLR".into()))?;
103        let copc_info = CopcInfo::from_le_bytes(&copc_info_vlr.data)?;
104        let laszip_vlr = vlrs
105            .iter()
106            .find(|vlr| vlr.user_id == "laszip encoded" && vlr.record_id == 22204)
107            .map(|vlr| {
108                LazVlr::read_from(vlr.data.as_slice()).map_err(|e| Error::Las(e.to_string()))
109            })
110            .transpose()?
111            .ok_or_else(|| Error::InvalidData("missing LASzip VLR".into()))?;
112        let evlrs = read_evlr_refs(reader, &header, file_len)?;
113        let root_evlr = evlrs
114            .iter()
115            .find(|evlr| trim_nul(&evlr.user_id) == "copc" && evlr.record_id == 1000)
116            .copied()
117            .ok_or_else(|| Error::InvalidData("missing COPC hierarchy EVLR".into()))?;
118        if copc_info.root_hier_offset != root_evlr.data_offset {
119            return Err(Error::InvalidData(format!(
120                "COPC root hierarchy offset {} does not match EVLR data offset {}",
121                copc_info.root_hier_offset, root_evlr.data_offset
122            )));
123        }
124        let mut hierarchy_limits = HierarchyReadLimits::default();
125        let root_hierarchy = read_hierarchy_page_at(
126            reader,
127            copc_info.root_hier_offset,
128            copc_info.root_hier_size,
129            file_len,
130            &mut hierarchy_limits,
131        )?;
132        let mut hierarchy = BTreeMap::new();
133        let mut visited_pages = HashSet::new();
134        visited_pages.insert((copc_info.root_hier_offset, copc_info.root_hier_size));
135        insert_hierarchy_page(
136            reader,
137            &root_hierarchy,
138            &mut hierarchy,
139            &mut visited_pages,
140            file_len,
141            &mut hierarchy_limits,
142        )?;
143        Ok(Self {
144            header,
145            copc_info,
146            laszip_vlr,
147            root_hierarchy,
148            hierarchy,
149        })
150    }
151
152    pub fn header(&self) -> &LasHeader {
153        &self.header
154    }
155
156    pub fn copc_info(&self) -> &CopcInfo {
157        &self.copc_info
158    }
159
160    pub fn root_hierarchy(&self) -> &HierarchyPage {
161        &self.root_hierarchy
162    }
163
164    /// Return all parsed hierarchy entries, including recursively loaded child pages.
165    pub fn hierarchy_walk(&self) -> Vec<Entry> {
166        self.hierarchy.values().copied().collect()
167    }
168
169    /// Return the full hierarchy index keyed by COPC voxel key.
170    pub fn hierarchy(&self) -> &BTreeMap<VoxelKey, Entry> {
171        &self.hierarchy
172    }
173
174    pub fn hierarchy_entries(&self) -> impl Iterator<Item = &Entry> {
175        self.hierarchy.values()
176    }
177
178    pub(crate) fn laszip_vlr(&self) -> &LazVlr {
179        &self.laszip_vlr
180    }
181
182    pub(crate) fn point_format(&self) -> Result<las::point::Format> {
183        let format_id = self.header.point_data_record_format & 0x7F;
184        let mut format =
185            las::point::Format::new(format_id).map_err(|e| Error::Las(e.to_string()))?;
186        let base_len = format.len();
187        if self.header.point_data_record_length < base_len {
188            return Err(Error::InvalidData(format!(
189                "point record length {} is smaller than point format {} base length {}",
190                self.header.point_data_record_length, format_id, base_len
191            )));
192        }
193        format.extra_bytes = self.header.point_data_record_length - base_len;
194        Ok(format)
195    }
196
197    pub(crate) fn transforms(&self) -> Vector<Transform> {
198        Vector {
199            x: Transform {
200                scale: self.header.x_scale_factor,
201                offset: self.header.x_offset,
202            },
203            y: Transform {
204                scale: self.header.y_scale_factor,
205                offset: self.header.y_offset,
206            },
207            z: Transform {
208                scale: self.header.z_scale_factor,
209                offset: self.header.z_offset,
210            },
211        }
212    }
213}
214
impl LasHeader {
    /// Return the point count stored in the `number_of_points` field.
    pub fn number_of_points(&self) -> u64 {
        self.number_of_points
    }
}
220
/// Running account of how many hierarchy bytes have been accepted for one file.
#[derive(Debug, Default)]
struct HierarchyReadLimits {
    // Sum of the byte sizes of all pages accepted so far; starts at zero.
    total_bytes: u64,
}
225
226impl HierarchyReadLimits {
227    fn add_page(&mut self, byte_size: u64) -> Result<()> {
228        if byte_size > MAX_HIERARCHY_PAGE_BYTES {
229            return Err(Error::InvalidData(format!(
230                "hierarchy page is {byte_size} bytes, max supported is {MAX_HIERARCHY_PAGE_BYTES}"
231            )));
232        }
233        self.total_bytes = self
234            .total_bytes
235            .checked_add(byte_size)
236            .ok_or_else(|| Error::InvalidData("hierarchy byte total overflow".into()))?;
237        if self.total_bytes > MAX_HIERARCHY_TOTAL_BYTES {
238            return Err(Error::InvalidData(format!(
239                "hierarchy pages total {} bytes, max supported is {}",
240                self.total_bytes, MAX_HIERARCHY_TOTAL_BYTES
241            )));
242        }
243        Ok(())
244    }
245}
246
247fn reader_len<R: Seek>(reader: &mut R) -> Result<u64> {
248    let current = reader
249        .stream_position()
250        .map_err(|e| Error::io("record reader position", e))?;
251    let len = reader
252        .seek(SeekFrom::End(0))
253        .map_err(|e| Error::io("seek end of COPC file", e))?;
254    reader
255        .seek(SeekFrom::Start(current))
256        .map_err(|e| Error::io("restore reader position", e))?;
257    Ok(len)
258}
259
260fn checked_range_end(offset: u64, byte_size: u64, label: &str) -> Result<u64> {
261    offset
262        .checked_add(byte_size)
263        .ok_or_else(|| Error::InvalidData(format!("{label} offset/size overflow")))
264}
265
266fn validate_range_in_file(offset: u64, byte_size: u64, file_len: u64, label: &str) -> Result<u64> {
267    let end = checked_range_end(offset, byte_size, label)?;
268    if end > file_len {
269        return Err(Error::InvalidData(format!(
270            "{label} range {offset}..{end} exceeds file length {file_len}"
271        )));
272    }
273    Ok(end)
274}
275
276fn read_hierarchy_page_at<R: Read + Seek>(
277    reader: &mut R,
278    offset: u64,
279    byte_size: u64,
280    file_len: u64,
281    limits: &mut HierarchyReadLimits,
282) -> Result<HierarchyPage> {
283    if byte_size == 0 {
284        return Err(Error::InvalidData("hierarchy page is empty".into()));
285    }
286    if byte_size % HIERARCHY_ENTRY_BYTES as u64 != 0 {
287        return Err(Error::InvalidData(format!(
288            "hierarchy page is {byte_size} bytes, not a multiple of {HIERARCHY_ENTRY_BYTES}"
289        )));
290    }
291    limits.add_page(byte_size)?;
292    validate_range_in_file(offset, byte_size, file_len, "hierarchy page")?;
293    let hierarchy_len = usize::try_from(byte_size)
294        .map_err(|_| Error::InvalidData("hierarchy page is too large".into()))?;
295    let mut hierarchy_bytes = vec![0u8; hierarchy_len];
296    reader
297        .seek(SeekFrom::Start(offset))
298        .map_err(|e| Error::io("seek hierarchy page", e))?;
299    reader
300        .read_exact(&mut hierarchy_bytes)
301        .map_err(|e| Error::io("read hierarchy page", e))?;
302    HierarchyPage::from_le_bytes(&hierarchy_bytes)
303}
304
305fn insert_hierarchy_page<R: Read + Seek>(
306    reader: &mut R,
307    page: &HierarchyPage,
308    hierarchy: &mut BTreeMap<VoxelKey, Entry>,
309    visited_pages: &mut HashSet<(u64, u64)>,
310    file_len: u64,
311    limits: &mut HierarchyReadLimits,
312) -> Result<()> {
313    for entry in page.entries().iter().copied() {
314        validate_hierarchy_entry(entry, file_len)?;
315        hierarchy.insert(entry.key, entry);
316    }
317    for entry in page.entries().iter().copied().filter(|e| e.is_child_page()) {
318        let byte_size = u64::try_from(entry.byte_size).expect("validated child page byte size");
319        if visited_pages.insert((entry.offset, byte_size)) {
320            let child_page =
321                read_hierarchy_page_at(reader, entry.offset, byte_size, file_len, limits)?;
322            insert_hierarchy_page(
323                reader,
324                &child_page,
325                hierarchy,
326                visited_pages,
327                file_len,
328                limits,
329            )?;
330        }
331    }
332    Ok(())
333}
334
335fn validate_hierarchy_entry(entry: Entry, file_len: u64) -> Result<()> {
336    match entry.availability()? {
337        EntryAvailability::Empty => Ok(()),
338        EntryAvailability::PointData { .. } => {
339            if entry.byte_size <= 0 {
340                return Err(Error::InvalidData(format!(
341                    "point data entry {:?} has invalid byte size {}",
342                    entry.key, entry.byte_size
343                )));
344            }
345            let byte_size = u64::try_from(entry.byte_size).map_err(|_| {
346                Error::InvalidData(format!(
347                    "point data entry {:?} has negative byte size {}",
348                    entry.key, entry.byte_size
349                ))
350            })?;
351            validate_range_in_file(entry.offset, byte_size, file_len, "point data entry")?;
352            Ok(())
353        }
354        EntryAvailability::ChildPage => {
355            if entry.byte_size <= 0 {
356                return Err(Error::InvalidData(format!(
357                    "child hierarchy page {:?} has invalid byte size {}",
358                    entry.key, entry.byte_size
359                )));
360            }
361            let byte_size = u64::try_from(entry.byte_size).map_err(|_| {
362                Error::InvalidData(format!(
363                    "child hierarchy page {:?} has negative byte size {}",
364                    entry.key, entry.byte_size
365                ))
366            })?;
367            validate_range_in_file(entry.offset, byte_size, file_len, "child hierarchy page")?;
368            Ok(())
369        }
370    }
371}
372
373fn read_las_header<R: Read + Seek>(reader: &mut R, file_len: u64) -> Result<LasHeader> {
374    if file_len < u64::from(LAS_HEADER_SIZE_14) {
375        return Err(Error::InvalidData(format!(
376            "file is {file_len} bytes; COPC requires at least {LAS_HEADER_SIZE_14}"
377        )));
378    }
379    reader
380        .seek(SeekFrom::Start(0))
381        .map_err(|e| Error::io("seek LAS header", e))?;
382    let mut signature = [0u8; 4];
383    reader
384        .read_exact(&mut signature)
385        .map_err(|e| Error::io("read LAS signature", e))?;
386    if &signature != b"LASF" {
387        return Err(Error::InvalidData("missing LASF signature".into()));
388    }
389    reader
390        .seek(SeekFrom::Start(94))
391        .map_err(|e| Error::io("seek LAS header size", e))?;
392    let header_size = reader
393        .read_u16::<LittleEndian>()
394        .map_err(|e| Error::io("read LAS header size", e))?;
395    if header_size < LAS_HEADER_SIZE_14 {
396        return Err(Error::Unsupported(format!(
397            "LAS header is {header_size} bytes; COPC requires LAS 1.4"
398        )));
399    }
400    if u64::from(header_size) > file_len {
401        return Err(Error::InvalidData(format!(
402            "LAS header size {header_size} exceeds file length {file_len}"
403        )));
404    }
405    let offset_to_point_data = reader
406        .read_u32::<LittleEndian>()
407        .map_err(|e| Error::io("read point data offset", e))?;
408    if u64::from(offset_to_point_data) < u64::from(header_size) {
409        return Err(Error::InvalidData(format!(
410            "point data offset {offset_to_point_data} is before LAS header size {header_size}"
411        )));
412    }
413    if u64::from(offset_to_point_data) > file_len {
414        return Err(Error::InvalidData(format!(
415            "point data offset {offset_to_point_data} exceeds file length {file_len}"
416        )));
417    }
418    let number_of_vlrs = reader
419        .read_u32::<LittleEndian>()
420        .map_err(|e| Error::io("read VLR count", e))?;
421    if number_of_vlrs > MAX_VLR_COUNT {
422        return Err(Error::InvalidData(format!(
423            "VLR count {number_of_vlrs} exceeds max supported {MAX_VLR_COUNT}"
424        )));
425    }
426    let point_data_record_format = reader
427        .read_u8()
428        .map_err(|e| Error::io("read point record format", e))?;
429    let point_data_record_length = reader
430        .read_u16::<LittleEndian>()
431        .map_err(|e| Error::io("read point record length", e))?;
432    reader
433        .seek(SeekFrom::Start(131))
434        .map_err(|e| Error::io("seek LAS transforms", e))?;
435    let x_scale_factor = reader
436        .read_f64::<LittleEndian>()
437        .map_err(|e| Error::io("read x scale factor", e))?;
438    let y_scale_factor = reader
439        .read_f64::<LittleEndian>()
440        .map_err(|e| Error::io("read y scale factor", e))?;
441    let z_scale_factor = reader
442        .read_f64::<LittleEndian>()
443        .map_err(|e| Error::io("read z scale factor", e))?;
444    let x_offset = reader
445        .read_f64::<LittleEndian>()
446        .map_err(|e| Error::io("read x offset", e))?;
447    let y_offset = reader
448        .read_f64::<LittleEndian>()
449        .map_err(|e| Error::io("read y offset", e))?;
450    let z_offset = reader
451        .read_f64::<LittleEndian>()
452        .map_err(|e| Error::io("read z offset", e))?;
453    let max_x = reader
454        .read_f64::<LittleEndian>()
455        .map_err(|e| Error::io("read max x", e))?;
456    let min_x = reader
457        .read_f64::<LittleEndian>()
458        .map_err(|e| Error::io("read min x", e))?;
459    let max_y = reader
460        .read_f64::<LittleEndian>()
461        .map_err(|e| Error::io("read max y", e))?;
462    let min_y = reader
463        .read_f64::<LittleEndian>()
464        .map_err(|e| Error::io("read min y", e))?;
465    let max_z = reader
466        .read_f64::<LittleEndian>()
467        .map_err(|e| Error::io("read max z", e))?;
468    let min_z = reader
469        .read_f64::<LittleEndian>()
470        .map_err(|e| Error::io("read min z", e))?;
471    reader
472        .seek(SeekFrom::Start(235))
473        .map_err(|e| Error::io("seek LAS 1.4 fields", e))?;
474    let offset_to_first_evlr = reader
475        .read_u64::<LittleEndian>()
476        .map_err(|e| Error::io("read first EVLR offset", e))?;
477    let number_of_evlrs = reader
478        .read_u32::<LittleEndian>()
479        .map_err(|e| Error::io("read EVLR count", e))?;
480    if number_of_evlrs > MAX_EVLR_COUNT {
481        return Err(Error::InvalidData(format!(
482            "EVLR count {number_of_evlrs} exceeds max supported {MAX_EVLR_COUNT}"
483        )));
484    }
485    if offset_to_first_evlr != 0 && offset_to_first_evlr > file_len {
486        return Err(Error::InvalidData(format!(
487            "first EVLR offset {offset_to_first_evlr} exceeds file length {file_len}"
488        )));
489    }
490    let number_of_points = reader
491        .read_u64::<LittleEndian>()
492        .map_err(|e| Error::io("read point count", e))?;
493    reader
494        .seek(SeekFrom::Start(u64::from(header_size)))
495        .map_err(|e| Error::io("seek after LAS header", e))?;
496    Ok(LasHeader {
497        point_data_record_format,
498        point_data_record_length,
499        offset_to_point_data,
500        number_of_vlrs,
501        x_scale_factor,
502        y_scale_factor,
503        z_scale_factor,
504        x_offset,
505        y_offset,
506        z_offset,
507        min_x,
508        max_x,
509        min_y,
510        max_y,
511        min_z,
512        max_z,
513        offset_to_first_evlr,
514        number_of_evlrs,
515        number_of_points,
516    })
517}
518
519fn read_vlrs<R: Read + Seek>(
520    reader: &mut R,
521    count: u32,
522    file_len: u64,
523    section_end: u64,
524) -> Result<Vec<Vlr>> {
525    if count > MAX_VLR_COUNT {
526        return Err(Error::InvalidData(format!(
527            "VLR count {count} exceeds max supported {MAX_VLR_COUNT}"
528        )));
529    }
530    if section_end > file_len {
531        return Err(Error::InvalidData(format!(
532            "VLR section end {section_end} exceeds file length {file_len}"
533        )));
534    }
535    let mut vlrs = Vec::new();
536    for index in 0..count {
537        let header_offset = reader
538            .stream_position()
539            .map_err(|e| Error::io("record VLR offset", e))?;
540        validate_range_in_file(header_offset, VLR_HEADER_BYTES, section_end, "VLR header")?;
541        let _reserved = reader
542            .read_u16::<LittleEndian>()
543            .map_err(|e| Error::io("read VLR reserved", e))?;
544        let mut user_id = [0u8; 16];
545        reader
546            .read_exact(&mut user_id)
547            .map_err(|e| Error::io("read VLR user id", e))?;
548        let record_id = reader
549            .read_u16::<LittleEndian>()
550            .map_err(|e| Error::io("read VLR record id", e))?;
551        let record_length = reader
552            .read_u16::<LittleEndian>()
553            .map_err(|e| Error::io("read VLR length", e))?;
554        let mut description = [0u8; 32];
555        reader
556            .read_exact(&mut description)
557            .map_err(|e| Error::io("read VLR description", e))?;
558        let data_offset = reader
559            .stream_position()
560            .map_err(|e| Error::io("record VLR data offset", e))?;
561        let data_end = validate_range_in_file(
562            data_offset,
563            u64::from(record_length),
564            section_end,
565            "VLR data",
566        )?;
567        let user_id_str = trim_nul(&user_id).to_string();
568        if should_store_vlr(&user_id_str, record_id) {
569            let mut data = vec![0u8; usize::from(record_length)];
570            reader
571                .read_exact(&mut data)
572                .map_err(|e| Error::io("read VLR data", e))?;
573            vlrs.push(Vlr {
574                user_id: user_id_str,
575                record_id,
576                data,
577            });
578        } else {
579            reader
580                .seek(SeekFrom::Start(data_end))
581                .map_err(|e| Error::io("skip VLR data", e))?;
582        }
583        let actual_next = reader
584            .stream_position()
585            .map_err(|e| Error::io("record next VLR offset", e))?;
586        if actual_next != data_end {
587            return Err(Error::InvalidData(format!(
588                "VLR {index} cursor at {actual_next}, expected {data_end}"
589            )));
590        }
591    }
592    Ok(vlrs)
593}
594
/// Report whether a VLR's payload should be kept in memory.
///
/// Only two records are kept: user id `"copc"` with record id 1, and user id
/// `"laszip encoded"` with record id 22204.
fn should_store_vlr(user_id: &str, record_id: u16) -> bool {
    matches!(
        (user_id, record_id),
        ("copc", 1) | ("laszip encoded", 22204)
    )
}
598
599fn read_evlr_refs<R: Read + Seek>(
600    reader: &mut R,
601    header: &LasHeader,
602    file_len: u64,
603) -> Result<Vec<EvlrRef>> {
604    if header.offset_to_first_evlr == 0 || header.number_of_evlrs == 0 {
605        return Ok(Vec::new());
606    }
607    if header.number_of_evlrs > MAX_EVLR_COUNT {
608        return Err(Error::InvalidData(format!(
609            "EVLR count {} exceeds max supported {}",
610            header.number_of_evlrs, MAX_EVLR_COUNT
611        )));
612    }
613    validate_range_in_file(
614        header.offset_to_first_evlr,
615        EVLR_HEADER_BYTES,
616        file_len,
617        "first EVLR header",
618    )?;
619    reader
620        .seek(SeekFrom::Start(header.offset_to_first_evlr))
621        .map_err(|e| Error::io("seek EVLRs", e))?;
622    let mut evlrs = Vec::new();
623    for index in 0..header.number_of_evlrs {
624        let header_start = reader
625            .stream_position()
626            .map_err(|e| Error::io("record EVLR offset", e))?;
627        validate_range_in_file(header_start, EVLR_HEADER_BYTES, file_len, "EVLR header")?;
628        let _reserved = reader
629            .read_u16::<LittleEndian>()
630            .map_err(|e| Error::io("read EVLR reserved", e))?;
631        let mut user_id = [0u8; 16];
632        reader
633            .read_exact(&mut user_id)
634            .map_err(|e| Error::io("read EVLR user id", e))?;
635        let record_id = reader
636            .read_u16::<LittleEndian>()
637            .map_err(|e| Error::io("read EVLR record id", e))?;
638        let data_len = reader
639            .read_u64::<LittleEndian>()
640            .map_err(|e| Error::io("read EVLR length", e))?;
641        let mut description = [0u8; 32];
642        reader
643            .read_exact(&mut description)
644            .map_err(|e| Error::io("read EVLR description", e))?;
645        let data_offset = reader
646            .stream_position()
647            .map_err(|e| Error::io("record EVLR data offset", e))?;
648        evlrs.push(EvlrRef {
649            user_id,
650            record_id,
651            data_offset,
652        });
653        let expected_next = validate_range_in_file(data_offset, data_len, file_len, "EVLR data")?;
654        reader
655            .seek(SeekFrom::Start(expected_next))
656            .map_err(|e| Error::io("skip EVLR data", e))?;
657        let actual_next = reader
658            .stream_position()
659            .map_err(|e| Error::io("record next EVLR offset", e))?;
660        if actual_next != expected_next {
661            return Err(Error::InvalidData(format!(
662                "EVLR {index} cursor at {actual_next}, expected {expected_next}"
663            )));
664        }
665    }
666    Ok(evlrs)
667}
668
/// Interpret `bytes` as a NUL-terminated string.
///
/// Returns the text before the first NUL byte (or all of `bytes` if there is
/// none). If that prefix is not valid UTF-8 the empty string is returned.
fn trim_nul(bytes: &[u8]) -> &str {
    let text = match bytes.iter().position(|&byte| byte == 0) {
        Some(nul_at) => &bytes[..nul_at],
        None => bytes,
    };
    match std::str::from_utf8(text) {
        Ok(valid) => valid,
        Err(_) => "",
    }
}
673
674#[cfg(test)]
675mod tests {
676    use super::*;
677
678    use byteorder::{LittleEndian, WriteBytesExt};
679    use copc_core::{EntryAvailability, HIERARCHY_ENTRY_BYTES};
680    use laz::LazVlrBuilder;
681    use std::io::{Cursor, Write};
682
    /// Parsing the fixture must follow the root page's child-page entry and end up
    /// with three point-data entries (root, child 3, grandchild 5) and no child-page
    /// entries left in the index.
    #[test]
    fn hierarchy_walk_loads_recursive_child_pages() {
        let mut fixture = Cursor::new(copc_with_child_hierarchy_page());
        let file = CopcFile::from_reader(&mut fixture).unwrap();
        let child_key = VoxelKey::root().child(3);
        let grandchild_key = child_key.child(5);

        // The root page itself is kept as read: two entries, the second a child page.
        assert_eq!(file.root_hierarchy().entries().len(), 2);
        assert!(file.root_hierarchy().entries()[1].is_child_page());

        let hierarchy = file.hierarchy();
        assert_eq!(hierarchy.len(), 3);
        assert_eq!(
            hierarchy
                .get(&VoxelKey::root())
                .unwrap()
                .availability()
                .unwrap(),
            EntryAvailability::PointData { point_count: 5 }
        );
        assert_eq!(
            hierarchy.get(&child_key).unwrap().availability().unwrap(),
            EntryAvailability::PointData { point_count: 4 }
        );
        assert_eq!(
            hierarchy
                .get(&grandchild_key)
                .unwrap()
                .availability()
                .unwrap(),
            EntryAvailability::PointData { point_count: 3 }
        );
        assert!(!hierarchy.values().any(|entry| entry.is_child_page()));

        // `hierarchy_walk` returns the same entries; 5 + 4 + 3 points in total.
        let walk = file.hierarchy_walk();
        assert_eq!(walk.len(), hierarchy.len());
        assert_eq!(walk.iter().map(|entry| entry.point_count).sum::<i32>(), 12);
    }
721
    /// A VLR count one above the maximum must be rejected.
    #[test]
    fn rejects_excessive_vlr_count_before_allocation() {
        let mut bytes = copc_with_child_hierarchy_page();
        // Offset 100 is where the header parser reads the VLR count.
        put_u32(&mut bytes, 100, MAX_VLR_COUNT + 1);

        let err = CopcFile::from_reader(&mut Cursor::new(bytes)).unwrap_err();

        assert!(err.to_string().contains("VLR count"));
    }
731
    /// An EVLR count one above the maximum must be rejected.
    #[test]
    fn rejects_excessive_evlr_count_before_allocation() {
        let mut bytes = copc_with_child_hierarchy_page();
        // Offset 243 is where the header parser reads the EVLR count.
        put_u32(&mut bytes, 243, MAX_EVLR_COUNT + 1);

        let err = CopcFile::from_reader(&mut Cursor::new(bytes)).unwrap_err();

        assert!(err.to_string().contains("EVLR count"));
    }
741
    /// A root hierarchy size one entry above the per-page maximum must be rejected
    /// with the "max supported" error.
    #[test]
    fn rejects_oversized_root_hierarchy_page_before_allocation() {
        let mut bytes = copc_with_child_hierarchy_page();
        // Payload of the first VLR; presumably the COPC info record in this fixture,
        // with the root hierarchy size at payload offset 48 (see the sibling test
        // that writes `root_hier_size` there).
        let copc_info_data = usize::from(LAS_HEADER_SIZE_14) + VLR_HEADER_BYTES as usize;
        put_u64(
            &mut bytes,
            copc_info_data + 48,
            MAX_HIERARCHY_PAGE_BYTES + HIERARCHY_ENTRY_BYTES as u64,
        );

        let err = CopcFile::from_reader(&mut Cursor::new(bytes)).unwrap_err();

        assert!(err.to_string().contains("hierarchy page"));
        assert!(err.to_string().contains("max supported"));
    }
757
    /// A child-page entry whose offset lies past the end of the file must be rejected.
    #[test]
    fn rejects_child_hierarchy_page_outside_file() {
        let mut bytes = copc_with_child_hierarchy_page();
        let copc_info_data = usize::from(LAS_HEADER_SIZE_14) + VLR_HEADER_BYTES as usize;
        let root_hier_offset = read_u64(&bytes, copc_info_data + 40) as usize;
        // Second entry of the root page (the child-page entry); its offset field is
        // patched at byte 16 within the entry.
        let child_entry_offset_field = root_hier_offset + HIERARCHY_ENTRY_BYTES + 16;
        let outside_file = bytes.len() as u64 + 1;
        put_u64(&mut bytes, child_entry_offset_field, outside_file);

        let err = CopcFile::from_reader(&mut Cursor::new(bytes)).unwrap_err();

        assert!(err.to_string().contains("child hierarchy page"));
        assert!(err.to_string().contains("exceeds file length"));
    }
772
    /// Header size, point-data offset and first-EVLR offset values that are
    /// inconsistent with the file must each be rejected with a message naming the
    /// offending field.
    #[test]
    fn rejects_header_offsets_outside_file_before_allocation() {
        // Each case is (file offset to patch, value to write, expected message fragment).
        // Offsets 94, 96 and 235 are where the header parser reads the header size,
        // point-data offset and first-EVLR offset respectively.
        for (offset, value, expected) in [
            (94, u64::from(u16::MAX), "LAS header size"),
            (96, 1, "point data offset"),
            (96, u64::MAX, "point data offset"),
            (235, u64::MAX, "first EVLR offset"),
        ] {
            let mut bytes = copc_with_child_hierarchy_page();
            put_int(&mut bytes, offset, value);

            let err = CopcFile::from_reader(&mut Cursor::new(bytes)).unwrap_err();

            assert!(
                err.to_string().contains(expected),
                "expected {expected:?}, got {err}"
            );
        }
    }
792
    /// VLR and EVLR payload lengths that run past the allowed range must be rejected.
    #[test]
    fn rejects_vlr_and_evlr_lengths_outside_file_before_allocation() {
        let mut bytes = copc_with_child_hierarchy_page();
        // The VLR length field sits 20 bytes into the VLR header (2 + 16 + 2).
        let first_vlr_length_field = usize::from(LAS_HEADER_SIZE_14) + 20;
        put_u16(&mut bytes, first_vlr_length_field, u16::MAX);

        let err = CopcFile::from_reader(&mut Cursor::new(bytes)).unwrap_err();

        assert!(err.to_string().contains("VLR data"));
        assert!(err.to_string().contains("exceeds file length"));

        let mut bytes = copc_with_child_hierarchy_page();
        // Offset 235 holds the first-EVLR offset; the EVLR length field likewise
        // sits 20 bytes into the EVLR header.
        let evlr_start = read_u64(&bytes, 235) as usize;
        let evlr_length_field = evlr_start + 20;
        put_u64(&mut bytes, evlr_length_field, u64::MAX);

        let err = CopcFile::from_reader(&mut Cursor::new(bytes)).unwrap_err();

        assert!(err.to_string().contains("EVLR data"));
        // `u64::MAX` may trip either the overflow check or the file-length check.
        assert!(
            err.to_string().contains("overflow") || err.to_string().contains("exceeds file length")
        );
    }
816
    /// A root hierarchy size of zero, or one that is not a whole number of entries,
    /// must be rejected.
    #[test]
    fn rejects_malformed_root_hierarchy_sizes() {
        // Each case is (root hierarchy size to write, expected message fragment).
        for (root_hier_size, expected) in [
            (0, "empty"),
            (HIERARCHY_ENTRY_BYTES as u64 - 1, "not a multiple"),
        ] {
            let mut bytes = copc_with_child_hierarchy_page();
            let copc_info_data = usize::from(LAS_HEADER_SIZE_14) + VLR_HEADER_BYTES as usize;
            put_u64(&mut bytes, copc_info_data + 48, root_hier_size);

            let err = CopcFile::from_reader(&mut Cursor::new(bytes)).unwrap_err();

            assert!(
                err.to_string().contains(expected),
                "expected {expected:?}, got {err}"
            );
        }
    }
835
    /// A zero byte size must be rejected for both a point-data entry and a
    /// child-page entry.
    #[test]
    fn rejects_invalid_hierarchy_entry_byte_sizes() {
        let mut bytes = copc_with_child_hierarchy_page();
        let copc_info_data = usize::from(LAS_HEADER_SIZE_14) + VLR_HEADER_BYTES as usize;
        let root_hier_offset = read_u64(&bytes, copc_info_data + 40) as usize;
        // First root-page entry (point data): zero the field at byte 24 of the entry.
        put_i32(&mut bytes, root_hier_offset + 24, 0);

        let err = CopcFile::from_reader(&mut Cursor::new(bytes)).unwrap_err();

        assert!(err.to_string().contains("point data entry"));
        assert!(err.to_string().contains("invalid byte size"));

        let mut bytes = copc_with_child_hierarchy_page();
        let root_hier_offset = read_u64(&bytes, copc_info_data + 40) as usize;
        // Second root-page entry (child page): same field, one entry further on.
        put_i32(&mut bytes, root_hier_offset + HIERARCHY_ENTRY_BYTES + 24, 0);

        let err = CopcFile::from_reader(&mut Cursor::new(bytes)).unwrap_err();

        assert!(err.to_string().contains("child hierarchy page"));
        assert!(err.to_string().contains("invalid byte size"));
    }
857
858    #[test]
859    fn truncated_inputs_fail_without_panicking() {
860        let bytes = copc_with_child_hierarchy_page();
861        for len in [
862            0,
863            1,
864            4,
865            128,
866            usize::from(LAS_HEADER_SIZE_14) - 1,
867            usize::from(LAS_HEADER_SIZE_14),
868            bytes.len() / 2,
869            bytes.len() - 1,
870        ] {
871            let truncated = bytes[..len].to_vec();
872
873            let err = CopcFile::from_reader(&mut Cursor::new(truncated)).unwrap_err();
874
875            assert!(
876                !err.to_string().is_empty(),
877                "truncated input length {len} produced an empty error"
878            );
879        }
880    }
881
882    fn copc_with_child_hierarchy_page() -> Vec<u8> {
883        let mut laz_vlr_bytes = Vec::new();
884        LazVlrBuilder::default()
885            .with_point_format(6, 0)
886            .unwrap()
887            .with_variable_chunk_size()
888            .build()
889            .write_to(&mut laz_vlr_bytes)
890            .unwrap();
891
892        let offset_to_point_data = u32::from(LAS_HEADER_SIZE_14)
893            + (54 + copc_core::info::COPC_INFO_BYTES as u32)
894            + (54 + laz_vlr_bytes.len() as u32);
895        let root_point_offset = u64::from(offset_to_point_data);
896        let child_point_offset = root_point_offset + 100;
897        let grandchild_point_offset = child_point_offset + 200;
898        let evlr_start = grandchild_point_offset + 220;
899        let root_hier_offset = evlr_start + 60;
900        let root_hier_size = (2 * HIERARCHY_ENTRY_BYTES) as u64;
901        let child_page_offset = root_hier_offset + root_hier_size;
902
903        let child_key = VoxelKey::root().child(3);
904        let grandchild_key = child_key.child(5);
905        let child_page = HierarchyPage::new(vec![
906            Entry {
907                key: child_key,
908                offset: child_point_offset,
909                byte_size: 200,
910                point_count: 4,
911            },
912            Entry {
913                key: grandchild_key,
914                offset: grandchild_point_offset,
915                byte_size: 220,
916                point_count: 3,
917            },
918        ]);
919        let child_page_bytes = child_page.write_le_bytes().unwrap();
920        let root_page = HierarchyPage::new(vec![
921            Entry {
922                key: VoxelKey::root(),
923                offset: root_point_offset,
924                byte_size: 100,
925                point_count: 5,
926            },
927            Entry {
928                key: child_key,
929                offset: child_page_offset,
930                byte_size: child_page_bytes.len() as i32,
931                point_count: -1,
932            },
933        ]);
934        let root_page_bytes = root_page.write_le_bytes().unwrap();
935
936        let info = CopcInfo {
937            center: (0.0, 0.0, 0.0),
938            halfsize: 10.0,
939            spacing: 1.0,
940            root_hier_offset,
941            root_hier_size,
942            gpstime_min: 0.0,
943            gpstime_max: 0.0,
944        };
945
946        let mut out = Vec::new();
947        write_las_header(&mut out, offset_to_point_data, evlr_start, 12);
948        write_vlr(&mut out, "copc", 1, &info.write_le_bytes(), "COPC info");
949        write_vlr(
950            &mut out,
951            "laszip encoded",
952            22204,
953            &laz_vlr_bytes,
954            "http://laszip.org",
955        );
956        assert_eq!(out.len(), offset_to_point_data as usize);
957        out.resize(evlr_start as usize, 0);
958
959        write_evlr_header(
960            &mut out,
961            "copc",
962            1000,
963            root_page_bytes.len() as u64,
964            "COPC hierarchy",
965        );
966        assert_eq!(out.len() as u64, root_hier_offset);
967        out.extend_from_slice(&root_page_bytes);
968        assert_eq!(out.len() as u64, child_page_offset);
969        out.extend_from_slice(&child_page_bytes);
970        out
971    }
972
973    fn write_las_header(
974        out: &mut Vec<u8>,
975        offset_to_point_data: u32,
976        evlr_start: u64,
977        point_count: u64,
978    ) {
979        out.resize(usize::from(LAS_HEADER_SIZE_14), 0);
980        out[0..4].copy_from_slice(b"LASF");
981        out[24] = 1;
982        out[25] = 4;
983        put_u16(out, 94, LAS_HEADER_SIZE_14);
984        put_u32(out, 96, offset_to_point_data);
985        put_u32(out, 100, 2);
986        out[104] = 6 | 0x80;
987        put_u16(out, 105, 30);
988        put_f64(out, 131, 0.001);
989        put_f64(out, 139, 0.001);
990        put_f64(out, 147, 0.001);
991        put_f64(out, 155, 0.0);
992        put_f64(out, 163, 0.0);
993        put_f64(out, 171, 0.0);
994        put_f64(out, 179, 10.0);
995        put_f64(out, 187, -10.0);
996        put_f64(out, 195, 10.0);
997        put_f64(out, 203, -10.0);
998        put_f64(out, 211, 10.0);
999        put_f64(out, 219, -10.0);
1000        put_u64(out, 235, evlr_start);
1001        put_u32(out, 243, 1);
1002        put_u64(out, 247, point_count);
1003    }
1004
1005    fn write_vlr(out: &mut Vec<u8>, user_id: &str, record_id: u16, data: &[u8], desc: &str) {
1006        out.write_u16::<LittleEndian>(0).unwrap();
1007        out.write_all(&padded(user_id.as_bytes(), 16)).unwrap();
1008        out.write_u16::<LittleEndian>(record_id).unwrap();
1009        out.write_u16::<LittleEndian>(data.len() as u16).unwrap();
1010        out.write_all(&padded(desc.as_bytes(), 32)).unwrap();
1011        out.write_all(data).unwrap();
1012    }
1013
1014    fn write_evlr_header(
1015        out: &mut Vec<u8>,
1016        user_id: &str,
1017        record_id: u16,
1018        data_len: u64,
1019        desc: &str,
1020    ) {
1021        out.write_u16::<LittleEndian>(0).unwrap();
1022        out.write_all(&padded(user_id.as_bytes(), 16)).unwrap();
1023        out.write_u16::<LittleEndian>(record_id).unwrap();
1024        out.write_u64::<LittleEndian>(data_len).unwrap();
1025        out.write_all(&padded(desc.as_bytes(), 32)).unwrap();
1026    }
1027
1028    fn padded(bytes: &[u8], len: usize) -> Vec<u8> {
1029        let mut out = vec![0u8; len];
1030        let count = bytes.len().min(len);
1031        out[..count].copy_from_slice(&bytes[..count]);
1032        out
1033    }
1034
1035    fn put_u16(out: &mut [u8], offset: usize, value: u16) {
1036        out[offset..offset + 2].copy_from_slice(&value.to_le_bytes());
1037    }
1038
1039    fn put_u32(out: &mut [u8], offset: usize, value: u32) {
1040        out[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
1041    }
1042
1043    fn put_u64(out: &mut [u8], offset: usize, value: u64) {
1044        out[offset..offset + 8].copy_from_slice(&value.to_le_bytes());
1045    }
1046
1047    fn put_i32(out: &mut [u8], offset: usize, value: i32) {
1048        out[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
1049    }
1050
1051    fn put_int(out: &mut [u8], offset: usize, value: u64) {
1052        match offset {
1053            94 => put_u16(out, offset, value as u16),
1054            96 => put_u32(out, offset, value as u32),
1055            235 => put_u64(out, offset, value),
1056            _ => unreachable!("unexpected integer offset"),
1057        }
1058    }
1059
1060    fn read_u64(bytes: &[u8], offset: usize) -> u64 {
1061        u64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap())
1062    }
1063
1064    fn put_f64(out: &mut [u8], offset: usize, value: f64) {
1065        out[offset..offset + 8].copy_from_slice(&value.to_le_bytes());
1066    }
1067}