Skip to main content

copc_writer/
writer.rs

1use std::fs::File;
2use std::io::{BufReader, BufWriter, Cursor, Seek, SeekFrom, Write};
3use std::path::Path;
4
5use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
6use copc_core::{
7    Bounds, CancelCheck, ColumnData, CopcInfo, Entry, Error, LasColumnBatch, LasDimension,
8    LasPointRecord, NeverCancel, Result, StreamingLayout, VoxelKey, HIERARCHY_ENTRY_BYTES,
9};
10use las::{point::Format as LasFormat, raw, Color, Read as _};
11use laz::{LasZipCompressor, LazVlrBuilder};
12use tempfile::{NamedTempFile, TempPath};
13
14use crate::spill::{SpillReader, SpillWriter};
15
16const CANCEL_POLL_STRIDE: usize = 4_096;
17const HIERARCHY_PAGE_MAX_ENTRIES: usize = 4_096;
18const INDEX_RECORD_BYTES: u64 = 4;
19/// Depth bounds for the LOD octree. The layered LAZ compressor buffers an
20/// entire COPC chunk (one octree node) in memory before flushing, so a node
21/// holding far more than `max_points_per_node` points costs proportional
22/// memory. A too-shallow `max_depth` over a dense cluster stops subdivision
23/// while a node is still huge — the multi-gigabyte failure mode on real clouds.
24/// Clamping `max_depth` up to `MIN_LEAF_DEPTH` keeps nodes subdividing until
25/// they fit in a chunk; realistic clouds reach that far shallower, so it only
26/// affects pathologically dense input. `MAX_LEAF_DEPTH` keeps voxel keys in
27/// range.
28const MIN_LEAF_DEPTH: u32 = 21;
29const MAX_LEAF_DEPTH: u32 = 30;
30const LAS_14_SCAN_ANGLE_SCALE: f32 = 0.006;
31const LAS_VLR_HEADER_BYTES: u32 = 54;
32const LAS_EVLR_HEADER_BYTES: u64 = 60;
33const LASZIP_VLR_USER_ID: &str = "laszip encoded";
34const LASZIP_VLR_RECORD_ID: u16 = 22204;
35const LASF_PROJECTION_USER_ID: &str = "LASF_Projection";
36const WKT_CRS_RECORD_ID: u16 = 2112;
37const GEOTIFF_GEO_KEY_DIRECTORY_RECORD_ID: u16 = 34735;
38const GEOTIFF_DOUBLE_PARAMS_RECORD_ID: u16 = 34736;
39const GEOTIFF_ASCII_PARAMS_RECORD_ID: u16 = 34737;
40const LASF_SPEC_USER_ID: &str = "LASF_Spec";
41const EXTRA_BYTES_RECORD_ID: u16 = 4;
42const WKT_GLOBAL_ENCODING_BIT: u16 = 16;
43const LAS_INPUT_BUFFER_BYTES: usize = 1024 * 1024;
44const COPC_OUTPUT_BUFFER_BYTES: usize = 1024 * 1024;
45const INDEX_IO_BUFFER_BYTES: usize = 1024 * 1024;
46
47/// Normalized point fields consumed by the COPC writer.
48#[derive(Clone, Debug, PartialEq)]
49pub struct CopcPointFields {
50    pub x: f64,
51    pub y: f64,
52    pub z: f64,
53    pub intensity: u16,
54    pub return_number: u8,
55    pub number_of_returns: u8,
56    pub synthetic: u8,
57    pub key_point: u8,
58    pub withheld: u8,
59    pub overlap: u8,
60    pub scan_channel: u8,
61    pub scan_direction_flag: u8,
62    pub edge_of_flight_line: u8,
63    pub classification: u8,
64    pub user_data: u8,
65    /// Scan angle in degrees; encoded as LAS 1.4 scaled scan angle on write.
66    pub scan_angle: f32,
67    pub point_source_id: u16,
68    pub gps_time: f64,
69    pub red: u16,
70    pub green: u16,
71    pub blue: u16,
72    pub extra_bytes: Vec<u8>,
73}
74
75/// Abstract point-data source for COPC emission.
76pub trait CopcPointSource {
77    fn len(&self) -> usize;
78    fn xyz(&self, index: usize) -> (f64, f64, f64);
79    fn fields(&self, index: usize) -> Result<CopcPointFields>;
80    fn extra_byte_count(&self) -> u16 {
81        0
82    }
83    fn extra_bytes_vlrs(&self) -> &[las::Vlr] {
84        &[]
85    }
86
87    fn is_empty(&self) -> bool {
88        self.len() == 0
89    }
90}
91
92/// COPC writer source backed directly by a neutral LAS column batch.
93pub struct ColumnBatchSource<'a> {
94    batch: &'a LasColumnBatch,
95    x: &'a [f64],
96    y: &'a [f64],
97    z: &'a [f64],
98    intensity: Option<&'a [u16]>,
99    return_number: Option<&'a [u8]>,
100    number_of_returns: Option<&'a [u8]>,
101    synthetic: Option<&'a [bool]>,
102    key_point: Option<&'a [bool]>,
103    withheld: Option<&'a [bool]>,
104    overlap: Option<&'a [bool]>,
105    scan_channel: Option<&'a [u8]>,
106    scan_direction_flag: Option<&'a [bool]>,
107    edge_of_flight_line: Option<&'a [bool]>,
108    classification: Option<&'a [u8]>,
109    user_data: Option<&'a [u8]>,
110    scan_angle_rank: Option<&'a [i16]>,
111    point_source_id: Option<&'a [u16]>,
112    gps_time: Option<&'a [f64]>,
113    red: Option<&'a [u16]>,
114    green: Option<&'a [u16]>,
115    blue: Option<&'a [u16]>,
116    extra_bytes: Option<(&'a [u8], usize)>,
117}
118
119impl<'a> ColumnBatchSource<'a> {
120    pub fn new(batch: &'a LasColumnBatch) -> Result<Self> {
121        batch.validate()?;
122        validate_column_batch_writer_support(batch)?;
123
124        let x = required_f64_column(batch, LasDimension::X)?;
125        let y = required_f64_column(batch, LasDimension::Y)?;
126        let z = required_f64_column(batch, LasDimension::Z)?;
127        let red = optional_u16_column(batch, LasDimension::Red)?;
128        let green = optional_u16_column(batch, LasDimension::Green)?;
129        let blue = optional_u16_column(batch, LasDimension::Blue)?;
130        let extra_bytes = optional_extra_bytes_column(batch)?;
131        validate_color_columns(red, green, blue)?;
132
133        Ok(Self {
134            batch,
135            x,
136            y,
137            z,
138            intensity: optional_u16_column(batch, LasDimension::Intensity)?,
139            return_number: optional_u8_column(batch, LasDimension::ReturnNumber)?,
140            number_of_returns: optional_u8_column(batch, LasDimension::NumberOfReturns)?,
141            synthetic: optional_bool_column(batch, LasDimension::Synthetic)?,
142            key_point: optional_bool_column(batch, LasDimension::KeyPoint)?,
143            withheld: optional_bool_column(batch, LasDimension::Withheld)?,
144            overlap: optional_bool_column(batch, LasDimension::Overlap)?,
145            scan_channel: optional_u8_column(batch, LasDimension::ScanChannel)?,
146            scan_direction_flag: optional_bool_column(batch, LasDimension::ScanDirectionFlag)?,
147            edge_of_flight_line: optional_bool_column(batch, LasDimension::EdgeOfFlightLine)?,
148            classification: optional_u8_column(batch, LasDimension::Classification)?,
149            user_data: optional_u8_column(batch, LasDimension::UserData)?,
150            scan_angle_rank: optional_i16_column(batch, LasDimension::ScanAngleRank)?,
151            point_source_id: optional_u16_column(batch, LasDimension::PointSourceId)?,
152            gps_time: optional_f64_column(batch, LasDimension::GpsTime)?,
153            red,
154            green,
155            blue,
156            extra_bytes,
157        })
158    }
159
160    pub fn batch(&self) -> &LasColumnBatch {
161        self.batch
162    }
163
164    pub fn has_color(&self) -> bool {
165        self.red.is_some() && self.green.is_some() && self.blue.is_some()
166    }
167
168    pub fn extra_byte_width(&self) -> usize {
169        self.extra_bytes.map(|(_, width)| width).unwrap_or(0)
170    }
171
172    pub fn bounds(&self) -> Result<Bounds> {
173        if self.is_empty() {
174            return Err(Error::InvalidInput(
175                "cannot compute bounds for empty column batch".into(),
176            ));
177        }
178        let mut bounds = Bounds::point(self.x[0], self.y[0], self.z[0]);
179        for index in 1..self.len() {
180            bounds.extend(self.x[index], self.y[index], self.z[index]);
181        }
182        Ok(bounds)
183    }
184}
185
186impl CopcPointSource for ColumnBatchSource<'_> {
187    fn len(&self) -> usize {
188        self.batch.len()
189    }
190
191    #[inline]
192    fn xyz(&self, index: usize) -> (f64, f64, f64) {
193        (self.x[index], self.y[index], self.z[index])
194    }
195
196    fn fields(&self, index: usize) -> Result<CopcPointFields> {
197        Ok(CopcPointFields {
198            x: self.x[index],
199            y: self.y[index],
200            z: self.z[index],
201            intensity: at_u16(self.intensity, index),
202            return_number: at_u8(self.return_number, index),
203            number_of_returns: at_u8(self.number_of_returns, index),
204            synthetic: at_bool_u8(self.synthetic, index),
205            key_point: at_bool_u8(self.key_point, index),
206            withheld: at_bool_u8(self.withheld, index),
207            overlap: at_bool_u8(self.overlap, index),
208            scan_channel: at_u8(self.scan_channel, index),
209            scan_direction_flag: at_bool_u8(self.scan_direction_flag, index),
210            edge_of_flight_line: at_bool_u8(self.edge_of_flight_line, index),
211            classification: at_u8(self.classification, index),
212            user_data: at_u8(self.user_data, index),
213            scan_angle: self
214                .scan_angle_rank
215                .map(|column| column[index] as f32 * 90.0 / 180.0)
216                .unwrap_or(0.0),
217            point_source_id: at_u16(self.point_source_id, index),
218            gps_time: self.gps_time.map(|column| column[index]).unwrap_or(0.0),
219            red: at_u16(self.red, index),
220            green: at_u16(self.green, index),
221            blue: at_u16(self.blue, index),
222            extra_bytes: extra_bytes_at(self.extra_bytes, index),
223        })
224    }
225
226    fn extra_byte_count(&self) -> u16 {
227        self.extra_byte_width() as u16
228    }
229}
230
231fn at_bool_u8(column: Option<&[bool]>, index: usize) -> u8 {
232    column.map(|values| u8::from(values[index])).unwrap_or(0)
233}
234
235fn at_u8(column: Option<&[u8]>, index: usize) -> u8 {
236    column.map(|values| values[index]).unwrap_or(0)
237}
238
239fn at_u16(column: Option<&[u16]>, index: usize) -> u16 {
240    column.map(|values| values[index]).unwrap_or(0)
241}
242
243fn validate_column_batch_writer_support(batch: &LasColumnBatch) -> Result<()> {
244    let unsupported: Vec<_> = batch
245        .columns
246        .iter()
247        .filter_map(|(spec, _)| match spec.dimension {
248            LasDimension::Nir => Some("NIR point data"),
249            LasDimension::WaveformPacketDescriptorIndex
250            | LasDimension::WaveformPacketByteOffset
251            | LasDimension::WaveformPacketSize
252            | LasDimension::WavePacketReturnPointWaveformLocation => Some("waveform point data"),
253            _ => None,
254        })
255        .collect();
256    if unsupported.is_empty() {
257        Ok(())
258    } else {
259        Err(Error::Unsupported(format!(
260            "COPC writer cannot preserve {}",
261            unsupported.join(", ")
262        )))
263    }
264}
265
266fn validate_color_columns(
267    red: Option<&[u16]>,
268    green: Option<&[u16]>,
269    blue: Option<&[u16]>,
270) -> Result<()> {
271    let present =
272        usize::from(red.is_some()) + usize::from(green.is_some()) + usize::from(blue.is_some());
273    if present == 0 || present == 3 {
274        Ok(())
275    } else {
276        Err(Error::InvalidInput(
277            "Red, Green, and Blue columns must be supplied together".into(),
278        ))
279    }
280}
281
282fn required_f64_column(batch: &LasColumnBatch, dimension: LasDimension) -> Result<&[f64]> {
283    match batch.column(dimension) {
284        Some(ColumnData::F64(values)) => Ok(values),
285        Some(other) => Err(unexpected_column_type(dimension, "F64", other)),
286        None => Err(Error::InvalidInput(format!(
287            "ColumnBatchSource requires {dimension:?} column"
288        ))),
289    }
290}
291
292fn optional_f64_column(batch: &LasColumnBatch, dimension: LasDimension) -> Result<Option<&[f64]>> {
293    match batch.column(dimension) {
294        Some(ColumnData::F64(values)) => Ok(Some(values)),
295        Some(other) => Err(unexpected_column_type(dimension, "F64", other)),
296        None => Ok(None),
297    }
298}
299
300fn optional_i16_column(batch: &LasColumnBatch, dimension: LasDimension) -> Result<Option<&[i16]>> {
301    match batch.column(dimension) {
302        Some(ColumnData::I16(values)) => Ok(Some(values)),
303        Some(other) => Err(unexpected_column_type(dimension, "I16", other)),
304        None => Ok(None),
305    }
306}
307
308fn optional_u16_column(batch: &LasColumnBatch, dimension: LasDimension) -> Result<Option<&[u16]>> {
309    match batch.column(dimension) {
310        Some(ColumnData::U16(values)) => Ok(Some(values)),
311        Some(other) => Err(unexpected_column_type(dimension, "U16", other)),
312        None => Ok(None),
313    }
314}
315
316fn optional_u8_column(batch: &LasColumnBatch, dimension: LasDimension) -> Result<Option<&[u8]>> {
317    match batch.column(dimension) {
318        Some(ColumnData::U8(values)) => Ok(Some(values)),
319        Some(other) => Err(unexpected_column_type(dimension, "U8", other)),
320        None => Ok(None),
321    }
322}
323
324fn optional_extra_bytes_column(batch: &LasColumnBatch) -> Result<Option<(&[u8], usize)>> {
325    let mut extra_bytes = None;
326    for (spec, data) in &batch.columns {
327        if spec.dimension != LasDimension::ExtraBytes {
328            continue;
329        }
330        let width = spec.extra_byte_width().ok_or_else(|| {
331            Error::InvalidInput("ExtraBytes column requires a non-zero byte width".into())
332        })?;
333        if width > usize::from(u16::MAX) {
334            return Err(Error::InvalidInput(format!(
335                "ExtraBytes column width {width} exceeds LAS u16 range"
336            )));
337        }
338        let values = match data {
339            ColumnData::U8(values) => values.as_slice(),
340            other => {
341                return Err(unexpected_column_type(
342                    LasDimension::ExtraBytes,
343                    "U8",
344                    other,
345                ))
346            }
347        };
348        if extra_bytes.replace((values, width)).is_some() {
349            return Err(Error::InvalidInput(
350                "ColumnBatchSource supports at most one ExtraBytes column".into(),
351            ));
352        }
353    }
354    Ok(extra_bytes)
355}
356
357fn extra_bytes_at(column: Option<(&[u8], usize)>, index: usize) -> Vec<u8> {
358    match column {
359        Some((values, width)) => {
360            let start = index * width;
361            values[start..start + width].to_vec()
362        }
363        None => Vec::new(),
364    }
365}
366
367fn optional_bool_column(
368    batch: &LasColumnBatch,
369    dimension: LasDimension,
370) -> Result<Option<&[bool]>> {
371    match batch.column(dimension) {
372        Some(ColumnData::Bool(values)) => Ok(Some(values)),
373        Some(other) => Err(unexpected_column_type(dimension, "Bool", other)),
374        None => Ok(None),
375    }
376}
377
378fn unexpected_column_type(dimension: LasDimension, expected: &str, actual: &ColumnData) -> Error {
379    Error::InvalidInput(format!(
380        "{dimension:?} column must be {expected}, found {:?}",
381        actual.scalar()
382    ))
383}
384
385struct SpillSource<'a> {
386    reader: &'a SpillReader,
387}
388
389impl CopcPointSource for SpillSource<'_> {
390    fn len(&self) -> usize {
391        self.reader.len()
392    }
393
394    #[inline]
395    fn xyz(&self, index: usize) -> (f64, f64, f64) {
396        self.reader.xyz_at(index)
397    }
398
399    fn fields(&self, index: usize) -> Result<CopcPointFields> {
400        let record = self.reader.record_at(index)?;
401        Ok(CopcPointFields {
402            x: record.x,
403            y: record.y,
404            z: record.z,
405            intensity: record.intensity,
406            return_number: record.return_number,
407            number_of_returns: record.number_of_returns,
408            synthetic: u8::from(record.synthetic),
409            key_point: u8::from(record.key_point),
410            withheld: u8::from(record.withheld),
411            overlap: u8::from(record.overlap),
412            scan_channel: record.scan_channel,
413            scan_direction_flag: u8::from(record.scan_direction_flag),
414            edge_of_flight_line: u8::from(record.edge_of_flight_line),
415            classification: record.classification,
416            user_data: record.user_data,
417            scan_angle: record.scan_angle,
418            point_source_id: record.point_source_id,
419            gps_time: record.gps_time,
420            red: record.red,
421            green: record.green,
422            blue: record.blue,
423            extra_bytes: record.extra_bytes,
424        })
425    }
426
427    fn extra_byte_count(&self) -> u16 {
428        self.reader.layout().extra_bytes
429    }
430
431    fn extra_bytes_vlrs(&self) -> &[las::Vlr] {
432        &self.reader.layout().extra_bytes_descriptors
433    }
434}
435
436#[derive(Clone, Debug)]
437struct OutputCrsRecord {
438    vlr: las::Vlr,
439    is_extended: bool,
440}
441
442#[derive(Clone, Debug)]
443struct OutputLasMetadata {
444    file_source_id: u16,
445    global_encoding: u16,
446    guid: [u8; 16],
447    system_identifier: String,
448    generating_software: String,
449    creation_day_of_year: u16,
450    creation_year: u16,
451    scale: (f64, f64, f64),
452    offset: Option<(f64, f64, f64)>,
453    crs_records: Vec<OutputCrsRecord>,
454    pass_through_vlrs: Vec<las::Vlr>,
455    pass_through_evlrs: Vec<las::Vlr>,
456}
457
458impl Default for OutputLasMetadata {
459    fn default() -> Self {
460        Self {
461            file_source_id: 0,
462            global_encoding: 0,
463            guid: [0; 16],
464            system_identifier: "copc-rust".to_string(),
465            generating_software: "copc-writer".to_string(),
466            creation_day_of_year: 0,
467            creation_year: 2026,
468            scale: (0.001, 0.001, 0.001),
469            offset: None,
470            crs_records: Vec::new(),
471            pass_through_vlrs: Vec::new(),
472            pass_through_evlrs: Vec::new(),
473        }
474    }
475}
476
477impl OutputLasMetadata {
478    fn from_las_header(header: &las::Header, source_evlrs: &[las::Vlr]) -> Self {
479        let mut global_encoding = u16::from(header.gps_time_type());
480        if header.has_synthetic_return_numbers() {
481            global_encoding |= 8;
482        }
483        let crs_records = extract_source_wkt_crs_records(header, source_evlrs);
484        if !crs_records.is_empty() {
485            global_encoding |= WKT_GLOBAL_ENCODING_BIT;
486        }
487        let pass_through_vlrs = extract_pass_through_vlrs(header);
488        let pass_through_evlrs = extract_pass_through_evlrs(source_evlrs);
489        let transforms = header.transforms();
490        let (creation_day_of_year, creation_year) = header
491            .date()
492            .map(|date| {
493                let year = date.format("%Y").to_string().parse().unwrap_or(0);
494                let day = date.format("%j").to_string().parse().unwrap_or(0);
495                (day, year)
496            })
497            .unwrap_or((0, 0));
498
499        Self {
500            file_source_id: header.file_source_id(),
501            global_encoding,
502            guid: *header.guid().as_bytes(),
503            system_identifier: header.system_identifier().to_string(),
504            generating_software: header.generating_software().to_string(),
505            creation_day_of_year,
506            creation_year,
507            scale: (transforms.x.scale, transforms.y.scale, transforms.z.scale),
508            offset: Some((
509                transforms.x.offset,
510                transforms.y.offset,
511                transforms.z.offset,
512            )),
513            crs_records,
514            pass_through_vlrs,
515            pass_through_evlrs,
516        }
517    }
518
519    fn regular_crs_vlrs(&self) -> impl Iterator<Item = &las::Vlr> {
520        self.crs_records
521            .iter()
522            .filter(|record| !record.is_extended)
523            .map(|record| &record.vlr)
524    }
525
526    fn extended_crs_evlrs(&self) -> impl Iterator<Item = &las::Vlr> {
527        self.crs_records
528            .iter()
529            .filter(|record| record.is_extended)
530            .map(|record| &record.vlr)
531    }
532
533    fn regular_crs_vlr_count(&self) -> usize {
534        self.crs_records
535            .iter()
536            .filter(|record| !record.is_extended)
537            .count()
538    }
539
540    fn extended_crs_evlr_count(&self) -> usize {
541        self.crs_records
542            .iter()
543            .filter(|record| record.is_extended)
544            .count()
545    }
546
547    fn regular_crs_vlr_bytes(&self) -> Result<u32> {
548        self.regular_crs_vlrs().try_fold(0u32, |total, vlr| {
549            let data_len = u16::try_from(vlr.data.len()).map_err(|_| {
550                Error::InvalidInput(format!(
551                    "regular WKT CRS VLR is too large: {} byte(s)",
552                    vlr.data.len()
553                ))
554            })?;
555            total
556                .checked_add(LAS_VLR_HEADER_BYTES + u32::from(data_len))
557                .ok_or_else(|| Error::InvalidInput("CRS VLR byte size overflow".into()))
558        })
559    }
560
561    fn source_evlrs_after_hierarchy(&self) -> impl Iterator<Item = &las::Vlr> {
562        self.extended_crs_evlrs()
563            .chain(self.pass_through_evlrs.iter())
564    }
565
566    fn source_evlr_count_after_hierarchy(&self) -> usize {
567        self.extended_crs_evlr_count() + self.pass_through_evlrs.len()
568    }
569}
570
571#[derive(Clone, Copy, Debug, PartialEq)]
572struct PointStats {
573    gpstime_min: f64,
574    gpstime_max: f64,
575    extended_return_counts: [u64; 15],
576}
577
578impl PointStats {
579    fn new() -> Self {
580        Self {
581            gpstime_min: f64::INFINITY,
582            gpstime_max: f64::NEG_INFINITY,
583            extended_return_counts: [0; 15],
584        }
585    }
586
587    fn record(&mut self, index: usize, fields: &CopcPointFields) -> Result<()> {
588        validate_finite_value(&format!("point {index} GPS time"), fields.gps_time)?;
589        self.gpstime_min = self.gpstime_min.min(fields.gps_time);
590        self.gpstime_max = self.gpstime_max.max(fields.gps_time);
591        if (1..=15).contains(&fields.return_number) {
592            self.extended_return_counts[usize::from(fields.return_number - 1)] += 1;
593        }
594        Ok(())
595    }
596}
597
598#[derive(Debug, Clone, Copy)]
599pub struct CopcWriterParams {
600    pub max_points_per_node: u32,
601    pub max_depth: u32,
602}
603
604impl Default for CopcWriterParams {
605    fn default() -> Self {
606        Self {
607            max_points_per_node: 100_000,
608            max_depth: 8,
609        }
610    }
611}
612
613pub fn write_source<S: CopcPointSource>(
614    path: &Path,
615    source: &S,
616    has_color: bool,
617    bounds: Bounds,
618    params: &CopcWriterParams,
619) -> Result<()> {
620    write_source_with_cancel(path, source, has_color, bounds, params, &NeverCancel)
621}
622
623pub fn write_source_with_cancel<S: CopcPointSource>(
624    path: &Path,
625    source: &S,
626    has_color: bool,
627    bounds: Bounds,
628    params: &CopcWriterParams,
629    cancel: &dyn CancelCheck,
630) -> Result<()> {
631    cancel.check()?;
632    if source.is_empty() {
633        return Err(Error::InvalidInput(
634            "cannot write empty cloud to COPC".into(),
635        ));
636    }
637    write_copc_inner(
638        path,
639        source,
640        has_color,
641        bounds,
642        params,
643        cancel,
644        &OutputLasMetadata::default(),
645    )
646}
647
648pub fn write_streaming_with_cancel<I>(
649    path: &Path,
650    layout: StreamingLayout,
651    points: I,
652    params: &CopcWriterParams,
653    spill_dir: &Path,
654    cancel: &dyn CancelCheck,
655) -> Result<()>
656where
657    I: IntoIterator<Item = Result<LasPointRecord>>,
658{
659    cancel.check()?;
660    validate_streaming_layout_supported(&layout)?;
661    let mut spill = SpillWriter::create(spill_dir, layout)?;
662    for (index, item) in points.into_iter().enumerate() {
663        if index % CANCEL_POLL_STRIDE == 0 {
664            cancel.check()?;
665        }
666        let record = item?;
667        validate_record_coordinates(&record, index)?;
668        spill.push(&record)?;
669    }
670    cancel.check()?;
671    let reader = spill.finalize()?;
672    write_copc_from_spill(path, reader, params, cancel, &OutputLasMetadata::default())
673}
674
675pub fn convert_las_to_copc_streaming(
676    las_path: &Path,
677    copc_path: &Path,
678    params: &CopcWriterParams,
679    spill_dir: &Path,
680    cancel: &dyn CancelCheck,
681) -> Result<()> {
682    cancel.check()?;
683    let las_file = File::open(las_path).map_err(|e| Error::io("open source LAS/LAZ", e))?;
684    let mut reader = las::Reader::new(BufReader::with_capacity(LAS_INPUT_BUFFER_BYTES, las_file))
685        .map_err(|e| Error::Las(e.to_string()))?;
686    let source_evlrs = read_all_source_evlrs(las_path)?;
687    validate_las_conversion_supported(reader.header(), &source_evlrs)?;
688    let output_metadata = OutputLasMetadata::from_las_header(reader.header(), &source_evlrs);
689    let layout = StreamingLayout::from_las_header(reader.header());
690    let mut spill = SpillWriter::create(spill_dir, layout)?;
691    for (index, result) in reader.points().enumerate() {
692        if index % CANCEL_POLL_STRIDE == 0 {
693            cancel.check()?;
694        }
695        let point = result.map_err(|e| Error::Las(e.to_string()))?;
696        let record = LasPointRecord::from_las_point(&point);
697        validate_record_coordinates(&record, index)?;
698        spill.push(&record)?;
699    }
700    cancel.check()?;
701    let reader = spill.finalize()?;
702    write_copc_from_spill(copc_path, reader, params, cancel, &output_metadata)
703}
704
705fn read_all_source_evlrs(path: &Path) -> Result<Vec<las::Vlr>> {
706    let mut file = File::open(path).map_err(|e| Error::io("open source LAS/LAZ", e))?;
707    let raw_header =
708        raw::Header::read_from(&mut file).map_err(|e| Error::Las(format!("source header: {e}")))?;
709    let Some(evlr_header) = raw_header.evlr else {
710        return Ok(Vec::new());
711    };
712
713    file.seek(SeekFrom::Start(evlr_header.start_of_first_evlr))
714        .map_err(|e| Error::io("seek source EVLRs", e))?;
715    let evlr_count = usize::try_from(evlr_header.number_of_evlrs)
716        .map_err(|_| Error::InvalidInput("source EVLR count overflows usize".into()))?;
717    let mut evlrs = Vec::with_capacity(evlr_count);
718    for index in 0..evlr_header.number_of_evlrs {
719        let evlr = raw::Vlr::read_from(&mut file, true)
720            .map(las::Vlr::new)
721            .map_err(|e| Error::Las(format!("source EVLR {index}: {e}")))?;
722        evlrs.push(evlr);
723    }
724    Ok(evlrs)
725}
726
727fn validate_streaming_layout_supported(layout: &StreamingLayout) -> Result<()> {
728    let mut unsupported = Vec::new();
729    if layout.has_nir {
730        unsupported.push("NIR point data".to_string());
731    }
732    if layout.has_waveform {
733        unsupported.push("waveform point data".to_string());
734    }
735    if unsupported.is_empty() {
736        Ok(())
737    } else {
738        Err(Error::Unsupported(format!(
739            "COPC writer cannot preserve {}",
740            unsupported.join(", ")
741        )))
742    }
743}
744
745fn validate_las_conversion_supported(
746    header: &las::Header,
747    source_evlrs: &[las::Vlr],
748) -> Result<()> {
749    let mut unsupported = Vec::new();
750    let format = header.point_format();
751    if format.has_nir {
752        unsupported.push("NIR point data".to_string());
753    }
754    if format.has_waveform {
755        unsupported.push("waveform point data".to_string());
756    }
757    let source_has_wkt_crs_record = has_wkt_crs_record(header, source_evlrs);
758    let mut geotiff_crs_record_count = 0usize;
759    for vlr in header.vlrs() {
760        if is_geotiff_crs_vlr(vlr) && !source_has_wkt_crs_record {
761            geotiff_crs_record_count += 1;
762        }
763    }
764    for evlr in source_evlrs {
765        if is_geotiff_crs_vlr(evlr) && !source_has_wkt_crs_record {
766            geotiff_crs_record_count += 1;
767        }
768    }
769    if geotiff_crs_record_count > 0 {
770        unsupported.push(format!(
771            "{geotiff_crs_record_count} GeoTIFF CRS VLR/EVLR(s); GeoTIFF-to-WKT CRS conversion is not implemented in copc-writer"
772        ));
773    }
774
775    if unsupported.is_empty() {
776        Ok(())
777    } else {
778        Err(Error::Unsupported(format!(
779            "LAS-to-COPC streaming conversion cannot preserve {}",
780            unsupported.join(", ")
781        )))
782    }
783}
784
785fn is_laszip_vlr(vlr: &las::Vlr) -> bool {
786    vlr.user_id == LASZIP_VLR_USER_ID && vlr.record_id == LASZIP_VLR_RECORD_ID
787}
788
789fn is_copc_info_vlr(vlr: &las::Vlr) -> bool {
790    vlr.user_id == "copc" && vlr.record_id == 1
791}
792
793fn is_copc_hierarchy_evlr(vlr: &las::Vlr) -> bool {
794    vlr.user_id == "copc" && vlr.record_id == 1000
795}
796
797fn is_wkt_crs_vlr(vlr: &las::Vlr) -> bool {
798    vlr.user_id == LASF_PROJECTION_USER_ID && vlr.record_id == WKT_CRS_RECORD_ID
799}
800
801fn is_geotiff_crs_vlr(vlr: &las::Vlr) -> bool {
802    vlr.user_id == LASF_PROJECTION_USER_ID
803        && matches!(
804            vlr.record_id,
805            GEOTIFF_GEO_KEY_DIRECTORY_RECORD_ID
806                | GEOTIFF_DOUBLE_PARAMS_RECORD_ID
807                | GEOTIFF_ASCII_PARAMS_RECORD_ID
808        )
809}
810
811fn is_extra_bytes_descriptor_vlr(vlr: &las::Vlr) -> bool {
812    vlr.user_id == LASF_SPEC_USER_ID && vlr.record_id == EXTRA_BYTES_RECORD_ID
813}
814
815fn has_wkt_crs_record(header: &las::Header, source_evlrs: &[las::Vlr]) -> bool {
816    header.vlrs().iter().any(is_wkt_crs_vlr) || source_evlrs.iter().any(is_wkt_crs_vlr)
817}
818
819fn extract_source_wkt_crs_records(
820    header: &las::Header,
821    source_evlrs: &[las::Vlr],
822) -> Vec<OutputCrsRecord> {
823    let mut records = Vec::new();
824    for vlr in header.vlrs() {
825        if is_wkt_crs_vlr(vlr) {
826            records.push(OutputCrsRecord {
827                vlr: vlr.clone(),
828                is_extended: false,
829            });
830        }
831    }
832    for evlr in source_evlrs {
833        if is_wkt_crs_vlr(evlr) {
834            records.push(OutputCrsRecord {
835                vlr: evlr.clone(),
836                is_extended: true,
837            });
838        }
839    }
840    records
841}
842
843fn extract_pass_through_vlrs(header: &las::Header) -> Vec<las::Vlr> {
844    header
845        .vlrs()
846        .iter()
847        .filter(|vlr| !is_laszip_vlr(vlr))
848        .filter(|vlr| !is_copc_info_vlr(vlr))
849        .filter(|vlr| !is_copc_hierarchy_evlr(vlr))
850        .filter(|vlr| !is_wkt_crs_vlr(vlr))
851        .filter(|vlr| !is_geotiff_crs_vlr(vlr))
852        .filter(|vlr| !is_extra_bytes_descriptor_vlr(vlr))
853        .cloned()
854        .collect()
855}
856
857fn extract_pass_through_evlrs(source_evlrs: &[las::Vlr]) -> Vec<las::Vlr> {
858    source_evlrs
859        .iter()
860        .filter(|evlr| !is_laszip_vlr(evlr))
861        .filter(|evlr| !is_copc_info_vlr(evlr))
862        .filter(|evlr| !is_copc_hierarchy_evlr(evlr))
863        .filter(|evlr| !is_wkt_crs_vlr(evlr))
864        .filter(|evlr| !is_geotiff_crs_vlr(evlr))
865        .cloned()
866        .collect()
867}
868
869fn validate_record_coordinates(record: &LasPointRecord, index: usize) -> Result<()> {
870    validate_xyz_finite(index, record.x, record.y, record.z)
871}
872
873fn validate_coordinate_inputs<S: CopcPointSource>(
874    source: &S,
875    bounds: Bounds,
876    scale: (f64, f64, f64),
877    offset: (f64, f64, f64),
878    cancel: &dyn CancelCheck,
879) -> Result<PointStats> {
880    validate_bounds(bounds)?;
881    validate_transform(scale, offset)?;
882    let extra_byte_count = usize::from(source.extra_byte_count());
883    let mut stats = PointStats::new();
884    for index in 0..source.len() {
885        if index % CANCEL_POLL_STRIDE == 0 {
886            cancel.check()?;
887        }
888        let (x, y, z) = source.xyz(index);
889        validate_xyz_finite(index, x, y, z)?;
890        quantize_xyz(index, x, y, z, scale, offset)?;
891
892        let fields = source.fields(index)?;
893        validate_xyz_finite(index, fields.x, fields.y, fields.z)?;
894        quantize_xyz(index, fields.x, fields.y, fields.z, scale, offset)?;
895        validate_scan_angle(index, fields.scan_angle)?;
896        validate_point_flags(index, &fields)?;
897        if fields.extra_bytes.len() != extra_byte_count {
898            return Err(Error::InvalidInput(format!(
899                "point {index} has {} extra byte(s), expected {extra_byte_count}",
900                fields.extra_bytes.len()
901            )));
902        }
903        stats.record(index, &fields)?;
904    }
905    Ok(stats)
906}
907
908fn validate_point_flags(index: usize, fields: &CopcPointFields) -> Result<()> {
909    validate_point_field_range(index, "return_number", fields.return_number, 0, 15)?;
910    validate_point_field_range(index, "number_of_returns", fields.number_of_returns, 0, 15)?;
911    validate_point_field_range(index, "synthetic", fields.synthetic, 0, 1)?;
912    validate_point_field_range(index, "key_point", fields.key_point, 0, 1)?;
913    validate_point_field_range(index, "withheld", fields.withheld, 0, 1)?;
914    validate_point_field_range(index, "overlap", fields.overlap, 0, 1)?;
915    validate_point_field_range(index, "scan_channel", fields.scan_channel, 0, 3)?;
916    validate_point_field_range(
917        index,
918        "scan_direction_flag",
919        fields.scan_direction_flag,
920        0,
921        1,
922    )?;
923    validate_point_field_range(
924        index,
925        "edge_of_flight_line",
926        fields.edge_of_flight_line,
927        0,
928        1,
929    )
930}
931
932fn validate_scan_angle(index: usize, value: f32) -> Result<()> {
933    if !value.is_finite() {
934        return Err(Error::InvalidInput(format!(
935            "point {index} scan_angle must be finite, got {value}"
936        )));
937    }
938    let scaled = value / LAS_14_SCAN_ANGLE_SCALE;
939    if scaled < f32::from(i16::MIN) || scaled > f32::from(i16::MAX) {
940        return Err(Error::InvalidInput(format!(
941            "point {index} scan_angle {value} encodes to {scaled}, outside LAS i16 range"
942        )));
943    }
944    Ok(())
945}
946
947fn validate_point_field_range(index: usize, name: &str, value: u8, min: u8, max: u8) -> Result<()> {
948    if (min..=max).contains(&value) {
949        Ok(())
950    } else {
951        Err(Error::InvalidInput(format!(
952            "point {index} {name} must be in {min}..={max}, got {value}"
953        )))
954    }
955}
956
957fn validate_bounds(bounds: Bounds) -> Result<()> {
958    validate_finite_value("bounds min x", bounds.min.0)?;
959    validate_finite_value("bounds min y", bounds.min.1)?;
960    validate_finite_value("bounds min z", bounds.min.2)?;
961    validate_finite_value("bounds max x", bounds.max.0)?;
962    validate_finite_value("bounds max y", bounds.max.1)?;
963    validate_finite_value("bounds max z", bounds.max.2)?;
964    for (axis, min, max) in [
965        ("x", bounds.min.0, bounds.max.0),
966        ("y", bounds.min.1, bounds.max.1),
967        ("z", bounds.min.2, bounds.max.2),
968    ] {
969        if min > max {
970            return Err(Error::InvalidInput(format!(
971                "bounds {axis} min {min} exceeds max {max}"
972            )));
973        }
974        validate_finite_value(&format!("bounds {axis} span"), max - min)?;
975    }
976    Ok(())
977}
978
979fn validate_transform(scale: (f64, f64, f64), offset: (f64, f64, f64)) -> Result<()> {
980    for (axis, value) in [("x", scale.0), ("y", scale.1), ("z", scale.2)] {
981        if !value.is_finite() || value <= 0.0 {
982            return Err(Error::InvalidInput(format!(
983                "LAS {axis} scale must be finite and positive, got {value}"
984            )));
985        }
986    }
987    validate_finite_value("LAS x offset", offset.0)?;
988    validate_finite_value("LAS y offset", offset.1)?;
989    validate_finite_value("LAS z offset", offset.2)?;
990    Ok(())
991}
992
993fn validate_xyz_finite(index: usize, x: f64, y: f64, z: f64) -> Result<()> {
994    validate_point_axis_finite(index, "x", x)?;
995    validate_point_axis_finite(index, "y", y)?;
996    validate_point_axis_finite(index, "z", z)
997}
998
999fn validate_point_axis_finite(index: usize, axis: &str, value: f64) -> Result<()> {
1000    if value.is_finite() {
1001        Ok(())
1002    } else {
1003        Err(Error::InvalidInput(format!(
1004            "point {index} {axis} coordinate must be finite, got {value}"
1005        )))
1006    }
1007}
1008
1009fn validate_finite_value(name: &str, value: f64) -> Result<()> {
1010    if value.is_finite() {
1011        Ok(())
1012    } else {
1013        Err(Error::InvalidInput(format!(
1014            "{name} must be finite, got {value}"
1015        )))
1016    }
1017}
1018
1019fn quantize_xyz(
1020    index: usize,
1021    x: f64,
1022    y: f64,
1023    z: f64,
1024    scale: (f64, f64, f64),
1025    offset: (f64, f64, f64),
1026) -> Result<(i32, i32, i32)> {
1027    Ok((
1028        quantize_axis(index, "x", x, scale.0, offset.0)?,
1029        quantize_axis(index, "y", y, scale.1, offset.1)?,
1030        quantize_axis(index, "z", z, scale.2, offset.2)?,
1031    ))
1032}
1033
1034fn quantize_axis(index: usize, axis: &str, value: f64, scale: f64, offset: f64) -> Result<i32> {
1035    let scaled = ((value - offset) / scale).round();
1036    if !scaled.is_finite() {
1037        return Err(Error::InvalidInput(format!(
1038            "point {index} {axis} coordinate cannot be encoded with scale {scale} and offset {offset}"
1039        )));
1040    }
1041    if scaled < f64::from(i32::MIN) || scaled > f64::from(i32::MAX) {
1042        return Err(Error::InvalidInput(format!(
1043            "point {index} {axis} coordinate {value} encodes to {scaled}, outside LAS i32 range"
1044        )));
1045    }
1046    Ok(scaled as i32)
1047}
1048
1049fn write_copc_from_spill(
1050    path: &Path,
1051    reader: SpillReader,
1052    params: &CopcWriterParams,
1053    cancel: &dyn CancelCheck,
1054    metadata: &OutputLasMetadata,
1055) -> Result<()> {
1056    cancel.check()?;
1057    validate_streaming_layout_supported(reader.layout())?;
1058    if reader.is_empty() {
1059        return Err(Error::InvalidInput(
1060            "cannot write empty cloud to COPC".into(),
1061        ));
1062    }
1063    let has_color = reader.layout().has_color;
1064    let bounds = reader.bounds();
1065    let source = SpillSource { reader: &reader };
1066    write_copc_inner(path, &source, has_color, bounds, params, cancel, metadata)
1067}
1068
1069fn write_copc_inner<S: CopcPointSource>(
1070    path: &Path,
1071    source: &S,
1072    has_color: bool,
1073    bounds: Bounds,
1074    params: &CopcWriterParams,
1075    cancel: &dyn CancelCheck,
1076    metadata: &OutputLasMetadata,
1077) -> Result<()> {
1078    cancel.check()?;
1079    let point_format_id = if has_color { 7u8 } else { 6u8 };
1080    let mut point_format =
1081        LasFormat::new(point_format_id).map_err(|e| Error::Las(format!("point format: {e}")))?;
1082    let extra_byte_count = source.extra_byte_count();
1083    point_format.extra_bytes = extra_byte_count;
1084    let point_record_length = point_format.len();
1085
1086    let (scale_x, scale_y, scale_z) = metadata.scale;
1087    let (offset_x, offset_y, offset_z) =
1088        metadata
1089            .offset
1090            .unwrap_or((bounds.min.0, bounds.min.1, bounds.min.2));
1091    let point_stats = validate_coordinate_inputs(
1092        source,
1093        bounds,
1094        (scale_x, scale_y, scale_z),
1095        (offset_x, offset_y, offset_z),
1096        cancel,
1097    )?;
1098    let (center, halfsize) = cube_from_bounds(&bounds);
1099
1100    let lod_index = build_lod_index(source, center, halfsize, params, cancel)?;
1101    cancel.check()?;
1102
1103    let var_vlr = LazVlrBuilder::default()
1104        .with_point_format(point_format_id, extra_byte_count)
1105        .map_err(|e| Error::Las(format!("laz items: {e}")))?
1106        .with_variable_chunk_size()
1107        .build();
1108    let mut var_vlr_bytes = Vec::new();
1109    var_vlr
1110        .write_to(&mut var_vlr_bytes)
1111        .map_err(|e| Error::Las(format!("variable chunk LAZ VLR: {e}")))?;
1112
1113    let copc_info_vlr_size = 160u16;
1114    let las_header_size = 375u32;
1115    let regular_crs_vlr_count = metadata.regular_crs_vlr_count();
1116    let regular_crs_vlr_bytes = metadata.regular_crs_vlr_bytes()?;
1117    let extra_bytes_vlrs = source.extra_bytes_vlrs();
1118    let extra_bytes_vlr_bytes = regular_las_vlrs_bytes(extra_bytes_vlrs)?;
1119    let pass_through_vlr_bytes = regular_las_vlrs_bytes(&metadata.pass_through_vlrs)?;
1120    let number_of_vlrs = u32::try_from(
1121        2usize
1122            .checked_add(regular_crs_vlr_count)
1123            .and_then(|count| count.checked_add(extra_bytes_vlrs.len()))
1124            .and_then(|count| count.checked_add(metadata.pass_through_vlrs.len()))
1125            .ok_or_else(|| Error::InvalidInput("VLR count overflow".into()))?,
1126    )
1127    .map_err(|_| Error::InvalidInput("VLR count overflow".into()))?;
1128    let number_of_evlrs = u32::try_from(
1129        1usize
1130            .checked_add(metadata.source_evlr_count_after_hierarchy())
1131            .ok_or_else(|| Error::InvalidInput("EVLR count overflow".into()))?,
1132    )
1133    .map_err(|_| Error::InvalidInput("EVLR count overflow".into()))?;
1134    let var_vlr_body_size = u16::try_from(var_vlr_bytes.len())
1135        .map_err(|_| Error::InvalidInput("LAZ VLR byte size exceeds LAS VLR limit".into()))?;
1136    let var_vlr_storage_bytes = LAS_VLR_HEADER_BYTES
1137        .checked_add(u32::from(var_vlr_body_size))
1138        .ok_or_else(|| Error::InvalidInput("LAZ VLR byte size overflow".into()))?;
1139    let total_vlr_bytes = LAS_VLR_HEADER_BYTES
1140        .checked_add(u32::from(copc_info_vlr_size))
1141        .and_then(|total| total.checked_add(var_vlr_storage_bytes))
1142        .and_then(|total| total.checked_add(regular_crs_vlr_bytes))
1143        .and_then(|total| total.checked_add(extra_bytes_vlr_bytes))
1144        .and_then(|total| total.checked_add(pass_through_vlr_bytes))
1145        .ok_or_else(|| Error::InvalidInput("VLR byte size overflow".into()))?;
1146    let offset_to_point_data = las_header_size
1147        .checked_add(total_vlr_bytes)
1148        .ok_or_else(|| Error::InvalidInput("point data offset overflow".into()))?;
1149
1150    let file = File::create(path).map_err(|e| Error::io("create COPC file", e))?;
1151    let mut writer = BufWriter::with_capacity(COPC_OUTPUT_BUFFER_BYTES, file);
1152
1153    let header = LasHeader {
1154        point_data_format: point_format_id | 0x80,
1155        point_record_length,
1156        offset_to_point_data,
1157        number_of_vlrs,
1158        file_source_id: metadata.file_source_id,
1159        global_encoding: metadata.global_encoding,
1160        guid: metadata.guid,
1161        system_identifier: metadata.system_identifier.clone(),
1162        generating_software: metadata.generating_software.clone(),
1163        creation_day_of_year: metadata.creation_day_of_year,
1164        creation_year: metadata.creation_year,
1165        scale: (scale_x, scale_y, scale_z),
1166        offset: (offset_x, offset_y, offset_z),
1167        bounds,
1168        legacy_point_count: 0,
1169        total_point_count: source.len() as u64,
1170        offset_to_first_evlr: 0,
1171        number_of_evlrs,
1172        extended_return_counts: point_stats.extended_return_counts,
1173    };
1174    header.write(&mut writer)?;
1175
1176    write_vlr_header(&mut writer, "copc", 1, copc_info_vlr_size, "COPC info")?;
1177    let copc_info_payload_start = writer
1178        .stream_position()
1179        .map_err(|e| Error::io("record COPC info payload offset", e))?;
1180    writer
1181        .write_all(&[0u8; 160])
1182        .map_err(|e| Error::io("write COPC info placeholder", e))?;
1183
1184    write_vlr_header(
1185        &mut writer,
1186        LASZIP_VLR_USER_ID,
1187        LASZIP_VLR_RECORD_ID,
1188        var_vlr_body_size,
1189        "http://laszip.org",
1190    )?;
1191    writer
1192        .write_all(&var_vlr_bytes)
1193        .map_err(|e| Error::io("write LAZ VLR", e))?;
1194
1195    for vlr in metadata.regular_crs_vlrs() {
1196        write_las_vlr(&mut writer, vlr)?;
1197    }
1198    for vlr in extra_bytes_vlrs {
1199        write_las_vlr(&mut writer, vlr)?;
1200    }
1201    for vlr in &metadata.pass_through_vlrs {
1202        write_las_vlr(&mut writer, vlr)?;
1203    }
1204
1205    let point_data_actual_start = writer
1206        .stream_position()
1207        .map_err(|e| Error::io("record point data offset", e))?;
1208    if point_data_actual_start as u32 != offset_to_point_data {
1209        return Err(Error::InvalidInput(format!(
1210            "VLR size accounting mismatch: at {point_data_actual_start}, expected {offset_to_point_data}"
1211        )));
1212    }
1213
1214    let mut compressor = LasZipCompressor::new(&mut writer, var_vlr.clone())
1215        .map_err(|e| Error::Las(format!("compressor: {e}")))?;
1216    let mut hierarchy: Vec<Entry> = Vec::with_capacity(lod_index.nodes.len());
1217    let order_path: &Path = lod_index.order_path.as_ref();
1218    let mut index_reader = BufReader::with_capacity(
1219        INDEX_IO_BUFFER_BYTES,
1220        File::open(order_path).map_err(|e| Error::io("open LOD order", e))?,
1221    );
1222    let mut point_buf = vec![0u8; point_record_length as usize];
1223    let mut chunk_start_file_offset = compressor
1224        .get_mut()
1225        .stream_position()
1226        .map_err(|e| Error::io("record chunk start", e))?;
1227    chunk_start_file_offset += 8;
1228
1229    for node in &lod_index.nodes {
1230        cancel.check()?;
1231        index_reader
1232            .seek(SeekFrom::Start(node.start))
1233            .map_err(|e| Error::io("seek LOD order", e))?;
1234        for point_index in 0..node.count {
1235            if point_index % CANCEL_POLL_STRIDE == 0 {
1236                cancel.check()?;
1237            }
1238            let source_index = index_reader
1239                .read_u32::<LittleEndian>()
1240                .map_err(|e| Error::io("read LOD order", e))?
1241                as usize;
1242            let fields = source.fields(source_index as usize)?;
1243            encode_point_record(
1244                &mut point_buf,
1245                &fields,
1246                (scale_x, scale_y, scale_z),
1247                (offset_x, offset_y, offset_z),
1248                source_index as usize,
1249                &point_format,
1250            )?;
1251            compressor
1252                .compress_one(&point_buf)
1253                .map_err(|e| Error::Las(format!("compress point: {e}")))?;
1254        }
1255        compressor
1256            .finish_current_chunk()
1257            .map_err(|e| Error::Las(format!("finish chunk: {e}")))?;
1258        let after = compressor
1259            .get_mut()
1260            .stream_position()
1261            .map_err(|e| Error::io("record chunk end", e))?;
1262        let byte_size = i32::try_from(after - chunk_start_file_offset)
1263            .map_err(|_| Error::InvalidInput("LAZ chunk exceeds COPC i32 byte size".into()))?;
1264        let point_count = i32::try_from(node.count)
1265            .map_err(|_| Error::InvalidInput("node point count exceeds COPC i32 range".into()))?;
1266        hierarchy.push(Entry {
1267            key: node.key,
1268            offset: chunk_start_file_offset,
1269            byte_size,
1270            point_count,
1271        });
1272        chunk_start_file_offset = after;
1273    }
1274
1275    cancel.check()?;
1276    compressor
1277        .done()
1278        .map_err(|e| Error::Las(format!("finish compressor: {e}")))?;
1279    drop(compressor);
1280
1281    let hierarchy_evlr_start = writer
1282        .stream_position()
1283        .map_err(|e| Error::io("record hierarchy EVLR start", e))?;
1284    let root_hier_offset = hierarchy_evlr_start
1285        .checked_add(LAS_EVLR_HEADER_BYTES)
1286        .ok_or_else(|| Error::InvalidInput("hierarchy EVLR offset overflow".into()))?;
1287    let mut hierarchy_pages = plan_hierarchy_pages(&hierarchy, VoxelKey::root())?;
1288    let hierarchy_end = assign_hierarchy_page_offsets(&mut hierarchy_pages, root_hier_offset)?;
1289    let hierarchy_body_size = hierarchy_end
1290        .checked_sub(root_hier_offset)
1291        .ok_or_else(|| Error::InvalidInput("hierarchy size overflow".into()))?;
1292    write_evlr_header(
1293        &mut writer,
1294        "copc",
1295        1000,
1296        hierarchy_body_size,
1297        "COPC hierarchy",
1298    )?;
1299    let actual_root_hier_offset = writer
1300        .stream_position()
1301        .map_err(|e| Error::io("record root hierarchy offset", e))?;
1302    if actual_root_hier_offset != root_hier_offset {
1303        return Err(Error::InvalidInput(format!(
1304            "hierarchy offset accounting mismatch: at {actual_root_hier_offset}, expected {root_hier_offset}"
1305        )));
1306    }
1307    write_hierarchy_page_tree(&mut writer, &hierarchy_pages)?;
1308    for evlr in metadata.source_evlrs_after_hierarchy() {
1309        write_las_evlr(&mut writer, evlr)?;
1310    }
1311
1312    writer
1313        .seek(SeekFrom::Start(copc_info_payload_start))
1314        .map_err(|e| Error::io("seek COPC info payload", e))?;
1315    let info = CopcInfo {
1316        center,
1317        halfsize,
1318        spacing: halfsize / 128.0,
1319        root_hier_offset,
1320        root_hier_size: hierarchy_pages.byte_size,
1321        gpstime_min: point_stats.gpstime_min,
1322        gpstime_max: point_stats.gpstime_max,
1323    };
1324    writer
1325        .write_all(&info.write_le_bytes())
1326        .map_err(|e| Error::io("patch COPC info", e))?;
1327
1328    writer
1329        .seek(SeekFrom::Start(235))
1330        .map_err(|e| Error::io("seek first EVLR offset", e))?;
1331    writer
1332        .write_u64::<LittleEndian>(hierarchy_evlr_start)
1333        .map_err(|e| Error::io("patch first EVLR offset", e))?;
1334
1335    writer
1336        .flush()
1337        .map_err(|e| Error::io("flush COPC file", e))?;
1338    Ok(())
1339}
1340
1341#[derive(Debug)]
1342struct HierarchyPagePlan {
1343    key: VoxelKey,
1344    items: Vec<HierarchyPageItem>,
1345    offset: u64,
1346    byte_size: u64,
1347}
1348
1349#[derive(Debug)]
1350enum HierarchyPageItem {
1351    Point(Entry),
1352    Child(Box<HierarchyPagePlan>),
1353}
1354
1355fn plan_hierarchy_pages(entries: &[Entry], key: VoxelKey) -> Result<HierarchyPagePlan> {
1356    if entries.is_empty() {
1357        return Err(Error::InvalidInput(
1358            "cannot write empty hierarchy page".into(),
1359        ));
1360    }
1361    if entries.len() <= HIERARCHY_PAGE_MAX_ENTRIES {
1362        return Ok(HierarchyPagePlan {
1363            key,
1364            items: entries
1365                .iter()
1366                .copied()
1367                .map(HierarchyPageItem::Point)
1368                .collect(),
1369            offset: 0,
1370            byte_size: 0,
1371        });
1372    }
1373
1374    let mut point_entry = None;
1375    let mut child_entries: [Vec<Entry>; 8] = std::array::from_fn(|_| Vec::new());
1376    for entry in entries.iter().copied() {
1377        if entry.key == key {
1378            point_entry = Some(entry);
1379            continue;
1380        }
1381        let mut matched = false;
1382        for (octant, child_entries) in child_entries.iter_mut().enumerate() {
1383            let child_key = key.child(octant as u8);
1384            if key_contains(child_key, entry.key) {
1385                child_entries.push(entry);
1386                matched = true;
1387                break;
1388            }
1389        }
1390        if !matched {
1391            return Err(Error::InvalidInput(format!(
1392                "hierarchy entry {:?} is not under page key {:?}",
1393                entry.key, key
1394            )));
1395        }
1396    }
1397
1398    let mut items = Vec::new();
1399    if let Some(entry) = point_entry {
1400        items.push(HierarchyPageItem::Point(entry));
1401    }
1402    for (octant, child_entries) in child_entries.into_iter().enumerate() {
1403        if child_entries.is_empty() {
1404            continue;
1405        }
1406        items.push(HierarchyPageItem::Child(Box::new(plan_hierarchy_pages(
1407            &child_entries,
1408            key.child(octant as u8),
1409        )?)));
1410    }
1411    if items.len() > HIERARCHY_PAGE_MAX_ENTRIES {
1412        return Err(Error::InvalidInput(format!(
1413            "hierarchy page for {:?} has {} entries, max is {}",
1414            key,
1415            items.len(),
1416            HIERARCHY_PAGE_MAX_ENTRIES
1417        )));
1418    }
1419    Ok(HierarchyPagePlan {
1420        key,
1421        items,
1422        offset: 0,
1423        byte_size: 0,
1424    })
1425}
1426
1427fn assign_hierarchy_page_offsets(page: &mut HierarchyPagePlan, offset: u64) -> Result<u64> {
1428    page.offset = offset;
1429    page.byte_size = hierarchy_page_byte_size(page.items.len())?;
1430    let mut next = offset
1431        .checked_add(page.byte_size)
1432        .ok_or_else(|| Error::InvalidInput("hierarchy page offset overflow".into()))?;
1433    for item in &mut page.items {
1434        if let HierarchyPageItem::Child(child) = item {
1435            next = assign_hierarchy_page_offsets(child, next)?;
1436        }
1437    }
1438    Ok(next)
1439}
1440
1441fn hierarchy_page_byte_size(entry_count: usize) -> Result<u64> {
1442    let bytes = entry_count
1443        .checked_mul(HIERARCHY_ENTRY_BYTES)
1444        .ok_or_else(|| Error::InvalidInput("hierarchy page size overflow".into()))?;
1445    u64::try_from(bytes).map_err(|_| Error::InvalidInput("hierarchy page is too large".into()))
1446}
1447
1448fn write_hierarchy_page_tree<W: Write + Seek>(
1449    writer: &mut W,
1450    page: &HierarchyPagePlan,
1451) -> Result<()> {
1452    let position = writer
1453        .stream_position()
1454        .map_err(|e| Error::io("record hierarchy page offset", e))?;
1455    if position != page.offset {
1456        return Err(Error::InvalidInput(format!(
1457            "hierarchy page offset mismatch: at {position}, expected {}",
1458            page.offset
1459        )));
1460    }
1461    let mut entry_buf = [0u8; HIERARCHY_ENTRY_BYTES];
1462    for item in &page.items {
1463        hierarchy_page_item_entry(item)?.write_le(&mut entry_buf)?;
1464        writer
1465            .write_all(&entry_buf)
1466            .map_err(|e| Error::io("write hierarchy entry", e))?;
1467    }
1468    for item in &page.items {
1469        if let HierarchyPageItem::Child(child) = item {
1470            write_hierarchy_page_tree(writer, child)?;
1471        }
1472    }
1473    Ok(())
1474}
1475
1476fn hierarchy_page_item_entry(item: &HierarchyPageItem) -> Result<Entry> {
1477    match item {
1478        HierarchyPageItem::Point(entry) => Ok(*entry),
1479        HierarchyPageItem::Child(child) => Ok(Entry {
1480            key: child.key,
1481            offset: child.offset,
1482            byte_size: i32::try_from(child.byte_size).map_err(|_| {
1483                Error::InvalidInput("child hierarchy page exceeds COPC i32 byte size".into())
1484            })?,
1485            point_count: -1,
1486        }),
1487    }
1488}
1489
1490fn key_contains(ancestor: VoxelKey, key: VoxelKey) -> bool {
1491    if key.level < ancestor.level {
1492        return false;
1493    }
1494    let shift = (key.level - ancestor.level) as u32;
1495    (key.x >> shift) == ancestor.x
1496        && (key.y >> shift) == ancestor.y
1497        && (key.z >> shift) == ancestor.z
1498}
1499
1500struct LodIndex {
1501    nodes: Vec<LodNodeRange>,
1502    order_path: TempPath,
1503}
1504
1505#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1506struct LodNodeRange {
1507    key: VoxelKey,
1508    start: u64,
1509    count: usize,
1510}
1511
1512struct IndexRun {
1513    path: TempPath,
1514    start: u64,
1515    count: usize,
1516}
1517
1518fn build_lod_index<S: CopcPointSource>(
1519    source: &S,
1520    center: (f64, f64, f64),
1521    halfsize: f64,
1522    params: &CopcWriterParams,
1523    cancel: &dyn CancelCheck,
1524) -> Result<LodIndex> {
1525    cancel.check()?;
1526    let total_points = u32::try_from(source.len()).map_err(|_| {
1527        Error::InvalidInput("COPC writer supports at most u32::MAX points per file".into())
1528    })?;
1529    let max_points_per_node = params.max_points_per_node.max(1) as usize;
1530    let max_depth = params.max_depth.clamp(MIN_LEAF_DEPTH, MAX_LEAF_DEPTH);
1531    let root_run = write_root_index_run(total_points, cancel)?;
1532    let mut order_file = new_index_tempfile("order")?;
1533    let mut order_offset = 0;
1534    let mut nodes = Vec::new();
1535    {
1536        let mut order_writer =
1537            BufWriter::with_capacity(INDEX_IO_BUFFER_BYTES, order_file.as_file_mut());
1538        let mut builder = LodIndexBuilder {
1539            source,
1540            max_points_per_node,
1541            max_depth,
1542            cancel,
1543            order_writer: &mut order_writer,
1544            order_offset: &mut order_offset,
1545            nodes: &mut nodes,
1546        };
1547        builder.assign(VoxelKey::root(), root_run, Bounds::cube(center, halfsize))?;
1548        order_writer
1549            .flush()
1550            .map_err(|e| Error::io("flush LOD index order", e))?;
1551    }
1552    nodes.sort_by_key(|node| node.key);
1553    Ok(LodIndex {
1554        nodes,
1555        order_path: order_file.into_temp_path(),
1556    })
1557}
1558
1559struct LodIndexBuilder<'a, S: CopcPointSource, W: Write> {
1560    source: &'a S,
1561    max_points_per_node: usize,
1562    max_depth: u32,
1563    cancel: &'a dyn CancelCheck,
1564    order_writer: &'a mut W,
1565    order_offset: &'a mut u64,
1566    nodes: &'a mut Vec<LodNodeRange>,
1567}
1568
1569impl<S: CopcPointSource, W: Write> LodIndexBuilder<'_, S, W> {
1570    fn assign(&mut self, key: VoxelKey, run: IndexRun, bounds: Bounds) -> Result<()> {
1571        self.cancel.check()?;
1572        if run.count == 0 {
1573            return Ok(());
1574        }
1575        if run.count <= self.max_points_per_node || key.level as u32 >= self.max_depth {
1576            let start = *self.order_offset;
1577            append_index_run_to_order(&run, self.order_writer, self.order_offset, self.cancel)?;
1578            self.nodes.push(LodNodeRange {
1579                key,
1580                start,
1581                count: run.count,
1582            });
1583            return Ok(());
1584        }
1585
1586        let mut children = partition_index_run(self.source, &run, bounds, self.cancel)?;
1587        let start = *self.order_offset;
1588        let selected_counts = append_lod_selection_to_order(
1589            &children,
1590            self.max_points_per_node,
1591            self.order_writer,
1592            self.order_offset,
1593            self.cancel,
1594        )?;
1595        let selected_total = selected_counts.iter().sum();
1596        self.nodes.push(LodNodeRange {
1597            key,
1598            start,
1599            count: selected_total,
1600        });
1601
1602        for (octant, child) in children.iter_mut().enumerate() {
1603            let Some(mut child_run) = child.take() else {
1604                continue;
1605            };
1606            let selected = selected_counts[octant];
1607            if selected >= child_run.count {
1608                continue;
1609            }
1610            child_run.start += selected as u64 * INDEX_RECORD_BYTES;
1611            child_run.count -= selected;
1612            self.assign(
1613                key.child(octant as u8),
1614                child_run,
1615                bounds.octant(octant as u8),
1616            )?;
1617        }
1618        Ok(())
1619    }
1620}
1621
1622fn write_root_index_run(total_points: u32, cancel: &dyn CancelCheck) -> Result<IndexRun> {
1623    let mut writer = BufWriter::with_capacity(INDEX_IO_BUFFER_BYTES, new_index_tempfile("root")?);
1624    for index in 0..total_points {
1625        if index as usize % CANCEL_POLL_STRIDE == 0 {
1626            cancel.check()?;
1627        }
1628        writer
1629            .write_u32::<LittleEndian>(index)
1630            .map_err(|e| Error::io("write root LOD index", e))?;
1631    }
1632    let file = writer
1633        .into_inner()
1634        .map_err(|e| Error::io("flush root LOD index", e.into_error()))?;
1635    Ok(IndexRun {
1636        path: file.into_temp_path(),
1637        start: 0,
1638        count: total_points as usize,
1639    })
1640}
1641
1642fn partition_index_run<S: CopcPointSource>(
1643    source: &S,
1644    run: &IndexRun,
1645    bounds: Bounds,
1646    cancel: &dyn CancelCheck,
1647) -> Result<[Option<IndexRun>; 8]> {
1648    let mut reader = open_index_run(run)?;
1649    let mut writers: [Option<BufWriter<NamedTempFile>>; 8] = std::array::from_fn(|_| None);
1650    let mut counts = [0usize; 8];
1651    let center = bounds.center();
1652    for read_index in 0..run.count {
1653        if read_index % CANCEL_POLL_STRIDE == 0 {
1654            cancel.check()?;
1655        }
1656        let index = reader
1657            .read_u32::<LittleEndian>()
1658            .map_err(|e| Error::io("read LOD partition index", e))?;
1659        let (x, y, z) = source.xyz(index as usize);
1660        let octant = child_octant(center, x, y, z);
1661        if writers[octant].is_none() {
1662            writers[octant] = Some(BufWriter::with_capacity(
1663                INDEX_IO_BUFFER_BYTES,
1664                new_index_tempfile("partition")?,
1665            ));
1666        }
1667        writers[octant]
1668            .as_mut()
1669            .expect("partition writer exists")
1670            .write_u32::<LittleEndian>(index)
1671            .map_err(|e| Error::io("write LOD partition index", e))?;
1672        counts[octant] += 1;
1673    }
1674
1675    let mut children: [Option<IndexRun>; 8] = std::array::from_fn(|_| None);
1676    for octant in 0..8 {
1677        let Some(writer) = writers[octant].take() else {
1678            continue;
1679        };
1680        let file = writer
1681            .into_inner()
1682            .map_err(|e| Error::io("flush LOD partition index", e.into_error()))?;
1683        children[octant] = Some(IndexRun {
1684            path: file.into_temp_path(),
1685            start: 0,
1686            count: counts[octant],
1687        });
1688    }
1689    Ok(children)
1690}
1691
1692fn append_lod_selection_to_order<W: Write>(
1693    children: &[Option<IndexRun>; 8],
1694    max_points_per_node: usize,
1695    order_writer: &mut W,
1696    order_offset: &mut u64,
1697    cancel: &dyn CancelCheck,
1698) -> Result<[usize; 8]> {
1699    let mut readers: [Option<BufReader<File>>; 8] = std::array::from_fn(|_| None);
1700    for octant in 0..8 {
1701        if let Some(child) = &children[octant] {
1702            readers[octant] = Some(open_index_run(child)?);
1703        }
1704    }
1705
1706    let mut selected_counts = [0usize; 8];
1707    let mut selected_total = 0usize;
1708    while selected_total < max_points_per_node {
1709        cancel.check()?;
1710        let mut progressed = false;
1711        for octant in 0..8 {
1712            let Some(child) = &children[octant] else {
1713                continue;
1714            };
1715            if selected_counts[octant] >= child.count {
1716                continue;
1717            }
1718            let index = readers[octant]
1719                .as_mut()
1720                .expect("partition reader exists")
1721                .read_u32::<LittleEndian>()
1722                .map_err(|e| Error::io("read selected LOD index", e))?;
1723            append_index_to_order(order_writer, order_offset, index)?;
1724            selected_counts[octant] += 1;
1725            selected_total += 1;
1726            progressed = true;
1727            if selected_total == max_points_per_node {
1728                break;
1729            }
1730        }
1731        if !progressed {
1732            break;
1733        }
1734    }
1735    Ok(selected_counts)
1736}
1737
1738fn append_index_run_to_order<W: Write>(
1739    run: &IndexRun,
1740    order_writer: &mut W,
1741    order_offset: &mut u64,
1742    cancel: &dyn CancelCheck,
1743) -> Result<()> {
1744    let mut reader = open_index_run(run)?;
1745    for read_index in 0..run.count {
1746        if read_index % CANCEL_POLL_STRIDE == 0 {
1747            cancel.check()?;
1748        }
1749        let index = reader
1750            .read_u32::<LittleEndian>()
1751            .map_err(|e| Error::io("read LOD index", e))?;
1752        append_index_to_order(order_writer, order_offset, index)?;
1753    }
1754    Ok(())
1755}
1756
1757fn append_index_to_order<W: Write>(
1758    order_writer: &mut W,
1759    order_offset: &mut u64,
1760    index: u32,
1761) -> Result<()> {
1762    order_writer
1763        .write_u32::<LittleEndian>(index)
1764        .map_err(|e| Error::io("write LOD index order", e))?;
1765    *order_offset = order_offset
1766        .checked_add(INDEX_RECORD_BYTES)
1767        .ok_or_else(|| Error::InvalidInput("LOD index order exceeds u64 range".into()))?;
1768    Ok(())
1769}
1770
1771fn open_index_run(run: &IndexRun) -> Result<BufReader<File>> {
1772    let path: &Path = run.path.as_ref();
1773    let mut file = File::open(path).map_err(|e| Error::io("open LOD index", e))?;
1774    file.seek(SeekFrom::Start(run.start))
1775        .map_err(|e| Error::io("seek LOD index", e))?;
1776    Ok(BufReader::with_capacity(INDEX_IO_BUFFER_BYTES, file))
1777}
1778
1779fn new_index_tempfile(label: &str) -> Result<NamedTempFile> {
1780    let prefix = format!(".copc-writer-{label}.");
1781    tempfile::Builder::new()
1782        .prefix(&prefix)
1783        .suffix(".idx")
1784        .tempfile()
1785        .map_err(|e| Error::io("create LOD index file", e))
1786}
1787
1788fn child_octant(center: (f64, f64, f64), x: f64, y: f64, z: f64) -> usize {
1789    usize::from(x >= center.0)
1790        | (usize::from(y >= center.1) << 1)
1791        | (usize::from(z >= center.2) << 2)
1792}
1793
1794fn cube_from_bounds(bounds: &Bounds) -> ((f64, f64, f64), f64) {
1795    let dx = bounds.max.0 - bounds.min.0;
1796    let dy = bounds.max.1 - bounds.min.1;
1797    let dz = bounds.max.2 - bounds.min.2;
1798    let center = (
1799        bounds.min.0 + dx * 0.5,
1800        bounds.min.1 + dy * 0.5,
1801        bounds.min.2 + dz * 0.5,
1802    );
1803    let halfsize = (dx.max(dy).max(dz) * 0.5).max(1e-6);
1804    (center, halfsize)
1805}
1806
1807struct LasHeader {
1808    point_data_format: u8,
1809    point_record_length: u16,
1810    offset_to_point_data: u32,
1811    number_of_vlrs: u32,
1812    file_source_id: u16,
1813    global_encoding: u16,
1814    guid: [u8; 16],
1815    system_identifier: String,
1816    generating_software: String,
1817    creation_day_of_year: u16,
1818    creation_year: u16,
1819    scale: (f64, f64, f64),
1820    offset: (f64, f64, f64),
1821    bounds: Bounds,
1822    legacy_point_count: u32,
1823    total_point_count: u64,
1824    offset_to_first_evlr: u64,
1825    number_of_evlrs: u32,
1826    extended_return_counts: [u64; 15],
1827}
1828
1829impl LasHeader {
1830    fn write<W: Write>(&self, writer: &mut W) -> Result<()> {
1831        writer
1832            .write_all(b"LASF")
1833            .map_err(|e| Error::io("write LAS signature", e))?;
1834        writer
1835            .write_u16::<LittleEndian>(self.file_source_id)
1836            .map_err(|e| Error::io("write file source id", e))?;
1837        writer
1838            .write_u16::<LittleEndian>(self.global_encoding)
1839            .map_err(|e| Error::io("write global encoding", e))?;
1840        writer
1841            .write_all(&self.guid)
1842            .map_err(|e| Error::io("write GUID", e))?;
1843        writer
1844            .write_u8(1)
1845            .map_err(|e| Error::io("write version major", e))?;
1846        writer
1847            .write_u8(4)
1848            .map_err(|e| Error::io("write version minor", e))?;
1849        writer
1850            .write_all(&pad(self.system_identifier.as_bytes(), 32))
1851            .map_err(|e| Error::io("write system id", e))?;
1852        writer
1853            .write_all(&pad(self.generating_software.as_bytes(), 32))
1854            .map_err(|e| Error::io("write generating software", e))?;
1855        writer
1856            .write_u16::<LittleEndian>(self.creation_day_of_year)
1857            .map_err(|e| Error::io("write creation day", e))?;
1858        writer
1859            .write_u16::<LittleEndian>(self.creation_year)
1860            .map_err(|e| Error::io("write creation year", e))?;
1861        writer
1862            .write_u16::<LittleEndian>(375)
1863            .map_err(|e| Error::io("write header size", e))?;
1864        writer
1865            .write_u32::<LittleEndian>(self.offset_to_point_data)
1866            .map_err(|e| Error::io("write point data offset", e))?;
1867        writer
1868            .write_u32::<LittleEndian>(self.number_of_vlrs)
1869            .map_err(|e| Error::io("write VLR count", e))?;
1870        writer
1871            .write_u8(self.point_data_format)
1872            .map_err(|e| Error::io("write point format", e))?;
1873        writer
1874            .write_u16::<LittleEndian>(self.point_record_length)
1875            .map_err(|e| Error::io("write point record length", e))?;
1876        writer
1877            .write_u32::<LittleEndian>(self.legacy_point_count)
1878            .map_err(|e| Error::io("write legacy point count", e))?;
1879        for _ in 0..5 {
1880            writer
1881                .write_u32::<LittleEndian>(0)
1882                .map_err(|e| Error::io("write legacy returns", e))?;
1883        }
1884        writer
1885            .write_f64::<LittleEndian>(self.scale.0)
1886            .map_err(|e| Error::io("write x scale", e))?;
1887        writer
1888            .write_f64::<LittleEndian>(self.scale.1)
1889            .map_err(|e| Error::io("write y scale", e))?;
1890        writer
1891            .write_f64::<LittleEndian>(self.scale.2)
1892            .map_err(|e| Error::io("write z scale", e))?;
1893        writer
1894            .write_f64::<LittleEndian>(self.offset.0)
1895            .map_err(|e| Error::io("write x offset", e))?;
1896        writer
1897            .write_f64::<LittleEndian>(self.offset.1)
1898            .map_err(|e| Error::io("write y offset", e))?;
1899        writer
1900            .write_f64::<LittleEndian>(self.offset.2)
1901            .map_err(|e| Error::io("write z offset", e))?;
1902        writer
1903            .write_f64::<LittleEndian>(self.bounds.max.0)
1904            .map_err(|e| Error::io("write max x", e))?;
1905        writer
1906            .write_f64::<LittleEndian>(self.bounds.min.0)
1907            .map_err(|e| Error::io("write min x", e))?;
1908        writer
1909            .write_f64::<LittleEndian>(self.bounds.max.1)
1910            .map_err(|e| Error::io("write max y", e))?;
1911        writer
1912            .write_f64::<LittleEndian>(self.bounds.min.1)
1913            .map_err(|e| Error::io("write min y", e))?;
1914        writer
1915            .write_f64::<LittleEndian>(self.bounds.max.2)
1916            .map_err(|e| Error::io("write max z", e))?;
1917        writer
1918            .write_f64::<LittleEndian>(self.bounds.min.2)
1919            .map_err(|e| Error::io("write min z", e))?;
1920        writer
1921            .write_u64::<LittleEndian>(0)
1922            .map_err(|e| Error::io("write waveform packet start", e))?;
1923        writer
1924            .write_u64::<LittleEndian>(self.offset_to_first_evlr)
1925            .map_err(|e| Error::io("write first EVLR offset", e))?;
1926        writer
1927            .write_u32::<LittleEndian>(self.number_of_evlrs)
1928            .map_err(|e| Error::io("write EVLR count", e))?;
1929        writer
1930            .write_u64::<LittleEndian>(self.total_point_count)
1931            .map_err(|e| Error::io("write total point count", e))?;
1932        for count in self.extended_return_counts {
1933            writer
1934                .write_u64::<LittleEndian>(count)
1935                .map_err(|e| Error::io("write extended returns", e))?;
1936        }
1937        Ok(())
1938    }
1939}
1940
1941fn pad(value: &[u8], len: usize) -> Vec<u8> {
1942    let mut out = Vec::with_capacity(len);
1943    let take = value.len().min(len);
1944    out.extend_from_slice(&value[..take]);
1945    out.resize(len, 0);
1946    out
1947}
1948
1949fn write_vlr_header<W: Write>(
1950    writer: &mut W,
1951    user_id: &str,
1952    record_id: u16,
1953    body_size: u16,
1954    description: &str,
1955) -> Result<()> {
1956    writer
1957        .write_u16::<LittleEndian>(0)
1958        .map_err(|e| Error::io("write VLR reserved", e))?;
1959    writer
1960        .write_all(&pad(user_id.as_bytes(), 16))
1961        .map_err(|e| Error::io("write VLR user id", e))?;
1962    writer
1963        .write_u16::<LittleEndian>(record_id)
1964        .map_err(|e| Error::io("write VLR record id", e))?;
1965    writer
1966        .write_u16::<LittleEndian>(body_size)
1967        .map_err(|e| Error::io("write VLR body size", e))?;
1968    writer
1969        .write_all(&pad(description.as_bytes(), 32))
1970        .map_err(|e| Error::io("write VLR description", e))?;
1971    Ok(())
1972}
1973
1974fn write_las_vlr<W: Write>(writer: &mut W, vlr: &las::Vlr) -> Result<()> {
1975    let body_size = u16::try_from(vlr.data.len()).map_err(|_| {
1976        Error::InvalidInput(format!(
1977            "regular VLR {}:{} is too large: {} byte(s)",
1978            vlr.user_id,
1979            vlr.record_id,
1980            vlr.data.len()
1981        ))
1982    })?;
1983    write_vlr_header(
1984        writer,
1985        &vlr.user_id,
1986        vlr.record_id,
1987        body_size,
1988        &vlr.description,
1989    )?;
1990    writer
1991        .write_all(&vlr.data)
1992        .map_err(|e| Error::io("write VLR body", e))?;
1993    Ok(())
1994}
1995
1996fn regular_las_vlrs_bytes(vlrs: &[las::Vlr]) -> Result<u32> {
1997    vlrs.iter().try_fold(0u32, |total, vlr| {
1998        let data_len = u16::try_from(vlr.data.len()).map_err(|_| {
1999            Error::InvalidInput(format!(
2000                "regular VLR {}:{} is too large: {} byte(s)",
2001                vlr.user_id,
2002                vlr.record_id,
2003                vlr.data.len()
2004            ))
2005        })?;
2006        total
2007            .checked_add(LAS_VLR_HEADER_BYTES + u32::from(data_len))
2008            .ok_or_else(|| Error::InvalidInput("VLR byte size overflow".into()))
2009    })
2010}
2011
2012fn write_evlr_header<W: Write>(
2013    writer: &mut W,
2014    user_id: &str,
2015    record_id: u16,
2016    body_size: u64,
2017    description: &str,
2018) -> Result<()> {
2019    writer
2020        .write_u16::<LittleEndian>(0)
2021        .map_err(|e| Error::io("write EVLR reserved", e))?;
2022    writer
2023        .write_all(&pad(user_id.as_bytes(), 16))
2024        .map_err(|e| Error::io("write EVLR user id", e))?;
2025    writer
2026        .write_u16::<LittleEndian>(record_id)
2027        .map_err(|e| Error::io("write EVLR record id", e))?;
2028    writer
2029        .write_u64::<LittleEndian>(body_size)
2030        .map_err(|e| Error::io("write EVLR body size", e))?;
2031    writer
2032        .write_all(&pad(description.as_bytes(), 32))
2033        .map_err(|e| Error::io("write EVLR description", e))?;
2034    Ok(())
2035}
2036
2037fn write_las_evlr<W: Write>(writer: &mut W, vlr: &las::Vlr) -> Result<()> {
2038    let body_size = u64::try_from(vlr.data.len()).map_err(|_| {
2039        Error::InvalidInput(format!(
2040            "EVLR {}:{} is too large: {} byte(s)",
2041            vlr.user_id,
2042            vlr.record_id,
2043            vlr.data.len()
2044        ))
2045    })?;
2046    write_evlr_header(
2047        writer,
2048        &vlr.user_id,
2049        vlr.record_id,
2050        body_size,
2051        &vlr.description,
2052    )?;
2053    writer
2054        .write_all(&vlr.data)
2055        .map_err(|e| Error::io("write EVLR body", e))?;
2056    Ok(())
2057}
2058
2059fn encode_point_record(
2060    buf: &mut [u8],
2061    fields: &CopcPointFields,
2062    scale: (f64, f64, f64),
2063    offset: (f64, f64, f64),
2064    point_index: usize,
2065    format: &LasFormat,
2066) -> Result<()> {
2067    let mut cursor = Cursor::new(buf);
2068    let (ix, iy, iz) = quantize_xyz(point_index, fields.x, fields.y, fields.z, scale, offset)?;
2069    let flags =
2070        fields.synthetic | (fields.key_point << 1) | (fields.withheld << 2) | (fields.overlap << 3);
2071    let point = raw::Point {
2072        x: ix,
2073        y: iy,
2074        z: iz,
2075        intensity: fields.intensity,
2076        flags: raw::point::Flags::ThreeByte(
2077            fields.return_number | (fields.number_of_returns << 4),
2078            flags
2079                | (fields.scan_channel << 4)
2080                | (fields.scan_direction_flag << 6)
2081                | (fields.edge_of_flight_line << 7),
2082            fields.classification,
2083        ),
2084        scan_angle: raw::point::ScanAngle::from(fields.scan_angle),
2085        user_data: fields.user_data,
2086        point_source_id: fields.point_source_id,
2087        gps_time: Some(fields.gps_time),
2088        color: format
2089            .has_color
2090            .then_some(Color::new(fields.red, fields.green, fields.blue)),
2091        waveform: None,
2092        nir: None,
2093        extra_bytes: fields.extra_bytes.clone(),
2094    };
2095    point
2096        .write_to(&mut cursor, format)
2097        .map_err(|e| Error::Las(format!("write point record: {e}")))?;
2098    Ok(())
2099}
2100
2101#[cfg(test)]
2102mod tests {
2103    use super::*;
2104
2105    struct VecSource {
2106        points: Vec<CopcPointFields>,
2107    }
2108
2109    impl CopcPointSource for VecSource {
2110        fn len(&self) -> usize {
2111            self.points.len()
2112        }
2113
2114        fn xyz(&self, index: usize) -> (f64, f64, f64) {
2115            let point = &self.points[index];
2116            (point.x, point.y, point.z)
2117        }
2118
2119        fn fields(&self, index: usize) -> Result<CopcPointFields> {
2120            Ok(self.points[index].clone())
2121        }
2122    }
2123
2124    #[test]
2125    fn spooled_lod_index_covers_each_point_once() {
2126        let points = (0..257)
2127            .map(|i| CopcPointFields {
2128                x: f64::from((i * 37) % 101),
2129                y: f64::from((i * 53) % 103),
2130                z: f64::from((i * 71) % 107),
2131                intensity: 0,
2132                return_number: 1,
2133                number_of_returns: 1,
2134                synthetic: 0,
2135                key_point: 0,
2136                withheld: 0,
2137                overlap: 0,
2138                scan_channel: 0,
2139                scan_direction_flag: 0,
2140                edge_of_flight_line: 0,
2141                classification: 0,
2142                user_data: 0,
2143                scan_angle: 0.0,
2144                point_source_id: 0,
2145                gps_time: f64::from(i),
2146                red: 0,
2147                green: 0,
2148                blue: 0,
2149                extra_bytes: Vec::new(),
2150            })
2151            .collect();
2152        let source = VecSource { points };
2153        let bounds = source_bounds(&source);
2154        let (center, halfsize) = cube_from_bounds(&bounds);
2155        let params = CopcWriterParams {
2156            max_points_per_node: 7,
2157            max_depth: 5,
2158        };
2159
2160        let spooled = build_lod_index(&source, center, halfsize, &params, &NeverCancel).unwrap();
2161        let ranges = read_lod_index(&spooled).unwrap();
2162
2163        let mut seen = vec![false; source.len()];
2164        let mut total = 0usize;
2165        for (key, indices) in ranges {
2166            if key.level < params.max_depth as i32 {
2167                assert!(indices.len() <= params.max_points_per_node as usize);
2168            }
2169            for index in indices {
2170                let seen = &mut seen[index as usize];
2171                assert!(!*seen, "point index {index} was assigned more than once");
2172                *seen = true;
2173                total += 1;
2174            }
2175        }
2176        assert_eq!(source.len(), total);
2177        assert!(seen.into_iter().all(|value| value));
2178    }
2179
2180    #[test]
2181    fn dense_cluster_stays_bounded_below_giant_chunks() {
2182        // A dense cluster inside large bounds: at a shallow `max_depth` the whole
2183        // cluster would collapse into one oversized leaf, forcing the layered LAZ
2184        // compressor to buffer that entire chunk in memory (the multi-GB failure
2185        // mode on real clouds). The writer must keep subdividing dense nodes past
2186        // `max_depth` so no COPC chunk exceeds `max_points_per_node`.
2187        let field = |x: f64, y: f64, z: f64, i: u32| CopcPointFields {
2188            x,
2189            y,
2190            z,
2191            intensity: 0,
2192            return_number: 1,
2193            number_of_returns: 1,
2194            synthetic: 0,
2195            key_point: 0,
2196            withheld: 0,
2197            overlap: 0,
2198            scan_channel: 0,
2199            scan_direction_flag: 0,
2200            edge_of_flight_line: 0,
2201            classification: 0,
2202            user_data: 0,
2203            scan_angle: 0.0,
2204            point_source_id: 0,
2205            gps_time: f64::from(i),
2206            red: 0,
2207            green: 0,
2208            blue: 0,
2209            extra_bytes: Vec::new(),
2210        };
2211        // 4000 distinct points packed into a ~0.4-unit cluster ...
2212        let mut points: Vec<CopcPointFields> = (0..4_000u32)
2213            .map(|i| {
2214                let f = f64::from(i);
2215                field(
2216                    f * 1e-4,
2217                    (f * 1.7).fract() * 0.4,
2218                    (f * 2.3).fract() * 0.4,
2219                    i,
2220                )
2221            })
2222            .collect();
2223        // ... plus a few points spread wide to set large bounds around it.
2224        for i in 0..8u32 {
2225            points.push(field(
2226                f64::from(i) * 1000.0,
2227                f64::from(i) * 1000.0,
2228                f64::from(i) * 100.0,
2229                100_000 + i,
2230            ));
2231        }
2232        let max_points = 100usize;
2233        let source = VecSource { points };
2234        let bounds = source_bounds(&source);
2235        let (center, halfsize) = cube_from_bounds(&bounds);
2236        // Deliberately shallow — the writer must override it for the dense cluster.
2237        let params = CopcWriterParams {
2238            max_points_per_node: max_points as u32,
2239            max_depth: 3,
2240        };
2241
2242        let lod = build_lod_index(&source, center, halfsize, &params, &NeverCancel).unwrap();
2243        for (key, indices) in read_lod_index(&lod).unwrap() {
2244            assert!(
2245                indices.len() <= max_points,
2246                "node {key:?} holds {} points, exceeding max_points_per_node {max_points}",
2247                indices.len(),
2248            );
2249        }
2250    }
2251
2252    #[test]
2253    fn hierarchy_plan_splits_large_root_page() {
2254        let mut entries = vec![Entry {
2255            key: VoxelKey::root(),
2256            offset: 1,
2257            byte_size: 1,
2258            point_count: 1,
2259        }];
2260        let mut offset = 2;
2261        for z in 0..16 {
2262            for y in 0..16 {
2263                for x in 0..16 {
2264                    entries.push(Entry {
2265                        key: VoxelKey { level: 4, x, y, z },
2266                        offset,
2267                        byte_size: 1,
2268                        point_count: 1,
2269                    });
2270                    offset += 1;
2271                }
2272            }
2273        }
2274        entries.sort_by_key(|entry| entry.key);
2275
2276        let mut plan = plan_hierarchy_pages(&entries, VoxelKey::root()).unwrap();
2277        let start = 1024;
2278        let end = assign_hierarchy_page_offsets(&mut plan, start).unwrap();
2279
2280        assert!(plan.byte_size < hierarchy_page_byte_size(entries.len()).unwrap());
2281        assert!(plan
2282            .items
2283            .iter()
2284            .any(|item| matches!(item, HierarchyPageItem::Child(_))));
2285
2286        let mut out = Cursor::new(vec![0; start as usize]);
2287        out.seek(SeekFrom::Start(start)).unwrap();
2288        write_hierarchy_page_tree(&mut out, &plan).unwrap();
2289        assert_eq!(end, out.get_ref().len() as u64);
2290    }
2291
2292    fn source_bounds(source: &VecSource) -> Bounds {
2293        source.points.iter().fold(
2294            Bounds::point(source.points[0].x, source.points[0].y, source.points[0].z),
2295            |mut bounds, point| {
2296                bounds.extend(point.x, point.y, point.z);
2297                bounds
2298            },
2299        )
2300    }
2301
2302    fn read_lod_index(index: &LodIndex) -> Result<Vec<(VoxelKey, Vec<u32>)>> {
2303        let path: &Path = index.order_path.as_ref();
2304        let mut reader =
2305            BufReader::new(File::open(path).map_err(|e| Error::io("open LOD order", e))?);
2306        let mut out = Vec::new();
2307        for node in &index.nodes {
2308            reader
2309                .seek(SeekFrom::Start(node.start))
2310                .map_err(|e| Error::io("seek LOD order", e))?;
2311            let mut indices = Vec::with_capacity(node.count);
2312            for _ in 0..node.count {
2313                indices.push(
2314                    reader
2315                        .read_u32::<LittleEndian>()
2316                        .map_err(|e| Error::io("read LOD order", e))?,
2317                );
2318            }
2319            out.push((node.key, indices));
2320        }
2321        Ok(out)
2322    }
2323}