1use std::fs::File;
2use std::io::{BufReader, BufWriter, Cursor, Seek, SeekFrom, Write};
3use std::path::Path;
4
5use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
6use copc_core::{
7 Bounds, CancelCheck, ColumnData, CopcInfo, Entry, Error, LasColumnBatch, LasDimension,
8 LasPointRecord, NeverCancel, Result, StreamingLayout, VoxelKey, HIERARCHY_ENTRY_BYTES,
9};
10use las::{point::Format as LasFormat, raw, Color, Read as _};
11use laz::{LasZipCompressor, LazVlrBuilder};
12use tempfile::{NamedTempFile, TempPath};
13
14use crate::spill::{SpillReader, SpillWriter};
15
16const CANCEL_POLL_STRIDE: usize = 4_096;
17const HIERARCHY_PAGE_MAX_ENTRIES: usize = 4_096;
18const INDEX_RECORD_BYTES: u64 = 4;
19const MIN_LEAF_DEPTH: u32 = 21;
29const MAX_LEAF_DEPTH: u32 = 30;
30const LAS_14_SCAN_ANGLE_SCALE: f32 = 0.006;
31const LAS_VLR_HEADER_BYTES: u32 = 54;
32const LAS_EVLR_HEADER_BYTES: u64 = 60;
33const LASZIP_VLR_USER_ID: &str = "laszip encoded";
34const LASZIP_VLR_RECORD_ID: u16 = 22204;
35const LASF_PROJECTION_USER_ID: &str = "LASF_Projection";
36const WKT_CRS_RECORD_ID: u16 = 2112;
37const GEOTIFF_GEO_KEY_DIRECTORY_RECORD_ID: u16 = 34735;
38const GEOTIFF_DOUBLE_PARAMS_RECORD_ID: u16 = 34736;
39const GEOTIFF_ASCII_PARAMS_RECORD_ID: u16 = 34737;
40const LASF_SPEC_USER_ID: &str = "LASF_Spec";
41const EXTRA_BYTES_RECORD_ID: u16 = 4;
42const WKT_GLOBAL_ENCODING_BIT: u16 = 16;
43const LAS_INPUT_BUFFER_BYTES: usize = 1024 * 1024;
44const COPC_OUTPUT_BUFFER_BYTES: usize = 1024 * 1024;
45const INDEX_IO_BUFFER_BYTES: usize = 1024 * 1024;
46
47#[derive(Clone, Debug, PartialEq)]
49pub struct CopcPointFields {
50 pub x: f64,
51 pub y: f64,
52 pub z: f64,
53 pub intensity: u16,
54 pub return_number: u8,
55 pub number_of_returns: u8,
56 pub synthetic: u8,
57 pub key_point: u8,
58 pub withheld: u8,
59 pub overlap: u8,
60 pub scan_channel: u8,
61 pub scan_direction_flag: u8,
62 pub edge_of_flight_line: u8,
63 pub classification: u8,
64 pub user_data: u8,
65 pub scan_angle: f32,
67 pub point_source_id: u16,
68 pub gps_time: f64,
69 pub red: u16,
70 pub green: u16,
71 pub blue: u16,
72 pub extra_bytes: Vec<u8>,
73}
74
75pub trait CopcPointSource {
77 fn len(&self) -> usize;
78 fn xyz(&self, index: usize) -> (f64, f64, f64);
79 fn fields(&self, index: usize) -> Result<CopcPointFields>;
80 fn extra_byte_count(&self) -> u16 {
81 0
82 }
83 fn extra_bytes_vlrs(&self) -> &[las::Vlr] {
84 &[]
85 }
86
87 fn is_empty(&self) -> bool {
88 self.len() == 0
89 }
90}
91
92pub struct ColumnBatchSource<'a> {
94 batch: &'a LasColumnBatch,
95 x: &'a [f64],
96 y: &'a [f64],
97 z: &'a [f64],
98 intensity: Option<&'a [u16]>,
99 return_number: Option<&'a [u8]>,
100 number_of_returns: Option<&'a [u8]>,
101 synthetic: Option<&'a [bool]>,
102 key_point: Option<&'a [bool]>,
103 withheld: Option<&'a [bool]>,
104 overlap: Option<&'a [bool]>,
105 scan_channel: Option<&'a [u8]>,
106 scan_direction_flag: Option<&'a [bool]>,
107 edge_of_flight_line: Option<&'a [bool]>,
108 classification: Option<&'a [u8]>,
109 user_data: Option<&'a [u8]>,
110 scan_angle_rank: Option<&'a [i16]>,
111 point_source_id: Option<&'a [u16]>,
112 gps_time: Option<&'a [f64]>,
113 red: Option<&'a [u16]>,
114 green: Option<&'a [u16]>,
115 blue: Option<&'a [u16]>,
116 extra_bytes: Option<(&'a [u8], usize)>,
117}
118
119impl<'a> ColumnBatchSource<'a> {
120 pub fn new(batch: &'a LasColumnBatch) -> Result<Self> {
121 batch.validate()?;
122 validate_column_batch_writer_support(batch)?;
123
124 let x = required_f64_column(batch, LasDimension::X)?;
125 let y = required_f64_column(batch, LasDimension::Y)?;
126 let z = required_f64_column(batch, LasDimension::Z)?;
127 let red = optional_u16_column(batch, LasDimension::Red)?;
128 let green = optional_u16_column(batch, LasDimension::Green)?;
129 let blue = optional_u16_column(batch, LasDimension::Blue)?;
130 let extra_bytes = optional_extra_bytes_column(batch)?;
131 validate_color_columns(red, green, blue)?;
132
133 Ok(Self {
134 batch,
135 x,
136 y,
137 z,
138 intensity: optional_u16_column(batch, LasDimension::Intensity)?,
139 return_number: optional_u8_column(batch, LasDimension::ReturnNumber)?,
140 number_of_returns: optional_u8_column(batch, LasDimension::NumberOfReturns)?,
141 synthetic: optional_bool_column(batch, LasDimension::Synthetic)?,
142 key_point: optional_bool_column(batch, LasDimension::KeyPoint)?,
143 withheld: optional_bool_column(batch, LasDimension::Withheld)?,
144 overlap: optional_bool_column(batch, LasDimension::Overlap)?,
145 scan_channel: optional_u8_column(batch, LasDimension::ScanChannel)?,
146 scan_direction_flag: optional_bool_column(batch, LasDimension::ScanDirectionFlag)?,
147 edge_of_flight_line: optional_bool_column(batch, LasDimension::EdgeOfFlightLine)?,
148 classification: optional_u8_column(batch, LasDimension::Classification)?,
149 user_data: optional_u8_column(batch, LasDimension::UserData)?,
150 scan_angle_rank: optional_i16_column(batch, LasDimension::ScanAngleRank)?,
151 point_source_id: optional_u16_column(batch, LasDimension::PointSourceId)?,
152 gps_time: optional_f64_column(batch, LasDimension::GpsTime)?,
153 red,
154 green,
155 blue,
156 extra_bytes,
157 })
158 }
159
160 pub fn batch(&self) -> &LasColumnBatch {
161 self.batch
162 }
163
164 pub fn has_color(&self) -> bool {
165 self.red.is_some() && self.green.is_some() && self.blue.is_some()
166 }
167
168 pub fn extra_byte_width(&self) -> usize {
169 self.extra_bytes.map(|(_, width)| width).unwrap_or(0)
170 }
171
172 pub fn bounds(&self) -> Result<Bounds> {
173 if self.is_empty() {
174 return Err(Error::InvalidInput(
175 "cannot compute bounds for empty column batch".into(),
176 ));
177 }
178 let mut bounds = Bounds::point(self.x[0], self.y[0], self.z[0]);
179 for index in 1..self.len() {
180 bounds.extend(self.x[index], self.y[index], self.z[index]);
181 }
182 Ok(bounds)
183 }
184}
185
186impl CopcPointSource for ColumnBatchSource<'_> {
187 fn len(&self) -> usize {
188 self.batch.len()
189 }
190
191 #[inline]
192 fn xyz(&self, index: usize) -> (f64, f64, f64) {
193 (self.x[index], self.y[index], self.z[index])
194 }
195
196 fn fields(&self, index: usize) -> Result<CopcPointFields> {
197 Ok(CopcPointFields {
198 x: self.x[index],
199 y: self.y[index],
200 z: self.z[index],
201 intensity: at_u16(self.intensity, index),
202 return_number: at_u8(self.return_number, index),
203 number_of_returns: at_u8(self.number_of_returns, index),
204 synthetic: at_bool_u8(self.synthetic, index),
205 key_point: at_bool_u8(self.key_point, index),
206 withheld: at_bool_u8(self.withheld, index),
207 overlap: at_bool_u8(self.overlap, index),
208 scan_channel: at_u8(self.scan_channel, index),
209 scan_direction_flag: at_bool_u8(self.scan_direction_flag, index),
210 edge_of_flight_line: at_bool_u8(self.edge_of_flight_line, index),
211 classification: at_u8(self.classification, index),
212 user_data: at_u8(self.user_data, index),
213 scan_angle: self
214 .scan_angle_rank
215 .map(|column| column[index] as f32 * 90.0 / 180.0)
216 .unwrap_or(0.0),
217 point_source_id: at_u16(self.point_source_id, index),
218 gps_time: self.gps_time.map(|column| column[index]).unwrap_or(0.0),
219 red: at_u16(self.red, index),
220 green: at_u16(self.green, index),
221 blue: at_u16(self.blue, index),
222 extra_bytes: extra_bytes_at(self.extra_bytes, index),
223 })
224 }
225
226 fn extra_byte_count(&self) -> u16 {
227 self.extra_byte_width() as u16
228 }
229}
230
231fn at_bool_u8(column: Option<&[bool]>, index: usize) -> u8 {
232 column.map(|values| u8::from(values[index])).unwrap_or(0)
233}
234
235fn at_u8(column: Option<&[u8]>, index: usize) -> u8 {
236 column.map(|values| values[index]).unwrap_or(0)
237}
238
239fn at_u16(column: Option<&[u16]>, index: usize) -> u16 {
240 column.map(|values| values[index]).unwrap_or(0)
241}
242
243fn validate_column_batch_writer_support(batch: &LasColumnBatch) -> Result<()> {
244 let unsupported: Vec<_> = batch
245 .columns
246 .iter()
247 .filter_map(|(spec, _)| match spec.dimension {
248 LasDimension::Nir => Some("NIR point data"),
249 LasDimension::WaveformPacketDescriptorIndex
250 | LasDimension::WaveformPacketByteOffset
251 | LasDimension::WaveformPacketSize
252 | LasDimension::WavePacketReturnPointWaveformLocation => Some("waveform point data"),
253 _ => None,
254 })
255 .collect();
256 if unsupported.is_empty() {
257 Ok(())
258 } else {
259 Err(Error::Unsupported(format!(
260 "COPC writer cannot preserve {}",
261 unsupported.join(", ")
262 )))
263 }
264}
265
266fn validate_color_columns(
267 red: Option<&[u16]>,
268 green: Option<&[u16]>,
269 blue: Option<&[u16]>,
270) -> Result<()> {
271 let present =
272 usize::from(red.is_some()) + usize::from(green.is_some()) + usize::from(blue.is_some());
273 if present == 0 || present == 3 {
274 Ok(())
275 } else {
276 Err(Error::InvalidInput(
277 "Red, Green, and Blue columns must be supplied together".into(),
278 ))
279 }
280}
281
282fn required_f64_column(batch: &LasColumnBatch, dimension: LasDimension) -> Result<&[f64]> {
283 match batch.column(dimension) {
284 Some(ColumnData::F64(values)) => Ok(values),
285 Some(other) => Err(unexpected_column_type(dimension, "F64", other)),
286 None => Err(Error::InvalidInput(format!(
287 "ColumnBatchSource requires {dimension:?} column"
288 ))),
289 }
290}
291
292fn optional_f64_column(batch: &LasColumnBatch, dimension: LasDimension) -> Result<Option<&[f64]>> {
293 match batch.column(dimension) {
294 Some(ColumnData::F64(values)) => Ok(Some(values)),
295 Some(other) => Err(unexpected_column_type(dimension, "F64", other)),
296 None => Ok(None),
297 }
298}
299
300fn optional_i16_column(batch: &LasColumnBatch, dimension: LasDimension) -> Result<Option<&[i16]>> {
301 match batch.column(dimension) {
302 Some(ColumnData::I16(values)) => Ok(Some(values)),
303 Some(other) => Err(unexpected_column_type(dimension, "I16", other)),
304 None => Ok(None),
305 }
306}
307
308fn optional_u16_column(batch: &LasColumnBatch, dimension: LasDimension) -> Result<Option<&[u16]>> {
309 match batch.column(dimension) {
310 Some(ColumnData::U16(values)) => Ok(Some(values)),
311 Some(other) => Err(unexpected_column_type(dimension, "U16", other)),
312 None => Ok(None),
313 }
314}
315
316fn optional_u8_column(batch: &LasColumnBatch, dimension: LasDimension) -> Result<Option<&[u8]>> {
317 match batch.column(dimension) {
318 Some(ColumnData::U8(values)) => Ok(Some(values)),
319 Some(other) => Err(unexpected_column_type(dimension, "U8", other)),
320 None => Ok(None),
321 }
322}
323
324fn optional_extra_bytes_column(batch: &LasColumnBatch) -> Result<Option<(&[u8], usize)>> {
325 let mut extra_bytes = None;
326 for (spec, data) in &batch.columns {
327 if spec.dimension != LasDimension::ExtraBytes {
328 continue;
329 }
330 let width = spec.extra_byte_width().ok_or_else(|| {
331 Error::InvalidInput("ExtraBytes column requires a non-zero byte width".into())
332 })?;
333 if width > usize::from(u16::MAX) {
334 return Err(Error::InvalidInput(format!(
335 "ExtraBytes column width {width} exceeds LAS u16 range"
336 )));
337 }
338 let values = match data {
339 ColumnData::U8(values) => values.as_slice(),
340 other => {
341 return Err(unexpected_column_type(
342 LasDimension::ExtraBytes,
343 "U8",
344 other,
345 ))
346 }
347 };
348 if extra_bytes.replace((values, width)).is_some() {
349 return Err(Error::InvalidInput(
350 "ColumnBatchSource supports at most one ExtraBytes column".into(),
351 ));
352 }
353 }
354 Ok(extra_bytes)
355}
356
357fn extra_bytes_at(column: Option<(&[u8], usize)>, index: usize) -> Vec<u8> {
358 match column {
359 Some((values, width)) => {
360 let start = index * width;
361 values[start..start + width].to_vec()
362 }
363 None => Vec::new(),
364 }
365}
366
367fn optional_bool_column(
368 batch: &LasColumnBatch,
369 dimension: LasDimension,
370) -> Result<Option<&[bool]>> {
371 match batch.column(dimension) {
372 Some(ColumnData::Bool(values)) => Ok(Some(values)),
373 Some(other) => Err(unexpected_column_type(dimension, "Bool", other)),
374 None => Ok(None),
375 }
376}
377
378fn unexpected_column_type(dimension: LasDimension, expected: &str, actual: &ColumnData) -> Error {
379 Error::InvalidInput(format!(
380 "{dimension:?} column must be {expected}, found {:?}",
381 actual.scalar()
382 ))
383}
384
385struct SpillSource<'a> {
386 reader: &'a SpillReader,
387}
388
389impl CopcPointSource for SpillSource<'_> {
390 fn len(&self) -> usize {
391 self.reader.len()
392 }
393
394 #[inline]
395 fn xyz(&self, index: usize) -> (f64, f64, f64) {
396 self.reader.xyz_at(index)
397 }
398
399 fn fields(&self, index: usize) -> Result<CopcPointFields> {
400 let record = self.reader.record_at(index)?;
401 Ok(CopcPointFields {
402 x: record.x,
403 y: record.y,
404 z: record.z,
405 intensity: record.intensity,
406 return_number: record.return_number,
407 number_of_returns: record.number_of_returns,
408 synthetic: u8::from(record.synthetic),
409 key_point: u8::from(record.key_point),
410 withheld: u8::from(record.withheld),
411 overlap: u8::from(record.overlap),
412 scan_channel: record.scan_channel,
413 scan_direction_flag: u8::from(record.scan_direction_flag),
414 edge_of_flight_line: u8::from(record.edge_of_flight_line),
415 classification: record.classification,
416 user_data: record.user_data,
417 scan_angle: record.scan_angle,
418 point_source_id: record.point_source_id,
419 gps_time: record.gps_time,
420 red: record.red,
421 green: record.green,
422 blue: record.blue,
423 extra_bytes: record.extra_bytes,
424 })
425 }
426
427 fn extra_byte_count(&self) -> u16 {
428 self.reader.layout().extra_bytes
429 }
430
431 fn extra_bytes_vlrs(&self) -> &[las::Vlr] {
432 &self.reader.layout().extra_bytes_descriptors
433 }
434}
435
436#[derive(Clone, Debug)]
437struct OutputCrsRecord {
438 vlr: las::Vlr,
439 is_extended: bool,
440}
441
442#[derive(Clone, Debug)]
443struct OutputLasMetadata {
444 file_source_id: u16,
445 global_encoding: u16,
446 guid: [u8; 16],
447 system_identifier: String,
448 generating_software: String,
449 creation_day_of_year: u16,
450 creation_year: u16,
451 scale: (f64, f64, f64),
452 offset: Option<(f64, f64, f64)>,
453 crs_records: Vec<OutputCrsRecord>,
454 pass_through_vlrs: Vec<las::Vlr>,
455 pass_through_evlrs: Vec<las::Vlr>,
456}
457
458impl Default for OutputLasMetadata {
459 fn default() -> Self {
460 Self {
461 file_source_id: 0,
462 global_encoding: 0,
463 guid: [0; 16],
464 system_identifier: "copc-rust".to_string(),
465 generating_software: "copc-writer".to_string(),
466 creation_day_of_year: 0,
467 creation_year: 2026,
468 scale: (0.001, 0.001, 0.001),
469 offset: None,
470 crs_records: Vec::new(),
471 pass_through_vlrs: Vec::new(),
472 pass_through_evlrs: Vec::new(),
473 }
474 }
475}
476
477impl OutputLasMetadata {
478 fn from_las_header(header: &las::Header, source_evlrs: &[las::Vlr]) -> Self {
479 let mut global_encoding = u16::from(header.gps_time_type());
480 if header.has_synthetic_return_numbers() {
481 global_encoding |= 8;
482 }
483 let crs_records = extract_source_wkt_crs_records(header, source_evlrs);
484 if !crs_records.is_empty() {
485 global_encoding |= WKT_GLOBAL_ENCODING_BIT;
486 }
487 let pass_through_vlrs = extract_pass_through_vlrs(header);
488 let pass_through_evlrs = extract_pass_through_evlrs(source_evlrs);
489 let transforms = header.transforms();
490 let (creation_day_of_year, creation_year) = header
491 .date()
492 .map(|date| {
493 let year = date.format("%Y").to_string().parse().unwrap_or(0);
494 let day = date.format("%j").to_string().parse().unwrap_or(0);
495 (day, year)
496 })
497 .unwrap_or((0, 0));
498
499 Self {
500 file_source_id: header.file_source_id(),
501 global_encoding,
502 guid: *header.guid().as_bytes(),
503 system_identifier: header.system_identifier().to_string(),
504 generating_software: header.generating_software().to_string(),
505 creation_day_of_year,
506 creation_year,
507 scale: (transforms.x.scale, transforms.y.scale, transforms.z.scale),
508 offset: Some((
509 transforms.x.offset,
510 transforms.y.offset,
511 transforms.z.offset,
512 )),
513 crs_records,
514 pass_through_vlrs,
515 pass_through_evlrs,
516 }
517 }
518
519 fn regular_crs_vlrs(&self) -> impl Iterator<Item = &las::Vlr> {
520 self.crs_records
521 .iter()
522 .filter(|record| !record.is_extended)
523 .map(|record| &record.vlr)
524 }
525
526 fn extended_crs_evlrs(&self) -> impl Iterator<Item = &las::Vlr> {
527 self.crs_records
528 .iter()
529 .filter(|record| record.is_extended)
530 .map(|record| &record.vlr)
531 }
532
533 fn regular_crs_vlr_count(&self) -> usize {
534 self.crs_records
535 .iter()
536 .filter(|record| !record.is_extended)
537 .count()
538 }
539
540 fn extended_crs_evlr_count(&self) -> usize {
541 self.crs_records
542 .iter()
543 .filter(|record| record.is_extended)
544 .count()
545 }
546
547 fn regular_crs_vlr_bytes(&self) -> Result<u32> {
548 self.regular_crs_vlrs().try_fold(0u32, |total, vlr| {
549 let data_len = u16::try_from(vlr.data.len()).map_err(|_| {
550 Error::InvalidInput(format!(
551 "regular WKT CRS VLR is too large: {} byte(s)",
552 vlr.data.len()
553 ))
554 })?;
555 total
556 .checked_add(LAS_VLR_HEADER_BYTES + u32::from(data_len))
557 .ok_or_else(|| Error::InvalidInput("CRS VLR byte size overflow".into()))
558 })
559 }
560
561 fn source_evlrs_after_hierarchy(&self) -> impl Iterator<Item = &las::Vlr> {
562 self.extended_crs_evlrs()
563 .chain(self.pass_through_evlrs.iter())
564 }
565
566 fn source_evlr_count_after_hierarchy(&self) -> usize {
567 self.extended_crs_evlr_count() + self.pass_through_evlrs.len()
568 }
569}
570
571#[derive(Clone, Copy, Debug, PartialEq)]
572struct PointStats {
573 gpstime_min: f64,
574 gpstime_max: f64,
575 extended_return_counts: [u64; 15],
576}
577
578impl PointStats {
579 fn new() -> Self {
580 Self {
581 gpstime_min: f64::INFINITY,
582 gpstime_max: f64::NEG_INFINITY,
583 extended_return_counts: [0; 15],
584 }
585 }
586
587 fn record(&mut self, index: usize, fields: &CopcPointFields) -> Result<()> {
588 validate_finite_value(&format!("point {index} GPS time"), fields.gps_time)?;
589 self.gpstime_min = self.gpstime_min.min(fields.gps_time);
590 self.gpstime_max = self.gpstime_max.max(fields.gps_time);
591 if (1..=15).contains(&fields.return_number) {
592 self.extended_return_counts[usize::from(fields.return_number - 1)] += 1;
593 }
594 Ok(())
595 }
596}
597
598#[derive(Debug, Clone, Copy)]
599pub struct CopcWriterParams {
600 pub max_points_per_node: u32,
601 pub max_depth: u32,
602}
603
604impl Default for CopcWriterParams {
605 fn default() -> Self {
606 Self {
607 max_points_per_node: 100_000,
608 max_depth: 8,
609 }
610 }
611}
612
613pub fn write_source<S: CopcPointSource>(
614 path: &Path,
615 source: &S,
616 has_color: bool,
617 bounds: Bounds,
618 params: &CopcWriterParams,
619) -> Result<()> {
620 write_source_with_cancel(path, source, has_color, bounds, params, &NeverCancel)
621}
622
623pub fn write_source_with_cancel<S: CopcPointSource>(
624 path: &Path,
625 source: &S,
626 has_color: bool,
627 bounds: Bounds,
628 params: &CopcWriterParams,
629 cancel: &dyn CancelCheck,
630) -> Result<()> {
631 cancel.check()?;
632 if source.is_empty() {
633 return Err(Error::InvalidInput(
634 "cannot write empty cloud to COPC".into(),
635 ));
636 }
637 write_copc_inner(
638 path,
639 source,
640 has_color,
641 bounds,
642 params,
643 cancel,
644 &OutputLasMetadata::default(),
645 )
646}
647
648pub fn write_streaming_with_cancel<I>(
649 path: &Path,
650 layout: StreamingLayout,
651 points: I,
652 params: &CopcWriterParams,
653 spill_dir: &Path,
654 cancel: &dyn CancelCheck,
655) -> Result<()>
656where
657 I: IntoIterator<Item = Result<LasPointRecord>>,
658{
659 cancel.check()?;
660 validate_streaming_layout_supported(&layout)?;
661 let mut spill = SpillWriter::create(spill_dir, layout)?;
662 for (index, item) in points.into_iter().enumerate() {
663 if index % CANCEL_POLL_STRIDE == 0 {
664 cancel.check()?;
665 }
666 let record = item?;
667 validate_record_coordinates(&record, index)?;
668 spill.push(&record)?;
669 }
670 cancel.check()?;
671 let reader = spill.finalize()?;
672 write_copc_from_spill(path, reader, params, cancel, &OutputLasMetadata::default())
673}
674
675pub fn convert_las_to_copc_streaming(
676 las_path: &Path,
677 copc_path: &Path,
678 params: &CopcWriterParams,
679 spill_dir: &Path,
680 cancel: &dyn CancelCheck,
681) -> Result<()> {
682 cancel.check()?;
683 let las_file = File::open(las_path).map_err(|e| Error::io("open source LAS/LAZ", e))?;
684 let mut reader = las::Reader::new(BufReader::with_capacity(LAS_INPUT_BUFFER_BYTES, las_file))
685 .map_err(|e| Error::Las(e.to_string()))?;
686 let source_evlrs = read_all_source_evlrs(las_path)?;
687 validate_las_conversion_supported(reader.header(), &source_evlrs)?;
688 let output_metadata = OutputLasMetadata::from_las_header(reader.header(), &source_evlrs);
689 let layout = StreamingLayout::from_las_header(reader.header());
690 let mut spill = SpillWriter::create(spill_dir, layout)?;
691 for (index, result) in reader.points().enumerate() {
692 if index % CANCEL_POLL_STRIDE == 0 {
693 cancel.check()?;
694 }
695 let point = result.map_err(|e| Error::Las(e.to_string()))?;
696 let record = LasPointRecord::from_las_point(&point);
697 validate_record_coordinates(&record, index)?;
698 spill.push(&record)?;
699 }
700 cancel.check()?;
701 let reader = spill.finalize()?;
702 write_copc_from_spill(copc_path, reader, params, cancel, &output_metadata)
703}
704
705fn read_all_source_evlrs(path: &Path) -> Result<Vec<las::Vlr>> {
706 let mut file = File::open(path).map_err(|e| Error::io("open source LAS/LAZ", e))?;
707 let raw_header =
708 raw::Header::read_from(&mut file).map_err(|e| Error::Las(format!("source header: {e}")))?;
709 let Some(evlr_header) = raw_header.evlr else {
710 return Ok(Vec::new());
711 };
712
713 file.seek(SeekFrom::Start(evlr_header.start_of_first_evlr))
714 .map_err(|e| Error::io("seek source EVLRs", e))?;
715 let evlr_count = usize::try_from(evlr_header.number_of_evlrs)
716 .map_err(|_| Error::InvalidInput("source EVLR count overflows usize".into()))?;
717 let mut evlrs = Vec::with_capacity(evlr_count);
718 for index in 0..evlr_header.number_of_evlrs {
719 let evlr = raw::Vlr::read_from(&mut file, true)
720 .map(las::Vlr::new)
721 .map_err(|e| Error::Las(format!("source EVLR {index}: {e}")))?;
722 evlrs.push(evlr);
723 }
724 Ok(evlrs)
725}
726
727fn validate_streaming_layout_supported(layout: &StreamingLayout) -> Result<()> {
728 let mut unsupported = Vec::new();
729 if layout.has_nir {
730 unsupported.push("NIR point data".to_string());
731 }
732 if layout.has_waveform {
733 unsupported.push("waveform point data".to_string());
734 }
735 if unsupported.is_empty() {
736 Ok(())
737 } else {
738 Err(Error::Unsupported(format!(
739 "COPC writer cannot preserve {}",
740 unsupported.join(", ")
741 )))
742 }
743}
744
745fn validate_las_conversion_supported(
746 header: &las::Header,
747 source_evlrs: &[las::Vlr],
748) -> Result<()> {
749 let mut unsupported = Vec::new();
750 let format = header.point_format();
751 if format.has_nir {
752 unsupported.push("NIR point data".to_string());
753 }
754 if format.has_waveform {
755 unsupported.push("waveform point data".to_string());
756 }
757 let source_has_wkt_crs_record = has_wkt_crs_record(header, source_evlrs);
758 let mut geotiff_crs_record_count = 0usize;
759 for vlr in header.vlrs() {
760 if is_geotiff_crs_vlr(vlr) && !source_has_wkt_crs_record {
761 geotiff_crs_record_count += 1;
762 }
763 }
764 for evlr in source_evlrs {
765 if is_geotiff_crs_vlr(evlr) && !source_has_wkt_crs_record {
766 geotiff_crs_record_count += 1;
767 }
768 }
769 if geotiff_crs_record_count > 0 {
770 unsupported.push(format!(
771 "{geotiff_crs_record_count} GeoTIFF CRS VLR/EVLR(s); GeoTIFF-to-WKT CRS conversion is not implemented in copc-writer"
772 ));
773 }
774
775 if unsupported.is_empty() {
776 Ok(())
777 } else {
778 Err(Error::Unsupported(format!(
779 "LAS-to-COPC streaming conversion cannot preserve {}",
780 unsupported.join(", ")
781 )))
782 }
783}
784
785fn is_laszip_vlr(vlr: &las::Vlr) -> bool {
786 vlr.user_id == LASZIP_VLR_USER_ID && vlr.record_id == LASZIP_VLR_RECORD_ID
787}
788
789fn is_copc_info_vlr(vlr: &las::Vlr) -> bool {
790 vlr.user_id == "copc" && vlr.record_id == 1
791}
792
793fn is_copc_hierarchy_evlr(vlr: &las::Vlr) -> bool {
794 vlr.user_id == "copc" && vlr.record_id == 1000
795}
796
797fn is_wkt_crs_vlr(vlr: &las::Vlr) -> bool {
798 vlr.user_id == LASF_PROJECTION_USER_ID && vlr.record_id == WKT_CRS_RECORD_ID
799}
800
801fn is_geotiff_crs_vlr(vlr: &las::Vlr) -> bool {
802 vlr.user_id == LASF_PROJECTION_USER_ID
803 && matches!(
804 vlr.record_id,
805 GEOTIFF_GEO_KEY_DIRECTORY_RECORD_ID
806 | GEOTIFF_DOUBLE_PARAMS_RECORD_ID
807 | GEOTIFF_ASCII_PARAMS_RECORD_ID
808 )
809}
810
811fn is_extra_bytes_descriptor_vlr(vlr: &las::Vlr) -> bool {
812 vlr.user_id == LASF_SPEC_USER_ID && vlr.record_id == EXTRA_BYTES_RECORD_ID
813}
814
815fn has_wkt_crs_record(header: &las::Header, source_evlrs: &[las::Vlr]) -> bool {
816 header.vlrs().iter().any(is_wkt_crs_vlr) || source_evlrs.iter().any(is_wkt_crs_vlr)
817}
818
819fn extract_source_wkt_crs_records(
820 header: &las::Header,
821 source_evlrs: &[las::Vlr],
822) -> Vec<OutputCrsRecord> {
823 let mut records = Vec::new();
824 for vlr in header.vlrs() {
825 if is_wkt_crs_vlr(vlr) {
826 records.push(OutputCrsRecord {
827 vlr: vlr.clone(),
828 is_extended: false,
829 });
830 }
831 }
832 for evlr in source_evlrs {
833 if is_wkt_crs_vlr(evlr) {
834 records.push(OutputCrsRecord {
835 vlr: evlr.clone(),
836 is_extended: true,
837 });
838 }
839 }
840 records
841}
842
843fn extract_pass_through_vlrs(header: &las::Header) -> Vec<las::Vlr> {
844 header
845 .vlrs()
846 .iter()
847 .filter(|vlr| !is_laszip_vlr(vlr))
848 .filter(|vlr| !is_copc_info_vlr(vlr))
849 .filter(|vlr| !is_copc_hierarchy_evlr(vlr))
850 .filter(|vlr| !is_wkt_crs_vlr(vlr))
851 .filter(|vlr| !is_geotiff_crs_vlr(vlr))
852 .filter(|vlr| !is_extra_bytes_descriptor_vlr(vlr))
853 .cloned()
854 .collect()
855}
856
857fn extract_pass_through_evlrs(source_evlrs: &[las::Vlr]) -> Vec<las::Vlr> {
858 source_evlrs
859 .iter()
860 .filter(|evlr| !is_laszip_vlr(evlr))
861 .filter(|evlr| !is_copc_info_vlr(evlr))
862 .filter(|evlr| !is_copc_hierarchy_evlr(evlr))
863 .filter(|evlr| !is_wkt_crs_vlr(evlr))
864 .filter(|evlr| !is_geotiff_crs_vlr(evlr))
865 .cloned()
866 .collect()
867}
868
869fn validate_record_coordinates(record: &LasPointRecord, index: usize) -> Result<()> {
870 validate_xyz_finite(index, record.x, record.y, record.z)
871}
872
873fn validate_coordinate_inputs<S: CopcPointSource>(
874 source: &S,
875 bounds: Bounds,
876 scale: (f64, f64, f64),
877 offset: (f64, f64, f64),
878 cancel: &dyn CancelCheck,
879) -> Result<PointStats> {
880 validate_bounds(bounds)?;
881 validate_transform(scale, offset)?;
882 let extra_byte_count = usize::from(source.extra_byte_count());
883 let mut stats = PointStats::new();
884 for index in 0..source.len() {
885 if index % CANCEL_POLL_STRIDE == 0 {
886 cancel.check()?;
887 }
888 let (x, y, z) = source.xyz(index);
889 validate_xyz_finite(index, x, y, z)?;
890 quantize_xyz(index, x, y, z, scale, offset)?;
891
892 let fields = source.fields(index)?;
893 validate_xyz_finite(index, fields.x, fields.y, fields.z)?;
894 quantize_xyz(index, fields.x, fields.y, fields.z, scale, offset)?;
895 validate_scan_angle(index, fields.scan_angle)?;
896 validate_point_flags(index, &fields)?;
897 if fields.extra_bytes.len() != extra_byte_count {
898 return Err(Error::InvalidInput(format!(
899 "point {index} has {} extra byte(s), expected {extra_byte_count}",
900 fields.extra_bytes.len()
901 )));
902 }
903 stats.record(index, &fields)?;
904 }
905 Ok(stats)
906}
907
908fn validate_point_flags(index: usize, fields: &CopcPointFields) -> Result<()> {
909 validate_point_field_range(index, "return_number", fields.return_number, 0, 15)?;
910 validate_point_field_range(index, "number_of_returns", fields.number_of_returns, 0, 15)?;
911 validate_point_field_range(index, "synthetic", fields.synthetic, 0, 1)?;
912 validate_point_field_range(index, "key_point", fields.key_point, 0, 1)?;
913 validate_point_field_range(index, "withheld", fields.withheld, 0, 1)?;
914 validate_point_field_range(index, "overlap", fields.overlap, 0, 1)?;
915 validate_point_field_range(index, "scan_channel", fields.scan_channel, 0, 3)?;
916 validate_point_field_range(
917 index,
918 "scan_direction_flag",
919 fields.scan_direction_flag,
920 0,
921 1,
922 )?;
923 validate_point_field_range(
924 index,
925 "edge_of_flight_line",
926 fields.edge_of_flight_line,
927 0,
928 1,
929 )
930}
931
932fn validate_scan_angle(index: usize, value: f32) -> Result<()> {
933 if !value.is_finite() {
934 return Err(Error::InvalidInput(format!(
935 "point {index} scan_angle must be finite, got {value}"
936 )));
937 }
938 let scaled = value / LAS_14_SCAN_ANGLE_SCALE;
939 if scaled < f32::from(i16::MIN) || scaled > f32::from(i16::MAX) {
940 return Err(Error::InvalidInput(format!(
941 "point {index} scan_angle {value} encodes to {scaled}, outside LAS i16 range"
942 )));
943 }
944 Ok(())
945}
946
947fn validate_point_field_range(index: usize, name: &str, value: u8, min: u8, max: u8) -> Result<()> {
948 if (min..=max).contains(&value) {
949 Ok(())
950 } else {
951 Err(Error::InvalidInput(format!(
952 "point {index} {name} must be in {min}..={max}, got {value}"
953 )))
954 }
955}
956
957fn validate_bounds(bounds: Bounds) -> Result<()> {
958 validate_finite_value("bounds min x", bounds.min.0)?;
959 validate_finite_value("bounds min y", bounds.min.1)?;
960 validate_finite_value("bounds min z", bounds.min.2)?;
961 validate_finite_value("bounds max x", bounds.max.0)?;
962 validate_finite_value("bounds max y", bounds.max.1)?;
963 validate_finite_value("bounds max z", bounds.max.2)?;
964 for (axis, min, max) in [
965 ("x", bounds.min.0, bounds.max.0),
966 ("y", bounds.min.1, bounds.max.1),
967 ("z", bounds.min.2, bounds.max.2),
968 ] {
969 if min > max {
970 return Err(Error::InvalidInput(format!(
971 "bounds {axis} min {min} exceeds max {max}"
972 )));
973 }
974 validate_finite_value(&format!("bounds {axis} span"), max - min)?;
975 }
976 Ok(())
977}
978
979fn validate_transform(scale: (f64, f64, f64), offset: (f64, f64, f64)) -> Result<()> {
980 for (axis, value) in [("x", scale.0), ("y", scale.1), ("z", scale.2)] {
981 if !value.is_finite() || value <= 0.0 {
982 return Err(Error::InvalidInput(format!(
983 "LAS {axis} scale must be finite and positive, got {value}"
984 )));
985 }
986 }
987 validate_finite_value("LAS x offset", offset.0)?;
988 validate_finite_value("LAS y offset", offset.1)?;
989 validate_finite_value("LAS z offset", offset.2)?;
990 Ok(())
991}
992
993fn validate_xyz_finite(index: usize, x: f64, y: f64, z: f64) -> Result<()> {
994 validate_point_axis_finite(index, "x", x)?;
995 validate_point_axis_finite(index, "y", y)?;
996 validate_point_axis_finite(index, "z", z)
997}
998
999fn validate_point_axis_finite(index: usize, axis: &str, value: f64) -> Result<()> {
1000 if value.is_finite() {
1001 Ok(())
1002 } else {
1003 Err(Error::InvalidInput(format!(
1004 "point {index} {axis} coordinate must be finite, got {value}"
1005 )))
1006 }
1007}
1008
1009fn validate_finite_value(name: &str, value: f64) -> Result<()> {
1010 if value.is_finite() {
1011 Ok(())
1012 } else {
1013 Err(Error::InvalidInput(format!(
1014 "{name} must be finite, got {value}"
1015 )))
1016 }
1017}
1018
1019fn quantize_xyz(
1020 index: usize,
1021 x: f64,
1022 y: f64,
1023 z: f64,
1024 scale: (f64, f64, f64),
1025 offset: (f64, f64, f64),
1026) -> Result<(i32, i32, i32)> {
1027 Ok((
1028 quantize_axis(index, "x", x, scale.0, offset.0)?,
1029 quantize_axis(index, "y", y, scale.1, offset.1)?,
1030 quantize_axis(index, "z", z, scale.2, offset.2)?,
1031 ))
1032}
1033
1034fn quantize_axis(index: usize, axis: &str, value: f64, scale: f64, offset: f64) -> Result<i32> {
1035 let scaled = ((value - offset) / scale).round();
1036 if !scaled.is_finite() {
1037 return Err(Error::InvalidInput(format!(
1038 "point {index} {axis} coordinate cannot be encoded with scale {scale} and offset {offset}"
1039 )));
1040 }
1041 if scaled < f64::from(i32::MIN) || scaled > f64::from(i32::MAX) {
1042 return Err(Error::InvalidInput(format!(
1043 "point {index} {axis} coordinate {value} encodes to {scaled}, outside LAS i32 range"
1044 )));
1045 }
1046 Ok(scaled as i32)
1047}
1048
1049fn write_copc_from_spill(
1050 path: &Path,
1051 reader: SpillReader,
1052 params: &CopcWriterParams,
1053 cancel: &dyn CancelCheck,
1054 metadata: &OutputLasMetadata,
1055) -> Result<()> {
1056 cancel.check()?;
1057 validate_streaming_layout_supported(reader.layout())?;
1058 if reader.is_empty() {
1059 return Err(Error::InvalidInput(
1060 "cannot write empty cloud to COPC".into(),
1061 ));
1062 }
1063 let has_color = reader.layout().has_color;
1064 let bounds = reader.bounds();
1065 let source = SpillSource { reader: &reader };
1066 write_copc_inner(path, &source, has_color, bounds, params, cancel, metadata)
1067}
1068
1069fn write_copc_inner<S: CopcPointSource>(
1070 path: &Path,
1071 source: &S,
1072 has_color: bool,
1073 bounds: Bounds,
1074 params: &CopcWriterParams,
1075 cancel: &dyn CancelCheck,
1076 metadata: &OutputLasMetadata,
1077) -> Result<()> {
1078 cancel.check()?;
1079 let point_format_id = if has_color { 7u8 } else { 6u8 };
1080 let mut point_format =
1081 LasFormat::new(point_format_id).map_err(|e| Error::Las(format!("point format: {e}")))?;
1082 let extra_byte_count = source.extra_byte_count();
1083 point_format.extra_bytes = extra_byte_count;
1084 let point_record_length = point_format.len();
1085
1086 let (scale_x, scale_y, scale_z) = metadata.scale;
1087 let (offset_x, offset_y, offset_z) =
1088 metadata
1089 .offset
1090 .unwrap_or((bounds.min.0, bounds.min.1, bounds.min.2));
1091 let point_stats = validate_coordinate_inputs(
1092 source,
1093 bounds,
1094 (scale_x, scale_y, scale_z),
1095 (offset_x, offset_y, offset_z),
1096 cancel,
1097 )?;
1098 let (center, halfsize) = cube_from_bounds(&bounds);
1099
1100 let lod_index = build_lod_index(source, center, halfsize, params, cancel)?;
1101 cancel.check()?;
1102
1103 let var_vlr = LazVlrBuilder::default()
1104 .with_point_format(point_format_id, extra_byte_count)
1105 .map_err(|e| Error::Las(format!("laz items: {e}")))?
1106 .with_variable_chunk_size()
1107 .build();
1108 let mut var_vlr_bytes = Vec::new();
1109 var_vlr
1110 .write_to(&mut var_vlr_bytes)
1111 .map_err(|e| Error::Las(format!("variable chunk LAZ VLR: {e}")))?;
1112
1113 let copc_info_vlr_size = 160u16;
1114 let las_header_size = 375u32;
1115 let regular_crs_vlr_count = metadata.regular_crs_vlr_count();
1116 let regular_crs_vlr_bytes = metadata.regular_crs_vlr_bytes()?;
1117 let extra_bytes_vlrs = source.extra_bytes_vlrs();
1118 let extra_bytes_vlr_bytes = regular_las_vlrs_bytes(extra_bytes_vlrs)?;
1119 let pass_through_vlr_bytes = regular_las_vlrs_bytes(&metadata.pass_through_vlrs)?;
1120 let number_of_vlrs = u32::try_from(
1121 2usize
1122 .checked_add(regular_crs_vlr_count)
1123 .and_then(|count| count.checked_add(extra_bytes_vlrs.len()))
1124 .and_then(|count| count.checked_add(metadata.pass_through_vlrs.len()))
1125 .ok_or_else(|| Error::InvalidInput("VLR count overflow".into()))?,
1126 )
1127 .map_err(|_| Error::InvalidInput("VLR count overflow".into()))?;
1128 let number_of_evlrs = u32::try_from(
1129 1usize
1130 .checked_add(metadata.source_evlr_count_after_hierarchy())
1131 .ok_or_else(|| Error::InvalidInput("EVLR count overflow".into()))?,
1132 )
1133 .map_err(|_| Error::InvalidInput("EVLR count overflow".into()))?;
1134 let var_vlr_body_size = u16::try_from(var_vlr_bytes.len())
1135 .map_err(|_| Error::InvalidInput("LAZ VLR byte size exceeds LAS VLR limit".into()))?;
1136 let var_vlr_storage_bytes = LAS_VLR_HEADER_BYTES
1137 .checked_add(u32::from(var_vlr_body_size))
1138 .ok_or_else(|| Error::InvalidInput("LAZ VLR byte size overflow".into()))?;
1139 let total_vlr_bytes = LAS_VLR_HEADER_BYTES
1140 .checked_add(u32::from(copc_info_vlr_size))
1141 .and_then(|total| total.checked_add(var_vlr_storage_bytes))
1142 .and_then(|total| total.checked_add(regular_crs_vlr_bytes))
1143 .and_then(|total| total.checked_add(extra_bytes_vlr_bytes))
1144 .and_then(|total| total.checked_add(pass_through_vlr_bytes))
1145 .ok_or_else(|| Error::InvalidInput("VLR byte size overflow".into()))?;
1146 let offset_to_point_data = las_header_size
1147 .checked_add(total_vlr_bytes)
1148 .ok_or_else(|| Error::InvalidInput("point data offset overflow".into()))?;
1149
1150 let file = File::create(path).map_err(|e| Error::io("create COPC file", e))?;
1151 let mut writer = BufWriter::with_capacity(COPC_OUTPUT_BUFFER_BYTES, file);
1152
1153 let header = LasHeader {
1154 point_data_format: point_format_id | 0x80,
1155 point_record_length,
1156 offset_to_point_data,
1157 number_of_vlrs,
1158 file_source_id: metadata.file_source_id,
1159 global_encoding: metadata.global_encoding,
1160 guid: metadata.guid,
1161 system_identifier: metadata.system_identifier.clone(),
1162 generating_software: metadata.generating_software.clone(),
1163 creation_day_of_year: metadata.creation_day_of_year,
1164 creation_year: metadata.creation_year,
1165 scale: (scale_x, scale_y, scale_z),
1166 offset: (offset_x, offset_y, offset_z),
1167 bounds,
1168 legacy_point_count: 0,
1169 total_point_count: source.len() as u64,
1170 offset_to_first_evlr: 0,
1171 number_of_evlrs,
1172 extended_return_counts: point_stats.extended_return_counts,
1173 };
1174 header.write(&mut writer)?;
1175
1176 write_vlr_header(&mut writer, "copc", 1, copc_info_vlr_size, "COPC info")?;
1177 let copc_info_payload_start = writer
1178 .stream_position()
1179 .map_err(|e| Error::io("record COPC info payload offset", e))?;
1180 writer
1181 .write_all(&[0u8; 160])
1182 .map_err(|e| Error::io("write COPC info placeholder", e))?;
1183
1184 write_vlr_header(
1185 &mut writer,
1186 LASZIP_VLR_USER_ID,
1187 LASZIP_VLR_RECORD_ID,
1188 var_vlr_body_size,
1189 "http://laszip.org",
1190 )?;
1191 writer
1192 .write_all(&var_vlr_bytes)
1193 .map_err(|e| Error::io("write LAZ VLR", e))?;
1194
1195 for vlr in metadata.regular_crs_vlrs() {
1196 write_las_vlr(&mut writer, vlr)?;
1197 }
1198 for vlr in extra_bytes_vlrs {
1199 write_las_vlr(&mut writer, vlr)?;
1200 }
1201 for vlr in &metadata.pass_through_vlrs {
1202 write_las_vlr(&mut writer, vlr)?;
1203 }
1204
1205 let point_data_actual_start = writer
1206 .stream_position()
1207 .map_err(|e| Error::io("record point data offset", e))?;
1208 if point_data_actual_start as u32 != offset_to_point_data {
1209 return Err(Error::InvalidInput(format!(
1210 "VLR size accounting mismatch: at {point_data_actual_start}, expected {offset_to_point_data}"
1211 )));
1212 }
1213
1214 let mut compressor = LasZipCompressor::new(&mut writer, var_vlr.clone())
1215 .map_err(|e| Error::Las(format!("compressor: {e}")))?;
1216 let mut hierarchy: Vec<Entry> = Vec::with_capacity(lod_index.nodes.len());
1217 let order_path: &Path = lod_index.order_path.as_ref();
1218 let mut index_reader = BufReader::with_capacity(
1219 INDEX_IO_BUFFER_BYTES,
1220 File::open(order_path).map_err(|e| Error::io("open LOD order", e))?,
1221 );
1222 let mut point_buf = vec![0u8; point_record_length as usize];
1223 let mut chunk_start_file_offset = compressor
1224 .get_mut()
1225 .stream_position()
1226 .map_err(|e| Error::io("record chunk start", e))?;
1227 chunk_start_file_offset += 8;
1228
1229 for node in &lod_index.nodes {
1230 cancel.check()?;
1231 index_reader
1232 .seek(SeekFrom::Start(node.start))
1233 .map_err(|e| Error::io("seek LOD order", e))?;
1234 for point_index in 0..node.count {
1235 if point_index % CANCEL_POLL_STRIDE == 0 {
1236 cancel.check()?;
1237 }
1238 let source_index = index_reader
1239 .read_u32::<LittleEndian>()
1240 .map_err(|e| Error::io("read LOD order", e))?
1241 as usize;
1242 let fields = source.fields(source_index as usize)?;
1243 encode_point_record(
1244 &mut point_buf,
1245 &fields,
1246 (scale_x, scale_y, scale_z),
1247 (offset_x, offset_y, offset_z),
1248 source_index as usize,
1249 &point_format,
1250 )?;
1251 compressor
1252 .compress_one(&point_buf)
1253 .map_err(|e| Error::Las(format!("compress point: {e}")))?;
1254 }
1255 compressor
1256 .finish_current_chunk()
1257 .map_err(|e| Error::Las(format!("finish chunk: {e}")))?;
1258 let after = compressor
1259 .get_mut()
1260 .stream_position()
1261 .map_err(|e| Error::io("record chunk end", e))?;
1262 let byte_size = i32::try_from(after - chunk_start_file_offset)
1263 .map_err(|_| Error::InvalidInput("LAZ chunk exceeds COPC i32 byte size".into()))?;
1264 let point_count = i32::try_from(node.count)
1265 .map_err(|_| Error::InvalidInput("node point count exceeds COPC i32 range".into()))?;
1266 hierarchy.push(Entry {
1267 key: node.key,
1268 offset: chunk_start_file_offset,
1269 byte_size,
1270 point_count,
1271 });
1272 chunk_start_file_offset = after;
1273 }
1274
1275 cancel.check()?;
1276 compressor
1277 .done()
1278 .map_err(|e| Error::Las(format!("finish compressor: {e}")))?;
1279 drop(compressor);
1280
1281 let hierarchy_evlr_start = writer
1282 .stream_position()
1283 .map_err(|e| Error::io("record hierarchy EVLR start", e))?;
1284 let root_hier_offset = hierarchy_evlr_start
1285 .checked_add(LAS_EVLR_HEADER_BYTES)
1286 .ok_or_else(|| Error::InvalidInput("hierarchy EVLR offset overflow".into()))?;
1287 let mut hierarchy_pages = plan_hierarchy_pages(&hierarchy, VoxelKey::root())?;
1288 let hierarchy_end = assign_hierarchy_page_offsets(&mut hierarchy_pages, root_hier_offset)?;
1289 let hierarchy_body_size = hierarchy_end
1290 .checked_sub(root_hier_offset)
1291 .ok_or_else(|| Error::InvalidInput("hierarchy size overflow".into()))?;
1292 write_evlr_header(
1293 &mut writer,
1294 "copc",
1295 1000,
1296 hierarchy_body_size,
1297 "COPC hierarchy",
1298 )?;
1299 let actual_root_hier_offset = writer
1300 .stream_position()
1301 .map_err(|e| Error::io("record root hierarchy offset", e))?;
1302 if actual_root_hier_offset != root_hier_offset {
1303 return Err(Error::InvalidInput(format!(
1304 "hierarchy offset accounting mismatch: at {actual_root_hier_offset}, expected {root_hier_offset}"
1305 )));
1306 }
1307 write_hierarchy_page_tree(&mut writer, &hierarchy_pages)?;
1308 for evlr in metadata.source_evlrs_after_hierarchy() {
1309 write_las_evlr(&mut writer, evlr)?;
1310 }
1311
1312 writer
1313 .seek(SeekFrom::Start(copc_info_payload_start))
1314 .map_err(|e| Error::io("seek COPC info payload", e))?;
1315 let info = CopcInfo {
1316 center,
1317 halfsize,
1318 spacing: halfsize / 128.0,
1319 root_hier_offset,
1320 root_hier_size: hierarchy_pages.byte_size,
1321 gpstime_min: point_stats.gpstime_min,
1322 gpstime_max: point_stats.gpstime_max,
1323 };
1324 writer
1325 .write_all(&info.write_le_bytes())
1326 .map_err(|e| Error::io("patch COPC info", e))?;
1327
1328 writer
1329 .seek(SeekFrom::Start(235))
1330 .map_err(|e| Error::io("seek first EVLR offset", e))?;
1331 writer
1332 .write_u64::<LittleEndian>(hierarchy_evlr_start)
1333 .map_err(|e| Error::io("patch first EVLR offset", e))?;
1334
1335 writer
1336 .flush()
1337 .map_err(|e| Error::io("flush COPC file", e))?;
1338 Ok(())
1339}
1340
1341#[derive(Debug)]
1342struct HierarchyPagePlan {
1343 key: VoxelKey,
1344 items: Vec<HierarchyPageItem>,
1345 offset: u64,
1346 byte_size: u64,
1347}
1348
1349#[derive(Debug)]
1350enum HierarchyPageItem {
1351 Point(Entry),
1352 Child(Box<HierarchyPagePlan>),
1353}
1354
1355fn plan_hierarchy_pages(entries: &[Entry], key: VoxelKey) -> Result<HierarchyPagePlan> {
1356 if entries.is_empty() {
1357 return Err(Error::InvalidInput(
1358 "cannot write empty hierarchy page".into(),
1359 ));
1360 }
1361 if entries.len() <= HIERARCHY_PAGE_MAX_ENTRIES {
1362 return Ok(HierarchyPagePlan {
1363 key,
1364 items: entries
1365 .iter()
1366 .copied()
1367 .map(HierarchyPageItem::Point)
1368 .collect(),
1369 offset: 0,
1370 byte_size: 0,
1371 });
1372 }
1373
1374 let mut point_entry = None;
1375 let mut child_entries: [Vec<Entry>; 8] = std::array::from_fn(|_| Vec::new());
1376 for entry in entries.iter().copied() {
1377 if entry.key == key {
1378 point_entry = Some(entry);
1379 continue;
1380 }
1381 let mut matched = false;
1382 for (octant, child_entries) in child_entries.iter_mut().enumerate() {
1383 let child_key = key.child(octant as u8);
1384 if key_contains(child_key, entry.key) {
1385 child_entries.push(entry);
1386 matched = true;
1387 break;
1388 }
1389 }
1390 if !matched {
1391 return Err(Error::InvalidInput(format!(
1392 "hierarchy entry {:?} is not under page key {:?}",
1393 entry.key, key
1394 )));
1395 }
1396 }
1397
1398 let mut items = Vec::new();
1399 if let Some(entry) = point_entry {
1400 items.push(HierarchyPageItem::Point(entry));
1401 }
1402 for (octant, child_entries) in child_entries.into_iter().enumerate() {
1403 if child_entries.is_empty() {
1404 continue;
1405 }
1406 items.push(HierarchyPageItem::Child(Box::new(plan_hierarchy_pages(
1407 &child_entries,
1408 key.child(octant as u8),
1409 )?)));
1410 }
1411 if items.len() > HIERARCHY_PAGE_MAX_ENTRIES {
1412 return Err(Error::InvalidInput(format!(
1413 "hierarchy page for {:?} has {} entries, max is {}",
1414 key,
1415 items.len(),
1416 HIERARCHY_PAGE_MAX_ENTRIES
1417 )));
1418 }
1419 Ok(HierarchyPagePlan {
1420 key,
1421 items,
1422 offset: 0,
1423 byte_size: 0,
1424 })
1425}
1426
1427fn assign_hierarchy_page_offsets(page: &mut HierarchyPagePlan, offset: u64) -> Result<u64> {
1428 page.offset = offset;
1429 page.byte_size = hierarchy_page_byte_size(page.items.len())?;
1430 let mut next = offset
1431 .checked_add(page.byte_size)
1432 .ok_or_else(|| Error::InvalidInput("hierarchy page offset overflow".into()))?;
1433 for item in &mut page.items {
1434 if let HierarchyPageItem::Child(child) = item {
1435 next = assign_hierarchy_page_offsets(child, next)?;
1436 }
1437 }
1438 Ok(next)
1439}
1440
1441fn hierarchy_page_byte_size(entry_count: usize) -> Result<u64> {
1442 let bytes = entry_count
1443 .checked_mul(HIERARCHY_ENTRY_BYTES)
1444 .ok_or_else(|| Error::InvalidInput("hierarchy page size overflow".into()))?;
1445 u64::try_from(bytes).map_err(|_| Error::InvalidInput("hierarchy page is too large".into()))
1446}
1447
1448fn write_hierarchy_page_tree<W: Write + Seek>(
1449 writer: &mut W,
1450 page: &HierarchyPagePlan,
1451) -> Result<()> {
1452 let position = writer
1453 .stream_position()
1454 .map_err(|e| Error::io("record hierarchy page offset", e))?;
1455 if position != page.offset {
1456 return Err(Error::InvalidInput(format!(
1457 "hierarchy page offset mismatch: at {position}, expected {}",
1458 page.offset
1459 )));
1460 }
1461 let mut entry_buf = [0u8; HIERARCHY_ENTRY_BYTES];
1462 for item in &page.items {
1463 hierarchy_page_item_entry(item)?.write_le(&mut entry_buf)?;
1464 writer
1465 .write_all(&entry_buf)
1466 .map_err(|e| Error::io("write hierarchy entry", e))?;
1467 }
1468 for item in &page.items {
1469 if let HierarchyPageItem::Child(child) = item {
1470 write_hierarchy_page_tree(writer, child)?;
1471 }
1472 }
1473 Ok(())
1474}
1475
1476fn hierarchy_page_item_entry(item: &HierarchyPageItem) -> Result<Entry> {
1477 match item {
1478 HierarchyPageItem::Point(entry) => Ok(*entry),
1479 HierarchyPageItem::Child(child) => Ok(Entry {
1480 key: child.key,
1481 offset: child.offset,
1482 byte_size: i32::try_from(child.byte_size).map_err(|_| {
1483 Error::InvalidInput("child hierarchy page exceeds COPC i32 byte size".into())
1484 })?,
1485 point_count: -1,
1486 }),
1487 }
1488}
1489
1490fn key_contains(ancestor: VoxelKey, key: VoxelKey) -> bool {
1491 if key.level < ancestor.level {
1492 return false;
1493 }
1494 let shift = (key.level - ancestor.level) as u32;
1495 (key.x >> shift) == ancestor.x
1496 && (key.y >> shift) == ancestor.y
1497 && (key.z >> shift) == ancestor.z
1498}
1499
1500struct LodIndex {
1501 nodes: Vec<LodNodeRange>,
1502 order_path: TempPath,
1503}
1504
1505#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1506struct LodNodeRange {
1507 key: VoxelKey,
1508 start: u64,
1509 count: usize,
1510}
1511
1512struct IndexRun {
1513 path: TempPath,
1514 start: u64,
1515 count: usize,
1516}
1517
1518fn build_lod_index<S: CopcPointSource>(
1519 source: &S,
1520 center: (f64, f64, f64),
1521 halfsize: f64,
1522 params: &CopcWriterParams,
1523 cancel: &dyn CancelCheck,
1524) -> Result<LodIndex> {
1525 cancel.check()?;
1526 let total_points = u32::try_from(source.len()).map_err(|_| {
1527 Error::InvalidInput("COPC writer supports at most u32::MAX points per file".into())
1528 })?;
1529 let max_points_per_node = params.max_points_per_node.max(1) as usize;
1530 let max_depth = params.max_depth.clamp(MIN_LEAF_DEPTH, MAX_LEAF_DEPTH);
1531 let root_run = write_root_index_run(total_points, cancel)?;
1532 let mut order_file = new_index_tempfile("order")?;
1533 let mut order_offset = 0;
1534 let mut nodes = Vec::new();
1535 {
1536 let mut order_writer =
1537 BufWriter::with_capacity(INDEX_IO_BUFFER_BYTES, order_file.as_file_mut());
1538 let mut builder = LodIndexBuilder {
1539 source,
1540 max_points_per_node,
1541 max_depth,
1542 cancel,
1543 order_writer: &mut order_writer,
1544 order_offset: &mut order_offset,
1545 nodes: &mut nodes,
1546 };
1547 builder.assign(VoxelKey::root(), root_run, Bounds::cube(center, halfsize))?;
1548 order_writer
1549 .flush()
1550 .map_err(|e| Error::io("flush LOD index order", e))?;
1551 }
1552 nodes.sort_by_key(|node| node.key);
1553 Ok(LodIndex {
1554 nodes,
1555 order_path: order_file.into_temp_path(),
1556 })
1557}
1558
1559struct LodIndexBuilder<'a, S: CopcPointSource, W: Write> {
1560 source: &'a S,
1561 max_points_per_node: usize,
1562 max_depth: u32,
1563 cancel: &'a dyn CancelCheck,
1564 order_writer: &'a mut W,
1565 order_offset: &'a mut u64,
1566 nodes: &'a mut Vec<LodNodeRange>,
1567}
1568
1569impl<S: CopcPointSource, W: Write> LodIndexBuilder<'_, S, W> {
1570 fn assign(&mut self, key: VoxelKey, run: IndexRun, bounds: Bounds) -> Result<()> {
1571 self.cancel.check()?;
1572 if run.count == 0 {
1573 return Ok(());
1574 }
1575 if run.count <= self.max_points_per_node || key.level as u32 >= self.max_depth {
1576 let start = *self.order_offset;
1577 append_index_run_to_order(&run, self.order_writer, self.order_offset, self.cancel)?;
1578 self.nodes.push(LodNodeRange {
1579 key,
1580 start,
1581 count: run.count,
1582 });
1583 return Ok(());
1584 }
1585
1586 let mut children = partition_index_run(self.source, &run, bounds, self.cancel)?;
1587 let start = *self.order_offset;
1588 let selected_counts = append_lod_selection_to_order(
1589 &children,
1590 self.max_points_per_node,
1591 self.order_writer,
1592 self.order_offset,
1593 self.cancel,
1594 )?;
1595 let selected_total = selected_counts.iter().sum();
1596 self.nodes.push(LodNodeRange {
1597 key,
1598 start,
1599 count: selected_total,
1600 });
1601
1602 for (octant, child) in children.iter_mut().enumerate() {
1603 let Some(mut child_run) = child.take() else {
1604 continue;
1605 };
1606 let selected = selected_counts[octant];
1607 if selected >= child_run.count {
1608 continue;
1609 }
1610 child_run.start += selected as u64 * INDEX_RECORD_BYTES;
1611 child_run.count -= selected;
1612 self.assign(
1613 key.child(octant as u8),
1614 child_run,
1615 bounds.octant(octant as u8),
1616 )?;
1617 }
1618 Ok(())
1619 }
1620}
1621
1622fn write_root_index_run(total_points: u32, cancel: &dyn CancelCheck) -> Result<IndexRun> {
1623 let mut writer = BufWriter::with_capacity(INDEX_IO_BUFFER_BYTES, new_index_tempfile("root")?);
1624 for index in 0..total_points {
1625 if index as usize % CANCEL_POLL_STRIDE == 0 {
1626 cancel.check()?;
1627 }
1628 writer
1629 .write_u32::<LittleEndian>(index)
1630 .map_err(|e| Error::io("write root LOD index", e))?;
1631 }
1632 let file = writer
1633 .into_inner()
1634 .map_err(|e| Error::io("flush root LOD index", e.into_error()))?;
1635 Ok(IndexRun {
1636 path: file.into_temp_path(),
1637 start: 0,
1638 count: total_points as usize,
1639 })
1640}
1641
1642fn partition_index_run<S: CopcPointSource>(
1643 source: &S,
1644 run: &IndexRun,
1645 bounds: Bounds,
1646 cancel: &dyn CancelCheck,
1647) -> Result<[Option<IndexRun>; 8]> {
1648 let mut reader = open_index_run(run)?;
1649 let mut writers: [Option<BufWriter<NamedTempFile>>; 8] = std::array::from_fn(|_| None);
1650 let mut counts = [0usize; 8];
1651 let center = bounds.center();
1652 for read_index in 0..run.count {
1653 if read_index % CANCEL_POLL_STRIDE == 0 {
1654 cancel.check()?;
1655 }
1656 let index = reader
1657 .read_u32::<LittleEndian>()
1658 .map_err(|e| Error::io("read LOD partition index", e))?;
1659 let (x, y, z) = source.xyz(index as usize);
1660 let octant = child_octant(center, x, y, z);
1661 if writers[octant].is_none() {
1662 writers[octant] = Some(BufWriter::with_capacity(
1663 INDEX_IO_BUFFER_BYTES,
1664 new_index_tempfile("partition")?,
1665 ));
1666 }
1667 writers[octant]
1668 .as_mut()
1669 .expect("partition writer exists")
1670 .write_u32::<LittleEndian>(index)
1671 .map_err(|e| Error::io("write LOD partition index", e))?;
1672 counts[octant] += 1;
1673 }
1674
1675 let mut children: [Option<IndexRun>; 8] = std::array::from_fn(|_| None);
1676 for octant in 0..8 {
1677 let Some(writer) = writers[octant].take() else {
1678 continue;
1679 };
1680 let file = writer
1681 .into_inner()
1682 .map_err(|e| Error::io("flush LOD partition index", e.into_error()))?;
1683 children[octant] = Some(IndexRun {
1684 path: file.into_temp_path(),
1685 start: 0,
1686 count: counts[octant],
1687 });
1688 }
1689 Ok(children)
1690}
1691
1692fn append_lod_selection_to_order<W: Write>(
1693 children: &[Option<IndexRun>; 8],
1694 max_points_per_node: usize,
1695 order_writer: &mut W,
1696 order_offset: &mut u64,
1697 cancel: &dyn CancelCheck,
1698) -> Result<[usize; 8]> {
1699 let mut readers: [Option<BufReader<File>>; 8] = std::array::from_fn(|_| None);
1700 for octant in 0..8 {
1701 if let Some(child) = &children[octant] {
1702 readers[octant] = Some(open_index_run(child)?);
1703 }
1704 }
1705
1706 let mut selected_counts = [0usize; 8];
1707 let mut selected_total = 0usize;
1708 while selected_total < max_points_per_node {
1709 cancel.check()?;
1710 let mut progressed = false;
1711 for octant in 0..8 {
1712 let Some(child) = &children[octant] else {
1713 continue;
1714 };
1715 if selected_counts[octant] >= child.count {
1716 continue;
1717 }
1718 let index = readers[octant]
1719 .as_mut()
1720 .expect("partition reader exists")
1721 .read_u32::<LittleEndian>()
1722 .map_err(|e| Error::io("read selected LOD index", e))?;
1723 append_index_to_order(order_writer, order_offset, index)?;
1724 selected_counts[octant] += 1;
1725 selected_total += 1;
1726 progressed = true;
1727 if selected_total == max_points_per_node {
1728 break;
1729 }
1730 }
1731 if !progressed {
1732 break;
1733 }
1734 }
1735 Ok(selected_counts)
1736}
1737
1738fn append_index_run_to_order<W: Write>(
1739 run: &IndexRun,
1740 order_writer: &mut W,
1741 order_offset: &mut u64,
1742 cancel: &dyn CancelCheck,
1743) -> Result<()> {
1744 let mut reader = open_index_run(run)?;
1745 for read_index in 0..run.count {
1746 if read_index % CANCEL_POLL_STRIDE == 0 {
1747 cancel.check()?;
1748 }
1749 let index = reader
1750 .read_u32::<LittleEndian>()
1751 .map_err(|e| Error::io("read LOD index", e))?;
1752 append_index_to_order(order_writer, order_offset, index)?;
1753 }
1754 Ok(())
1755}
1756
1757fn append_index_to_order<W: Write>(
1758 order_writer: &mut W,
1759 order_offset: &mut u64,
1760 index: u32,
1761) -> Result<()> {
1762 order_writer
1763 .write_u32::<LittleEndian>(index)
1764 .map_err(|e| Error::io("write LOD index order", e))?;
1765 *order_offset = order_offset
1766 .checked_add(INDEX_RECORD_BYTES)
1767 .ok_or_else(|| Error::InvalidInput("LOD index order exceeds u64 range".into()))?;
1768 Ok(())
1769}
1770
1771fn open_index_run(run: &IndexRun) -> Result<BufReader<File>> {
1772 let path: &Path = run.path.as_ref();
1773 let mut file = File::open(path).map_err(|e| Error::io("open LOD index", e))?;
1774 file.seek(SeekFrom::Start(run.start))
1775 .map_err(|e| Error::io("seek LOD index", e))?;
1776 Ok(BufReader::with_capacity(INDEX_IO_BUFFER_BYTES, file))
1777}
1778
1779fn new_index_tempfile(label: &str) -> Result<NamedTempFile> {
1780 let prefix = format!(".copc-writer-{label}.");
1781 tempfile::Builder::new()
1782 .prefix(&prefix)
1783 .suffix(".idx")
1784 .tempfile()
1785 .map_err(|e| Error::io("create LOD index file", e))
1786}
1787
1788fn child_octant(center: (f64, f64, f64), x: f64, y: f64, z: f64) -> usize {
1789 usize::from(x >= center.0)
1790 | (usize::from(y >= center.1) << 1)
1791 | (usize::from(z >= center.2) << 2)
1792}
1793
1794fn cube_from_bounds(bounds: &Bounds) -> ((f64, f64, f64), f64) {
1795 let dx = bounds.max.0 - bounds.min.0;
1796 let dy = bounds.max.1 - bounds.min.1;
1797 let dz = bounds.max.2 - bounds.min.2;
1798 let center = (
1799 bounds.min.0 + dx * 0.5,
1800 bounds.min.1 + dy * 0.5,
1801 bounds.min.2 + dz * 0.5,
1802 );
1803 let halfsize = (dx.max(dy).max(dz) * 0.5).max(1e-6);
1804 (center, halfsize)
1805}
1806
1807struct LasHeader {
1808 point_data_format: u8,
1809 point_record_length: u16,
1810 offset_to_point_data: u32,
1811 number_of_vlrs: u32,
1812 file_source_id: u16,
1813 global_encoding: u16,
1814 guid: [u8; 16],
1815 system_identifier: String,
1816 generating_software: String,
1817 creation_day_of_year: u16,
1818 creation_year: u16,
1819 scale: (f64, f64, f64),
1820 offset: (f64, f64, f64),
1821 bounds: Bounds,
1822 legacy_point_count: u32,
1823 total_point_count: u64,
1824 offset_to_first_evlr: u64,
1825 number_of_evlrs: u32,
1826 extended_return_counts: [u64; 15],
1827}
1828
1829impl LasHeader {
1830 fn write<W: Write>(&self, writer: &mut W) -> Result<()> {
1831 writer
1832 .write_all(b"LASF")
1833 .map_err(|e| Error::io("write LAS signature", e))?;
1834 writer
1835 .write_u16::<LittleEndian>(self.file_source_id)
1836 .map_err(|e| Error::io("write file source id", e))?;
1837 writer
1838 .write_u16::<LittleEndian>(self.global_encoding)
1839 .map_err(|e| Error::io("write global encoding", e))?;
1840 writer
1841 .write_all(&self.guid)
1842 .map_err(|e| Error::io("write GUID", e))?;
1843 writer
1844 .write_u8(1)
1845 .map_err(|e| Error::io("write version major", e))?;
1846 writer
1847 .write_u8(4)
1848 .map_err(|e| Error::io("write version minor", e))?;
1849 writer
1850 .write_all(&pad(self.system_identifier.as_bytes(), 32))
1851 .map_err(|e| Error::io("write system id", e))?;
1852 writer
1853 .write_all(&pad(self.generating_software.as_bytes(), 32))
1854 .map_err(|e| Error::io("write generating software", e))?;
1855 writer
1856 .write_u16::<LittleEndian>(self.creation_day_of_year)
1857 .map_err(|e| Error::io("write creation day", e))?;
1858 writer
1859 .write_u16::<LittleEndian>(self.creation_year)
1860 .map_err(|e| Error::io("write creation year", e))?;
1861 writer
1862 .write_u16::<LittleEndian>(375)
1863 .map_err(|e| Error::io("write header size", e))?;
1864 writer
1865 .write_u32::<LittleEndian>(self.offset_to_point_data)
1866 .map_err(|e| Error::io("write point data offset", e))?;
1867 writer
1868 .write_u32::<LittleEndian>(self.number_of_vlrs)
1869 .map_err(|e| Error::io("write VLR count", e))?;
1870 writer
1871 .write_u8(self.point_data_format)
1872 .map_err(|e| Error::io("write point format", e))?;
1873 writer
1874 .write_u16::<LittleEndian>(self.point_record_length)
1875 .map_err(|e| Error::io("write point record length", e))?;
1876 writer
1877 .write_u32::<LittleEndian>(self.legacy_point_count)
1878 .map_err(|e| Error::io("write legacy point count", e))?;
1879 for _ in 0..5 {
1880 writer
1881 .write_u32::<LittleEndian>(0)
1882 .map_err(|e| Error::io("write legacy returns", e))?;
1883 }
1884 writer
1885 .write_f64::<LittleEndian>(self.scale.0)
1886 .map_err(|e| Error::io("write x scale", e))?;
1887 writer
1888 .write_f64::<LittleEndian>(self.scale.1)
1889 .map_err(|e| Error::io("write y scale", e))?;
1890 writer
1891 .write_f64::<LittleEndian>(self.scale.2)
1892 .map_err(|e| Error::io("write z scale", e))?;
1893 writer
1894 .write_f64::<LittleEndian>(self.offset.0)
1895 .map_err(|e| Error::io("write x offset", e))?;
1896 writer
1897 .write_f64::<LittleEndian>(self.offset.1)
1898 .map_err(|e| Error::io("write y offset", e))?;
1899 writer
1900 .write_f64::<LittleEndian>(self.offset.2)
1901 .map_err(|e| Error::io("write z offset", e))?;
1902 writer
1903 .write_f64::<LittleEndian>(self.bounds.max.0)
1904 .map_err(|e| Error::io("write max x", e))?;
1905 writer
1906 .write_f64::<LittleEndian>(self.bounds.min.0)
1907 .map_err(|e| Error::io("write min x", e))?;
1908 writer
1909 .write_f64::<LittleEndian>(self.bounds.max.1)
1910 .map_err(|e| Error::io("write max y", e))?;
1911 writer
1912 .write_f64::<LittleEndian>(self.bounds.min.1)
1913 .map_err(|e| Error::io("write min y", e))?;
1914 writer
1915 .write_f64::<LittleEndian>(self.bounds.max.2)
1916 .map_err(|e| Error::io("write max z", e))?;
1917 writer
1918 .write_f64::<LittleEndian>(self.bounds.min.2)
1919 .map_err(|e| Error::io("write min z", e))?;
1920 writer
1921 .write_u64::<LittleEndian>(0)
1922 .map_err(|e| Error::io("write waveform packet start", e))?;
1923 writer
1924 .write_u64::<LittleEndian>(self.offset_to_first_evlr)
1925 .map_err(|e| Error::io("write first EVLR offset", e))?;
1926 writer
1927 .write_u32::<LittleEndian>(self.number_of_evlrs)
1928 .map_err(|e| Error::io("write EVLR count", e))?;
1929 writer
1930 .write_u64::<LittleEndian>(self.total_point_count)
1931 .map_err(|e| Error::io("write total point count", e))?;
1932 for count in self.extended_return_counts {
1933 writer
1934 .write_u64::<LittleEndian>(count)
1935 .map_err(|e| Error::io("write extended returns", e))?;
1936 }
1937 Ok(())
1938 }
1939}
1940
1941fn pad(value: &[u8], len: usize) -> Vec<u8> {
1942 let mut out = Vec::with_capacity(len);
1943 let take = value.len().min(len);
1944 out.extend_from_slice(&value[..take]);
1945 out.resize(len, 0);
1946 out
1947}
1948
1949fn write_vlr_header<W: Write>(
1950 writer: &mut W,
1951 user_id: &str,
1952 record_id: u16,
1953 body_size: u16,
1954 description: &str,
1955) -> Result<()> {
1956 writer
1957 .write_u16::<LittleEndian>(0)
1958 .map_err(|e| Error::io("write VLR reserved", e))?;
1959 writer
1960 .write_all(&pad(user_id.as_bytes(), 16))
1961 .map_err(|e| Error::io("write VLR user id", e))?;
1962 writer
1963 .write_u16::<LittleEndian>(record_id)
1964 .map_err(|e| Error::io("write VLR record id", e))?;
1965 writer
1966 .write_u16::<LittleEndian>(body_size)
1967 .map_err(|e| Error::io("write VLR body size", e))?;
1968 writer
1969 .write_all(&pad(description.as_bytes(), 32))
1970 .map_err(|e| Error::io("write VLR description", e))?;
1971 Ok(())
1972}
1973
1974fn write_las_vlr<W: Write>(writer: &mut W, vlr: &las::Vlr) -> Result<()> {
1975 let body_size = u16::try_from(vlr.data.len()).map_err(|_| {
1976 Error::InvalidInput(format!(
1977 "regular VLR {}:{} is too large: {} byte(s)",
1978 vlr.user_id,
1979 vlr.record_id,
1980 vlr.data.len()
1981 ))
1982 })?;
1983 write_vlr_header(
1984 writer,
1985 &vlr.user_id,
1986 vlr.record_id,
1987 body_size,
1988 &vlr.description,
1989 )?;
1990 writer
1991 .write_all(&vlr.data)
1992 .map_err(|e| Error::io("write VLR body", e))?;
1993 Ok(())
1994}
1995
1996fn regular_las_vlrs_bytes(vlrs: &[las::Vlr]) -> Result<u32> {
1997 vlrs.iter().try_fold(0u32, |total, vlr| {
1998 let data_len = u16::try_from(vlr.data.len()).map_err(|_| {
1999 Error::InvalidInput(format!(
2000 "regular VLR {}:{} is too large: {} byte(s)",
2001 vlr.user_id,
2002 vlr.record_id,
2003 vlr.data.len()
2004 ))
2005 })?;
2006 total
2007 .checked_add(LAS_VLR_HEADER_BYTES + u32::from(data_len))
2008 .ok_or_else(|| Error::InvalidInput("VLR byte size overflow".into()))
2009 })
2010}
2011
2012fn write_evlr_header<W: Write>(
2013 writer: &mut W,
2014 user_id: &str,
2015 record_id: u16,
2016 body_size: u64,
2017 description: &str,
2018) -> Result<()> {
2019 writer
2020 .write_u16::<LittleEndian>(0)
2021 .map_err(|e| Error::io("write EVLR reserved", e))?;
2022 writer
2023 .write_all(&pad(user_id.as_bytes(), 16))
2024 .map_err(|e| Error::io("write EVLR user id", e))?;
2025 writer
2026 .write_u16::<LittleEndian>(record_id)
2027 .map_err(|e| Error::io("write EVLR record id", e))?;
2028 writer
2029 .write_u64::<LittleEndian>(body_size)
2030 .map_err(|e| Error::io("write EVLR body size", e))?;
2031 writer
2032 .write_all(&pad(description.as_bytes(), 32))
2033 .map_err(|e| Error::io("write EVLR description", e))?;
2034 Ok(())
2035}
2036
2037fn write_las_evlr<W: Write>(writer: &mut W, vlr: &las::Vlr) -> Result<()> {
2038 let body_size = u64::try_from(vlr.data.len()).map_err(|_| {
2039 Error::InvalidInput(format!(
2040 "EVLR {}:{} is too large: {} byte(s)",
2041 vlr.user_id,
2042 vlr.record_id,
2043 vlr.data.len()
2044 ))
2045 })?;
2046 write_evlr_header(
2047 writer,
2048 &vlr.user_id,
2049 vlr.record_id,
2050 body_size,
2051 &vlr.description,
2052 )?;
2053 writer
2054 .write_all(&vlr.data)
2055 .map_err(|e| Error::io("write EVLR body", e))?;
2056 Ok(())
2057}
2058
2059fn encode_point_record(
2060 buf: &mut [u8],
2061 fields: &CopcPointFields,
2062 scale: (f64, f64, f64),
2063 offset: (f64, f64, f64),
2064 point_index: usize,
2065 format: &LasFormat,
2066) -> Result<()> {
2067 let mut cursor = Cursor::new(buf);
2068 let (ix, iy, iz) = quantize_xyz(point_index, fields.x, fields.y, fields.z, scale, offset)?;
2069 let flags =
2070 fields.synthetic | (fields.key_point << 1) | (fields.withheld << 2) | (fields.overlap << 3);
2071 let point = raw::Point {
2072 x: ix,
2073 y: iy,
2074 z: iz,
2075 intensity: fields.intensity,
2076 flags: raw::point::Flags::ThreeByte(
2077 fields.return_number | (fields.number_of_returns << 4),
2078 flags
2079 | (fields.scan_channel << 4)
2080 | (fields.scan_direction_flag << 6)
2081 | (fields.edge_of_flight_line << 7),
2082 fields.classification,
2083 ),
2084 scan_angle: raw::point::ScanAngle::from(fields.scan_angle),
2085 user_data: fields.user_data,
2086 point_source_id: fields.point_source_id,
2087 gps_time: Some(fields.gps_time),
2088 color: format
2089 .has_color
2090 .then_some(Color::new(fields.red, fields.green, fields.blue)),
2091 waveform: None,
2092 nir: None,
2093 extra_bytes: fields.extra_bytes.clone(),
2094 };
2095 point
2096 .write_to(&mut cursor, format)
2097 .map_err(|e| Error::Las(format!("write point record: {e}")))?;
2098 Ok(())
2099}
2100
2101#[cfg(test)]
2102mod tests {
2103 use super::*;
2104
2105 struct VecSource {
2106 points: Vec<CopcPointFields>,
2107 }
2108
2109 impl CopcPointSource for VecSource {
2110 fn len(&self) -> usize {
2111 self.points.len()
2112 }
2113
2114 fn xyz(&self, index: usize) -> (f64, f64, f64) {
2115 let point = &self.points[index];
2116 (point.x, point.y, point.z)
2117 }
2118
2119 fn fields(&self, index: usize) -> Result<CopcPointFields> {
2120 Ok(self.points[index].clone())
2121 }
2122 }
2123
2124 #[test]
2125 fn spooled_lod_index_covers_each_point_once() {
2126 let points = (0..257)
2127 .map(|i| CopcPointFields {
2128 x: f64::from((i * 37) % 101),
2129 y: f64::from((i * 53) % 103),
2130 z: f64::from((i * 71) % 107),
2131 intensity: 0,
2132 return_number: 1,
2133 number_of_returns: 1,
2134 synthetic: 0,
2135 key_point: 0,
2136 withheld: 0,
2137 overlap: 0,
2138 scan_channel: 0,
2139 scan_direction_flag: 0,
2140 edge_of_flight_line: 0,
2141 classification: 0,
2142 user_data: 0,
2143 scan_angle: 0.0,
2144 point_source_id: 0,
2145 gps_time: f64::from(i),
2146 red: 0,
2147 green: 0,
2148 blue: 0,
2149 extra_bytes: Vec::new(),
2150 })
2151 .collect();
2152 let source = VecSource { points };
2153 let bounds = source_bounds(&source);
2154 let (center, halfsize) = cube_from_bounds(&bounds);
2155 let params = CopcWriterParams {
2156 max_points_per_node: 7,
2157 max_depth: 5,
2158 };
2159
2160 let spooled = build_lod_index(&source, center, halfsize, ¶ms, &NeverCancel).unwrap();
2161 let ranges = read_lod_index(&spooled).unwrap();
2162
2163 let mut seen = vec![false; source.len()];
2164 let mut total = 0usize;
2165 for (key, indices) in ranges {
2166 if key.level < params.max_depth as i32 {
2167 assert!(indices.len() <= params.max_points_per_node as usize);
2168 }
2169 for index in indices {
2170 let seen = &mut seen[index as usize];
2171 assert!(!*seen, "point index {index} was assigned more than once");
2172 *seen = true;
2173 total += 1;
2174 }
2175 }
2176 assert_eq!(source.len(), total);
2177 assert!(seen.into_iter().all(|value| value));
2178 }
2179
2180 #[test]
2181 fn dense_cluster_stays_bounded_below_giant_chunks() {
2182 let field = |x: f64, y: f64, z: f64, i: u32| CopcPointFields {
2188 x,
2189 y,
2190 z,
2191 intensity: 0,
2192 return_number: 1,
2193 number_of_returns: 1,
2194 synthetic: 0,
2195 key_point: 0,
2196 withheld: 0,
2197 overlap: 0,
2198 scan_channel: 0,
2199 scan_direction_flag: 0,
2200 edge_of_flight_line: 0,
2201 classification: 0,
2202 user_data: 0,
2203 scan_angle: 0.0,
2204 point_source_id: 0,
2205 gps_time: f64::from(i),
2206 red: 0,
2207 green: 0,
2208 blue: 0,
2209 extra_bytes: Vec::new(),
2210 };
2211 let mut points: Vec<CopcPointFields> = (0..4_000u32)
2213 .map(|i| {
2214 let f = f64::from(i);
2215 field(
2216 f * 1e-4,
2217 (f * 1.7).fract() * 0.4,
2218 (f * 2.3).fract() * 0.4,
2219 i,
2220 )
2221 })
2222 .collect();
2223 for i in 0..8u32 {
2225 points.push(field(
2226 f64::from(i) * 1000.0,
2227 f64::from(i) * 1000.0,
2228 f64::from(i) * 100.0,
2229 100_000 + i,
2230 ));
2231 }
2232 let max_points = 100usize;
2233 let source = VecSource { points };
2234 let bounds = source_bounds(&source);
2235 let (center, halfsize) = cube_from_bounds(&bounds);
2236 let params = CopcWriterParams {
2238 max_points_per_node: max_points as u32,
2239 max_depth: 3,
2240 };
2241
2242 let lod = build_lod_index(&source, center, halfsize, ¶ms, &NeverCancel).unwrap();
2243 for (key, indices) in read_lod_index(&lod).unwrap() {
2244 assert!(
2245 indices.len() <= max_points,
2246 "node {key:?} holds {} points, exceeding max_points_per_node {max_points}",
2247 indices.len(),
2248 );
2249 }
2250 }
2251
2252 #[test]
2253 fn hierarchy_plan_splits_large_root_page() {
2254 let mut entries = vec![Entry {
2255 key: VoxelKey::root(),
2256 offset: 1,
2257 byte_size: 1,
2258 point_count: 1,
2259 }];
2260 let mut offset = 2;
2261 for z in 0..16 {
2262 for y in 0..16 {
2263 for x in 0..16 {
2264 entries.push(Entry {
2265 key: VoxelKey { level: 4, x, y, z },
2266 offset,
2267 byte_size: 1,
2268 point_count: 1,
2269 });
2270 offset += 1;
2271 }
2272 }
2273 }
2274 entries.sort_by_key(|entry| entry.key);
2275
2276 let mut plan = plan_hierarchy_pages(&entries, VoxelKey::root()).unwrap();
2277 let start = 1024;
2278 let end = assign_hierarchy_page_offsets(&mut plan, start).unwrap();
2279
2280 assert!(plan.byte_size < hierarchy_page_byte_size(entries.len()).unwrap());
2281 assert!(plan
2282 .items
2283 .iter()
2284 .any(|item| matches!(item, HierarchyPageItem::Child(_))));
2285
2286 let mut out = Cursor::new(vec![0; start as usize]);
2287 out.seek(SeekFrom::Start(start)).unwrap();
2288 write_hierarchy_page_tree(&mut out, &plan).unwrap();
2289 assert_eq!(end, out.get_ref().len() as u64);
2290 }
2291
2292 fn source_bounds(source: &VecSource) -> Bounds {
2293 source.points.iter().fold(
2294 Bounds::point(source.points[0].x, source.points[0].y, source.points[0].z),
2295 |mut bounds, point| {
2296 bounds.extend(point.x, point.y, point.z);
2297 bounds
2298 },
2299 )
2300 }
2301
2302 fn read_lod_index(index: &LodIndex) -> Result<Vec<(VoxelKey, Vec<u32>)>> {
2303 let path: &Path = index.order_path.as_ref();
2304 let mut reader =
2305 BufReader::new(File::open(path).map_err(|e| Error::io("open LOD order", e))?);
2306 let mut out = Vec::new();
2307 for node in &index.nodes {
2308 reader
2309 .seek(SeekFrom::Start(node.start))
2310 .map_err(|e| Error::io("seek LOD order", e))?;
2311 let mut indices = Vec::with_capacity(node.count);
2312 for _ in 0..node.count {
2313 indices.push(
2314 reader
2315 .read_u32::<LittleEndian>()
2316 .map_err(|e| Error::io("read LOD order", e))?,
2317 );
2318 }
2319 out.push((node.key, indices));
2320 }
2321 Ok(out)
2322 }
2323}