gistools/readers/las/laz/
reader.rs

1use super::{
2    ItemReader, ItemReaders, arithmetic_decoder::ArithmeticDecoder,
3    integer_compressor::IntegerCompressor, modify_point14_raw_input,
4};
5use crate::{
6    parsers::{BufferReader, FeatureReader, Reader},
7    proj::Transformer,
8    readers::{
9        GeoStore, LASExtendedVariableLengthRecord, LASHeader, LASPoint, LASReaderOptions,
10        LASVectorFeature, LAZCompressor, LAZHeader, LAZHeaderItem, LAZHeaderItemType,
11        build_geo_key_directory, build_wkt, las_parse_variable_length_records,
12        laz::{v1::LAZPoint10v1Reader, v2::LAZPoint10v2Reader},
13        v1::{LAZGpsTime11v1Reader, LAZbyte10v1Reader, LAZrgb12v1Reader, LAZwavepacket13v1Reader},
14        v2::{LAZGpsTime11v2Reader, LAZbyte10v2Reader, LAZrgb12v2Reader},
15        v3::{
16            LAZPoint14v3Reader, LAZbyte14v3Reader, LAZrgb14v3Reader, LAZrgbNir14v3Reader,
17            LAZwavepacket14v3Reader,
18        },
19    },
20};
21use alloc::{collections::BTreeMap, rc::Rc, string::String, vec, vec::Vec};
22use core::{cell::RefCell, fmt::Debug};
23use s2json::{Properties, VectorFeature, VectorGeometry, VectorPoint};
24
/// A state for the decompression
#[derive(Debug, Clone)]
pub struct LAZState<T: Reader + Debug> {
    // points per chunk; u32::MAX until known (or while variable-sized chunks are unresolved)
    chunk_size: u32,
    // points consumed so far in the current chunk
    chunk_count: u32,
    // index of the chunk currently being read
    curr_chunk: u32,
    // total number of chunks; u32::MAX is the "chunk table not read yet" sentinel
    number_chunks: u32,
    // how many chunk-table entries have been filled in so far
    tabled_chunks: u32,
    // cumulative per-chunk point totals (populated only for variable-sized chunks)
    chunk_totals: Vec<u32>,
    // absolute byte offset where each chunk starts
    chunk_starts: Vec<i64>,
    // byte offset where the current chunk's point data begins; 0 = no chunk started yet
    point_start: i64,
    // per-item decompression readers rebuilt at every chunk boundary
    readers: Vec<ItemReaders<T>>,
}
38impl<T: Reader + Debug> Default for LAZState<T> {
39    fn default() -> Self {
40        Self {
41            chunk_size: u32::MAX,
42            chunk_count: 0,
43            curr_chunk: 0,
44            number_chunks: 0,
45            tabled_chunks: 0,
46            chunk_totals: vec![],
47            chunk_starts: vec![],
48            point_start: 0,
49            readers: vec![],
50        }
51    }
52}
53impl<T: Reader + Debug> LAZState<T> {
54    /// Setup
55    pub fn setup(&mut self, header: &LAZHeader) {
56        *self = Default::default();
57        if header.compressor != LAZCompressor::Pointwise {
58            self.chunk_count = header.chunk_size;
59            if header.chunk_size != 0 {
60                self.chunk_size = header.chunk_size;
61            }
62            self.number_chunks = u32::MAX;
63        }
64    }
65}
66
/// # LASzip Reader
///
/// ## Description
/// Reads LAS zipped data. Supports LAS 1.4 specification.
/// [See specification](https://downloads.rapidlasso.de/doc/LAZ_Specification_1.4_R1.pdf)
///
/// Implements the [`FeatureReader`] trait
///
/// Data is stored like so:
/// ```txt
/// |            PUBLIC HEADER BLOCK           |
/// |          VARIABLE LENGTH RECORDS         |
/// |             POINT DATA RECORDS           |
/// | Extended Variable Length Records (EVLRs) |
/// |  Field Chunk table start position (EOF)  |
/// ```
///
/// ## Usage
///
/// The methods you have access to:
/// - [`LAZReader::new`]: Create a new LAZReader
/// - [`LAZReader::len`]: Get the number of points stored
/// - [`LAZReader::is_empty`]: Check if the reader is empty
/// - [`LAZReader::get_point`]: Reads a point in at index
/// - [`LAZReader::get_feature`]: Reads a feature in at index
/// - [`LAZReader::iter`]: Create an iterator to collect the features
/// - [`LAZReader::par_iter`]: Create a parallel iterator to collect the features
///
/// ```rust
/// use gistools::{parsers::{FeatureReader, FileReader}, readers::LAZReader};
/// use std::path::PathBuf;
///
/// let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
/// path.push("tests/readers/las/fixtures/simple.laz");
///
/// let las_reader = LAZReader::new(FileReader::from(path.clone()), None);
/// let features = las_reader.iter().collect::<Vec<_>>();
/// assert_eq!(features.len(), 1_065);
/// ```
///
/// ## Links
/// - <https://www.usgs.gov/ngp-standards-and-specifications/lidar-base-specification-online>
/// - <https://www.asprs.org/wp-content/uploads/2010/12/LAS_1_4_r13.pdf>
/// - <https://liblas.org/development/index.html>
/// - <https://downloads.rapidlasso.de/doc/LAZ_Specification_1.4_R1.pdf>
/// - <https://github.com/PDAL/PDAL>
/// - <https://github.com/libLAS/libLAS> (deprecated for PDAL)
/// - <https://github.com/LASzip>
#[derive(Debug, Clone)]
pub struct LAZReader<T: Reader + Debug> {
    // shared handle on the raw byte source; also held by the arithmetic decoder
    reader: Rc<RefCell<T>>,
    /// LAS Header Block
    pub header: LASHeader,
    /// LAZ Header Block
    pub laz_header: LAZHeader,
    /// Extended VARIABLE LENGTH RECORDS
    pub variable_length_records: BTreeMap<u32, LASExtendedVariableLengthRecord>,
    /// WKT projection string
    pub wkt: Option<String>,
    /// GeoKeyDirectory
    pub geo_key_directory: GeoStore,
    // arithmetic decoder shared with every per-item reader
    dec: Rc<RefCell<ArithmeticDecoder<T>>>,
    // decompress_selective: u32, // all
    // Is true if this file uses layered compression for LAS 1.4.
    layered_las14_compression: bool,
    // projection transformer applied to each decoded point (unless disabled)
    transformer: Transformer,
    // when true, points are returned in their native projection untransformed
    dont_transform: bool,
    // mutable decompression state, interior-mutable so reads take &self
    state: RefCell<LAZState<T>>,
}
136impl<T: Reader + Debug> LAZReader<T> {
    /// Create a new LAZReader
    ///
    /// ## Parameters
    /// - `reader`: the source of the LAZ bytes
    /// - `options`: optional reader options (extra EPSG code definitions,
    ///   transform toggle); defaults are used when `None`
    pub fn new(reader: T, options: Option<LASReaderOptions>) -> Self {
        let options = options.unwrap_or_default();
        // parse the public header block and the standard VLRs up front
        let header = LASHeader::from_reader(&reader);
        let variable_length_records = las_parse_variable_length_records(&header, &reader);
        let mut transformer = Transformer::new();
        // register any user-supplied EPSG definitions before deriving projections
        for (epsg_code, wkt) in options.epsg_codes.iter() {
            transformer.insert_epsg_code(epsg_code.clone(), wkt.clone());
        }
        // derive projection info from the VLRs; `wkt.is_none()` tells the
        // geo-key pass it is the primary source (NOTE(review): confirm intent)
        let wkt = build_wkt(&header, &variable_length_records, &mut transformer);
        let geo_key_directory =
            build_geo_key_directory(&variable_length_records, &mut transformer, wkt.is_none());
        // reader is shared between this struct and the arithmetic decoder
        let reader = Rc::new(RefCell::new(reader));
        let mut laz_reader = Self {
            reader: reader.clone(),
            header,
            variable_length_records,
            laz_header: LAZHeader::default(),
            wkt,
            geo_key_directory,
            transformer,
            dec: Rc::new(RefCell::new(ArithmeticDecoder::new(reader))),
            // decompress_selective: 0xffffffff,
            layered_las14_compression: false,
            state: LAZState::default().into(),
            dont_transform: options.dont_transform,
        };
        // pull the LAZ VLR (record 22204) and the special EVLRs it points at
        laz_reader.build_laz();
        laz_reader.parse_extended_variable_length_records();
        // setup other decoding variables
        laz_reader.layered_las14_compression =
            laz_reader.laz_header.compressor == LAZCompressor::LayeredAndChunked;
        laz_reader.state.borrow_mut().setup(&laz_reader.laz_header);

        laz_reader
    }
173
174    /// Get the number of points stored
175    pub fn len(&self) -> u64 {
176        self.header.num_points as u64
177    }
178
179    /// Check if the reader is empty
180    pub fn is_empty(&self) -> bool {
181        self.len() == 0
182    }
183
184    /// If the LAZ variable length record is present, build a LAZ parser
185    fn build_laz(&mut self) {
186        if self.header.point_data_format_id >= 127 {
187            let laz_data_record = self.variable_length_records.get(&22204);
188            if let Some(laz_data_record) = laz_data_record
189                && let Some(laz_data) = &laz_data_record.data
190            {
191                self.laz_header = LAZHeader::from_bytes(laz_data.to_vec());
192                return;
193            }
194        }
195        panic!("LAZ data, but LAZ record not found.");
196    }
197
198    /// The Compressed Data Block can be followed by any number of EVLRs, which are identical to the
199    /// LAS 1.4 specification. The EVLR is similar to a VLR, but can carry a larger payload, as the Record
200    /// Length After Header field is 8 bytes instead of 2 bytes. The number of EVLRs is specified in the
201    /// Number of Extended Variable Length Records field in the Public Header Block. The start of the
202    /// first EVLR is at the file offset indicated by the Start of first Extended Variable Length Record in
203    /// the Public Header Block.
204    ///
205    /// The Extended Variable Length Records must be accessed sequentially, since the size of each
206    /// variable length record is contained in the Extended Variable Length Record Header. Each Extended
207    /// Variable Length Record Header (i.e. without the optional payload data) is 60 bytes in length.
208    fn parse_extended_variable_length_records(&mut self) {
209        let Self { reader, laz_header, .. } = self;
210        let LAZHeader { offset_special_evlrs, num_special_evlrs, .. } = laz_header;
211        let mut position = *offset_special_evlrs as u64;
212        let reader = reader.borrow();
213        for _ in 0..*num_special_evlrs {
214            let record = LASExtendedVariableLengthRecord::from_reader_extended(&*reader, position);
215            position += 60 + record.record_length;
216            self.variable_length_records.insert(record.record_id as u32, record);
217        }
218    }
219
    /// returns the next point
    ///
    /// ## Returns
    /// the next decoded point (run through the projection transformer unless
    /// transforms are disabled), or `None` when the file declares zero points.
    /// Bounds beyond that are enforced by the iterator, not here.
    pub fn get_point(&self) -> Option<VectorPoint<LASPoint>> {
        if self.header.num_points == 0 {
            return None;
        }
        let LAZHeader { compressor, .. } = self.laz_header;
        let mut context = 0;
        // no decoder means it wasn't compressed, just pull the point
        // NOTE(review): `uncompressed` is actually "a new chunk was just
        // started" — the first point of every chunk is stored raw, so the
        // raw-read path below is taken at each chunk boundary. Name is misleading.
        let uncompressed = self.set_next_chunk();
        self.state.borrow_mut().chunk_count += 1;
        let mut point = LASPoint::default();
        if uncompressed || compressor == LAZCompressor::None {
            // chunk boundary (or uncompressed stream): read raw point data and
            // (re)initialize the per-item readers and the decoder
            self.first_chunk_read(&mut context, &mut point);
        } else {
            // interior of a chunk: entropy-decode relative to the prior point
            self.pointwise_compress_read(&mut point, &mut context);
        }
        let mut vp = point.to_vector_point(&self.header);
        // transform if needed
        if !self.dont_transform {
            self.transformer.forward_mut(&mut vp);
        }

        Some(vp)
    }
244
245    /// Reads a point in at index as a feature
246    pub fn get_feature(&self) -> Option<LASVectorFeature> {
247        self.get_point().map(|point| {
248            VectorFeature::new_wm(
249                None,
250                Properties::default(),
251                VectorGeometry::new_point(point, None),
252                None,
253            )
254        })
255    }
256
    /// If we are at the end of the chunk, we need to set up the next chunk.
    ///
    /// ## Returns
    /// true if we are starting a new chunk, false otherwise
    fn set_next_chunk(&self) -> bool {
        // chunk_count == chunk_size means the previous chunk is exhausted;
        // setup() primes chunk_count so the very first read also lands here
        if self.state.borrow().chunk_count == self.state.borrow().chunk_size {
            // point_start == 0 means no chunk has been started yet, so only
            // advance the chunk index once a first chunk has been consumed
            if self.state.borrow().point_start != 0 {
                self.state.borrow_mut().curr_chunk += 1;
            }
            // reads the chunk table on first use and records where points begin
            // (must run before the borrow_mut below — it borrows state itself)
            self.init_dec();
            let state = &mut self.state.borrow_mut();
            let curr_chunk = state.curr_chunk;
            let tabled_chunks = state.tabled_chunks;
            if curr_chunk == tabled_chunks {
                // no or incomplete chunk table?
                state.chunk_starts[tabled_chunks as usize] = state.point_start; // needs fixing
                state.tabled_chunks += 1;
            } else if !state.chunk_totals.is_empty() {
                // variable sized chunks?
                // chunk size = difference of consecutive cumulative point totals
                state.chunk_size = state.chunk_totals[curr_chunk as usize + 1]
                    - state.chunk_totals[curr_chunk as usize];
            }
            state.chunk_count = 0;
            return true;
        }
        false
    }
284
    /// Initialize decoder
    ///
    /// Records where the next chunk's point data starts; on the very first
    /// call it also reads the chunk table.
    fn init_dec(&self) {
        // maybe read chunk table (only if chunking enabled)
        // number_chunks == u32::MAX is the "table not read yet" sentinel from setup()
        if self.state.borrow().number_chunks == u32::MAX {
            self.read_chunk_table();
            let state = &mut self.state.borrow_mut();
            state.curr_chunk = 0;
            // variable-sized chunks: first chunk's size comes from the table
            // NOTE(review): assumes at least one chunk; chunk_totals[1] would
            // panic on a variable-chunk file with number_chunks == 0
            if !state.chunk_totals.is_empty() {
                state.chunk_size = state.chunk_totals[1];
            }
        }
        // remember where this chunk's points begin in the byte stream
        self.state.borrow_mut().point_start = self.reader.borrow().tell() as i64;
    }
298
    /// Read the first chunk
    ///
    /// The first point of a chunk is stored raw. This rebuilds the per-item
    /// readers, reads the raw point bytes for every item, then initializes the
    /// readers and the arithmetic decoder so subsequent points in the chunk can
    /// be entropy-decoded.
    ///
    /// ## Parameters
    /// - `context`: current context
    /// - `point`: where to store the decompressed data
    fn first_chunk_read(&self, context: &mut u32, point: &mut LASPoint) {
        let state = &mut self.state.borrow_mut();
        let LAZHeader { items, .. } = &self.laz_header;
        let mut point_data: Vec<BufferReader> = vec![];
        state.readers.clear();
        // first read in the raw data
        for item in items.iter() {
            state.readers.push(self.get_point_compressed_reader(item));
            let LAZHeaderItem { r#type, size, .. } = *item;
            let mut raw_data: BufferReader = self.reader.borrow().seek_slice(size as usize).into();
            if r#type == LAZHeaderItemType::Point14 {
                // Point14 raw bytes need reshaping before the reader consumes them
                raw_data = modify_point14_raw_input(&raw_data);
            }
            point_data.push(raw_data);
        }
        // now choose how we initialize (layered LAS 1.4 vs classic pointwise)
        if self.layered_las14_compression {
            // set decoder and grab count size
            (*self.dec).borrow_mut().init(false);
            let _count = self.reader.borrow().uint32_le(None);
            // chunk sizes then init
            for reader in state.readers.iter_mut() {
                reader.chunk_sizes(&(*self.reader.borrow()));
            }
            for (i, reader) in state.readers.iter_mut().enumerate() {
                reader.init(&point_data[i], point, context);
            }
        } else {
            // initialize readers then init decoder
            for (i, reader) in state.readers.iter_mut().enumerate() {
                reader.init(&point_data[i], point, context);
            }
            (*self.dec).borrow_mut().init(true);
        }
    }
339
340    /// Get the compression reader for a point type
341    ///
342    /// ## Parameters
343    /// - `item`: the item to build readers for
344    ///
345    /// ## Returns
346    /// returns the compression reader for the item
347    fn get_point_compressed_reader(&self, item: &LAZHeaderItem) -> ItemReaders<T> {
348        let LAZHeaderItem { r#type, size, version } = *item;
349        if r#type == LAZHeaderItemType::Point10 {
350            if version == 1 {
351                return LAZPoint10v1Reader::new(self.dec.clone()).into();
352            } else if version == 2 {
353                return LAZPoint10v2Reader::new(self.dec.clone()).into();
354            }
355        } else if r#type == LAZHeaderItemType::GpsTime11 {
356            if version == 1 {
357                return LAZGpsTime11v1Reader::new(self.dec.clone()).into();
358            } else if version == 2 {
359                return LAZGpsTime11v2Reader::new(self.dec.clone()).into();
360            }
361        } else if r#type == LAZHeaderItemType::Rgb12 {
362            if version == 1 {
363                return LAZrgb12v1Reader::new(self.dec.clone()).into();
364            } else if version == 2 {
365                return LAZrgb12v2Reader::new(self.dec.clone()).into();
366            }
367        } else if (r#type == LAZHeaderItemType::WavePacket13) && (version == 1) {
368            return LAZwavepacket13v1Reader::new(self.dec.clone()).into();
369        } else if r#type == LAZHeaderItemType::Byte {
370            if version == 1 {
371                return LAZbyte10v1Reader::new(self.dec.clone(), size as u32).into();
372            } else if version == 2 {
373                return LAZbyte10v2Reader::new(self.dec.clone(), size as u32).into();
374            }
375        } else if r#type == LAZHeaderItemType::Point14 {
376            if version == 3 || version == 4 {
377                return LAZPoint14v3Reader::new(self.dec.clone(), None).into();
378            }
379        } else if r#type == LAZHeaderItemType::Rgb14 {
380            if version == 3 || version == 4 {
381                return LAZrgb14v3Reader::new(self.dec.clone(), None).into();
382            }
383        } else if r#type == LAZHeaderItemType::RgbNir14 {
384            if version == 3 || version == 4 {
385                return LAZrgbNir14v3Reader::new(self.dec.clone(), None).into();
386            }
387        } else if r#type == LAZHeaderItemType::WavePacket14 {
388            if version == 3 || version == 4 {
389                return LAZwavepacket14v3Reader::new(self.dec.clone(), None).into();
390            }
391        } else if r#type == LAZHeaderItemType::Byte14 && (version == 3 || version == 4) {
392            return LAZbyte14v3Reader::new(self.dec.clone(), size as u32, None).into();
393        }
394        panic!("Unsupported compressed point type: {:?} & version: {}", r#type, version);
395    }
396
397    /// A pointwise compression read
398    ///
399    /// ## Parameters
400    /// - `point_data`: where to store the decompressed data
401    /// - `context`: the context
402    fn pointwise_compress_read(&self, point: &mut LASPoint, context: &mut u32) {
403        for reader in self.state.borrow_mut().readers.iter_mut() {
404            reader.read(point, context);
405        }
406    }
407
408    /// If chunking is enabled, read the chunk table
409    fn read_chunk_table(&self) {
410        let Self { reader, .. } = self;
411        let reader = reader.borrow();
412        let mut chunk_table_start_position = reader.int64_le(None);
413        // self is where the chunks start
414        let chunks_start = reader.tell() as i64; // I64
415
416        if chunk_table_start_position == -1 {
417            // the compressor was writing to a non-seekable stream and wrote the chunk table start at the end
418            // read the last 8 bytes
419            chunk_table_start_position = reader.int64_le(Some(reader.len() - 8));
420        }
421
422        // read the chunk table
423        // move to where the chunk table starts
424        reader.seek(chunk_table_start_position as u64);
425        // fail if the version is wrong
426        let version = reader.uint32_le(None);
427        if version != 0 {
428            panic!("Bad version number. Aborting.");
429        }
430        // build the chunk table
431        let state = &mut self.state.borrow_mut();
432        state.number_chunks = reader.uint32_le(None);
433        state.chunk_totals = vec![];
434        // set chunk start and totals
435        if state.chunk_size == u32::MAX {
436            state.chunk_totals = vec![0; state.number_chunks as usize + 1];
437        } else {
438            state.chunk_starts.push(chunks_start);
439        }
440        state.tabled_chunks = 1;
441        if state.number_chunks > 0 {
442            (*self.dec).borrow_mut().init(true);
443            let mut ic = IntegerCompressor::new(self.dec.clone(), Some(32), Some(2), None, None);
444            ic.init_decompressor();
445            for i in 1..=state.number_chunks as usize {
446                if state.chunk_size == u32::MAX {
447                    let chunk_total = ic
448                        .decompress(if i > 1 { state.chunk_totals[i - 1] as i32 } else { 0 }, 0)
449                        as u32;
450                    state.chunk_totals.push(chunk_total);
451                }
452                let chunk_start = ic
453                    .decompress(if i > 1 { state.chunk_starts[i - 1] as i32 } else { 0 }, 1)
454                    as i64;
455                state.chunk_starts.push(chunk_start);
456                state.tabled_chunks += 1;
457            }
458            for i in 1..=state.number_chunks as usize {
459                if state.chunk_size == u32::MAX {
460                    state.chunk_totals[i] += state.chunk_totals[i - 1];
461                }
462                state.chunk_starts[i] += state.chunk_starts[i - 1];
463            }
464        }
465
466        reader.seek(chunks_start as u64);
467    }
468}
469
/// The LAZ Iterator tool
#[derive(Debug)]
pub struct LAZIterator<'a, T: Reader + Debug> {
    // the reader the features are pulled from (decoding is sequential)
    reader: &'a LAZReader<T>,
    // index of the next point to emit (absolute, 0-based)
    index: u64,
    // exclusive end bound for this iterator's slice of the points
    len: u64,
}
477impl<T: Reader + Debug> Iterator for LAZIterator<'_, T> {
478    type Item = LASVectorFeature;
479
480    fn next(&mut self) -> Option<Self::Item> {
481        let las = &self.reader;
482        if self.index >= self.len {
483            return None;
484        }
485        self.index += 1;
486        if self.index > las.len() {
487            return None;
488        }
489        las.get_feature()
490    }
491}
/// A feature reader trait with a callback-based approach
impl<T: Reader + Debug> FeatureReader<(), Properties, LASPoint> for LAZReader<T> {
    type FeatureIterator<'a>
        = LAZIterator<'a, T>
    where
        T: 'a;

    /// Iterate every feature, starting from the first point.
    fn iter(&self) -> Self::FeatureIterator<'_> {
        // rewind to the start of the point data and reset decompression state
        self.reader.borrow().seek(self.header.offset_to_points as u64);
        self.state.borrow_mut().setup(&self.laz_header);
        LAZIterator { reader: self, index: 0, len: self.len() }
    }

    /// Iterate this thread's contiguous share of the features.
    ///
    /// Splits `[0, len)` into `pool_size` ranges; this call yields `[start, end)`
    /// for `thread_id`. NOTE(review): `pool_size == 0` divides by zero.
    fn par_iter(&self, pool_size: usize, thread_id: usize) -> Self::FeatureIterator<'_> {
        self.reader.borrow().seek(self.header.offset_to_points as u64);
        self.state.borrow_mut().setup(&self.laz_header);

        let pool_size = pool_size as u64;
        let thread_id = thread_id as u64;
        let start = self.len() * thread_id / pool_size;
        let end = self.len() * (thread_id + 1) / pool_size;
        // run get_point as many times as it takes to start at the correct position
        // (LAZ decoding is sequential, so skipping means decode-and-discard)
        for _ in 0..start {
            self.get_point();
        }
        LAZIterator { reader: self, index: start, len: end }
    }
}