rs_pcd/decoder/
binary.rs

1// Copyright 2025 bigpear0201
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6
7//     http://www.apache.org/licenses/LICENSE-2.0
8
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use crate::error::{PcdError, Result};
16use crate::header::ValueType;
17use crate::layout::PcdLayout;
18use crate::storage::PointBlock;
19use std::io::Read;
20
/// Maximum number of points fetched from the reader per read call.
///
/// The value is a point count, not a byte count: the byte size of one batch
/// is this constant multiplied by the layout's per-point size. Larger batches
/// mean fewer read calls at the cost of a bigger scratch buffer.
const BATCH_SIZE: usize = 1_024;
23
24pub struct BinaryReader<'a, R: Read> {
25    reader: &'a mut R,
26    layout: &'a PcdLayout,
27    points_to_read: usize,
28}
29
30impl<'a, R: Read> BinaryReader<'a, R> {
31    pub fn new(reader: &'a mut R, layout: &'a PcdLayout, points_to_read: usize) -> Self {
32        Self {
33            reader,
34            layout,
35            points_to_read,
36        }
37    }
38
39    pub fn decode(&mut self, output: &mut PointBlock) -> Result<()> {
40        let required_cols: Vec<String> =
41            self.layout.fields.iter().map(|f| f.name.clone()).collect();
42
43        // Ensure all columns exist
44        for name in &required_cols {
45            if output.get_column(name).is_none() {
46                return Err(PcdError::LayoutMismatch {
47                    expected: 0,
48                    got: 0,
49                });
50            }
51        }
52
53        output.resize(self.points_to_read);
54
55        // Get mutable references to all columns at once
56        let mut columns = output.get_columns_mut(&required_cols).ok_or_else(|| {
57            PcdError::Other("Failed to acquire columns mutable borrow".to_string())
58        })?;
59
60        let point_step = self.layout.total_size;
61        
62        // Batch read optimization: read multiple points at once to reduce syscalls
63        let batch_bytes = point_step * BATCH_SIZE;
64        let mut batch_buffer = vec![0u8; batch_bytes];
65
66        let mut point_idx = 0;
67        while point_idx < self.points_to_read {
68            let batch_end = (point_idx + BATCH_SIZE).min(self.points_to_read);
69            let points_in_batch = batch_end - point_idx;
70            let read_size = points_in_batch * point_step;
71
72            self.reader.read_exact(&mut batch_buffer[..read_size])?;
73
74            // Process all points in this batch
75            for batch_offset in 0..points_in_batch {
76                let buffer_start = batch_offset * point_step;
77                let i = point_idx + batch_offset;
78
79                for (field_idx, field) in self.layout.fields.iter().enumerate() {
80                    let col = &mut columns[field_idx];
81                    let start = buffer_start + field.offset;
82                    let end = start + field.size;
83                    let data = &batch_buffer[start..end];
84                    let dest_start = i * field.count;
85
86                    decode_field(col, field.type_, field.count, data, dest_start);
87                }
88            }
89
90            point_idx = batch_end;
91        }
92
93        Ok(())
94    }
95}
96
97/// Decode a single field from raw bytes into the column.
98/// Uses platform-optimized path for Little Endian systems.
99#[inline]
100fn decode_field(
101    col: &mut crate::storage::Column,
102    value_type: ValueType,
103    count: usize,
104    data: &[u8],
105    dest_start: usize,
106) {
107    match value_type {
108        ValueType::U8 => {
109            let vec = col.as_u8_mut().unwrap();
110            vec[dest_start..dest_start + count].copy_from_slice(data);
111        }
112        ValueType::I8 => {
113            let vec = col.as_i8_mut().unwrap();
114            for (k, &b) in data.iter().enumerate() {
115                vec[dest_start + k] = b as i8;
116            }
117        }
118        ValueType::U16 => {
119            let vec = col.as_u16_mut().unwrap();
120            decode_u16_slice(&data[..count * 2], &mut vec[dest_start..dest_start + count]);
121        }
122        ValueType::I16 => {
123            let vec = col.as_i16_mut().unwrap();
124            decode_i16_slice(&data[..count * 2], &mut vec[dest_start..dest_start + count]);
125        }
126        ValueType::U32 => {
127            let vec = col.as_u32_mut().unwrap();
128            decode_u32_slice(&data[..count * 4], &mut vec[dest_start..dest_start + count]);
129        }
130        ValueType::I32 => {
131            let vec = col.as_i32_mut().unwrap();
132            decode_i32_slice(&data[..count * 4], &mut vec[dest_start..dest_start + count]);
133        }
134        ValueType::F32 => {
135            let vec = col.as_f32_mut().unwrap();
136            decode_f32_slice(&data[..count * 4], &mut vec[dest_start..dest_start + count]);
137        }
138        ValueType::F64 => {
139            let vec = col.as_f64_mut().unwrap();
140            decode_f64_slice(&data[..count * 8], &mut vec[dest_start..dest_start + count]);
141        }
142    }
143}
144
145// Platform-optimized decode functions
146// On Little Endian platforms, we can use direct memory copy for significant speedup
147
/// Decodes little-endian `f32` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 4` are ignored.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 4` bytes.
#[cfg(target_endian = "little")]
#[inline]
fn decode_f32_slice(src: &[u8], dest: &mut [f32]) {
    let byte_len = dest.len() * 4;
    assert!(src.len() >= byte_len);
    // SAFETY: `src` is readable for `byte_len` bytes (asserted above) and
    // `dest` is writable for exactly `byte_len` bytes (`dest.len()` elements
    // of 4 bytes each). A shared and an exclusive borrow cannot overlap, the
    // byte-wise copy has no alignment requirement, and every bit pattern is a
    // valid `f32`. On a little-endian target the stored byte order already
    // matches the in-memory representation.
    unsafe {
        std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr() as *mut u8, byte_len);
    }
}

/// Decodes little-endian `f32` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 4` are ignored. This variant converts each value
/// individually because the target's byte order differs from the data's.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 4` bytes.
#[cfg(not(target_endian = "little"))]
#[inline]
fn decode_f32_slice(src: &[u8], dest: &mut [f32]) {
    use byteorder::{ByteOrder, LittleEndian};
    // Same precondition as the little-endian variant, so behavior does not
    // depend on the target platform.
    assert!(src.len() >= dest.len() * 4);
    for (value, chunk) in dest.iter_mut().zip(src.chunks_exact(4)) {
        *value = LittleEndian::read_f32(chunk);
    }
}
171
/// Decodes little-endian `f64` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 8` are ignored.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 8` bytes.
#[cfg(target_endian = "little")]
#[inline]
fn decode_f64_slice(src: &[u8], dest: &mut [f64]) {
    let byte_len = dest.len() * 8;
    assert!(src.len() >= byte_len);
    // SAFETY: `src` is readable for `byte_len` bytes (asserted above) and
    // `dest` is writable for exactly `byte_len` bytes (`dest.len()` elements
    // of 8 bytes each). A shared and an exclusive borrow cannot overlap, the
    // byte-wise copy has no alignment requirement, and every bit pattern is a
    // valid `f64`. On a little-endian target the stored byte order already
    // matches the in-memory representation.
    unsafe {
        std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr() as *mut u8, byte_len);
    }
}

/// Decodes little-endian `f64` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 8` are ignored. This variant converts each value
/// individually because the target's byte order differs from the data's.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 8` bytes.
#[cfg(not(target_endian = "little"))]
#[inline]
fn decode_f64_slice(src: &[u8], dest: &mut [f64]) {
    use byteorder::{ByteOrder, LittleEndian};
    // Same precondition as the little-endian variant, so behavior does not
    // depend on the target platform.
    assert!(src.len() >= dest.len() * 8);
    for (value, chunk) in dest.iter_mut().zip(src.chunks_exact(8)) {
        *value = LittleEndian::read_f64(chunk);
    }
}
193
/// Decodes little-endian `u16` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 2` are ignored.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 2` bytes.
#[cfg(target_endian = "little")]
#[inline]
fn decode_u16_slice(src: &[u8], dest: &mut [u16]) {
    let byte_len = dest.len() * 2;
    assert!(src.len() >= byte_len);
    // SAFETY: `src` is readable for `byte_len` bytes (asserted above) and
    // `dest` is writable for exactly `byte_len` bytes (`dest.len()` elements
    // of 2 bytes each). A shared and an exclusive borrow cannot overlap, the
    // byte-wise copy has no alignment requirement, and every bit pattern is a
    // valid `u16`. On a little-endian target the stored byte order already
    // matches the in-memory representation.
    unsafe {
        std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr() as *mut u8, byte_len);
    }
}

/// Decodes little-endian `u16` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 2` are ignored. This variant converts each value
/// individually because the target's byte order differs from the data's.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 2` bytes.
#[cfg(not(target_endian = "little"))]
#[inline]
fn decode_u16_slice(src: &[u8], dest: &mut [u16]) {
    use byteorder::{ByteOrder, LittleEndian};
    // Same precondition as the little-endian variant, so behavior does not
    // depend on the target platform.
    assert!(src.len() >= dest.len() * 2);
    for (value, chunk) in dest.iter_mut().zip(src.chunks_exact(2)) {
        *value = LittleEndian::read_u16(chunk);
    }
}
215
/// Decodes little-endian `i16` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 2` are ignored.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 2` bytes.
#[cfg(target_endian = "little")]
#[inline]
fn decode_i16_slice(src: &[u8], dest: &mut [i16]) {
    let byte_len = dest.len() * 2;
    assert!(src.len() >= byte_len);
    // SAFETY: `src` is readable for `byte_len` bytes (asserted above) and
    // `dest` is writable for exactly `byte_len` bytes (`dest.len()` elements
    // of 2 bytes each). A shared and an exclusive borrow cannot overlap, the
    // byte-wise copy has no alignment requirement, and every bit pattern is a
    // valid `i16`. On a little-endian target the stored byte order already
    // matches the in-memory representation.
    unsafe {
        std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr() as *mut u8, byte_len);
    }
}

/// Decodes little-endian `i16` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 2` are ignored. This variant converts each value
/// individually because the target's byte order differs from the data's.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 2` bytes.
#[cfg(not(target_endian = "little"))]
#[inline]
fn decode_i16_slice(src: &[u8], dest: &mut [i16]) {
    use byteorder::{ByteOrder, LittleEndian};
    // Same precondition as the little-endian variant, so behavior does not
    // depend on the target platform.
    assert!(src.len() >= dest.len() * 2);
    for (value, chunk) in dest.iter_mut().zip(src.chunks_exact(2)) {
        *value = LittleEndian::read_i16(chunk);
    }
}
237
/// Decodes little-endian `u32` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 4` are ignored.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 4` bytes.
#[cfg(target_endian = "little")]
#[inline]
fn decode_u32_slice(src: &[u8], dest: &mut [u32]) {
    let byte_len = dest.len() * 4;
    assert!(src.len() >= byte_len);
    // SAFETY: `src` is readable for `byte_len` bytes (asserted above) and
    // `dest` is writable for exactly `byte_len` bytes (`dest.len()` elements
    // of 4 bytes each). A shared and an exclusive borrow cannot overlap, the
    // byte-wise copy has no alignment requirement, and every bit pattern is a
    // valid `u32`. On a little-endian target the stored byte order already
    // matches the in-memory representation.
    unsafe {
        std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr() as *mut u8, byte_len);
    }
}

/// Decodes little-endian `u32` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 4` are ignored. This variant converts each value
/// individually because the target's byte order differs from the data's.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 4` bytes.
#[cfg(not(target_endian = "little"))]
#[inline]
fn decode_u32_slice(src: &[u8], dest: &mut [u32]) {
    use byteorder::{ByteOrder, LittleEndian};
    // Same precondition as the little-endian variant, so behavior does not
    // depend on the target platform.
    assert!(src.len() >= dest.len() * 4);
    for (value, chunk) in dest.iter_mut().zip(src.chunks_exact(4)) {
        *value = LittleEndian::read_u32(chunk);
    }
}
259
/// Decodes little-endian `i32` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 4` are ignored.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 4` bytes.
#[cfg(target_endian = "little")]
#[inline]
fn decode_i32_slice(src: &[u8], dest: &mut [i32]) {
    let byte_len = dest.len() * 4;
    assert!(src.len() >= byte_len);
    // SAFETY: `src` is readable for `byte_len` bytes (asserted above) and
    // `dest` is writable for exactly `byte_len` bytes (`dest.len()` elements
    // of 4 bytes each). A shared and an exclusive borrow cannot overlap, the
    // byte-wise copy has no alignment requirement, and every bit pattern is a
    // valid `i32`. On a little-endian target the stored byte order already
    // matches the in-memory representation.
    unsafe {
        std::ptr::copy_nonoverlapping(src.as_ptr(), dest.as_mut_ptr() as *mut u8, byte_len);
    }
}

/// Decodes little-endian `i32` values from `src` into `dest`.
///
/// Exactly `dest.len()` values are written; bytes in `src` beyond
/// `dest.len() * 4` are ignored. This variant converts each value
/// individually because the target's byte order differs from the data's.
///
/// # Panics
///
/// Panics if `src` holds fewer than `dest.len() * 4` bytes.
#[cfg(not(target_endian = "little"))]
#[inline]
fn decode_i32_slice(src: &[u8], dest: &mut [i32]) {
    use byteorder::{ByteOrder, LittleEndian};
    // Same precondition as the little-endian variant, so behavior does not
    // depend on the target platform.
    assert!(src.len() >= dest.len() * 4);
    for (value, chunk) in dest.iter_mut().zip(src.chunks_exact(4)) {
        *value = LittleEndian::read_i32(chunk);
    }
}