read_vk6/reader/
mod.rs

//! Inspired by the awesome Surfalize Python library.
2
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{Cursor, Read, Seek, SeekFrom};
6use std::path::Path;
7
8use byteorder::{LittleEndian, ReadBytesExt};
9use chrono::NaiveDate;
10use ndarray::{Array2, Array3};
11use zip::ZipArchive;
12
13use crate::error::VkxError;
14use crate::types::{Metadata, RawSurface};
15
/// Number of bytes occupied by the file header that precedes the offset table.
const HEADER_SIZE: usize = 12;
/// Scale factor from picometres to micrometres (1 pm = 1e-6 µm).
const FIXED_UNIT_PM_TO_UM: f64 = 1.0e-6;
18
19/// Public entry point
20pub fn read_vk6(path: &Path, read_image_layers: bool) -> Result<RawSurface, VkxError> {
21    // Open zip file
22    let f = File::open(path)?;
23    let mut archive = ZipArchive::new(f)?;
24
25    // The internal file is named "Vk4File" according to Surfalize
26    let mut vk4_file = archive
27        .by_name("Vk4File")
28        .map_err(|_| VkxError::Format("Vk4File entry not found in archive".to_string()))?;
29
30    // Read the entire embedded file into memory (typical vk files are modest in size)
31    let mut vk4_bytes = Vec::with_capacity(vk4_file.size() as usize);
32    vk4_file.read_to_end(&mut vk4_bytes)?;
33    let mut cursor = Cursor::new(vk4_bytes);
34
35    // Skip header (12 bytes). Kept to mirror the original code.
36    {
37        let mut header = [0u8; HEADER_SIZE];
38        cursor.read_exact(&mut header)?;
39    }
40
41    // Read the offset table (surfalize uses Layout with many entries). We'll read 17 u32s
42    let offset_table = read_u32_list(&mut cursor, 17)?;
43    // Map the indices used downstream (matches Surfalize mapping)
44    let meas_conds_offset = offset_table[0] as u64;
45    let _color_peak_offset = offset_table[1] as u64;
46    let color_light_offset = offset_table[2] as u64;
47    let _light_offset = offset_table[3] as u64;
48    let height_offset = offset_table[6] as u64;
49    let string_data_offset = offset_table[16] as u64;
50
51    // Read measurement conditions block (Surfalize reads 76 u32 entries)
52    cursor.seek(SeekFrom::Start(meas_conds_offset))?;
53    let meas = read_u32_list(&mut cursor, 76)?;
54    
55    // Extract all measurement conditions following Surfalize's LAYOUT_MEASUREMENT_CONDITIONS
56    // Index 0: size (not used separately)
57    let year = meas.get(1).copied().unwrap_or(1970);
58    let month = meas.get(2).copied().unwrap_or(1);
59    let day = meas.get(3).copied().unwrap_or(1);
60    let hour = meas.get(4).copied().unwrap_or(0);
61    let minute = meas.get(5).copied().unwrap_or(0);
62    let second = meas.get(6).copied().unwrap_or(0);
63    let diff_from_utc = meas.get(7).copied().map(|v| v as i32);
64    
65    let img_attributes = meas.get(8).copied();
66    let user_interface_mode = meas.get(9).copied();
67    let color_composite_mode = meas.get(10).copied();
68    let img_layer_number = meas.get(11).copied();
69    let run_mode = meas.get(12).copied();
70    let peak_mode = meas.get(13).copied();
71    let sharpening_level = meas.get(14).copied();
72    let speed = meas.get(15).copied();
73    let distance = meas.get(16).copied();
74    let pitch = meas.get(17).copied();
75    let optical_zoom_raw = meas.get(18).copied();
76    let number_of_lines = meas.get(19).copied();
77    let line0_position = meas.get(20).copied();
78    // Indices 21-23 are reserved
79    let lens_magnification_raw = meas.get(24).copied();
80    let pmt_gain_mode = meas.get(25).copied();
81    let pmt_gain = meas.get(26).copied();
82    let pmt_offset = meas.get(27).copied();
83    let nd_filter = meas.get(28).copied();
84    // Index 29 is reserved
85    let persist_count = meas.get(30).copied();
86    let shutter_speed_mode = meas.get(31).copied();
87    let shutter_speed = meas.get(32).copied();
88    let white_balance_mode = meas.get(33).copied();
89    let white_balance_red = meas.get(34).copied();
90    let white_balance_blue = meas.get(35).copied();
91    let camera_gain = meas.get(36).copied();
92    let plane_compensation = meas.get(37).copied();
93    let xy_length_unit = meas.get(38).copied();
94    let z_length_unit = meas.get(39).copied();
95    let xy_decimal_place = meas.get(40).copied();
96    let z_decimal_place = meas.get(41).copied();
97    let x_length_per_pixel = meas.get(42).copied();
98    let y_length_per_pixel = meas.get(43).copied();
99    let z_length_per_digit = meas.get(44).copied();
100    // Indices 45-49 are reserved (20 bytes = 5 u32s)
101    let light_filter_type = meas.get(50).copied();
102    // Index 51 is reserved
103    let gamma_reverse = meas.get(52).copied();
104    let gamma = meas.get(53).copied();
105    let gamma_correction_offset = meas.get(54).copied();
106    let ccd_bw_offset = meas.get(55).copied();
107    let num_aperture = meas.get(56).copied();
108    let head_type = meas.get(57).copied();
109    let pmt_gain_2 = meas.get(58).copied();
110    let omit_color_img = meas.get(59).copied();
111    let lens_id = meas.get(60).copied();
112    let light_lut_mode = meas.get(61).copied();
113    let light_lut_in0 = meas.get(62).copied();
114    let light_lut_out0 = meas.get(63).copied();
115    let light_lut_in1 = meas.get(64).copied();
116    let light_lut_out1 = meas.get(65).copied();
117    let light_lut_in2 = meas.get(66).copied();
118    let light_lut_out2 = meas.get(67).copied();
119    let light_lut_in3 = meas.get(68).copied();
120    let light_lut_out3 = meas.get(69).copied();
121    let light_lut_in4 = meas.get(70).copied();
122    let light_lut_out4 = meas.get(71).copied();
123    let upper_position = meas.get(72).copied();
124    let lower_position = meas.get(73).copied();
125    let light_effective_bit_depth = meas.get(74).copied();
126    let height_effective_bit_depth = meas.get(75).copied();
127
128    // Parse main height layer
129    cursor.seek(SeekFrom::Start(height_offset))?;
130    let (width, height, bit_depth, _data_size, _pal_min, _pal_max, raw_height) =
131        parse_height_block(&mut cursor)?;
132
133    // Convert raw_height bytes to numeric array depending on bit_depth
134    let height_array = match bit_depth {
135        16 => {
136            let mut rdr = Cursor::new(raw_height);
137            let mut values = Vec::with_capacity((width as usize) * (height as usize));
138            for _ in 0..(width as usize * height as usize) {
139                let v = rdr.read_u16::<LittleEndian>()?;
140                values.push(v as f64);
141            }
142            Array2::from_shape_vec((height as usize, width as usize), values)
143                .map_err(|e| VkxError::Format(format!("reshape height failed: {}", e)))?
144        }
145        32 => {
146            let mut rdr = Cursor::new(raw_height);
147            let mut values = Vec::with_capacity((width as usize) * (height as usize));
148            for _ in 0..(width as usize * height as usize) {
149                let v = rdr.read_u32::<LittleEndian>()?;
150                values.push(v as f64);
151            }
152            Array2::from_shape_vec((height as usize, width as usize), values)
153                .map_err(|e| VkxError::Format(format!("reshape height failed: {}", e)))?
154        }
155        other => return Err(VkxError::UnsupportedBitDepth(other)),
156    };
157
158    // Optionally parse RGB image (color light = Laser+RGB)
159    // Try to read RGB even if omit_color_img flag is set, in case data is present
160    let rgb_image = if read_image_layers {
161        // We'll try reading color_light (Laser+RGB layer, which is what Surfalize uses)
162        if color_light_offset != 0 {
163            cursor.seek(SeekFrom::Start(color_light_offset))?;
164            if let Ok((w2, h2, _bd_img, _size_img, raw_img)) = parse_image_block(&mut cursor) {
165                // Expect 3 bytes per pixel; do bounds check
166                let expected = (w2 as usize) * (h2 as usize) * 3;
167                if raw_img.len() < expected {
168                    // If not enough bytes, treat as missing
169                    None
170                } else {
171                    // Fill ndarray Array3 (h, w, 3)
172                    let mut arr = Array3::<u8>::zeros((h2 as usize, w2 as usize, 3));
173                    // Surfalize flips the channel axis; data is stored as BGR, flip to get RGB
174                    for y in 0..(h2 as usize) {
175                        for x in 0..(w2 as usize) {
176                            let idx = (y * (w2 as usize) + x) * 3;
177                            let b = raw_img[idx];
178                            let g = raw_img[idx + 1];
179                            let r = raw_img[idx + 2];
180                            // Flip BGR -> RGB
181                            arr[[y, x, 0]] = r;
182                            arr[[y, x, 1]] = g;
183                            arr[[y, x, 2]] = b;
184                        }
185                    }
186                    Some(arr)
187                }
188            } else {
189                None
190            }
191        } else {
192            None
193        }
194    } else {
195        None
196    };
197
198    // Parse string data (title/lens) from string_data_offset
199    cursor.seek(SeekFrom::Start(string_data_offset))?;
200    let (title, lens_name) = parse_string_data(&mut cursor)?;
201
202    // Build metadata with all available fields
203    let timestamp = NaiveDate::from_ymd_opt(year as i32, month, day)
204        .and_then(|d| d.and_hms_opt(hour, minute, second));
205
206    let metadata = Metadata {
207        title,
208        lens_name,
209        timestamp,
210        diff_from_utc,
211        img_attributes,
212        user_interface_mode,
213        color_composite_mode,
214        img_layer_number,
215        run_mode,
216        peak_mode,
217        sharpening_level,
218        speed,
219        distance,
220        pitch,
221        optical_zoom: optical_zoom_raw.map(|v| (v as f64) / 10.0),
222        objective_magnification: lens_magnification_raw.map(|v| (v as f64) / 10.0),
223        lens_id,
224        num_aperture,
225        head_type,
226        number_of_lines,
227        line0_position,
228        pmt_gain_mode,
229        pmt_gain,
230        pmt_gain_2,
231        pmt_offset,
232        nd_filter,
233        light_filter_type,
234        persist_count,
235        shutter_speed_mode,
236        shutter_speed,
237        white_balance_mode,
238        white_balance_red,
239        white_balance_blue,
240        camera_gain,
241        omit_color_img,
242        plane_compensation,
243        xy_length_unit,
244        z_length_unit,
245        xy_decimal_place,
246        z_decimal_place,
247        x_length_per_pixel,
248        y_length_per_pixel,
249        z_length_per_digit,
250        gamma_reverse,
251        gamma,
252        gamma_correction_offset,
253        ccd_bw_offset,
254        light_lut_mode,
255        light_lut_in0,
256        light_lut_out0,
257        light_lut_in1,
258        light_lut_out1,
259        light_lut_in2,
260        light_lut_out2,
261        light_lut_in3,
262        light_lut_out3,
263        light_lut_in4,
264        light_lut_out4,
265        upper_position,
266        lower_position,
267        light_effective_bit_depth,
268        height_effective_bit_depth,
269        extra: HashMap::new(),
270    };
271
272    // Convert heights to micrometers using the metadata values
273    let scale_height = z_length_per_digit.unwrap_or(1) as f64 * FIXED_UNIT_PM_TO_UM;
274    let height_um = height_array.mapv(|v| v * scale_height);
275
276    let step_x = x_length_per_pixel.unwrap_or(0) as f64 * FIXED_UNIT_PM_TO_UM;
277    let step_y = y_length_per_pixel.unwrap_or(0) as f64 * FIXED_UNIT_PM_TO_UM;
278
279    Ok(RawSurface {
280        height: height_um,
281        step_x,
282        step_y,
283        metadata,
284        rgb_image,
285    })
286}
287
288/// Read `count` u32 little-endian values from reader
289fn read_u32_list<R: Read>(r: &mut R, count: usize) -> Result<Vec<u32>, VkxError> {
290    let mut out = Vec::with_capacity(count);
291    for _ in 0..count {
292        out.push(r.read_u32::<LittleEndian>()?);
293    }
294    Ok(out)
295}
296
297/// Parse the height layout similar to LAYOUT_HEIGHT_DATA in Surfalize
298/// Returns (width, height, bit_depth, data_byte_size, palette_min, palette_max, raw_bytes)
299fn parse_height_block<R: Read + Seek>(
300    r: &mut R,
301) -> Result<(u32, u32, u32, u32, i32, i32, Vec<u8>), VkxError> {
302    let width = r.read_u32::<LittleEndian>()?;
303    let height = r.read_u32::<LittleEndian>()?;
304    let bit_depth = r.read_u32::<LittleEndian>()?;
305    let _compression = r.read_u32::<LittleEndian>()?;
306    let data_byte_size = r.read_u32::<LittleEndian>()?;
307    let palette_range_min = r.read_i32::<LittleEndian>()?;
308    let palette_range_max = r.read_i32::<LittleEndian>()?;
309
310    // reserved area (768 bytes in original layout)
311    let mut reserved = vec![0u8; 768];
312    r.read_exact(&mut reserved)?;
313
314    // read raw bytes for the height map
315    let mut raw = vec![0u8; data_byte_size as usize];
316    r.read_exact(&mut raw)?;
317    Ok((
318        width,
319        height,
320        bit_depth,
321        data_byte_size,
322        palette_range_min,
323        palette_range_max,
324        raw,
325    ))
326}
327
328/// Parse image block like LAYOUT_IMAGE_DATA
329fn parse_image_block<R: Read + Seek>(r: &mut R) -> Result<(u32, u32, u32, u32, Vec<u8>), VkxError> {
330    let width = r.read_u32::<LittleEndian>()?;
331    let height = r.read_u32::<LittleEndian>()?;
332    let bit_depth = r.read_u32::<LittleEndian>()?;
333    let _compression = r.read_u32::<LittleEndian>()?;
334    let data_byte_size = r.read_u32::<LittleEndian>()?;
335
336    let mut raw = vec![0u8; data_byte_size as usize];
337    r.read_exact(&mut raw)?;
338    Ok((width, height, bit_depth, data_byte_size, raw))
339}
340
341/// Parse "string data" block: reads a u32 length then length*2 bytes for UTF-16-ish storage
342fn parse_string_data<R: Read + Seek>(r: &mut R) -> Result<(Option<String>, Option<String>), VkxError> {
343    // title
344    let size_title_chars = r.read_u32::<LittleEndian>()?;
345    let size_title_bytes = (size_title_chars as usize) * 2;
346    let mut title_bytes = vec![0u8; size_title_bytes];
347    r.read_exact(&mut title_bytes)?;
348    let title = decode_utf16_like(&title_bytes);
349
350    // lens_name
351    let size_lens_chars = r.read_u32::<LittleEndian>()?;
352    let size_lens_bytes = (size_lens_chars as usize) * 2;
353    let mut lens_bytes = vec![0u8; size_lens_bytes];
354    r.read_exact(&mut lens_bytes)?;
355    let lens_name = decode_utf16_like(&lens_bytes);
356
357    Ok((title, lens_name))
358}
359
/// Decodes a byte buffer that is expected to hold UTF-16-LE text.
///
/// The bytes are first interpreted as little-endian 16-bit units (a trailing odd byte
/// becomes a zero unit). If that does not give a non-empty string, the even-indexed
/// bytes alone are tried as UTF-8. Leading and trailing NUL characters are stripped in
/// both cases. Returns `None` for empty input or when neither attempt produces text.
fn decode_utf16_like(bytes: &[u8]) -> Option<String> {
    /// Strips surrounding NULs; an empty remainder counts as "no text".
    fn strip_nuls(text: String) -> Option<String> {
        let stripped = text.trim_matches('\0');
        if stripped.is_empty() {
            None
        } else {
            Some(stripped.to_string())
        }
    }

    if bytes.is_empty() {
        return None;
    }

    // First attempt: proper UTF-16-LE.
    let units: Vec<u16> = bytes
        .chunks(2)
        .map(|pair| match pair {
            [lo, hi] => u16::from_le_bytes([*lo, *hi]),
            _ => 0,
        })
        .collect();

    String::from_utf16(&units)
        .ok()
        .and_then(strip_nuls)
        .or_else(|| {
            // Fallback: drop every second byte and treat the rest as UTF-8.
            let low_bytes: Vec<u8> = bytes.iter().step_by(2).copied().collect();
            String::from_utf8(low_bytes).ok().and_then(strip_nuls)
        })
}