mdf4_rs/blocks/
common.rs

1// blocks/common.rs
2//! Common types, traits, and helper functions for MDF block parsing.
3//!
4//! This module provides:
5//! - [`BlockHeader`]: The 24-byte header present in all MDF blocks
6//! - [`BlockParse`]: Trait for parsing blocks from bytes
7//! - [`DataType`]: Enum representing MDF data types
8//! - Byte parsing helper functions to reduce code duplication
9
10use crate::{
11    Error, Result,
12    blocks::{metadata_block::MetadataBlock, text_block::TextBlock},
13};
14use alloc::format;
15use alloc::string::{String, ToString};
16use alloc::vec::Vec;
17
18// ============================================================================
19// Byte Parsing Helpers
20// ============================================================================
21
/// Decode a little-endian `u64` from the 8 bytes of `bytes` starting at `offset`.
///
/// # Panics
/// Panics if fewer than 8 bytes are available at `offset`. Check the buffer
/// length first (for example with `validate_buffer_size`) when the input is
/// not known to be long enough.
#[inline]
pub fn read_u64(bytes: &[u8], offset: usize) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&bytes[offset..offset + 8]);
    u64::from_le_bytes(raw)
}
39
/// Decode a little-endian `u32` from the 4 bytes of `bytes` starting at `offset`.
///
/// # Panics
/// Panics if fewer than 4 bytes are available at `offset`.
#[inline]
pub fn read_u32(bytes: &[u8], offset: usize) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&bytes[offset..offset + 4]);
    u32::from_le_bytes(raw)
}
50
/// Decode a little-endian `u16` from the 2 bytes of `bytes` starting at `offset`.
///
/// # Panics
/// Panics if fewer than 2 bytes are available at `offset`.
#[inline]
pub fn read_u16(bytes: &[u8], offset: usize) -> u16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&bytes[offset..offset + 2]);
    u16::from_le_bytes(raw)
}
56
/// Decode a little-endian IEEE-754 `f64` from the 8 bytes of `bytes` starting
/// at `offset`.
///
/// # Panics
/// Panics if fewer than 8 bytes are available at `offset`.
#[inline]
pub fn read_f64(bytes: &[u8], offset: usize) -> f64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&bytes[offset..offset + 8]);
    f64::from_le_bytes(raw)
}
71
/// Fetch the single byte of `bytes` located at `offset`.
///
/// Provided for symmetry with the multi-byte readers in this module.
///
/// # Panics
/// Panics if `offset` is not a valid index into `bytes`.
#[inline]
pub fn read_u8(bytes: &[u8], offset: usize) -> u8 {
    u8::from_le_bytes([bytes[offset]])
}
77
78// ============================================================================
79// Validation Helpers
80// ============================================================================
81
82/// Validate that a buffer has at least `expected` bytes.
83///
84/// Returns `Err(TooShortBuffer)` if the buffer is too small.
85#[inline]
86pub fn validate_buffer_size(bytes: &[u8], expected: usize) -> Result<()> {
87    if bytes.len() < expected {
88        return Err(Error::TooShortBuffer {
89            actual: bytes.len(),
90            expected,
91            file: file!(),
92            line: line!(),
93        });
94    }
95    Ok(())
96}
97
98/// Validate that a block header has the expected ID.
99#[inline]
100pub fn validate_block_id(header: &BlockHeader, expected_id: &str) -> Result<()> {
101    if header.id != expected_id {
102        return Err(Error::BlockSerializationError(format!(
103            "Block must have ID '{}', found '{}'",
104            expected_id, header.id
105        )));
106    }
107    Ok(())
108}
109
110/// Validate that a block header has the expected length.
111#[inline]
112pub fn validate_block_length(header: &BlockHeader, expected: u64) -> Result<()> {
113    if header.length != expected {
114        return Err(Error::BlockSerializationError(format!(
115            "Block must have length={}, found {}",
116            expected, header.length
117        )));
118    }
119    Ok(())
120}
121
/// Debug-only check that `size` is a multiple of 8.
///
/// Uses `debug_assert_eq!`, so the check is only active when debug assertions
/// are enabled.
///
/// # Panics
/// Panics (with debug assertions enabled) if `size` is not a multiple of 8.
#[inline]
pub fn debug_assert_aligned(size: usize) {
    let remainder = size % 8;
    debug_assert_eq!(remainder, 0, "Block size {} is not 8-byte aligned", size);
}
127
/// Number of bytes (0..=7) that must be appended to `size` so that the total
/// becomes a multiple of 8.
#[inline]
pub const fn padding_to_align_8(size: usize) -> usize {
    match size % 8 {
        0 => 0,
        remainder => 8 - remainder,
    }
}
133
134/// Safely convert a u64 offset/address to usize for indexing.
135///
136/// On 64-bit systems, this is always safe. On 32-bit systems, returns an error
137/// if the value exceeds `usize::MAX`, preventing potential overflow issues.
138///
139/// # Arguments
140/// * `value` - The u64 value to convert (typically a file offset or address).
141/// * `context` - Description of what the value represents (for error messages).
142///
143/// # Returns
144/// The value as `usize`, or an error if conversion would overflow.
145#[inline]
146pub fn u64_to_usize(value: u64, context: &str) -> Result<usize> {
147    usize::try_from(value).map_err(|_| {
148        Error::BlockSerializationError(format!(
149            "{} value {} exceeds maximum addressable size on this platform",
150            context, value
151        ))
152    })
153}
154
/// The 24-byte header found at the start of every MDF block.
///
/// Layout (see `to_bytes` / `from_bytes`): 4-byte id, 4-byte reserved field,
/// 8-byte total length, 8-byte link count; integers are little-endian.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct BlockHeader {
    /// Block type identifier, four characters long (e.g., "##HD", "##DG").
    pub id: String,
    /// Reserved field; expected to be 0.
    pub reserved: u32,
    /// Size of the whole block in bytes, this header included.
    pub length: u64,
    /// How many link fields the block contains.
    pub link_count: u64,
}

impl Default for BlockHeader {
    /// Builds a placeholder header: id `"UNSET"`, every numeric field 0.
    ///
    /// The placeholder is not a valid MDF block header (its id is not even
    /// four bytes long) and must be replaced before the block is written.
    fn default() -> Self {
        Self {
            id: "UNSET".into(),
            reserved: 0,
            length: 0,
            link_count: 0,
        }
    }
}
180
181impl BlockHeader {
182    /// Serializes the BlockHeader to bytes according to MDF 4.1 specification.
183    ///
184    /// The BlockHeader is always 24 bytes and consists of:
185    /// - id: 4 bytes (ASCII characters, must be exactly 4 bytes)
186    /// - reserved: 4 bytes (always 0)
187    /// - length: 8 bytes (total length of the block including this header)
188    /// - link_count: 8 bytes (number of links in this block)
189    ///
190    /// # Returns
191    /// - `Ok(Vec<u8>)` containing the serialized block header
192    /// - `Err(Error)` if serialization fails
193    pub fn to_bytes(&self) -> Result<Vec<u8>> {
194        let mut buffer = Vec::with_capacity(24);
195
196        // 1. Write the ID field (4 bytes)
197        let id_bytes = self.id.as_bytes();
198        let mut id_field = [0u8; 4];
199        let id_len = core::cmp::min(id_bytes.len(), 4);
200        id_field[..id_len].copy_from_slice(&id_bytes[..id_len]);
201        buffer.extend_from_slice(&id_field);
202
203        // 2. Write reserved field (4 bytes)
204        buffer.extend_from_slice(&self.reserved.to_le_bytes());
205
206        // 3. Write length field (8 bytes)
207        buffer.extend_from_slice(&self.length.to_le_bytes());
208
209        // 4. Write link_count field (8 bytes)
210        buffer.extend_from_slice(&self.link_count.to_le_bytes());
211
212        debug_assert_eq!(buffer.len(), 24);
213        Ok(buffer)
214    }
215
216    /// Parse a block header from the first 24 bytes of `bytes`.
217    ///
218    /// # Arguments
219    /// * `bytes` - Slice containing at least 24 bytes from the MDF file.
220    ///
221    /// # Returns
222    /// A [`BlockHeader`] on success or [`Error::TooShortBuffer`] when the
223    /// slice is smaller than 24 bytes.
224    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
225        validate_buffer_size(bytes, 24)?;
226
227        let id = match core::str::from_utf8(&bytes[0..4]) {
228            Ok(s) => String::from(s),
229            Err(_) => String::from_utf8_lossy(&bytes[0..4]).into_owned(),
230        };
231
232        Ok(Self {
233            id,
234            reserved: read_u32(bytes, 4),
235            length: read_u64(bytes, 8),
236            link_count: read_u64(bytes, 16),
237        })
238    }
239}
240
241pub trait BlockParse<'a>: Sized {
242    const ID: &'static str;
243
244    fn parse_header(bytes: &[u8]) -> Result<BlockHeader> {
245        let header = BlockHeader::from_bytes(&bytes[0..24])?;
246        if header.id != Self::ID {
247            return Err(Error::BlockIDError {
248                actual: header.id.clone(),
249                expected: Self::ID.to_string(),
250            });
251        }
252        Ok(header)
253    }
254
255    fn from_bytes(bytes: &'a [u8]) -> Result<Self>;
256}
257
/// Data types that can be declared in an MDF file, each identified by a
/// numeric code (see [`DataType::to_u8`] and [`DataType::from_u8`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum DataType {
    /// Code 0: unsigned integer, little-endian.
    UnsignedIntegerLE,
    /// Code 1: unsigned integer, big-endian.
    UnsignedIntegerBE,
    /// Code 2: signed integer, little-endian.
    SignedIntegerLE,
    /// Code 3: signed integer, big-endian.
    SignedIntegerBE,
    /// Code 4: floating point, little-endian.
    FloatLE,
    /// Code 5: floating point, big-endian.
    FloatBE,
    /// Code 6: Latin-1 string.
    StringLatin1,
    /// Code 7: UTF-8 string.
    StringUtf8,
    /// Code 8: UTF-16 string, little-endian.
    StringUtf16LE,
    /// Code 9: UTF-16 string, big-endian.
    StringUtf16BE,
    /// Code 10: byte array.
    ByteArray,
    /// Code 11: MIME sample.
    MimeSample,
    /// Code 12: MIME stream.
    MimeStream,
    /// Code 13: CANopen date.
    CanOpenDate,
    /// Code 14: CANopen time.
    CanOpenTime,
    /// Code 15: complex number, little-endian.
    ComplexLE,
    /// Code 16: complex number, big-endian.
    ComplexBE,
    /// Any code outside `0..=16`. The original code is not retained.
    Unknown(()),
}

impl DataType {
    /// Known variants ordered so that each entry's index equals its numeric code.
    const BY_CODE: [DataType; 17] = [
        DataType::UnsignedIntegerLE,
        DataType::UnsignedIntegerBE,
        DataType::SignedIntegerLE,
        DataType::SignedIntegerBE,
        DataType::FloatLE,
        DataType::FloatBE,
        DataType::StringLatin1,
        DataType::StringUtf8,
        DataType::StringUtf16LE,
        DataType::StringUtf16BE,
        DataType::ByteArray,
        DataType::MimeSample,
        DataType::MimeStream,
        DataType::CanOpenDate,
        DataType::CanOpenTime,
        DataType::ComplexLE,
        DataType::ComplexBE,
    ];

    /// Numeric code of this data type.
    ///
    /// # Returns
    /// The code `0..=16` for a known variant.
    ///
    /// # Note
    /// [`DataType::Unknown`] maps to `0`, the same code as
    /// [`DataType::UnsignedIntegerLE`], so converting an unknown type to a
    /// code and back does not yield `Unknown` again.
    pub fn to_u8(&self) -> u8 {
        match *self {
            Self::UnsignedIntegerLE => 0,
            Self::UnsignedIntegerBE => 1,
            Self::SignedIntegerLE => 2,
            Self::SignedIntegerBE => 3,
            Self::FloatLE => 4,
            Self::FloatBE => 5,
            Self::StringLatin1 => 6,
            Self::StringUtf8 => 7,
            Self::StringUtf16LE => 8,
            Self::StringUtf16BE => 9,
            Self::ByteArray => 10,
            Self::MimeSample => 11,
            Self::MimeStream => 12,
            Self::CanOpenDate => 13,
            Self::CanOpenTime => 14,
            Self::ComplexLE => 15,
            Self::ComplexBE => 16,
            // No code is stored for unknown types; fall back to 0.
            Self::Unknown(_) => 0,
        }
    }

    /// Map a numeric code to its `DataType`.
    ///
    /// Codes `0..=16` select the matching variant; every other value yields
    /// [`DataType::Unknown`].
    pub fn from_u8(value: u8) -> Self {
        match Self::BY_CODE.get(usize::from(value)) {
            Some(&known) => known,
            None => Self::Unknown(()),
        }
    }

    /// A typical bit width for this data type.
    ///
    /// Intended for creating channels when no explicit bit count is given:
    /// 32 for integers and floats, 64 for CANopen date/time and complex
    /// values, 8 for strings, byte arrays, MIME data and unknown types.
    pub fn default_bits(&self) -> u32 {
        match *self {
            Self::UnsignedIntegerLE
            | Self::UnsignedIntegerBE
            | Self::SignedIntegerLE
            | Self::SignedIntegerBE
            | Self::FloatLE
            | Self::FloatBE => 32,
            Self::CanOpenDate | Self::CanOpenTime | Self::ComplexLE | Self::ComplexBE => 64,
            Self::StringLatin1
            | Self::StringUtf8
            | Self::StringUtf16LE
            | Self::StringUtf16BE
            | Self::ByteArray
            | Self::MimeSample
            | Self::MimeStream
            | Self::Unknown(_) => 8,
        }
    }
}
361
362impl core::fmt::Display for DataType {
363    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
364        match self {
365            DataType::UnsignedIntegerLE => write!(f, "uint (LE)"),
366            DataType::UnsignedIntegerBE => write!(f, "uint (BE)"),
367            DataType::SignedIntegerLE => write!(f, "int (LE)"),
368            DataType::SignedIntegerBE => write!(f, "int (BE)"),
369            DataType::FloatLE => write!(f, "float (LE)"),
370            DataType::FloatBE => write!(f, "float (BE)"),
371            DataType::StringLatin1 => write!(f, "string (Latin-1)"),
372            DataType::StringUtf8 => write!(f, "string (UTF-8)"),
373            DataType::StringUtf16LE => write!(f, "string (UTF-16 LE)"),
374            DataType::StringUtf16BE => write!(f, "string (UTF-16 BE)"),
375            DataType::ByteArray => write!(f, "byte array"),
376            DataType::MimeSample => write!(f, "MIME sample"),
377            DataType::MimeStream => write!(f, "MIME stream"),
378            DataType::CanOpenDate => write!(f, "CANopen date"),
379            DataType::CanOpenTime => write!(f, "CANopen time"),
380            DataType::ComplexLE => write!(f, "complex (LE)"),
381            DataType::ComplexBE => write!(f, "complex (BE)"),
382            DataType::Unknown(_) => write!(f, "unknown"),
383        }
384    }
385}
386
387/// Read a text or metadata block at `address` and return its contents.
388///
389/// # Arguments
390/// * `mmap` - The full memory mapped MDF file.
391/// * `address` - Offset of the target block; use `0` for no block.
392///
393/// # Returns
394/// The block's string contents if present or `Ok(None)` if `address` is zero or
395/// the block type is not text or metadata.
396pub fn read_string_block(mmap: &[u8], address: u64) -> Result<Option<String>> {
397    if address == 0 {
398        return Ok(None);
399    }
400
401    let offset = u64_to_usize(address, "block address")?;
402    validate_buffer_size(mmap, offset + 24)?;
403    let header = BlockHeader::from_bytes(&mmap[offset..offset + 24])?;
404
405    match header.id.as_str() {
406        "##TX" => Ok(Some(TextBlock::from_bytes(&mmap[offset..])?.text)),
407        "##MD" => Ok(Some(MetadataBlock::from_bytes(&mmap[offset..])?.xml)),
408        _ => Ok(None),
409    }
410}