fef 0.2.3

Rust implementation of a parser of the FEF format
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
use std::{cmp::Ordering, io::Write};

use crate::{
    common::traits::private::Sealed,
    v0::{
        config::Config,
        raw::error,
        traits::{ReadFrom, WriteTo},
    },
};

use super::error::VariableLengthEnumError;

/// A variable length enum as defined by the FEF specification.
///
/// The type wraps an unsigned integer of arbitrary size. Its internal representation is private
/// and not stabilized; go through the provided conversions (for example `From<usize>` and the
/// fallible conversion to `usize`) instead of relying on how the value is stored.
///
/// # Examples
/// ```rust
/// # use fef::v0::raw::VariableLengthEnum;
/// fn get_variable_length_enum() -> VariableLengthEnum {
///     /* Some code providing a variable length enum */
/// #   VariableLengthEnum::from(42)
/// }
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let variable_length_enum = get_variable_length_enum();
/// let value: usize = variable_length_enum.try_into()?;
/// if value == 42 {
///     println!("The value is 42!");
/// #   assert!(true);
/// } else {
///     println!("The value is not 42, it is {}", value);
/// #   assert!(false);
/// }
///
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct VariableLengthEnum {
    // Deliberately private: committing to a public representation would be risky, because it is
    // unknown what future FEF versions will use this enum for.
    // Callers are expected to convert with `from` / `try_into` instead. Such conversions should
    // be rare, so their slightly lower ergonomics are an acceptable trade-off.
    value: VariableLengthEnumStorage,
}

/// Backing storage for the value of a variable length enum.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum VariableLengthEnumStorage {
    /// Used whenever the value fits into a `u64`.
    U64(u64),
    /// Used when the value does not fit into a `u64`. The bytes are kept in stream order
    /// (most significant 7-bit group first), encoded as in the FEF specification and without
    /// leading `0x80` padding bytes.
    ///
    /// The double indirection of `Box<Vec<u8>>` is intentional: values this large are a degenerate
    /// case where performance does not matter, while boxing keeps this variant as small as the
    /// `u64` variant, so the far more common case does not pay for it.
    Overflow(Box<Vec<u8>>),
}

impl PartialOrd for VariableLengthEnumStorage {
    /// Delegates to [`Ord::cmp`]; the ordering is total.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for VariableLengthEnumStorage {
    /// Orders two stored values numerically.
    ///
    /// * Two `U64` values compare as plain integers.
    /// * A `U64` value is always smaller than an `Overflow` value.
    /// * Two `Overflow` values compare by length first (no leading padding is stored, so a longer
    ///   encoding is a larger number) and then byte by byte starting from the most significant
    ///   byte, which is the first element of the vector.
    fn cmp(&self, other: &Self) -> Ordering {
        match (self, other) {
            (Self::U64(lhs), Self::U64(rhs)) => lhs.cmp(rhs),
            (Self::U64(_), Self::Overflow(_)) => Ordering::Less,
            (Self::Overflow(_), Self::U64(_)) => Ordering::Greater,
            (Self::Overflow(lhs), Self::Overflow(rhs)) => lhs
                .len()
                .cmp(&rhs.len())
                // The bytes are big-endian, so the comparison must run front to back;
                // iterating in reverse would let the least significant byte decide.
                .then_with(|| lhs.iter().cmp(rhs.iter())),
        }
    }
}

/// Builds a variable length enum out of a `usize`.
///
/// The conversion is infallible: the value is widened to `u64` and kept in the `U64` storage variant.
///
/// # Examples
/// ```rust
/// # use fef::v0::raw::VariableLengthEnum;
/// let selector = 42;
///
/// let variable_length_enum = VariableLengthEnum::from(selector);
/// ```
impl From<usize> for VariableLengthEnum {
    fn from(value: usize) -> Self {
        let widened = value as u64;
        let storage = VariableLengthEnumStorage::U64(widened);
        Self { value: storage }
    }
}

// Marks `VariableLengthEnum` with the `Sealed` trait from `common::traits::private`.
// NOTE(review): presumably a prerequisite for implementing the crate's sealed traits
// (e.g. `ReadFrom` / `WriteTo`) — confirm against the trait definitions.
impl Sealed for VariableLengthEnum {}

/// Reading a variable length enum from a byte stream.
///
/// This reads from a bytes reader and interprets the bytes as a variable length enum.
/// Reading stops right after the last byte of the enum (the first byte whose most significant bit is clear).
/// It returns an error if it encounters an unexpected end of the byte stream or other I/O errors.
///
/// Leading `0x80` bytes are padding and are ignored. Values that do not fit into a `u64` are kept
/// as their byte encoding; `0x80` bytes inside such a value are significant and are preserved.
///
/// # Examples
///
/// Simple reading of a small variable length enum:
/// ```rust
/// # use fef::v0::raw::VariableLengthEnum;
/// # use fef::v0::traits::ReadFrom;
/// # use std::io::Read;
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let configuration = fef::v0::config::OverridableConfig::default();
/// let file: Vec<u8> = vec![0x81, 0x80, 0x00, 0x12];
/// let mut file_reader = &mut file.as_slice();
///
/// let variable_length_enum = VariableLengthEnum::read_from(&mut file_reader, &configuration)?;
///
/// assert_eq!(variable_length_enum, VariableLengthEnum::from(0b1_0000000_0000000));
///
/// let mut next_byte = [0; 1];
///
/// file_reader.read_exact(&mut next_byte)?;
/// assert_eq!(next_byte[0], 0x12);
///
/// file_reader.read_exact(&mut next_byte).expect_err("Expected EOF");
///
/// # Ok(())
/// # }
/// ```
///
/// Reading a large variable length enum with leading `0x80` bytes:
/// ```rust
/// # use fef::v0::raw::VariableLengthEnum;
/// # use fef::v0::traits::ReadFrom;
/// # use std::io::Read;
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let configuration = fef::v0::config::OverridableConfig::default();
/// let file: Vec<u8> = vec![0x80, 0xFF, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x12];
/// let mut file_reader1 = &mut file.as_slice();
/// let mut file_reader2 = &mut file.as_slice();
///
/// let mut void_bytes: [u8; 1] = [0; 1];
/// file_reader1.read_exact(&mut void_bytes)?; // Skip the leading 0x80, it should have no effect
///
/// let variable_length_enum = VariableLengthEnum::read_from(&mut file_reader1, &configuration)?;
/// let variable_length_enum2 = VariableLengthEnum::read_from(&mut file_reader2, &configuration)?;
///
/// assert_eq!(variable_length_enum, variable_length_enum2);
///
/// let mut next_byte = [0; 1];
///
/// file_reader1.read_exact(&mut next_byte)?;
/// assert_eq!(next_byte[0], 0x12);
///
/// file_reader1.read_exact(&mut next_byte).expect_err("Expected EOF");
///
/// file_reader2.read_exact(&mut next_byte)?;
/// assert_eq!(next_byte[0], 0x12);
///
/// file_reader2.read_exact(&mut next_byte).expect_err("Expected EOF");
///
/// # Ok(())
/// # }
/// ```
///
/// Reading from a generic `&mut R` where `R: Read`:
/// ```rust
/// # use fef::v0::raw::VariableLengthEnum;
/// # use std::io::Read;
/// # use fef::v0::traits::ReadFrom;
/// # use fef::v0::config::Config;
///
/// fn read_two_variable_length_enums<R: std::io::Read + ?Sized, C: Config>(reader: &mut R, configuration: &C) -> Result<(VariableLengthEnum, VariableLengthEnum), Box<dyn std::error::Error>> {
///     let enum1 = VariableLengthEnum::read_from(reader, configuration)?; // Notice the reborrowing here
///     let enum2 = VariableLengthEnum::read_from(reader,configuration)?;
///
///     Ok((enum1, enum2))
/// }
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let configuration = fef::v0::config::OverridableConfig::default();
/// let file = vec![0x80, 0x80, 0x00, 0x82, 0x80, 0x04, 0x12];
/// let mut file_reader = &mut file.as_slice();
/// let (enum1, enum2) = read_two_variable_length_enums(&mut file_reader, &configuration)?;
///
/// assert_eq!(enum1, VariableLengthEnum::from(0));
/// assert_eq!(enum2, VariableLengthEnum::from(0b10_0000000_0000100));
///
/// let mut next_byte = [0; 1];
/// file_reader.read_exact(&mut next_byte)?;
/// assert_eq!(next_byte[0], 0x12);
///
/// file_reader.read_exact(&mut next_byte).expect_err("Expected EOF");
///
/// # Ok(())
/// # }
/// ```
///
/// A value that does not fit into a `u64` survives a read/write round trip unchanged:
/// ```rust
/// # use fef::v0::raw::VariableLengthEnum;
/// # use fef::v0::traits::{ReadFrom, WriteTo};
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let configuration = fef::v0::config::OverridableConfig::default();
/// let file: Vec<u8> = vec![0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00];
/// let mut file_reader = &mut file.as_slice();
///
/// let variable_length_enum = VariableLengthEnum::read_from(&mut file_reader, &configuration)?;
///
/// let mut written = Vec::new();
/// variable_length_enum.write_to(&mut written, &configuration)?;
/// assert_eq!(written, file);
///
/// # Ok(())
/// # }
/// ```
impl<R> ReadFrom<R> for VariableLengthEnum
where
    R: std::io::Read + ?Sized,
{
    type ReadError = VariableLengthEnumError;

    fn read_from<C: ?Sized + Config>(reader: &mut R, config: &C) -> Result<Self, Self::ReadError> {
        // Only used once the value no longer fits into a u64
        let mut byte_vec: Vec<u8> = Vec::new();
        // `Some` while the value still fits into a u64, `None` once we switched to `byte_vec`
        let mut accumulator: Option<u64> = Some(0);

        loop {
            // We read the next byte from the stream
            let mut bytes: [u8; 1] = [0; 1];
            reader.read_exact(&mut bytes)?;
            let byte = bytes[0];

            if byte == 0x80 && accumulator == Some(0) {
                // Leading 0x80 is ignored
                // This is only padding as defined in the FEF specification, so we ignore it.
                // The check must not fire in overflow mode (`None`): there a 0x80 byte is a real
                // group of seven zero bits and dropping it would change the value.
                continue;
            }

            accumulator = match accumulator {
                // We have enough space to shift the accumulator left by 7 bits and add the new byte
                Some(inner) if inner.leading_zeros() >= 7 => {
                    Some(inner << 7 | (byte & 0x7F) as u64)
                }
                // Value is too large to fit into a u64, we need to store it in a Vec<u8>.
                // This arm runs at most once per read, so `byte_vec` is still empty here.
                Some(inner) => {
                    // Built directly from the u64 so nothing is truncated on targets
                    // where usize is narrower than 64 bits
                    let inner_as_vre = VariableLengthEnum {
                        value: VariableLengthEnumStorage::U64(inner),
                    };
                    inner_as_vre.write_to(&mut byte_vec, config)?; // Write the accumulator to the vec
                    // `write_to` terminates its output by clearing the continuation bit of the
                    // last byte. More bytes follow here, so the bit has to be set again to keep
                    // the stored bytes a valid encoding.
                    if let Some(last) = byte_vec.last_mut() {
                        *last |= 0x80;
                    }
                    byte_vec.push(byte);
                    None
                }
                // Already in overflow mode, bytes are stored verbatim
                None => {
                    byte_vec.push(byte);
                    None
                }
            };

            if byte < 0x80 {
                // The leading bit is 0, so this is the last byte, we stop reading
                break;
            }
        }

        // If the value fits into a `u64`, we have the accumulator set and can use it, else we use the byte_vec
        let value = match accumulator {
            Some(accumulator) => VariableLengthEnumStorage::U64(accumulator),
            None => VariableLengthEnumStorage::Overflow(Box::new(byte_vec)),
        };
        Ok(VariableLengthEnum { value })
    }
}

/// Converting a variable length enum to a usize for easier use.
///
/// This conversion is fallible, as the value may be too large to fit into a usize.
/// This is however very unlikely - the FEF specification uses variable length enums to store values that are not too large.
///
/// # Examples
/// ```rust
/// # use fef::v0::raw::VariableLengthEnum;
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let variable_length_enum = VariableLengthEnum::from(42);
///
/// let value: usize = variable_length_enum.try_into()?;
///
/// match value {
///     42 => assert!(true),
///      _ => assert!(false),
/// }
///
/// # Ok(())
/// # }
/// ```
///
/// Example of a value that is too large to fit into a usize:
/// ```rust
/// # use fef::v0::raw::VariableLengthEnum;
/// # use std::io::Read;
/// # use fef::v0::traits::ReadFrom;
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let configuration = fef::v0::config::OverridableConfig::default();
/// let mut file = [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00];
/// let mut file_reader = &mut file.as_ref();
///
/// let variable_length_enum = VariableLengthEnum::read_from(&mut file_reader, &configuration)?;
/// let value: Result<usize, _> = variable_length_enum.try_into(); // This will error
///
/// assert!(value.is_err());
/// # Ok(())
/// # }
/// ```
impl TryInto<usize> for VariableLengthEnum {
    type Error = error::VariableLengthEnumError; // This is a placeholder, we can change it to a more specific error type later

    fn try_into(self) -> Result<usize, Self::Error> {
        match self.value {
            VariableLengthEnumStorage::U64(u64_value) => u64_value
                .try_into()
                .map_err(|_| error::VariableLengthEnumError::TooBig),
            VariableLengthEnumStorage::Overflow(_) => Err(error::VariableLengthEnumError::TooBig),
        }
    }
}

/// Conversion to string of a variable length enum
///
/// For values lower than or equal to `u64::MAX`, this implementation guarantees that it will format the value as a decimal string representation.
/// For values over `u64::MAX`, the exact output of this formatting is unspecified.
/// (Currently such values are rendered as `0x` followed by the complete value in hexadecimal,
/// padded at the front to whole bytes.)
///
/// # Examples
/// ```rust
/// # use fef::v0::raw::VariableLengthEnum;
/// let value = 51;
/// let vre = VariableLengthEnum::from(value);
///
/// println!("{}", value); // Prints "51"
/// # assert_eq!("51", format!("{}", value));
/// println!("{}", vre); // Also prints "51"
/// # assert_eq!("51", format!("{}", vre));
/// # assert_eq!(format!("{}", usize::MAX), format!("{}", VariableLengthEnum::from(usize::MAX)));
/// ```
impl std::fmt::Display for VariableLengthEnum {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.value {
            VariableLengthEnumStorage::U64(u64_value) => write!(f, "{}", u64_value),
            VariableLengthEnumStorage::Overflow(byte_vec) => {
                write!(f, "0x")?;
                // Every stored byte contributes 7 payload bits. The bit counter starts with as
                // many (zero) padding bits as are needed to make the total a multiple of 8, so
                // the hex digits are right-aligned and no low-order bits are left unprinted.
                let payload_bits = byte_vec.len() * 7;
                let mut bit_length = ((8 - payload_bits % 8) % 8) as u8;
                // Holds the bits that are read but not yet printed in its low `bit_length` bits.
                // `bit_length` never exceeds 14, so a u16 is wide enough.
                let mut accumulator: u16 = 0;
                for byte in byte_vec.iter() {
                    accumulator = accumulator << 7 | (byte & 0x7F) as u16;
                    bit_length += 7;

                    if bit_length >= 8 {
                        // Emit the eight oldest pending bits
                        bit_length -= 8;
                        let hex_byte = ((accumulator >> bit_length) & 0xFF) as u8;
                        write!(f, "{:02x}", hex_byte)?;
                    }
                }
                Ok(())
            }
        }
    }
}

impl<W> WriteTo<W> for VariableLengthEnum
where
    W: Write + ?Sized,
{
    type WriteError = VariableLengthEnumError;
    /// Writes a variable length enum to the given writer according to the [FEF specification](https://github.com/jiricekcz/fef-specification/blob/main/binary_types/Variable%20Length%20Enum.md).
    ///
    /// # Examples
    ///
    /// With a single byte value:
    /// ```rust
    /// # use fef::v0::raw::VariableLengthEnum;
    /// # use fef::v0::traits::WriteTo;
    /// # use std::io::Write;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let variable_length_enum = VariableLengthEnum::from(63);
    /// let mut file = Vec::new();
    /// variable_length_enum.write_to(&mut file, &fef::v0::config::OverridableConfig::default())?;
    /// assert_eq!(file, vec![0x3F]);
    /// # Ok(())
    /// # }
    /// ```
    /// With a multi-byte value:
    /// ```rust
    /// # use fef::v0::raw::VariableLengthEnum;
    /// # use fef::v0::traits::WriteTo;
    /// # use std::io::Write;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let variable_length_enum = VariableLengthEnum::from(16385);
    /// let mut file = Vec::new();
    /// variable_length_enum.write_to(&mut file, &fef::v0::config::OverridableConfig::default())?;
    /// assert_eq!(file, vec![0x81, 0x80, 0x01]);
    /// # Ok(())
    /// # }
    /// ```
    fn write_to<C: ?Sized + Config>(
        &self,
        writer: &mut W,
        _configuration: &C,
    ) -> Result<(), Self::WriteError> {
        match &self.value {
            VariableLengthEnumStorage::U64(u64_value) => {
                let value = *u64_value;

                let digits = if value == 0 { 1 } else { value.ilog2() + 1 }; // Number of digits in the value
                let byte_count = digits.div_ceil(7); // 7 bits per byte

                let mut bytes = Vec::with_capacity(byte_count as usize);

                // If best-case performance would be an issue, it is possible to write directly to the writer without allocating a Vec<u8>.
                // However this implementation significantly improves performance in cases, when non-buffering writers are used and penalty
                // for allocating an at most 8 byte Vec<u8> should be negligible.

                for byte_index in (1..byte_count).rev() {
                    // Write all non-least significant bytes with the leading bit set
                    let byte_value = (value >> (byte_index * 7)) & 0x7F | 0x80;
                    bytes.push(byte_value as u8);
                }

                bytes.push((value & 0x7F) as u8); // Write the least significant byte without the leading bit

                writer.write_all(&bytes)?;
            }
            VariableLengthEnumStorage::Overflow(byte_vec) => {
                writer.write_all(&byte_vec)?;
            }
        }
        Ok(())
    }
}

impl VariableLengthEnum {
    /// Returns how many bytes the shortest variable length enum encoding of `value` takes.
    ///
    /// Each byte carries 7 bits of the value; zero still takes one byte.
    pub(crate) fn min_byte_length_of_usize(value: usize) -> usize {
        // Position of the highest set bit, counted from 1; zero is treated as a single bit
        let significant_bits = (usize::BITS - value.leading_zeros()).max(1);
        significant_bits.div_ceil(7) as usize
    }

    /// Returns how many bytes the shortest encoding of this variable length enum takes.
    ///
    /// For values kept as overflow bytes this is the number of stored bytes.
    pub(crate) fn min_byte_length(&self) -> usize {
        match &self.value {
            VariableLengthEnumStorage::Overflow(byte_vec) => byte_vec.len(),
            VariableLengthEnumStorage::U64(stored) => {
                // Position of the highest set bit, counted from 1; zero is treated as a single bit
                let significant_bits = (u64::BITS - stored.leading_zeros()).max(1);
                significant_bits.div_ceil(7) as usize
            }
        }
    }
}