tinyklv 0.1.2

The simplest Key-Length-Value (KLV) framework in Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
//! Basic Encoding Rules (BER) length and OID codecs
//!
//! Provides the [`BerLength`] and [`BerOid`] types along with the sealed
//! [`OfBerCommon`] trait that constrains their type parameter to the
//! built-in unsigned integers (`u8`, `u16`, `u32`, `u64`, `u128`, `usize`).
//!
//! BER length encoding is used in the KLV "L" field. Short form encodes
//! lengths 0..=127 in a single byte. Long form encodes larger lengths as
//! `0x80 | num_bytes` followed by `num_bytes` big-endian bytes.
//!
//! BER-OID encoding is used for the KLV "K" (key) field. Each value is
//! packed 7 bits per byte, MSB-first, with the MSB of each byte indicating
//! whether more bytes follow (continuation = 1, terminator = 0).
//!
//! Free-function wrappers for direct use in `#[klv(...)]` attributes are
//! in [`dec`] and [`enc`].
//!
//! Author: aav
// --------------------------------------------------
// mods
// --------------------------------------------------
pub mod dec;
pub mod enc;

// --------------------------------------------------
// local
// --------------------------------------------------
use crate::prelude::*;

// --------------------------------------------------
// external
// --------------------------------------------------
use num_traits::{
    bounds::UpperBounded, AsPrimitive, FromPrimitive, ToBytes, ToPrimitive, Unsigned,
};
use winnow::error::ParserError;
use winnow::token::{take, take_while};

/// Sealed-trait module backing [`OfBerCommon`]
///
/// `Sealed` is declared inside this private module and is required as a
/// supertrait by [`OfBerCommon`], so the `impl` lines below are the complete
/// list of types admitted: `u8`, `u16`, `u32`, `u64`, `u128` and the
/// pointer-width `usize`. Code outside this module cannot name `Sealed` and
/// therefore cannot extend that list.
mod private {
    /// Marker trait with no items; it exists only to be used as a bound
    pub trait Sealed {}
    impl Sealed for u8 {}
    impl Sealed for u16 {}
    impl Sealed for u32 {}
    impl Sealed for u64 {}
    impl Sealed for u128 {}
    // NOTE: `usize` is admitted as well, so the set is not strictly "fixed-width"
    impl Sealed for usize {}
}

/// The set of unsigned integer types usable as a BER length or BER-OID value.
///
/// This is a sealed trait: the `private::Sealed` supertrait limits it to the
/// types listed in the `private` module (`u8`, `u16`, `u32`, `u64`, `u128`
/// and `usize`), and it cannot be implemented downstream.
///
/// The remaining supertraits are what the codecs in this module rely on:
///
/// * [`ToBytes`] - big-endian byte view used when encoding a long-form length
/// * [`ToPrimitive`] / [`FromPrimitive`] - checked conversions to `u8` and from `u8` / `u128`
/// * [`AsPrimitive<u128>`] - widening to `u128` for arithmetic and reporting
/// * [`PartialOrd`] - comparison against the short-form threshold
pub trait OfBerCommon:
    Copy
    + ToBytes
    + Unsigned
    + UpperBounded
    + PartialOrd
    + ToPrimitive
    + FromPrimitive
    + AsPrimitive<u128>
    + private::Sealed
{
}
// Blanket implementation: every type that satisfies the full bound list
// (which, because of `private::Sealed`, means only the types sealed in
// `private`) is automatically an `OfBerCommon`.
impl<T> OfBerCommon for T where
    T: Copy
        + ToBytes
        + Unsigned
        + UpperBounded
        + PartialOrd
        + ToPrimitive
        + FromPrimitive
        + AsPrimitive<u128>
        + private::Sealed
{
}

#[derive(Debug, PartialEq)]
/// Enum representing Basic-Encoding-Rules (BER) Length Encoding.
///
/// Maximum precision: [`u128`]
///
/// The derived [`PartialEq`] compares variant and payload, so `Short(5)` and
/// `Long(5)` are *not* equal even though [`BerLength::as_u128`] reports the
/// same number for both.
///
/// * See: [https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1563-0-200204-S!!PDF-E.pdf](https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1563-0-200204-S!!PDF-E.pdf)
/// * See: [https://upload.wikimedia.org/wikipedia/commons/1/19/MISB_Standard_0601.pdf](https://upload.wikimedia.org/wikipedia/commons/1/19/MISB_Standard_0601.pdf) page 7
///
/// # Example
///
/// ```
/// use tinyklv::prelude::*;
/// use tinyklv::codecs::ber::BerLength;
///
/// assert_eq!(vec![128 + 3, 129, 182, 2], BerLength::new(8_500_738_u32).encode_value());
/// assert_eq!(BerLength::new(8_500_738_u32), BerLength::decode_value(&mut &vec![128 + 3, 129, 182, 2][..]).unwrap());
/// ```
pub enum BerLength<T: OfBerCommon> {
    /// Short form: the length is carried in a single byte.
    ///
    /// [`BerLength::new`] and the decoder only produce this variant for
    /// values below 128 (MSB clear). The variant is public, so the type
    /// itself does not enforce that range.
    Short(u8),
    /// Long form: a prefix byte `0x80 | n` followed by `n` big-endian bytes
    /// holding the length, stored here as the decoded integer.
    Long(T),
}
/// [`BerLength`] implementation
impl<T: OfBerCommon> BerLength<T> {
    /// Returns `true` when `val` can be represented in BER short form
    ///
    /// BER short form requires the value to be strictly less than 128 (0x80),
    /// so that the single length byte has its MSB clear. Values >= 128 must
    /// use long form encoding.
    ///
    /// The check goes through the checked `to_u8` conversion, which yields
    /// `None` for anything above `u8::MAX`, so no fallible conversion of the
    /// threshold into `T` (and therefore no panic path) is needed.
    #[inline(always)]
    fn can_be_short(val: &T) -> bool {
        matches!(val.to_u8(), Some(byte) if byte < 0x80)
    }

    /// Creates a new [`BerLength`] from an unsigned integer, choosing the correct BER form
    ///
    /// Values below the short-form threshold (< 128) are stored as
    /// [`BerLength::Short`]; all others as [`BerLength::Long`]. This is the
    /// constructor used by the static [`BerLength::encode_value`].
    ///
    /// This function never panics: the short-form byte is taken straight from
    /// the checked `to_u8` conversion instead of being unwrapped afterwards.
    ///
    /// # Arguments
    ///
    /// * `len` - The unsigned integer length to wrap
    ///
    /// # Returns
    ///
    /// A [`BerLength`] in either `Short` or `Long` variant depending on the magnitude of `len`
    ///
    /// # Example
    ///
    /// ```rust
    /// use tinyklv::codecs::ber::BerLength;
    ///
    /// assert!(matches!(BerLength::new(47_u32), BerLength::Short(47)));
    /// assert!(matches!(BerLength::new(200_u32), BerLength::Long(200)));
    /// ```
    pub fn new(len: T) -> Self {
        match len.to_u8() {
            // fits in one byte AND has its MSB clear -> short form
            Some(byte) if byte < 0x80 => BerLength::Short(byte),
            // 128..=255 (MSB set) or wider than a byte -> long form
            _ => BerLength::Long(len),
        }
    }

    /// Convenience static entry point: constructs a [`BerLength`] and immediately encodes it
    ///
    /// Equivalent to `BerLength::new(len).encode_value()`. Useful when you need
    /// the encoded bytes without retaining the wrapper value.
    ///
    /// # Arguments
    ///
    /// * `len` - The length value to encode
    ///
    /// # Returns
    ///
    /// A [`Vec<u8>`] containing the BER-encoded length
    ///
    /// # Example
    ///
    /// ```rust
    /// use tinyklv::codecs::ber::BerLength;
    ///
    /// assert_eq!(BerLength::encode_value(47_u64), vec![47]);
    /// assert_eq!(BerLength::encode_value(201_u64), vec![128 + 1, 201]);
    /// ```
    #[must_use]
    pub fn encode_value(len: T) -> Vec<u8> {
        Self::new(len).encode_value()
    }

    /// Returns the wrapped length value as a [`u128`], regardless of which BER form was used
    ///
    /// Both `Short` and `Long` variants are widened to [`u128`] without loss,
    /// since every admitted `T` is an unsigned integer no wider than 128 bits.
    pub fn as_u128(&self) -> u128 {
        match self {
            BerLength::Short(len) => u128::from(*len),
            BerLength::Long(len) => len.as_(),
        }
    }
}
/// [`BerLength`] implementation of [`EncodeValue`]
impl<T: OfBerCommon> crate::EncodeValue<Vec<u8>> for BerLength<T> {
    /// Encode a [`BerLength`] into a [`Vec<u8>`]
    ///
    /// * `Short(b)` is emitted as the single byte `b`.
    /// * `Long(len)` is emitted as `0x80 | n` followed by the `n` big-endian
    ///   bytes of `len` that remain after stripping leading zero bytes. A
    ///   `Long` that was hand-built with a value below 128 is still emitted
    ///   in short form.
    ///
    /// # Example
    ///
    /// ```
    /// use tinyklv::prelude::*;
    /// use tinyklv::codecs::ber::BerLength;
    ///
    /// let value0 = BerLength::new(47_u64);
    /// let value1 = BerLength::new(201_u64);
    /// let value2 = BerLength::new(123891829038102_u64);
    ///
    /// assert_eq!(value0.encode_value(), vec![47]);
    /// assert_eq!(value1.encode_value(), vec![128 + 1, 201]);
    /// assert_eq!(value2.encode_value(), vec![128 + 6, 112, 173, 208, 117, 220, 22]);
    ///
    /// // Can also directly encode:
    /// let value0_encoded = BerLength::encode_value(47_u64);
    /// let value1_encoded = BerLength::encode_value(201_u64);
    ///
    /// assert_eq!(value0_encoded, vec![47]);
    /// assert_eq!(value1_encoded, vec![128 + 1, 201]);
    /// ```
    fn encode_value(&self) -> Vec<u8> {
        // --------------------------------------------------
        // short form is a single byte, nothing else to do
        // --------------------------------------------------
        let len = match self {
            BerLength::Short(byte) => return vec![*byte],
            BerLength::Long(len) => len,
        };
        // --------------------------------------------------
        // defensive: `new` never builds `Long` for values < 128, but the
        // variant is public, so a hand-built one is still emitted short
        // --------------------------------------------------
        if Self::can_be_short(len) {
            #[allow(
                clippy::expect_used,
                reason = "this should never panic, due to trait bounds"
            )]
            let single = len.to_u8().expect("if unsigned int is less than 128, then it can always fit into u8, why did this panic?");
            return vec![single];
        }
        // --------------------------------------------------
        // big-endian bytes with the leading zero bytes stripped
        // --------------------------------------------------
        let be_bytes = len.to_be_bytes();
        let all: &[u8] = be_bytes.as_ref();
        let first_significant = all.iter().position(|&b| b != 0).unwrap_or(all.len());
        let payload = &all[first_significant..];
        // --------------------------------------------------
        // prefix byte (MSB set | payload byte count), then the payload
        // --------------------------------------------------
        let mut result = Vec::with_capacity(payload.len() + 1);
        result.push(0b1000_0000 | (payload.len() as u8));
        result.extend_from_slice(payload);
        result
    }
}
/// [`BerLength`] implementation of [`crate::traits::DecodeValue`]
impl<T: OfBerCommon> crate::DecodeValue<&[u8]> for BerLength<T> {
    /// Decode a [`BerLength`] from a [`&[u8]`]
    ///
    /// # Example
    ///
    /// ```
    /// use tinyklv::prelude::*;
    /// use tinyklv::codecs::ber::BerLength;
    ///
    /// let value0 = vec![47];
    /// let value1 = vec![128 + 1, 201];
    /// let value2 = vec![128 + 6, 112, 173, 208, 117, 220, 22];
    ///
    /// assert_eq!(BerLength::decode_value(&mut &value0[..]).unwrap(), BerLength::new(47_u64));
    /// assert_eq!(BerLength::decode_value(&mut &value1[..]).unwrap(), BerLength::new(201_u64));
    /// assert_eq!(BerLength::decode_value(&mut &value2[..]).unwrap(), BerLength::new(123891829038102_u64));
    /// ```
    fn decode_value(input: &mut &[u8]) -> crate::Result<Self> {
        let checkpoint = input.checkpoint();
        // --------------------------------------------------
        // err if no bytes
        // --------------------------------------------------
        let first_byte = take_one(input)?;
        let first_byte = first_byte[0];
        // --------------------------------------------------
        // if MSB is not set, it's a short length (single byte)
        // --------------------------------------------------
        if first_byte & 0x80 == 0 {
            return Ok(BerLength::Short(first_byte));
        }
        // --------------------------------------------------
        // extract the number of bytes used for length encoding
        // --------------------------------------------------
        let num_bytes = (first_byte & 0x7F) as usize;
        // --------------------------------------------------
        // ensure there are enough bytes in the stream
        // --------------------------------------------------
        // since 1 was taken from input, this should be
        // `input.len() + 1 < num_bytes + 1`
        // but can be shortened
        // --------------------------------------------------
        if input.len() < num_bytes {
            return Err(winnow::error::ContextError::from_input(input)
                .add_context(
                    input,
                    &checkpoint,
                    winnow::error::StrContext::Label("BER length value"),
                )
                .add_context(
                    input,
                    &checkpoint,
                    winnow::error::StrContext::Expected(
                        winnow::error::StrContextValue::Description(
                            "enough bytes in stream for length encoding",
                        ),
                    ),
                ));
        }
        // --------------------------------------------------
        // decode the length from the specified number of bytes
        // --------------------------------------------------
        let Some(output) = T::from_u128(parse_length_u128(input, num_bytes)?) else {
            return Err(winnow::error::ContextError::from_input(input)
                .add_context(
                    input,
                    &checkpoint,
                    winnow::error::StrContext::Label("BER length value"),
                )
                .add_context(
                    input,
                    &checkpoint,
                    winnow::error::StrContext::Expected(
                        winnow::error::StrContextValue::Description("less than u128::MAX"),
                    ),
                ));
        };
        Ok(BerLength::Long(output))
    }
}

#[derive(Debug, PartialEq)]
/// Struct representing Basic Encoding Rules (BER) Object Identifier (OID) encoding.
///
/// This is a thin wrapper around the *decoded* integer; the 7-bits-per-byte
/// wire representation is only produced when the value is encoded.
///
/// Maximum precision: [`u128`]
///
/// * See: [https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1563-0-200204-S!!PDF-E.pdf](https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.1563-0-200204-S!!PDF-E.pdf)
/// * See: [https://upload.wikimedia.org/wikipedia/commons/1/19/MISB_Standard_0601.pdf](https://upload.wikimedia.org/wikipedia/commons/1/19/MISB_Standard_0601.pdf) page 7
///
/// # Example
///
/// ```
/// use tinyklv::prelude::*;
/// use tinyklv::codecs::ber::BerOid;
///
/// assert_eq!(vec![129, 182, 2], BerOid::encode_value(23298_u64));
/// assert_eq!(23298_u64, BerOid::decode_value(&mut &vec![129, 182, 2][..]).unwrap().value());
/// ```
pub struct BerOid<T: OfBerCommon> {
    /// The plain (unencoded) integer; read it back with [`BerOid::value`]
    value: T,
}
/// [`BerOid`] implementation
impl<T: OfBerCommon> BerOid<T> {
    /// Wraps an unsigned integer in a [`BerOid`] newtype
    ///
    /// This is the canonical constructor used by both [`BerOid::encode_value`]
    /// and the [`crate::traits::DecodeValue`] impl. The value is stored
    /// verbatim; no encoding happens at construction time.
    ///
    /// # Arguments
    ///
    /// * `value` - The unsigned integer OID value to wrap
    ///
    /// # Example
    ///
    /// ```rust
    /// use tinyklv::codecs::ber::BerOid;
    ///
    /// let oid = BerOid::new(23298_u64);
    /// assert_eq!(oid.value(), 23298_u64);
    /// ```
    pub fn new(value: T) -> Self {
        Self { value }
    }

    /// Returns the wrapped OID value by copy
    ///
    /// Since `T: OfBerCommon` implies `Copy`, this returns the value without
    /// moving or cloning.
    #[must_use]
    pub fn value(&self) -> T {
        self.value
    }

    /// Convenience static entry point: wraps a value in [`BerOid`] and immediately encodes it
    ///
    /// Equivalent to `BerOid::new(value).encode_value()`. Useful when you need
    /// the BER-OID bytes without retaining the wrapper struct.
    ///
    /// # Arguments
    ///
    /// * `value` - The unsigned integer value to encode
    ///
    /// # Returns
    ///
    /// A [`Vec<u8>`] containing the BER-OID-encoded representation of the value
    ///
    /// # Example
    ///
    /// ```rust
    /// use tinyklv::codecs::ber::BerOid;
    ///
    /// assert_eq!(BerOid::encode_value(23298_u64), vec![129, 182, 2]);
    /// ```
    #[must_use]
    pub fn encode_value(value: T) -> Vec<u8> {
        Self::new(value).encode_value()
    }
}
/// [`BerOid`] implementation of [`crate::traits::EncodeValue`]
impl<T: OfBerCommon> crate::EncodeValue<Vec<u8>> for BerOid<T> {
    /// Encode a [`BerOid`] into a [`Vec<u8>`]
    ///
    /// The value is split into 7-bit groups, most significant group first.
    /// Every byte except the last has its MSB set (continuation); the last
    /// byte has its MSB clear (terminator).
    ///
    /// The terminator byte is always emitted, so the value `0` encodes as
    /// `[0x00]` rather than as an empty buffer. An empty buffer could not be
    /// decoded again, because the decoder requires a terminator byte.
    ///
    /// # Example
    ///
    /// ```
    /// use tinyklv::prelude::*;
    /// use tinyklv::codecs::ber::BerOid;
    ///
    /// assert_eq!(vec![129, 182, 2], BerOid::encode_value(23298_u64));
    /// assert_eq!(vec![0], BerOid::encode_value(0_u64));
    /// ```
    ///
    /// Please use [`crate::codecs::ber::enc::ber_oid`] instead for
    /// all parsing needs. This struct is meant to be used as a development
    /// tool for encoding values to BER format.
    fn encode_value(&self) -> Vec<u8> {
        let mut remaining: u128 = self.value.as_();
        // --------------------------------------------------
        // least-significant 7 bits form the terminator (MSB clear); pushed
        // unconditionally so that zero still produces one byte
        // --------------------------------------------------
        let mut output = vec![(remaining & 0x7F) as u8];
        remaining >>= 7;
        // --------------------------------------------------
        // every further 7-bit group is a continuation byte (MSB set)
        // --------------------------------------------------
        while remaining > 0 {
            output.push((remaining & 0x7F) as u8 | 0x80);
            remaining >>= 7;
        }
        // groups were collected least-significant first; wire order is the opposite
        output.reverse();
        output
    }
}
/// [`BerOid`] implementation of [`crate::traits::DecodeValue`]
impl<T: OfBerCommon> crate::DecodeValue<&[u8]> for BerOid<T> {
    /// Decode a [`BerOid`] from a [`&[u8]`]
    ///
    /// Consumes every leading byte whose MSB is set, then exactly one byte
    /// whose MSB is clear, and concatenates the low 7 bits of each (most
    /// significant group first).
    ///
    /// # Errors
    ///
    /// * the input ends before a terminator byte (MSB clear) is found
    /// * the encoded number needs more than 128 bits (previously the excess
    ///   high bits were silently shifted out and a wrong value was returned)
    /// * the number cannot be converted into `T`
    ///
    /// # Example
    ///
    /// ```
    /// use tinyklv::prelude::*;
    /// use tinyklv::codecs::ber::BerOid;
    ///
    /// assert_eq!(23298_u64, BerOid::decode_value(&mut &vec![129, 182, 2][..]).unwrap().value());
    /// ```
    ///
    /// Please use [`crate::codecs::ber::dec::ber_oid`] instead for
    /// all parsing needs. This struct is meant to be used as a development
    /// tool for parsing BER encoded values.
    fn decode_value(input: &mut &[u8]) -> crate::Result<Self> {
        let checkpoint = input.checkpoint();
        // --------------------------------------------------
        // BER-OID grammar: `(msb-set)* (msb-unset)`
        //   - zero or more continuation bytes with MSB = 1
        //   - exactly one terminator byte with MSB = 0
        // --------------------------------------------------
        let prefix: &[u8] = take_while(0.., msb_is_set)
            .context(winnow::error::StrContext::Label("BER-OID continuation bytes"))
            .parse_next(input)?;
        let terminator = winnow::binary::be_u8
            .context(winnow::error::StrContext::Label(
                "BER-OID missing terminator byte (MSB unset) at end of input",
            ))
            .parse_next(input)?;
        // --------------------------------------------------
        // accumulate 7 bits per byte, prefix first then terminator;
        // `None` as soon as a shift would push set bits out of the u128
        // --------------------------------------------------
        let accumulated = prefix
            .iter()
            .copied()
            .chain(std::iter::once(terminator))
            .try_fold(0u128, |acc, b| {
                // the top 7 bits are the ones `<< 7` would discard
                if acc >> 121 == 0 {
                    Some((acc << 7) | u128::from(b & 0x7F))
                } else {
                    None
                }
            });
        // --------------------------------------------------
        // narrow to the requested integer type; overflow of the accumulator
        // and overflow of `T` are reported the same way
        // --------------------------------------------------
        let Some(output) = accumulated.and_then(T::from_u128) else {
            return Err(winnow::error::ContextError::from_input(input)
                .add_context(
                    input,
                    &checkpoint,
                    winnow::error::StrContext::Label("BER-OID value"),
                )
                .add_context(
                    input,
                    &checkpoint,
                    winnow::error::StrContext::Expected(
                        winnow::error::StrContextValue::Description("less than u128::MAX"),
                    ),
                ));
        };
        Ok(BerOid::new(output))
    }
}

#[inline(always)]
/// Pulls a single byte off the front of `input`, returned as a 1-element slice
///
/// The [`BerLength`] decoder calls this to read the framing byte before it
/// branches on short vs long form. Fails when the stream is empty.
fn take_one<'s>(input: &mut &'s [u8]) -> crate::Result<&'s [u8]> {
    let mut single_byte = take(1usize);
    single_byte.parse_next(input)
}

#[inline(always)]
/// Tells whether the top bit (`0x80`) of `b` is set
///
/// For a `u8` that is the same as asking whether `b` is at least 128. The
/// [`BerOid`] decoder passes this to `take_while` to gather the continuation
/// bytes that precede the terminator byte.
fn msb_is_set(b: u8) -> bool {
    b >= 0x80
}

#[inline(always)]
/// Consumes `num_bytes` from the input and combines them into a big-endian [`u128`]
///
/// Each byte is shifted into the accumulator from the right: the first byte
/// becomes the most significant. Called from [`BerLength`]'s decoder after the
/// long-form prefix byte has already been consumed and its `num_bytes` field
/// extracted.
///
/// # Errors
///
/// * fewer than `num_bytes` remain in the stream
/// * the bytes describe a number that needs more than 128 bits. Leading zero
///   bytes are harmless, so more than 16 bytes are accepted as long as the
///   value itself fits. Previously the excess high bytes were silently
///   shifted out and a truncated value was returned.
fn parse_length_u128(input: &mut &[u8], num_bytes: usize) -> crate::Result<u128> {
    take(num_bytes)
        .verify_map(|bytes: &[u8]| {
            bytes.iter().try_fold(0u128, |acc, &byte| {
                // the top byte of `acc` is what `<< 8` would discard; if it
                // is non-zero the value does not fit into a u128
                if acc >> 120 == 0 {
                    Some((acc << 8) | u128::from(byte))
                } else {
                    None
                }
            })
        })
        .parse_next(input)
}