Skip to main content

mlt_core/frames/v01/stream/
logical.rs

1use std::fmt;
2use std::fmt::Debug;
3use std::iter::repeat_n;
4
5use num_traits::{PrimInt, ToPrimitive as _};
6
7use crate::MltError::{ParsingLogicalTechnique, RleRunLenInvalid, UnsupportedLogicalEncoding};
8use crate::codecs::morton::{decode_morton_codes, decode_morton_delta};
9use crate::codecs::rle::encode_rle;
10use crate::codecs::zigzag::{
11    decode_componentwise_delta_vec2s, decode_zigzag, decode_zigzag_delta, encode_zigzag,
12    encode_zigzag_delta,
13};
14use crate::errors::{AsMltError as _, fail_if_invalid_stream_size};
15use crate::utils::AsUsize as _;
16use crate::v01::{LogicalEncoding, LogicalTechnique, LogicalValue, RleMeta, StreamMeta};
17use crate::{Decoder, MltError, MltResult};
18
19/// RLE-encode a sequence into `[run-lengths | unique-values]` and return the matching `RleMeta`.
20/// `num_logical` is the expanded output length (stored in `RleMeta::num_rle_values`).
21fn apply_rle<T: PrimInt + Debug>(
22    data: &[T],
23    num_logical: usize,
24) -> Result<(Vec<T>, RleMeta), MltError> {
25    let (runs_vec, vals_vec) = encode_rle(data);
26    let meta = RleMeta {
27        runs: u32::try_from(runs_vec.len())?,
28        num_rle_values: u32::try_from(num_logical)?,
29    };
30    let mut combined = runs_vec;
31    combined.extend(vals_vec);
32    Ok((combined, meta))
33}
34
35impl RleMeta {
36    /// Decode RLE (Run-Length Encoding) data.
37    /// Charges the decoder for the expanded output allocation.
38    pub fn decode<T: PrimInt + Debug>(
39        self,
40        data: &[T],
41        dec: &mut Decoder,
42    ) -> Result<Vec<T>, MltError> {
43        let expected_len = self.runs.as_usize().checked_mul(2).or_overflow()?;
44        fail_if_invalid_stream_size(data.len(), expected_len)?;
45
46        let (run_lens, values) = data.split_at(self.runs.as_usize());
47        fail_if_invalid_stream_size(self.num_rle_values, Self::calc_size(run_lens)?)?;
48
49        let alloc_size = self.num_rle_values.as_usize();
50        let mut result = dec.alloc(alloc_size)?;
51        for (&run_len, &val) in run_lens.iter().zip(values.iter()) {
52            let run = run_len
53                .to_usize()
54                .ok_or_else(|| RleRunLenInvalid(run_len.to_i128().unwrap_or_default()))?;
55            result.extend(repeat_n(val, run));
56        }
57        dec.adjust_alloc(&result, alloc_size);
58        Ok(result)
59    }
60
61    fn calc_size<T: PrimInt + Debug>(run_lens: &[T]) -> MltResult<u32> {
62        run_lens
63            .iter()
64            .try_fold(T::zero(), |a, v| a.checked_add(v))
65            .and_then(|v| v.to_u32())
66            .ok_or_else(|| RleRunLenInvalid(run_lens.len().to_i128().unwrap_or_default()))
67    }
68}
69
70impl LogicalTechnique {
71    pub fn parse(value: u8) -> MltResult<Self> {
72        Self::try_from(value).or(Err(ParsingLogicalTechnique(value)))
73    }
74}
75
76impl Debug for LogicalEncoding {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        match self {
79            Self::None => write!(f, "None"),
80            Self::Delta => write!(f, "Delta"),
81            Self::ComponentwiseDelta => write!(f, "ComponentwiseDelta"),
82            Self::PseudoDecimal => write!(f, "PseudoDecimal"),
83            Self::DeltaRle(v) => write!(f, "DeltaRle({v:?})"),
84            Self::Rle(v) => write!(f, "Rle({v:?})"),
85            Self::Morton(v) => write!(f, "Morton({v:?})"),
86            Self::MortonDelta(v) => write!(f, "MortonDelta({v:?})"),
87            Self::MortonRle(v) => write!(f, "MortonRle({v:?})"),
88        }
89    }
90}
91
92impl LogicalValue {
93    #[must_use]
94    pub fn new(meta: StreamMeta) -> Self {
95        Self { meta }
96    }
97
98    /// Logically decode `data` (physically decoded u32 words) into `Vec<i32>`.
99    ///
100    /// Never called for `LogicalEncoding::None` — that case is handled directly
101    /// in the bridge (physical buffer decoded into a fresh output Vec).
102    pub fn decode_i32(self, data: &[u32], dec: &mut Decoder) -> Result<Vec<i32>, MltError> {
103        match self.meta.encoding.logical {
104            LogicalEncoding::None => decode_zigzag(data, dec),
105            LogicalEncoding::Rle(rle) => decode_zigzag(&rle.decode(data, dec)?, dec),
106            LogicalEncoding::ComponentwiseDelta => decode_componentwise_delta_vec2s(data, dec),
107            LogicalEncoding::Delta => decode_zigzag_delta::<i32, _>(data, dec),
108            LogicalEncoding::DeltaRle(rle) => {
109                let expanded = rle.decode(data, dec)?;
110                decode_zigzag_delta::<i32, _>(&expanded, dec)
111            }
112            LogicalEncoding::Morton(meta) => decode_morton_codes(data, meta, dec),
113            LogicalEncoding::MortonDelta(meta) => decode_morton_delta(data, meta, dec),
114            LogicalEncoding::MortonRle(_) => Err(UnsupportedLogicalEncoding(
115                self.meta.encoding.logical,
116                "i32 (MortonRle)",
117            )),
118            LogicalEncoding::PseudoDecimal => Err(UnsupportedLogicalEncoding(
119                self.meta.encoding.logical,
120                "i32",
121            )),
122        }
123    }
124
125    /// Logically decode `data` (physically decoded u32 words) into `Vec<u32>`.
126    ///
127    /// Not called for `LogicalEncoding::None` — that case is handled entirely
128    /// in the bridge (physical buffer decoded directly into the output Vec).
129    pub fn decode_u32(self, data: &[u32], dec: &mut Decoder) -> Result<Vec<u32>, MltError> {
130        let num = self.meta.num_values.as_usize();
131        match self.meta.encoding.logical {
132            LogicalEncoding::None => {
133                // Caller should have used the direct-output path; this is a fallback.
134                dec.consume_items::<u32>(num)?;
135                Ok(data.to_vec())
136            }
137            LogicalEncoding::Rle(rle) => rle.decode(data, dec),
138            LogicalEncoding::Delta => decode_zigzag_delta::<i32, _>(data, dec),
139            LogicalEncoding::DeltaRle(rle) => {
140                decode_zigzag_delta::<i32, _>(&rle.decode(data, dec)?, dec)
141            }
142            _ => Err(UnsupportedLogicalEncoding(
143                self.meta.encoding.logical,
144                "u32",
145            )),
146        }
147    }
148
149    /// Logically decode `data` (physically decoded u64 words) into `Vec<i64>`.
150    ///
151    /// Never called for `LogicalEncoding::None` — that case is handled directly
152    /// in the bridge (physical buffer decoded into a fresh output Vec).
153    pub fn decode_i64(self, data: &[u64], dec: &mut Decoder) -> Result<Vec<i64>, MltError> {
154        match self.meta.encoding.logical {
155            LogicalEncoding::None => decode_zigzag(data, dec),
156            LogicalEncoding::Delta => decode_zigzag_delta::<i64, _>(data, dec),
157            LogicalEncoding::DeltaRle(rle) => {
158                let expanded = rle.decode(data, dec)?;
159                decode_zigzag_delta::<i64, _>(&expanded, dec)
160            }
161            LogicalEncoding::Rle(rle) => {
162                // rle.decode() charges for expanded u64 vec; decode_zigzag charges for i64 vec
163                let expanded = rle.decode(data, dec)?;
164                decode_zigzag(&expanded, dec)
165            }
166            _ => Err(UnsupportedLogicalEncoding(
167                self.meta.encoding.logical,
168                "i64",
169            )),
170        }
171    }
172
173    /// Logically decode `data` (physically decoded u64 words) into `Vec<u64>`.
174    ///
175    /// Not called for `LogicalEncoding::None` — that case is handled entirely
176    /// in the bridge (physical buffer decoded directly into the output Vec).
177    pub fn decode_u64(self, data: &[u64], dec: &mut Decoder) -> Result<Vec<u64>, MltError> {
178        let num = self.meta.num_values.as_usize();
179        match self.meta.encoding.logical {
180            LogicalEncoding::None => {
181                // Caller should have used the direct-output path; this is a fallback.
182                dec.consume_items::<u64>(num)?;
183                Ok(data.to_vec())
184            }
185            LogicalEncoding::Rle(rle) => rle.decode(data, dec),
186            LogicalEncoding::Delta => decode_zigzag_delta::<i64, _>(data, dec),
187            LogicalEncoding::DeltaRle(rle) => {
188                let expanded = rle.decode(data, dec)?;
189                decode_zigzag_delta::<i64, _>(&expanded, dec)
190            }
191            _ => Err(UnsupportedLogicalEncoding(
192                self.meta.encoding.logical,
193                "u64",
194            )),
195        }
196    }
197}
198
199#[derive(Debug, Eq, PartialEq, Clone, Copy, Default, strum::EnumIter)]
200#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
201#[cfg_attr(all(not(test), feature = "arbitrary"), derive(arbitrary::Arbitrary))]
202pub enum LogicalEncoder {
203    #[default]
204    None,
205    Delta,
206    DeltaRle,
207    Rle,
208    // FIXME: add more of the LogicalEncoding strategies
209}
210impl LogicalEncoder {
211    /// Logically encode `u32` values, returning the physically-stored sequence and the concrete decoder.
212    ///
213    /// [`LogicalEncoding`] is derived from the actual data.
214    /// See [`LogicalValue::decode_u32`] for the reverse operation.
215    pub fn encode_u32s(self, values: &[u32]) -> Result<(Vec<u32>, LogicalEncoding), MltError> {
216        match self {
217            Self::None => Ok((values.to_vec(), LogicalEncoding::None)),
218            Self::Delta => {
219                let values = values.iter().map(|&v| v.cast_signed()).collect::<Vec<_>>();
220                let u32s = encode_zigzag_delta(&values);
221                Ok((u32s, LogicalEncoding::Delta))
222            }
223            Self::Rle => {
224                let (u32s, meta) = apply_rle(values, values.len())?;
225                Ok((u32s, LogicalEncoding::Rle(meta)))
226            }
227            Self::DeltaRle => {
228                let values = values.iter().map(|&v| v.cast_signed()).collect::<Vec<_>>();
229                let delta = encode_zigzag_delta(&values);
230                let (u32s, meta) = apply_rle(&delta, values.len())?;
231                Ok((u32s, LogicalEncoding::DeltaRle(meta)))
232            }
233        }
234    }
235
236    /// Logically encode `i32` values into the `u32` physical representation.
237    ///
238    /// [`LogicalEncoding`] is derived from the actual data.
239    /// See [`LogicalValue::decode_i32`] for the reverse operation.
240    pub fn encode_i32s(self, values: &[i32]) -> Result<(Vec<u32>, LogicalEncoding), MltError> {
241        match self {
242            Self::None => Ok((encode_zigzag(values), LogicalEncoding::None)),
243            Self::Delta => Ok((encode_zigzag_delta(values), LogicalEncoding::Delta)),
244            Self::Rle => {
245                let (u32s, meta) = apply_rle(&encode_zigzag(values), values.len())?;
246                Ok((u32s, LogicalEncoding::Rle(meta)))
247            }
248            Self::DeltaRle => {
249                let (u32s, meta) = apply_rle(&encode_zigzag_delta(values), values.len())?;
250                Ok((u32s, LogicalEncoding::DeltaRle(meta)))
251            }
252        }
253    }
254
255    /// Logically encode `u64` values into the `u64` physical representation.
256    ///
257    /// [`LogicalEncoding`] is derived from the actual data.
258    /// See [`LogicalValue::decode_u64`] for the reverse operation.
259    pub fn encode_u64s(self, values: &[u64]) -> Result<(Vec<u64>, LogicalEncoding), MltError> {
260        match self {
261            Self::None => Ok((values.to_vec(), LogicalEncoding::None)),
262            Self::Delta => Ok((
263                encode_zigzag_delta(&values.iter().map(|&v| v.cast_signed()).collect::<Vec<_>>()),
264                LogicalEncoding::Delta,
265            )),
266            Self::Rle => {
267                let (u64s, meta) = apply_rle(values, values.len())?;
268                Ok((u64s, LogicalEncoding::Rle(meta)))
269            }
270            Self::DeltaRle => {
271                let delta = encode_zigzag_delta(
272                    &values.iter().map(|&v| v.cast_signed()).collect::<Vec<_>>(),
273                );
274                let (u64s, meta) = apply_rle(&delta, values.len())?;
275                Ok((u64s, LogicalEncoding::DeltaRle(meta)))
276            }
277        }
278    }
279
280    /// Logically encode `i64` values into the `u64` physical representation.
281    ///
282    /// [`LogicalEncoding`] is derived from the actual data.
283    /// See [`LogicalValue::decode_i64`] for the reverse operation.
284    pub fn encode_i64s(self, values: &[i64]) -> Result<(Vec<u64>, LogicalEncoding), MltError> {
285        match self {
286            Self::None => Ok((encode_zigzag(values), LogicalEncoding::None)),
287            Self::Delta => Ok((encode_zigzag_delta(values), LogicalEncoding::Delta)),
288            Self::Rle => {
289                let (u64s, meta) = apply_rle(&encode_zigzag(values), values.len())?;
290                Ok((u64s, LogicalEncoding::Rle(meta)))
291            }
292            Self::DeltaRle => {
293                let (u64s, meta) = apply_rle(&encode_zigzag_delta(values), values.len())?;
294                Ok((u64s, LogicalEncoding::DeltaRle(meta)))
295            }
296        }
297    }
298}
299
#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use super::*;
    use crate::MltError::InvalidDecodingStreamSize;
    use crate::test_helpers::dec;
    use crate::v01::{DictionaryType, IntEncoding, PhysicalEncoding, StreamType};

    /// Build metadata for a plain data stream with the given logical encoding,
    /// no physical encoding, and `num_values` declared values.
    fn make_meta(logical_encoding: LogicalEncoding, num_values: usize) -> StreamMeta {
        let count =
            u32::try_from(num_values).expect("proptest to not generate that large of a vec");
        let encoding = IntEncoding::new(logical_encoding, PhysicalEncoding::None);
        StreamMeta::new(StreamType::Data(DictionaryType::None), encoding, count)
    }

    // Every encoder variant must round-trip arbitrary input through the
    // matching `LogicalValue::decode_*` method.
    proptest! {
        #[test]
        fn test_u32_logical_roundtrip(
            input in prop::collection::vec(any::<u32>(), 0..100),
            encoder in any::<LogicalEncoder>(),
        ) {
            let (physical, encoding) = encoder.encode_u32s(&input).unwrap();
            let value = LogicalValue::new(make_meta(encoding, input.len()));
            let decoded = value.decode_u32(&physical, &mut dec()).unwrap();
            prop_assert_eq!(decoded, input);
        }

        #[test]
        fn test_i32_logical_roundtrip(
            input in prop::collection::vec(any::<i32>(), 0..100),
            encoder in any::<LogicalEncoder>(),
        ) {
            let (physical, encoding) = encoder.encode_i32s(&input).unwrap();
            let value = LogicalValue::new(make_meta(encoding, input.len()));
            let decoded = value.decode_i32(&physical, &mut dec()).unwrap();
            prop_assert_eq!(decoded, input);
        }

        #[test]
        fn test_u64_logical_roundtrip(
            input in prop::collection::vec(any::<u64>(), 0..100),
            encoder in any::<LogicalEncoder>(),
        ) {
            let (physical, encoding) = encoder.encode_u64s(&input).unwrap();
            let value = LogicalValue::new(make_meta(encoding, input.len()));
            let decoded = value.decode_u64(&physical, &mut dec()).unwrap();
            prop_assert_eq!(decoded, input);
        }

        #[test]
        fn test_i64_logical_roundtrip(
            input in prop::collection::vec(any::<i64>(), 0..100),
            encoder in any::<LogicalEncoder>(),
        ) {
            let (physical, encoding) = encoder.encode_i64s(&input).unwrap();
            let value = LogicalValue::new(make_meta(encoding, input.len()));
            let decoded = value.decode_i64(&physical, &mut dec()).unwrap();
            prop_assert_eq!(decoded, input);
        }
    }

    /// Zero runs over an empty buffer decode to an empty vector.
    #[test]
    fn test_decode_rle_empty() {
        let empty_meta = RleMeta {
            runs: 0,
            num_rle_values: 0,
        };
        let decoded = empty_meta.decode::<u32>(&[], &mut dec()).unwrap();
        assert!(decoded.is_empty());
    }

    /// A buffer shorter than `2 * runs` is rejected with the actual and expected sizes.
    #[test]
    fn test_decode_rle_invalid_stream_size() {
        // Valid RLE for runs=2 needs 4 elements (2 run lengths + 2 values). Only 3 provided.
        let short_input = [1u32, 2, 3];
        let meta = RleMeta {
            runs: 2,
            num_rle_values: 3,
        };
        let err = meta.decode::<u32>(&short_input, &mut dec()).unwrap_err();
        assert!(matches!(err, InvalidDecodingStreamSize(3, 4)));
    }
}
381        let data = [1u32, 2, 3];
382        let err = rle.decode::<u32>(&data, &mut dec()).unwrap_err();
383        assert!(matches!(err, InvalidDecodingStreamSize(3, 4)));
384    }
385}