csf/coding/
geom.rs

1use std::mem::size_of_val;
2use std::io::{Read, Write};
3use std::io;
4use minimum_redundancy::{DecodingResult, entropy_to_bpf, Frequencies};
5use std::borrow::Borrow;
6use std::collections::HashMap;
7use dyn_size_of::GetSize;
8use crate::coding::{BuildCoding, Coding, Decoder, SerializableCoding};
9use super::U8Code;
10
/// Decoder state for codewords produced by [`GeometricUnlimited`].
///
/// Keeps the running sum of the fragments consumed so far together with the
/// fragment value (`threshold`) that marks a codeword as not yet finished.
#[derive(Clone, Copy)]
pub struct GeometricUnlimitedDecoder {
    /// Fragment value signalling that further fragments follow.
    threshold: u8,
    /// Sum of all fragments consumed so far.
    value: u16,
}

impl GeometricUnlimitedDecoder {
    /// Creates a decoder for the given `threshold` with a zeroed accumulator.
    pub fn new(threshold: u8) -> Self {
        Self { value: 0, threshold }
    }
}
20
21impl Decoder for GeometricUnlimitedDecoder //where V: Clone + AddAssign, u16: TryInto<V>
22{
23    type Value = u16;
24    type Decoded = u16;
25
26    #[inline] fn consume_checked(&mut self, fragment: u8) -> DecodingResult<Self::Decoded> {
27        //self.value += unsafe { fragment.try_into().unwrap_unchecked() };
28        self.value += fragment as u16;
29        if fragment == self.threshold {
30            DecodingResult::Incomplete
31        } else {
32            DecodingResult::Value(self.value)
33            //DecodingResult::Value(self.value.clone())
34        }
35    }
36}
37
/// Coding in which a value is written as a run of `threshold` fragments
/// followed by one final fragment.
#[derive(Copy, Clone)]
pub struct GeometricUnlimited {
    /// Largest fragment value, `2^bits_per_fragment - 1` (all fragment bits set).
    threshold: u8,
    /// Number of bits per fragment, as passed to [`GeometricUnlimited::new`].
    bits_per_fragment: u8,
}

impl GeometricUnlimited {
    /// Creates a coding that uses `bits_per_fragment` bits per fragment.
    ///
    /// The threshold is the all-ones mask of that width. Fragments are `u8`s,
    /// so for `bits_per_fragment >= 8` the threshold saturates at `u8::MAX`
    /// instead of overflowing the shift (which would panic in debug builds and
    /// yield a threshold of 0 in release builds for `bits_per_fragment == 8`).
    #[inline] pub fn new(bits_per_fragment: u8) -> Self {
        let threshold = match bits_per_fragment {
            0..=7 => (1u8 << bits_per_fragment) - 1,
            _ => u8::MAX,
        };
        Self { threshold, bits_per_fragment }
    }
}
51
52impl Coding for GeometricUnlimited
53//where V: Clone + Into<u16> /*, u16: TryInto<V>*/
54{
55    type Value = u16;
56    //type Decoder<'d> where V: 'd = GeometricUnlimitedDecoder;
57    type Decoder<'d> = GeometricUnlimitedDecoder;
58    type Encoder<'e> = ();
59    //type Code = u16;
60    type Codeword = U8Code;
61
62    #[inline] fn bits_per_fragment(&self) -> u8 {
63        self.bits_per_fragment
64    }
65
66    #[inline] fn decoder(&self) -> Self::Decoder<'_> {
67        GeometricUnlimitedDecoder::new(self.threshold)
68    }
69
70    fn encoder(&self) -> Self::Encoder<'_> { () }
71
72    #[inline] fn len_of(&self, code: Self::Codeword) -> u8 {
73        //(code / self.threshold) as u8 + 1
74        code.len
75    }
76
77    fn fragment_of(&self, code: Self::Codeword, index: u8) -> u8 {
78        if index+1 == code.len {
79            code.content
80        } else {
81            self.threshold
82        }
83    }
84
85    fn remove_first_fragment_of(&self, code: &mut Self::Codeword) -> bool {
86        code.len -= 1;
87        code.len == 0
88    }
89
90    fn code_of<'e, Q>(&self, _encoder: &Self::Encoder<'e>, to_encode: &Q) -> Self::Codeword where Q: Borrow<Self::Value> {
91        let v = *to_encode.borrow();
92        Self::Codeword { content: v as u8 & self.threshold, len: (v >> self.bits_per_fragment) as u8 + 1 }
93        //to_encode.borrow()
94    }
95
96    /*fn fragment_of(&self, code: Self::Code, index: u8) -> u8 {
97        (if index * self.threshold < code {
98            self.threshold
99        } else {
100            code % self.threshold
101        }) as u8
102    }
103
104    fn remove_first_fragment_of(&self, code: &mut Self::Code) -> bool {
105        if *code <= self.threshold {
106            false
107        } else {
108            *code -= self.threshold;
109            true
110        }
111    }
112
113    fn value_of<Q>(&self, _encoder: &Self::Encoder, to_encode: &Q) -> Self::Code where Q: Borrow<Self::Value> {
114        to_encode.borrow().clone().into()
115    }*/
116}
117
118impl SerializableCoding for GeometricUnlimited
119    //where V: Clone + Into<u16>
120    //Default + Clone + AddAssign + Into<u32>, u32: TryInto<V>
121{
122    fn write_bytes(&self, _bytes_per_value: usize) -> usize {
123        size_of_val(&self.bits_per_fragment)
124    }
125
126    fn write<F>(&self, output: &mut dyn Write, _write_value: F) -> io::Result<()> where F: FnMut(&mut dyn Write, &Self::Value) -> io::Result<()> {
127        output.write_all(std::slice::from_ref(&self.bits_per_fragment)).map(|_| ())
128    }
129
130    fn read<F>(input: &mut dyn Read, _read_value: F) -> io::Result<Self> where F: FnMut(&mut dyn Read) -> io::Result<Self::Value>, Self: Sized {
131        let mut bits_per_fragment = 0u8;
132        input.read_exact(std::slice::from_mut(&mut bits_per_fragment))?;
133        Ok(Self::new(bits_per_fragment))
134    }
135}
136
// Empty impl: every `GetSize` method keeps the trait's provided default.
// `GeometricUnlimited` consists only of two `u8` fields.
impl GetSize for GeometricUnlimited {}
138
/// Builder of [`GeometricUnlimited`] codings.
#[derive(Copy, Clone, Default)]
pub struct BuildGeometricUnlimited {
    /// Number of bits per fragment to use; `0` (the default) leaves the choice
    /// to `build_from_iter`.
    pub bits_per_fragment: u8,
}
143
144impl BuildCoding<u16> for BuildGeometricUnlimited
145    //where Value: Default + Clone + AddAssign + Into<u16> + Hash + Eq, u16: TryInto<Value>
146{
147    //type Coding = GeometricUnlimited<Value>;
148    type Coding = GeometricUnlimited;
149
150    fn name(&self) -> String {
151        return if self.bits_per_fragment == 0 {
152            "geometric_unlimited".to_owned()
153        } else {
154            format!("geometric_unlimited_b{}", self.bits_per_fragment)
155        }
156    }
157
158    fn build_from_iter<Iter>(&self, iter: Iter, mut bits_per_fragment: u8) -> Self::Coding
159        where Iter: IntoIterator, Iter::Item: Borrow<<Self::Coding as Coding>::Value>
160    {
161        if bits_per_fragment == 0 { bits_per_fragment = self.bits_per_fragment; }
162        if bits_per_fragment == 0 {
163            //bits_per_fragment = entropy_to_bpf(HashMap::<Value, u32>::with_counted_all(iter).entropy()-0.2);
164            bits_per_fragment = entropy_to_bpf(HashMap::<u16, u32>::with_occurrences_of(iter).entropy()-0.2);
165            // old: we use 1 bit less than sound maximum... probably better heuristic exists
166            //bits_per_fragment = bits_to_store!(iter.into_iter().map(|v|Into::<u32>::into(v.borrow().clone())).max().unwrap_or(0));
167            //if bits_per_fragment > 1 { bits_per_fragment -= 1; }
168        }
169        Self::Coding::new(bits_per_fragment)
170    }
171}