rust_gd/
lib.rs

1mod dict;
2mod error;
3mod gd_bit_unit;
4mod gd_byte_unit;
5mod separator;
6
7use async_trait::async_trait;
8use dict::BasisDict;
9use error::*;
10use gd_bit_unit::BitGD;
11use gd_byte_unit::ByteGD;
12use libecc::{types::*, *};
13
14///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Selects the error-correcting code, and its parameters, used for deduplication.
///
/// A `GD` value is only a description; call [`GD::setup`] to build the
/// ready-to-use [`GDInner`] instance.
///
/// The enum holds nothing but plain integers, so it is cheap to copy and can
/// be compared or used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GD {
  /// Reed-Solomon code given as `(code length, message length)`.
  ///
  /// Both values are forwarded in this order to the code constructor, and the
  /// code length is also used as the chunk length in bytes.
  ReedSolomon(usize, usize),
  /// Hamming code given by its degree.
  Hamming(usize),
}
20impl GD {
21  pub async fn setup(&self, dict_size: usize) -> Result<GDInner> {
22    // TODO: consider parallelization using async
23    match self {
24      GD::ReedSolomon(a, b) => Ok(GDInner::ReedSolomon(ByteGD {
25        code: ReedSolomon::new(*a, *b).await?,
26        basis_dict: BasisDict::<U8VRep>::new(dict_size),
27        chunk_bytelen: *a,
28      })),
29
30      GD::Hamming(a) => {
31        let code = Hamming::new(*a as u32)?;
32        ensure!(code.code_bit_len >= 8, "Insufficient code length");
33        let chunk_bytelen = (code.code_bit_len - code.code_bit_len % 8) / 8;
34        Ok(GDInner::Hamming(BitGD {
35          code,
36          basis_dict: BasisDict::<BVRep>::new(dict_size),
37          chunk_bytelen,
38        }))
39      }
40    }
41  }
42}
43///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
44#[derive(Debug, Clone)]
45pub enum GDInner {
46  ReedSolomon(ByteGD<ReedSolomon>),
47  Hamming(BitGD<Hamming>),
48}
49
50impl GDInner {
51  pub fn unit_check(&self) {
52    match &self {
53      GDInner::Hamming(x) => x.unit_check(),
54      GDInner::ReedSolomon(x) => x.unit_check(),
55    }
56  }
57  // Asynchronous APIs
58  // TODO: consider some parallelization only for 'decoding' operation to split chunk into base and deviation.
59  // TODO: also consider for 'encoding' as well
60  pub async fn dedup(&mut self, buf: &U8SRep) -> Result<Deduped> {
61    match self {
62      GDInner::Hamming(x) => x.dedup(buf).await,
63      GDInner::ReedSolomon(x) => x.dedup(buf).await,
64    }
65  }
66
67  pub async fn dup(&mut self, deduped: &Deduped) -> Result<U8VRep> {
68    match self {
69      GDInner::Hamming(x) => x.dup(deduped).await,
70      GDInner::ReedSolomon(x) => x.dup(deduped).await,
71    }
72  }
73  pub async fn set_error_alignment(&mut self, trans: &[U8VRep]) -> Result<()> {
74    match self {
75      GDInner::Hamming(_) => Err(anyhow!("No such method for Hamming codes")),
76      GDInner::ReedSolomon(x) => x.set_error_alignment(trans).await,
77    }
78  }
79}
80
81///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
82#[async_trait]
83pub trait GDTrait {
84  fn unit_check(&self);
85  async fn dedup(&mut self, buf: &U8SRep) -> Result<Deduped>;
86  async fn dup(&mut self, deduped: &Deduped) -> Result<U8VRep>;
87}
88
89///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
90#[derive(Debug, Clone)]
91pub struct Deduped {
92  pub data: U8VRep,
93  pub last_chunk_pad_bytelen: usize,
94}
95/////////////////////////////////////////
96
#[cfg(test)]
mod tests {
  use super::*;
  use rand::Rng;

  /// Sample text used as test input. The trailing `pad...` run is there
  /// because the tests need the input to be byte-aligned.
  const WORD_STR: &str =
    "寿限無(じゅげむ)寿限無(じゅげむ)五劫(ごこう)のすりきれ海砂利(かいじゃり)padpadpadpadpadpadpadpad";

  const HAMMING_DICT_SIZE: usize = 511;
  const HAMMING_REPEAT: usize = 128;

  const RS_MAX_DICT_BITS: usize = 8;
  const RS_DICT_PARAM: usize = 2;
  const RS_REPEAT: usize = 1024;

  /// Builds two independent engines from the same parameters: one acting as
  /// the deduplicating side, the other as the restoring side.
  async fn setup_pair(gd: GD, dict_size: usize) -> (GDInner, GDInner) {
    let dedup_side = gd.setup(dict_size).await.unwrap();
    let dup_side = gd.setup(dict_size).await.unwrap();
    (dedup_side, dup_side)
  }

  /// Round-trips the sample text through Hamming-based engines of several degrees.
  #[tokio::test]
  async fn hamming_works() {
    let words = WORD_STR.repeat(HAMMING_REPEAT).into_bytes();

    for hamming_deg in 4..11 {
      let (mut gd_dedup, mut gd_dup) =
        setup_pair(GD::Hamming(hamming_deg), HAMMING_DICT_SIZE).await;

      let x = gd_dedup.dedup(&words).await.unwrap();
      let y = gd_dup.dup(&x).await.unwrap();
      assert_eq!(y, words);
      println!(
        "Hamming code deg = {} > Deduped rate: {:.2} %",
        hamming_deg,
        100.0 * (x.data.len() as f32) / (y.len() as f32)
      );
    }
  }

  /// Round-trips the sample text through Reed-Solomon engines, with random
  /// noise XOR-ed into the last `code_len - msg_len` bytes of every chunk.
  #[tokio::test]
  async fn rs_works() {
    let mut rng = rand::thread_rng();
    let words_org = WORD_STR.as_bytes().repeat(RS_REPEAT);

    for &code_len in &[128usize] {
      // Try the (up to) eight largest message lengths, but never below 2.
      let min_msg_len = 2usize.max(code_len.saturating_sub(8));
      for msg_len in min_msg_len..code_len {
        let dict_bits = ((code_len - msg_len) * RS_DICT_PARAM).min(RS_MAX_DICT_BITS);
        let dict_size = (1usize << dict_bits) - 1;

        let (mut gd_dedup, mut gd_dup) =
          setup_pair(GD::ReedSolomon(code_len, msg_len), dict_size).await;

        // Leave the first `msg_len` bytes of each chunk intact and perturb the rest.
        let words: Vec<u8> = words_org
          .iter()
          .enumerate()
          .map(|(idx, &b)| {
            if idx % code_len < msg_len {
              b
            } else {
              b ^ rng.gen::<u8>()
            }
          })
          .collect();

        let x = gd_dedup.dedup(&words).await.unwrap();
        let y = gd_dup.dup(&x).await.unwrap();
        assert_eq!(y, words);
        println!(
          "RS code ({}, {}) over GF(256) of dict size {} > Deduped rate: {:.2} %",
          code_len,
          msg_len,
          dict_size,
          100.0 * (x.data.len() as f32) / (y.len() as f32)
        );
      }
    }
  }

  /// Round-trips the sample text through Reed-Solomon engines after an error
  /// alignment has been configured on both sides.
  #[tokio::test]
  async fn rs_align_error_works() {
    let trans: Vec<Vec<u8>> = vec![
      vec![1u8, 0, 0, 0],
      vec![1u8, 1, 1, 4],
      vec![1u8, 1, 3, 0],
      vec![1u8, 2, 0, 0],
    ];
    let dict_size = 15;
    let code_len = 4;
    let msg_len = 3;

    let (mut gd_dedup, mut gd_dup) =
      setup_pair(GD::ReedSolomon(code_len, msg_len), dict_size).await;
    // `expect` reports the underlying error instead of a bare assertion failure.
    gd_dedup
      .set_error_alignment(&trans)
      .await
      .expect("set_error_alignment failed on the dedup side");
    gd_dup
      .set_error_alignment(&trans)
      .await
      .expect("set_error_alignment failed on the dup side");

    let words = WORD_STR.as_bytes().repeat(RS_REPEAT);

    let x = gd_dedup.dedup(&words).await.unwrap();
    let y = gd_dup.dup(&x).await.unwrap();
    assert_eq!(y, words);
    println!(
      "RS code ({}, {}) over GF(256) of dict size {} > Deduped rate: {:.2} %",
      code_len,
      msg_len,
      dict_size,
      100.0 * (x.data.len() as f32) / (y.len() as f32)
    );
  }
}