diff --git a/Cargo.toml b/Cargo.toml
index 181a138..f83fb68 100644
@@ -46,6 +46,16 @@ harness = false
name = "full_rlnc_decoder"
harness = false
+[[bench]]
+name = "bench_simd_ops"
+harness = false
+
+[profile.bench]
+codegen-units = 1
+lto = true
+opt-level = 3
+debug = true
+
[profile.optimized]
inherits = "release"
codegen-units = 1
diff --git a/benches/bench_simd_ops.rs b/benches/bench_simd_ops.rs
new file mode 100644
index 0000000..aead705
@@ -0,0 +1,154 @@
+use criterion::*;
+use rand::Rng;
+use rlnc::common;
+use std::{fmt::Debug, hint::black_box, time::Duration};
+
+/// Parameters for a single benchmark case.
+///
+/// Its `Debug` output is the human-readable form of `data_byte_len`, which the
+/// benchmark uses as the case label.
+struct RLNCConfig {
+ // Length, in bytes, of each randomly filled input vector.
+ data_byte_len: usize,
+}
+
+/// Formats a byte count with one decimal place and a binary-scaled unit suffix.
+///
+/// The value is divided by 1024 for as long as it is at least 1024 and a larger
+/// unit is available; `TB` is the largest unit, so bigger values stay in `TB`
+/// (e.g. `1 << 50` renders as `1024.0TB`).
+fn bytes_to_human_readable(bytes: usize) -> String {
+    const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
+
+    let mut scaled = bytes as f64;
+    let mut suffix = UNITS[0];
+
+    // Step up through the larger units until the value fits below 1024.
+    for &larger in &UNITS[1..] {
+        if scaled < 1024.0 {
+            break;
+        }
+        scaled /= 1024.0;
+        suffix = larger;
+    }
+
+    format!("{:.1}{}", scaled, suffix)
+}
+
+/// `Debug` prints only the human-readable data size (e.g. `1.0MB`), because the
+/// output is used verbatim as the benchmark case name.
+impl Debug for RLNCConfig {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // The helper already returns an owned `String`; write it directly instead of
+        // re-formatting it through `format!("{}", ..)`.
+        f.write_str(&bytes_to_human_readable(self.data_byte_len))
+    }
+}
+
+/// Input sizes iterated by the benchmarks in this file.
+///
+/// Only the 1 MiB case is enabled; uncomment one of the larger sizes below to
+/// benchmark bigger buffers.
+const ARGS: &[RLNCConfig] = &[
+    RLNCConfig { data_byte_len: 1usize << 20 },
+    // RLNCConfig { data_byte_len: 1usize << 23 },
+    // RLNCConfig { data_byte_len: 1usize << 25 },
+];
+
+// fn bench_gf256_inplace_mul_vec_by_scalar(c: &mut Criterion) {
+// let mut group = c.benchmark_group("gf256_inplace_mul_vec_by_scalar");
+
+// for rlnc_config in ARGS {
+// let mut rng = rand::rng();
+
+// let mut vec = (0..rlnc_config.data_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
+// let scalar: u8 = rng.random();
+
+// group.measurement_time(Duration::from_secs(20));
+// group.sample_size(100);
+
+// // Number of bytes used as input to encoder + Number of bytes for each coded piece
+// group.throughput(Throughput::Bytes(vec.len() as u64));
+// group.bench_function(format!("{:?}", rlnc_config), move |b| {
+// b.iter(|| common::simd::gf256_inplace_mul_vec_by_scalar(black_box(&mut vec), black_box(scalar)));
+// });
+// }
+
+// group.finish();
+// }
+
+// fn bench_gf256_inplace_add_vectors(c: &mut Criterion) {
+// let mut group = c.benchmark_group("gf256_inplace_add_vectors");
+
+// for rlnc_config in ARGS {
+// let mut rng = rand::rng();
+
+// let src = (0..rlnc_config.data_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
+// let mut dst = (0..rlnc_config.data_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
+
+// group.measurement_time(Duration::from_secs(20));
+// group.sample_size(100);
+
+// // Number of bytes used as input to encoder + Number of bytes for each coded piece
+// group.throughput(Throughput::Bytes(src.len() as u64));
+// group.bench_function(format!("{:?}", rlnc_config), move |b| {
+// b.iter(|| common::simd::gf256_inplace_add_vectors(black_box(&mut dst), black_box(&src)));
+// });
+// }
+
+// group.finish();
+// }
+
+/// Benchmarks `common::simd::gf256_mul_vec_by_scalar_then_add_into_vec` on randomly
+/// filled byte vectors, one benchmark case per entry in `ARGS`.
+///
+/// `add_vec` is handed to the routine by mutable reference and reused across
+/// iterations, so its contents carry over from one iteration to the next.
+fn bench_gf256_mul_vec_by_scalar_then_add_into_vec(c: &mut Criterion) {
+    let mut group = c.benchmark_group("gf256_mul_vec_by_scalar_then_add_into_vec");
+
+    // Sampling settings are identical for every case, so configure the group once.
+    group.measurement_time(Duration::from_secs(20));
+    group.sample_size(100);
+
+    let mut rng = rand::rng();
+
+    for rlnc_config in ARGS {
+        let mut add_vec = (0..rlnc_config.data_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
+        let mul_vec = (0..rlnc_config.data_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
+        let scalar: u8 = rng.random();
+
+        // Throughput counts the bytes of both vectors passed to the routine.
+        group.throughput(Throughput::Bytes((add_vec.len() + mul_vec.len()) as u64));
+        group.bench_function(format!("{:?}", rlnc_config), move |b| {
+            b.iter(|| common::simd::gf256_mul_vec_by_scalar_then_add_into_vec(black_box(&mut add_vec), black_box(&mul_vec), black_box(scalar)));
+        });
+    }
+
+    group.finish();
+}
+
+// Registers the benchmark functions under the `simd_ops` group and lets criterion
+// generate the entry point for this bench target. The two names inside the block
+// comment belong to the benchmarks that are currently commented out earlier in
+// this file; re-enable them together with those functions.
+criterion_group!(
+ simd_ops,
+ /*bench_gf256_inplace_mul_vec_by_scalar , bench_gf256_inplace_add_vectors,*/ bench_gf256_mul_vec_by_scalar_then_add_into_vec
+);
+criterion_main!(simd_ops);
diff --git a/src/lib.rs b/src/lib.rs
index 366656a..b0a1896 100644
@@ -124,7 +124,7 @@
//!
//! For more see README in `rlnc` repository @ <https://github.com/itzmeanjan/rlnc>.
-mod common;
+pub mod common;
pub mod full;
pub use crate::common::errors::RLNCError;