extern crate binary_vec_io;
extern crate io_utils;
extern crate pretty_trace;
use binary_vec_io::{binary_read_to_ref, binary_read_vec, binary_write_vec};
use std::cmp::max;
#[derive(Clone)]
pub struct MirrorSparseMatrix {
x: Vec<u8>,
}
pub fn get_code_version_from_file(f: &str) -> u32 {
assert_eq!(std::mem::size_of::<usize>(), 8);
let mut ff = std::fs::File::open(&f).unwrap();
let mut x = vec![0 as u32; 11];
binary_read_to_ref::<u32>(&mut ff, &mut x[0], 11).unwrap();
x[10]
}
pub fn read_from_file(s: &mut MirrorSparseMatrix, f: &str) {
let mut ff = std::fs::File::open(&f).unwrap();
binary_read_vec::<u8>(&mut ff, &mut s.x).unwrap();
if s.code_version() != 0 && s.code_version() != 1 {
panic!(
"\nMirrorSparseMatrix: code_version has to be 0 or 1, but it is {}.\n",
s.code_version()
);
}
if s.storage_version() != 0 && s.storage_version() != 1 {
panic!(
"\nMirrorSparseMatrix: storage_version has to be 0 or 1, but it is {}.\n",
s.storage_version()
);
}
}
pub fn write_to_file(s: &MirrorSparseMatrix, f: &str) {
assert!(s.code_version() > 0);
let mut ff = std::fs::File::create(&f).expect(&format!("Failed to create file {}.", f));
binary_write_vec::<u8>(&mut ff, &s.x).unwrap();
}
fn get_u8_at_pos(v: &Vec<u8>, pos: usize) -> u8 {
v[pos]
}
fn get_u16_at_pos(v: &Vec<u8>, pos: usize) -> u16 {
let mut z = [0 as u8; 2];
for i in 0..2 {
z[i] = v[pos + i];
}
u16::from_le_bytes(z)
}
fn get_u32_at_pos(v: &Vec<u8>, pos: usize) -> u32 {
let mut z = [0 as u8; 4];
for i in 0..4 {
z[i] = v[pos + i];
}
u32::from_le_bytes(z)
}
fn _put_u8_at_pos(v: &mut Vec<u8>, pos: usize, val: u8) {
v[pos] = val;
}
fn _put_u16_at_pos(v: &mut Vec<u8>, pos: usize, val: u16) {
let z = val.to_le_bytes();
for i in 0..2 {
v[pos + i] = z[i];
}
}
fn put_u32_at_pos(v: &mut Vec<u8>, pos: usize, val: u32) {
let z = val.to_le_bytes();
for i in 0..4 {
v[pos + i] = z[i];
}
}
fn push_u8(v: &mut Vec<u8>, val: u8) {
v.push(val);
}
fn push_u16(v: &mut Vec<u8>, val: u16) {
let z = val.to_le_bytes();
for i in 0..2 {
v.push(z[i]);
}
}
fn push_u32(v: &mut Vec<u8>, val: u32) {
let z = val.to_le_bytes();
for i in 0..4 {
v.push(z[i]);
}
}
impl MirrorSparseMatrix {
pub fn new() -> MirrorSparseMatrix {
let v = Vec::<u8>::new();
MirrorSparseMatrix { x: v }
}
pub fn initialized(&self) -> bool {
!self.x.is_empty()
}
fn header_size() -> usize {
32
+ 4
+ 4
+ 4
+ 4
}
fn code_version(&self) -> usize {
get_u32_at_pos(&self.x, 32) as usize
}
fn storage_version(&self) -> usize {
get_u32_at_pos(&self.x, 36) as usize
}
pub fn build_from_vec(
x: &Vec<Vec<(i32, i32)>>,
row_labels: &Vec<String>,
col_labels: &Vec<String>,
) -> MirrorSparseMatrix {
let mut max_col = 0 as i32;
for i in 0..x.len() {
for j in 0..x[i].len() {
max_col = max(max_col, x[i][j].0);
}
}
let mut storage_version = 0 as u32;
if max_col >= 65536 {
storage_version = 1 as u32;
}
let hs = MirrorSparseMatrix::header_size();
let mut v = Vec::<u8>::new();
let mut total_bytes = hs + 4 * x.len();
for i in 0..x.len() {
let (mut m1, mut m2, mut m4) = (0, 0, 0);
for j in 0..x[i].len() {
if x[i][j].1 < 256 {
m1 += 1;
} else if x[i][j].1 < 65536 {
m2 += 1;
} else {
m4 += 1;
}
}
if storage_version == 0 {
total_bytes += 6 + 3 * m1 + 4 * m2 + 6 * m4;
} else {
total_bytes += 12 + 5 * m1 + 6 * m2 + 8 * m4;
}
}
let (n, k) = (row_labels.len(), col_labels.len());
assert_eq!(n, x.len());
total_bytes += 4 * (1 + n);
total_bytes += 4 * (1 + k);
let byte_start_of_row_labels = total_bytes;
for i in 0..n {
total_bytes += row_labels[i].len();
}
let byte_start_of_col_labels = total_bytes;
for j in 0..k {
total_bytes += col_labels[j].len();
}
v.reserve(total_bytes);
v.append(&mut b"MirrorSparseMatrix binary file \n".to_vec());
assert_eq!(v.len(), 32);
const CURRENT_CODE_VERSION: usize = 1;
let code_version = CURRENT_CODE_VERSION as u32;
push_u32(&mut v, code_version);
push_u32(&mut v, storage_version);
push_u32(&mut v, n as u32);
push_u32(&mut v, k as u32);
assert_eq!(v.len(), hs);
for _ in 0..n {
push_u32(&mut v, 0 as u32);
}
let mut pos = byte_start_of_row_labels;
for i in 0..=n {
push_u32(&mut v, pos as u32);
if i < n {
pos += row_labels[i].len();
}
}
let mut pos = byte_start_of_col_labels;
for j in 0..=k {
push_u32(&mut v, pos as u32);
if j < k {
pos += col_labels[j].len();
}
}
for i in 0..n {
let p = v.len() as u32;
put_u32_at_pos(&mut v, hs + 4 * i, p);
let (mut m1, mut m2, mut m4) = (0, 0, 0);
for j in 0..x[i].len() {
if x[i][j].1 < 256 {
m1 += 1;
} else if x[i][j].1 < 65536 {
m2 += 1;
} else {
m4 += 1;
}
}
if storage_version == 0 {
push_u16(&mut v, m1 as u16);
push_u16(&mut v, m2 as u16);
push_u16(&mut v, m4 as u16);
} else {
push_u32(&mut v, m1 as u32);
push_u32(&mut v, m2 as u32);
push_u32(&mut v, m4 as u32);
}
for j in 0..x[i].len() {
if x[i][j].1 < 256 {
if storage_version == 0 {
push_u16(&mut v, x[i][j].0 as u16);
} else {
push_u32(&mut v, x[i][j].0 as u32);
}
push_u8(&mut v, x[i][j].1 as u8);
}
}
for j in 0..x[i].len() {
if x[i][j].1 >= 256 && x[i][j].1 < 65536 {
if storage_version == 0 {
push_u16(&mut v, x[i][j].0 as u16);
} else {
push_u32(&mut v, x[i][j].0 as u32);
}
push_u16(&mut v, x[i][j].1 as u16);
}
}
for j in 0..x[i].len() {
if x[i][j].1 >= 65536 {
if storage_version == 0 {
push_u16(&mut v, x[i][j].0 as u16);
} else {
push_u32(&mut v, x[i][j].0 as u32);
}
push_u32(&mut v, x[i][j].1 as u32);
}
}
}
for i in 0..n {
for p in 0..row_labels[i].len() {
v.push(row_labels[i].as_bytes()[p]);
}
}
for j in 0..k {
for p in 0..col_labels[j].len() {
v.push(col_labels[j].as_bytes()[p]);
pos += 1;
}
}
assert_eq!(total_bytes, v.len());
MirrorSparseMatrix { x: v }
}
pub fn nrows(&self) -> usize {
get_u32_at_pos(&self.x, 40) as usize
}
pub fn ncols(&self) -> usize {
get_u32_at_pos(&self.x, 44) as usize
}
fn start_of_row(&self, row: usize) -> usize {
let pos = MirrorSparseMatrix::header_size() + row * 4;
get_u32_at_pos(&self.x, pos) as usize
}
pub fn row_label(&self, i: usize) -> String {
let row_labels_start = MirrorSparseMatrix::header_size() + self.nrows() * 4;
let label_start = get_u32_at_pos(&self.x, row_labels_start + i * 4);
let label_stop = get_u32_at_pos(&self.x, row_labels_start + (i + 1) * 4);
let label_bytes = &self.x[label_start as usize..label_stop as usize];
String::from_utf8(label_bytes.to_vec()).unwrap()
}
pub fn col_label(&self, j: usize) -> String {
let col_labels_start = MirrorSparseMatrix::header_size() + self.nrows() * 8 + 4;
let label_start = get_u32_at_pos(&self.x, col_labels_start + j * 4);
let label_stop = get_u32_at_pos(&self.x, col_labels_start + (j + 1) * 4);
let label_bytes = &self.x[label_start as usize..label_stop as usize];
String::from_utf8(label_bytes.to_vec()).unwrap()
}
pub fn row(&self, row: usize) -> Vec<(usize, usize)> {
let mut all = Vec::<(usize, usize)>::new();
let s = self.start_of_row(row);
if self.storage_version() == 0 {
let m1 = get_u16_at_pos(&self.x, s) as usize;
let m2 = get_u16_at_pos(&self.x, s + 2) as usize;
let m4 = get_u16_at_pos(&self.x, s + 4) as usize;
for i in 0..m1 {
let pos = s + 6 + 3 * i;
let col = get_u16_at_pos(&self.x, pos) as usize;
let entry = get_u8_at_pos(&self.x, pos + 2) as usize;
all.push((col, entry));
}
for i in 0..m2 {
let pos = s + 6 + 3 * m1 + 4 * i;
let col = get_u16_at_pos(&self.x, pos) as usize;
let entry = get_u16_at_pos(&self.x, pos + 2) as usize;
all.push((col, entry));
}
for i in 0..m4 {
let pos = s + 6 + 3 * m1 + 4 * m2 + 6 * i;
let col = get_u16_at_pos(&self.x, pos) as usize;
let entry = get_u32_at_pos(&self.x, pos + 2) as usize;
all.push((col, entry));
}
} else {
let m1 = get_u32_at_pos(&self.x, s) as usize;
let m2 = get_u32_at_pos(&self.x, s + 4) as usize;
let m4 = get_u32_at_pos(&self.x, s + 8) as usize;
for i in 0..m1 {
let pos = s + 12 + 5 * i;
let col = get_u32_at_pos(&self.x, pos) as usize;
let entry = get_u8_at_pos(&self.x, pos + 4) as usize;
all.push((col, entry));
}
for i in 0..m2 {
let pos = s + 12 + 5 * m1 + 6 * i;
let col = get_u32_at_pos(&self.x, pos) as usize;
let entry = get_u16_at_pos(&self.x, pos + 4) as usize;
all.push((col, entry));
}
for i in 0..m4 {
let pos = s + 12 + 5 * m1 + 6 * m2 + 8 * i;
let col = get_u32_at_pos(&self.x, pos) as usize;
let entry = get_u32_at_pos(&self.x, pos + 4) as usize;
all.push((col, entry));
}
}
all.sort();
all
}
pub fn sum_of_row(&self, row: usize) -> usize {
let s = self.start_of_row(row);
let mut sum = 0;
if self.storage_version() == 0 {
let m1 = get_u16_at_pos(&self.x, s) as usize;
let m2 = get_u16_at_pos(&self.x, s + 2) as usize;
let m4 = get_u16_at_pos(&self.x, s + 4) as usize;
for i in 0..m1 {
let pos = s + 6 + 3 * i + 2;
sum += get_u8_at_pos(&self.x, pos) as usize;
}
for i in 0..m2 {
let pos = s + 6 + 3 * m1 + 4 * i + 2;
sum += get_u16_at_pos(&self.x, pos) as usize;
}
for i in 0..m4 {
let pos = s + 6 + 3 * m1 + 4 * m2 + 6 * i + 2;
sum += get_u32_at_pos(&self.x, pos) as usize;
}
} else {
let m1 = get_u32_at_pos(&self.x, s) as usize;
let m2 = get_u32_at_pos(&self.x, s + 4) as usize;
let m4 = get_u32_at_pos(&self.x, s + 8) as usize;
for i in 0..m1 {
let pos = s + 12 + 5 * i + 4;
sum += get_u8_at_pos(&self.x, pos) as usize;
}
for i in 0..m2 {
let pos = s + 12 + 5 * m1 + 6 * i + 4;
sum += get_u16_at_pos(&self.x, pos) as usize;
}
for i in 0..m4 {
let pos = s + 12 + 5 * m1 + 6 * m2 + 8 * i + 4;
sum += get_u32_at_pos(&self.x, pos) as usize;
}
}
sum
}
#[allow(dead_code)]
pub fn sum_of_col(&self, col: usize) -> usize {
let mut sum = 0;
if self.storage_version() == 0 {
for row in 0..self.nrows() {
let s = self.start_of_row(row);
let m1 = get_u16_at_pos(&self.x, s) as usize;
let m2 = get_u16_at_pos(&self.x, s + 2) as usize;
let m4 = get_u16_at_pos(&self.x, s + 4) as usize;
for i in 0..m1 {
let pos = s + 6 + 3 * i;
let f = get_u16_at_pos(&self.x, pos) as usize;
if f == col {
sum += get_u8_at_pos(&self.x, pos + 2) as usize;
}
}
for i in 0..m2 {
let pos = s + 6 + 3 * m1 + 4 * i;
let f = get_u16_at_pos(&self.x, pos) as usize;
if f == col {
sum += get_u16_at_pos(&self.x, pos + 2) as usize;
}
}
for i in 0..m4 {
let pos = s + 6 + 3 * m1 + 4 * m2 + 6 * i;
let f = get_u16_at_pos(&self.x, pos) as usize;
if f == col {
sum += get_u32_at_pos(&self.x, pos + 2) as usize;
}
}
}
} else {
for row in 0..self.nrows() {
let s = self.start_of_row(row);
let m1 = get_u32_at_pos(&self.x, s) as usize;
let m2 = get_u32_at_pos(&self.x, s + 4) as usize;
let m4 = get_u32_at_pos(&self.x, s + 8) as usize;
for i in 0..m1 {
let pos = s + 12 + 5 * i;
let f = get_u32_at_pos(&self.x, pos) as usize;
if f == col {
sum += get_u8_at_pos(&self.x, pos + 4) as usize;
}
}
for i in 0..m2 {
let pos = s + 12 + 5 * m1 + 6 * i;
let f = get_u32_at_pos(&self.x, pos) as usize;
if f == col {
sum += get_u16_at_pos(&self.x, pos + 4) as usize;
}
}
for i in 0..m4 {
let pos = s + 12 + 5 * m1 + 6 * m2 + 8 * i;
let f = get_u32_at_pos(&self.x, pos) as usize;
if f == col {
sum += get_u32_at_pos(&self.x, pos + 4) as usize;
}
}
}
}
sum
}
pub fn value(&self, row: usize, col: usize) -> usize {
let s = self.start_of_row(row);
if self.storage_version() == 0 {
let m1 = get_u16_at_pos(&self.x, s) as usize;
let m2 = get_u16_at_pos(&self.x, s + 2) as usize;
let m4 = get_u16_at_pos(&self.x, s + 4) as usize;
for i in 0..m1 {
let pos = s + 6 + 3 * i;
let f = get_u16_at_pos(&self.x, pos) as usize;
if f == col {
return get_u8_at_pos(&self.x, pos + 2) as usize;
}
}
for i in 0..m2 {
let pos = s + 6 + 3 * m1 + 4 * i;
let f = get_u16_at_pos(&self.x, pos) as usize;
if f == col {
return get_u16_at_pos(&self.x, pos + 2) as usize;
}
}
for i in 0..m4 {
let pos = s + 6 + 3 * m1 + 4 * m2 + 6 * i;
let f = get_u16_at_pos(&self.x, pos) as usize;
if f == col {
return get_u16_at_pos(&self.x, pos + 2) as usize;
}
}
0
} else {
let m1 = get_u32_at_pos(&self.x, s) as usize;
let m2 = get_u32_at_pos(&self.x, s + 4) as usize;
let m4 = get_u32_at_pos(&self.x, s + 8) as usize;
for i in 0..m1 {
let pos = s + 12 + 5 * i;
let f = get_u32_at_pos(&self.x, pos) as usize;
if f == col {
return get_u8_at_pos(&self.x, pos + 4) as usize;
}
}
for i in 0..m2 {
let pos = s + 12 + 5 * m1 + 6 * i;
let f = get_u32_at_pos(&self.x, pos) as usize;
if f == col {
return get_u16_at_pos(&self.x, pos + 4) as usize;
}
}
for i in 0..m4 {
let pos = s + 12 + 5 * m1 + 6 * m2 + 8 * i;
let f = get_u32_at_pos(&self.x, pos) as usize;
if f == col {
return get_u16_at_pos(&self.x, pos + 4) as usize;
}
}
0
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use io_utils::*;
use pretty_trace::*;
#[test]
fn test_mirror_sparse_matrix() {
PrettyTrace::new().on();
for storage_version in 0..2 {
printme!(storage_version);
let mut x = Vec::<Vec<(i32, i32)>>::new();
let (n, k) = (10, 100);
for i in 0..n {
let mut y = Vec::<(i32, i32)>::new();
for j in 0..k {
let col: usize;
if storage_version == 0 {
col = i + j;
} else {
col = 10000 * i + j;
}
y.push((col as i32, (i * i * j) as i32));
}
x.push(y);
}
let test_row = 9;
let mut row_sum = 0;
for j in 0..x[test_row].len() {
row_sum += x[test_row][j].1 as usize;
}
let (mut row_labels, mut col_labels) = (Vec::<String>::new(), Vec::<String>::new());
for i in 0..n {
row_labels.push(format!("row {}", i));
}
for j in 0..k {
col_labels.push(format!("col {}", j));
}
let y = MirrorSparseMatrix::build_from_vec(&x, &row_labels, &col_labels);
let row_sum2 = y.sum_of_row(test_row);
assert_eq!(row_sum, row_sum2);
let test_col;
if storage_version == 0 {
test_col = 15;
} else {
test_col = 90001;
}
let mut col_sum = 0;
for i in 0..x.len() {
for j in 0..x[i].len() {
assert_eq!(x[i][j].1 as usize, y.value(i, x[i][j].0 as usize));
if x[i][j].0 as usize == test_col {
col_sum += x[i][j].1 as usize;
}
}
}
let col_sum2 = y.sum_of_col(test_col);
printme!(col_sum, col_sum2);
assert_eq!(col_sum, col_sum2);
assert_eq!(y.storage_version(), storage_version);
assert_eq!(y.row_label(5), row_labels[5]);
assert_eq!(y.col_label(7), col_labels[7]);
}
}
}