use helpers::{create_or_open_file, hash, get_size};
use helpers::{read_one_line, seek_from_start, write_buffer};
use {SEP};
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::error::Error;
use std::fmt::{self, Display};
use std::fs::{self, File};
use std::hash::{Hash, Hasher};
use std::io::{BufRead, BufReader, BufWriter};
use std::mem;
use std::ops::AddAssign;
use std::str::FromStr;
/// Suffix appended to an index or data path to name the temporary file that is
/// written during a rewrite and then renamed over the real file.
const TEMP_SUFFIX: &'static str = ".hash_file";
/// Suffix appended to the index path to name the companion data file.
const DAT_SUFFIX: &'static str = ".dat";
/// One record of the index file: a key together with the location of its
/// value in the data file. Serialised as `key SEP idx SEP count`.
struct KeyIndex<K: Display + FromStr + Hash> {
/// The key itself; only this field takes part in hashing.
key: K,
/// Offset passed to `seek_from_start` on the data file before the value's
/// line is read (presumably a byte offset from the start of that file).
idx: u64,
/// The "overwritten" count: how many times the value for this key has been replaced.
count: usize,
}
impl<K: Display + FromStr + Hash> KeyIndex<K> {
    /// Builds a record for `key` that does not point anywhere yet: the data
    /// offset and the overwrite count both start at zero.
    pub fn new(key: K) -> Self {
        let (idx, count) = (0, 0);
        KeyIndex { key: key, idx: idx, count: count }
    }
}
impl<K: Display + FromStr + Hash> Display for KeyIndex<K> {
    /// Writes the record as `key`, `idx` and `count` joined by `SEP`,
    /// which is the layout `FromStr` reads back.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f,
               "{key}{sep}{idx}{sep}{count}",
               key = self.key,
               sep = SEP,
               idx = self.idx,
               count = self.count)
    }
}
impl<K: Display + FromStr + Hash> FromStr for KeyIndex<K> {
    type Err = String;

    /// Parses a `key SEP idx SEP count` record.
    ///
    /// Fields are read left to right; a missing field is treated as an empty
    /// string, so it fails with the same message as an unparsable one. Any
    /// fields after the third are ignored.
    fn from_str(s: &str) -> Result<KeyIndex<K>, String> {
        let mut fields = s.split(SEP);

        let key = match fields.next().unwrap_or("").parse::<K>() {
            Ok(parsed) => parsed,
            Err(_) => return Err("Cannot parse the key!".to_owned()),
        };

        let idx = match fields.next().unwrap_or("").parse::<u64>() {
            Ok(parsed) => parsed,
            Err(_) => return Err("Cannot parse the index!".to_owned()),
        };

        let count = match fields.next().unwrap_or("").parse::<usize>() {
            Ok(parsed) => parsed,
            Err(_) => return Err("Cannot parse 'overwritten' count".to_owned()),
        };

        Ok(KeyIndex { key: key, idx: idx, count: count })
    }
}
impl<K: Display + FromStr + Hash> AddAssign for KeyIndex<K> {
    /// Merges a newer record for the same key into this (older) one.
    ///
    /// The data offset is taken from `other`, because the newest value is the
    /// one that must be read back. The overwrite count grows by one for the
    /// replacement itself plus however many times `other` had already been
    /// overwritten before the merge, so overwrites recorded on the incoming
    /// record are not lost.
    fn add_assign(&mut self, other: KeyIndex<K>) {
        self.idx = other.idx;
        self.count += other.count + 1;
    }
}
impl<K: Display + FromStr + Hash> Hash for KeyIndex<K> {
    /// Feeds only the key into the hasher, so a record hashes exactly like
    /// its bare key regardless of `idx` and `count`.
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.key, state)
    }
}
/// A key/value store backed by two files: an index file holding one
/// `KeyIndex` record per line, kept in ascending order of key hash, and a data
/// file holding the values. New entries are collected in memory and merged
/// into the index once more than `capacity` of them are pending.
pub struct HashFile<K: Display + FromStr + Hash, V: Display + FromStr> {
/// Handle to the index file.
file: File,
/// Path of the index file.
path: String,
/// Size of the index file as last reported by `get_size`.
size: u64,
/// Handle to the data file holding the values.
data_file: File,
/// Path of the data file (`path` + `DAT_SUFFIX`).
data_path: String,
/// Offset in the data file at which the next value is written.
data_idx: u64,
/// Pending entries not yet merged into the index file, keyed by key hash.
hashed: BTreeMap<u64, (KeyIndex<K>, V)>,
/// Number of pending entries that may accumulate before a flush is triggered.
capacity: usize,
/// Length of an index line; `get` uses `line_length + 1` as the row stride.
/// It is handed to `write_buffer` by mutable reference, presumably so that
/// helper can pad rows and track the widest one (confirm against `helpers`).
line_length: usize,
}
impl<K: Display + FromStr + Hash, V: Display + FromStr> HashFile<K, V> {
/// Opens (or creates) the index file at `path` and its companion data file
/// at `path` + `DAT_SUFFIX`.
///
/// For a non-empty index the length of its first line becomes the initial
/// `line_length`. The write offset into the data file starts at the data
/// file's current size, so values already stored by an earlier session are
/// appended to rather than overwritten by the first flush.
///
/// # Errors
///
/// Returns the helper's error message when a file cannot be opened, read,
/// rewound or measured.
pub fn new(path: &str) -> Result<HashFile<K, V>, String> {
    let mut file = try!(create_or_open_file(&path));
    let file_size = get_size(&file).unwrap_or(0);

    let line_length = if file_size > 0 {
        let line = try!(read_one_line(&mut file));
        line.len()
    } else {
        0
    };
    // Reading the first line moved the cursor; put it back at the start.
    try!(seek_from_start(&mut file, 0));

    let data_path = format!("{}{}", path, DAT_SUFFIX);
    let data_file = try!(create_or_open_file(&data_path));
    // New values must land after whatever the data file already contains.
    let data_idx = try!(get_size(&data_file));

    Ok(HashFile {
        hashed: BTreeMap::new(),
        capacity: 0,
        line_length: line_length,
        file: file,
        data_file: data_file,
        data_path: data_path,
        data_idx: data_idx,
        path: path.to_owned(),
        size: file_size,
    })
}
pub fn set_capacity(mut self, capacity: usize) -> HashFile<K, V> {
self.capacity = capacity;
self
}
fn rename_temp_file(&mut self, rename_dat: bool) -> Result<(), String> {
if rename_dat {
try!(fs::rename(format!("{}{}", &self.data_path, TEMP_SUFFIX), &self.data_path)
.map_err(|e| format!("Cannot rename the temp data file! ({})", e.description())));
self.data_file = try!(create_or_open_file(&self.data_path));
}
try!(fs::rename(format!("{}{}", &self.path, TEMP_SUFFIX), &self.path)
.map_err(|e| format!("Cannot rename the temp file! ({})", e.description())));
self.file = try!(create_or_open_file(&self.path));
self.size = try!(get_size(&self.file));
Ok(())
}
/// Flushes any pending entries, then rewrites both files.
///
/// Every index record is read in order, its value is copied into a new data
/// file, and the record is written to a new index file with its offset
/// updated to the value's new position. The temp files then replace the
/// live ones.
///
/// # Errors
///
/// Returns an error when flushing, seeking, reading, parsing a record,
/// writing, or renaming fails.
pub fn finish(&mut self) -> Result<(), String> {
    if self.hashed.len() > 0 {
        try!(self.flush_map());
    }
    // An earlier `get` may have left the index cursor in the middle of the
    // file; the rewrite has to start from the first record.
    try!(seek_from_start(&mut self.file, 0));
    {
        let buf_reader = BufReader::new(&mut self.file);
        let mut out_file = try!(create_or_open_file(&format!("{}{}", &self.path, TEMP_SUFFIX)));
        let mut buf_writer = BufWriter::new(&mut out_file);
        let mut data_file = try!(create_or_open_file(&format!("{}{}", &self.data_path, TEMP_SUFFIX)));
        let mut data_writer = BufWriter::new(&mut data_file);
        // Offsets are rebuilt from scratch for the new data file.
        self.data_idx = 0;
        for line in buf_reader.lines().filter_map(|l| l.ok()) {
            let mut key_index = try!(line.parse::<KeyIndex<K>>());
            try!(seek_from_start(&mut self.data_file, key_index.idx));
            let value = try!(read_one_line(&mut self.data_file));
            key_index.idx = self.data_idx;
            self.data_idx += try!(write_buffer(&mut data_writer, &value, &mut 0));
            try!(write_buffer(&mut buf_writer, &key_index.to_string(), &mut self.line_length));
        }
        // Dropping a `BufWriter` discards flush errors; surface them before
        // the temp files are renamed over the live ones.
        try!(::std::io::Write::flush(&mut data_writer)
            .map_err(|e| format!("Cannot flush the temp data file! ({})", e)));
        try!(::std::io::Write::flush(&mut buf_writer)
            .map_err(|e| format!("Cannot flush the temp file! ({})", e)));
    }
    self.rename_temp_file(true)
}
/// Merges the pending in-memory entries into the index file.
///
/// The index file and the pending map are both ordered by key hash, so they
/// are walked in lockstep and merged into a temp index file. Every pending
/// value is written to the data file starting at `data_idx`. When a hash is
/// present on both sides the on-disk record is updated through `+=`.
///
/// An index line whose key cannot be parsed is dropped from the merged
/// output while pending entries remain to be compared against it.
///
/// # Errors
///
/// Returns an error when seeking, writing, flushing or renaming fails.
fn flush_map(&mut self) -> Result<(), String> {
    let map = mem::replace(&mut self.hashed, BTreeMap::new());
    {
        try!(seek_from_start(&mut self.file, 0));
        try!(seek_from_start(&mut self.data_file, self.data_idx));
        let buf_reader = BufReader::new(&mut self.file);
        let mut out_file = try!(create_or_open_file(&format!("{}{}", &self.path, TEMP_SUFFIX)));
        let mut buf_writer = BufWriter::new(&mut out_file);
        let mut data_writer = BufWriter::new(&mut self.data_file);
        let mut file_iter = buf_reader.lines().filter_map(|l| l.ok()).peekable();
        let mut map_iter = map.into_iter().peekable();
        loop {
            // `None` means the peeked index line has an unparsable key.
            let comparison = match (file_iter.peek(), map_iter.peek()) {
                (Some(file_line), Some(&(btree_key_hash, _))) => {
                    let key = file_line.split(SEP).next().unwrap_or("");
                    key.parse::<K>().ok().map(|k_v| hash(&k_v).cmp(&btree_key_hash))
                },
                (Some(_), None) => Some(Ordering::Less),
                (None, Some(_)) => Some(Ordering::Greater),
                (None, None) => break,
            };
            let compare_result = match comparison {
                Some(ordering) => ordering,
                None => {
                    // Peeking does not advance the iterator, so the bad line
                    // has to be consumed here or the loop would never end.
                    file_iter.next();
                    continue;
                },
            };
            match compare_result {
                Ordering::Equal => {
                    // Both sides were just peeked as `Some`.
                    let file_line = file_iter.next().unwrap();
                    let (_, (mut btree_key, val)) = map_iter.next().unwrap();
                    btree_key.idx = self.data_idx;
                    self.data_idx += try!(write_buffer(&mut data_writer, &val.to_string(), &mut 0));
                    // The value is already in the data file, so if the old
                    // record is corrupt, keep the new record rather than
                    // losing the entry.
                    let merged = match file_line.parse::<KeyIndex<K>>() {
                        Ok(mut file_key) => {
                            file_key += btree_key;
                            file_key
                        },
                        Err(_) => btree_key,
                    };
                    try!(write_buffer(&mut buf_writer, &merged.to_string(),
                                      &mut self.line_length));
                },
                Ordering::Less => {
                    // The on-disk record sorts first: copy it through unchanged.
                    try!(write_buffer(&mut buf_writer, &file_iter.next().unwrap(),
                                      &mut self.line_length));
                },
                Ordering::Greater => {
                    // The pending entry sorts first: it is new to the index.
                    let (_, (mut btree_key, val)) = map_iter.next().unwrap();
                    btree_key.idx = self.data_idx;
                    self.data_idx += try!(write_buffer(&mut data_writer, &val.to_string(), &mut 0));
                    try!(write_buffer(&mut buf_writer, &(btree_key.to_string()),
                                      &mut self.line_length));
                },
            }
        }
        // Dropping a `BufWriter` discards flush errors; surface them before
        // the temp index is renamed over the live one.
        try!(::std::io::Write::flush(&mut data_writer)
            .map_err(|e| format!("Cannot flush the data file! ({})", e)));
        try!(::std::io::Write::flush(&mut buf_writer)
            .map_err(|e| format!("Cannot flush the temp file! ({})", e)));
    }
    self.rename_temp_file(false)
}
/// Queues `value` under `key` in the in-memory map.
///
/// Pending entries are keyed by the key's hash. If an entry with the same
/// hash is already pending it is replaced, and its overwrite count carries
/// over incremented by one, so repeated overwrites before a flush are all
/// counted. Otherwise the entry is added, and once the number of pending
/// entries exceeds `capacity` they are flushed to disk.
///
/// # Errors
///
/// Returns the error from the flush, if one was triggered and failed.
pub fn insert(&mut self, key: K, value: V) -> Result<(), String> {
    let mut key_idx = KeyIndex::new(key);
    let hashed = hash(&key_idx);
    if let Some(key_val) = self.hashed.get_mut(&hashed) {
        // Continue from the replaced entry's count instead of restarting at 1.
        key_idx.count = key_val.0.count + 1;
        *key_val = (key_idx, value);
        return Ok(());
    }
    self.hashed.insert(hashed, (key_idx, value));
    if self.hashed.len() > self.capacity {
        try!(self.flush_map());
    }
    Ok(())
}
/// Looks `key` up in the on-disk index and returns its value together with
/// its overwrite count, or `None` when the key is not present.
///
/// Only the files are consulted; entries still pending in memory are not
/// searched. The lookup is a binary search over index rows, which are
/// ordered by key hash. It assumes every row occupies `line_length + 1`
/// bytes (presumably guaranteed by `write_buffer` padding; confirm against
/// `helpers`).
///
/// # Errors
///
/// Returns an error when seeking or reading fails, or when a probed record
/// or the stored value cannot be parsed.
pub fn get(&mut self, key: &K) -> Result<Option<(V, usize)>, String> {
    let hashed_key = hash(key);
    if self.size == 0 || try!(get_size(&self.data_file)) == 0 {
        return Ok(None)
    }
    let row_length = (self.line_length + 1) as u64;
    // Search the half-open row range [low, high). Working in row numbers with
    // an exclusive upper bound means `high` never has to go below zero and no
    // probe ever lands past the last row, so a missing key ends the loop
    // cleanly instead of underflowing or reading at end-of-file.
    let mut low = 0;
    let mut high = (self.size + row_length - 1) / row_length;
    while low < high {
        let mid = low + (high - low) / 2;
        try!(seek_from_start(&mut self.file, mid * row_length));
        let line = try!(read_one_line(&mut self.file));
        let hashed = {
            let key_str = line.split(SEP).next().unwrap_or("");
            let key = try!(key_str.parse::<K>()
                                  .map_err(|_| format!("Cannot parse the key from file!")));
            hash(&key)
        };
        if hashed == hashed_key {
            let key_index = try!(line.parse::<KeyIndex<K>>());
            try!(seek_from_start(&mut self.data_file, key_index.idx));
            let line = try!(read_one_line(&mut self.data_file));
            let value = try!(line.parse::<V>()
                                 .map_err(|_| format!("Cannot parse the value from file!")));
            return Ok(Some((value, key_index.count)))
        } else if hashed < hashed_key {
            low = mid + 1;
        } else {
            high = mid;
        }
    }
    Ok(None)
}
}