// Crate-level documentation is pulled in from the README.md located one
// directory above this file.
#![doc = include_str!("../README.md")]
// Publicly exposed module.
pub mod unicode_string_helpers;
// The modules below are private; only the items named in the `pub use`
// lines are re-exported at the crate root.
mod database;
mod key;
pub use key::Key;
mod records;
pub use records::RecordID;
mod table_config;
pub use table_config::{TableConfig, DistanceFunction, DefaultTableConfig, MAX_KEY_LENGTH};
mod key_groups;
mod sym_spell;
mod perf_counters;
mod table;
mod encode_decode;
pub use encode_decode::Coder;
pub use table::Table;
pub use perf_counters::PerfCounterFields;
// `BincodeCoder` is only re-exported when the "bincode" feature is enabled.
#[cfg(feature = "bincode")]
pub use encode_decode::bincode_interface::BincodeCoder;
// NOTE(review): the reason for `allow(dead_code)` on this module is not
// stated here — presumably not every helper is used in every build; confirm.
#[cfg(feature = "bincode")]
#[allow(dead_code)]
mod bincode_helpers;
// `MsgPackCoder` is only re-exported when the "msgpack" feature is enabled.
#[cfg(feature = "msgpack")]
pub use encode_decode::msgpack_interface::MsgPackCoder;
// `DefaultCoder` selection: "msgpack" takes priority whenever it is enabled;
// "bincode" is used only when it is enabled and "msgpack" is not. With
// neither feature enabled, no `DefaultCoder` alias is defined in this file.
#[cfg(feature = "msgpack")]
pub type DefaultCoder = MsgPackCoder;
#[cfg(all(feature = "bincode", not(feature = "msgpack")))]
pub type DefaultCoder = BincodeCoder;
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use std::fs;
use std::path::PathBuf;
use csv::ReaderBuilder;
use serde::{Serialize, Deserialize};
use crate::{*};
use crate::unicode_string_helpers::{*};
#[test]
fn geonames_test() {
    // Table configuration used by this test: `char` keys, `u8` distances and
    // an `i32` value (the geonameid of each row).
    struct Config();
    impl TableConfig for Config {
        type KeyCharT = char;
        type DistanceT = u8;
        type ValueT = i32;
        type CoderT = DefaultCoder;
    }

    // One row of the tab-separated geonames file. The file is parsed without
    // headers, so the field order here must match the column order.
    #[derive(Clone, Debug, Serialize, Deserialize)]
    struct GeoName {
        geonameid: i32,
        name: String,
        asciiname: String,
        alternatenames: String,
        latitude: f32,
        longitude: f32,
        feature_class: char,
        feature_code: String,
        country_code: String,
        cc2: String,
        admin1_code: String,
        admin2_code: String,
        admin3_code: String,
        admin4_code: String,
        population: i64,
        #[serde(deserialize_with = "default_if_empty")]
        elevation: i32,
        #[serde(deserialize_with = "default_if_empty")]
        dem: i32,
        timezone: String,
        modification_date: String,
    }

    // Deserializes the field as an `Option<T>` and substitutes `T::default()`
    // when no value is present.
    fn default_if_empty<'de, D, T>(de: D) -> Result<T, D::Error>
    where
        D: serde::Deserializer<'de>,
        T: serde::Deserialize<'de> + Default,
    {
        Option::<T>::deserialize(de).map(Option::unwrap_or_default)
    }

    // The data file is expected in the crate's manifest directory.
    let geonames_file_path =
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("geonames_megacities.txt");

    // Open the table and reset it before loading any rows.
    let mut table = Table::<Config, true>::new("geonames.rocks", Config()).unwrap();
    table.reset().unwrap();

    // Read the whole file and set up a tab-delimited, header-less parser.
    // NOTE(review): `quote(0)` together with `double_quote(false)` presumably
    // serves to disable quote handling for this data — confirm.
    let tsv_file_contents =
        fs::read_to_string(geonames_file_path).expect("Error reading geonames file");
    let mut parser_config = ReaderBuilder::new();
    parser_config
        .delimiter(b'\t')
        .has_headers(false)
        .flexible(true)
        .quote(0)
        .double_quote(false);
    let mut tsv_parser = parser_config.from_reader(tsv_file_contents.as_bytes());

    // Insert one record per row, keyed by every lowercased name of the place.
    let mut last_record_id = RecordID::NULL;
    let mut rows_inserted = 0;
    for row in tsv_parser.deserialize::<GeoName>() {
        let geoname = row.unwrap();

        // Collect the alternate names plus the primary name into a set so
        // repeated names occur only once.
        let mut unique_names: HashSet<String> = geoname
            .alternatenames
            .split(',')
            .map(|alternate| alternate.to_lowercase())
            .collect();
        unique_names.insert(geoname.name.to_lowercase());

        // Every key is passed through `unicode_truncate` with `MAX_KEY_LENGTH`.
        let keys: Vec<String> = unique_names
            .into_iter()
            .map(|name| unicode_truncate(name.as_str(), MAX_KEY_LENGTH))
            .collect();

        last_record_id = table.create(keys.as_slice(), &geoname.geonameid).unwrap();
        rows_inserted += 1;

        // Progress output once every 500 records.
        if last_record_id.0 % 500 == 499 {
            println!("inserting... {}, {}", geoname.name.to_lowercase(), last_record_id.0);
        }
    }

    // The last record id is expected to be one less than the number of rows.
    assert_eq!(last_record_id.0 + 1, rows_inserted);

    // Exact lookups must return the expected geonameids.
    let london_ids = table.lookup_exact("london").unwrap();
    let london_values: Vec<i32> = london_ids
        .map(|id| table.get_value(id).unwrap())
        .collect();
    assert!(london_values.contains(&2643743));

    let rio_ids = table.lookup_exact("rio de janeiro").unwrap();
    let rio_values: Vec<i32> = rio_ids
        .map(|id| table.get_value(id).unwrap())
        .collect();
    assert!(rio_values.contains(&3451190));

    // Drop the table, open it again under the same path, and repeat the
    // London lookup against the reopened table.
    drop(table);
    drop(london_values);
    let table = Table::<Config, true>::new("geonames.rocks", Config()).unwrap();
    let reopened_ids = table.lookup_exact("london").unwrap();
    let reopened_values: Vec<i32> = reopened_ids
        .map(|id| table.get_value(id).unwrap())
        .collect();
    assert!(reopened_values.contains(&2643743));
}
#[test]
fn fuzzy_rocks_test() {
    // Drains a lookup iterator into a vector of record IDs.
    fn collect_ids(ids: impl Iterator<Item = RecordID>) -> Vec<RecordID> {
        ids.collect()
    }

    // Table configuration used by this test: `char` keys, `u8` distances,
    // `String` values and a `MEANINGFUL_KEY_LEN` of 8.
    struct Config();
    impl TableConfig for Config {
        type KeyCharT = char;
        type DistanceT = u8;
        type ValueT = String;
        type CoderT = DefaultCoder;
        const MEANINGFUL_KEY_LEN: usize = 8;
    }

    // Open the table and reset it before inserting anything.
    let mut table = Table::<Config, true>::new("basic_test.rocks", Config()).unwrap();
    table.reset().unwrap();

    // Seed the table with one record per English weekday name.
    let sun = table.insert("Sunday", &"Nichiyoubi".to_string()).unwrap();
    let sat = table.insert("Saturday", &"Douyoubi".to_string()).unwrap();
    let fri = table.insert("Friday", &"Kinyoubi".to_string()).unwrap();
    let thu = table.insert("Thursday", &"Mokuyoubi".to_string()).unwrap();
    let wed = table.insert("Wednesday", &"Suiyoubi".to_string()).unwrap();
    let tue = table.insert("Tuesday", &"Kayoubi".to_string()).unwrap();
    let mon = table.insert("Monday", &"Getsuyoubi".to_string()).unwrap();

    // An exact lookup must return the inserted key/value pair.
    let friday_rows: Vec<(String, String)> = table
        .lookup_exact("Friday")
        .unwrap()
        .map(|id| table.get(id).unwrap())
        .collect();
    assert_eq!(friday_rows.len(), 1);
    assert_eq!(friday_rows[0].0, "Friday");
    assert_eq!(friday_rows[0].1, "Kinyoubi");

    // The same key with different capitalization is expected to find nothing.
    let lowercase_hits = collect_ids(table.lookup_exact("friday").unwrap());
    assert_eq!(lowercase_hits.len(), 0);

    // A near miss must resolve to Monday through `lookup_best`.
    let bonday_hits = collect_ids(table.lookup_best("Bonday").unwrap());
    assert_eq!(bonday_hits.len(), 1);
    assert!(bonday_hits.contains(&mon));

    // A key unrelated to any weekday is expected to yield no best match.
    let rahu_best_hits = collect_ids(table.lookup_best("Rahu").unwrap());
    assert_eq!(rahu_best_hits.len(), 0);

    // Fuzzy lookup with a distance limit of 2: only Saturday, at distance 0.
    let saturday_rows: Vec<(String, String, u8)> = table
        .lookup_fuzzy("Saturday", Some(2))
        .unwrap()
        .map(|(id, distance)| {
            let (key, value) = table.get(id).unwrap();
            (key, value, distance)
        })
        .collect();
    assert_eq!(saturday_rows.len(), 1);
    assert_eq!(saturday_rows[0].0, "Saturday");
    assert_eq!(saturday_rows[0].1, "Douyoubi");
    assert_eq!(saturday_rows[0].2, 0);

    // Same limit for "Tuesday": Tuesday at distance 0 and Thursday at 2.
    let tuesday_rows: Vec<(String, String, u8)> = table
        .lookup_fuzzy("Tuesday", Some(2))
        .unwrap()
        .map(|(id, distance)| {
            let (key, value) = table.get(id).unwrap();
            (key, value, distance)
        })
        .collect();
    assert_eq!(tuesday_rows.len(), 2);
    assert!(tuesday_rows.contains(&("Tuesday".to_string(), "Kayoubi".to_string(), 0)));
    assert!(tuesday_rows.contains(&("Thursday".to_string(), "Mokuyoubi".to_string(), 2)));

    // The unrelated key is expected to find nothing within distance 2 either.
    let rahu_fuzzy_hits: Vec<(RecordID, u8)> =
        table.lookup_fuzzy("Rahu", Some(2)).unwrap().collect();
    assert_eq!(rahu_fuzzy_hits.len(), 0);

    // A key much longer than `MEANINGFUL_KEY_LEN` is expected to still match
    // Sunday in a raw fuzzy lookup.
    let long_key_hits = collect_ids(table.lookup_fuzzy_raw("Sunday. That's my fun day.").unwrap());
    assert_eq!(long_key_hits.len(), 1);
    assert_eq!(long_key_hits[0], sun);

    // After deleting Tuesday its record must be gone, and the best match for
    // "Tuesday" is expected to become Thursday.
    table.delete(tue).unwrap();
    assert!(table.get_one_key(tue).is_err());
    let tuesday_best_hits = collect_ids(table.lookup_best("Tuesday").unwrap());
    assert_eq!(tuesday_best_hits.len(), 1);
    assert!(tuesday_best_hits.contains(&thu));

    // After deleting Saturday nothing is expected to match "Saturday".
    table.delete(sat).unwrap();
    assert!(table.get_one_key(sat).is_err());
    let deleted_saturday_hits = collect_ids(table.lookup_fuzzy_raw("Saturday").unwrap());
    assert_eq!(deleted_saturday_hits.len(), 0);

    // Replace both the keys and the value of the Wednesday record.
    table.replace_keys(wed, &["Miercoles"]).unwrap();
    table.replace_value(wed, &"Zhousan".to_string()).unwrap();
    let miercoles_rows: Vec<(String, String)> = table
        .lookup_exact("Miercoles")
        .unwrap()
        .map(|id| (table.get_one_key(id).unwrap(), table.get_value(id).unwrap()))
        .collect();
    assert_eq!(miercoles_rows.len(), 1);
    assert_eq!(miercoles_rows[0].0, "Miercoles");
    assert_eq!(miercoles_rows[0].1, "Zhousan");

    // The replaced key must also be reachable through a fuzzy lookup.
    let mercoledi_hits = collect_ids(table.lookup_fuzzy_raw("Mercoledi").unwrap());
    assert_eq!(mercoledi_hits.len(), 1);
    assert_eq!(mercoledi_hits[0], wed);

    // Replacing keys or the value must fail for the deleted record, for an
    // empty key list, and for the NULL record id.
    let no_keys: &[&str] = &[];
    assert!(table.replace_keys(sat, &["Sabado"]).is_err());
    assert!(table.replace_value(sat, &"Zhouliu".to_string()).is_err());
    assert!(table.replace_keys(sat, no_keys).is_err());
    assert!(table.replace_keys(RecordID::NULL, &["Nullday"]).is_err());
    assert!(table.replace_value(RecordID::NULL, &"Null".to_string()).is_err());

    // Creating a record without any keys must fail.
    assert!(table.create(no_keys, &"Douyoubi".to_string()).is_err());

    // Duplicate keys, whether given at creation or added later, are expected
    // to be counted once.
    let sat = table.create(&["Saturday", "Saturday"], &"Douyoubi".to_string()).unwrap();
    table.add_keys(sat, &["Saturday", "Saturday"]).unwrap();
    assert_eq!(table.keys_count(sat).unwrap(), 1);

    // Two new keys bring the record to three keys, each of which must lead
    // back to the same record.
    table.add_keys(sat, &["Sabado", "Zhouliu"]).unwrap();
    assert_eq!(table.keys_count(sat).unwrap(), 3);
    let saturday_hits = collect_ids(table.lookup_fuzzy_raw("Saturday").unwrap());
    assert_eq!(saturday_hits.len(), 1);
    assert_eq!(saturday_hits[0], sat);
    let zhouliu_hits = collect_ids(table.lookup_exact("Zhouliu").unwrap());
    assert_eq!(zhouliu_hits.len(), 1);
    assert_eq!(zhouliu_hits[0], sat);
    let sabato_hits = collect_ids(table.lookup_fuzzy_raw("Sabato").unwrap());
    assert_eq!(sabato_hits.len(), 1);
    assert_eq!(sabato_hits[0], sat);

    // After removing one key, that key must no longer match exactly or
    // fuzzily, while the remaining keys still do.
    table.remove_keys(sat, &["Sabado"]).unwrap();
    assert_eq!(table.keys_count(sat).unwrap(), 2);
    let sabado_hits = collect_ids(table.lookup_exact("Sabado").unwrap());
    assert_eq!(sabado_hits.len(), 0);
    let sabato_after_removal = collect_ids(table.lookup_fuzzy_raw("Sabato").unwrap());
    assert_eq!(sabato_after_removal.len(), 0);
    let saturnsday_hits = collect_ids(table.lookup_fuzzy_raw("Saturnsday").unwrap());
    assert_eq!(saturnsday_hits.len(), 1);
    assert_eq!(saturnsday_hits[0], sat);
    let zhouliu_after_removal = collect_ids(table.lookup_exact("Zhouliu").unwrap());
    assert_eq!(zhouliu_after_removal.len(), 1);
    assert_eq!(zhouliu_after_removal[0], sat);

    // Removing every remaining key must fail and leave both keys in place.
    assert!(table.remove_keys(sat, &["Saturday", "Zhouliu"]).is_err());
    assert_eq!(table.keys_count(sat).unwrap(), 2);
    let saturday_still_there = collect_ids(table.lookup_exact("Saturday").unwrap());
    assert_eq!(saturday_still_there.len(), 1);
    let zhouliu_still_there = collect_ids(table.lookup_exact("Zhouliu").unwrap());
    assert_eq!(zhouliu_still_there.len(), 1);

    // After replacing the whole key set, the old keys must no longer match.
    table.replace_keys(sat, &["Sabado"]).unwrap();
    assert_eq!(table.keys_count(sat).unwrap(), 1);
    let replaced_saturday_hits = collect_ids(table.lookup_fuzzy_raw("Saturday").unwrap());
    assert_eq!(replaced_saturday_hits.len(), 0);
    let replaced_zhouliu_hits = collect_ids(table.lookup_fuzzy_raw("Zhouliu").unwrap());
    assert_eq!(replaced_zhouliu_hits.len(), 0);

    // Adding a key the record already has must not change the count;
    // a duplicated new key counts once.
    table.add_keys(sat, &["Sabado"]).unwrap();
    assert_eq!(table.keys_count(sat).unwrap(), 1);
    table.add_keys(sat, &["Saturday", "Saturday"]).unwrap();
    assert_eq!(table.keys_count(sat).unwrap(), 2);

    // Give Friday four extra keys, then remove "Vendredi".
    table.add_keys(fri, &["Geumyoil", "Viernes", "Venerdi", "Vendredi"]).unwrap();
    assert_eq!(table.keys_count(fri).unwrap(), 5);
    table.remove_keys(fri, &["Vendredi"]).unwrap();
    assert_eq!(table.keys_count(fri).unwrap(), 4);

    // "Vendredi" still produces one fuzzy hit, presumably through the
    // remaining "Venerdi" key (it stops matching once that key is removed).
    let vendredi_hits = collect_ids(table.lookup_fuzzy_raw("Vendredi").unwrap());
    assert_eq!(vendredi_hits.len(), 1);

    // Removing the already-removed key again must succeed and change nothing.
    table.remove_keys(fri, &["Vendredi"]).unwrap();
    let vendredi_hits_again = collect_ids(table.lookup_fuzzy_raw("Vendredi").unwrap());
    assert_eq!(vendredi_hits_again.len(), 1);
    assert_eq!(table.keys_count(fri).unwrap(), 4);

    // With "Venerdi" removed as well, "Vendredi" must match nothing.
    table.remove_keys(fri, &["Venerdi"]).unwrap();
    let vendredi_hits_final = collect_ids(table.lookup_fuzzy_raw("Vendredi").unwrap());
    assert_eq!(vendredi_hits_final.len(), 0);
    assert_eq!(table.keys_count(fri).unwrap(), 3);

    // Insert a non-ASCII key as a `&str` and look it up with a `char` array.
    let sun_japanese = table.insert("日曜日", &"Sunday".to_string()).unwrap();
    let sunday_chars = ['日', '曜', '日'];
    let sunday_exact_hits = collect_ids(table.lookup_exact(&sunday_chars).unwrap());
    assert_eq!(sunday_exact_hits.len(), 1);
    assert!(sunday_exact_hits.contains(&sun_japanese));
    let sunday_fuzzy_hits = collect_ids(table.lookup_fuzzy_raw(&sunday_chars).unwrap());
    assert_eq!(sunday_fuzzy_hits.len(), 1);
    assert!(sunday_fuzzy_hits.contains(&sun_japanese));

    // The reverse: insert with a `char` array and look it up with a `&str`.
    let saturday_chars = ['土', '曜', '日'];
    let sat_japanese = table.insert(&saturday_chars, &"Saturday".to_string()).unwrap();
    let saturday_exact_hits = collect_ids(table.lookup_exact("土曜日").unwrap());
    assert_eq!(saturday_exact_hits.len(), 1);
    assert!(saturday_exact_hits.contains(&sat_japanese));

    // The two Japanese keys differ in one character, so a raw fuzzy lookup
    // must return both records.
    let japanese_fuzzy_hits = collect_ids(table.lookup_fuzzy_raw("土曜日").unwrap());
    assert_eq!(japanese_fuzzy_hits.len(), 2);
    assert!(japanese_fuzzy_hits.contains(&sat_japanese));
    assert!(japanese_fuzzy_hits.contains(&sun_japanese));
}
#[test]
fn non_unicode_key_test() {
    // Table configuration for byte-string keys: `u8` key characters with
    // `UTF8_KEYS` turned off, `f32` values, `MAX_DELETES` of 1 and a
    // `MEANINGFUL_KEY_LEN` of 8.
    struct Config();
    impl TableConfig for Config {
        type KeyCharT = u8;
        type DistanceT = u8;
        type ValueT = f32;
        type CoderT = DefaultCoder;
        const MAX_DELETES: usize = 1;
        const MEANINGFUL_KEY_LEN: usize = 8;
        const UTF8_KEYS: bool = false;
    }

    // Open the table and reset it before inserting anything.
    let mut table = Table::<Config, false>::new("non_unicode_test.rocks", Config()).unwrap();
    table.reset().unwrap();

    // Insert a few records keyed by raw byte strings.
    let one = table.insert(b"One", &1.0).unwrap();
    let _two = table.insert(b"Dos", &2.0).unwrap();
    let _three = table.insert(b"San", &3.0).unwrap();
    // Use the standard-library constant rather than a hand-typed approximation
    // of pi: as an `f32` it is the same value, and it avoids Clippy's
    // `approx_constant` and `excessive_precision` lints.
    let pi = table.insert(b"Pi", &std::f32::consts::PI).unwrap();

    // "P" is one deletion away from "Pi", so the best match must be that record.
    let best_matches: Vec<RecordID> = table.lookup_best(b"P").unwrap().collect();
    assert_eq!(best_matches.len(), 1);
    assert!(best_matches.contains(&pi));

    // "ne" is one deletion away from "One", so a raw fuzzy lookup must find it.
    let fuzzy_matches: Vec<RecordID> = table.lookup_fuzzy_raw(b"ne").unwrap().collect();
    assert_eq!(fuzzy_matches.len(), 1);
    assert_eq!(fuzzy_matches[0], one);
}
#[test]
fn perf_counters_test() {
    // Table configuration used by this test: `char` keys, `u8` distances and
    // `i32` values.
    struct Config();
    impl TableConfig for Config {
        type KeyCharT = char;
        type DistanceT = u8;
        type ValueT = i32;
        type CoderT = DefaultCoder;
    }

    // The table is opened without being reset.
    // NOTE(review): the counter assertions below presumably rely on
    // "all_cities.geonames.rocks" having been populated beforehand — confirm.
    let table = Table::<Config, true>::new("all_cities.geonames.rocks", Config()).unwrap();

    // A fuzzy lookup of the empty key must yield no results.
    let empty_key_hits = table.lookup_fuzzy_raw("").unwrap().count();
    assert_eq!(empty_key_hits, 0);

    #[cfg(feature = "perf_counters")]
    {
        // Prints the given banner line followed by every counter field.
        let print_counters = |banner: &str| {
            let counters = table.get_perf_counters();
            println!("{}", banner);
            println!("variant_lookup_count {}", counters.variant_lookup_count);
            println!("variant_load_count {}", counters.variant_load_count);
            println!("key_group_ref_count {}", counters.key_group_ref_count);
            println!("max_variant_entry_refs {}", counters.max_variant_entry_refs);
            println!("key_group_load_count {}", counters.key_group_load_count);
            println!("keys_found_count {}", counters.keys_found_count);
            println!("distance_function_invocation_count {}", counters.distance_function_invocation_count);
            println!("records_found_count {}", counters.records_found_count);
        };

        // Creating an exact-lookup iterator without consuming it must not
        // load any key groups.
        table.reset_perf_counters();
        let _unconsumed_iter = table.lookup_exact("london").unwrap();
        assert_eq!(table.get_perf_counters().key_group_load_count, 0);

        // A fully consumed fuzzy lookup must leave every counter non-zero.
        table.reset_perf_counters();
        let _ = table.lookup_fuzzy("london", None).unwrap().count();
        let counters = table.get_perf_counters();
        assert!(counters.variant_lookup_count > 0);
        assert!(counters.variant_load_count > 0);
        assert!(counters.key_group_ref_count > 0);
        assert!(counters.max_variant_entry_refs > 0);
        assert!(counters.key_group_load_count > 0);
        assert!(counters.keys_found_count > 0);
        assert!(counters.distance_function_invocation_count > 0);
        assert!(counters.records_found_count > 0);
        print_counters("-=-=-=-=-=-=-=-=- lookup_fuzzy london test -=-=-=-=-=-=-=-=-");

        // The exact lookup's counters are only printed, not asserted on.
        table.reset_perf_counters();
        let _ = table.lookup_exact("london").unwrap().count();
        print_counters("-=-=-=-=-=-=-=-=- lookup_exact london test -=-=-=-=-=-=-=-=-");
    }

    #[cfg(not(feature = "perf_counters"))]
    {
        println!("perf_counters feature not enabled");
    }
}
}