pub struct Set { /* private fields */ }
Expand description
Fast and compact indexed string set using front coding.
This implements an indexed set of strings in a compressed format based on front coding.
n
strings in the set are indexed with integers from [0..n-1]
and assigned in the lexicographical order.
§Supported queries
Locate
gets the index of a string key.Decode
gets the string with an index.Predict
enumerates the strings starting from a prefix.
§Limitations
Input keys must not contain \0
character because the character is used for the terminator.
§Example
use fcsd::Set;
// Input string keys should be sorted and unique.
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
// Builds an indexed set.
let set = Set::new(keys).unwrap();
assert_eq!(set.len(), keys.len());
// Gets indexes associated with given keys.
let mut locator = set.locator();
assert_eq!(locator.run(b"ICML"), Some(1));
assert_eq!(locator.run(b"SIGMOD"), Some(4));
assert_eq!(locator.run(b"SIGSPATIAL"), None);
// Decodes string keys from given indexes.
let mut decoder = set.decoder();
assert_eq!(decoder.run(0), b"ICDM".to_vec());
assert_eq!(decoder.run(3), b"SIGKDD".to_vec());
// Enumerates indexes and keys stored in the set.
let mut iter = set.iter();
assert_eq!(iter.next(), Some((0, b"ICDM".to_vec())));
assert_eq!(iter.next(), Some((1, b"ICML".to_vec())));
assert_eq!(iter.next(), Some((2, b"SIGIR".to_vec())));
assert_eq!(iter.next(), Some((3, b"SIGKDD".to_vec())));
assert_eq!(iter.next(), Some((4, b"SIGMOD".to_vec())));
assert_eq!(iter.next(), None);
// Enumerates indexes and keys starting with a prefix.
let mut iter = set.predictive_iter(b"SIG");
assert_eq!(iter.next(), Some((2, b"SIGIR".to_vec())));
assert_eq!(iter.next(), Some((3, b"SIGKDD".to_vec())));
assert_eq!(iter.next(), Some((4, b"SIGMOD".to_vec())));
assert_eq!(iter.next(), None);
// Serialization / Deserialization
let mut data = Vec::<u8>::new();
set.serialize_into(&mut data).unwrap();
assert_eq!(data.len(), set.size_in_bytes());
let other = Set::deserialize_from(&data[..]).unwrap();
assert_eq!(data.len(), other.size_in_bytes());
Implementations§
Source§impl Set
impl Set
Sourcepub fn new<I, P>(keys: I) -> Result<Self>
pub fn new<I, P>(keys: I) -> Result<Self>
Builds a new Set
from string keys.
§Arguments
keys
: string keys that are unique and sorted.
§Notes
It will set the bucket size to DEFAULT_BUCKET_SIZE
.
If you want to optionally set the parameter, use Set::with_bucket_size
instead.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
let set = Set::new(keys).unwrap();
assert_eq!(set.len(), keys.len());
Sourcepub fn with_bucket_size<I, P>(keys: I, bucket_size: usize) -> Result<Self>
pub fn with_bucket_size<I, P>(keys: I, bucket_size: usize) -> Result<Self>
Builds a new Set
from string keys with a specified bucket size.
§Arguments
keys
: string keys that are unique and sorted.bucket_size
: The number of strings in each bucket, which must be a power of two.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
let set = Set::with_bucket_size(keys, 4).unwrap();
assert_eq!(set.len(), keys.len());
Sourcepub fn size_in_bytes(&self) -> usize
pub fn size_in_bytes(&self) -> usize
Returns the number of bytes needed to write the dictionary.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
let set = Set::new(keys).unwrap();
assert_eq!(set.size_in_bytes(), 110);
Sourcepub fn serialize_into<W>(&self, writer: W) -> Result<()>where
W: Write,
pub fn serialize_into<W>(&self, writer: W) -> Result<()>where
W: Write,
Sourcepub fn deserialize_from<R>(reader: R) -> Result<Self>where
R: Read,
pub fn deserialize_from<R>(reader: R) -> Result<Self>where
R: Read,
Deserializes the dictionary from a reader.
§Arguments
reader
: Readable stream.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
let set = Set::new(keys).unwrap();
let mut data = Vec::<u8>::new();
set.serialize_into(&mut data).unwrap();
let other = Set::deserialize_from(&data[..]).unwrap();
assert_eq!(set.size_in_bytes(), other.size_in_bytes());
Sourcepub fn locator(&self) -> Locator<'_>
pub fn locator(&self) -> Locator<'_>
Makes a class to get ids of given string keys.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
let set = Set::new(keys).unwrap();
let mut locator = set.locator();
assert_eq!(locator.run(b"ICML"), Some(1));
assert_eq!(locator.run(b"SIGMOD"), Some(4));
assert_eq!(locator.run(b"SIGSPATIAL"), None);
Sourcepub fn decoder(&self) -> Decoder<'_>
pub fn decoder(&self) -> Decoder<'_>
Makes a class to decode stored keys associated with given ids.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
let set = Set::new(keys).unwrap();
let mut decoder = set.decoder();
assert_eq!(decoder.run(0), b"ICDM".to_vec());
assert_eq!(decoder.run(3), b"SIGKDD".to_vec());
Sourcepub fn iter(&self) -> Iter<'_> ⓘ
pub fn iter(&self) -> Iter<'_> ⓘ
Makes an iterator to enumerate keys stored in the dictionary.
The keys will be reported in the lexicographical order.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR"];
let set = Set::new(keys).unwrap();
let mut iter = set.iter();
assert_eq!(iter.next(), Some((0, b"ICDM".to_vec())));
assert_eq!(iter.next(), Some((1, b"ICML".to_vec())));
assert_eq!(iter.next(), Some((2, b"SIGIR".to_vec())));
assert_eq!(iter.next(), None);
Sourcepub fn predictive_iter<P>(&self, prefix: P) -> PredictiveIter<'_> ⓘ
pub fn predictive_iter<P>(&self, prefix: P) -> PredictiveIter<'_> ⓘ
Makes a predictive iterator to enumerate keys starting from a given string.
The keys will be reported in the lexicographical order.
§Arguments
prefix
: Prefix of keys to be predicted.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
let set = Set::new(keys).unwrap();
let mut iter = set.predictive_iter(b"SIG");
assert_eq!(iter.next(), Some((2, b"SIGIR".to_vec())));
assert_eq!(iter.next(), Some((3, b"SIGKDD".to_vec())));
assert_eq!(iter.next(), Some((4, b"SIGMOD".to_vec())));
assert_eq!(iter.next(), None);
Sourcepub const fn len(&self) -> usize
pub const fn len(&self) -> usize
Gets the number of stored keys.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
let set = Set::new(keys).unwrap();
assert_eq!(set.len(), keys.len());
Sourcepub const fn num_buckets(&self) -> usize
pub const fn num_buckets(&self) -> usize
Gets the number of defined buckets.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
let set = Set::with_bucket_size(keys, 4).unwrap();
assert_eq!(set.num_buckets(), 2);
Sourcepub const fn bucket_size(&self) -> usize
pub const fn bucket_size(&self) -> usize
Gets the bucket size.
§Example
use fcsd::Set;
let keys = ["ICDM", "ICML", "SIGIR", "SIGKDD", "SIGMOD"];
let set = Set::with_bucket_size(keys, 4).unwrap();
assert_eq!(set.bucket_size(), 4);