use std::collections::HashMap;
/// A multiset of byte-string tokens (presumably a "bag of words", going by the name).
///
/// Every distinct token is mapped to the number of times it has been added.
#[derive(Debug, Clone, Default)]
pub struct Bow {
    /// Occurrence count for each distinct token.
    map: HashMap<Vec<u8>, usize>,
}

impl Bow {
    /// Creates an empty bag.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one occurrence of `token`.
    ///
    /// Tokens are compared byte-for-byte, so `b"hello"` and `b"Hello"` are
    /// counted separately.
    pub fn add(&mut self, token: &[u8]) {
        // Look up by borrowed slice first so an owned key is only allocated
        // the first time a token is seen, not on every repeat.
        if let Some(count) = self.map.get_mut(token) {
            *count += 1;
        } else {
            self.map.insert(token.to_vec(), 1);
        }
    }

    /// Returns how many times `token` has been added, or `0` if it never was.
    pub fn freq(&self, token: &[u8]) -> usize {
        self.map.get(token).copied().unwrap_or(0)
    }

    /// Records one occurrence of every token yielded by `tokens`.
    pub fn add_all<I>(&mut self, tokens: I)
    where
        I: IntoIterator,
        I::Item: AsRef<[u8]>,
    {
        for token in tokens {
            self.add(token.as_ref());
        }
    }

    /// Consumes the bag and renders it as `token:count` entries joined by `|`.
    ///
    /// Entries are ordered by ascending token bytes, so the output does not
    /// depend on insertion order or on `HashMap` iteration order. An empty bag
    /// serializes to an empty vector.
    ///
    /// Token bytes are written verbatim. Tokens that are not valid UTF-8 are
    /// therefore preserved exactly instead of being collapsed to U+FFFD, which
    /// would make distinct tokens indistinguishable in the output. No escaping
    /// is applied, so tokens containing `:` or `|` yield output that cannot be
    /// parsed back unambiguously.
    pub fn serialize(self) -> Vec<u8> {
        let mut entries: Vec<(Vec<u8>, usize)> = self.map.into_iter().collect();
        // Keys are unique, so an unstable sort gives the same order as a stable one.
        entries.sort_unstable_by(|a, b| a.0.cmp(&b.0));

        let mut out = Vec::new();
        for (index, (token, count)) in entries.iter().enumerate() {
            if index > 0 {
                out.push(b'|');
            }
            out.extend_from_slice(token);
            out.push(b':');
            out.extend_from_slice(count.to_string().as_bytes());
        }
        out
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created bag holds no tokens and reports zero for any lookup.
    #[test]
    fn test_new() {
        let bow = Bow::new();
        assert_eq!(bow.map.len(), 0);
        assert_eq!(bow.freq(b"test"), 0);
    }

    /// Repeated `add` calls accumulate, and lookups are case-sensitive.
    #[test]
    fn test_add_and_freq() {
        let mut bow = Bow::new();
        bow.add(b"hello");
        bow.add(b"hello");
        assert_eq!(bow.freq(b"hello"), 2);
        assert_eq!(bow.freq(b"Hello"), 0);
    }

    /// `add_all` counts every item yielded by the iterator.
    #[test]
    fn test_add_all() {
        let mut bow = Bow::new();
        let tokens = vec![b"foo", b"foo", b"bar"];
        bow.add_all(tokens);
        assert_eq!(bow.freq(b"foo"), 2);
        assert_eq!(bow.freq(b"bar"), 1);
        assert_eq!(bow.freq(b"Bar"), 0);
    }

    /// Serialization is independent of insertion order and has a fixed format.
    #[test]
    fn test_serialize() {
        let mut bow1 = Bow::new();
        bow1.add(b"apple");
        bow1.add(b"banana");
        bow1.add(b"apple");

        let mut bow2 = Bow::new();
        bow2.add(b"banana");
        bow2.add(b"apple");
        bow2.add(b"apple");

        let serialized1 = bow1.serialize();
        let serialized2 = bow2.serialize();
        assert_eq!(serialized1, serialized2);
        // Pin the exact output so a change to ordering or separators is caught,
        // not just a disagreement between the two bags.
        assert_eq!(serialized1, b"apple:2|banana:1".to_vec());
    }

    /// An empty bag serializes to no bytes at all.
    #[test]
    fn test_serialize_empty() {
        assert!(Bow::new().serialize().is_empty());
    }
}