use bit_vec::BitVec;
use itertools::Itertools;
use crate::base58;
use crate::hashes::{minimum_hash, sliding_window, xxhash32};
use crate::normalization::text_normalize;
/// Window size handed to `sliding_window` when splitting the normalized text.
const WINDOW_SIZE_CID_T: usize = 13;
/// Header byte placed first in the digest when `partial` is `false`.
const HEAD_CID_T: u8 = 0x10;
/// Header byte placed first in the digest when `partial` is `true`.
const HEAD_CID_T_PCF: u8 = 0x11;
pub fn content_id_text(text: &str, partial: bool) -> String {
let text = text_normalize(text, false);
let n_grams: Vec<String> = sliding_window(&text, WINDOW_SIZE_CID_T)
.iter()
.map(|w| w.chars().intersperse('\u{0020}').collect())
.collect();
let features: Vec<u32> = n_grams.iter().map(|n| xxhash32(n.as_bytes())).collect();
let minhash = minimum_hash(features);
let lsb: BitVec = minhash.iter().map(|x| (x & 1) == 1).collect();
let lsb_bytes = lsb.to_bytes();
let mut content_id_digest = if partial {
vec![HEAD_CID_T_PCF]
} else {
vec![HEAD_CID_T]
};
content_id_digest.extend(&lsb_bytes);
base58::encode(&content_id_digest)
}