use crate::{compressed_size, TrainItem};
use std::cmp::{max, min};
/// A labelled training sample whose text has been wrapped in a [`CachedItem`].
///
/// Built from a `TrainItem` through the `From` conversion.
#[derive(Clone, Debug)]
pub struct CachedTrainItem {
    /// Class label attached to this sample.
    class: String,
    /// The sample's text together with its cached compressed size.
    item: CachedItem,
}
impl CachedTrainItem {
    /// Returns the class label of this training sample as a string slice.
    pub fn class(&self) -> &str {
        self.class.as_str()
    }
}
impl From<TrainItem> for CachedTrainItem {
fn from(value: TrainItem) -> Self {
Self {
class: value.class,
item: value.text.into(),
}
}
}
/// A piece of text stored alongside a compressed size for that text.
///
/// The `From` conversion in this file fills `compressed_size` by calling
/// `compressed_size` on the text once, so later distance computations can
/// reuse the value instead of recompressing.
#[derive(Debug, Clone)]
pub struct CachedItem {
    /// The original, uncompressed text.
    text: String,
    /// Size reported by `compressed_size` for `text`.
    compressed_size: usize,
}
impl<'a> From<&'a CachedTrainItem> for &'a CachedItem {
fn from(value: &'a CachedTrainItem) -> Self {
&value.item
}
}
impl<S> From<S> for CachedItem
where
    S: Into<String>,
{
    /// Converts anything that can become a `String` into a [`CachedItem`],
    /// calling `compressed_size` on the text once and storing the result.
    fn from(value: S) -> Self {
        let text: String = value.into();
        // Measure before moving `text` into the struct.
        let size = compressed_size(&text);
        Self {
            text,
            compressed_size: size,
        }
    }
}
/// Computes the normalized compression distance between two cached items.
///
/// With `C(.)` denoting `compressed_size`, the result is
/// `(C(xy) - min(C(x), C(y))) / max(C(x), C(y))`, where `xy` is the text of
/// `x` followed by the text of `y`. `C(x)` and `C(y)` are read from the cached
/// fields; only the concatenation is compressed here.
///
/// The subtraction is performed in floating point. If the compressor reports
/// a smaller size for the concatenation than for the smaller input, the
/// result is a small negative number rather than an integer-underflow panic
/// (debug builds) or a wrapped, enormous value (release builds).
///
/// If both cached sizes are zero, the division yields a non-finite value
/// (`NaN` or infinity).
pub fn normalized_compression_distance(x: &CachedItem, y: &CachedItem) -> f64 {
    let c_x = x.compressed_size;
    let c_y = y.compressed_size;

    // Build the concatenation with a single, exactly sized allocation.
    let mut combined = String::with_capacity(x.text.len() + y.text.len());
    combined.push_str(&x.text);
    combined.push_str(&y.text);
    let c_xy = compressed_size(combined.as_str());

    // Convert before subtracting: `usize` subtraction would underflow when
    // `c_xy < min(c_x, c_y)`.
    (c_xy as f64 - min(c_x, c_y) as f64) / max(c_x, c_y) as f64
}