tf_idf_vectorizer/vectorizer/
serde.rs1use std::sync::Arc;
2
3use ahash::RandomState;
4use indexmap::IndexSet;
5use num::Num;
6use serde::{ser::SerializeStruct, Deserialize, Serialize};
7
8use crate::{vectorizer::{tfidf::{DefaultTFIDFEngine, TFIDFEngine}, IDFVector, TFVector}, Corpus, TFIDFVectorizer};
9
10#[derive(Debug, Deserialize, Serialize)]
14pub struct TFIDFData<N = f32, K = String, E = DefaultTFIDFEngine>
15where
16 N: Num + Copy,
17 E: TFIDFEngine<N>,
18{
19 pub documents: Vec<TFVector<N, K>>,
21 pub token_dim_sample: IndexSet<Box<str>, RandomState>,
23 pub idf: IDFVector<N>,
25 #[serde(default, skip_serializing, skip_deserializing)]
26 _marker: std::marker::PhantomData<E>,
27}
28
29impl<N, K, E> TFIDFData<N, K, E>
30where
31 N: Num + Copy,
32 E: TFIDFEngine<N>,
33{
34 pub fn into_tf_idf_vectorizer(self, corpus_ref: Arc<Corpus>) -> TFIDFVectorizer<N, K, E>
37 {
38 let mut instance = TFIDFVectorizer {
39 documents: self.documents,
40 token_dim_sample: self.token_dim_sample.clone(),
41 corpus_ref,
42 idf: self.idf,
43 _marker: std::marker::PhantomData,
44 };
45 instance.update_idf();
46 instance
47 }
48}
49
50impl<N, K, E> Serialize for TFIDFVectorizer<N, K, E>
51where
52 N: Num + Copy + Serialize,
53 K: Serialize,
54 E: TFIDFEngine<N>,
55{
56 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
60 where
61 S: serde::Serializer,
62 {
63 let mut state = serializer.serialize_struct("TFIDFVectorizer", 3)?;
64 state.serialize_field("documents", &self.documents)?;
65 state.serialize_field("token_dim_sample", &self.token_dim_sample)?;
66 state.serialize_field("idf", &self.idf)?;
67 state.end()
68 }
69}