TFIDFVectorizer

Struct TFIDFVectorizer 

Source
pub struct TFIDFVectorizer<'a, N = f32, K = String, E = DefaultTFIDFEngine, C = DefaultCompare>
where N: Num + Copy, E: TFIDFEngine<N>, C: Compare<N>,
{ pub documents: Vec<TFVector<N, K>>, pub token_dim_sample: Vec<String>, pub token_dim_set: HashSet<String>, pub corpus_ref: &'a Corpus, pub idf: IDFVector<N>, /* private fields */ }

Fields§

§documents: Vec<TFVector<N, K>>

ドキュメントのTFベクトル

§token_dim_sample: Vec<String>

TFベクトルのトークンの次元サンプル

§token_dim_set: HashSet<String>

高速存在判定用の語彙セット (token_dim_sample と常に同期)

§corpus_ref: &'a Corpus

コーパスの参照

§idf: IDFVector<N>

IDFベクトル

Implementations§

Source§

impl<'a, N, K, E> TFIDFVectorizer<'a, N, K, E>
where K: Clone, N: Num + Copy, E: TFIDFEngine<N>, DefaultCompare: Compare<N>,

Source

pub fn similarity(&self, query: SimilarityQuery) -> Hits<K>

Examples found in repository?
examples/basic.rs (line 17)
3 fn main() {
4     let corpus = Corpus::new();
5     let mut freq1 = TokenFrequency::new();
6     freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
7     let mut freq2 = TokenFrequency::new();
8     freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
9
10     let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(&corpus);
11     vectorizer.add_doc("doc1".to_string(), &freq1);
12     vectorizer.add_doc("doc2".to_string(), &freq2);
13
14     let mut query_tokens = TokenFrequency::new();
15     query_tokens.add_tokens(&["rust", "高速"]);
16     let query = SimilarityQuery::CosineSimilarity(query_tokens);
17     let mut result = vectorizer.similarity(query);
18     result.sort_by_score();
19
20     result.list.iter().for_each(|(k, s)| {
21         println!("doc: {}, score: {}", k, s);
22     });
23 }
Source§

impl<'a, N, K, E, C> TFIDFVectorizer<'a, N, K, E, C>
where N: Num + Copy, E: TFIDFEngine<N>, C: Compare<N>,

Source

pub fn new(corpus_ref: &'a Corpus) -> Self

Create a new TFIDFVectorizer instance

Examples found in repository?
examples/basic.rs (line 10)
3 fn main() {
4     let corpus = Corpus::new();
5     let mut freq1 = TokenFrequency::new();
6     freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
7     let mut freq2 = TokenFrequency::new();
8     freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
9
10     let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(&corpus);
11     vectorizer.add_doc("doc1".to_string(), &freq1);
12     vectorizer.add_doc("doc2".to_string(), &freq2);
13
14     let mut query_tokens = TokenFrequency::new();
15     query_tokens.add_tokens(&["rust", "高速"]);
16     let query = SimilarityQuery::CosineSimilarity(query_tokens);
17     let mut result = vectorizer.similarity(query);
18     result.sort_by_score();
19
20     result.list.iter().for_each(|(k, s)| {
21         println!("doc: {}, score: {}", k, s);
22     });
23 }
Source

pub fn set_corpus_ref(&mut self, corpus_ref: &'a Corpus)

Corpusを指定する

Source

pub fn update_idf(&mut self)

Corpusに変更があればIDFを再計算する

Source§

impl<'a, N, K, E, C> TFIDFVectorizer<'a, N, K, E, C>
where N: Num + Copy, E: TFIDFEngine<N>, C: Compare<N>,

Source

pub fn add_doc(&mut self, doc_id: K, doc: &TokenFrequency)

ドキュメントを追加します。参照されているCorpusも即時に更新されます。

Examples found in repository?
examples/basic.rs (line 11)
3 fn main() {
4     let corpus = Corpus::new();
5     let mut freq1 = TokenFrequency::new();
6     freq1.add_tokens(&["rust", "高速", "並列", "rust"]);
7     let mut freq2 = TokenFrequency::new();
8     freq2.add_tokens(&["rust", "柔軟", "安全", "rust"]);
9
10     let mut vectorizer: TFIDFVectorizer<u16> = TFIDFVectorizer::new(&corpus);
11     vectorizer.add_doc("doc1".to_string(), &freq1);
12     vectorizer.add_doc("doc2".to_string(), &freq2);
13
14     let mut query_tokens = TokenFrequency::new();
15     query_tokens.add_tokens(&["rust", "高速"]);
16     let query = SimilarityQuery::CosineSimilarity(query_tokens);
17     let mut result = vectorizer.similarity(query);
18     result.sort_by_score();
19
20     result.list.iter().for_each(|(k, s)| {
21         println!("doc: {}, score: {}", k, s);
22     });
23 }

Trait Implementations§

Source§

impl<'a, N, K: Debug, E, C> Debug for TFIDFVectorizer<'a, N, K, E, C>
where N: Num + Copy + Debug, E: TFIDFEngine<N> + Debug, C: Compare<N> + Debug,

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<'a, N, K, E, C> Serialize for TFIDFVectorizer<'a, N, K, E, C>
where N: Num + Copy + Serialize, K: Serialize, E: TFIDFEngine<N>, C: Compare<N>,

Source§

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

TFIDFVectorizerをシリアライズします。参照を含んでいるため、それを除外した内容になります。デシリアライズするにはTFIDFDataを使用してください。

Auto Trait Implementations§

§

impl<'a, N, K, E, C> Freeze for TFIDFVectorizer<'a, N, K, E, C>

§

impl<'a, N = f32, K = String, E = DefaultTFIDFEngine, C = DefaultCompare> !RefUnwindSafe for TFIDFVectorizer<'a, N, K, E, C>

§

impl<'a, N, K, E, C> Send for TFIDFVectorizer<'a, N, K, E, C>
where E: Send, C: Send, K: Send, N: Send,

§

impl<'a, N, K, E, C> Sync for TFIDFVectorizer<'a, N, K, E, C>
where E: Sync, C: Sync, K: Sync, N: Sync,

§

impl<'a, N, K, E, C> Unpin for TFIDFVectorizer<'a, N, K, E, C>
where E: Unpin, C: Unpin, K: Unpin, N: Unpin,

§

impl<'a, N = f32, K = String, E = DefaultTFIDFEngine, C = DefaultCompare> !UnwindSafe for TFIDFVectorizer<'a, N, K, E, C>

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.