pub struct EnhancedTfidfVectorizer { /* private fields */ }
Enhanced TF-IDF vectorizer with IDF smoothing options
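For orientation: the weight such a vectorizer assigns to term t in document d follows the textbook definition tfidf(t, d) = tf(t, d) * idf(t). The setters below control how tf is scaled, how idf is smoothed, and whether each document row is normalized afterwards; this is the conventional formula, not a statement about this implementation's exact internals.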
Implementations
impl EnhancedTfidfVectorizer
pub fn new() -> Self
Create a new enhanced TF-IDF vectorizer
Examples found in repository:
examples/enhanced_vectorization_demo.rs (line 83)
13 fn main() -> Result<(), Box<dyn std::error::Error>> {
14 println!("=== Enhanced Text Vectorization Demo ===\n");
15
16 let documents = vec![
17 "The quick brown fox jumps over the lazy dog.",
18 "A fast red fox leaped over the sleeping canine.",
19 "Machine learning algorithms process textual data efficiently.",
20 "Text processing and natural language understanding are important.",
21 "Natural language processing is a field of artificial intelligence.",
22 "Deep learning models can understand complex text patterns.",
23 ];
24
25 // 1. Enhanced Count Vectorizer with Unigrams
26 println!("1. Enhanced Count Vectorizer (Unigrams only)");
27 let mut count_vec_unigram = EnhancedCountVectorizer::new()
28 .set_binary(false)
29 .set_max_features(Some(20));
30
31 count_vec_unigram.fit(&documents)?;
32 let count_matrix = count_vec_unigram.transform_batch(&documents)?;
33
34 println!("Vocabulary size: {}", count_vec_unigram.vocabulary().len());
35 println!("Count matrix shape: {:?}", count_matrix.shape());
36 println!();
37
38 // 2. Enhanced Count Vectorizer with N-grams
39 println!("2. Enhanced Count Vectorizer (Unigrams + Bigrams)");
40 let mut count_vec_ngram = EnhancedCountVectorizer::new()
41 .set_ngram_range((1, 2))?
42 .set_max_features(Some(30));
43
44 count_vec_ngram.fit(&documents)?;
45 let ngram_matrix = count_vec_ngram.transform_batch(&documents)?;
46
47 println!(
48 "Vocabulary size with n-grams: {}",
49 count_vec_ngram.vocabulary().len()
50 );
51 println!("N-gram count matrix shape: {:?}", ngram_matrix.shape());
52
53 // Show some n-gram tokens
54 let vocab = count_vec_ngram.vocabulary();
55 let mut ngram_tokens: Vec<String> = Vec::new();
56 for i in 0..vocab.len().min(10) {
57 if let Some(token) = vocab.get_token(i) {
58 if token.contains(' ') {
59 // This is a bigram
60 ngram_tokens.push(token.to_string());
61 }
62 }
63 }
64 println!("Sample bigrams: {ngram_tokens:?}");
65 println!();
66
67 // 3. Enhanced Count Vectorizer with Document Frequency Filtering
68 println!("3. Count Vectorizer with Document Frequency Filtering");
69 let mut count_vec_filtered = EnhancedCountVectorizer::new()
70 .set_min_df(0.3)? // Token must appear in at least 30% of documents
71 .set_max_df(0.8)?; // Token must appear in at most 80% of documents
72
73 count_vec_filtered.fit(&documents)?;
74
75 println!(
76 "Vocabulary size after DF filtering: {}",
77 count_vec_filtered.vocabulary().len()
78 );
79 println!();
80
81 // 4. Enhanced TF-IDF Vectorizer with N-grams
82 println!("4. Enhanced TF-IDF Vectorizer with N-grams");
83 let mut tfidf_vec = EnhancedTfidfVectorizer::new()
84 .set_ngram_range((1, 2))?
85 .set_max_features(Some(50))
86 .set_smooth_idf(true)
87 .set_sublinear_tf(true)
88 .set_norm(Some("l2".to_string()))?;
89
90 tfidf_vec.fit(&documents)?;
91 let tfidf_matrix = tfidf_vec.transform_batch(&documents)?;
92
93 println!("TF-IDF matrix shape: {:?}", tfidf_matrix.shape());
94 println!("TF-IDF with smoothing and sublinear TF applied");
95
96 // Show TF-IDF values for first document
97 let first_doc_tfidf = tfidf_matrix.row(0);
98 let mut top_features: Vec<(String, f64)> = Vec::new();
99
100 for (idx, &value) in first_doc_tfidf.iter().enumerate() {
101 if value > 0.0 {
102 if let Some(token) = tfidf_vec.vocabulary().get_token(idx) {
103 top_features.push((token.to_string(), value));
104 }
105 }
106 }
107
108 top_features.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
109 println!("\nTop TF-IDF features for first document:");
110 for (token, score) in top_features.iter().take(5) {
111 println!(" {token}: {score:.3}");
112 }
113 println!();
114
115 // 5. Processing with Text Cleaning
116 println!("5. Vectorization with Text Preprocessing");
117 let cleaner = BasicTextCleaner::new(true, true, true);
118
119 // Clean documents first
120 let cleaned_docs: Result<Vec<_>, _> = documents.iter().map(|doc| cleaner.clean(doc)).collect();
121 let cleaned_docs = cleaned_docs?;
122 let cleaned_refs: Vec<&str> = cleaned_docs.iter().map(|s| s.as_str()).collect();
123
124 let mut tfidf_cleaned = EnhancedTfidfVectorizer::new()
125 .set_ngram_range((1, 2))?
126 .set_max_features(Some(30));
127
128 tfidf_cleaned.fit(&cleaned_refs)?;
129 let cleaned_matrix = tfidf_cleaned.transform_batch(&cleaned_refs)?;
130
131 println!("TF-IDF shape after cleaning: {:?}", cleaned_matrix.shape());
132 println!("Processing pipeline: Clean -> Tokenize -> Vectorize");
133
134 Ok(())
135 }
pub fn set_use_idf(self, useidf: bool) -> Self
Set whether to use IDF weighting
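No repository example exercises this setter, so here is a minimal, hypothetical sketch of turning IDF weighting off, which should leave term-frequency-only features; it assumes the type is imported as in the demo listing under new() (whose use lines are elided) and reuses only the fit/transform_batch flow shown there.
// Hypothetical sketch, not from the repository: disable IDF weighting so the
// matrix holds term-frequency features only.
fn tf_only_features(docs: &[&str]) -> Result<(), Box<dyn std::error::Error>> {
    let mut vectorizer = EnhancedTfidfVectorizer::new().set_use_idf(false);
    vectorizer.fit(docs)?;
    let tf_matrix = vectorizer.transform_batch(docs)?;
    println!("TF-only matrix shape: {:?}", tf_matrix.shape());
    Ok(())
}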
pub fn set_smooth_idf(self, smoothidf: bool) -> Self
Set whether to smooth IDF weights
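By convention, IDF smoothing pretends one extra document containing every term exists, so the weight becomes idf(t) = ln((1 + n_docs) / (1 + df(t))) + 1 rather than ln(n_docs / df(t)) + 1; this keeps weights finite for terms that occur in every document and avoids division by zero for terms with zero document frequency. Whether this crate uses exactly this formula is an assumption based on the option's name.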
Examples found in repository: examples/enhanced_vectorization_demo.rs (line 86); see the full listing under new() above.
pub fn set_sublinear_tf(self, sublineartf: bool) -> Self
Set whether to use sublinear TF scaling
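Sublinear TF scaling conventionally replaces a raw count tf with 1 + ln(tf) (for tf > 0), damping the influence of terms that repeat many times within a single document; whether this crate uses exactly that formula is an assumption based on the option's name.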
Examples found in repository: examples/enhanced_vectorization_demo.rs (line 87); see the full listing under new() above.
pub fn set_norm(self, norm: Option<String>) -> Result<Self>
Set the normalization method (None, “l1”, or “l2”)
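Conventionally, Some("l2") rescales each document row to unit Euclidean length (so dot products between rows behave like cosine similarities), Some("l1") rescales by the sum of absolute values, and None leaves rows unnormalized; the demo listing under new() uses Some("l2".to_string()). The exact behaviour is inferred from the option names, not from this crate's source.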
Examples found in repository: examples/enhanced_vectorization_demo.rs (line 88); see the full listing under new() above.
pub fn set_ngram_range(self, range: (usize, usize)) -> Result<Self>
Set n-gram range
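The demo listing under new() passes (1, 2) for unigrams plus bigrams; a hypothetical bigram-only configuration, assuming the range is inclusive on both ends, would look like the sketch below (run inside a function that returns Result, as in the demo's main).
// Hypothetical sketch: restrict the vocabulary to bigrams only, under the
// inclusive-range assumption; `documents` is the slice from the demo above.
let mut bigram_tfidf = EnhancedTfidfVectorizer::new().set_ngram_range((2, 2))?;
bigram_tfidf.fit(&documents)?;
let bigram_matrix = bigram_tfidf.transform_batch(&documents)?;
println!("Bigram-only matrix shape: {:?}", bigram_matrix.shape());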
Examples found in repository: examples/enhanced_vectorization_demo.rs (line 84); see the full listing under new() above.
pub fn set_max_features(self, maxfeatures: Option<usize>) -> Self
Set maximum features
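Vectorizers with this kind of cap usually keep only the maxfeatures most frequent terms and treat None as no limit; both details are assumptions here, since the doc comment only states that a maximum exists. The demo listing under new() caps the n-gram vocabularies at Some(50) and Some(30).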
Examples found in repository: examples/enhanced_vectorization_demo.rs (line 85); see the full listing under new() above.
pub fn vocabulary(&self) -> &Vocabulary
Get the vocabulary
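A small sketch that dumps every fitted token with its column index, using only the Vocabulary accessors the demo exercises (len() and get_token(i)); tfidf_vec is the fitted vectorizer from the demo listing under new().
// Enumerate the learned vocabulary after fit().
let vocab = tfidf_vec.vocabulary();
for i in 0..vocab.len() {
    if let Some(token) = vocab.get_token(i) {
        println!("column {i}: {token}");
    }
}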
Examples found in repository: examples/enhanced_vectorization_demo.rs (line 102); see the full listing under new() above.
pub fn fit(&mut self, texts: &[&str]) -> Result<()>
Fit the vectorizer on a corpus
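Fitting learns the vocabulary and IDF statistics from the given corpus. A common pattern, sketched here with hypothetical train_docs and test_docs slices, is to fit once on training text and then transform both seen and unseen text against the same feature columns; how unseen tokens are handled is not stated in this doc, so treat that part as an assumption.
// Hypothetical sketch: fit on training text, reuse the fitted state elsewhere.
tfidf_vec.fit(&train_docs)?;                               // learn vocabulary + IDF weights
let train_matrix = tfidf_vec.transform_batch(&train_docs)?;
let test_matrix = tfidf_vec.transform_batch(&test_docs)?;  // same columns as train_matrix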
Examples found in repository: examples/enhanced_vectorization_demo.rs (line 90); see the full listing under new() above.
pub fn transform_batch(&self, texts: &[&str]) -> Result<Array2<f64>>
Transform multiple texts into a TF-IDF matrix
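Given the shapes printed in the demo, the returned Array2<f64> should have one row per input text and one column per vocabulary entry; a hedged sanity check, reusing the demo's tfidf_vec and documents:
let matrix = tfidf_vec.transform_batch(&documents)?;
assert_eq!(matrix.nrows(), documents.len());              // one row per document
assert_eq!(matrix.ncols(), tfidf_vec.vocabulary().len()); // one column per token / n-gram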
Examples found in repository: examples/enhanced_vectorization_demo.rs (line 91); see the full listing under new() above.
Trait Implementations
Auto Trait Implementations
impl Freeze for EnhancedTfidfVectorizer
impl RefUnwindSafe for EnhancedTfidfVectorizer
impl Send for EnhancedTfidfVectorizer
impl Sync for EnhancedTfidfVectorizer
impl Unpin for EnhancedTfidfVectorizer
impl UnwindSafe for EnhancedTfidfVectorizer
Blanket Implementations
impl<T> BorrowMut<T> for T where T: ?Sized
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise.
impl<T> Pointable for T
impl<SS, SP> SupersetOf<SS> for SP where SS: SubsetOf<SP>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset.
fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.