pub struct TextDataset {
pub texts: Vec<String>,
pub labels: Vec<String>,
/* private fields */
}

Expand description

Text classification dataset
Fields§
texts: Vec<String> — The text samples
labels: Vec<String> — The labels for each text
Implementations§
Source§impl TextDataset
impl TextDataset
Sourcepub fn new(texts: Vec<String>, labels: Vec<String>) -> Result<Self>
pub fn new(texts: Vec<String>, labels: Vec<String>) -> Result<Self>
Create a new text dataset
Examples found in repository?
examples/ml_sentiment_demo.rs (line 230)
161fn create_sentiment_dataset() -> Result<(TextDataset, TextDataset), Box<dyn std::error::Error>> {
162 // Training data
163 let traintexts = vec![
164 "I absolutely loved this movie! The acting was superb.",
165 "Terrible experience, would not recommend to anyone.",
166 "The product was okay, nothing special but it works.",
167 "Great customer service and fast delivery.",
168 "Disappointing quality for the price paid.",
169 "This is the best purchase I've made all year!",
170 "Waste of money, doesn't work as advertised.",
171 "Mixed feelings about this. Some parts good, others bad.",
172 "Pleasantly surprised by how well this performs.",
173 "Not worth the price. Broke after two weeks.",
174 "Amazing value for the price. Highly recommended!",
175 "Mediocre at best. Wouldn't buy again.",
176 "Fantastic product that exceeds expectations.",
177 "Poor construction quality, arrived damaged.",
178 "It's decent but there are better options available.",
179 "This changed my life! Can't imagine living without it.",
180 "Regret buying this. Customer service was unhelpful.",
181 "Satisfied with my purchase, does what it claims.",
182 "Best in its class. Outstanding performance.",
183 "Very disappointed, doesn't match the description.",
184 "Just average, nothing to write home about.",
185 "Exceeded my expectations in every way.",
186 "One of the worst products I've ever bought.",
187 "Good enough for the price, but has limitations.",
188 "Incredible value! Works perfectly for my needs.",
189 "Would not purchase again. Many flaws.",
190 "Does the job fine, but nothing spectacular.",
191 "Absolutely worthless. Don't waste your money.",
192 "A solid choice. Reliable and well-designed.",
193 "Not impressed at all. Many issues from day one.",
194 ];
195
196 let train_labels = vec![
197 "positive", "negative", "neutral", "positive", "negative", "positive", "negative",
198 "neutral", "positive", "negative", "positive", "negative", "positive", "negative",
199 "neutral", "positive", "negative", "neutral", "positive", "negative", "neutral",
200 "positive", "negative", "neutral", "positive", "negative", "neutral", "negative",
201 "positive", "negative",
202 ];
203
204 // Test data (different examples)
205 let testtexts = [
206 "Loved every minute of it. Highly recommended!",
207 "Terrible product. Complete waste of money.",
208 "It's okay, nothing special but gets the job done.",
209 "Outstanding quality and service. Will buy again!",
210 "Very poor experience. Many issues encountered.",
211 "Adequate for basic needs, but lacks advanced features.",
212 "Couldn't be happier with this purchase.",
213 "Avoid at all costs. Terrible quality.",
214 "Average performance. Neither good nor bad.",
215 "Top-notch quality and design. Very impressed!",
216 ];
217
218 let test_labels = [
219 "positive", "negative", "neutral", "positive", "negative", "neutral", "positive",
220 "negative", "neutral", "positive",
221 ];
222
223 // Convert to strings
224 let traintexts = traintexts.iter().map(|t| t.to_string()).collect();
225 let train_labels = train_labels.iter().map(|l| l.to_string()).collect();
226 let testtexts = testtexts.iter().map(|t| t.to_string()).collect();
227 let test_labels = test_labels.iter().map(|l| l.to_string()).collect();
228
229 // Create datasets
230 let train_dataset = TextDataset::new(traintexts, train_labels)?;
231 let test_dataset = TextDataset::new(testtexts, test_labels)?;
232
233 Ok((train_dataset, test_dataset))
234}

More examples

examples/text_classification_demo.rs (line 32)
8fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("Text Classification Demo");
10 println!("=======================\n");
11
12 // Create sample dataset
13 let texts = vec![
14 "This movie is absolutely fantastic and amazing!".to_string(),
15 "I really hated this film, it was terrible.".to_string(),
16 "The acting was superb and the plot was engaging.".to_string(),
17 "Worst movie I've ever seen, complete waste of time.".to_string(),
18 "A masterpiece of cinema, truly exceptional work.".to_string(),
19 "Boring, predictable, and poorly executed.".to_string(),
20 ];
21
22 let labels = vec![
23 "positive".to_string(),
24 "negative".to_string(),
25 "positive".to_string(),
26 "negative".to_string(),
27 "positive".to_string(),
28 "negative".to_string(),
29 ];
30
31 // Create dataset
32 let dataset = TextDataset::new(texts, labels)?;
33 println!("Dataset Statistics:");
34 println!(" Total samples: {}", dataset.len());
35 println!(" Number of classes: {}", dataset.unique_labels().len());
36 println!();
37
38 // Split into train and test
39 let (train_dataset, test_dataset) = dataset.train_test_split(0.33, Some(42))?;
40 println!("Train/Test Split:");
41 println!(" Training samples: {}", train_dataset.len());
42 println!(" Test samples: {}", test_dataset.len());
43 println!();
44
45 // Create text processing pipeline
46 let mut pipeline = TextClassificationPipeline::with_tfidf();
47
48 // Fit the pipeline
49 pipeline.fit(&train_dataset)?;
50
51 // Transform to features
52 let train_features = pipeline.transform(&train_dataset)?;
53 let test_features = pipeline.transform(&test_dataset)?;
54
55 println!("Feature Extraction:");
56 println!(
57 " Train feature shape: ({}, {})",
58 train_features.nrows(),
59 train_features.ncols()
60 );
61 println!(
62 " Test feature shape: ({}, {})",
63 test_features.nrows(),
64 test_features.ncols()
65 );
66 println!();
67
68 // Demonstrate feature selection
69 let mut feature_selector = TextFeatureSelector::new()
70 .set_max_features(10.0)?
71 .set_min_df(0.1)?
72 .set_max_df(0.9)?;
73
74 let selected_train_features = feature_selector.fit_transform(&train_features)?;
75 println!("Feature Selection:");
76 println!(" Selected features: {}", selected_train_features.ncols());
77 println!();
78
79 // Simulate classification results (in a real scenario, you'd use a classifier)
80 // For demo purposes, we'll create mock predictions based on simple heuristics
81 let _unique_labels = train_dataset.unique_labels();
82
83 // Create binary labels (0 for negative, 1 for positive) for this demo
84 let mut train_labels = Vec::new();
85 let mut test_labels = Vec::new();
86
87 for label in &train_dataset.labels {
88 train_labels.push(if label == "positive" { 1 } else { 0 });
89 }
90
91 for label in &test_dataset.labels {
92 test_labels.push(if label == "positive" { 1 } else { 0 });
93 }
94
95 // Mock predictions (in practice, use a real classifier)
96 let predictions = test_labels.clone(); // Perfect predictions for demo
97
98 // Calculate metrics
99 let metrics = TextClassificationMetrics::new();
100 let accuracy = metrics.accuracy(&predictions, &test_labels)?;
101 let (precision, recall, f1) = metrics.binary_metrics(&predictions, &test_labels)?;
102
103 println!("Classification Metrics:");
104 println!(" Accuracy: {:.2}%", accuracy * 100.0);
105 println!(" Precision: {:.2}%", precision * 100.0);
106 println!(" Recall: {:.2}%", recall * 100.0);
107 println!(" F1 Score: {:.2}%", f1 * 100.0);
108 println!();
109
110 // Create a simple confusion matrix manually since the method isn't available
111 let mut true_positive = 0;
112 let mut true_negative = 0;
113 let mut false_positive = 0;
114 let mut false_negative = 0;
115
116 for (pred, actual) in predictions.iter().zip(test_labels.iter()) {
117 match (pred, actual) {
118 (1, 1) => true_positive += 1,
119 (0, 0) => true_negative += 1,
120 (1, 0) => false_positive += 1,
121 (0, 1) => false_negative += 1,
122 _ => {}
123 }
124 }
125
126 println!("Confusion Matrix:");
127 println!("[ {true_negative} {false_positive} ]");
128 println!("[ {false_negative} {true_positive} ]");
129
130 Ok(())
131}

examples/ml_integration_demo.rs (lines 30-33)
8fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("Machine Learning Integration Demo");
10 println!("================================\n");
11
12 // Sample dataset for demonstration
13 let texts = [
14 "This product is absolutely amazing! I love it.",
15 "Terrible experience, would not recommend.",
16 "It's okay, nothing special but works fine.",
17 "Excellent quality and fast shipping.",
18 "Complete waste of money, very disappointed.",
19 "Good value for the price, satisfied with purchase.",
20 "Outstanding service and great product!",
21 "Not worth it, many issues with this item.",
22 ];
23
24 let labels = [
25 "positive", "negative", "neutral", "positive", "negative", "positive", "positive",
26 "negative",
27 ];
28
29 // Create dataset
30 let dataset = TextDataset::new(
31 texts.iter().map(|s| s.to_string()).collect(),
32 labels.iter().map(|s| s.to_string()).collect(),
33 )?;
34
35 // Demonstrate different feature extraction modes
36 println!("1. TF-IDF Feature Extraction");
37 println!("---------------------------");
38
39 let mut tfidf_processor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf)
40 .with_tfidf_params(0.1, 0.9, Some(100));
41
42 let text_refs = texts.to_vec();
43 let tfidf_features = tfidf_processor.fit_transform(&text_refs)?;
44
45 println!(
46 "TF-IDF Features shape: {:?}",
47 tfidf_features.features.shape()
48 );
49 println!(
50 "First document features (first 5 values): {:?}\n",
51 &tfidf_features
52 .features
53 .row(0)
54 .iter()
55 .take(5)
56 .collect::<Vec<_>>()
57 );
58
59 // Topic modeling features
60 println!("2. Topic Modeling Features");
61 println!("-------------------------");
62
63 let mut topic_processor =
64 MLTextPreprocessor::new(FeatureExtractionMode::TopicModeling).with_topic_modeling(3);
65
66 let topic_features = topic_processor.fit_transform(&text_refs)?;
67
68 println!(
69 "Topic Features shape: {:?}",
70 topic_features.features.shape()
71 );
72 println!(
73 "Topic distribution for first document: {:?}\n",
74 topic_features.features.row(0)
75 );
76
77 // Combined features
78 println!("3. Combined Features");
79 println!("-------------------");
80
81 let mut combined_processor = MLTextPreprocessor::new(FeatureExtractionMode::Combined);
82 let combined_features = combined_processor.fit_transform(&text_refs)?;
83
84 println!(
85 "Combined Features shape: {:?}",
86 combined_features.features.shape()
87 );
88 println!("Metadata: {:?}\n", combined_features.metadata);
89
90 // ML Pipeline
91 println!("4. ML Pipeline with Classification");
92 println!("---------------------------------");
93
94 let mut pipeline = TextMLPipeline::with_mode(FeatureExtractionMode::TfIdf)
95 .configure_preprocessor(|p| {
96 p.with_tfidf_params(0.0, 1.0, Some(50))
97 .with_feature_selection(20)
98 });
99
100 let features = pipeline.process(&text_refs)?;
101 println!("Pipeline features shape: {:?}", features.features.shape());
102
103 // Batch processing for large datasets
104 println!("\n5. Batch Processing");
105 println!("-------------------");
106
107 let mut batch_processor = BatchTextProcessor::new(3);
108 let batches = batch_processor.process_batches(&text_refs)?;
109
110 println!("Number of batches: {}", batches.len());
111 for (i, batch) in batches.iter().enumerate() {
112 println!("Batch {} shape: {:?}", i + 1, batch.features.shape());
113 }
114
115 // Feature extraction for classification
116 println!("\n6. Classification with ML Features");
117 println!("----------------------------------");
118
119 // Split data
120 let (train_dataset, test_dataset) = dataset.train_test_split(0.25, Some(42))?;
121
122 // Extract features
123 let traintexts: Vec<&str> = train_dataset.texts.iter().map(|s| s.as_ref()).collect();
124 let testtexts: Vec<&str> = test_dataset.texts.iter().map(|s| s.as_ref()).collect();
125
126 let mut feature_extractor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf);
127 feature_extractor.fit(&traintexts)?;
128
129 let train_features = feature_extractor.transform(&traintexts)?;
130 let test_features = feature_extractor.transform(&testtexts)?;
131
132 println!("Training features: {:?}", train_features.features.shape());
133 println!("Test features: {:?}", test_features.features.shape());
134
135 // In a real scenario, you would now use these features with a classifier
136 println!("\nFeatures are ready for machine learning models!");
137
138 // Demonstrate feature statistics
139 println!("\n7. Feature Statistics");
140 println!("--------------------");
141
142 let feature_means = train_features
143 .features
144 .mean_axis(scirs2_core::ndarray::Axis(0))
145 .unwrap();
146 let feature_stds = train_features
147 .features
148 .std_axis(scirs2_core::ndarray::Axis(0), 0.0);
149
150 println!(
151 "Mean of first 5 features: {:?}",
152 &feature_means.iter().take(5).collect::<Vec<_>>()
153 );
154 println!(
155 "Std of first 5 features: {:?}",
156 &feature_stds.iter().take(5).collect::<Vec<_>>()
157 );
158
159 Ok(())
160}

Source

pub fn len(&self) -> usize
pub fn len(&self) -> usize
Get the number of samples
Examples found in repository?
examples/text_classification_demo.rs (line 34)
8fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("Text Classification Demo");
10 println!("=======================\n");
11
12 // Create sample dataset
13 let texts = vec![
14 "This movie is absolutely fantastic and amazing!".to_string(),
15 "I really hated this film, it was terrible.".to_string(),
16 "The acting was superb and the plot was engaging.".to_string(),
17 "Worst movie I've ever seen, complete waste of time.".to_string(),
18 "A masterpiece of cinema, truly exceptional work.".to_string(),
19 "Boring, predictable, and poorly executed.".to_string(),
20 ];
21
22 let labels = vec![
23 "positive".to_string(),
24 "negative".to_string(),
25 "positive".to_string(),
26 "negative".to_string(),
27 "positive".to_string(),
28 "negative".to_string(),
29 ];
30
31 // Create dataset
32 let dataset = TextDataset::new(texts, labels)?;
33 println!("Dataset Statistics:");
34 println!(" Total samples: {}", dataset.len());
35 println!(" Number of classes: {}", dataset.unique_labels().len());
36 println!();
37
38 // Split into train and test
39 let (train_dataset, test_dataset) = dataset.train_test_split(0.33, Some(42))?;
40 println!("Train/Test Split:");
41 println!(" Training samples: {}", train_dataset.len());
42 println!(" Test samples: {}", test_dataset.len());
43 println!();
44
45 // Create text processing pipeline
46 let mut pipeline = TextClassificationPipeline::with_tfidf();
47
48 // Fit the pipeline
49 pipeline.fit(&train_dataset)?;
50
51 // Transform to features
52 let train_features = pipeline.transform(&train_dataset)?;
53 let test_features = pipeline.transform(&test_dataset)?;
54
55 println!("Feature Extraction:");
56 println!(
57 " Train feature shape: ({}, {})",
58 train_features.nrows(),
59 train_features.ncols()
60 );
61 println!(
62 " Test feature shape: ({}, {})",
63 test_features.nrows(),
64 test_features.ncols()
65 );
66 println!();
67
68 // Demonstrate feature selection
69 let mut feature_selector = TextFeatureSelector::new()
70 .set_max_features(10.0)?
71 .set_min_df(0.1)?
72 .set_max_df(0.9)?;
73
74 let selected_train_features = feature_selector.fit_transform(&train_features)?;
75 println!("Feature Selection:");
76 println!(" Selected features: {}", selected_train_features.ncols());
77 println!();
78
79 // Simulate classification results (in a real scenario, you'd use a classifier)
80 // For demo purposes, we'll create mock predictions based on simple heuristics
81 let _unique_labels = train_dataset.unique_labels();
82
83 // Create binary labels (0 for negative, 1 for positive) for this demo
84 let mut train_labels = Vec::new();
85 let mut test_labels = Vec::new();
86
87 for label in &train_dataset.labels {
88 train_labels.push(if label == "positive" { 1 } else { 0 });
89 }
90
91 for label in &test_dataset.labels {
92 test_labels.push(if label == "positive" { 1 } else { 0 });
93 }
94
95 // Mock predictions (in practice, use a real classifier)
96 let predictions = test_labels.clone(); // Perfect predictions for demo
97
98 // Calculate metrics
99 let metrics = TextClassificationMetrics::new();
100 let accuracy = metrics.accuracy(&predictions, &test_labels)?;
101 let (precision, recall, f1) = metrics.binary_metrics(&predictions, &test_labels)?;
102
103 println!("Classification Metrics:");
104 println!(" Accuracy: {:.2}%", accuracy * 100.0);
105 println!(" Precision: {:.2}%", precision * 100.0);
106 println!(" Recall: {:.2}%", recall * 100.0);
107 println!(" F1 Score: {:.2}%", f1 * 100.0);
108 println!();
109
110 // Create a simple confusion matrix manually since the method isn't available
111 let mut true_positive = 0;
112 let mut true_negative = 0;
113 let mut false_positive = 0;
114 let mut false_negative = 0;
115
116 for (pred, actual) in predictions.iter().zip(test_labels.iter()) {
117 match (pred, actual) {
118 (1, 1) => true_positive += 1,
119 (0, 0) => true_negative += 1,
120 (1, 0) => false_positive += 1,
121 (0, 1) => false_negative += 1,
122 _ => {}
123 }
124 }
125
126 println!("Confusion Matrix:");
127 println!("[ {true_negative} {false_positive} ]");
128 println!("[ {false_negative} {true_positive} ]");
129
130 Ok(())
131}

Source

pub fn unique_labels(&self) -> Vec<String>
pub fn unique_labels(&self) -> Vec<String>
Get the unique labels in the dataset
Examples found in repository?
examples/text_classification_demo.rs (line 35)
8fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("Text Classification Demo");
10 println!("=======================\n");
11
12 // Create sample dataset
13 let texts = vec![
14 "This movie is absolutely fantastic and amazing!".to_string(),
15 "I really hated this film, it was terrible.".to_string(),
16 "The acting was superb and the plot was engaging.".to_string(),
17 "Worst movie I've ever seen, complete waste of time.".to_string(),
18 "A masterpiece of cinema, truly exceptional work.".to_string(),
19 "Boring, predictable, and poorly executed.".to_string(),
20 ];
21
22 let labels = vec![
23 "positive".to_string(),
24 "negative".to_string(),
25 "positive".to_string(),
26 "negative".to_string(),
27 "positive".to_string(),
28 "negative".to_string(),
29 ];
30
31 // Create dataset
32 let dataset = TextDataset::new(texts, labels)?;
33 println!("Dataset Statistics:");
34 println!(" Total samples: {}", dataset.len());
35 println!(" Number of classes: {}", dataset.unique_labels().len());
36 println!();
37
38 // Split into train and test
39 let (train_dataset, test_dataset) = dataset.train_test_split(0.33, Some(42))?;
40 println!("Train/Test Split:");
41 println!(" Training samples: {}", train_dataset.len());
42 println!(" Test samples: {}", test_dataset.len());
43 println!();
44
45 // Create text processing pipeline
46 let mut pipeline = TextClassificationPipeline::with_tfidf();
47
48 // Fit the pipeline
49 pipeline.fit(&train_dataset)?;
50
51 // Transform to features
52 let train_features = pipeline.transform(&train_dataset)?;
53 let test_features = pipeline.transform(&test_dataset)?;
54
55 println!("Feature Extraction:");
56 println!(
57 " Train feature shape: ({}, {})",
58 train_features.nrows(),
59 train_features.ncols()
60 );
61 println!(
62 " Test feature shape: ({}, {})",
63 test_features.nrows(),
64 test_features.ncols()
65 );
66 println!();
67
68 // Demonstrate feature selection
69 let mut feature_selector = TextFeatureSelector::new()
70 .set_max_features(10.0)?
71 .set_min_df(0.1)?
72 .set_max_df(0.9)?;
73
74 let selected_train_features = feature_selector.fit_transform(&train_features)?;
75 println!("Feature Selection:");
76 println!(" Selected features: {}", selected_train_features.ncols());
77 println!();
78
79 // Simulate classification results (in a real scenario, you'd use a classifier)
80 // For demo purposes, we'll create mock predictions based on simple heuristics
81 let _unique_labels = train_dataset.unique_labels();
82
83 // Create binary labels (0 for negative, 1 for positive) for this demo
84 let mut train_labels = Vec::new();
85 let mut test_labels = Vec::new();
86
87 for label in &train_dataset.labels {
88 train_labels.push(if label == "positive" { 1 } else { 0 });
89 }
90
91 for label in &test_dataset.labels {
92 test_labels.push(if label == "positive" { 1 } else { 0 });
93 }
94
95 // Mock predictions (in practice, use a real classifier)
96 let predictions = test_labels.clone(); // Perfect predictions for demo
97
98 // Calculate metrics
99 let metrics = TextClassificationMetrics::new();
100 let accuracy = metrics.accuracy(&predictions, &test_labels)?;
101 let (precision, recall, f1) = metrics.binary_metrics(&predictions, &test_labels)?;
102
103 println!("Classification Metrics:");
104 println!(" Accuracy: {:.2}%", accuracy * 100.0);
105 println!(" Precision: {:.2}%", precision * 100.0);
106 println!(" Recall: {:.2}%", recall * 100.0);
107 println!(" F1 Score: {:.2}%", f1 * 100.0);
108 println!();
109
110 // Create a simple confusion matrix manually since the method isn't available
111 let mut true_positive = 0;
112 let mut true_negative = 0;
113 let mut false_positive = 0;
114 let mut false_negative = 0;
115
116 for (pred, actual) in predictions.iter().zip(test_labels.iter()) {
117 match (pred, actual) {
118 (1, 1) => true_positive += 1,
119 (0, 0) => true_negative += 1,
120 (1, 0) => false_positive += 1,
121 (0, 1) => false_negative += 1,
122 _ => {}
123 }
124 }
125
126 println!("Confusion Matrix:");
127 println!("[ {true_negative} {false_positive} ]");
128 println!("[ {false_negative} {true_positive} ]");
129
130 Ok(())
131}

Source

pub fn build_label_index(&mut self) -> Result<&mut Self>
pub fn build_label_index(&mut self) -> Result<&mut Self>
Build a label index mapping
Source

pub fn get_label_indices(&self) -> Result<Vec<usize>>
pub fn get_label_indices(&self) -> Result<Vec<usize>>
Get label indices
Sourcepub fn train_test_split(
&self,
test_size: f64,
random_seed: Option<u64>,
) -> Result<(Self, Self)>
pub fn train_test_split( &self, test_size: f64, random_seed: Option<u64>, ) -> Result<(Self, Self)>
Split the dataset into train and test sets
Examples found in repository?
examples/text_classification_demo.rs (line 39)
8fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("Text Classification Demo");
10 println!("=======================\n");
11
12 // Create sample dataset
13 let texts = vec![
14 "This movie is absolutely fantastic and amazing!".to_string(),
15 "I really hated this film, it was terrible.".to_string(),
16 "The acting was superb and the plot was engaging.".to_string(),
17 "Worst movie I've ever seen, complete waste of time.".to_string(),
18 "A masterpiece of cinema, truly exceptional work.".to_string(),
19 "Boring, predictable, and poorly executed.".to_string(),
20 ];
21
22 let labels = vec![
23 "positive".to_string(),
24 "negative".to_string(),
25 "positive".to_string(),
26 "negative".to_string(),
27 "positive".to_string(),
28 "negative".to_string(),
29 ];
30
31 // Create dataset
32 let dataset = TextDataset::new(texts, labels)?;
33 println!("Dataset Statistics:");
34 println!(" Total samples: {}", dataset.len());
35 println!(" Number of classes: {}", dataset.unique_labels().len());
36 println!();
37
38 // Split into train and test
39 let (train_dataset, test_dataset) = dataset.train_test_split(0.33, Some(42))?;
40 println!("Train/Test Split:");
41 println!(" Training samples: {}", train_dataset.len());
42 println!(" Test samples: {}", test_dataset.len());
43 println!();
44
45 // Create text processing pipeline
46 let mut pipeline = TextClassificationPipeline::with_tfidf();
47
48 // Fit the pipeline
49 pipeline.fit(&train_dataset)?;
50
51 // Transform to features
52 let train_features = pipeline.transform(&train_dataset)?;
53 let test_features = pipeline.transform(&test_dataset)?;
54
55 println!("Feature Extraction:");
56 println!(
57 " Train feature shape: ({}, {})",
58 train_features.nrows(),
59 train_features.ncols()
60 );
61 println!(
62 " Test feature shape: ({}, {})",
63 test_features.nrows(),
64 test_features.ncols()
65 );
66 println!();
67
68 // Demonstrate feature selection
69 let mut feature_selector = TextFeatureSelector::new()
70 .set_max_features(10.0)?
71 .set_min_df(0.1)?
72 .set_max_df(0.9)?;
73
74 let selected_train_features = feature_selector.fit_transform(&train_features)?;
75 println!("Feature Selection:");
76 println!(" Selected features: {}", selected_train_features.ncols());
77 println!();
78
79 // Simulate classification results (in a real scenario, you'd use a classifier)
80 // For demo purposes, we'll create mock predictions based on simple heuristics
81 let _unique_labels = train_dataset.unique_labels();
82
83 // Create binary labels (0 for negative, 1 for positive) for this demo
84 let mut train_labels = Vec::new();
85 let mut test_labels = Vec::new();
86
87 for label in &train_dataset.labels {
88 train_labels.push(if label == "positive" { 1 } else { 0 });
89 }
90
91 for label in &test_dataset.labels {
92 test_labels.push(if label == "positive" { 1 } else { 0 });
93 }
94
95 // Mock predictions (in practice, use a real classifier)
96 let predictions = test_labels.clone(); // Perfect predictions for demo
97
98 // Calculate metrics
99 let metrics = TextClassificationMetrics::new();
100 let accuracy = metrics.accuracy(&predictions, &test_labels)?;
101 let (precision, recall, f1) = metrics.binary_metrics(&predictions, &test_labels)?;
102
103 println!("Classification Metrics:");
104 println!(" Accuracy: {:.2}%", accuracy * 100.0);
105 println!(" Precision: {:.2}%", precision * 100.0);
106 println!(" Recall: {:.2}%", recall * 100.0);
107 println!(" F1 Score: {:.2}%", f1 * 100.0);
108 println!();
109
110 // Create a simple confusion matrix manually since the method isn't available
111 let mut true_positive = 0;
112 let mut true_negative = 0;
113 let mut false_positive = 0;
114 let mut false_negative = 0;
115
116 for (pred, actual) in predictions.iter().zip(test_labels.iter()) {
117 match (pred, actual) {
118 (1, 1) => true_positive += 1,
119 (0, 0) => true_negative += 1,
120 (1, 0) => false_positive += 1,
121 (0, 1) => false_negative += 1,
122 _ => {}
123 }
124 }
125
126 println!("Confusion Matrix:");
127 println!("[ {true_negative} {false_positive} ]");
128 println!("[ {false_negative} {true_positive} ]");
129
130 Ok(())
131}

More examples
examples/ml_integration_demo.rs (line 120)
8fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("Machine Learning Integration Demo");
10 println!("================================\n");
11
12 // Sample dataset for demonstration
13 let texts = [
14 "This product is absolutely amazing! I love it.",
15 "Terrible experience, would not recommend.",
16 "It's okay, nothing special but works fine.",
17 "Excellent quality and fast shipping.",
18 "Complete waste of money, very disappointed.",
19 "Good value for the price, satisfied with purchase.",
20 "Outstanding service and great product!",
21 "Not worth it, many issues with this item.",
22 ];
23
24 let labels = [
25 "positive", "negative", "neutral", "positive", "negative", "positive", "positive",
26 "negative",
27 ];
28
29 // Create dataset
30 let dataset = TextDataset::new(
31 texts.iter().map(|s| s.to_string()).collect(),
32 labels.iter().map(|s| s.to_string()).collect(),
33 )?;
34
35 // Demonstrate different feature extraction modes
36 println!("1. TF-IDF Feature Extraction");
37 println!("---------------------------");
38
39 let mut tfidf_processor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf)
40 .with_tfidf_params(0.1, 0.9, Some(100));
41
42 let text_refs = texts.to_vec();
43 let tfidf_features = tfidf_processor.fit_transform(&text_refs)?;
44
45 println!(
46 "TF-IDF Features shape: {:?}",
47 tfidf_features.features.shape()
48 );
49 println!(
50 "First document features (first 5 values): {:?}\n",
51 &tfidf_features
52 .features
53 .row(0)
54 .iter()
55 .take(5)
56 .collect::<Vec<_>>()
57 );
58
59 // Topic modeling features
60 println!("2. Topic Modeling Features");
61 println!("-------------------------");
62
63 let mut topic_processor =
64 MLTextPreprocessor::new(FeatureExtractionMode::TopicModeling).with_topic_modeling(3);
65
66 let topic_features = topic_processor.fit_transform(&text_refs)?;
67
68 println!(
69 "Topic Features shape: {:?}",
70 topic_features.features.shape()
71 );
72 println!(
73 "Topic distribution for first document: {:?}\n",
74 topic_features.features.row(0)
75 );
76
77 // Combined features
78 println!("3. Combined Features");
79 println!("-------------------");
80
81 let mut combined_processor = MLTextPreprocessor::new(FeatureExtractionMode::Combined);
82 let combined_features = combined_processor.fit_transform(&text_refs)?;
83
84 println!(
85 "Combined Features shape: {:?}",
86 combined_features.features.shape()
87 );
88 println!("Metadata: {:?}\n", combined_features.metadata);
89
90 // ML Pipeline
91 println!("4. ML Pipeline with Classification");
92 println!("---------------------------------");
93
94 let mut pipeline = TextMLPipeline::with_mode(FeatureExtractionMode::TfIdf)
95 .configure_preprocessor(|p| {
96 p.with_tfidf_params(0.0, 1.0, Some(50))
97 .with_feature_selection(20)
98 });
99
100 let features = pipeline.process(&text_refs)?;
101 println!("Pipeline features shape: {:?}", features.features.shape());
102
103 // Batch processing for large datasets
104 println!("\n5. Batch Processing");
105 println!("-------------------");
106
107 let mut batch_processor = BatchTextProcessor::new(3);
108 let batches = batch_processor.process_batches(&text_refs)?;
109
110 println!("Number of batches: {}", batches.len());
111 for (i, batch) in batches.iter().enumerate() {
112 println!("Batch {} shape: {:?}", i + 1, batch.features.shape());
113 }
114
115 // Feature extraction for classification
116 println!("\n6. Classification with ML Features");
117 println!("----------------------------------");
118
119 // Split data
120 let (train_dataset, test_dataset) = dataset.train_test_split(0.25, Some(42))?;
121
122 // Extract features
123 let traintexts: Vec<&str> = train_dataset.texts.iter().map(|s| s.as_ref()).collect();
124 let testtexts: Vec<&str> = test_dataset.texts.iter().map(|s| s.as_ref()).collect();
125
126 let mut feature_extractor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf);
127 feature_extractor.fit(&traintexts)?;
128
129 let train_features = feature_extractor.transform(&traintexts)?;
130 let test_features = feature_extractor.transform(&testtexts)?;
131
132 println!("Training features: {:?}", train_features.features.shape());
133 println!("Test features: {:?}", test_features.features.shape());
134
135 // In a real scenario, you would now use these features with a classifier
136 println!("\nFeatures are ready for machine learning models!");
137
138 // Demonstrate feature statistics
139 println!("\n7. Feature Statistics");
140 println!("--------------------");
141
142 let feature_means = train_features
143 .features
144 .mean_axis(scirs2_core::ndarray::Axis(0))
145 .unwrap();
146 let feature_stds = train_features
147 .features
148 .std_axis(scirs2_core::ndarray::Axis(0), 0.0);
149
150 println!(
151 "Mean of first 5 features: {:?}",
152 &feature_means.iter().take(5).collect::<Vec<_>>()
153 );
154 println!(
155 "Std of first 5 features: {:?}",
156 &feature_stds.iter().take(5).collect::<Vec<_>>()
157 );
158
159 Ok(())
160}

Trait Implementations
Source§impl Clone for TextDataset
impl Clone for TextDataset
Source§fn clone(&self) -> TextDataset
fn clone(&self) -> TextDataset
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from
source. Read more

Auto Trait Implementations
impl Freeze for TextDataset
impl RefUnwindSafe for TextDataset
impl Send for TextDataset
impl Sync for TextDataset
impl Unpin for TextDataset
impl UnwindSafe for TextDataset
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more

Source

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more

Source

impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<SS, SP> SupersetOf<SS> for SPwhere
SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SPwhere
SS: SubsetOf<SP>,
Source§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct
self from the equivalent element of its
superset. Read more

Source

fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
Checks if
self is actually part of its subset T (and can be converted to it).

Source

fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
Use with care! Same as
self.to_subset but without any property checks. Always succeeds.

Source

fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
The inclusion map: converts
self to the equivalent element of its superset.