pub struct MLTextPreprocessor { /* private fields */ }
Machine learning text preprocessor: turns raw texts into numeric feature matrices (TF-IDF, topic-model, or combined features) for downstream models.
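Typical use, condensed from the repository example shown under new below: pick a FeatureExtractionMode, optionally chain configuration builders, then fit on a corpus and read off the feature matrix. This is a sketch only and assumes it runs inside a function returning Result, as in the example's main.

// Sketch: TF-IDF features for a tiny corpus (condensed from
// examples/ml_integration_demo.rs).
let texts = ["This product is amazing!", "Terrible experience."];
let mut preprocessor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf);
let features = preprocessor.fit_transform(&texts)?;
println!("Feature matrix shape: {:?}", features.features.shape());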
Implementations§
impl MLTextPreprocessor
pub fn new(mode: FeatureExtractionMode) -> Self
Create a new ML text preprocessor
Examples found in repository:
examples/ml_integration_demo.rs (line 39)
8 fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("Machine Learning Integration Demo");
10 println!("================================\n");
11
12 // Sample dataset for demonstration
13 let texts = [
14 "This product is absolutely amazing! I love it.",
15 "Terrible experience, would not recommend.",
16 "It's okay, nothing special but works fine.",
17 "Excellent quality and fast shipping.",
18 "Complete waste of money, very disappointed.",
19 "Good value for the price, satisfied with purchase.",
20 "Outstanding service and great product!",
21 "Not worth it, many issues with this item.",
22 ];
23
24 let labels = [
25 "positive", "negative", "neutral", "positive", "negative", "positive", "positive",
26 "negative",
27 ];
28
29 // Create dataset
30 let dataset = TextDataset::new(
31 texts.iter().map(|s| s.to_string()).collect(),
32 labels.iter().map(|s| s.to_string()).collect(),
33 )?;
34
35 // Demonstrate different feature extraction modes
36 println!("1. TF-IDF Feature Extraction");
37 println!("---------------------------");
38
39 let mut tfidf_processor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf)
40 .with_tfidf_params(0.1, 0.9, Some(100));
41
42 let text_refs = texts.to_vec();
43 let tfidf_features = tfidf_processor.fit_transform(&text_refs)?;
44
45 println!(
46 "TF-IDF Features shape: {:?}",
47 tfidf_features.features.shape()
48 );
49 println!(
50 "First document features (first 5 values): {:?}\n",
51 &tfidf_features
52 .features
53 .row(0)
54 .iter()
55 .take(5)
56 .collect::<Vec<_>>()
57 );
58
59 // Topic modeling features
60 println!("2. Topic Modeling Features");
61 println!("-------------------------");
62
63 let mut topic_processor =
64 MLTextPreprocessor::new(FeatureExtractionMode::TopicModeling).with_topic_modeling(3);
65
66 let topic_features = topic_processor.fit_transform(&text_refs)?;
67
68 println!(
69 "Topic Features shape: {:?}",
70 topic_features.features.shape()
71 );
72 println!(
73 "Topic distribution for first document: {:?}\n",
74 topic_features.features.row(0)
75 );
76
77 // Combined features
78 println!("3. Combined Features");
79 println!("-------------------");
80
81 let mut combined_processor = MLTextPreprocessor::new(FeatureExtractionMode::Combined);
82 let combined_features = combined_processor.fit_transform(&text_refs)?;
83
84 println!(
85 "Combined Features shape: {:?}",
86 combined_features.features.shape()
87 );
88 println!("Metadata: {:?}\n", combined_features.metadata);
89
90 // ML Pipeline
91 println!("4. ML Pipeline with Classification");
92 println!("---------------------------------");
93
94 let mut pipeline = TextMLPipeline::with_mode(FeatureExtractionMode::TfIdf)
95 .configure_preprocessor(|p| {
96 p.with_tfidf_params(0.0, 1.0, Some(50))
97 .with_feature_selection(20)
98 });
99
100 let features = pipeline.process(&text_refs)?;
101 println!("Pipeline features shape: {:?}", features.features.shape());
102
103 // Batch processing for large datasets
104 println!("\n5. Batch Processing");
105 println!("-------------------");
106
107 let mut batch_processor = BatchTextProcessor::new(3);
108 let batches = batch_processor.process_batches(&text_refs)?;
109
110 println!("Number of batches: {}", batches.len());
111 for (i, batch) in batches.iter().enumerate() {
112 println!("Batch {} shape: {:?}", i + 1, batch.features.shape());
113 }
114
115 // Feature extraction for classification
116 println!("\n6. Classification with ML Features");
117 println!("----------------------------------");
118
119 // Split data
120 let (train_dataset, test_dataset) = dataset.train_test_split(0.25, Some(42))?;
121
122 // Extract features
123 let traintexts: Vec<&str> = train_dataset.texts.iter().map(|s| s.as_ref()).collect();
124 let testtexts: Vec<&str> = test_dataset.texts.iter().map(|s| s.as_ref()).collect();
125
126 let mut feature_extractor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf);
127 feature_extractor.fit(&traintexts)?;
128
129 let train_features = feature_extractor.transform(&traintexts)?;
130 let test_features = feature_extractor.transform(&testtexts)?;
131
132 println!("Training features: {:?}", train_features.features.shape());
133 println!("Test features: {:?}", test_features.features.shape());
134
135 // In a real scenario, you would now use these features with a classifier
136 println!("\nFeatures are ready for machine learning models!");
137
138 // Demonstrate feature statistics
139 println!("\n7. Feature Statistics");
140 println!("--------------------");
141
142 let feature_means = train_features
143 .features
144 .mean_axis(scirs2_core::ndarray::Axis(0))
145 .unwrap();
146 let feature_stds = train_features
147 .features
148 .std_axis(scirs2_core::ndarray::Axis(0), 0.0);
149
150 println!(
151 "Mean of first 5 features: {:?}",
152 &feature_means.iter().take(5).collect::<Vec<_>>()
153 );
154 println!(
155 "Std of first 5 features: {:?}",
156 &feature_stds.iter().take(5).collect::<Vec<_>>()
157 );
158
159 Ok(())
160 }

pub fn with_tfidf_params(
    self,
    min_df: f64,
    max_df: f64,
    max_features: Option<usize>,
) -> Self
Configure TF-IDF parameters
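A minimal sketch using the same arguments as the repository example; the interpretation of min_df and max_df as document-frequency bounds and max_features as a vocabulary cap is inferred from the parameter names, not stated elsewhere on this page. Assumes a Result-returning context.

// Sketch: keep terms whose document frequency lies between 0.1 and 0.9,
// with at most 100 features retained (interpretation inferred from the names).
let texts = ["great product, fast shipping", "terrible product, slow shipping"];
let mut processor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf)
    .with_tfidf_params(0.1, 0.9, Some(100));
let features = processor.fit_transform(&texts)?;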
Examples found in repository:
examples/ml_integration_demo.rs (line 40); the full listing appears under new above.

pub fn with_topic_modeling(self, ntopics: usize) -> Self
Configure topic modeling
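A minimal sketch mirroring the repository example: request a fixed number of latent topics and read a per-document topic distribution from the resulting matrix. Assumes a Result-returning context.

// Sketch: 3 latent topics, as in the example; row 0 is the topic mix of
// the first document.
let texts = ["fast shipping, great quality", "waste of money", "works fine"];
let mut processor = MLTextPreprocessor::new(FeatureExtractionMode::TopicModeling)
    .with_topic_modeling(3);
let topic_features = processor.fit_transform(&texts)?;
println!("{:?}", topic_features.features.row(0));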
Examples found in repository:
examples/ml_integration_demo.rs (line 64); the full listing appears under new above.

pub fn with_word_embeddings(self, embeddings: Word2Vec) -> Self
Configure word embeddings
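No repository example exercises this builder, so the sketch below only shows where a pre-trained Word2Vec model would be attached; how the model is trained is out of scope here, and the choice of FeatureExtractionMode::Combined is illustrative rather than required.

// Sketch: attach an already-trained Word2Vec model so embedding-based
// features can be combined with the other extraction modes.
fn embedding_preprocessor(embeddings: Word2Vec) -> MLTextPreprocessor {
    MLTextPreprocessor::new(FeatureExtractionMode::Combined)
        .with_word_embeddings(embeddings)
}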
pub fn with_feature_selection(self, maxfeatures: usize) -> Self
Configure feature selection
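A minimal sketch following the call pattern used in the pipeline step of the repository example (with_tfidf_params(0.0, 1.0, Some(50)) followed by with_feature_selection(20)); the exact selection criterion is not documented on this page. Assumes a Result-returning context.

// Sketch: extract up to 50 TF-IDF features, then keep the 20 retained by the
// feature-selection step (selection criterion assumed, not documented here).
let texts = ["good value for the price", "many issues with this item"];
let mut processor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf)
    .with_tfidf_params(0.0, 1.0, Some(50))
    .with_feature_selection(20);
let features = processor.fit_transform(&texts)?;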
Examples found in repository:
examples/ml_integration_demo.rs (line 97); the full listing appears under new above.

pub fn fit(&mut self, texts: &[&str]) -> Result<()>
Fit the preprocessor on training data
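A minimal sketch of the fit-then-reuse pattern from the repository example: fit on training texts only, so the learned state can later be applied to unseen texts. Assumes a Result-returning context.

// Sketch: learn vocabulary/statistics from the training split only.
let train_texts = ["excellent quality and fast shipping", "very disappointed"];
let mut extractor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf);
extractor.fit(&train_texts)?;
// extractor is now ready to transform both training and test texts.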
Examples found in repository:
examples/ml_integration_demo.rs (line 127); the full listing appears under new above.

pub fn transform(&self, texts: &[&str]) -> Result<TextFeatures>
Transform texts to feature matrix
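A minimal sketch of the train/test pattern from the repository example: after a single fit, transform is applied to both splits so their features come from the same fitted state. Assumes a Result-returning context.

// Sketch: transform reuses the state learned by fit.
let train_texts = ["excellent quality", "complete waste of money"];
let test_texts = ["good value for the price"];
let mut extractor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf);
extractor.fit(&train_texts)?;
let train_features = extractor.transform(&train_texts)?;
let test_features = extractor.transform(&test_texts)?;
println!("train: {:?}", train_features.features.shape());
println!("test:  {:?}", test_features.features.shape());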
Examples found in repository:
examples/ml_integration_demo.rs (line 129); the full listing appears under new above.

pub fn fit_transform(&mut self, texts: &[&str]) -> Result<TextFeatures>
Fit and transform in one step
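A minimal sketch; fitting and transforming in one call is presumably equivalent to fit followed by transform on the same input. The metadata field is printed as in the Combined-features step of the repository example. Assumes a Result-returning context.

// Sketch: one-step fit + transform on the same corpus.
let texts = ["outstanding service and great product", "not worth it"];
let mut processor = MLTextPreprocessor::new(FeatureExtractionMode::Combined);
let features = processor.fit_transform(&texts)?;
println!("shape: {:?}", features.features.shape());
println!("metadata: {:?}", features.metadata);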
Examples found in repository:
examples/ml_integration_demo.rs (line 43); the full listing appears under new above.

Auto Trait Implementations§
impl Freeze for MLTextPreprocessor
impl !RefUnwindSafe for MLTextPreprocessor
impl Send for MLTextPreprocessor
impl Sync for MLTextPreprocessor
impl Unpin for MLTextPreprocessor
impl !UnwindSafe for MLTextPreprocessor
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise.

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise.

impl<T> Pointable for T
impl<SS, SP> SupersetOf<SS> for SP
where
    SS: SubsetOf<SP>,
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset.

fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).

fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.

fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.