pub struct BatchTextProcessor { /* private fields */ }

Expand description
Batch text processor for large datasets
Implementations§
Source§impl BatchTextProcessor
impl BatchTextProcessor
Source§pub fn new(batchsize: usize) -> Self
pub fn new(batchsize: usize) -> Self
Create a new batch processor
Examples found in repository?
examples/ml_integration_demo.rs (line 107)
8fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("Machine Learning Integration Demo");
10 println!("================================\n");
11
12 // Sample dataset for demonstration
13 let texts = [
14 "This product is absolutely amazing! I love it.",
15 "Terrible experience, would not recommend.",
16 "It's okay, nothing special but works fine.",
17 "Excellent quality and fast shipping.",
18 "Complete waste of money, very disappointed.",
19 "Good value for the price, satisfied with purchase.",
20 "Outstanding service and great product!",
21 "Not worth it, many issues with this item.",
22 ];
23
24 let labels = [
25 "positive", "negative", "neutral", "positive", "negative", "positive", "positive",
26 "negative",
27 ];
28
29 // Create dataset
30 let dataset = TextDataset::new(
31 texts.iter().map(|s| s.to_string()).collect(),
32 labels.iter().map(|s| s.to_string()).collect(),
33 )?;
34
35 // Demonstrate different feature extraction modes
36 println!("1. TF-IDF Feature Extraction");
37 println!("---------------------------");
38
39 let mut tfidf_processor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf)
40 .with_tfidf_params(0.1, 0.9, Some(100));
41
42 let text_refs = texts.to_vec();
43 let tfidf_features = tfidf_processor.fit_transform(&text_refs)?;
44
45 println!(
46 "TF-IDF Features shape: {:?}",
47 tfidf_features.features.shape()
48 );
49 println!(
50 "First document features (first 5 values): {:?}\n",
51 &tfidf_features
52 .features
53 .row(0)
54 .iter()
55 .take(5)
56 .collect::<Vec<_>>()
57 );
58
59 // Topic modeling features
60 println!("2. Topic Modeling Features");
61 println!("-------------------------");
62
63 let mut topic_processor =
64 MLTextPreprocessor::new(FeatureExtractionMode::TopicModeling).with_topic_modeling(3);
65
66 let topic_features = topic_processor.fit_transform(&text_refs)?;
67
68 println!(
69 "Topic Features shape: {:?}",
70 topic_features.features.shape()
71 );
72 println!(
73 "Topic distribution for first document: {:?}\n",
74 topic_features.features.row(0)
75 );
76
77 // Combined features
78 println!("3. Combined Features");
79 println!("-------------------");
80
81 let mut combined_processor = MLTextPreprocessor::new(FeatureExtractionMode::Combined);
82 let combined_features = combined_processor.fit_transform(&text_refs)?;
83
84 println!(
85 "Combined Features shape: {:?}",
86 combined_features.features.shape()
87 );
88 println!("Metadata: {:?}\n", combined_features.metadata);
89
90 // ML Pipeline
91 println!("4. ML Pipeline with Classification");
92 println!("---------------------------------");
93
94 let mut pipeline = TextMLPipeline::with_mode(FeatureExtractionMode::TfIdf)
95 .configure_preprocessor(|p| {
96 p.with_tfidf_params(0.0, 1.0, Some(50))
97 .with_feature_selection(20)
98 });
99
100 let features = pipeline.process(&text_refs)?;
101 println!("Pipeline features shape: {:?}", features.features.shape());
102
103 // Batch processing for large datasets
104 println!("\n5. Batch Processing");
105 println!("-------------------");
106
107 let mut batch_processor = BatchTextProcessor::new(3);
108 let batches = batch_processor.process_batches(&text_refs)?;
109
110 println!("Number of batches: {}", batches.len());
111 for (i, batch) in batches.iter().enumerate() {
112 println!("Batch {} shape: {:?}", i + 1, batch.features.shape());
113 }
114
115 // Feature extraction for classification
116 println!("\n6. Classification with ML Features");
117 println!("----------------------------------");
118
119 // Split data
120 let (train_dataset, test_dataset) = dataset.train_test_split(0.25, Some(42))?;
121
122 // Extract features
123 let traintexts: Vec<&str> = train_dataset.texts.iter().map(|s| s.as_ref()).collect();
124 let testtexts: Vec<&str> = test_dataset.texts.iter().map(|s| s.as_ref()).collect();
125
126 let mut feature_extractor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf);
127 feature_extractor.fit(&traintexts)?;
128
129 let train_features = feature_extractor.transform(&traintexts)?;
130 let test_features = feature_extractor.transform(&testtexts)?;
131
132 println!("Training features: {:?}", train_features.features.shape());
133 println!("Test features: {:?}", test_features.features.shape());
134
135 // In a real scenario, you would now use these features with a classifier
136 println!("\nFeatures are ready for machine learning models!");
137
138 // Demonstrate feature statistics
139 println!("\n7. Feature Statistics");
140 println!("--------------------");
141
142 let feature_means = train_features
143 .features
144 .mean_axis(scirs2_core::ndarray::Axis(0))
145 .unwrap();
146 let feature_stds = train_features
147 .features
148 .std_axis(scirs2_core::ndarray::Axis(0), 0.0);
149
150 println!(
151 "Mean of first 5 features: {:?}",
152 &feature_means.iter().take(5).collect::<Vec<_>>()
153 );
154 println!(
155 "Std of first 5 features: {:?}",
156 &feature_stds.iter().take(5).collect::<Vec<_>>()
157 );
158
159 Ok(())
160}

Source§pub fn process_batches(&mut self, texts: &[&str]) -> Result<Vec<TextFeatures>>
pub fn process_batches(&mut self, texts: &[&str]) -> Result<Vec<TextFeatures>>
Process texts in batches
Examples found in repository?
examples/ml_integration_demo.rs (line 108)
8fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("Machine Learning Integration Demo");
10 println!("================================\n");
11
12 // Sample dataset for demonstration
13 let texts = [
14 "This product is absolutely amazing! I love it.",
15 "Terrible experience, would not recommend.",
16 "It's okay, nothing special but works fine.",
17 "Excellent quality and fast shipping.",
18 "Complete waste of money, very disappointed.",
19 "Good value for the price, satisfied with purchase.",
20 "Outstanding service and great product!",
21 "Not worth it, many issues with this item.",
22 ];
23
24 let labels = [
25 "positive", "negative", "neutral", "positive", "negative", "positive", "positive",
26 "negative",
27 ];
28
29 // Create dataset
30 let dataset = TextDataset::new(
31 texts.iter().map(|s| s.to_string()).collect(),
32 labels.iter().map(|s| s.to_string()).collect(),
33 )?;
34
35 // Demonstrate different feature extraction modes
36 println!("1. TF-IDF Feature Extraction");
37 println!("---------------------------");
38
39 let mut tfidf_processor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf)
40 .with_tfidf_params(0.1, 0.9, Some(100));
41
42 let text_refs = texts.to_vec();
43 let tfidf_features = tfidf_processor.fit_transform(&text_refs)?;
44
45 println!(
46 "TF-IDF Features shape: {:?}",
47 tfidf_features.features.shape()
48 );
49 println!(
50 "First document features (first 5 values): {:?}\n",
51 &tfidf_features
52 .features
53 .row(0)
54 .iter()
55 .take(5)
56 .collect::<Vec<_>>()
57 );
58
59 // Topic modeling features
60 println!("2. Topic Modeling Features");
61 println!("-------------------------");
62
63 let mut topic_processor =
64 MLTextPreprocessor::new(FeatureExtractionMode::TopicModeling).with_topic_modeling(3);
65
66 let topic_features = topic_processor.fit_transform(&text_refs)?;
67
68 println!(
69 "Topic Features shape: {:?}",
70 topic_features.features.shape()
71 );
72 println!(
73 "Topic distribution for first document: {:?}\n",
74 topic_features.features.row(0)
75 );
76
77 // Combined features
78 println!("3. Combined Features");
79 println!("-------------------");
80
81 let mut combined_processor = MLTextPreprocessor::new(FeatureExtractionMode::Combined);
82 let combined_features = combined_processor.fit_transform(&text_refs)?;
83
84 println!(
85 "Combined Features shape: {:?}",
86 combined_features.features.shape()
87 );
88 println!("Metadata: {:?}\n", combined_features.metadata);
89
90 // ML Pipeline
91 println!("4. ML Pipeline with Classification");
92 println!("---------------------------------");
93
94 let mut pipeline = TextMLPipeline::with_mode(FeatureExtractionMode::TfIdf)
95 .configure_preprocessor(|p| {
96 p.with_tfidf_params(0.0, 1.0, Some(50))
97 .with_feature_selection(20)
98 });
99
100 let features = pipeline.process(&text_refs)?;
101 println!("Pipeline features shape: {:?}", features.features.shape());
102
103 // Batch processing for large datasets
104 println!("\n5. Batch Processing");
105 println!("-------------------");
106
107 let mut batch_processor = BatchTextProcessor::new(3);
108 let batches = batch_processor.process_batches(&text_refs)?;
109
110 println!("Number of batches: {}", batches.len());
111 for (i, batch) in batches.iter().enumerate() {
112 println!("Batch {} shape: {:?}", i + 1, batch.features.shape());
113 }
114
115 // Feature extraction for classification
116 println!("\n6. Classification with ML Features");
117 println!("----------------------------------");
118
119 // Split data
120 let (train_dataset, test_dataset) = dataset.train_test_split(0.25, Some(42))?;
121
122 // Extract features
123 let traintexts: Vec<&str> = train_dataset.texts.iter().map(|s| s.as_ref()).collect();
124 let testtexts: Vec<&str> = test_dataset.texts.iter().map(|s| s.as_ref()).collect();
125
126 let mut feature_extractor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf);
127 feature_extractor.fit(&traintexts)?;
128
129 let train_features = feature_extractor.transform(&traintexts)?;
130 let test_features = feature_extractor.transform(&testtexts)?;
131
132 println!("Training features: {:?}", train_features.features.shape());
133 println!("Test features: {:?}", test_features.features.shape());
134
135 // In a real scenario, you would now use these features with a classifier
136 println!("\nFeatures are ready for machine learning models!");
137
138 // Demonstrate feature statistics
139 println!("\n7. Feature Statistics");
140 println!("--------------------");
141
142 let feature_means = train_features
143 .features
144 .mean_axis(scirs2_core::ndarray::Axis(0))
145 .unwrap();
146 let feature_stds = train_features
147 .features
148 .std_axis(scirs2_core::ndarray::Axis(0), 0.0);
149
150 println!(
151 "Mean of first 5 features: {:?}",
152 &feature_means.iter().take(5).collect::<Vec<_>>()
153 );
154 println!(
155 "Std of first 5 features: {:?}",
156 &feature_stds.iter().take(5).collect::<Vec<_>>()
157 );
158
159 Ok(())
160}

Auto Trait Implementations§
impl Freeze for BatchTextProcessor
impl !RefUnwindSafe for BatchTextProcessor
impl Send for BatchTextProcessor
impl Sync for BatchTextProcessor
impl Unpin for BatchTextProcessor
impl !UnwindSafe for BatchTextProcessor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more

Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more

Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<SS, SP> SupersetOf<SS> for SP
where
    SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SP
where
    SS: SubsetOf<SP>,
Source§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct
self from the equivalent element of its
superset. Read more

Source§fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
Checks if
self is actually part of its subset T (and can be converted to it).

Source§fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
Use with care! Same as
self.to_subset but without any property checks. Always succeeds.

Source§fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
The inclusion map: converts
self to the equivalent element of its superset.