ml_integration_demo/
ml_integration_demo.rs1use scirs2_text::{
4 BatchTextProcessor, FeatureExtractionMode, MLTextPreprocessor, TextDataset, TextMLPipeline,
5};
6
7#[allow(dead_code)]
8fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("Machine Learning Integration Demo");
10 println!("================================\n");
11
12 let texts = [
14 "This product is absolutely amazing! I love it.",
15 "Terrible experience, would not recommend.",
16 "It's okay, nothing special but works fine.",
17 "Excellent quality and fast shipping.",
18 "Complete waste of money, very disappointed.",
19 "Good value for the price, satisfied with purchase.",
20 "Outstanding service and great product!",
21 "Not worth it, many issues with this item.",
22 ];
23
24 let labels = [
25 "positive", "negative", "neutral", "positive", "negative", "positive", "positive",
26 "negative",
27 ];
28
29 let dataset = TextDataset::new(
31 texts.iter().map(|s| s.to_string()).collect(),
32 labels.iter().map(|s| s.to_string()).collect(),
33 )?;
34
35 println!("1. TF-IDF Feature Extraction");
37 println!("---------------------------");
38
39 let mut tfidf_processor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf)
40 .with_tfidf_params(0.1, 0.9, Some(100));
41
42 let text_refs = texts.to_vec();
43 let tfidf_features = tfidf_processor.fit_transform(&text_refs)?;
44
45 println!(
46 "TF-IDF Features shape: {:?}",
47 tfidf_features.features.shape()
48 );
49 println!(
50 "First document features (first 5 values): {:?}\n",
51 &tfidf_features
52 .features
53 .row(0)
54 .iter()
55 .take(5)
56 .collect::<Vec<_>>()
57 );
58
59 println!("2. Topic Modeling Features");
61 println!("-------------------------");
62
63 let mut topic_processor =
64 MLTextPreprocessor::new(FeatureExtractionMode::TopicModeling).with_topic_modeling(3);
65
66 let topic_features = topic_processor.fit_transform(&text_refs)?;
67
68 println!(
69 "Topic Features shape: {:?}",
70 topic_features.features.shape()
71 );
72 println!(
73 "Topic distribution for first document: {:?}\n",
74 topic_features.features.row(0)
75 );
76
77 println!("3. Combined Features");
79 println!("-------------------");
80
81 let mut combined_processor = MLTextPreprocessor::new(FeatureExtractionMode::Combined);
82 let combined_features = combined_processor.fit_transform(&text_refs)?;
83
84 println!(
85 "Combined Features shape: {:?}",
86 combined_features.features.shape()
87 );
88 println!("Metadata: {:?}\n", combined_features.metadata);
89
90 println!("4. ML Pipeline with Classification");
92 println!("---------------------------------");
93
94 let mut pipeline = TextMLPipeline::with_mode(FeatureExtractionMode::TfIdf)
95 .configure_preprocessor(|p| {
96 p.with_tfidf_params(0.0, 1.0, Some(50))
97 .with_feature_selection(20)
98 });
99
100 let features = pipeline.process(&text_refs)?;
101 println!("Pipeline features shape: {:?}", features.features.shape());
102
103 println!("\n5. Batch Processing");
105 println!("-------------------");
106
107 let mut batch_processor = BatchTextProcessor::new(3);
108 let batches = batch_processor.process_batches(&text_refs)?;
109
110 println!("Number of batches: {}", batches.len());
111 for (i, batch) in batches.iter().enumerate() {
112 println!("Batch {} shape: {:?}", i + 1, batch.features.shape());
113 }
114
115 println!("\n6. Classification with ML Features");
117 println!("----------------------------------");
118
119 let (train_dataset, test_dataset) = dataset.train_test_split(0.25, Some(42))?;
121
122 let traintexts: Vec<&str> = train_dataset.texts.iter().map(|s| s.as_ref()).collect();
124 let testtexts: Vec<&str> = test_dataset.texts.iter().map(|s| s.as_ref()).collect();
125
126 let mut feature_extractor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf);
127 feature_extractor.fit(&traintexts)?;
128
129 let train_features = feature_extractor.transform(&traintexts)?;
130 let test_features = feature_extractor.transform(&testtexts)?;
131
132 println!("Training features: {:?}", train_features.features.shape());
133 println!("Test features: {:?}", test_features.features.shape());
134
135 println!("\nFeatures are ready for machine learning models!");
137
138 println!("\n7. Feature Statistics");
140 println!("--------------------");
141
142 let feature_means = train_features
143 .features
144 .mean_axis(scirs2_core::ndarray::Axis(0))
145 .unwrap();
146 let feature_stds = train_features
147 .features
148 .std_axis(scirs2_core::ndarray::Axis(0), 0.0);
149
150 println!(
151 "Mean of first 5 features: {:?}",
152 &feature_means.iter().take(5).collect::<Vec<_>>()
153 );
154 println!(
155 "Std of first 5 features: {:?}",
156 &feature_stds.iter().take(5).collect::<Vec<_>>()
157 );
158
159 Ok(())
160}