ml_integration_demo/
ml_integration_demo.rs

1//! Machine learning integration example
2
3use scirs2_text::{
4    BatchTextProcessor, FeatureExtractionMode, MLTextPreprocessor, TextDataset, TextMLPipeline,
5};
6
7#[allow(dead_code)]
8fn main() -> Result<(), Box<dyn std::error::Error>> {
9    println!("Machine Learning Integration Demo");
10    println!("================================\n");
11
12    // Sample dataset for demonstration
13    let texts = [
14        "This product is absolutely amazing! I love it.",
15        "Terrible experience, would not recommend.",
16        "It's okay, nothing special but works fine.",
17        "Excellent quality and fast shipping.",
18        "Complete waste of money, very disappointed.",
19        "Good value for the price, satisfied with purchase.",
20        "Outstanding service and great product!",
21        "Not worth it, many issues with this item.",
22    ];
23
24    let labels = [
25        "positive", "negative", "neutral", "positive", "negative", "positive", "positive",
26        "negative",
27    ];
28
29    // Create dataset
30    let dataset = TextDataset::new(
31        texts.iter().map(|s| s.to_string()).collect(),
32        labels.iter().map(|s| s.to_string()).collect(),
33    )?;
34
35    // Demonstrate different feature extraction modes
36    println!("1. TF-IDF Feature Extraction");
37    println!("---------------------------");
38
39    let mut tfidf_processor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf)
40        .with_tfidf_params(0.1, 0.9, Some(100));
41
42    let text_refs = texts.to_vec();
43    let tfidf_features = tfidf_processor.fit_transform(&text_refs)?;
44
45    println!(
46        "TF-IDF Features shape: {:?}",
47        tfidf_features.features.shape()
48    );
49    println!(
50        "First document features (first 5 values): {:?}\n",
51        &tfidf_features
52            .features
53            .row(0)
54            .iter()
55            .take(5)
56            .collect::<Vec<_>>()
57    );
58
59    // Topic modeling features
60    println!("2. Topic Modeling Features");
61    println!("-------------------------");
62
63    let mut topic_processor =
64        MLTextPreprocessor::new(FeatureExtractionMode::TopicModeling).with_topic_modeling(3);
65
66    let topic_features = topic_processor.fit_transform(&text_refs)?;
67
68    println!(
69        "Topic Features shape: {:?}",
70        topic_features.features.shape()
71    );
72    println!(
73        "Topic distribution for first document: {:?}\n",
74        topic_features.features.row(0)
75    );
76
77    // Combined features
78    println!("3. Combined Features");
79    println!("-------------------");
80
81    let mut combined_processor = MLTextPreprocessor::new(FeatureExtractionMode::Combined);
82    let combined_features = combined_processor.fit_transform(&text_refs)?;
83
84    println!(
85        "Combined Features shape: {:?}",
86        combined_features.features.shape()
87    );
88    println!("Metadata: {:?}\n", combined_features.metadata);
89
90    // ML Pipeline
91    println!("4. ML Pipeline with Classification");
92    println!("---------------------------------");
93
94    let mut pipeline = TextMLPipeline::with_mode(FeatureExtractionMode::TfIdf)
95        .configure_preprocessor(|p| {
96            p.with_tfidf_params(0.0, 1.0, Some(50))
97                .with_feature_selection(20)
98        });
99
100    let features = pipeline.process(&text_refs)?;
101    println!("Pipeline features shape: {:?}", features.features.shape());
102
103    // Batch processing for large datasets
104    println!("\n5. Batch Processing");
105    println!("-------------------");
106
107    let mut batch_processor = BatchTextProcessor::new(3);
108    let batches = batch_processor.process_batches(&text_refs)?;
109
110    println!("Number of batches: {}", batches.len());
111    for (i, batch) in batches.iter().enumerate() {
112        println!("Batch {} shape: {:?}", i + 1, batch.features.shape());
113    }
114
115    // Feature extraction for classification
116    println!("\n6. Classification with ML Features");
117    println!("----------------------------------");
118
119    // Split data
120    let (train_dataset, test_dataset) = dataset.train_test_split(0.25, Some(42))?;
121
122    // Extract features
123    let traintexts: Vec<&str> = train_dataset.texts.iter().map(|s| s.as_ref()).collect();
124    let testtexts: Vec<&str> = test_dataset.texts.iter().map(|s| s.as_ref()).collect();
125
126    let mut feature_extractor = MLTextPreprocessor::new(FeatureExtractionMode::TfIdf);
127    feature_extractor.fit(&traintexts)?;
128
129    let train_features = feature_extractor.transform(&traintexts)?;
130    let test_features = feature_extractor.transform(&testtexts)?;
131
132    println!("Training features: {:?}", train_features.features.shape());
133    println!("Test features: {:?}", test_features.features.shape());
134
135    // In a real scenario, you would now use these features with a classifier
136    println!("\nFeatures are ready for machine learning models!");
137
138    // Demonstrate feature statistics
139    println!("\n7. Feature Statistics");
140    println!("--------------------");
141
142    let feature_means = train_features
143        .features
144        .mean_axis(scirs2_core::ndarray::Axis(0))
145        .unwrap();
146    let feature_stds = train_features
147        .features
148        .std_axis(scirs2_core::ndarray::Axis(0), 0.0);
149
150    println!(
151        "Mean of first 5 features: {:?}",
152        &feature_means.iter().take(5).collect::<Vec<_>>()
153    );
154    println!(
155        "Std of first 5 features: {:?}",
156        &feature_stds.iter().take(5).collect::<Vec<_>>()
157    );
158
159    Ok(())
160}