TextClassificationPipeline

Struct TextClassificationPipeline 

Source
pub struct TextClassificationPipeline { /* private fields */ }
Expand description

Pipeline for text classification

Implementations§

Source§

impl TextClassificationPipeline

Source

pub fn with_tfidf() -> Self

Create a new pipeline with a default TF-IDF vectorizer

Examples found in repository?
examples/text_classification_demo.rs (line 46)
8  fn main() -> Result<(), Box<dyn std::error::Error>> {
9    println!("Text Classification Demo");
10    println!("=======================\n");
11
12    // Create sample dataset
13    let texts = vec![
14        "This movie is absolutely fantastic and amazing!".to_string(),
15        "I really hated this film, it was terrible.".to_string(),
16        "The acting was superb and the plot was engaging.".to_string(),
17        "Worst movie I've ever seen, complete waste of time.".to_string(),
18        "A masterpiece of cinema, truly exceptional work.".to_string(),
19        "Boring, predictable, and poorly executed.".to_string(),
20    ];
21
22    let labels = vec![
23        "positive".to_string(),
24        "negative".to_string(),
25        "positive".to_string(),
26        "negative".to_string(),
27        "positive".to_string(),
28        "negative".to_string(),
29    ];
30
31    // Create dataset
32    let dataset = TextDataset::new(texts, labels)?;
33    println!("Dataset Statistics:");
34    println!("  Total samples: {}", dataset.len());
35    println!("  Number of classes: {}", dataset.unique_labels().len());
36    println!();
37
38    // Split into train and test
39    let (train_dataset, test_dataset) = dataset.train_test_split(0.33, Some(42))?;
40    println!("Train/Test Split:");
41    println!("  Training samples: {}", train_dataset.len());
42    println!("  Test samples: {}", test_dataset.len());
43    println!();
44
45    // Create text processing pipeline
46    let mut pipeline = TextClassificationPipeline::with_tfidf();
47
48    // Fit the pipeline
49    pipeline.fit(&train_dataset)?;
50
51    // Transform to features
52    let train_features = pipeline.transform(&train_dataset)?;
53    let test_features = pipeline.transform(&test_dataset)?;
54
55    println!("Feature Extraction:");
56    println!(
57        "  Train feature shape: ({}, {})",
58        train_features.nrows(),
59        train_features.ncols()
60    );
61    println!(
62        "  Test feature shape: ({}, {})",
63        test_features.nrows(),
64        test_features.ncols()
65    );
66    println!();
67
68    // Demonstrate feature selection
69    let mut feature_selector = TextFeatureSelector::new()
70        .set_max_features(10.0)?
71        .set_min_df(0.1)?
72        .set_max_df(0.9)?;
73
74    let selected_train_features = feature_selector.fit_transform(&train_features)?;
75    println!("Feature Selection:");
76    println!("  Selected features: {}", selected_train_features.ncols());
77    println!();
78
79    // Simulate classification results (in a real scenario, you'd use a classifier)
80    // For demo purposes, we'll create mock predictions based on simple heuristics
81    let _unique_labels = train_dataset.unique_labels();
82
83    // Create binary labels (0 for negative, 1 for positive) for this demo
84    let mut train_labels = Vec::new();
85    let mut test_labels = Vec::new();
86
87    for label in &train_dataset.labels {
88        train_labels.push(if label == "positive" { 1 } else { 0 });
89    }
90
91    for label in &test_dataset.labels {
92        test_labels.push(if label == "positive" { 1 } else { 0 });
93    }
94
95    // Mock predictions (in practice, use a real classifier)
96    let predictions = test_labels.clone(); // Perfect predictions for demo
97
98    // Calculate metrics
99    let metrics = TextClassificationMetrics::new();
100    let accuracy = metrics.accuracy(&predictions, &test_labels)?;
101    let (precision, recall, f1) = metrics.binary_metrics(&predictions, &test_labels)?;
102
103    println!("Classification Metrics:");
104    println!("  Accuracy: {:.2}%", accuracy * 100.0);
105    println!("  Precision: {:.2}%", precision * 100.0);
106    println!("  Recall: {:.2}%", recall * 100.0);
107    println!("  F1 Score: {:.2}%", f1 * 100.0);
108    println!();
109
110    // Create a simple confusion matrix manually since the method isn't available
111    let mut true_positive = 0;
112    let mut true_negative = 0;
113    let mut false_positive = 0;
114    let mut false_negative = 0;
115
116    for (pred, actual) in predictions.iter().zip(test_labels.iter()) {
117        match (pred, actual) {
118            (1, 1) => true_positive += 1,
119            (0, 0) => true_negative += 1,
120            (1, 0) => false_positive += 1,
121            (0, 1) => false_negative += 1,
122            _ => {}
123        }
124    }
125
126    println!("Confusion Matrix:");
127    println!("[ {true_negative} {false_positive} ]");
128    println!("[ {false_negative} {true_positive} ]");
129
130    Ok(())
131  }
Source

pub fn new(vectorizer: TfidfVectorizer) -> Self

Create a new pipeline with the given vectorizer

Source

pub fn with_feature_selector(self, selector: TextFeatureSelector) -> Self

Add a feature selector to the pipeline

Source

pub fn fit(&mut self, dataset: &TextDataset) -> Result<&mut Self>

Fit the pipeline to training data

Examples found in repository?
examples/text_classification_demo.rs (line 49)
8  fn main() -> Result<(), Box<dyn std::error::Error>> {
9    println!("Text Classification Demo");
10    println!("=======================\n");
11
12    // Create sample dataset
13    let texts = vec![
14        "This movie is absolutely fantastic and amazing!".to_string(),
15        "I really hated this film, it was terrible.".to_string(),
16        "The acting was superb and the plot was engaging.".to_string(),
17        "Worst movie I've ever seen, complete waste of time.".to_string(),
18        "A masterpiece of cinema, truly exceptional work.".to_string(),
19        "Boring, predictable, and poorly executed.".to_string(),
20    ];
21
22    let labels = vec![
23        "positive".to_string(),
24        "negative".to_string(),
25        "positive".to_string(),
26        "negative".to_string(),
27        "positive".to_string(),
28        "negative".to_string(),
29    ];
30
31    // Create dataset
32    let dataset = TextDataset::new(texts, labels)?;
33    println!("Dataset Statistics:");
34    println!("  Total samples: {}", dataset.len());
35    println!("  Number of classes: {}", dataset.unique_labels().len());
36    println!();
37
38    // Split into train and test
39    let (train_dataset, test_dataset) = dataset.train_test_split(0.33, Some(42))?;
40    println!("Train/Test Split:");
41    println!("  Training samples: {}", train_dataset.len());
42    println!("  Test samples: {}", test_dataset.len());
43    println!();
44
45    // Create text processing pipeline
46    let mut pipeline = TextClassificationPipeline::with_tfidf();
47
48    // Fit the pipeline
49    pipeline.fit(&train_dataset)?;
50
51    // Transform to features
52    let train_features = pipeline.transform(&train_dataset)?;
53    let test_features = pipeline.transform(&test_dataset)?;
54
55    println!("Feature Extraction:");
56    println!(
57        "  Train feature shape: ({}, {})",
58        train_features.nrows(),
59        train_features.ncols()
60    );
61    println!(
62        "  Test feature shape: ({}, {})",
63        test_features.nrows(),
64        test_features.ncols()
65    );
66    println!();
67
68    // Demonstrate feature selection
69    let mut feature_selector = TextFeatureSelector::new()
70        .set_max_features(10.0)?
71        .set_min_df(0.1)?
72        .set_max_df(0.9)?;
73
74    let selected_train_features = feature_selector.fit_transform(&train_features)?;
75    println!("Feature Selection:");
76    println!("  Selected features: {}", selected_train_features.ncols());
77    println!();
78
79    // Simulate classification results (in a real scenario, you'd use a classifier)
80    // For demo purposes, we'll create mock predictions based on simple heuristics
81    let _unique_labels = train_dataset.unique_labels();
82
83    // Create binary labels (0 for negative, 1 for positive) for this demo
84    let mut train_labels = Vec::new();
85    let mut test_labels = Vec::new();
86
87    for label in &train_dataset.labels {
88        train_labels.push(if label == "positive" { 1 } else { 0 });
89    }
90
91    for label in &test_dataset.labels {
92        test_labels.push(if label == "positive" { 1 } else { 0 });
93    }
94
95    // Mock predictions (in practice, use a real classifier)
96    let predictions = test_labels.clone(); // Perfect predictions for demo
97
98    // Calculate metrics
99    let metrics = TextClassificationMetrics::new();
100    let accuracy = metrics.accuracy(&predictions, &test_labels)?;
101    let (precision, recall, f1) = metrics.binary_metrics(&predictions, &test_labels)?;
102
103    println!("Classification Metrics:");
104    println!("  Accuracy: {:.2}%", accuracy * 100.0);
105    println!("  Precision: {:.2}%", precision * 100.0);
106    println!("  Recall: {:.2}%", recall * 100.0);
107    println!("  F1 Score: {:.2}%", f1 * 100.0);
108    println!();
109
110    // Create a simple confusion matrix manually since the method isn't available
111    let mut true_positive = 0;
112    let mut true_negative = 0;
113    let mut false_positive = 0;
114    let mut false_negative = 0;
115
116    for (pred, actual) in predictions.iter().zip(test_labels.iter()) {
117        match (pred, actual) {
118            (1, 1) => true_positive += 1,
119            (0, 0) => true_negative += 1,
120            (1, 0) => false_positive += 1,
121            (0, 1) => false_negative += 1,
122            _ => {}
123        }
124    }
125
126    println!("Confusion Matrix:");
127    println!("[ {true_negative} {false_positive} ]");
128    println!("[ {false_negative} {true_positive} ]");
129
130    Ok(())
131  }
Source

pub fn transform(&self, dataset: &TextDataset) -> Result<Array2<f64>>

Transform text data using the pipeline

Examples found in repository?
examples/text_classification_demo.rs (line 52)
8  fn main() -> Result<(), Box<dyn std::error::Error>> {
9    println!("Text Classification Demo");
10    println!("=======================\n");
11
12    // Create sample dataset
13    let texts = vec![
14        "This movie is absolutely fantastic and amazing!".to_string(),
15        "I really hated this film, it was terrible.".to_string(),
16        "The acting was superb and the plot was engaging.".to_string(),
17        "Worst movie I've ever seen, complete waste of time.".to_string(),
18        "A masterpiece of cinema, truly exceptional work.".to_string(),
19        "Boring, predictable, and poorly executed.".to_string(),
20    ];
21
22    let labels = vec![
23        "positive".to_string(),
24        "negative".to_string(),
25        "positive".to_string(),
26        "negative".to_string(),
27        "positive".to_string(),
28        "negative".to_string(),
29    ];
30
31    // Create dataset
32    let dataset = TextDataset::new(texts, labels)?;
33    println!("Dataset Statistics:");
34    println!("  Total samples: {}", dataset.len());
35    println!("  Number of classes: {}", dataset.unique_labels().len());
36    println!();
37
38    // Split into train and test
39    let (train_dataset, test_dataset) = dataset.train_test_split(0.33, Some(42))?;
40    println!("Train/Test Split:");
41    println!("  Training samples: {}", train_dataset.len());
42    println!("  Test samples: {}", test_dataset.len());
43    println!();
44
45    // Create text processing pipeline
46    let mut pipeline = TextClassificationPipeline::with_tfidf();
47
48    // Fit the pipeline
49    pipeline.fit(&train_dataset)?;
50
51    // Transform to features
52    let train_features = pipeline.transform(&train_dataset)?;
53    let test_features = pipeline.transform(&test_dataset)?;
54
55    println!("Feature Extraction:");
56    println!(
57        "  Train feature shape: ({}, {})",
58        train_features.nrows(),
59        train_features.ncols()
60    );
61    println!(
62        "  Test feature shape: ({}, {})",
63        test_features.nrows(),
64        test_features.ncols()
65    );
66    println!();
67
68    // Demonstrate feature selection
69    let mut feature_selector = TextFeatureSelector::new()
70        .set_max_features(10.0)?
71        .set_min_df(0.1)?
72        .set_max_df(0.9)?;
73
74    let selected_train_features = feature_selector.fit_transform(&train_features)?;
75    println!("Feature Selection:");
76    println!("  Selected features: {}", selected_train_features.ncols());
77    println!();
78
79    // Simulate classification results (in a real scenario, you'd use a classifier)
80    // For demo purposes, we'll create mock predictions based on simple heuristics
81    let _unique_labels = train_dataset.unique_labels();
82
83    // Create binary labels (0 for negative, 1 for positive) for this demo
84    let mut train_labels = Vec::new();
85    let mut test_labels = Vec::new();
86
87    for label in &train_dataset.labels {
88        train_labels.push(if label == "positive" { 1 } else { 0 });
89    }
90
91    for label in &test_dataset.labels {
92        test_labels.push(if label == "positive" { 1 } else { 0 });
93    }
94
95    // Mock predictions (in practice, use a real classifier)
96    let predictions = test_labels.clone(); // Perfect predictions for demo
97
98    // Calculate metrics
99    let metrics = TextClassificationMetrics::new();
100    let accuracy = metrics.accuracy(&predictions, &test_labels)?;
101    let (precision, recall, f1) = metrics.binary_metrics(&predictions, &test_labels)?;
102
103    println!("Classification Metrics:");
104    println!("  Accuracy: {:.2}%", accuracy * 100.0);
105    println!("  Precision: {:.2}%", precision * 100.0);
106    println!("  Recall: {:.2}%", recall * 100.0);
107    println!("  F1 Score: {:.2}%", f1 * 100.0);
108    println!();
109
110    // Create a simple confusion matrix manually since the method isn't available
111    let mut true_positive = 0;
112    let mut true_negative = 0;
113    let mut false_positive = 0;
114    let mut false_negative = 0;
115
116    for (pred, actual) in predictions.iter().zip(test_labels.iter()) {
117        match (pred, actual) {
118            (1, 1) => true_positive += 1,
119            (0, 0) => true_negative += 1,
120            (1, 0) => false_positive += 1,
121            (0, 1) => false_negative += 1,
122            _ => {}
123        }
124    }
125
126    println!("Confusion Matrix:");
127    println!("[ {true_negative} {false_positive} ]");
128    println!("[ {false_negative} {true_positive} ]");
129
130    Ok(())
131  }
Source

pub fn fit_transform(&mut self, dataset: &TextDataset) -> Result<Array2<f64>>

Fit and transform in one step

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a pointer with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<SS, SP> SupersetOf<SS> for SP
where SS: SubsetOf<SP>,

Source§

fn to_subset(&self) -> Option<SS>

The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more
Source§

fn is_in_subset(&self) -> bool

Checks if self is actually part of its subset T (and can be converted to it).
Source§

fn to_subset_unchecked(&self) -> SS

Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§

fn from_subset(element: &SS) -> SP

The inclusion map: converts self to the equivalent element of its superset.
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V