stemming_comparison_demo/
stemming_comparison_demo.rs1use scirs2_text::{LancasterStemmer, PorterStemmer, SimpleLemmatizer, SnowballStemmer, Stemmer};
2use std::error::Error;
3
4#[allow(dead_code)]
5fn main() -> Result<(), Box<dyn Error>> {
6 println!("Stemming Algorithms Comparison Demo");
7 println!("-----------------------------------");
8
9 let porter_stemmer = PorterStemmer::new();
11 let snowball_stemmer = SnowballStemmer::new("english")?;
12 let lancaster_stemmer = LancasterStemmer::new();
13 let lemmatizer = SimpleLemmatizer::new();
14
15 let test_words = vec![
17 "running",
18 "ran",
19 "runs",
20 "easily",
21 "fishing",
22 "fished",
23 "troubled",
24 "troubling",
25 "troubles",
26 "production",
27 "productive",
28 "argument",
29 "arguing",
30 "university",
31 "universities",
32 "maximizing",
33 "maximum",
34 "presumably",
35 "multiply",
36 "opposition",
37 "computational",
38 ];
39
40 println!(
42 "{:<15} {:<15} {:<15} {:<15} {:<15}",
43 "Original", "Porter", "Snowball", "Lancaster", "Lemmatizer"
44 );
45 println!("{}", "-".repeat(75));
46
47 for word in test_words {
48 let porter_result = porter_stemmer.stem(word)?;
49 let snowball_result = snowball_stemmer.stem(word)?;
50 let lancaster_result = lancaster_stemmer.stem(word)?;
51 let lemma_result = lemmatizer.stem(word)?;
52
53 println!(
54 "{word:<15} {porter_result:<15} {snowball_result:<15} {lancaster_result:<15} {lemma_result:<15}"
55 );
56 }
57
58 println!("\nLancaster Stemmer Configuration Options");
60 println!("------------------------------------");
61
62 let default_lancaster = LancasterStemmer::new();
63 let custom_lancaster = LancasterStemmer::new()
64 .with_min_stemmed_length(3)
65 .with_acceptable_check(false);
66
67 println!(
68 "{:<15} {:<20} {:<20}",
69 "Original", "Default Lancaster", "Custom Lancaster"
70 );
71 println!("{}", "-".repeat(55));
72
73 let custom_test_words = vec!["provision", "ear", "me", "fishing", "multiply"];
74
75 for word in custom_test_words {
76 let default_result = default_lancaster.stem(word)?;
77 let custom_result = custom_lancaster.stem(word)?;
78
79 println!("{word:<15} {default_result:<20} {custom_result:<20}");
80 }
81
82 println!("\nNotes:");
83 println!("- Porter stemmer: Established algorithm, medium aggressiveness");
84 println!("- Snowball stemmer: Improved Porter algorithm with language-specific rules");
85 println!("- Lancaster stemmer: Most aggressive stemming, can be configured");
86 println!("- Lemmatizer: Dictionary-based approach, produces actual words");
87
88 Ok(())
89}