stemming_comparison_demo/
stemming_comparison_demo.rs

1use scirs2_text::{LancasterStemmer, PorterStemmer, SimpleLemmatizer, SnowballStemmer, Stemmer};
2use std::error::Error;
3
4#[allow(dead_code)]
5fn main() -> Result<(), Box<dyn Error>> {
6    println!("Stemming Algorithms Comparison Demo");
7    println!("-----------------------------------");
8
9    // Create instances of different stemmers
10    let porter_stemmer = PorterStemmer::new();
11    let snowball_stemmer = SnowballStemmer::new("english")?;
12    let lancaster_stemmer = LancasterStemmer::new();
13    let lemmatizer = SimpleLemmatizer::new();
14
15    // Test words to compare stemming results
16    let test_words = vec![
17        "running",
18        "ran",
19        "runs",
20        "easily",
21        "fishing",
22        "fished",
23        "troubled",
24        "troubling",
25        "troubles",
26        "production",
27        "productive",
28        "argument",
29        "arguing",
30        "university",
31        "universities",
32        "maximizing",
33        "maximum",
34        "presumably",
35        "multiply",
36        "opposition",
37        "computational",
38    ];
39
40    // Print results in a table format
41    println!(
42        "{:<15} {:<15} {:<15} {:<15} {:<15}",
43        "Original", "Porter", "Snowball", "Lancaster", "Lemmatizer"
44    );
45    println!("{}", "-".repeat(75));
46
47    for word in test_words {
48        let porter_result = porter_stemmer.stem(word)?;
49        let snowball_result = snowball_stemmer.stem(word)?;
50        let lancaster_result = lancaster_stemmer.stem(word)?;
51        let lemma_result = lemmatizer.stem(word)?;
52
53        println!(
54            "{word:<15} {porter_result:<15} {snowball_result:<15} {lancaster_result:<15} {lemma_result:<15}"
55        );
56    }
57
58    // Demonstrate configurability of the Lancaster stemmer
59    println!("\nLancaster Stemmer Configuration Options");
60    println!("------------------------------------");
61
62    let default_lancaster = LancasterStemmer::new();
63    let custom_lancaster = LancasterStemmer::new()
64        .with_min_stemmed_length(3)
65        .with_acceptable_check(false);
66
67    println!(
68        "{:<15} {:<20} {:<20}",
69        "Original", "Default Lancaster", "Custom Lancaster"
70    );
71    println!("{}", "-".repeat(55));
72
73    let custom_test_words = vec!["provision", "ear", "me", "fishing", "multiply"];
74
75    for word in custom_test_words {
76        let default_result = default_lancaster.stem(word)?;
77        let custom_result = custom_lancaster.stem(word)?;
78
79        println!("{word:<15} {default_result:<20} {custom_result:<20}");
80    }
81
82    println!("\nNotes:");
83    println!("- Porter stemmer: Established algorithm, medium aggressiveness");
84    println!("- Snowball stemmer: Improved Porter algorithm with language-specific rules");
85    println!("- Lancaster stemmer: Most aggressive stemming, can be configured");
86    println!("- Lemmatizer: Dictionary-based approach, produces actual words");
87
88    Ok(())
89}