multilingual_demo/
multilingual_demo.rs

//! Multilingual text processing and language detection example
2
3use scirs2_text::{Language, LanguageDetector, MultilingualProcessor, StopWords};
4
5#[allow(dead_code)]
6fn main() -> Result<(), Box<dyn std::error::Error>> {
7    println!("Multilingual Text Processing Demo");
8    println!("================================\n");
9
10    // Create language detector
11    let detector = LanguageDetector::new();
12
13    // Example texts in different languages
14    let texts = vec![
15        (
16            "This is a sample text in English to test language detection.",
17            "English",
18        ),
19        (
20            "Este es un texto de ejemplo en español para probar la detección de idioma.",
21            "Spanish",
22        ),
23        (
24            "Ceci est un exemple de texte en français pour tester la détection de langue.",
25            "French",
26        ),
27        (
28            "Dies ist ein Beispieltext auf Deutsch zum Testen der Spracherkennung.",
29            "German",
30        ),
31        ("The quick brown fox jumps over the lazy dog.", "English"),
32        (
33            "El rápido zorro marrón salta sobre el perro perezoso.",
34            "Spanish",
35        ),
36        (
37            "Le rapide renard brun saute par-dessus le chien paresseux.",
38            "French",
39        ),
40        (
41            "Der schnelle braune Fuchs springt über den faulen Hund.",
42            "German",
43        ),
44    ];
45
46    println!("Language Detection:");
47    println!("==================");
48
49    for (text, expected) in &texts {
50        let result = detector.detect(text)?;
51        println!("\nText: \"{text}\"");
52        println!("Expected: {expected}");
53        println!(
54            "Detected: {} (confidence: {:.2}%)",
55            result.language.name(),
56            result.confidence * 100.0
57        );
58
59        if !result.alternatives.is_empty() {
60            println!("Alternatives:");
61            for (lang, score) in &result.alternatives {
62                println!("  - {}: {:.2}%", lang.name(), score * 100.0);
63            }
64        }
65    }
66
67    // Demonstrate stop words functionality
68    println!("\n\nStop Words Processing:");
69    println!("=====================");
70
71    let stop_words = StopWords::new();
72
73    let test_sentences = vec![
74        ("The cat is on the mat", Language::English),
75        ("Le chat est sur le tapis", Language::French),
76        ("El gato está en la alfombra", Language::Spanish),
77    ];
78
79    for (sentence, language) in &test_sentences {
80        let tokens: Vec<String> = sentence.split_whitespace().map(|s| s.to_string()).collect();
81
82        let filtered = stop_words.remove_stop_words(&tokens, *language);
83
84        println!("\nLanguage: {}", language.name());
85        println!("Original: {sentence}");
86        println!("Tokens: {tokens:?}");
87        println!("Without stop words: {filtered:?}");
88    }
89
90    // Demonstrate multilingual processor
91    println!("\n\nMultilingual Processor:");
92    println!("======================");
93
94    let processor = MultilingualProcessor::new();
95
96    let mixedtexts = vec![
97        "Machine learning algorithms are transforming artificial intelligence",
98        "Los algoritmos de aprendizaje automático están transformando la inteligencia artificial",
99        "Les algorithmes d'apprentissage automatique transforment l'intelligence artificielle",
100    ];
101
102    for text in &mixedtexts {
103        let result = processor.process(text)?;
104
105        println!("\nOriginal: \"{text}\"");
106        println!(
107            "Detected Language: {} (confidence: {:.2}%)",
108            result.language.name(),
109            result.confidence * 100.0
110        );
111        println!("Tokens: {} total", result.tokens.len());
112        println!(
113            "Filtered Tokens: {} after stop word removal",
114            result.filtered_tokens.len()
115        );
116
117        if result.filtered_tokens.len() <= 5 {
118            println!("Filtered: {:?}", result.filtered_tokens);
119        } else {
120            println!("First 5 filtered: {:?}...", &result.filtered_tokens[..5]);
121        }
122    }
123
124    // Language code conversions
125    println!("\n\nLanguage Code Conversions:");
126    println!("=========================");
127
128    let languages = vec![
129        Language::English,
130        Language::Spanish,
131        Language::French,
132        Language::German,
133        Language::Chinese,
134        Language::Japanese,
135    ];
136
137    for lang in &languages {
138        println!("{}: ISO code = {}", lang.name(), lang.iso_code());
139    }
140
141    println!("\nReverse lookup:");
142    let codes = vec!["en", "es", "fr", "de", "zh", "ja", "xx"];
143    for code in &codes {
144        let lang = Language::from_iso_code(code);
145        println!("{} -> {}", code, lang.name());
146    }
147
148    Ok(())
149}