// multilingual_demo/multilingual_demo.rs
use scirs2_text::{Language, LanguageDetector, MultilingualProcessor, StopWords};

5#[allow(dead_code)]
6fn main() -> Result<(), Box<dyn std::error::Error>> {
7 println!("Multilingual Text Processing Demo");
8 println!("================================\n");
9
10 let detector = LanguageDetector::new();
12
13 let texts = vec![
15 (
16 "This is a sample text in English to test language detection.",
17 "English",
18 ),
19 (
20 "Este es un texto de ejemplo en español para probar la detección de idioma.",
21 "Spanish",
22 ),
23 (
24 "Ceci est un exemple de texte en français pour tester la détection de langue.",
25 "French",
26 ),
27 (
28 "Dies ist ein Beispieltext auf Deutsch zum Testen der Spracherkennung.",
29 "German",
30 ),
31 ("The quick brown fox jumps over the lazy dog.", "English"),
32 (
33 "El rápido zorro marrón salta sobre el perro perezoso.",
34 "Spanish",
35 ),
36 (
37 "Le rapide renard brun saute par-dessus le chien paresseux.",
38 "French",
39 ),
40 (
41 "Der schnelle braune Fuchs springt über den faulen Hund.",
42 "German",
43 ),
44 ];
45
46 println!("Language Detection:");
47 println!("==================");
48
49 for (text, expected) in &texts {
50 let result = detector.detect(text)?;
51 println!("\nText: \"{text}\"");
52 println!("Expected: {expected}");
53 println!(
54 "Detected: {} (confidence: {:.2}%)",
55 result.language.name(),
56 result.confidence * 100.0
57 );
58
59 if !result.alternatives.is_empty() {
60 println!("Alternatives:");
61 for (lang, score) in &result.alternatives {
62 println!(" - {}: {:.2}%", lang.name(), score * 100.0);
63 }
64 }
65 }
66
67 println!("\n\nStop Words Processing:");
69 println!("=====================");
70
71 let stop_words = StopWords::new();
72
73 let test_sentences = vec![
74 ("The cat is on the mat", Language::English),
75 ("Le chat est sur le tapis", Language::French),
76 ("El gato está en la alfombra", Language::Spanish),
77 ];
78
79 for (sentence, language) in &test_sentences {
80 let tokens: Vec<String> = sentence.split_whitespace().map(|s| s.to_string()).collect();
81
82 let filtered = stop_words.remove_stop_words(&tokens, *language);
83
84 println!("\nLanguage: {}", language.name());
85 println!("Original: {sentence}");
86 println!("Tokens: {tokens:?}");
87 println!("Without stop words: {filtered:?}");
88 }
89
90 println!("\n\nMultilingual Processor:");
92 println!("======================");
93
94 let processor = MultilingualProcessor::new();
95
96 let mixedtexts = vec![
97 "Machine learning algorithms are transforming artificial intelligence",
98 "Los algoritmos de aprendizaje automático están transformando la inteligencia artificial",
99 "Les algorithmes d'apprentissage automatique transforment l'intelligence artificielle",
100 ];
101
102 for text in &mixedtexts {
103 let result = processor.process(text)?;
104
105 println!("\nOriginal: \"{text}\"");
106 println!(
107 "Detected Language: {} (confidence: {:.2}%)",
108 result.language.name(),
109 result.confidence * 100.0
110 );
111 println!("Tokens: {} total", result.tokens.len());
112 println!(
113 "Filtered Tokens: {} after stop word removal",
114 result.filtered_tokens.len()
115 );
116
117 if result.filtered_tokens.len() <= 5 {
118 println!("Filtered: {:?}", result.filtered_tokens);
119 } else {
120 println!("First 5 filtered: {:?}...", &result.filtered_tokens[..5]);
121 }
122 }
123
124 println!("\n\nLanguage Code Conversions:");
126 println!("=========================");
127
128 let languages = vec![
129 Language::English,
130 Language::Spanish,
131 Language::French,
132 Language::German,
133 Language::Chinese,
134 Language::Japanese,
135 ];
136
137 for lang in &languages {
138 println!("{}: ISO code = {}", lang.name(), lang.iso_code());
139 }
140
141 println!("\nReverse lookup:");
142 let codes = vec!["en", "es", "fr", "de", "zh", "ja", "xx"];
143 for code in &codes {
144 let lang = Language::from_iso_code(code);
145 println!("{} -> {}", code, lang.name());
146 }
147
148 Ok(())
149}